bin: handle thousands of output files

Fixes #534.  Test case is only on next branch.

* bin/common_file.hh, bin/common_file.cc: Make it possible
to reopen a closed file.
* bin/common_output.cc, bin/common_aoutput.cc: Add a heuristic
to decide when to close files.
* NEWS: Mention the issue.
This commit is contained in:
Alexandre Duret-Lutz 2023-07-24 16:56:24 +02:00
parent adca03a30a
commit bd1809f758
5 changed files with 67 additions and 1 deletions

7
NEWS
View file

@ -2,6 +2,13 @@ New in spot 2.11.5.dev (not yet released)
Nothing yet.
Bug fixes:
- Running command lines such as "autfilt input.hoa -o output-%L.hoa"
where thousands of different filenames can be created failed with
"Too many open files". (Issue #534)
New in spot 2.11.5 (2023-04-20)
Bug fixes:

View file

@ -636,7 +636,27 @@ automaton_printer::print(const spot::twa_graph_ptr& aut,
auto [it, b] = outputfiles.try_emplace(fname, nullptr);
if (b)
it->second.reset(new output_file(fname.c_str()));
else
// reopen if the file has been closed; see below
it->second->reopen_for_append(fname);
out = &it->second->ostream();
// If we have opened fewer than 10 files, we keep them all open
// to avoid wasting time on open/close calls.
//
// However we cannot keep all files open, especially in
// scenarios were we use thousands of files only once. To keep
// things simple, we only close the previous file if it is not
// the current output. This way we still save the close/open
// cost when consecutive automata are sent to the same file.
static output_file* previous = nullptr;
static const std::string* previous_name = nullptr;
if (previous
&& outputfiles.size() > 10
&& &previous->ostream() != out)
previous->close(*previous_name);
previous = it->second.get();
previous_name = &it->first;
}
// Output it.

View file

@ -44,13 +44,30 @@ output_file::output_file(const char* name, bool force_append)
os_ = of_.get();
}
void
output_file::reopen_for_append(const std::string& name)
{
if (of_ && of_->is_open()) // nothing to do
return;
const char* cname = name.c_str();
if (cname[0] == '>' && cname[1] == '>')
cname += 2;
if (name[0] == '-' && name[1] == 0)
{
os_ = &std::cout;
return;
}
of_->open(cname, std::ios_base::app);
if (!*of_)
error(2, errno, "cannot reopen '%s'", cname);
}
void output_file::close(const std::string& name)
{
// We close of_, not os_, so that we never close std::cout.
if (os_)
os_->flush();
if (of_)
if (of_ && of_->is_open())
of_->close();
if (os_ && !*os_)
error(2, 0, "error writing to %s",

View file

@ -37,6 +37,8 @@ public:
void close(const std::string& name);
void reopen_for_append(const std::string& name);
bool append() const
{
return append_;

View file

@ -420,7 +420,27 @@ output_formula_checked(spot::formula f, spot::process_timer* ptimer,
auto [it, b] = outputfiles.try_emplace(fname, nullptr);
if (b)
it->second.reset(new output_file(fname.c_str()));
else
// reopen if the file has been closed; see below
it->second->reopen_for_append(fname);
out = &it->second->ostream();
// If we have opened fewer than 10 files, we keep them all open
// to avoid wasting time on open/close calls.
//
// However we cannot keep all files open, especially in
// scenarios were we use thousands of files only once. To keep
// things simple, we only close the previous file if it is not
// the current output. This way we still save the close/open
// cost when consecutive formulas are sent to the same file.
static output_file* previous = nullptr;
static const std::string* previous_name = nullptr;
if (previous
&& outputfiles.size() > 10
&& &previous->ostream() != out)
previous->close(*previous_name);
previous = it->second.get();
previous_name = &it->first;
}
output_formula(*out, f, ptimer, filename, linenum, prefix, suffix);
*out << output_terminator;