bin: handle thousands of output files

Fixes #534.

* bin/common_file.hh, bin/common_file.cc: Make it possible
to reopen a closed file.
* bin/common_output.cc, bin/common_aoutput.cc: Add a heuristic
to decide when to close files.
* tests/core/serial.test: Add a test case.
* NEWS: Mention the issue.
This commit is contained in:
Alexandre Duret-Lutz 2023-07-24 16:56:24 +02:00
parent 17a5b41d8c
commit 40e30df7e3
6 changed files with 76 additions and 1 deletions

7
NEWS
View file

@ -29,6 +29,13 @@ New in spot 2.11.5.dev (not yet released)
- spot::bdd_to_cnf_formula() is a new variant of spot::bdd_to_formula() - spot::bdd_to_cnf_formula() is a new variant of spot::bdd_to_formula()
that converts a BDD into a CNF instead of a DNF. that converts a BDD into a CNF instead of a DNF.
Bug fixes:
- Running command lines such as "autfilt input.hoa -o output-%L.hoa"
where thousands of different filenames can be created failed with
"Too many open files". (Issue #534)
New in spot 2.11.5 (2023-04-20) New in spot 2.11.5 (2023-04-20)
Bug fixes: Bug fixes:

View file

@ -644,7 +644,27 @@ automaton_printer::print(const spot::twa_graph_ptr& aut,
auto [it, b] = outputfiles.try_emplace(fname, nullptr); auto [it, b] = outputfiles.try_emplace(fname, nullptr);
if (b) if (b)
it->second.reset(new output_file(fname.c_str())); it->second.reset(new output_file(fname.c_str()));
else
// reopen if the file has been closed; see below
it->second->reopen_for_append(fname);
out = &it->second->ostream(); out = &it->second->ostream();
// If we have opened at most 10 files, we keep them all open
// to avoid wasting time on open/close calls.
//
// However we cannot keep all files open, especially in
// scenarios where we use thousands of files only once. To keep
// things simple, we only close the previous file if it is not
// the current output. This way we still save the close/open
// cost when consecutive automata are sent to the same file.
static output_file* previous = nullptr;
static const std::string* previous_name = nullptr;
if (previous
&& outputfiles.size() > 10
&& &previous->ostream() != out)
previous->close(*previous_name);
previous = it->second.get();
previous_name = &it->first;
} }
// Output it. // Output it.

View file

@ -44,13 +44,30 @@ output_file::output_file(const char* name, bool force_append)
os_ = of_.get(); os_ = of_.get();
} }
// Reopen, in append mode, the file designated by NAME if its
// underlying file stream is not currently open.
//
// NAME may start with ">>"; that prefix is skipped to obtain the
// actual file name.  The file name "-" designates standard output,
// in which case the output stream is simply pointed to std::cout and
// no file is opened.
//
// If the file cannot be opened, the failure is reported via
// error() with status 2.
void
output_file::reopen_for_append(const std::string& name)
{
  if (of_ && of_->is_open())    // nothing to do
    return;
  const char* cname = name.c_str();
  if (cname[0] == '>' && cname[1] == '>')
    cname += 2;
  // Test the name *after* skipping ">>", so that ">>-" is also
  // recognized as standard output instead of being passed to
  // of_->open() as a file literally called "-".
  // NOTE(review): presumably the constructor performs the same test
  // on the stripped name -- confirm.
  if (cname[0] == '-' && cname[1] == 0)
    {
      os_ = &std::cout;
      return;
    }
  // Always append: the file was already created (and possibly
  // written to) before it was closed.
  of_->open(cname, std::ios_base::app);
  if (!*of_)
    error(2, errno, "cannot reopen '%s'", cname);
}
void output_file::close(const std::string& name) void output_file::close(const std::string& name)
{ {
// We close of_, not os_, so that we never close std::cout. // We close of_, not os_, so that we never close std::cout.
if (os_) if (os_)
os_->flush(); os_->flush();
if (of_) if (of_ && of_->is_open())
of_->close(); of_->close();
if (os_ && !*os_) if (os_ && !*os_)
error(2, 0, "error writing to %s", error(2, 0, "error writing to %s",

View file

@ -37,6 +37,8 @@ public:
void close(const std::string& name); void close(const std::string& name);
void reopen_for_append(const std::string& name);
bool append() const bool append() const
{ {
return append_; return append_;

View file

@ -429,7 +429,27 @@ output_formula_checked(spot::formula f, spot::process_timer* ptimer,
auto [it, b] = outputfiles.try_emplace(fname, nullptr); auto [it, b] = outputfiles.try_emplace(fname, nullptr);
if (b) if (b)
it->second.reset(new output_file(fname.c_str())); it->second.reset(new output_file(fname.c_str()));
else
// reopen if the file has been closed; see below
it->second->reopen_for_append(fname);
out = &it->second->ostream(); out = &it->second->ostream();
// If we have opened at most 10 files, we keep them all open
// to avoid wasting time on open/close calls.
//
// However we cannot keep all files open, especially in
// scenarios where we use thousands of files only once. To keep
// things simple, we only close the previous file if it is not
// the current output. This way we still save the close/open
// cost when consecutive formulas are sent to the same file.
static output_file* previous = nullptr;
static const std::string* previous_name = nullptr;
if (previous
&& outputfiles.size() > 10
&& &previous->ostream() != out)
previous->close(*previous_name);
previous = it->second.get();
previous_name = &it->first;
} }
output_formula(*out, f, ptimer, filename, linenum, index, prefix, suffix); output_formula(*out, f, ptimer, filename, linenum, index, prefix, suffix);
*out << output_terminator; *out << output_terminator;

View file

@ -69,3 +69,12 @@ b/1
2,1 2,1
EOF EOF
diff -u out exp diff -u out exp
# Split on more than 1024 files. In Spot < 2.12 this was likely
# to run out of file descriptors, because they weren't closed.
randaut -Q3 2 -n 2000 -o randaut-%l.hoa
test 2000 = `ls -l randaut-*.hoa | wc -l`
# likewise for LTL formulas
randltl 2 -n 2000 -o randltl-%l.ltl
test 2000 = `ls -l randltl-*.ltl | wc -l`