bin: handle thousands of output files
Fixes #534. * bin/common_file.hh, bin/common_file.cc: Make it possible to reopen a closed file. * bin/common_output.cc, bin/common_aoutput.cc: Add a heuristic to decide when to close files. * tests/core/serial.test: Add a test case. * NEWS: Mention the issue.
This commit is contained in:
parent
17a5b41d8c
commit
40e30df7e3
6 changed files with 76 additions and 1 deletions
7
NEWS
7
NEWS
|
|
@ -29,6 +29,13 @@ New in spot 2.11.5.dev (not yet released)
|
|||
- spot::bdd_to_cnf_formula() is a new variant of spot::bdd_to_formula()
|
||||
that converts a BDD into a CNF instead of a DNF.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
- Running command lines such as "autfilt input.hoa -o output-%L.hoa"
|
||||
where thousands of different filenames can be created failed with
|
||||
"Too many open files". (Issue #534)
|
||||
|
||||
|
||||
New in spot 2.11.5 (2023-04-20)
|
||||
|
||||
Bug fixes:
|
||||
|
|
|
|||
|
|
@ -644,7 +644,27 @@ automaton_printer::print(const spot::twa_graph_ptr& aut,
|
|||
auto [it, b] = outputfiles.try_emplace(fname, nullptr);
|
||||
if (b)
|
||||
it->second.reset(new output_file(fname.c_str()));
|
||||
else
|
||||
// reopen if the file has been closed; see below
|
||||
it->second->reopen_for_append(fname);
|
||||
out = &it->second->ostream();
|
||||
|
||||
// If we have opened fewer than 10 files, we keep them all open
|
||||
// to avoid wasting time on open/close calls.
|
||||
//
|
||||
// However we cannot keep all files open, especially in
|
||||
// scenarios were we use thousands of files only once. To keep
|
||||
// things simple, we only close the previous file if it is not
|
||||
// the current output. This way we still save the close/open
|
||||
// cost when consecutive automata are sent to the same file.
|
||||
static output_file* previous = nullptr;
|
||||
static const std::string* previous_name = nullptr;
|
||||
if (previous
|
||||
&& outputfiles.size() > 10
|
||||
&& &previous->ostream() != out)
|
||||
previous->close(*previous_name);
|
||||
previous = it->second.get();
|
||||
previous_name = &it->first;
|
||||
}
|
||||
|
||||
// Output it.
|
||||
|
|
|
|||
|
|
@ -44,13 +44,30 @@ output_file::output_file(const char* name, bool force_append)
|
|||
os_ = of_.get();
|
||||
}
|
||||
|
||||
void
|
||||
output_file::reopen_for_append(const std::string& name)
|
||||
{
|
||||
if (of_ && of_->is_open()) // nothing to do
|
||||
return;
|
||||
const char* cname = name.c_str();
|
||||
if (cname[0] == '>' && cname[1] == '>')
|
||||
cname += 2;
|
||||
if (name[0] == '-' && name[1] == 0)
|
||||
{
|
||||
os_ = &std::cout;
|
||||
return;
|
||||
}
|
||||
of_->open(cname, std::ios_base::app);
|
||||
if (!*of_)
|
||||
error(2, errno, "cannot reopen '%s'", cname);
|
||||
}
|
||||
|
||||
void output_file::close(const std::string& name)
|
||||
{
|
||||
// We close of_, not os_, so that we never close std::cout.
|
||||
if (os_)
|
||||
os_->flush();
|
||||
if (of_)
|
||||
if (of_ && of_->is_open())
|
||||
of_->close();
|
||||
if (os_ && !*os_)
|
||||
error(2, 0, "error writing to %s",
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ public:
|
|||
|
||||
void close(const std::string& name);
|
||||
|
||||
void reopen_for_append(const std::string& name);
|
||||
|
||||
bool append() const
|
||||
{
|
||||
return append_;
|
||||
|
|
|
|||
|
|
@ -429,7 +429,27 @@ output_formula_checked(spot::formula f, spot::process_timer* ptimer,
|
|||
auto [it, b] = outputfiles.try_emplace(fname, nullptr);
|
||||
if (b)
|
||||
it->second.reset(new output_file(fname.c_str()));
|
||||
else
|
||||
// reopen if the file has been closed; see below
|
||||
it->second->reopen_for_append(fname);
|
||||
out = &it->second->ostream();
|
||||
|
||||
// If we have opened fewer than 10 files, we keep them all open
|
||||
// to avoid wasting time on open/close calls.
|
||||
//
|
||||
// However we cannot keep all files open, especially in
|
||||
// scenarios were we use thousands of files only once. To keep
|
||||
// things simple, we only close the previous file if it is not
|
||||
// the current output. This way we still save the close/open
|
||||
// cost when consecutive formulas are sent to the same file.
|
||||
static output_file* previous = nullptr;
|
||||
static const std::string* previous_name = nullptr;
|
||||
if (previous
|
||||
&& outputfiles.size() > 10
|
||||
&& &previous->ostream() != out)
|
||||
previous->close(*previous_name);
|
||||
previous = it->second.get();
|
||||
previous_name = &it->first;
|
||||
}
|
||||
output_formula(*out, f, ptimer, filename, linenum, index, prefix, suffix);
|
||||
*out << output_terminator;
|
||||
|
|
|
|||
|
|
@ -69,3 +69,12 @@ b/1
|
|||
2,1
|
||||
EOF
|
||||
diff -u out exp
|
||||
|
||||
|
||||
# Split on more than 1024 files. In Spot < 2.12 this was likely
|
||||
# to run out of file descriptors, because they weren't closed.
|
||||
randaut -Q3 2 -n 2000 -o randaut-%l.hoa
|
||||
test 2000 = `ls -l randaut-*.hoa | wc -l`
|
||||
# likewise for LTL formulas
|
||||
randltl 2 -n 2000 -o randltl-%l.ltl
|
||||
test 2000 = `ls -l randltl-*.ltl | wc -l`
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue