From bd1809f7588c60325397a5fef42a62defc49f56e Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Mon, 24 Jul 2023 16:56:24 +0200 Subject: [PATCH] bin: handle thousands of output files Fixes #534. Test case is only on next branch. * bin/common_file.hh, bin/common_file.cc: Make it possible to reopen a closed file. * bin/common_output.cc, bin/common_aoutput.cc: Add a heuristic to decide when to close files. * NEWS: Mention the issue. --- NEWS | 7 +++++++ bin/common_aoutput.cc | 20 ++++++++++++++++++++ bin/common_file.cc | 19 ++++++++++++++++++- bin/common_file.hh | 2 ++ bin/common_output.cc | 20 ++++++++++++++++++++ 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 0b425272f..29cfa7cb1 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,13 @@ New in spot 2.11.5.dev (not yet released) Nothing yet. + Bug fixes: + + - Running command lines such as "autfilt input.hoa -o output-%L.hoa" + where thousands of different filenames can be created failed with + "Too many open files". (Issue #534) + + New in spot 2.11.5 (2023-04-20) Bug fixes: diff --git a/bin/common_aoutput.cc b/bin/common_aoutput.cc index 60f83289e..ea44d6c22 100644 --- a/bin/common_aoutput.cc +++ b/bin/common_aoutput.cc @@ -636,7 +636,27 @@ automaton_printer::print(const spot::twa_graph_ptr& aut, auto [it, b] = outputfiles.try_emplace(fname, nullptr); if (b) it->second.reset(new output_file(fname.c_str())); + else + // reopen if the file has been closed; see below + it->second->reopen_for_append(fname); out = &it->second->ostream(); + + // If we have opened fewer than 10 files, we keep them all open + // to avoid wasting time on open/close calls. + // + // However we cannot keep all files open, especially in + // scenarios were we use thousands of files only once. To keep + // things simple, we only close the previous file if it is not + // the current output. This way we still save the close/open + // cost when consecutive automata are sent to the same file. + static output_file* previous = nullptr; + static const std::string* previous_name = nullptr; + if (previous + && outputfiles.size() > 10 + && &previous->ostream() != out) + previous->close(*previous_name); + previous = it->second.get(); + previous_name = &it->first; } // Output it. diff --git a/bin/common_file.cc b/bin/common_file.cc index 4e56c6d54..ebad8b878 100644 --- a/bin/common_file.cc +++ b/bin/common_file.cc @@ -44,13 +44,30 @@ output_file::output_file(const char* name, bool force_append) os_ = of_.get(); } +void +output_file::reopen_for_append(const std::string& name) +{ + if (of_ && of_->is_open()) // nothing to do + return; + const char* cname = name.c_str(); + if (cname[0] == '>' && cname[1] == '>') + cname += 2; + if (name[0] == '-' && name[1] == 0) + { + os_ = &std::cout; + return; + } + of_->open(cname, std::ios_base::app); + if (!*of_) + error(2, errno, "cannot reopen '%s'", cname); +} void output_file::close(const std::string& name) { // We close of_, not os_, so that we never close std::cout. if (os_) os_->flush(); - if (of_) + if (of_ && of_->is_open()) of_->close(); if (os_ && !*os_) error(2, 0, "error writing to %s", diff --git a/bin/common_file.hh b/bin/common_file.hh index b6aa0bec3..51000d18c 100644 --- a/bin/common_file.hh +++ b/bin/common_file.hh @@ -37,6 +37,8 @@ public: void close(const std::string& name); + void reopen_for_append(const std::string& name); + bool append() const { return append_; diff --git a/bin/common_output.cc b/bin/common_output.cc index 93cb2dfaf..4ab62a9aa 100644 --- a/bin/common_output.cc +++ b/bin/common_output.cc @@ -420,7 +420,27 @@ output_formula_checked(spot::formula f, spot::process_timer* ptimer, auto [it, b] = outputfiles.try_emplace(fname, nullptr); if (b) it->second.reset(new output_file(fname.c_str())); + else + // reopen if the file has been closed; see below + it->second->reopen_for_append(fname); out = &it->second->ostream(); + + // If we have opened fewer than 10 files, we keep them all open + // to avoid wasting time on open/close calls. + // + // However we cannot keep all files open, especially in + // scenarios were we use thousands of files only once. To keep + // things simple, we only close the previous file if it is not + // the current output. This way we still save the close/open + // cost when consecutive formulas are sent to the same file. + static output_file* previous = nullptr; + static const std::string* previous_name = nullptr; + if (previous + && outputfiles.size() > 10 + && &previous->ostream() != out) + previous->close(*previous_name); + previous = it->second.get(); + previous_name = &it->first; } output_formula(*out, f, ptimer, filename, linenum, prefix, suffix); *out << output_terminator;