From ca0d81b5d7e7517f6162eaa97afa72059c26d1d4 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Mon, 8 Aug 2016 12:21:34 +0200 Subject: [PATCH] autfilt, dstar2tgba: add CSV input Fixes #91. * bin/autfilt.cc, bin/dstar2tgba.cc: Implement reading CSV files. * bin/common_finput.cc: Fix comments. * bin/common_aoutput.cc: Show %<, %> in help text. * NEWS, doc/org/csv.org: Document it. * tests/core/readsave.test: Add a short test case. --- NEWS | 6 +- bin/autfilt.cc | 120 +++++++++++++++++++++++++++++---------- bin/common_aoutput.cc | 6 ++ bin/common_finput.cc | 3 +- bin/dstar2tgba.cc | 85 ++++++++++++++++++++++----- doc/org/csv.org | 55 +++++++++++++++++- tests/core/readsave.test | 5 ++ 7 files changed, 229 insertions(+), 51 deletions(-) diff --git a/NEWS b/NEWS index ed85be019..f33111689 100644 --- a/NEWS +++ b/NEWS @@ -109,9 +109,13 @@ New in spot 2.0.3a (not yet released) * The --stats option of autfilt, dstar2tgba, ltl2tgba, ltldo, randaut learned to display the output (or input if that makes - sense) automaton as a HOA one-liner using %H (or %h), helping to + sense) automaton as a HOA one-liner using %h (or %H), helping to create CSV files contained automata. + * autfilt and dstar2tgba learned to read automata from columns in + CSV files, specified using the same filename/COLUMN syntax used by + tools reading formulas. + * Arguments passed to -x (in ltl2tgba, ltl2tgta, autfilt, dstar2tgba) that are not used are now reported as they might be typos. This ocurred a couple of times in our test-suite. A similar diff --git a/bin/autfilt.cc b/bin/autfilt.cc index 6435d68ef..8cc2e7ce4 100644 --- a/bin/autfilt.cc +++ b/bin/autfilt.cc @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include "error.h" @@ -874,6 +876,90 @@ namespace SPOT_UNREACHABLE(); } + int process_string(const std::string& input, const char* filename, + int linenum) override + { + std::ostringstream loc; + loc << filename << ':' << linenum; + std::string locstr = loc.str(); + return process_automaton_stream + (spot::automaton_stream_parser(input.c_str(), locstr, opt_parse), + locstr.c_str()); + } + + int + aborted(const spot::const_parsed_aut_ptr& h, const char* filename) + { + std::cerr << filename << ':' << h->loc << ": aborted input automaton\n"; + return 2; + } + + int + process_file(const char* filename) override + { + // If we have a filename like "foo/NN" such + // that: + // ① foo/NN is not a file, + // ② NN is a number, + // ③ foo is a file, + // then it means we want to open foo as + // a CSV file and process column NN. + if (const char* slash = strrchr(filename, '/')) + { + char* end; + errno = 0; + long int col = strtol(slash + 1, &end, 10); + if (errno == 0 && !*end && col != 0) + { + struct stat buf; + if (stat(filename, &buf) != 0) + { + col_to_read = col; + if (real_filename) + free(real_filename); + real_filename = strndup(filename, slash - filename); + + // Special case for stdin. + if (real_filename[0] == '-' && real_filename[1] == 0) + return process_stream(std::cin, real_filename); + + std::ifstream input(real_filename); + if (input) + return process_stream(input, real_filename); + + error(2, errno, "cannot open '%s' nor '%s'", + filename, real_filename); + } + } + } + + return process_automaton_stream(spot::automaton_stream_parser(filename, + opt_parse), + filename); + } + + int process_automaton_stream(spot::automaton_stream_parser&& hp, + const char* filename) + { + int err = 0; + while (!abort_run) + { + auto haut = hp.parse(opt->dict); + if (!haut->aut && haut->errors.empty()) + break; + if (haut->format_errors(std::cerr)) + err = 2; + if (!haut->aut) + error(2, 0, "failed to read automaton from %s", filename); + else if (haut->aborted) + err = std::max(err, aborted(haut, filename)); + else + process_automaton(haut, filename); + } + return err; + } + + int process_automaton(const spot::const_parsed_aut_ptr& haut, const char* filename) @@ -1111,42 +1197,14 @@ namespace ++match_count; - printer.print(aut, nullptr, filename, -1, conversion_time, haut); + printer.print(aut, nullptr, filename, -1, conversion_time, haut, + prefix, suffix); if (opt_max_count >= 0 && match_count >= opt_max_count) abort_run = true; return 0; } - - int - aborted(const spot::const_parsed_aut_ptr& h, const char* filename) - { - std::cerr << filename << ':' << h->loc << ": aborted input automaton\n"; - return 2; - } - - int - process_file(const char* filename) override - { - auto hp = spot::automaton_stream_parser(filename, opt_parse); - int err = 0; - while (!abort_run) - { - auto haut = hp.parse(opt->dict); - if (!haut->aut && haut->errors.empty()) - break; - if (haut->format_errors(std::cerr)) - err = 2; - if (!haut->aut) - error(2, 0, "failed to read automaton from %s", filename); - else if (haut->aborted) - err = std::max(err, aborted(haut, filename)); - else - process_automaton(haut, filename); - } - return err; - } }; } @@ -1155,7 +1213,7 @@ main(int argc, char** argv) { setup(argv); - const argp ap = { options, parse_opt, "[FILENAMES...]", + const argp ap = { options, parse_opt, "[FILENAME[/COL]...]", argp_program_doc, children, nullptr, nullptr }; try diff --git a/bin/common_aoutput.cc b/bin/common_aoutput.cc index afeabec4c..e4682aeb8 100644 --- a/bin/common_aoutput.cc +++ b/bin/common_aoutput.cc @@ -181,6 +181,12 @@ static const argp_option io_options[] = "one word accepted by the output automaton", 0 }, { "%%", 0, nullptr, OPTION_DOC | OPTION_NO_USAGE, "a single %", 0 }, + { "%<", 0, nullptr, OPTION_DOC | OPTION_NO_USAGE, + "the part of the line before the automaton if it " + "comes from a column extracted from a CSV file", 4 }, + { "%>", 0, nullptr, OPTION_DOC | OPTION_NO_USAGE, + "the part of the line after the automaton if it " + "comes from a column extracted from a CSV file", 4 }, { nullptr, 0, nullptr, 0, nullptr, 0 } }; diff --git a/bin/common_finput.cc b/bin/common_finput.cc index 78f556e70..f2f3ef22d 100644 --- a/bin/common_finput.cc +++ b/bin/common_finput.cc @@ -305,7 +305,7 @@ job_processor::process_file(const char* filename) // If we have a filename like "foo/NN" such // that: // ① foo/NN is not a file (already the case), - // ② NN is a number > 0, + // ② NN is a number, // ③ foo is a file, // then it means we want to open foo as // a CSV file and process column NN. @@ -315,7 +315,6 @@ job_processor::process_file(const char* filename) char* end; errno = 0; long int col = strtol(slash + 1, &end, 10); - // strtol ate all remaining characters and NN is positive if (errno == 0 && !*end && col != 0) { col_to_read = col; diff --git a/bin/dstar2tgba.cc b/bin/dstar2tgba.cc index c687bcac5..7df42b64b 100644 --- a/bin/dstar2tgba.cc +++ b/bin/dstar2tgba.cc @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include "error.h" @@ -123,19 +125,15 @@ namespace SPOT_UNREACHABLE(); } - int - process_automaton(const spot::const_parsed_aut_ptr& haut, - const char* filename) + int process_string(const std::string& input, const char* filename, + int linenum) override { - spot::stopwatch sw; - sw.start(); - auto nba = spot::to_generalized_buchi(haut->aut); - auto aut = post.run(nba, nullptr); - const double conversion_time = sw.stop(); - - printer.print(aut, nullptr, filename, -1, conversion_time, haut); - flush_cout(); - return 0; + std::ostringstream loc; + loc << filename << ':' << linenum; + std::string locstr = loc.str(); + return process_automaton_stream + (spot::automaton_stream_parser(input.c_str(), locstr, opt_parse), + locstr.c_str()); } int @@ -148,7 +146,50 @@ namespace int process_file(const char* filename) override { - auto hp = spot::automaton_stream_parser(filename, opt_parse); + // If we have a filename like "foo/NN" such + // that: + // ① foo/NN is not a file, + // ② NN is a number, + // ③ foo is a file, + // then it means we want to open foo as + // a CSV file and process column NN. + if (const char* slash = strrchr(filename, '/')) + { + char* end; + errno = 0; + long int col = strtol(slash + 1, &end, 10); + if (errno == 0 && !*end && col != 0) + { + struct stat buf; + if (stat(filename, &buf) != 0) + { + col_to_read = col; + if (real_filename) + free(real_filename); + real_filename = strndup(filename, slash - filename); + + // Special case for stdin. + if (real_filename[0] == '-' && real_filename[1] == 0) + return process_stream(std::cin, real_filename); + + std::ifstream input(real_filename); + if (input) + return process_stream(input, real_filename); + + error(2, errno, "cannot open '%s' nor '%s'", + filename, real_filename); + } + } + } + + return process_automaton_stream(spot::automaton_stream_parser(filename, + opt_parse), + filename); + } + + int process_automaton_stream(spot::automaton_stream_parser&& hp, + const char* filename) + { int err = 0; while (!abort_run) { @@ -166,6 +207,22 @@ namespace } return err; } + + + int + process_automaton(const spot::const_parsed_aut_ptr& haut, + const char* filename) + { + spot::stopwatch sw; + sw.start(); + auto nba = spot::to_generalized_buchi(haut->aut); + auto aut = post.run(nba, nullptr); + const double conversion_time = sw.stop(); + + printer.print(aut, nullptr, filename, -1, conversion_time, haut); + flush_cout(); + return 0; + } }; } @@ -174,7 +231,7 @@ main(int argc, char** argv) { setup(argv); - const argp ap = { options, parse_opt, "[FILENAMES...]", + const argp ap = { options, parse_opt, "[FILENAME[/COL]...]", argp_program_doc, children, nullptr, nullptr }; if (int err = argp_parse(&ap, argc, argv, ARGP_NO_HELP, nullptr, nullptr)) diff --git a/doc/org/csv.org b/doc/org/csv.org index 56a523c4e..122d277dd 100644 --- a/doc/org/csv.org +++ b/doc/org/csv.org @@ -9,8 +9,8 @@ tools to produce an consume CSV files. * Producing CSV files -All the tools that normally produce formulas (like [[file:genltl.org][=genltl=]], [[file:randltl.org][=randltl=]], -and [[file:ltlfilt.org][=ltlfilt=]]) have a [[file:ioltl.org][=--format= option]]. That can be used to +All the tools that normally produce formulas (like [[file:genltl.org][=genltl=]], +[[file:randltl.org][=randltl=]], and [[file:ltlfilt.org][=ltlfilt=]]) have a [[file:ioltl.org][=--format= option]] that can be used to customize the way output is formatted. For instance here is how we could use =genltl= to generate a CSV file @@ -38,7 +38,8 @@ u-left,5,(((p1 U p2) U p3) U p4) U p5 Tools that produce automata (like [[file:ltl2tgba.org][=ltl2tgba=]], [[file:dstar2tgba.org][=dstar2tgba=]], [[file:autfilt.org][=autfilt=]], or [[file:randaut.org][=randaut=]]) have a =--stats= option that can be used to output various statistics about the constructed automaton (these statistics -are shown *instead* of printing the automaton). +are shown *instead* of printing the automaton, but one of those +allows printing the automaton as a one-liner in the HOA format). For instance, the following command will translate all the previous formulas, and show the resulting number of states (=%s=) and edges @@ -231,3 +232,51 @@ ltlfilt -F results.csv/-1 --format='%f' --unique : 0 : !G(Fb | F!(b | Gb)) : G(Fb | F!(b | Gb)) + + +* CSV files containing automata + +The =--stats= option of tools that generate automata can be used to +generate CSV files that embed automata. For instance + +#+BEGIN_SRC sh :results verbatim :exports both +genltl --dac=1..3 | ltl2tgba --stats='"%f","%e edges","%h"' > csv-aut.csv +cat csv-aut.csv +#+END_SRC + +#+RESULTS: +: "G!p0","1 edges","HOA: v1 name: ""G!p0"" States: 1 Start: 0 AP: 1 ""p0"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!0] 0 --END--" +: "G!p0 | (!p1 U p0)","5 edges","HOA: v1 name: ""G!p0 | (!p1 U p0)"" States: 3 Start: 2 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!0] 0 State: 1 {0} [t] 1 State: 2 {0} [!0&1] 0 [0] 1 [!0&!1] 2 --END--" +: "G(!p0 | G!p1)","3 edges","HOA: v1 name: ""G(!p0 | G!p1)"" States: 2 Start: 1 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!1] 0 State: 1 {0} [0&!1] 0 [!0] 1 --END--" + +[[file:autfilt.org][=autfilt=]] can process a column of such a CSV file using the same +syntax we used previously, but by default it will just output the +automata. If you want to preserve the entire line, you should use +=%<= and =%>= in the =--stats= format. + +#+BEGIN_SRC sh :results verbatim :exports both +autfilt csv-aut.csv/3 --states=2..3 --stats='%<,"%h"' +#+END_SRC + +#+RESULTS: +: "G!p0 | (!p1 U p0)","5 edges","HOA: v1 States: 3 Start: 2 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!0] 0 State: 1 {0} [t] 1 State: 2 {0} [!0&1] 0 [0] 1 [!0&!1] 2 --END--" +: "G(!p0 | G!p1)","3 edges","HOA: v1 States: 2 Start: 1 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!1] 0 State: 1 {0} [0&!1] 0 [!0] 1 --END--" + + +Another source of automata in CSV format is =ltlcross=. Using options +=--automata= it will record the automata produced by each tool into +the CSV file: + +#+BEGIN_SRC sh :results verbatim :exports both +genltl --dac=1..3 | ltlcross --csv=result.csv --automata ltl2tgba +cat result.csv +#+END_SRC + +#+RESULTS: +: "formula","tool","exit_status","exit_code","time","states","edges","transitions","acc","scc","nonacc_scc","terminal_scc","weak_scc","strong_scc","nondet_states","nondet_aut","terminal_aut","weak_aut","strong_aut","ambiguous_aut","complete_aut","product_states","product_transitions","product_scc","automaton" +: "G(!(p0))","ltl2tgba","ok",0,0.0243614,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,200,2055,2,"HOA: v1 name: ""G!p0"" States: 1 Start: 0 AP: 1 ""p0"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!0] 0 --END--" +: "!(G(!(p0)))","ltl2tgba","ok",0,0.0240194,2,3,4,1,2,1,1,0,0,0,0,1,0,0,0,1,400,8272,3,"HOA: v1 name: ""Fp0"" States: 2 Start: 1 AP: 1 ""p0"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc complete deterministic stutter-invariant terminal --BODY-- State: 0 {0} [t] 0 State: 1 [0] 0 [!0] 1 --END--" +: "(F(p0)) -> ((!(p1)) U (p0))","ltl2tgba","ok",0,0.0248105,3,5,10,1,3,0,1,2,0,0,0,0,1,0,0,0,598,10379,4,"HOA: v1 name: ""(!p1 U p0) | G!p0"" States: 3 Start: 2 AP: 2 ""p1"" ""p0"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!1] 0 State: 1 {0} [t] 1 State: 2 {0} [0&!1] 0 [1] 1 [!0&!1] 2 --END--" +: "!((F(p0)) -> ((!(p1)) U (p0)))","ltl2tgba","ok",0,0.0243147,3,5,10,1,3,2,1,0,0,0,0,1,0,0,0,0,598,10398,4,"HOA: v1 name: ""Fp0 & (p1 R !p0)"" States: 3 Start: 1 AP: 2 ""p1"" ""p0"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc deterministic stutter-invariant weak --BODY-- State: 0 {0} [t] 0 State: 1 [!0&!1] 1 [0&!1] 2 State: 2 [1] 0 [!1] 2 --END--" +: "G((p0) -> (G(!(p1))))","ltl2tgba","ok",0,0.0201896,2,3,5,1,2,0,0,2,0,0,0,0,1,0,0,0,400,5025,2,"HOA: v1 name: ""G(!p0 | G!p1)"" States: 2 Start: 1 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc colored deterministic stutter-invariant weak --BODY-- State: 0 {0} [!1] 0 State: 1 {0} [0&!1] 0 [!0] 1 --END--" +: "!(G((p0) -> (G(!(p1)))))","ltl2tgba","ok",0,0.0194598,3,6,12,1,3,2,1,0,0,0,0,1,0,0,0,1,600,12354,3,"HOA: v1 name: ""F(p0 & Fp1)"" States: 3 Start: 1 AP: 2 ""p0"" ""p1"" acc-name: Buchi Acceptance: 1 Inf(0) properties: trans-labels explicit-labels state-acc complete deterministic stutter-invariant terminal --BODY-- State: 0 [!1] 0 [1] 2 State: 1 [0&!1] 0 [!0] 1 [0&1] 2 State: 2 {0} [t] 2 --END--" diff --git a/tests/core/readsave.test b/tests/core/readsave.test index bdfbad1b3..a506afe7f 100755 --- a/tests/core/readsave.test +++ b/tests/core/readsave.test @@ -1035,3 +1035,8 @@ autfilt -H1.1 input9 | autfilt -d > output9b diff output9 output9b test 2 = `ltl2tgba 'GFa' 'a U b' 'a U b U c'| autfilt --ap=2..3 --count` + +# reading CSV with embedded automata +test 2 = `genltl --dac=1..3 | ltl2tgba --stats='%e,"%h",%s' | + dstar2tgba -F-/2 --stats='%<,%>,"%h"' | + autfilt --states=2..3 -F-/3 --stats='%<,"%h"' | wc -l`