From 844fb887d9e53cb8b9135259164b611a415d2a21 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Mon, 26 Aug 2024 11:42:09 +0200 Subject: [PATCH] ltlmix: add support for the I/O variants * bin/ltlmix.cc: Add options --ins, --outs, as well as the two-argument form of -A/-P. * bin/common_ioap.hh, bin/common_ioap.cc (is_output): New function. * spot/tl/apcollect.cc, spot/tl/apcollect.hh (create_atomic_prop_set): Allow the prefix string to be changed. * spot/tl/randomltl.cc, spot/tl/randomltl.hh: Add support for an I/O version with two set of atomic proposition, and a predicate to decide if the original proposition was input or output. * tests/core/ltlmix.test: More tests. --- bin/common_ioap.cc | 132 +++++++++++++++++++++-------------------- bin/common_ioap.hh | 9 +++ bin/ltlmix.cc | 88 ++++++++++++++++++++++++--- spot/tl/apcollect.cc | 4 +- spot/tl/apcollect.hh | 6 +- spot/tl/randomltl.cc | 88 ++++++++++++++++++++------- spot/tl/randomltl.hh | 35 +++++++++-- tests/core/ltlmix.test | 27 +++++++++ 8 files changed, 286 insertions(+), 103 deletions(-) diff --git a/bin/common_ioap.cc b/bin/common_ioap.cc index 312334051..65e05c7ca 100644 --- a/bin/common_ioap.cc +++ b/bin/common_ioap.cc @@ -88,6 +88,72 @@ list_aps_in_formula(spot::formula f) return aps; } + +bool +is_output(const std::string& a, const char* filename, int linenum) +{ + if (auto it = identifier_map.find(a); it != identifier_map.end()) + return it->second; + + bool found_in = false; + for (const std::regex& r: regex_in) + if (std::regex_search(a, r)) + { + found_in = true; + break; + } + bool found_out = false; + for (const std::regex& r: regex_out) + if (std::regex_search(a, r)) + { + found_out = true; + break; + } + if (all_input_aps.has_value() == all_output_aps.has_value()) + { + if (!all_input_aps.has_value()) + { + // If the atomic proposition hasn't been classified + // because neither --ins nor --out were specified, + // attempt to classify automatically using the first + // letter. + int fl = a[0]; + if (fl == 'i' || fl == 'I') + found_in = true; + else if (fl == 'o' || fl == 'O') + found_out = true; + } + if (found_in && found_out) + error_at_line(2, 0, filename, linenum, + "'%s' matches both --ins and --outs", + a.c_str()); + if (!found_in && !found_out) + { + if (all_input_aps.has_value() || all_output_aps.has_value()) + error_at_line(2, 0, filename, linenum, + "one of --ins or --outs should match '%s'", + a.c_str()); + else + error_at_line(2, 0, filename, linenum, + "since '%s' does not start with 'i' or 'o', " + "it is unclear if it is an input or " + "an output;\n use --ins or --outs", + a.c_str()); + } + } + else + { + // if we had only --ins or only --outs, anything not + // matching that was given is assumed to belong to the + // other one. + if (!all_input_aps.has_value() && !found_out) + found_in = true; + else if (!all_output_aps.has_value() && !found_in) + found_out = true; + } + return found_out; +} + // Takes a set of the atomic propositions appearing in the formula, // and separate them into two vectors: input APs and output APs. std::pair, std::vector> @@ -97,71 +163,7 @@ filter_list_of_aps(spot::formula f, const char* filename, int linenum) // now iterate over the list of atomic propositions to filter them std::vector matched[2]; // 0 = input, 1 = output for (const std::string& a: aps) - { - if (auto it = identifier_map.find(a); it != identifier_map.end()) - { - matched[it->second].push_back(a); - continue; - } - - bool found_in = false; - for (const std::regex& r: regex_in) - if (std::regex_search(a, r)) - { - found_in = true; - break; - } - bool found_out = false; - for (const std::regex& r: regex_out) - if (std::regex_search(a, r)) - { - found_out = true; - break; - } - if (all_input_aps.has_value() == all_output_aps.has_value()) - { - if (!all_input_aps.has_value()) - { - // If the atomic proposition hasn't been classified - // because neither --ins nor --out were specified, - // attempt to classify automatically using the first - // letter. - int fl = a[0]; - if (fl == 'i' || fl == 'I') - found_in = true; - else if (fl == 'o' || fl == 'O') - found_out = true; - } - if (found_in && found_out) - error_at_line(2, 0, filename, linenum, - "'%s' matches both --ins and --outs", - a.c_str()); - if (!found_in && !found_out) - { - if (all_input_aps.has_value() || all_output_aps.has_value()) - error_at_line(2, 0, filename, linenum, - "one of --ins or --outs should match '%s'", - a.c_str()); - else - error_at_line(2, 0, filename, linenum, - "since '%s' does not start with 'i' or 'o', " - "it is unclear if it is an input or " - "an output;\n use --ins or --outs", - a.c_str()); - } - } - else - { - // if we had only --ins or only --outs, anything not - // matching that was given is assumed to belong to the - // other one. - if (!all_input_aps.has_value() && !found_out) - found_in = true; - else if (!all_output_aps.has_value() && !found_in) - found_out = true; - } - matched[found_out].push_back(a); - } + matched[is_output(a, filename, linenum)].push_back(a); return {matched[0], matched[1]}; } diff --git a/bin/common_ioap.hh b/bin/common_ioap.hh index cc20c61a5..575e749bf 100644 --- a/bin/common_ioap.hh +++ b/bin/common_ioap.hh @@ -45,6 +45,15 @@ extern std::vector regex_out; // map identifier to input/output (false=input, true=output) extern std::unordered_map identifier_map; + +// Given an atomic proposition AP and the above +// regex_in/regex_out/identifier_map, decide if this AP is an output +// (true) or input (false. +bool +is_output(const std::string& ap, + const char* filename = nullptr, int linenum = 0); + + // Separate the set of the atomic propositions appearing in f, into // two vectors: input APs and output APs, becased on regex_in, // regex_out, and identifier_map. diff --git a/bin/ltlmix.cc b/bin/ltlmix.cc index 1e36eeeb2..a97c807d0 100644 --- a/bin/ltlmix.cc +++ b/bin/ltlmix.cc @@ -24,6 +24,7 @@ #include "common_setup.hh" #include "common_finput.hh" #include "common_output.hh" +#include "common_ioap.hh" #include "common_conv.hh" #include "common_cout.hh" #include "common_range.hh" @@ -36,7 +37,9 @@ enum { OPT_BOOLEAN_PRIORITIES = 256, OPT_DUMP_PRIORITIES, OPT_DUPS, + OPT_INS, OPT_LTL_PRIORITIES, + OPT_OUTS, OPT_SEED, OPT_TREE_SIZE, }; @@ -70,18 +73,21 @@ static const argp_option options[] = { { nullptr, 0, nullptr, 0, "Generation parameters:", 2 }, { "allow-dups", OPT_DUPS, nullptr, 0, "allow duplicate formulas to be output", 0 }, - { "ap-count", 'A', "N", 0, + { "ap-count", 'A', "N[,M]", 0, "rename the atomic propositions in each selected formula by drawing " "randomly from N atomic propositions (the rewriting is bijective " - "if N is larger than the original set)", 0 }, + "if N is larger than the original set). If M is specified, two sets " + "of atomic propositions are used to represent inputs and outputs, and " + "options --ins/--outs can be used to classify the original propositions.", + 0 }, { "boolean", 'B', nullptr, 0, "generate Boolean combinations of formulas (default)", 0 }, { "formulas", 'n', "INT", 0, "number of formulas to generate (default: 1);\n" "use a negative value for unbounded generation", 0 }, { "ltl", 'L', nullptr, 0, "generate LTL combinations of subformulas", 0 }, - { "polarized-ap", 'P', "N", 0, - "similar to -A N, but randomize the polarity of the new atomic " + { "polarized-ap", 'P', "N[,M]", 0, + "similar to -A, but randomize the polarity of the new atomic " "propositions", 0 }, { "random-conjuncts", 'C', "N", 0, "generate random-conjunctions of N conjuncts; " @@ -92,6 +98,12 @@ static const argp_option options[] = { { "tree-size", OPT_TREE_SIZE, "RANGE", 0, "tree size of main pattern generated (default: 5);\n" "input formulas count as size 1.", 0 }, + { "ins", OPT_INS, "PROPS", 0, + "comma-separated list of atomic propositions to consider as input, " + "interpreted as a regex if enclosed in slashes", 0 }, + { "outs", OPT_OUTS, "PROPS", 0, + "comma-separated list of atomic propositions to consider as putput, " + "interpreted as a regex if enclosed in slashes", 0 }, RANGE_DOC, /**************************************************/ { nullptr, 0, nullptr, 0, "Adjusting probabilities:", 4 }, @@ -140,7 +152,9 @@ static int opt_seed = 0; static range opt_tree_size = { 5, 5 }; static bool opt_unique = true; static int opt_ap_count = 0; +static int opt_out_ap_count = 0; static bool opt_literal = false; +static bool opt_io = false; namespace { @@ -160,8 +174,12 @@ namespace process_formula(spot::formula f, const char* filename = nullptr, int linenum = 0) override { - (void) filename; - (void) linenum; + if (opt_io) + // Filter the atomic propositions of each formula in order to + // report those that are not classifiable. Throw the result + // of that filtering away, as we only care about the potential + // diagnostics. + (void) filter_list_of_aps(f, filename, linenum); opt->sub.insert(f); return 0; } @@ -170,6 +188,24 @@ namespace static sub_processor subreader; +std::pair to_int_pair(const char* arg, const char* opt) +{ + const char* comma = strchr(arg, ','); + if (!comma) + { + int res = to_int(arg, opt); + return {res, 0}; + } + else + { + std::string arg1(arg, comma); + int res1 = to_int(arg1.c_str(), opt); + int res2 = to_int(comma + 1, opt); + return {res1, res2}; + } +} + + static int parse_opt(int key, char* arg, struct argp_state*) { @@ -178,7 +214,8 @@ parse_opt(int key, char* arg, struct argp_state*) switch (key) { case 'A': - opt_ap_count = to_int(arg, "-A/--ap-count"); + std::tie(opt_ap_count, opt_out_ap_count) = + to_int_pair(arg, "-A/--ap-count"); opt_literal = false; break; case 'B': @@ -199,7 +236,8 @@ parse_opt(int key, char* arg, struct argp_state*) opt_formulas = to_int(arg, "-n/--formulas"); break; case 'P': - opt_ap_count = to_int(arg, "-P/--polarized-ap"); + std::tie(opt_ap_count, opt_out_ap_count) = + to_int_pair(arg, "-P/--polarized-ap"); opt_literal = true; break; case OPT_BOOLEAN_PRIORITIES: @@ -214,6 +252,20 @@ parse_opt(int key, char* arg, struct argp_state*) case OPT_DUPS: opt_unique = false; break; + case OPT_INS: + { + all_input_aps.emplace(std::vector{}); + split_aps(arg, *all_input_aps); + opt_io = true; + break; + } + case OPT_OUTS: + { + all_output_aps.emplace(std::vector{}); + split_aps(arg, *all_output_aps); + opt_io = true; + break; + } case OPT_SEED: opt_seed = to_int(arg, "--seed"); break; @@ -249,6 +301,19 @@ main(int argc, char* argv[]) check_no_formula("combine"); + if (opt_io && !opt_out_ap_count) + error(2, 0, + "options --ins and --outs only make sense when the " + "two-argument version of '-A N,M' or '-P N,M' is used."); + if (opt_out_ap_count > 0) + // Do not require --ins/--outs to be used, as the input + // pattern may use atomic propositions starting with i/o + // already. Setting opt_io will cause the subreader to + // complain about unclassifible atomic propositions. + opt_io = true; + if (opt_io) + process_io_options(); + if (subreader.run()) return 2; @@ -257,11 +322,16 @@ main(int argc, char* argv[]) spot::srand(opt_seed); + std::function output_p = nullptr; + if (opt_out_ap_count) + output_p = [&](spot::formula f) { return is_output(f.ap_name()); }; + spot::randltlgenerator rg (opt_ap_count, [&] (){ spot::option_map opts; opts.set("output", output); + opts.set("out_ap_size", opt_out_ap_count); opts.set("tree_size_min", opt_tree_size.min); opts.set("tree_size_max", opt_tree_size.max); opts.set("seed", opt_seed); @@ -269,7 +339,7 @@ main(int argc, char* argv[]) opts.set("unique", opt_unique); opts.set("literals", opt_literal); return opts; - }(), opt_pL, nullptr, opt_pB, &opt->sub); + }(), opt_pL, nullptr, opt_pB, &opt->sub, output_p); if (opt_dump_priorities) { diff --git a/spot/tl/apcollect.cc b/spot/tl/apcollect.cc index 6cea88ea6..ca3004bd2 100644 --- a/spot/tl/apcollect.cc +++ b/spot/tl/apcollect.cc @@ -28,13 +28,13 @@ namespace spot { - atomic_prop_set create_atomic_prop_set(unsigned n) + atomic_prop_set create_atomic_prop_set(unsigned n, const char* prefix) { atomic_prop_set res; for (unsigned i = 0; i < n; ++i) { std::ostringstream p; - p << 'p' << i; + p << prefix << i; res.insert(formula::ap(p.str())); } return res; diff --git a/spot/tl/apcollect.hh b/spot/tl/apcollect.hh index fec68287c..0157f9c7c 100644 --- a/spot/tl/apcollect.hh +++ b/spot/tl/apcollect.hh @@ -32,9 +32,11 @@ namespace spot /// Set of atomic propositions. typedef std::set atomic_prop_set; - /// \brief construct an atomic_prop_set with n propositions + /// \brief construct an atomic_prop_set with n propositions starting + /// with \a prefix SPOT_API - atomic_prop_set create_atomic_prop_set(unsigned n); + atomic_prop_set create_atomic_prop_set(unsigned n, + const char* prefix = "p"); /// \brief Return the set of atomic propositions occurring in a formula. /// diff --git a/spot/tl/randomltl.cc b/spot/tl/randomltl.cc index 913b60522..f36d4c444 100644 --- a/spot/tl/randomltl.cc +++ b/spot/tl/randomltl.cc @@ -32,11 +32,30 @@ namespace spot // randomly from \a ap. Avoid repetition if \a ap is large // enough. If \a lit is true, change the polarity of the atomic // proposition randomly. + // + // If \a out_ap is non-empty, use \a is_output to decide if an original + // atomic proposition should be replaced by an AP from ap or out_ap. static formula - randomize_ap(formula f, const atomic_prop_set* ap, bool lit) + randomize_ap(formula f, const atomic_prop_set* ap, + const atomic_prop_set* out_ap, + std::function is_output, + bool lit) { std::vector randap(ap->begin(), ap->end()); + std::vector randap_out; + if (out_ap && is_output != nullptr) + { + randap_out.reserve(out_ap->size()); + randap_out.insert(randap_out.begin(), out_ap->begin(), out_ap->end()); + } + if (randap_out.empty()) + { + is_output = nullptr; + out_ap = nullptr; + } + unsigned current_range = randap.size(); + unsigned current_out_range = randap_out.size(); std::map mapping; auto relabel = [&](formula f, auto self) -> formula @@ -47,15 +66,29 @@ namespace spot if (auto it = mapping.find(f); it != mapping.end()) return it->second; - // If we exhausted all possible AP, start again - if (current_range == 0) - current_range = randap.size(); - - // - unsigned pos = mrand(current_range--); - formula ap = randap[pos]; - std::swap(randap[current_range], randap[pos]); + bool is_out = false; + if (out_ap && is_output != nullptr) + is_out = is_output(f); + formula ap; + if (!is_out) + { + // If we exhausted all possible AP, start again + if (current_range == 0) + current_range = randap.size(); + unsigned pos = mrand(current_range--); + ap = randap[pos]; + std::swap(randap[current_range], randap[pos]); + } + else + { + // If we exhausted all possible AP, start again + if (current_out_range == 0) + current_out_range = randap_out.size(); + unsigned pos = mrand(current_out_range--); + ap = randap_out[pos]; + std::swap(randap_out[current_out_range], randap_out[pos]); + } if (lit && drand() < 0.5) ap = formula::Not(ap); @@ -86,8 +119,11 @@ namespace spot std::advance(i, mrand(rl->patterns()->size())); formula f = *i; const atomic_prop_set* ap = rl->ap(); + const atomic_prop_set* out_ap = rl->output_ap(); + auto is_output = rl->is_output_fun(); if (ap && ap->size() > 0) - f = randomize_ap(f, ap, rl->draw_literals()); + f = randomize_ap(f, ap, out_ap, is_output, + rl->draw_literals()); return f; } @@ -407,8 +443,10 @@ namespace spot // Boolean formulae random_boolean::random_boolean(const atomic_prop_set* ap, + const atomic_prop_set* output_ap, + std::function is_output, const atomic_prop_set* patterns) - : random_formula(9, ap) + : random_formula(9, ap, output_ap, is_output) { if (patterns) { @@ -473,15 +511,19 @@ namespace spot } random_ltl::random_ltl(const atomic_prop_set* ap, + const atomic_prop_set* output_ap, + std::function is_output, const atomic_prop_set* patterns) - : random_formula(16, ap) + : random_formula(16, ap, output_ap, is_output) { setup_proba_(patterns); update_sums(); } - random_ltl::random_ltl(int size, const atomic_prop_set* ap) - : random_formula(size, ap) + random_ltl::random_ltl(int size, const atomic_prop_set* ap, + const atomic_prop_set* output_ap, + std::function is_output) + : random_formula(size, ap, output_ap, is_output) { setup_proba_(nullptr); // No call to update_sums(), this functions is always @@ -507,7 +549,8 @@ namespace spot char* opt_pL, char* opt_pS, char* opt_pB, - const atomic_prop_set* subs) + const atomic_prop_set* subs, + std::function is_output) : opt_simpl_level_(opts.get("simplification_level", 3)), simpl_(tl_simplifier_options{opt_simpl_level_}) { @@ -518,6 +561,9 @@ namespace spot opt_tree_size_max_ = opts.get("tree_size_max", 15); opt_unique_ = opts.get("unique", 1); opt_wf_ = opts.get("wf", 0); + unsigned opt_output = opts.get("out_ap_size"); + if (opt_output > 0) + aprops_out_ = create_atomic_prop_set(opt_output, "o"); bool lit = opts.get("literals", 0); const char* tok_pL = nullptr; @@ -527,7 +573,7 @@ namespace spot switch (output_) { case randltlgenerator::LTL: - rf_ = new random_ltl(&aprops_, subs); + rf_ = new random_ltl(&aprops_, &aprops_out_, is_output, subs); rf_->draw_literals(lit); if (opt_pS) throw std::invalid_argument("Cannot set SERE priorities with " @@ -538,7 +584,7 @@ namespace spot tok_pL = rf_->parse_options(opt_pL); break; case randltlgenerator::Bool: - rf_ = new random_boolean(&aprops_, subs); + rf_ = new random_boolean(&aprops_, &aprops_out_, is_output, subs); rf_->draw_literals(lit); tok_pB = rf_->parse_options(opt_pB); if (opt_pL) @@ -583,9 +629,11 @@ namespace spot char* opt_pL, char* opt_pS, char* opt_pB, - const atomic_prop_set* subs) - : randltlgenerator(create_atomic_prop_set(aprops_n), opts, - opt_pL, opt_pS, opt_pB, subs) + const atomic_prop_set* subs, + std::function is_output) + : randltlgenerator(create_atomic_prop_set(aprops_n, + is_output == nullptr ? "p" : "i"), + opts, opt_pL, opt_pS, opt_pB, subs, is_output) { } diff --git a/spot/tl/randomltl.hh b/spot/tl/randomltl.hh index 8c2e7e0cd..2968ab2ef 100644 --- a/spot/tl/randomltl.hh +++ b/spot/tl/randomltl.hh @@ -34,8 +34,11 @@ namespace spot { public: random_formula(unsigned proba_size, - const atomic_prop_set* ap): - proba_size_(proba_size), proba_(new op_proba[proba_size_]), ap_(ap) + const atomic_prop_set* ap, + const atomic_prop_set* output_ap = nullptr, + std::function is_output = nullptr): + proba_size_(proba_size), proba_(new op_proba[proba_size_]), ap_(ap), + output_ap_(output_ap), is_output_(is_output) { } @@ -50,6 +53,17 @@ namespace spot return ap_; } + /// Return the set of atomic proposition used to build formulas. + const atomic_prop_set* output_ap() const + { + return output_ap_; + } + + std::function is_output_fun() const + { + return is_output_; + } + /// Return the set of patterns (sub-formulas) used to build formulas. const atomic_prop_set* patterns() const { @@ -115,7 +129,9 @@ namespace spot op_proba* proba_2_or_more_; double total_2_and_more_; const atomic_prop_set* ap_; + const atomic_prop_set* output_ap_ = nullptr; const atomic_prop_set* patterns_ = nullptr; + std::function is_output_ = nullptr; bool draw_literals_; }; @@ -174,11 +190,15 @@ namespace spot /// some from \a ap. The probability of false/true to be generated /// default to 0 in this case. random_ltl(const atomic_prop_set* ap, + const atomic_prop_set* output_ap = nullptr, + std::function is_output = nullptr, const atomic_prop_set* subformulas = nullptr); protected: void setup_proba_(const atomic_prop_set* patterns); - random_ltl(int size, const atomic_prop_set* ap); + random_ltl(int size, const atomic_prop_set* ap, + const atomic_prop_set* output_ap = nullptr, + std::function is_output = nullptr); }; /// \ingroup tl_io @@ -225,6 +245,8 @@ namespace spot /// atoms. Atomic propositions in patterns will be rewritten /// randomly by drawing some from \a ap. random_boolean(const atomic_prop_set* ap, + const atomic_prop_set* output_ap = nullptr, + std::function is_output = nullptr, const atomic_prop_set* subformulas = nullptr); }; @@ -342,13 +364,15 @@ namespace spot char* opt_pL = nullptr, char* opt_pS = nullptr, char* opt_pB = nullptr, - const atomic_prop_set* subformulas = nullptr); + const atomic_prop_set* subformulas = nullptr, + std::function is_output = nullptr); randltlgenerator(atomic_prop_set aprops, const option_map& opts, char* opt_pL = nullptr, char* opt_pS = nullptr, char* opt_pB = nullptr, - const atomic_prop_set* subformulas = nullptr); + const atomic_prop_set* subformulas = nullptr, + std::function is_output = nullptr); ~randltlgenerator(); @@ -366,6 +390,7 @@ namespace spot private: fset_t unique_set_; atomic_prop_set aprops_; + atomic_prop_set aprops_out_; int opt_seed_; int opt_tree_size_min_; diff --git a/tests/core/ltlmix.test b/tests/core/ltlmix.test index 232c1f76c..9e65b0021 100755 --- a/tests/core/ltlmix.test +++ b/tests/core/ltlmix.test @@ -94,3 +94,30 @@ test 10 -eq `grep '&.*&' < out | wc -l` ltlmix -fa -A500 -C3 -n10 | tee out2 diff out out2 + + +ltlmix -fGi -fFo -A3,3 -C4 -n10 | tee out +cat >expected < out +diff out expected + +ltlmix -fGa -fFz -A3,3 --outs='/[m-o]/,z' -C4 -n10 > out +diff out expected + + +ltlmix -fGa -fFz -A3,3 -C4 -n10 2> err && exit 1 +cat err +grep 'ins.*outs' err