ltlmix: add support for the I/O variants

* bin/ltlmix.cc: Add options --ins, --outs, as well as the two-argument form of -A/-P. * bin/common_ioap.hh, bin/common_ioap.cc (is_output): New function. * spot/tl/apcollect.cc, spot/tl/apcollect.hh (create_atomic_prop_set): Allow the prefix string to be changed. * spot/tl/randomltl.cc, spot/tl/randomltl.hh: Add support for an I/O version with two set of atomic proposition, and a predicate to decide if the original proposition was input or output. * tests/core/ltlmix.test: More tests.
2024-08-26 11:42:09 +02:00 · 2024-08-26 11:42:09 +02:00 · 844fb887d9
commit 844fb887d9
parent 6fa42c90b8
8 changed files with 286 additions and 103 deletions
--- a/bin/common_ioap.cc
+++ b/bin/common_ioap.cc
@ -88,6 +88,72 @@ list_aps_in_formula(spot::formula f)
  return aps;
 }

+
+bool
+is_output(const std::string& a, const char* filename, int linenum)
+{
+  if (auto it = identifier_map.find(a); it != identifier_map.end())
+    return it->second;
+
+  bool found_in = false;
+  for (const std::regex& r: regex_in)
+    if (std::regex_search(a, r))
+      {
+        found_in = true;
+        break;
+      }
+  bool found_out = false;
+  for (const std::regex& r: regex_out)
+    if (std::regex_search(a, r))
+      {
+        found_out = true;
+        break;
+      }
+  if (all_input_aps.has_value() == all_output_aps.has_value())
+    {
+      if (!all_input_aps.has_value())
+        {
+          // If the atomic proposition hasn't been classified
+          // because neither --ins nor --out were specified,
+          // attempt to classify automatically using the first
+          // letter.
+          int fl = a[0];
+          if (fl == 'i' || fl == 'I')
+            found_in = true;
+          else if (fl == 'o' || fl == 'O')
+            found_out = true;
+        }
+      if (found_in && found_out)
+        error_at_line(2, 0, filename, linenum,
+                      "'%s' matches both --ins and --outs",
+                      a.c_str());
+      if (!found_in && !found_out)
+        {
+          if (all_input_aps.has_value() || all_output_aps.has_value())
+            error_at_line(2, 0, filename, linenum,
+                          "one of --ins or --outs should match '%s'",
+                          a.c_str());
+          else
+            error_at_line(2, 0, filename, linenum,
+                          "since '%s' does not start with 'i' or 'o', "
+                          "it is unclear if it is an input or "
+                          "an output;\n    use --ins or --outs",
+                          a.c_str());
+        }
+    }
+  else
+    {
+      // if we had only --ins or only --outs, anything not
+      // matching that was given is assumed to belong to the
+      // other one.
+      if (!all_input_aps.has_value() && !found_out)
+        found_in = true;
+      else if (!all_output_aps.has_value() && !found_in)
+        found_out = true;
+    }
+  return found_out;
+}
+
 // Takes a set of the atomic propositions appearing in the formula,
 // and separate them into two vectors: input APs and output APs.
 std::pair<std::vector<std::string>, std::vector<std::string>>
@ -97,71 +163,7 @@ filter_list_of_aps(spot::formula f, const char* filename, int linenum)
  // now iterate over the list of atomic propositions to filter them
  std::vector<std::string> matched[2]; // 0 = input, 1 = output
  for (const std::string& a: aps)
-    {
-      if (auto it = identifier_map.find(a); it != identifier_map.end())
-        {
-          matched[it->second].push_back(a);
-          continue;
-        }
-
-      bool found_in = false;
-      for (const std::regex& r: regex_in)
-        if (std::regex_search(a, r))
-          {
-            found_in = true;
-            break;
-          }
-      bool found_out = false;
-      for (const std::regex& r: regex_out)
-        if (std::regex_search(a, r))
-          {
-            found_out = true;
-            break;
-          }
-      if (all_input_aps.has_value() == all_output_aps.has_value())
-        {
-          if (!all_input_aps.has_value())
-            {
-              // If the atomic proposition hasn't been classified
-              // because neither --ins nor --out were specified,
-              // attempt to classify automatically using the first
-              // letter.
-              int fl = a[0];
-              if (fl == 'i' || fl == 'I')
-                found_in = true;
-              else if (fl == 'o' || fl == 'O')
-                found_out = true;
-            }
-          if (found_in && found_out)
-            error_at_line(2, 0, filename, linenum,
-                          "'%s' matches both --ins and --outs",
-                          a.c_str());
-          if (!found_in && !found_out)
-            {
-              if (all_input_aps.has_value() || all_output_aps.has_value())
-                error_at_line(2, 0, filename, linenum,
-                              "one of --ins or --outs should match '%s'",
-                              a.c_str());
-              else
-                error_at_line(2, 0, filename, linenum,
-                              "since '%s' does not start with 'i' or 'o', "
-                              "it is unclear if it is an input or "
-                              "an output;\n    use --ins or --outs",
-                              a.c_str());
-            }
-        }
-      else
-        {
-          // if we had only --ins or only --outs, anything not
-          // matching that was given is assumed to belong to the
-          // other one.
-          if (!all_input_aps.has_value() && !found_out)
-            found_in = true;
-          else if (!all_output_aps.has_value() && !found_in)
-            found_out = true;
-        }
-      matched[found_out].push_back(a);
-    }
+    matched[is_output(a, filename, linenum)].push_back(a);
  return {matched[0], matched[1]};
 }

--- a/bin/common_ioap.hh
+++ b/bin/common_ioap.hh
@ -45,6 +45,15 @@ extern std::vector<std::regex> regex_out;
 // map identifier to input/output (false=input, true=output)
 extern std::unordered_map<std::string, bool> identifier_map;

+
+// Given an atomic proposition AP and the above
+// regex_in/regex_out/identifier_map, decide if this AP is an output
+// (true) or input (false.
+bool
+is_output(const std::string& ap,
+          const char* filename = nullptr, int linenum = 0);
+
+
 // Separate the set of the atomic propositions appearing in f, into
 // two vectors: input APs and output APs, becased on regex_in,
 // regex_out, and identifier_map.
--- a/bin/ltlmix.cc
+++ b/bin/ltlmix.cc
@ -24,6 +24,7 @@
 #include "common_setup.hh"
 #include "common_finput.hh"
 #include "common_output.hh"
+#include "common_ioap.hh"
 #include "common_conv.hh"
 #include "common_cout.hh"
 #include "common_range.hh"
@ -36,7 +37,9 @@ enum {
  OPT_BOOLEAN_PRIORITIES = 256,
  OPT_DUMP_PRIORITIES,
  OPT_DUPS,
+  OPT_INS,
  OPT_LTL_PRIORITIES,
+  OPT_OUTS,
  OPT_SEED,
  OPT_TREE_SIZE,
 };
@ -70,18 +73,21 @@ static const argp_option options[] = {
    { nullptr, 0, nullptr, 0, "Generation parameters:", 2 },
    { "allow-dups", OPT_DUPS, nullptr, 0,
      "allow duplicate formulas to be output", 0 },
-    { "ap-count", 'A', "N", 0,
+    { "ap-count", 'A', "N[,M]", 0,
      "rename the atomic propositions in each selected formula by drawing "
      "randomly from N atomic propositions (the rewriting is bijective "
-      "if N is larger than the original set)", 0 },
+      "if N is larger than the original set).  If M is specified, two sets "
+      "of atomic propositions are used to represent inputs and outputs, and "
+      "options --ins/--outs can be used to classify the original propositions.",
+      0 },
    { "boolean", 'B', nullptr, 0,
      "generate Boolean combinations of formulas (default)", 0 },
    { "formulas", 'n', "INT", 0,
      "number of formulas to generate (default: 1);\n"
      "use a negative value for unbounded generation", 0 },
    { "ltl", 'L', nullptr, 0, "generate LTL combinations of subformulas", 0 },
-    { "polarized-ap", 'P', "N", 0,
-      "similar to -A N, but randomize the polarity of the new atomic "
+    { "polarized-ap", 'P', "N[,M]", 0,
+      "similar to -A, but randomize the polarity of the new atomic "
      "propositions", 0 },
    { "random-conjuncts", 'C', "N", 0,
      "generate random-conjunctions of N conjuncts; "
@ -92,6 +98,12 @@ static const argp_option options[] = {
    { "tree-size", OPT_TREE_SIZE, "RANGE", 0,
      "tree size of main pattern generated (default: 5);\n"
      "input formulas count as size 1.", 0 },
+    { "ins", OPT_INS, "PROPS", 0,
+      "comma-separated list of atomic propositions to consider as input, "
+      "interpreted as a regex if enclosed in slashes", 0 },
+    { "outs", OPT_OUTS, "PROPS", 0,
+      "comma-separated list of atomic propositions to consider as putput, "
+      "interpreted as a regex if enclosed in slashes", 0 },
    RANGE_DOC,
    /**************************************************/
    { nullptr, 0, nullptr, 0, "Adjusting probabilities:", 4 },
@ -140,7 +152,9 @@ static int opt_seed = 0;
 static range opt_tree_size = { 5, 5 };
 static bool opt_unique = true;
 static int opt_ap_count = 0;
+static int opt_out_ap_count = 0;
 static bool opt_literal = false;
+static bool opt_io = false;

 namespace
 {
@ -160,8 +174,12 @@ namespace
    process_formula(spot::formula f, const char* filename = nullptr,
                    int linenum = 0) override
    {
-      (void) filename;
-      (void) linenum;
+      if (opt_io)
+        // Filter the atomic propositions of each formula in order to
+        // report those that are not classifiable.  Throw the result
+        // of that filtering away, as we only care about the potential
+        // diagnostics.
+        (void) filter_list_of_aps(f, filename, linenum);
      opt->sub.insert(f);
      return 0;
    }
@ -170,6 +188,24 @@ namespace

 static sub_processor subreader;

+std::pair<int, int> to_int_pair(const char* arg, const char* opt)
+{
+  const char* comma = strchr(arg, ',');
+  if (!comma)
+    {
+      int res = to_int(arg, opt);
+      return {res, 0};
+    }
+  else
+    {
+      std::string arg1(arg, comma);
+      int res1 = to_int(arg1.c_str(), opt);
+      int res2 = to_int(comma + 1, opt);
+      return {res1, res2};
+    }
+}
+
+
 static int
 parse_opt(int key, char* arg, struct argp_state*)
 {
@ -178,7 +214,8 @@ parse_opt(int key, char* arg, struct argp_state*)
  switch (key)
    {
    case 'A':
-      opt_ap_count = to_int(arg, "-A/--ap-count");
+      std::tie(opt_ap_count, opt_out_ap_count) =
+        to_int_pair(arg, "-A/--ap-count");
      opt_literal = false;
      break;
    case 'B':
@ -199,7 +236,8 @@ parse_opt(int key, char* arg, struct argp_state*)
      opt_formulas = to_int(arg, "-n/--formulas");
      break;
    case 'P':
-      opt_ap_count = to_int(arg, "-P/--polarized-ap");
+      std::tie(opt_ap_count, opt_out_ap_count) =
+        to_int_pair(arg, "-P/--polarized-ap");
      opt_literal = true;
      break;
    case OPT_BOOLEAN_PRIORITIES:
@ -214,6 +252,20 @@ parse_opt(int key, char* arg, struct argp_state*)
    case OPT_DUPS:
      opt_unique = false;
      break;
+    case OPT_INS:
+      {
+        all_input_aps.emplace(std::vector<std::string>{});
+        split_aps(arg, *all_input_aps);
+        opt_io = true;
+        break;
+      }
+    case OPT_OUTS:
+      {
+        all_output_aps.emplace(std::vector<std::string>{});
+        split_aps(arg, *all_output_aps);
+        opt_io = true;
+        break;
+      }
    case OPT_SEED:
      opt_seed = to_int(arg, "--seed");
      break;
@ -249,6 +301,19 @@ main(int argc, char* argv[])

      check_no_formula("combine");

+      if (opt_io && !opt_out_ap_count)
+        error(2, 0,
+              "options --ins and --outs only make sense when the "
+              "two-argument version of '-A N,M' or '-P N,M' is used.");
+      if (opt_out_ap_count > 0)
+        // Do not require --ins/--outs to be used, as the input
+        // pattern may use atomic propositions starting with i/o
+        // already.  Setting opt_io will cause the subreader to
+        // complain about unclassifible atomic propositions.
+        opt_io = true;
+      if (opt_io)
+        process_io_options();
+
      if (subreader.run())
        return 2;

@ -257,11 +322,16 @@ main(int argc, char* argv[])

      spot::srand(opt_seed);

+      std::function<bool(spot::formula)> output_p = nullptr;
+      if (opt_out_ap_count)
+        output_p = [&](spot::formula f) { return is_output(f.ap_name()); };
+
      spot::randltlgenerator rg
        (opt_ap_count,
         [&] (){
          spot::option_map opts;
          opts.set("output", output);
+          opts.set("out_ap_size", opt_out_ap_count);
          opts.set("tree_size_min", opt_tree_size.min);
          opts.set("tree_size_max", opt_tree_size.max);
          opts.set("seed", opt_seed);
@ -269,7 +339,7 @@ main(int argc, char* argv[])
          opts.set("unique", opt_unique);
          opts.set("literals", opt_literal);
          return opts;
-         }(), opt_pL, nullptr, opt_pB, &opt->sub);
+         }(), opt_pL, nullptr, opt_pB, &opt->sub, output_p);

      if (opt_dump_priorities)
        {