From 15b876d36842d7624fc918bdb1a63e918068021e Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Sat, 17 Feb 2024 12:56:28 +0100
Subject: [PATCH] ltlsynt: allow regular expressions in --ins/--outs

* bin/ltlsynt.cc: Implement this.
* doc/org/ltlsynt.org, NEWS: Adjust documentation.
* tests/core/ltlsynt.test: Add test cases.
---
 NEWS                    |  13 +++
 bin/ltlsynt.cc          | 214 ++++++++++++++++++++++++++--------------
 doc/org/ltlsynt.org     |  78 ++++++++-------
 tests/core/ltlsynt.test |  39 ++++++--
 4 files changed, 227 insertions(+), 117 deletions(-)
diff --git a/NEWS b/NEWS
index 0da1a2847..7508311ea 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,19 @@ New in spot 2.11.6.dev  (not yet released)
     will replace boolean subformulas by fresh atomic propositions even
     if those subformulas share atomic propositions.
 
+  - ltlsynt's --ins and --outs options will iterpret any atomic
+    proposition surrounded by '/' as a regular expressions.
+    For intance with
+
+      ltlsynt --ins='/^in/,/env/' --outs=/^out/,/control/' ...
+
+    any atomic proposition that start with 'in' or contains 'env'
+    will be considered as inputs, and those that start with 'out'
+    or contain 'control' will be considered output.
+
+    By default, if neither --ins nor --outs is given, ltlsynt will
+    behave as if --ins='/^[iI]/' and --outs='/^[oO]/ were used.
+
   - ltlsynt will now check for atomic propositions that always have
     the same polarity in the specification.  When this happens, these
     output APs are replaced by true or false before running the
diff --git a/bin/ltlsynt.cc b/bin/ltlsynt.cc
index 78dfb8829..2b0facc57 100644
--- a/bin/ltlsynt.cc
+++ b/bin/ltlsynt.cc
@@ -43,6 +43,7 @@
 #include <spot/twaalgos/product.hh>
 #include <spot/twaalgos/synthesis.hh>
 #include <spot/twaalgos/translate.hh>
+#include <regex>
 
 enum
 {
@@ -73,10 +74,10 @@ static const argp_option options[] =
     { nullptr, 0, nullptr, 0, "Input options:", 1 },
     { "outs", OPT_OUTPUT, "PROPS", 0,
       "comma-separated list of controllable (a.k.a. output) atomic"
-      " propositions", 0 },
+      " propositions, , interpreted as a regex if enclosed in slashes", 0 },
     { "ins", OPT_INPUT, "PROPS", 0,
       "comma-separated list of uncontrollable (a.k.a. input) atomic"
-      " propositions", 0 },
+      " propositions, interpreted as a regex if enclosed in slashes", 0 },
     { "tlsf", OPT_TLSF, "FILENAME", 0,
       "Read a TLSF specification from FILENAME, and call syfco to "
       "convert it into LTL", 0 },
@@ -171,9 +172,17 @@ Exit status:\n\
   1   if at least one input problem was not realizable\n\
   2   if any error has been reported";
 
+// --ins and --outs, as supplied on the command-line
 static std::optional<std::vector<std::string>> all_output_aps;
 static std::optional<std::vector<std::string>> all_input_aps;
 
+// first, separate the filters that are regular expressions from
+// the others.  Compile the regular expressions while we are at it.
+static std::vector<std::regex> regex_in;
+static std::vector<std::regex> regex_out;
+// map identifier to input/output (false=input, true=output)
+static std::unordered_map<std::string, bool> identifier_map;
+
 static const char* opt_csv = nullptr;
 static bool opt_print_pg = false;
 static bool opt_print_hoa = false;
@@ -690,78 +699,111 @@ namespace
       }
   }
 
+  static std::unordered_set<std::string>
+  list_aps_in_formula(spot::formula f)
+  {
+    std::unordered_set<std::string> aps;
+    f.traverse([&aps](spot::formula s) {
+      if (s.is(spot::op::ap))
+        aps.emplace(s.ap_name());
+      return false;
+    });
+    return aps;
+  }
+
+  // Takes a set of the atomic propositions appearing in the formula,
+  // and seperate them into two vectors: input APs and output APs.
+  static std::pair<std::vector<std::string>, std::vector<std::string>>
+  filter_list_of_aps(const std::unordered_set<std::string>& aps,
+                     const char* filename, int linenum)
+  {
+    // now iterate over the list of atomic propositions to filter them
+    std::vector<std::string> matched[2]; // 0 = input, 1 = output
+    for (const std::string& a: aps)
+      {
+        if (auto it = identifier_map.find(a); it != identifier_map.end())
+          {
+            matched[it->second].push_back(a);
+            continue;
+          }
+
+        bool found_in = false;
+        for (const std::regex& r: regex_in)
+          if (std::regex_search(a, r))
+            {
+              found_in = true;
+              break;
+            }
+        bool found_out = false;
+        for (const std::regex& r: regex_out)
+          if (std::regex_search(a, r))
+            {
+              found_out = true;
+              break;
+            }
+        if (all_input_aps.has_value() == all_output_aps.has_value())
+          {
+            if (!all_input_aps.has_value())
+              {
+                // If the atomic proposition hasn't been classified
+                // because neither --ins nor --out were specified,
+                // attempt to classify automatically using the first
+                // letter.
+                int fl = a[0];
+                if (fl == 'i' || fl == 'I')
+                  found_in = true;
+                else if (fl == 'o' || fl == 'O')
+                  found_out = true;
+              }
+            if (found_in && found_out)
+              error_at_line(2, 0, filename, linenum,
+                            "'%s' matches both --ins and --outs",
+                            a.c_str());
+            if (!found_in && !found_out)
+              {
+                if (all_input_aps.has_value() || all_output_aps.has_value())
+                  error_at_line(2, 0, filename, linenum,
+                                "one of --ins or --outs should match '%s'",
+                                a.c_str());
+                else
+                  error_at_line(2, 0, filename, linenum,
+                                "since '%s' does not start with 'i' or 'o', "
+                                "it is unclear if it is an input or "
+                                "an output;\n    use --ins or --outs",
+                                a.c_str());
+              }
+          }
+        else
+          {
+            // if we had only --ins or only --outs, anything not
+            // matching was was given is assumed to belong to the
+            // other one.
+            if (!all_input_aps.has_value() && !found_out)
+              found_in = true;
+            else if (!all_output_aps.has_value() && !found_in)
+              found_out = true;
+          }
+        matched[found_out].push_back(a);
+      }
+    return {matched[0], matched[1]};
+  }
+
+
+
   class ltl_processor final : public job_processor
   {
-  private:
-    std::optional<std::vector<std::string>> input_aps_;
-    std::optional<std::vector<std::string>> output_aps_;
-
   public:
-    ltl_processor(std::optional<std::vector<std::string>> input_aps_,
-                  std::optional<std::vector<std::string>> output_aps_)
-        : input_aps_(std::move(input_aps_)),
-          output_aps_(std::move(output_aps_))
+    ltl_processor()
     {
     }
 
     int process_formula(spot::formula f,
                         const char* filename, int linenum) override
     {
-      auto unknown_aps = [](spot::formula f,
-                    const std::optional<std::vector<std::string>>& known,
-                    const std::optional<std::vector<std::string>>& known2 = {})
-      {
-        std::vector<std::string> unknown;
-        std::set<spot::formula> seen;
-        // If we don't have --ins and --outs, we must not find an AP.
-        bool can_have_ap = known.has_value();
-        f.traverse([&](const spot::formula& s)
-        {
-          if (s.is(spot::op::ap))
-            {
-              if (!seen.insert(s).second)
-                return false;
-              const std::string& a = s.ap_name();
-              if (!can_have_ap
-                  || (std::find(known->begin(), known->end(), a) == known->end()
-                  && (!known2.has_value()
-                      || std::find(known2->begin(),
-                                   known2->end(), a) == known2->end())))
-                unknown.push_back(a);
-            }
-          return false;
-        });
-        return unknown;
-      };
-
-      // Decide which atomic propositions are input or output.
-      int res;
-      if (!input_aps_.has_value() && output_aps_.has_value())
-        {
-          res = solve_formula(f, unknown_aps(f, output_aps_), *output_aps_);
-        }
-      else if (!output_aps_.has_value() && input_aps_.has_value())
-        {
-          res = solve_formula(f, *input_aps_, unknown_aps(f, input_aps_));
-        }
-      else if (!output_aps_.has_value() && !input_aps_.has_value())
-        {
-          for (const std::string& ap: unknown_aps(f, input_aps_, output_aps_))
-            error_at_line(2, 0, filename, linenum,
-                          "one of --ins or --outs should list '%s'",
-                          ap.c_str());
-          res = solve_formula(f, *input_aps_, *output_aps_);
-        }
-      else
-        {
-          for (const std::string& ap: unknown_aps(f, input_aps_, output_aps_))
-            error_at_line(2, 0, filename, linenum,
-                          "both --ins and --outs are specified, "
-                          "but '%s' is unlisted",
-                          ap.c_str());
-          res = solve_formula(f, *input_aps_, *output_aps_);
-        }
-
+      std::unordered_set<std::string> aps = list_aps_in_formula(f);
+      auto [input_aps, output_aps] =
+        filter_list_of_aps(aps, filename, linenum);
+      int res = solve_formula(f, input_aps, output_aps);
       if (opt_csv)
         print_csv(f);
       return res;
@@ -782,7 +824,7 @@ namespace
       // The set of atomic proposition will be temporary set to those
       // given by syfco, unless they were forced from the command-line.
       bool reset_aps = false;
-      if (!input_aps_.has_value() && !output_aps_.has_value())
+      if (!all_input_aps.has_value() && !all_output_aps.has_value())
         {
           reset_aps = true;
           static char arg5[] = "--print-output-signals";
@@ -790,12 +832,17 @@ namespace
                               const_cast<char*>(filename), nullptr };
           std::string res = read_stdout_of_command(command);
 
-          output_aps_.emplace(std::vector<std::string>{});
-          split_aps(res, *output_aps_);
+          all_output_aps.emplace(std::vector<std::string>{});
+          split_aps(res, *all_output_aps);
+          for (const std::string& a: *all_output_aps)
+            identifier_map.emplace(a, true);
         }
       int res = process_string(tlsf_string, filename);
       if (reset_aps)
-        output_aps_.reset();
+        {
+          all_output_aps.reset();
+          identifier_map.clear();
+        }
       return res;
     }
 
@@ -1077,14 +1124,29 @@ main(int argc, char **argv)
 
     check_no_formula();
 
-    // Check if inputs and outputs are distinct
-    if (all_input_aps.has_value() && all_output_aps.has_value())
-      for (const std::string& ai : *all_input_aps)
-        if (std::find(all_output_aps->begin(), all_output_aps->end(), ai)
-            != all_output_aps->end())
-          error(2, 0, "'%s' appears both in --ins and --outs", ai.c_str());
+    // Filter identifiers from regexes.
+    if (all_input_aps.has_value())
+      for (const std::string& f: *all_input_aps)
+        {
+          unsigned sz = f.size();
+          if (f[0] == '/' && f[sz - 1] == '/')
+            regex_in.push_back(std::regex(f.substr(1, sz - 2)));
+          else
+            identifier_map.emplace(f, false);
+        }
+    if (all_output_aps.has_value())
+      for (const std::string& f: *all_output_aps)
+        {
+          unsigned sz = f.size();
+          if (f[0] == '/' && f[sz - 1] == '/')
+            regex_out.push_back(std::regex(f.substr(1, sz - 2)));
+          else if (auto [it, is_new] = identifier_map.try_emplace(f, true);
+                   !is_new && !it->second)
+            error(2, 0, "'%s' appears in both --ins and --outs",
+                  f.c_str());
+        }
 
-    ltl_processor processor(all_input_aps, all_output_aps);
+    ltl_processor processor;
     if (int res = processor.run(); res == 0 || res == 1)
       {
         // Diagnose unused -x options
diff --git a/doc/org/ltlsynt.org b/doc/org/ltlsynt.org
index cd3c23d62..be1c8d085 100644
--- a/doc/org/ltlsynt.org
+++ b/doc/org/ltlsynt.org
@@ -22,12 +22,15 @@ specifically as Mealy machines).  In the automaton representing the
 controller, the acceptance condition is irrelevant and trivially true.
 
 =ltlsynt= has three mandatory options:
-- =--ins=: a comma-separated list of input atomic propositions;
-- =--outs=: a comma-separated list of output atomic propositions;
+- =--ins=: a comma-separated list of input atomic propositions, or input regexes enclosed in slashes;
+- =--outs=: a comma-separated list of output atomic propositions, or output regexes enclosed in slashes;
 - =--formula= or =--file=: a specification in LTL or PSL.
 
-One of =--ins= or =--outs= may be omitted, as any atomic proposition not listed
-as input can be assumed to be output and vice versa.
+One of =--ins= or =--outs= may be omitted, as any atomic proposition
+not listed as input can be assumed to be output and vice versa.  If
+both are omitted, =ltlsynts= will assume ~--ins=/^[iI]/~ and
+~--outs=/^[oO]/~, i.e., atomic propositions will be classified as
+input or output based on their first letter.
 
 The following example illustrates the synthesis of a controller
 ensuring that input =i1= and =i2= are both true initially if and only
@@ -36,7 +39,7 @@ Note that this is an equivalence, not an implication.
 
 #+NAME: example
 #+BEGIN_SRC sh :exports both
-ltlsynt --ins=i1,i2 -f '(i1 & i2) <-> F(o1 & X(!o1))'
+ltlsynt -f '(i1 & i2) <-> F(o1 & X(!o1))'
 #+END_SRC
 
 #+RESULTS: example
@@ -55,24 +58,27 @@ State: 0
 [0&1&2] 1
 [!0&2 | !1&2] 2
 State: 1
-[!2] 0
+[!2] 1
 State: 2
 [2] 2
 --END--
 #+end_example
 
 The output is composed of two parts:
-- The first one is a single line =REALIZABLE= or =UNREALIZABLE=; the presence of this
-  line, required by the [[http://http://www.syntcomp.org/][SyntComp competition]], can be disabled with option =--hide-status=.
-- The second one, only present in the =REALIZABLE= case, is an automaton describing the controller.
+- The first part is a single line stating =REALIZABLE= or
+  =UNREALIZABLE=; the presence of this line, required by the [[http://http://www.syntcomp.org/][SyntComp
+  competition]], can be disabled with option =--hide-status=.
+- The second part, only present in the =REALIZABLE= case, is an
+  automaton describing the controller.
 
-The controller contains the line =controllable-AP: 2=, which means that this automaton
-should be interpreted as a Mealy machine where =o0= is part of the output.
-Using the =--dot= option, makes it easier to visualize this machine.
+The controller contains the line =controllable-AP: 2=, which means
+that this automaton should be interpreted as a Mealy machine where
+=o0= is part of the output.  Using the =--dot= option, makes it easier
+to visualize this machine.
 
 #+NAME: exampledot
 #+BEGIN_SRC sh :exports code
-ltlsynt --ins=i1,i2 -f '(i1 & i2) <-> F(o1 & X(!o1))' --hide-status --dot
+ltlsynt -f '(i1 & i2) <-> F(o1 & X(!o1))' --hide-status --dot
 #+END_SRC
 
 #+BEGIN_SRC dot :file ltlsyntex.svg :var txt=exampledot :exports results
@@ -99,28 +105,32 @@ flag. This is the output format required for the [[http://syntcomp.org/][SYNTCOM
 
 #+NAME: exampleaig
 #+BEGIN_SRC sh :exports both
-ltlsynt --ins=i1,i2 -f '(i1 & i2) <-> F(o1 & X(!o1))' --aiger
+ltlsynt -f '(i1 & i2) <-> F(o1 & X(!o1))' --aiger
 #+END_SRC
 
 #+RESULTS: exampleaig
 #+begin_example
 REALIZABLE
-aag 14 2 2 1 10
+aag 18 2 2 1 14
 2
 4
-6 14
-8 29
+6 23
+8 37
 7
-10 7 9
-12 4 10
-14 2 12
-16 7 8
-18 4 16
-20 5 7
-22 21 19
-24 2 23
-26 3 7
-28 27 25
+10 6 9
+12 4 9
+14 5 10
+16 13 15
+18 2 17
+20 3 10
+22 19 21
+24 7 8
+26 4 24
+28 5 7
+30 27 29
+32 2 31
+34 3 7
+36 33 35
 i0 i1
 i1 i2
 o0 o1
@@ -132,7 +142,7 @@ the controller:
 
 #+NAME: exampleaigdot
 #+BEGIN_SRC sh :exports code
-ltlsynt --ins=i1,i2 -f '(i1 & i2) <-> F(o1 & X(!o1))' --hide-status --aiger --dot
+ltlsynt -f '(i1 & i2) <-> F(o1 & X(!o1))' --hide-status --aiger --dot
 #+END_SRC
 
 #+BEGIN_SRC dot :file ltlsyntexaig.svg :var txt=exampleaigdot :exports results
@@ -147,7 +157,7 @@ circles represent inversions (or negations), colored triangles are
 used to represent input signals (at the bottom) and output signals (at
 the top), and finally rectangles represent latches.  A latch is a one
 bit register that delays the signal by one step.  Initially, all
-latches are assumed to contain =false=, and them emit their value from
+latches are assumed to contain =false=, and they emit their value from
 the =L0_out= and =L1_out= rectangles at the bottom.  Their input value,
 to be emitted at the next step, is received via the =L0_in= and =L1_in=
 boxes at the top.  In =ltlsynt='s encoding, the set of latches is used
@@ -172,8 +182,9 @@ be synthesized using =syfco= and =ltlsynt=:
 ltlsynt --tlsf FILE
 #+END_SRC
 
-The above =--tlsf= option will call =syfco= to perform the conversion
-and extract output signals, as if you had used:
+The above =--tlsf= option will call =syfco= (which must be on your
+=$PATH=) to perform the conversion and extract output signals, as if
+you had used:
 
 #+BEGIN_SRC sh :export code
 LTL=$(syfco -f ltlxba -m fully FILE)
@@ -181,6 +192,7 @@ OUT=$(syfco --print-output-signals FILE)
 ltlsynt --formula="$LTL" --outs="$OUT"
 #+END_SRC
 
+
 * Internal details
 
 The tool reduces the synthesis problem to a parity game, and solves the parity
@@ -237,13 +249,13 @@ be tried by separating them using commas.  For instance
 
 You can also ask =ltlsynt= to print to obtained parity game into
 [[https://github.com/tcsprojects/pgsolver][PGSolver]] format, with the flag =--print-pg=, or in the HOA format,
-using =--print-game-hoa=.  These flag deactivate the resolution of the
+using =--print-game-hoa=.  These flags deactivate the resolution of the
 parity game.  Note that if any of those flag is used with =--dot=, the game
 will be printed in the Dot format instead:
 
 #+NAME: examplegamedot
 #+BEGIN_SRC sh :exports code
-ltlsynt --ins=i1,i2 -f '(i1 & i2) <-> F(o1 & X(!o1))' --print-game-hoa --dot
+ltlsynt -f '(i1 & i2) <-> F(o1 & X(!o1))' --print-game-hoa --dot
 #+END_SRC
 #+BEGIN_SRC dot :file ltlsyntexgame.svg :var txt=examplegamedot :exports results
   $txt
diff --git a/tests/core/ltlsynt.test b/tests/core/ltlsynt.test
index 7165f00c5..14a18b9c3 100644
--- a/tests/core/ltlsynt.test
+++ b/tests/core/ltlsynt.test
@@ -242,7 +242,7 @@ automaton has 6 states
 solving game with acceptance: co-Büchi
 game solved in X seconds
 EOF
-ltlsynt -f "G(Fi0 && Fi1 && Fi2) -> G(i1 <-> o0)" --outs="o0" --algo=lar \
+ltlsynt -f "G(Fi0 && Fi1 && Fi2) -> G(i1 <-> o0)" --algo=lar \
         --verbose --realizability 2> out
 sed 's/ [0-9.e-]* seconds/ X seconds/g' out > outx
 diff outx exp
@@ -625,14 +625,14 @@ diff stdout expected
 
 ltlsynt --ins=a,b --outs=c,a -f 'GFa | FGc | GFb' 2>stderr && :
 test $? -eq 2
-grep "'a' appears both" stderr
+grep "'a' appears in both" stderr
 
 ltlsynt --ins=a --outs=c -f 'GFa | FGb | GFc' 2>stderr && :
 test $? -eq 2
-grep "both.*but 'b' is unlisted" stderr
+grep "one.*should match 'b'" stderr
 ltlsynt -f 'GFa | FGb | GFc' 2>stderr && :
 test $? -eq 2
-grep "one of --ins or --outs" stderr
+grep "[-]-ins or --outs" stderr
 
 # Try to find a direct strategy for GFa <-> GFb and a direct strategy for
 # Gc
@@ -903,7 +903,7 @@ ltlsynt --outs="" -f "GFb" | grep "UNREALIZABLE"
 ltlsynt --outs="" -f "1"
 
 ltlsynt --outs="" --ins="" -f "GFa" 2>&1 | \
-  grep "both --ins and --outs are specified"
+  grep "one of --ins or --outs should match 'a'"
 
 LTL='(((((G (((((((g_0) && (G (! (r_0)))) -> (F (! (g_0)))) && (((g_0) &&
 (X ((! (r_0)) && (! (g_0))))) -> (X ((r_0) R (! (g_0)))))) && (((g_1) &&
@@ -1099,17 +1099,23 @@ s7="G(o07 <-> (i7 & i8)) & G((i7 & i8) -> (o11 U i3)) & GFo12 & G(o04 <-> "
 s8="(i4 & i6)) & G(o05 <-> !(i4 & i6)) & G(o15 <-> (i7 & i8)) & G(i7 -> o02) & "
 s9="G((!i7 & !(i1 & i2 & !i5 & i6)) -> o03) & G(o01 <-> (i1 & i2 & !i5 & i6))))"
 s=$s1$s2$s3$s4$s5$s6$s7$s8$s9
-ltlsynt --decomp=yes -f "$s" --ins=i1,i2,i3,i4,i5,i6,i7,i8 --realizability >out
-ltlsynt --decomp=no -f "$s" --ins=i1,i2,i3,i4,i5,i6,i7,i8 --realizability >>out
+ltlsynt --decomp=yes -f "$s" --realizability >out
+ltlsynt --decomp=no --outs='/^o[0-9]*$/' -f "$s" --realizability >>out
+ltlsynt --decomp=no --outs='/^o[0-9]$/' -f "$s" --realizability >>out && :
+ltlsynt  -f "$s" --ins='/^i[0-9]*$/' --realizability >>out
 cat >expected <<EOF
 REALIZABLE
 REALIZABLE
+UNREALIZABLE
+REALIZABLE
 EOF
 diff out expected
 
 f1="((G ((p0) <-> (! (p1)))) && (((((F ((b) && (G (F (a))))) ||\
   (F ((c) && (G (F (! (a))))))) && (F (b))) && (F (c))) <-> (G (F (p0)))))"
-ltlsynt -f "$f1" --outs="p1, p0" --aiger > /dev/null
+ltlsynt -f "$f1" --outs="p1, p0" --aiger > out1.hoa
+ltlsynt -f "$f1" --outs="p1, /^p/" --aiger > out2.hoa
+diff out1.hoa out2.hoa
 
 # issue #557
 ltlsynt  -f 'G(in1 <-> out0) & G(in0 <-> out1)' --ins=in1,in0 --verb 2>err >out
@@ -1120,3 +1126,20 @@ cat >err2.ex <<EOF
 EOF
 diff err2 err2.ex
 grep -F '[!0&!1&!2&!3 | !0&!1&2&3 | 0&1&!2&!3 | 0&1&2&3] 0' out
+
+ltlsynt -f 'G((in1 && in2) <-> XXout2) && G((in1 | !in2) -> Fout2)' \
+        --realizability >out && :
+test $? -eq 1
+# specitying --outs=in2 should have priority over regular expressions
+ltlsynt -f 'G((in1 && in2) <-> XXout2) && G((in1 | !in2) -> Fout2)' \
+        --realizability --ins='/^i/' --outs='out2,in2' >>out
+cat >expected <<EOF
+UNREALIZABLE
+REALIZABLE
+EOF
+diff out expected
+
+ltlsynt --ins='/^in/,/env/' --outs='/^out/,/control/' \
+  -f 'G(controlenv <-> input)' 2>err && :
+test $? -eq 2
+grep 'controlenv.*matches both' err