relabel_bse: rework to simplify more patterns

Rework the way we compute and use cut-points to catch more patterns we can rewrite. Also Use BDDs to check if a Boolean sub-expression is false or true. Fixes issue #540. * spot/tl/relabel.hh: Update documentation * spot/tl/relabel.cc (relabel_bse): Rework. * tests/core/ltlfilt.test: Add more test cases. * tests/python/_mealy.ipynb: Update. * NEWS: Mention the change.
2023-09-13 11:31:49 +02:00 · 2023-09-13 11:31:49 +02:00 · 7149521f48
commit 7149521f48
parent cbb981ffd5
5 changed files with 305 additions and 159 deletions
--- a/spot/tl/relabel.cc
+++ b/spot/tl/relabel.cc
@ -19,6 +19,7 @@

 #include "config.h"
 #include <spot/tl/relabel.hh>
+#include <spot/tl/simplify.hh>
 #include <sstream>
 #include <spot/misc/hash.hh>
 #include <map>
@ -82,7 +83,7 @@ namespace spot

    // if subexp == false, matches APs
    // if subexp == true, matches boolean subexps
-    template <bool subexp>
+    template <bool subexp, bool use_bdd = false>
    class relabeler
    {
    public:
@ -90,6 +91,7 @@ namespace spot
      map newname;
      ap_generator* gen;
      relabeling_map* oldnames;
+      tl_simplifier tl;

      relabeler(ap_generator* gen, relabeling_map* m)
        : gen(gen), oldnames(m)
@ -115,6 +117,16 @@ namespace spot
        if (!r.second)
          return r.first->second;

+        if constexpr (use_bdd)
+          if (!old.is(op::ap))
+            {
+              bdd b = tl.as_bdd(old);
+              if (b == bddtrue)
+                return r.first->second = formula::tt();
+              if (b == bddfalse)
+                return r.first->second = formula::ff();
+            }
+
        formula res = gen->next();
        r.first->second = res;
        if (oldnames)
@ -269,7 +281,7 @@ namespace spot
  // Furthermore, because we are already representing LTL formulas
  // with sharing of identical sub-expressions we can easily rename
  // a subexpression (such as c&d above) only once.  However this
-  // scheme has two problems:
+  // scheme (done by relabel_overlapping_bse()) has two problems:
  //
  //   A. It will not detect inter-dependent Boolean subexpressions.
  //      For instance it will mistakenly relabel "(a & b) U (a & !b)"
@ -278,53 +290,40 @@ namespace spot
  //   B. Because of our n-ary operators, it will fail to
  //      notice that (a & b) is a sub-expression of (a & b & c).
  //
-  // The way we compute the subexpressions that can be relabeled is
-  // by transforming the formula syntax tree into an undirected
-  // graph, and computing the cut points of this graph.  The cut
-  // points (or articulation points) are the nodes whose removal
-  // would split the graph in two components.  To ensure that a
-  // Boolean operator is only considered as a cut point if it would
-  // separate all of its children from the rest of the graph, we
-  // connect all the children of Boolean operators.
+  // The way we compute the subexpressions that can be relabeled is by
+  // transforming the formula syntax tree into an undirected graph,
+  // and computing the cut-points of this graph.  The cut-points (or
+  // articulation points) are the nodes whose removal would split the
+  // graph in two components; in our case, we extend this definition to
+  // also consider the leaves as cut-points.
  //
-  // For instance (a & b) U (c & d) has two (Boolean) cut points
-  // corresponding to the two AND operators:
+  // For instance ((a|b)&c&d)U(!d&e&f) is represented by
+  // the following graph, were cut-points are marked with *.
  //
-  //             (a&b)U(c&d)
-  //             ╱         ╲
-  //           a&b         c&d
-  //          ╱   ╲       ╱   ╲
-  //         a─────b     c─────d
+  //             ((a|b)&c&d)U(!d&e&f)
+  //                 ╱           ╲
+  //            ((a|b)&c&d)*   (!d&e&f)*
+  //             ╱   │   ╲     ╱  │  ╲
+  //           a|b*  │    ╲   !   │   ╲
+  //           ╱ ╲   │     ╲ ╱    │    ╲
+  //          a*  b* c*     d     e*    f*
  //
-  // (The root node is also a cut point, but we only consider Boolean
-  // cut points for relabeling.)
+  // The relabeling of a formula is done in 3 passes:
+  //  1. Convert the formula's syntax tree into an undirected graph.
+  //  2. Compute the (Boolean) cut points of that graph, using the
+  //     Hopcroft-Tarjan algorithm (see below for a reference).
+  //  3. Recursively scan the formula's tree until we reach
+  //     a (Boolean) cut-point.  If all the children of this node
+  //     are cut-points, rename the node with a fresh label.
+  //     If it's a n-ary operator, group all children that are
+  //     and cut-points relabel them as a whole.
  //
-  // On the other hand, (a & b) U (b & !c) has only one Boolean
-  // cut-point which corresponds to the NOT operator:
-  //
-  //             (a&b)U(b&!c)
-  //                ╱   ╲
-  //              a&b   b&!c
-  //             ╱   ╲ ╱   ╲
-  //            a─────b────!c
-  //                        │
-  //                        c
-  //
-  // Note that if the children of a&b and b&c were not connected,
-  // a&b and b&c would be considered as cut points because they
-  // separate "a" or "!c" from the rest of the graph.
-  //
-  // The relabeling of a formula is therefore done in 3 passes:
-  //  1. convert the formula's syntax tree into an undirected graph,
-  //     adding links between children of Boolean operators
-  //  2. compute the (Boolean) cut points of that graph, using the
-  //     Hopcroft-Tarjan algorithm (see below for a reference)
-  //  3. recursively scan the formula's tree until we reach
-  //     either a (Boolean) cut point or an atomic proposition, and
-  //     replace that node by a fresh atomic proposition.
-  //
-  // In the example above (a&b)U(b&!c), the last recursion
-  // stops on a, b, and !c, producing (p0&p1)U(p1&p2).
+  // On the above example, when processing the cut-point
+  // ((a|b)&c&d) we group its children that are cut-points
+  // (a|b)&c and rename this group as p0.   Then d gets
+  // his own name p1, and when processing (!d&e&f) we group
+  // e&f because they are both cut-points, are rename them p1.
+  // The result is (p0 & p1) U (!p1 & p2).
  //
  // Problem #B above (handling of n-ary expression) need some
  // additional tricks.  Consider (a&b&c&d) U X(c&d), and assume
@ -343,24 +342,19 @@ namespace spot

    // Convert the formula's syntax tree into an undirected graph
    // labeled by subformulas.
-    class formula_to_fgraph
+    class formula_to_fgraph final
    {
    public:
      fgraph& g;
      std::stack<formula> s;
      sub_formula_count_t& subcount;

-      formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount):
-        g(g), subcount(subcount)
-        {
-        }
+      formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount)
+        : g(g), subcount(subcount)
+      {
+      }

-      ~formula_to_fgraph()
-        {
-        }
-
-      void
-        visit(formula f)
+      void visit(formula f)
      {
        {
          // Connect to parent
@ -411,27 +405,6 @@ namespace spot
          }
        for (; i < sz; ++i)
          visit(f[i]);
-        if (sz > 1 && f.is_boolean())
-          {
-            // For Boolean nodes, connect all children in a
-            // loop.  This way the node can only be a cut point
-            // if it separates all children from the reset of
-            // the graph (not only one).
-            formula pred = f[0];
-            for (i = 1; i < sz; ++i)
-              {
-                formula next = f[i];
-                // Note that we add an edge in both directions,
-                // as the cut point algorithm really need undirected
-                // graphs.  (We used to do only one direction, and
-                // that turned out to be a bug.)
-                g[pred].emplace_back(next);
-                g[next].emplace_back(pred);
-                pred = next;
-              }
-            g[pred].emplace_back(f[0]);
-            g[f[0]].emplace_back(pred);
-          }
      done:
        s.pop();
      }
@ -465,11 +438,12 @@ namespace spot
    // the ACM, 16 (6), June 1973.
    //
    // It differs from the original algorithm by returning only the
-    // Boolean cutpoints, and not dealing with the initial state
+    // Boolean cut-points, not dealing with the initial state
    // properly (our initial state will always be considered as a
    // cut-point, but since we only return Boolean cut-points it's
    // OK: if the top-most formula is Boolean we want to replace it
-    // as a whole).
+    // as a whole), and considering the atomic propositions that
+    // are leaves as cutpoints too.
    void cut_points(const fgraph& g, fset& c, formula start)
    {
      stack_t s;
@ -530,7 +504,14 @@ namespace spot
                  data_entry& dgrand_parent = data[grand_parent];
                  if (dparent.low >= dgrand_parent.num // cut-point
                      && grand_parent.is_boolean())
-                    c.insert(grand_parent);
+                    {
+                      c.insert(grand_parent);
+                      // Also consider atomic propositions as
+                      // cut-points if they are leaves.
+                      if (parent.is(op::ap)
+                          && g.find(parent)->second.size() == 1)
+                        c.insert(parent);
+                    }
                  if (dparent.low < dgrand_parent.low)
                    dgrand_parent.low = dparent.low;
                }
@ -539,7 +520,7 @@ namespace spot
    }


-    class bse_relabeler final: public relabeler<false>
+    class bse_relabeler final: public relabeler<false, true>
    {
    public:
      const fset& c;
@ -553,12 +534,45 @@ namespace spot

      using relabeler::visit;

-      formula
-        visit(formula f)
+      formula visit(formula f)
      {
-        if (f.is(op::ap) || (c.find(f) != c.end()))
+        if (f.is(op::ap))
          return rename(f);

+        // This is Boolean cut-point?
+        // We can only relabel it if all its children are cut-points.
+        if (c.find(f) != c.end())
+          {
+            unsigned fsz = f.size();
+            assert(fsz > 0);    // A cut point has children
+            if (fsz == 1
+                || (fsz == 2
+                    && ((c.find(f[0]) != c.end())
+                        == (c.find(f[1]) != c.end()))))
+              return rename(f);
+            if (fsz > 2)
+              {
+                // cp[0] will contains non cut-points
+                // cp[1] will contain cut-points or atomic propositions
+                std::vector<formula> cp[2];
+                cp[0].reserve(fsz);
+                cp[1].reserve(fsz);
+                for (unsigned i = 0; i < fsz; ++i)
+                  {
+                    formula cf = f[i];
+                    cp[c.find(cf) != c.end()].push_back(cf);
+                  }
+                if (cp[0].empty()
+                    || cp[1].empty())
+                  // all children are cut-points or non-cut-points
+                  return rename(f);
+                formula cp1group = rename(formula::multop(f.kind(), cp[1]));
+                formula cp0group = visit(formula::multop(f.kind(), cp[0]));
+                return formula::multop(f.kind(), {cp1group, cp0group});
+              }
+          }
+
+        // Not a cut-point, recurse
        unsigned sz = f.size();
        if (sz <= 2)
          return f.map([this](formula f)
@ -566,24 +580,24 @@ namespace spot
                         return visit(f);
                       });

-        unsigned i = 0;
-        std::vector<formula> res;
        if (f.is_boolean() && sz > 2)
-          {
-            // If we have a Boolean formula with more than two
-            // children, like (a & b & c & d) where some children
-            // (assume {a,b}) are used only once, but some other
-            // (assume {c,d}) are used multiple time in the formula,
-            // then split that into ((a & b) & (c & d)) to give
-            // (a & b) a chance to be relabeled as a whole.
-            auto pair = split_used_once(f, subcount);
-            if (pair.second)
-              return formula::multop(f.kind(), { visit(pair.first),
-                                                 visit(pair.second) });
-          }
+          // If we have a Boolean formula with more than two
+          // children, like (a & b & c & d) where some children
+          // (assume {a,b}) are used only once, but some other
+          // (assume {c,d}) are used multiple time in the formula,
+          // then split that into ((a & b) & (c & d)) to give
+          // (a & b) a chance to be relabeled as a whole.
+          if (auto pair = split_used_once(f, subcount); pair.second)
+            {
+              formula left = visit(pair.first);
+              formula right = visit(pair.second);
+              return formula::multop(f.kind(), { left, right });
+            }
        /// If we have a formula like (a & b & Xc), consider
        /// it as ((a & b) & Xc) in the graph to isolate the
        /// Boolean operands as a single node.
+        unsigned i = 0;
+        std::vector<formula> res;
        formula b = f.boolean_operands(&i);
        if (b && b != f)
          {
@ -630,6 +644,10 @@ namespace spot
    fset c;
    cut_points(g, c, f);

+    // std::cerr << "cut-points\n";
+    // for (formula cp: c)
+    //   std::cerr << "  - " << cp << '\n';
+
    // Relabel the formula recursively, stopping
    // at cut-points or atomic propositions.
    ap_generator* gen = nullptr;