determinize: speedup on automata with many AP and few labels

This uses the same trick as discussed in issue #566 and issue #568. * spot/twaalgos/determinize.cc (safra_support): Use a basis if it is smaller than 2^|support| for the current Safra state. * tests/core/568.test: Add some tests. * NEWS: Mention the optimization.
2024-03-25 10:36:34 +01:00 · 2024-03-25 10:36:34 +01:00 · 26ef5458eb
commit 26ef5458eb
parent bda40a5f19
3 changed files with 71 additions and 9 deletions
--- a/12
+++ b/12
@ -165,12 +165,12 @@ New in spot 2.11.6.dev  (not yet released)
  - spot::dualize() learned a trick to be faster on states that have
    less outgoing edges than atomic proposition declared on the
-    automaton.  spot::remove_alternation(), spot::tgba_powerset(), and
+    automaton.  spot::remove_alternation(), spot::tgba_powerset(),
-    simulation-based reductions learned a similar trick, except it
+    simulation-based reductions, and spot::tgba_determinize() learned
-    isn't applied at the state level but if the entire automaton use
+    a similar trick, except it isn't applied at the state level but if
-    few distinct labels.  These changes may speed up the processing of
+    the entire automaton use few distinct labels.  These changes may
-    automata with many atomic propositions but few distinct labels.
+    speed up the processing of automata with many atomic propositions
-    (Issue #566 and issue #568.)
+    but few distinct labels.  (Issue #566 and issue #568.)
  - [Potential backward incompatibility] spot::dualize() does not call
    cleanup_acceptance() anymore.  This change ensures that the dual
--- a/spot/twaalgos/determinize.cc
+++ b/spot/twaalgos/determinize.cc
@ -23,6 +23,8 @@
 #include <utility>
 #include <set>
 #include <map>
 #include <cmath>
 #include <spot/misc/clz.hh>
 #include <spot/misc/bddlt.hh>
 #include <spot/twaalgos/sccinfo.hh>
 #include <spot/twaalgos/determinize.hh>
@ -31,6 +33,7 @@
 #include <spot/twaalgos/simulation.hh>
 #include <spot/twaalgos/isdet.hh>
 #include <spot/twaalgos/parity.hh>
 #include <spot/twaalgos/split.hh>
 #include <spot/priv/robin_hood.hh>
 namespace spot
@ -586,9 +589,27 @@ namespace spot
    {
      const std::vector<bdd>& state_supports;
      robin_hood::unordered_flat_map<bdd, std::vector<bdd>, bdd_hash> cache;
-
+      std::vector<bdd> basis;
      unsigned log_basis_size = 0;
    public:
-      safra_support(const std::vector<bdd>& s): state_supports(s) {}
+      safra_support(const std::vector<bdd>& s,
                    const const_twa_graph_ptr& orig_aut)
        : state_supports(s)
      {
        unsigned nap = orig_aut->ap().size();
        if (nap > 5)
          {
            edge_separator es;
            // Gather all labels, but stop if we see too many.  The
            // threshold below is arbitrary: adjust if you know better.
            if (es.add_to_basis(orig_aut, 256 * nap))
              {
                basis = es.basis();
                auto sz = basis.size();
                log_basis_size = CHAR_BIT*sizeof(sz) - clz(sz);
              }
          }
      }
      const std::vector<bdd>&
      get(const safra_state& s)
@ -600,6 +621,21 @@ namespace spot
        if (i.second) // insertion took place
          {
            std::vector<bdd>& res = i.first->second;
            // If we have a basis, we probably want to use it.
            // But we should do that only if 2^|supp| is larger.
            if (log_basis_size)
              {
                // Compute the size of the support
                bdd s = supp;
                unsigned sz = log_basis_size;
                while (sz && s != bddtrue)
                  {
                    --sz;
                    s = bdd_high(s);
                  }
                if (s != bddtrue)
                  return res = basis;
              }
            for (bdd one: minterms_of(bddtrue, supp))
              res.emplace_back(one);
          }
@ -971,7 +1007,7 @@ namespace spot
          }
      }
-    safra_support safra2letters(support);
+    safra_support safra2letters(support, aut);
    auto res = make_twa_graph(aut->get_dict());
    res->copy_ap_of(aut);
--- a/tests/core/568.test
+++ b/tests/core/568.test
@ -75,3 +75,29 @@ cat >expected <<EOF
 6400 -> 80
 EOF
 diff expected out
 genaut --cycle-onehot=7..12 |
  autfilt --parity -D --stats="%S -> %s" > out
 cat >expected <<EOF
 49 -> 13
 64 -> 15
 81 -> 17
 100 -> 19
 121 -> 4360
 144 -> 9481
 EOF
 genaut --cycle-onehot=7..12 |
  autfilt -x simul-max=10000 --parity -D --stats="%S -> %s" > out
 cat >expected <<EOF
 49 -> 13
 64 -> 15
 81 -> 17
 100 -> 19
 121 -> 21
 144 -> 23
 EOF
 # Using autcross will also test tgba_determinize
 genaut --cycle-onehot-nba=11..12 --cycle-log-nba=11..12 |
  autcross --language-preserved 'autfilt --small' --verbose