determinize: work around overflow in reachability matrix indices

Fixes #541, reported by David Dokoupil. * spot/twaalgos/determinize.cc: Disable use_simulation when the input has more than 2^16 SCCs.. Also rework the reachability matrix to store only its lower half triangle. * spot/twaalgos/determinize.hh, NEWS: Mention the limitation of use_simulation. * THANKS: Add David.
2023-09-04 17:51:26 +02:00 · 2023-09-04 17:51:26 +02:00 · e2149fabf4
commit e2149fabf4
parent 110b052b7d
4 changed files with 91 additions and 59 deletions
--- a/8
+++ b/8
@ -60,6 +60,14 @@ New in spot 2.11.6.dev  (not yet released)
    36 seconds; it now produce an AIG circuit with 53 nodes in only
    0.1 second.
  Bugs fixed:
  - tgba_determinize()'s use_simulation option would cause it to
    segfault on automata with more than 2^16 SCCs, due to overflows in
    computations of indices in the reachability matrix for SCCs.
    (Issue #541.)  This has been fixed by disabled the use_simulation
    optimization in this case.
 New in spot 2.11.6  (2023-08-01)
  Bug fixes:
--- a/1
+++ b/1
@ -10,6 +10,7 @@ Caroline Lemieux
 Christian Dax
 Christopher Ziegler
 Clément Tamines
 David Dokoupil
 David Müller
 Dávid Smolka
 Edmond Irani Liu
--- a/spot/twaalgos/determinize.cc
+++ b/spot/twaalgos/determinize.cc
@ -1,5 +1,5 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2015-2022 Laboratoire de Recherche et
+// Copyright (C) 2015-2023 Laboratoire de Recherche et
 // Développement de l'Epita.
 //
 // This file is part of Spot, a model checking library.
@ -820,30 +820,44 @@ namespace spot
    return nodes_ == other.nodes_ && braces_ == other.braces_;
  }
-  // res[i + scccount*j] = 1 iff SCC i is reachable from SCC j
+  namespace
  std::vector<char>
  find_scc_paths(const scc_info& scc)
  {
-    unsigned scccount = scc.scc_count();
+    class reachability_matrix final
-    std::vector<char> res(scccount * scccount, 0);
+    {
-    for (unsigned i = 0; i != scccount; ++i)
+      // Store a lower triangular matrix.
-      res[i + scccount * i] = 1;
+      // (j can reach i) <=> (i<=j and m[j*(j+1)/2 + i]==1)
-    for (unsigned i = 0; i != scccount; ++i)
+      std::vector<char> m;
    public:
      reachability_matrix(const scc_info& scc)
      {
-        unsigned ibase = i * scccount;
+        unsigned scccount = scc.scc_count();
-        for (unsigned d: scc.succ(i))
+        m.resize(scccount * (scccount + 1) / 2, 0);
        for (unsigned i = 0; i < scccount; ++i)
          m[(i * (i + 1) / 2) + i] = 1;
        for (unsigned i = 1; i < scccount; ++i)
          {
-            // we necessarily have d < i because of the way SCCs are
+            unsigned ibase = i * (i + 1) / 2;
-            // numbered, so we can build the transitive closure by
+            for (unsigned d: scc.succ(i))
-            // just ORing any SCC reachable from d.
+              {
-            unsigned dbase = d * scccount;
+                // we necessarily have d < i because of the way SCCs are
-            for (unsigned j = 0; j != scccount; ++j)
+                // numbered, so we can build the transitive closure by
-              res[ibase + j] |= res[dbase + j];
+                // just ORing any SCC reachable from d.
                unsigned dbase = d * (d + 1) / 2;
                for (unsigned j = 0; j <= d; ++j)
                  m[ibase + j] |= m[dbase + j];
              }
          }
      }
-    return res;
+
      bool operator()(unsigned j, unsigned i) const
      {
        return i <= j && m[(j * (j + 1) / 2) + i];
      }
    };
  }
  twa_graph_ptr
  tgba_determinize(const const_twa_graph_ptr& a,
                   bool pretty_print, bool use_scc,
@ -882,49 +896,56 @@ namespace spot
      scc_opt = scc_info_options::TRACK_SUCCS | scc_info_options::TRACK_STATES;
    scc_info scc = scc_info(aut, scc_opt);
    // If we have too many SCCs, disable simulation-based checks, as
    // computations to index the matrix would overflow.  (Issue #541.)
    if (scc.scc_count() >= (1 << 16))
      {
        use_simulation = false;
        implications.clear();
      }
    // If use_simulation is false, implications is empty, so nothing is built
    std::vector<std::vector<char>> implies(
        implications.size(),
        std::vector<char>(implications.size(), 0));
-    {
+    if (use_simulation)
-      std::vector<char> is_connected = find_scc_paths(scc);
+      {
-      unsigned sccs = scc.scc_count();
+        reachability_matrix scc_can_reach(scc);
-      bool something_implies_something = false;
+        bool something_implies_something = false;
-      for (unsigned i = 0; i != implications.size(); ++i)
+        for (unsigned i = 0; i != implications.size(); ++i)
-        {
+          {
-          // NB spot::simulation() does not remove unreachable states, as it
+            // NB spot::simulation() does not remove unreachable states, as it
-          // would invalidate the contents of 'implications'.
+            // would invalidate the contents of 'implications'.
-          // so we need to explicitly test for unreachable states
+            // so we need to explicitly test for unreachable states
-          // FIXME: based on the scc_info, we could remove the unreachable
+            // FIXME: based on the scc_info, we could remove the unreachable
-          // states, both in the input automaton and in 'implications'
+            // states, both in the input automaton and in 'implications'
-          // to reduce the size of 'implies'.
+            // to reduce the size of 'implies'.
-          if (!scc.reachable_state(i))
+            if (!scc.reachable_state(i))
-            continue;
+              continue;
-          unsigned scc_of_i = scc.scc_of(i);
+            unsigned scc_of_i = scc.scc_of(i);
-          bool i_implies_something = false;
+            bool i_implies_something = false;
-          for (unsigned j = 0; j != implications.size(); ++j)
+            for (unsigned j = 0; j != implications.size(); ++j)
-            {
+              {
-              if (!scc.reachable_state(j))
+                if (!scc.reachable_state(j))
-                continue;
+                  continue;
              bool i_implies_j = !is_connected[sccs * scc.scc_of(j) + scc_of_i]
                && bdd_implies(implications[i], implications[j]);
              implies[i][j] = i_implies_j;
              i_implies_something |= i_implies_j;
            }
          // Clear useless lines.
          if (!i_implies_something)
            implies[i].clear();
          else
            something_implies_something = true;
        }
      if (!something_implies_something)
        {
          implies.clear();
          use_simulation = false;
        }
    }
                bool i_implies_j = !scc_can_reach(scc.scc_of(j), scc_of_i)
                  && bdd_implies(implications[i], implications[j]);
                implies[i][j] = i_implies_j;
                i_implies_something |= i_implies_j;
              }
            // Clear useless lines.
            if (!i_implies_something)
              implies[i].clear();
            else
              something_implies_something = true;
          }
        if (!something_implies_something)
          {
            implies.clear();
            use_simulation = false;
          }
      }
    // Compute the support of each state
    std::vector<bdd> support(aut->num_states());
--- a/spot/twaalgos/determinize.hh
+++ b/spot/twaalgos/determinize.hh
@ -1,6 +1,6 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2015-2016, 2019-2021 Laboratoire de Recherche et
+// Copyright (C) 2015-2016, 2019-2021, 2023 Laboratoire de Recherche
-// Développement de l'Epita.
+// et Développement de l'Epita.
 //
 // This file is part of Spot, a model checking library.
 //
@ -64,7 +64,9 @@ namespace spot
  ///
  /// \param use_simulation whether to simplify the construction based
  ///                       on simulation relations between states in
-  ///                       the original automaton.
+  ///                       the original automaton.  This optimization
  ///                       is automatically disabled on automata with
  ///                       more than 2^16 SCCs.
  ///
  /// \param use_stutter whether to simplify the construction when the
  ///                    input automaton is known to be