determinize: work around overflow in reachability matrix indices

Fixes #541, reported by David Dokoupil.

* spot/twaalgos/determinize.cc: Disable use_simulation when the input
has more than 2^16 SCCs..  Also rework the reachability
matrix to store only its lower half triangle.
* spot/twaalgos/determinize.hh, NEWS: Mention the limitation of
use_simulation.
* THANKS: Add David.
This commit is contained in:
Alexandre Duret-Lutz 2023-09-04 17:51:26 +02:00
parent 110b052b7d
commit e2149fabf4
4 changed files with 91 additions and 59 deletions

8
NEWS
View file

@ -60,6 +60,14 @@ New in spot 2.11.6.dev (not yet released)
36 seconds; it now produce an AIG circuit with 53 nodes in only 36 seconds; it now produce an AIG circuit with 53 nodes in only
0.1 second. 0.1 second.
Bugs fixed:
- tgba_determinize()'s use_simulation option would cause it to
segfault on automata with more than 2^16 SCCs, due to overflows in
computations of indices in the reachability matrix for SCCs.
(Issue #541.) This has been fixed by disabled the use_simulation
optimization in this case.
New in spot 2.11.6 (2023-08-01) New in spot 2.11.6 (2023-08-01)
Bug fixes: Bug fixes:

1
THANKS
View file

@ -10,6 +10,7 @@ Caroline Lemieux
Christian Dax Christian Dax
Christopher Ziegler Christopher Ziegler
Clément Tamines Clément Tamines
David Dokoupil
David Müller David Müller
Dávid Smolka Dávid Smolka
Edmond Irani Liu Edmond Irani Liu

View file

@ -1,5 +1,5 @@
// -*- coding: utf-8 -*- // -*- coding: utf-8 -*-
// Copyright (C) 2015-2022 Laboratoire de Recherche et // Copyright (C) 2015-2023 Laboratoire de Recherche et
// Développement de l'Epita. // Développement de l'Epita.
// //
// This file is part of Spot, a model checking library. // This file is part of Spot, a model checking library.
@ -820,30 +820,44 @@ namespace spot
return nodes_ == other.nodes_ && braces_ == other.braces_; return nodes_ == other.nodes_ && braces_ == other.braces_;
} }
// res[i + scccount*j] = 1 iff SCC i is reachable from SCC j namespace
std::vector<char>
find_scc_paths(const scc_info& scc)
{ {
unsigned scccount = scc.scc_count(); class reachability_matrix final
std::vector<char> res(scccount * scccount, 0); {
for (unsigned i = 0; i != scccount; ++i) // Store a lower triangular matrix.
res[i + scccount * i] = 1; // (j can reach i) <=> (i<=j and m[j*(j+1)/2 + i]==1)
for (unsigned i = 0; i != scccount; ++i) std::vector<char> m;
public:
reachability_matrix(const scc_info& scc)
{ {
unsigned ibase = i * scccount; unsigned scccount = scc.scc_count();
for (unsigned d: scc.succ(i)) m.resize(scccount * (scccount + 1) / 2, 0);
for (unsigned i = 0; i < scccount; ++i)
m[(i * (i + 1) / 2) + i] = 1;
for (unsigned i = 1; i < scccount; ++i)
{ {
// we necessarily have d < i because of the way SCCs are unsigned ibase = i * (i + 1) / 2;
// numbered, so we can build the transitive closure by for (unsigned d: scc.succ(i))
// just ORing any SCC reachable from d. {
unsigned dbase = d * scccount; // we necessarily have d < i because of the way SCCs are
for (unsigned j = 0; j != scccount; ++j) // numbered, so we can build the transitive closure by
res[ibase + j] |= res[dbase + j]; // just ORing any SCC reachable from d.
unsigned dbase = d * (d + 1) / 2;
for (unsigned j = 0; j <= d; ++j)
m[ibase + j] |= m[dbase + j];
}
} }
} }
return res;
bool operator()(unsigned j, unsigned i) const
{
return i <= j && m[(j * (j + 1) / 2) + i];
}
};
} }
twa_graph_ptr twa_graph_ptr
tgba_determinize(const const_twa_graph_ptr& a, tgba_determinize(const const_twa_graph_ptr& a,
bool pretty_print, bool use_scc, bool pretty_print, bool use_scc,
@ -882,49 +896,56 @@ namespace spot
scc_opt = scc_info_options::TRACK_SUCCS | scc_info_options::TRACK_STATES; scc_opt = scc_info_options::TRACK_SUCCS | scc_info_options::TRACK_STATES;
scc_info scc = scc_info(aut, scc_opt); scc_info scc = scc_info(aut, scc_opt);
// If we have too many SCCs, disable simulation-based checks, as
// computations to index the matrix would overflow. (Issue #541.)
if (scc.scc_count() >= (1 << 16))
{
use_simulation = false;
implications.clear();
}
// If use_simulation is false, implications is empty, so nothing is built // If use_simulation is false, implications is empty, so nothing is built
std::vector<std::vector<char>> implies( std::vector<std::vector<char>> implies(
implications.size(), implications.size(),
std::vector<char>(implications.size(), 0)); std::vector<char>(implications.size(), 0));
{ if (use_simulation)
std::vector<char> is_connected = find_scc_paths(scc); {
unsigned sccs = scc.scc_count(); reachability_matrix scc_can_reach(scc);
bool something_implies_something = false; bool something_implies_something = false;
for (unsigned i = 0; i != implications.size(); ++i) for (unsigned i = 0; i != implications.size(); ++i)
{ {
// NB spot::simulation() does not remove unreachable states, as it // NB spot::simulation() does not remove unreachable states, as it
// would invalidate the contents of 'implications'. // would invalidate the contents of 'implications'.
// so we need to explicitly test for unreachable states // so we need to explicitly test for unreachable states
// FIXME: based on the scc_info, we could remove the unreachable // FIXME: based on the scc_info, we could remove the unreachable
// states, both in the input automaton and in 'implications' // states, both in the input automaton and in 'implications'
// to reduce the size of 'implies'. // to reduce the size of 'implies'.
if (!scc.reachable_state(i)) if (!scc.reachable_state(i))
continue; continue;
unsigned scc_of_i = scc.scc_of(i); unsigned scc_of_i = scc.scc_of(i);
bool i_implies_something = false; bool i_implies_something = false;
for (unsigned j = 0; j != implications.size(); ++j) for (unsigned j = 0; j != implications.size(); ++j)
{ {
if (!scc.reachable_state(j)) if (!scc.reachable_state(j))
continue; continue;
bool i_implies_j = !is_connected[sccs * scc.scc_of(j) + scc_of_i]
&& bdd_implies(implications[i], implications[j]);
implies[i][j] = i_implies_j;
i_implies_something |= i_implies_j;
}
// Clear useless lines.
if (!i_implies_something)
implies[i].clear();
else
something_implies_something = true;
}
if (!something_implies_something)
{
implies.clear();
use_simulation = false;
}
}
bool i_implies_j = !scc_can_reach(scc.scc_of(j), scc_of_i)
&& bdd_implies(implications[i], implications[j]);
implies[i][j] = i_implies_j;
i_implies_something |= i_implies_j;
}
// Clear useless lines.
if (!i_implies_something)
implies[i].clear();
else
something_implies_something = true;
}
if (!something_implies_something)
{
implies.clear();
use_simulation = false;
}
}
// Compute the support of each state // Compute the support of each state
std::vector<bdd> support(aut->num_states()); std::vector<bdd> support(aut->num_states());

View file

@ -1,6 +1,6 @@
// -*- coding: utf-8 -*- // -*- coding: utf-8 -*-
// Copyright (C) 2015-2016, 2019-2021 Laboratoire de Recherche et // Copyright (C) 2015-2016, 2019-2021, 2023 Laboratoire de Recherche
// Développement de l'Epita. // et Développement de l'Epita.
// //
// This file is part of Spot, a model checking library. // This file is part of Spot, a model checking library.
// //
@ -64,7 +64,9 @@ namespace spot
/// ///
/// \param use_simulation whether to simplify the construction based /// \param use_simulation whether to simplify the construction based
/// on simulation relations between states in /// on simulation relations between states in
/// the original automaton. /// the original automaton. This optimization
/// is automatically disabled on automata with
/// more than 2^16 SCCs.
/// ///
/// \param use_stutter whether to simplify the construction when the /// \param use_stutter whether to simplify the construction when the
/// input automaton is known to be /// input automaton is known to be