diff --git a/NEWS b/NEWS index f2d65c401..1d8494ae0 100644 --- a/NEWS +++ b/NEWS @@ -165,12 +165,12 @@ New in spot 2.11.6.dev (not yet released) - spot::dualize() learned a trick to be faster on states that have less outgoing edges than atomic proposition declared on the - automaton. spot::remove_alternation() and - spot::tgba_determinize() learned a similar trick, except it isn't - applied at the state level but of the entire alternating use few - distinct labels. These changes may speed up the complementation - of some very weak automata, and the minimization of some - WDBA. (Issue #566.) + automaton. spot::remove_alternation(), spot::tgba_powerset(), and + simulation-based reductions learned a similar trick, except it + isn't applied at the state level but if the entire automaton use + few distinct labels. These changes may speed up the processing of + automata with many atomic propositions but few distinct labels. + (Issue #566 and issue #568.) - [Potential backward incompatibility] spot::dualize() does not call cleanup_acceptance() anymore. This change ensures that the dual diff --git a/spot/twaalgos/simulation.cc b/spot/twaalgos/simulation.cc index 1beb41b75..9fe714e03 100644 --- a/spot/twaalgos/simulation.cc +++ b/spot/twaalgos/simulation.cc @@ -29,6 +29,8 @@ #include #include #include +#include +#include // Work around GCC bug 80947 (dominates_edge is causing spurious // visibility warnings) @@ -507,6 +509,35 @@ namespace spot res->copy_ap_of(a_); res->copy_acceptance_of(a_); + // We have two ways of "spliting" a signature to create the + // outgoing edges. One is to iterate over 2^AP, then collect + // the destinations. The second is to first create a coarser + // basis for the original set of labels, and then iterate on + // this basis. The latter is good when we have few distinct + // labels. With too many different labels that may have + // nonempty intersections, the basis approach can consume a + // lot of memory. We have to heuristically select between + // those two. + unsigned nap = res->ap().size(); + bool will_use_basis = nap > 5; + edge_separator es; + if (will_use_basis) + // Gather all labels, but stop if we see too many. The + // threshold below is arbitrary: adjust if you know better. + will_use_basis = es.add_to_basis(a_, 256 * nap); + // We use a cache to avoid the costly loop over the basis. + // + // Cache entries have the form (bdd, [begin, end]) where bdd + // what should be split using the basis, and begin/end denotes + // a range of existing transition numbers that cover the + // split. + // + // std::pair causes some noexcept warnings when used in + // robin_hood::unordered_map with GCC 9.4. Use robin_hood::pair + // instead. + typedef robin_hood::pair cached_t; + robin_hood::unordered_map split_cond; + auto state_mapping = new std::vector(); state_mapping->resize(a_->num_states()); res->set_named_prop("simulated-states", state_mapping); @@ -549,6 +580,58 @@ namespace spot auto all_inf = all_inf_; unsigned srcst = 0; + + auto create_edges = [&](int srcid, bdd one, bdd dest) { + // Iterate over all possible destination classes. We + // use minato_isop here, because if the same valuation + // of atomic properties can go to two different + // classes C1 and C2, iterating on C1 + C2 with other + // means would see C1 then (!C1)C2, instead of C1 then + // C2. With minatop_isop, we ensure that no negative + // class variable will be seen (likewise for + // promises). + minato_isop isop(dest); + + ++nb_minterms; + bdd cond_acc_dest; + while ((cond_acc_dest = isop.next()) != bddfalse) + { + ++stat.edges; + ++nb_minato; + + // Take the edge, and keep only the variable which + // are used to represent the class. + bdd dst = bdd_existcomp(cond_acc_dest, all_class_var_); + + // Keep only ones who are acceptance condition. + auto acc = bdd_to_mark(bdd_existcomp(cond_acc_dest, + all_proms_)); + + // Because we have complemented all the Inf + // acceptance conditions on the input automaton, + // we must revert them to create a new edge. + acc ^= all_inf; + if (Cosimulation) + { + if (Sba) + { + // acc should be attached to src, or rather, + // in our edge-based representation) + // to all edges leaving src. As we + // can't do this here, store this in a table + // so we can fix it later. + accst[srcst] = acc; + acc = {}; + } + gb->new_edge(dst.id(), srcid, one, acc); + } + else + { + gb->new_edge(srcid, dst.id(), one, acc); + } + } + }; + // For each class, we will create // all the edges between the states. for (auto& p: sorted_classes_) @@ -566,11 +649,10 @@ namespace spot if (Cosimulation) sig = bdd_compose(sig, bddfalse, bdd_var(bdd_initial)); - // Get all the variables in the signature. bdd sup_sig = bdd_support(sig); - // Get the variable in the signature which represents the + // Get the variables in the signature which represent the // conditions. bdd sup_all_atomic_prop = bdd_exist(sup_sig, nonapvars); @@ -578,60 +660,44 @@ namespace spot // proposition. bdd all_atomic_prop = bdd_exist(sig, nonapvars); - // First loop over all possible valuations atomic properties. - for (bdd one: minterms_of(all_atomic_prop, sup_all_atomic_prop)) + if (!will_use_basis) { - // For each possible valuation, iterate over all possible - // destination classes. We use minato_isop here, because - // if the same valuation of atomic properties can go - // to two different classes C1 and C2, iterating on - // C1 + C2 with the above minters_of loop will see - // C1 then (!C1)C2, instead of C1 then C2. - // With minatop_isop, we ensure that the no negative - // class variable will be seen (likewise for promises). - minato_isop isop(bdd_restrict(sig, one)); - ++nb_minterms; - - bdd cond_acc_dest; - while ((cond_acc_dest = isop.next()) != bddfalse) + for (bdd one: minterms_of(all_atomic_prop, sup_all_atomic_prop)) + create_edges(src.id(), one, bdd_restrict(sig, one)); + } + else + { + auto& [begin, end] = split_cond[all_atomic_prop]; + if (begin == end) { - ++stat.edges; - - ++nb_minato; - - // Take the edge, and keep only the variable which - // are used to represent the class. - bdd dst = bdd_existcomp(cond_acc_dest, all_class_var_); - - // Keep only ones who are acceptance condition. - auto acc = bdd_to_mark(bdd_existcomp(cond_acc_dest, - all_proms_)); - - // Because we have complemented all the Inf - // acceptance conditions on the input automaton, - // we must revert them to create a new edge. - acc ^= all_inf; - if (Cosimulation) + begin = res->num_edges() + 1; + for (bdd label: es.basis()) + create_edges(src.id(), label, + bdd_relprod(label, sig, + res->ap_vars())); + end = res->num_edges() + 1; + } + else + { + // We have already split all_atomic_prop once, so + // we can simply reuse the set of labels we used + // then, avoiding the iteration on es.basis(). + auto& g = res->get_graph(); + bdd last = bddfalse; + for (unsigned i = begin; i < end; ++i) { - if (Sba) - { - // acc should be attached to src, or rather, - // in our edge-based representation) - // to all edges leaving src. As we - // can't do this here, store this in a table - // so we can fix it later. - accst[srcst] = acc; - acc = {}; - } - gb->new_edge(dst.id(), src.id(), one, acc); - } - else - { - gb->new_edge(src.id(), dst.id(), one, acc); + bdd label = g.edge_storage(i).cond; + if (label == last) + continue; + last = label; + create_edges(src.id(), label, + bdd_relprod(label, sig, + res->ap_vars())); } } } + ++srcst; } diff --git a/tests/Makefile.am b/tests/Makefile.am index c6430b2ac..ebee91fa9 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -221,6 +221,7 @@ TESTS_twa = \ core/521.test \ core/522.test \ core/566.test \ + core/568.test \ core/acc.test \ core/acc2.test \ core/bdddict.test \ diff --git a/tests/core/568.test b/tests/core/568.test new file mode 100755 index 000000000..2fb4b8df2 --- /dev/null +++ b/tests/core/568.test @@ -0,0 +1,77 @@ +#!/bin/sh +# -*- coding: utf-8 -*- +# Copyright (C) by the Spot authors, see the AUTHORS file for details. +# +# This file is part of Spot, a model checking library. +# +# Spot is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Spot is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. ./defs + +set -e + +# For issue #568. Before the patch implemented for #568, running +# simulation-based reduction on automata with many APs was getting +# exponentially slow even if few different labels were used. +# +# For instance we had this: +# +# % genaut --cycle-onehot=10..20 --name='size %L' | +# autfilt --small --stats='%M: %S->%s in %r seconds' +# size 10: 100->10 in 0.0395407 seconds +# size 11: 121->11 in 0.0950484 seconds +# size 12: 144->12 in 0.227828 seconds +# size 13: 169->13 in 0.391545 seconds +# size 14: 196->14 in 0.954784 seconds +# size 15: 225->15 in 2.34656 seconds +# size 16: 256->16 in 5.80549 seconds +# size 17: 289->17 in 14.3545 seconds +# size 18: 324->18 in 47.1589 seconds +# size 19: 361->19 in 138.023 seconds +# +# The test below shows that even with --cycle-onehot-nba=80 we don't +# have any problem now. + +genaut --cycle-onehot-nba=20 \ + --cycle-onehot-nba=40 \ + --cycle-onehot-nba=60 \ + --cycle-onehot-nba=80 \ + --cycle-log-nba=20 \ + --cycle-log-nba=40 \ + --cycle-log-nba=60 \ + --cycle-log-nba=80 \ + | autfilt --small --stats='%S -> %s' > out +cat >expected < 20 +1600 -> 40 +3600 -> 60 +6400 -> 6400 +400 -> 20 +1600 -> 40 +3600 -> 60 +6400 -> 6400 +EOF +diff expected out + +# the reason for the 6400 states above is that simulation-based +# reductions are disabled above 4096 states by default. This can be +# changed with -x simul-max=N. + +genaut --cycle-onehot-nba=80 --cycle-log-nba=80 \ + | autfilt -x simul-max=6400 --small --stats='%S -> %s' > out +cat >expected < 80 +6400 -> 80 +EOF +diff expected out