diff --git a/NEWS b/NEWS index 832e0fd61..4b20958f4 100644 --- a/NEWS +++ b/NEWS @@ -153,10 +153,12 @@ New in spot 2.11.6.dev (not yet released) - spot::dualize() learned a trick to be faster on states that have less outgoing edges than atomic proposition declared on the - automaton. spot::remove_alternation() learned a similar trick, - except it isn't applied at the state level but of the entire - alternating use few distinct labels. These two changes speed up - the complementation of very weak automata. (Issue #566.) + automaton. spot::remove_alternation() and + spot::tgba_determinize() learned a similar trick, except it isn't + applied at the state level but of the entire alternating use few + distinct labels. These changes may speed up the complementation + of some very weak automata, and the minimization of some + WDBA. (Issue #566.) - [Potential backward incompatibility] spot::dualize() does not call cleanup_acceptance() anymore. This change ensures that the dual diff --git a/spot/twaalgos/powerset.cc b/spot/twaalgos/powerset.cc index 044ea21b9..2163f5ffc 100644 --- a/spot/twaalgos/powerset.cc +++ b/spot/twaalgos/powerset.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -83,20 +84,44 @@ namespace spot if ((-1UL / ns) >> nap == 0) throw std::runtime_error("too many atomic propositions (or states)"); + // we have two ways of "spliting" the labels when determinizing. + // One is to iterate over 2^AP, the second is to partition the set + // of edges labels. We don't have a very clean rule to chose. The + // former is expansive when we have a lot of AP. The latter is + // good when we have few distinct labels. With too many different + // labels that may have nonempty intersections, the + // partition-based approach can consume a lot of memory. + bool will_use_labels = nap > 5; + edge_separator es; + if (will_use_labels) + { + // Gather all labels, but stop if we see too many. The + // threshold below is arbitrary: adjust if you know better. + will_use_labels = es.add_to_basis(aut, 256 * nap); + } + // Build a correspondence between conjunctions of APs and unsigned // indexes. std::vector num2bdd; num2bdd.reserve(1UL << nap); std::map bdd2num; bdd allap = aut->ap_vars(); - for (bdd one: minterms_of(bddtrue, allap)) - { - bdd2num.emplace(one, num2bdd.size()); - num2bdd.emplace_back(one); - } + + if (!will_use_labels) + for (bdd one: minterms_of(bddtrue, allap)) + { + bdd2num.emplace(one, num2bdd.size()); + num2bdd.emplace_back(one); + } + else + for (bdd one: es.basis()) + { + bdd2num.emplace(one, num2bdd.size()); + num2bdd.emplace_back(one); + } size_t nc = num2bdd.size(); // number of conditions - assert(nc == (1UL << nap)); + assert(will_use_labels || nc == (1UL << nap)); // Conceptually, we represent the automaton as an array 'bv' of // ns*nc bit vectors of size 'ns'. Each original state is @@ -172,12 +197,18 @@ namespace spot for (unsigned i = 0; i < nc; ++i) bv->at(base + i).clear_all(); for (auto& t: aut->out(src)) - for (bdd one: minterms_of(t.cond, allap)) - { - unsigned num = bdd2num[one]; - bv->at(base + num).set(t.dst); - } - + if (!will_use_labels) + for (bdd one: minterms_of(t.cond, allap)) + { + unsigned num = bdd2num[one]; + bv->at(base + num).set(t.dst); + } + else + for (bdd one: es.separate_implying(t.cond)) + { + unsigned num = bdd2num[one]; + bv->at(base + num).set(t.dst); + } assert(idx == lru.begin()->first); return idx; }; diff --git a/tests/core/566.test b/tests/core/566.test index 2399576c4..db4796c8e 100755 --- a/tests/core/566.test +++ b/tests/core/566.test @@ -138,3 +138,8 @@ test "$res" = "5 13 85 6 13 12582912" res=`autfilt --complement 21.hoa --stats='%S %E %T %s %e %t'` test "$res" = "5 13 85 5 11 10485760" + +res=`autfilt --small 21.hoa --stats='%S %E %T %s %e %t'` +test "$res" = "5 13 85 2 2 22" + +autcross --language-preserved 'autfilt --small' -F 21.hoa --verbose