From 786599ed202089134c88cb96c38fef4d55f56bd9 Mon Sep 17 00:00:00 2001 From: philipp Date: Thu, 12 Aug 2021 00:04:29 +0200 Subject: [PATCH] Adding selective edge sorting and state merging The merging is (possibly) more expensive but also merges more states when applied to all states. * spot/graph/graph.hh: edge sorting * spot/twa/twagraph.cc, spot/twa/twagraph.hh: selective state merging * tests/core/twagraph.cc: Adjusting tests --- spot/graph/graph.hh | 45 +++++++++++++ spot/twa/twagraph.cc | 147 +++++++++++++++++++++++++++++++++++++++++ spot/twa/twagraph.hh | 13 ++++ tests/core/twagraph.cc | 76 ++++++++++++++++++++- 4 files changed, 280 insertions(+), 1 deletion(-) diff --git a/spot/graph/graph.hh b/spot/graph/graph.hh index 0d68ba9a9..75e0977b7 100644 --- a/spot/graph/graph.hh +++ b/spot/graph/graph.hh @@ -1226,6 +1226,51 @@ namespace spot std::stable_sort(edges_.begin() + 1, edges_.end(), p); } + /// \brief Sort edges of the given states + /// + /// \tparam Predicate : Comparison type + /// \param p : Comparison callable + /// \param to_sort_ptr : which states to sort. If null, all will be sorted + /// \note No need to call chain_edges_, they are in a coherent state. + /// todo: If pred does not involve bdd action other than id -> parallelize + template> + void sort_edges_of_(Predicate p = Predicate(), + const std::vector* to_sort_ptr = nullptr) + { + SPOT_ASSERT((to_sort_ptr == nullptr) + || (to_sort_ptr->size() == num_states())); + //std::cerr << "\nbefore\n"; + //dump_storage(std::cerr); + auto pi = [&](unsigned t1, unsigned t2) + {return p(edges_[t1], edges_[t2]); }; + std::vector sort_idx_; + for (unsigned i = 0; i < num_states(); ++i) + { + if (to_sort_ptr && !(*to_sort_ptr)[i]) + continue; + + sort_idx_.clear(); + unsigned t = states_[i].succ; + do + { + sort_idx_.push_back(t); + t = edges_[t].next_succ; + } while (t != 0); + if constexpr (Stable) + std::stable_sort(sort_idx_.begin(), sort_idx_.end(), pi); + else + std::sort(sort_idx_.begin(), sort_idx_.end(), pi); + // Update the graph + states_[i].succ = sort_idx_.front(); + states_[i].succ_tail = sort_idx_.back(); + const unsigned n_outs_n1 = sort_idx_.size() - 1; + for (unsigned k = 0; k < n_outs_n1; ++k) + edges_[sort_idx_[k]].next_succ = sort_idx_[k+1]; + edges_[sort_idx_.back()].next_succ = 0; // terminal + } + // Done + } + /// \brief Reconstruct the chain of outgoing edges /// /// Should be called only when it is known that all edges diff --git a/spot/twa/twagraph.cc b/spot/twa/twagraph.cc index dbe710d5f..5748de23c 100644 --- a/spot/twa/twagraph.cc +++ b/spot/twa/twagraph.cc @@ -385,6 +385,153 @@ namespace spot return merged; } + unsigned twa_graph::merge_states_of(bool stable, + const std::vector* to_merge_ptr) + { + if (!is_existential()) + throw std::runtime_error( + "twa_graph::merge_states() does not work on alternating automata"); + + typedef graph_t::edge_storage_t tr_t; + if (stable) + g_.sort_edges_of_([](const tr_t& lhs, const tr_t& rhs) + { + if (lhs.acc < rhs.acc) + return true; + if (lhs.acc > rhs.acc) + return false; + if (bdd_less_than_stable lt; lt(lhs.cond, rhs.cond)) + return true; + if (rhs.cond != lhs.cond) + return false; + // The destination must be sorted last + // for our self-loop optimization to work. + return lhs.dst < rhs.dst; + }); + else + g_.sort_edges_of_([](const tr_t& lhs, const tr_t& rhs) + { + if (lhs.acc < rhs.acc) + return true; + if (lhs.acc > rhs.acc) + return false; + if (bdd_less_than lt; lt(lhs.cond, rhs.cond)) + return true; + if (rhs.cond != lhs.cond) + return false; + // The destination must be sorted last + // for our self-loop optimization to work. + return lhs.dst < rhs.dst; + }); + + // Associates a hash value to a vector of classes + std::unordered_map>> equiv_class_; + + auto hash_state_ = [&](unsigned s)->size_t + { + // Hash the edges + // bottle_neck? + size_t h = fnv::init; + for (const edge_storage_t& e : out(s)) + { + h ^= knuth32_hash(e.dst); + h ^= knuth32_hash(e.cond.id()); + h ^= e.acc.hash(); + h = wang32_hash(h); + } + return h; + }; + + const unsigned nb_states = num_states(); + + std::vector comp_classes_; // Classes to be merged + for (unsigned i = 0; i != nb_states; ++i) + { + if (to_merge_ptr && !(*to_merge_ptr)[nb_states]) + continue; + + size_t hi = hash_state_(i); + auto equal_to_i_ = [&, outi = out(i)](unsigned j) + { + auto outj = out(j); + return std::equal(outi.begin(), outi.end(), + outj.begin(), outj.end(), + [](const edge_storage_t& a, + const edge_storage_t& b) + { return ((a.dst == b.dst + || (a.dst == a.src && b.dst == b.src)) + && a.data() == b.data()); }); + }; + comp_classes_.clear(); + // get all compatible classes + // Candidate classes share a hash + // A state is compatible to a class if it is compatble + // to any of its states + auto& cand_classes = equiv_class_[hi]; + unsigned n_c_classes = cand_classes.size(); + // Built it in "reverse order" the idx of + // classes to be merged + for (unsigned nc = n_c_classes - 1; nc < n_c_classes; --nc) + if (std::any_of(cand_classes[nc].begin(), + cand_classes[nc].end(), + [&](unsigned j) + {return equal_to_i_(j); })) + comp_classes_.push_back(nc); + // Possible results: + // 1) comp_classes_ is empty -> i gets its own class + // 2) fuse together all comp_classes and add i + if (comp_classes_.empty()) + cand_classes.emplace_back(std::set({i})); + else + { + // Lowest idx + auto& base_class = cand_classes[comp_classes_.back()]; + comp_classes_.pop_back(); // Keep this one + for (unsigned compi : comp_classes_) + { + // fuse with base and delete + base_class.insert(cand_classes[compi].begin(), + cand_classes[compi].end()); + std::swap(cand_classes[compi], cand_classes.back()); + cand_classes.pop_back(); + } + // finally add the current state that caused all the merging + base_class.emplace_hint(base_class.end(), i); + } + }; + + // Now we have equivalence classes + // and a state can only be in exactly + // (Otherwise the classes would have fused) + // For each equiv class we take the first state as representative + // and redirect all incoming edges to this one + std::vector remap(nb_states, -1U); + for (const auto& [_, class_v] : equiv_class_) + for (const auto& aclass : class_v) + { + unsigned rep = *aclass.begin(); + for (auto it = ++aclass.begin(); it != aclass.end(); ++it) + remap[*it] = rep; + } + + for (auto& e: edges()) + if (remap[e.dst] != -1U) + e.dst = remap[e.dst]; + + if (remap[get_init_state_number()] != -1U) + set_init_state(remap[get_init_state_number()]); + + unsigned st = 0; + for (auto& s: remap) + if (s == -1U) + s = st++; + else + s = -1U; + + defrag_states(remap, st); + return remap.size() - st; + } + void twa_graph::purge_unreachable_states(shift_action* f, void* action_data) { unsigned num_states = g_.num_states(); diff --git a/spot/twa/twagraph.hh b/spot/twa/twagraph.hh index 11a7165cf..bc2c23230 100644 --- a/spot/twa/twagraph.hh +++ b/spot/twa/twagraph.hh @@ -593,6 +593,19 @@ namespace spot /// \return the number of states that have been merged and removed. unsigned merge_states(); + /// \brief Like merge states, but one can chose which states are + /// candidates for merging. + /// + /// \param stable Determines whether or not a stable sorting is used for + /// the edges + /// \param to_merge_ptr Determines which states are candidates. + /// If null, all states are considered + /// The actual implementation differd from merge_states(). + /// It is more costly, but is more precise, in the sense that + /// more states are merged. + unsigned merge_states_of(bool stable = true, + const std::vector* to_merge_ptr = nullptr); + /// \brief Remove all dead states /// /// Dead states are all the states that cannot be part of diff --git a/tests/core/twagraph.cc b/tests/core/twagraph.cc index 39a2dc946..e5d70e59a 100644 --- a/tests/core/twagraph.cc +++ b/tests/core/twagraph.cc @@ -88,7 +88,9 @@ static void f1() // Test purge with named and highlighted states. static void f2() { - auto d = spot::make_bdd_dict(); + auto + + d = spot::make_bdd_dict(); auto tg = make_twa_graph(d); auto s1 = tg->new_state(); @@ -186,6 +188,76 @@ static void f5() spot::print_hoa(std::cout, tg) << '\n'; } +// Test merge_states_of() +static void f6() +{ + auto d = spot::make_bdd_dict(); + auto tg = make_twa_graph(d); + + auto s1 = tg->new_state(); + auto s2 = tg->new_state(); + auto s3 = tg->new_state(); + auto s4 = tg->new_state(); + auto s5 = tg->new_state(); + + tg->set_init_state(s5); + tg->new_edge(s1, s2, bddtrue); + tg->new_edge(s2, s2, bddtrue); + tg->new_edge(s3, s2, bddtrue); + tg->new_edge(s4, s4, bddtrue); + tg->new_edge(s5, s1, bddtrue); + tg->new_edge(s5, s2, bddtrue); + tg->new_edge(s5, s3, bddtrue); + tg->new_edge(s5, s4, bddtrue); + + unsigned out = tg->merge_states_of(); + assert(out == 3); +} + +// Compare merge_states() and merge_states_of() +// when faced with a more involved problem +static void f7() +{ + // The current mege_states implementation of "next" + // needs two successive calls to obtain an automaton with only 3 states + // This is especially annoying as this depends on the numbering. + // By renumbering 2->1 3->2 1->3 the current version only needs one call too + auto get_aut = []() + { + auto d = spot::make_bdd_dict(); + auto aut = make_twa_graph(d); + + aut->new_states(5); + + aut->new_edge(0, 1, bddtrue); + aut->new_edge(0, 2, bddtrue); + + aut->new_edge(1, 1, bddtrue); + aut->new_edge(1, 4, bddtrue); + + aut->new_edge(2, 3, bddtrue); + aut->new_edge(2, 4, bddtrue); + + aut->new_edge(3, 3, bddtrue); + aut->new_edge(3, 4, bddtrue); + + return aut; + }; + + auto aut = get_aut(); + // Basic merge_states needs 2 iterations + // Merging only one step each + assert(aut->merge_states() == 1u); + assert(aut->merge_states() == 1u); + assert(aut->num_states() == 3u); + + // merge_states_of can do it in one iteration + // when used on all states + aut = get_aut(); + assert(aut->merge_states_of() == 2u); + assert(aut->num_states() == 3u); +} + int main() { f1(); @@ -193,4 +265,6 @@ int main() f3(); f4(); f5(); + f6(); + f7(); }