From 92126a6cf9dd4921bf2d876ffe827601d5bb73c4 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Wed, 5 Jan 2011 19:38:48 +0100 Subject: [PATCH] Cleanup the DFA minimization algorithm. * src/tgbaalgos/minimize.cc (minimize): Move the minimization code into... (minimize_dfa): ... this new function, and fix the condition under which a partition is considered to be minimal. Also use a map instead of a list to lookup known formulae. --- ChangeLog | 10 + src/tgbaalgos/minimize.cc | 392 +++++++++++++++++++++++--------------- 2 files changed, 247 insertions(+), 155 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5a1d6dad4..610ade58f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2011-01-05 Alexandre Duret-Lutz + + Cleanup the DFA minimization algorithm. + + * src/tgbaalgos/minimize.cc (minimize): Move the minimization + code into... + (minimize_dfa): ... this new function, and fix the condition + under which a partition is considered to be minimal. Also + use a map instead of a list to lookup known formulae. + 2011-01-05 Alexandre Duret-Lutz Speed up the obligation test. diff --git a/src/tgbaalgos/minimize.cc b/src/tgbaalgos/minimize.cc index 8b61bd10c..266e512de 100644 --- a/src/tgbaalgos/minimize.cc +++ b/src/tgbaalgos/minimize.cc @@ -18,14 +18,25 @@ // Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA // 02111-1307, USA. + +//#define TRACE + +#ifdef TRACE +# define trace std::cerr +#else +# define trace while (0) std::cerr +#endif + #include #include #include #include #include +#include #include "minimize.hh" #include "ltlast/allnodes.hh" #include "misc/hash.hh" +#include "misc/bddlt.hh" #include "tgba/tgbaproduct.hh" #include "tgba/tgbatba.hh" #include "tgbaalgos/powerset.hh" @@ -43,6 +54,31 @@ namespace spot typedef Sgi::hash_map hash_map; + namespace + { + static std::ostream& + dump_hash_set(const hash_set* hs, const tgba* aut, std::ostream& out) + { + out << "{"; + const char* sep = ""; + for (hash_set::const_iterator i = hs->begin(); i != hs->end(); ++i) + { + out << sep << aut->format_state(*i); + sep = ", "; + } + out << "}"; + return out; + } + + static std::string + format_hash_set(const hash_set* hs, const tgba* aut) + { + std::ostringstream s; + dump_hash_set(hs, aut, s); + return s.str(); + } + } + // Given an automaton a, find all states that are not in "final" and add // them to the set "non_final". void init_sets(const tgba_explicit* a, @@ -269,22 +305,216 @@ namespace spot } + tgba_explicit_number* minimize_dfa(const tgba_explicit_number* det_a, + hash_set* final) + { + typedef std::list partition_t; + partition_t cur_run; + partition_t next_run; + + // The list of equivalent states. + partition_t done; + + hash_map state_set_map; + + hash_set* non_final = new hash_set; + + init_sets(det_a, *final, *non_final); + // Size of det_a + unsigned size = final->size() + non_final->size(); + // Use bdd variables to number sets. set_num is the first variable + // available. + unsigned set_num = + det_a->get_dict()->register_anonymous_variables(size, det_a); + + std::set free_var; + for (unsigned i = set_num; i < set_num + size; ++i) + free_var.insert(i); + std::map used_var; + + hash_set* final_copy; + + if (!final->empty()) + { + unsigned s = final->size(); + used_var[set_num] = s; + free_var.erase(set_num); + if (s > 1) + cur_run.push_back(final); + else + done.push_back(final); + for (hash_set::const_iterator i = final->begin(); + i != final->end(); ++i) + state_set_map[*i] = set_num; + + final_copy = new hash_set(*final); + } + else + { + final_copy = final; + } + + if (!non_final->empty()) + { + unsigned s = non_final->size(); + unsigned num = set_num + 1; + used_var[num] = s; + free_var.erase(num); + if (s > 1) + cur_run.push_back(non_final); + else + done.push_back(non_final); + for (hash_set::const_iterator i = non_final->begin(); + i != non_final->end(); ++i) + state_set_map[*i] = num; + } + else + { + delete non_final; + } + + // A bdd_states_map is a list of formulae (in a BDD form) associated with a + // destination set of states. + typedef std::map bdd_states_map; + + bool did_split = true; + + while (did_split) + { + did_split = false; + while (!cur_run.empty()) + { + // Get a set to process. + hash_set* cur = cur_run.front(); + cur_run.pop_front(); + + trace << "processing " << format_hash_set(cur, det_a) << std::endl; + + hash_set::iterator hi; + bdd_states_map bdd_map; + for (hi = cur->begin(); hi != cur->end(); ++hi) + { + const state* src = *hi; + bdd f = bddfalse; + tgba_succ_iterator* si = det_a->succ_iter(src); + for (si->first(); !si->done(); si->next()) + { + const state* dst = si->current_state(); + unsigned dst_set = state_set_map[dst]; + delete dst; + f |= (bdd_ithvar(dst_set) & si->current_condition()); + } + delete si; + + // Have we already seen this formula ? + bdd_states_map::iterator bsi = bdd_map.find(f); + if (bsi == bdd_map.end()) + { + // No, create a new set. + hash_set* new_set = new hash_set; + new_set->insert(src); + bdd_map[f] = new_set; + } + else + { + // Yes, add the current state to the set. + bsi->second->insert(src); + } + } + + bdd_states_map::iterator bsi = bdd_map.begin(); + if (bdd_map.size() == 1) + { + // The set was not split. + trace << "set " << format_hash_set(bsi->second, det_a) + << " was not split" << std::endl; + next_run.push_back(bsi->second); + } + else + { + for (; bsi != bdd_map.end(); ++bsi) + { + hash_set* set = bsi->second; + // Free the number associated to these states. + unsigned num = state_set_map[*set->begin()]; + assert(used_var.find(num) != used_var.end()); + unsigned left = (used_var[num] -= set->size()); + // Make sure LEFT does not become negative (hence bigger + // than SIZE when read as unsigned) + assert(left < size); + if (left == 0) + { + used_var.erase(num); + free_var.insert(num); + } + // Pick a free number + assert(!free_var.empty()); + num = *free_var.begin(); + free_var.erase(free_var.begin()); + used_var[num] = set->size(); + for (hash_set::iterator hit = set->begin(); + hit != set->end(); ++hit) + state_set_map[*hit] = num; + // Trivial sets can't be splitted any further. + if (set->size() == 1) + { + trace << "set " << format_hash_set(set, det_a) + << " is minimal" << std::endl; + done.push_back(set); + } + else + { + did_split = true; + trace << "set " << format_hash_set(set, det_a) + << " should be processed further" << std::endl; + next_run.push_back(set); + } + } + } + delete cur; + } + if (did_split) + trace << "splitting did occur during this pass." << std::endl; + else + trace << "splitting did not occur during this pass." << std::endl; + std::swap(cur_run, next_run); + } + + done.splice(done.end(), cur_run); + +#ifdef TRACE + trace << "Final partition: "; + for (partition_t::const_iterator i = done.begin(); i != done.end(); ++i) + trace << format_hash_set(*i, det_a) << " "; + trace << std::endl; +#endif + + // Build the result. + tgba_explicit_number* res = build_result(det_a, done, final_copy); + + // Free all the allocated memory. + delete final_copy; + hash_map::iterator hit; + for (hit = state_set_map.begin(); hit != state_set_map.end(); ++hit) + delete hit->first; + std::list::iterator it; + for (it = done.begin(); it != done.end(); ++it) + delete *it; + delete det_a; + + return res; + } tgba_explicit_number* minimize(const tgba* a, bool monitor) { - std::queue todo; - // The list of equivalent states. - std::list done; hash_set* final = new hash_set; - hash_set* non_final = new hash_set; - hash_map state_set_map; - tgba_explicit_number* det_a; { power_map pm; det_a = tgba_powerset(a, pm); + if (!monitor) { // For each SCC of the deterministic automaton, determine if @@ -340,155 +570,7 @@ namespace spot } } - init_sets(det_a, *final, *non_final); - // Size of det_a - unsigned size = final->size() + non_final->size(); - // Use bdd variables to number sets. set_num is the first variable - // available. - unsigned set_num = - det_a->get_dict()->register_anonymous_variables(size, det_a); - - std::set free_var; - for (unsigned i = set_num; i < set_num + size; ++i) - free_var.insert(i); - std::map used_var; - - hash_set* final_copy; - - if (!final->empty()) - { - unsigned s = final->size(); - used_var[set_num] = s; - free_var.erase(set_num); - if (s > 1) - todo.push(final); - else - done.push_back(final); - for (hash_set::const_iterator i = final->begin(); - i != final->end(); ++i) - state_set_map[*i] = set_num; - - final_copy = new hash_set(*final); - } - else - { - final_copy = final; - } - - if (!non_final->empty()) - { - unsigned s = non_final->size(); - unsigned num = set_num + 1; - used_var[num] = s; - free_var.erase(num); - if (s > 1) - todo.push(non_final); - else - done.push_back(non_final); - for (hash_set::const_iterator i = non_final->begin(); - i != non_final->end(); ++i) - state_set_map[*i] = num; - } - else - { - delete non_final; - } - - // A bdd_states_map is a list of formulae (in a BDD form) associated with a - // destination set of states. - typedef std::list > bdd_states_map; - // While we have unprocessed sets. - while (!todo.empty()) - { - // Get a set to process. - hash_set* cur = todo.front(); - todo.pop(); - hash_set::iterator hi; - bdd_states_map bdd_map; - for (hi = cur->begin(); hi != cur->end(); ++hi) - { - const state* src = *hi; - bdd f = bddfalse; - tgba_succ_iterator* si = det_a->succ_iter(src); - for (si->first(); !si->done(); si->next()) - { - const state* dst = si->current_state(); - unsigned dst_set = state_set_map[dst]; - delete dst; - f |= (bdd_ithvar(dst_set) & si->current_condition()); - } - delete si; - bdd_states_map::iterator bsi; - // Have we already seen this formula ? - for (bsi = bdd_map.begin(); bsi != bdd_map.end() && bsi->first != f; - ++bsi) - continue; - if (bsi == bdd_map.end()) - { - // No, create a new set. - hash_set* new_set = new hash_set; - new_set->insert(src); - bdd_map.push_back(std::make_pair(f, new_set)); - } - else - { - // Yes, add the current state to the set. - hash_set* set = bsi->second; - set->insert(src); - } - } - bdd_states_map::iterator bsi = bdd_map.begin(); - // The set is minimal. - if (bdd_map.size() == 1) - done.push_back(bsi->second); - else - { - for (; bsi != bdd_map.end(); ++bsi) - { - hash_set* set = bsi->second; - // Free the number associated to these states. - unsigned num = state_set_map[*set->begin()]; - assert(used_var.find(num) != used_var.end()); - unsigned left = (used_var[num] -= set->size()); - // Make sure LEFT does not become negative (hence bigger - // than SIZE when read as unsigned) - assert(left < size); - if (left == 0) - { - used_var.erase(num); - free_var.insert(num); - } - // Pick a free number - assert(!free_var.empty()); - num = *free_var.begin(); - free_var.erase(free_var.begin()); - used_var[num] = set->size(); - for (hash_set::iterator hit = set->begin(); hit != set->end(); ++hit) - state_set_map[*hit] = num; - // Trivial sets can't be splitted any further. - if (set->size() == 1) - done.push_back(set); - else - todo.push(set); - } - } - delete cur; - } - - // Build the result. - tgba_explicit_number* res = build_result(det_a, done, final_copy); - - // Free all the allocated memory. - delete final_copy; - hash_map::iterator hit; - for (hit = state_set_map.begin(); hit != state_set_map.end(); ++hit) - delete hit->first; - std::list::iterator it; - for (it = done.begin(); it != done.end(); ++it) - delete *it; - delete det_a; - - return res; + return minimize_dfa(det_a, final); } const tgba*