spot/spot/twaalgos/determinize.cc
Maximilien Colange bd739a5712 Heavily rewrite and optimize the determinization
* NEWS: document the rewrite
* spot/twaalgos/determinize.cc: lots of code optimizations
* tests/core/safra.test, tests/python/highlighting.ipynb,
  tests/python/simstate.py: Update tests
2017-10-19 14:17:55 +02:00

935 lines
28 KiB
C++

// -*- coding: utf-8 -*-
// Copyright (C) 2015-2017 Laboratoire de Recherche et
// Développement de l'Epita.
//
// This file is part of Spot, a model checking library.
//
// Spot is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// Spot is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
// License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <algorithm>
#include <deque>
#include <stack>
#include <utility>
#include <unordered_map>
#include <set>
#include <map>
#include <spot/misc/bddlt.hh>
#include <spot/twaalgos/sccinfo.hh>
#include <spot/twaalgos/determinize.hh>
#include <spot/twaalgos/degen.hh>
#include <spot/twaalgos/sccfilter.hh>
#include <spot/twaalgos/simulation.hh>
#include <spot/twaalgos/isdet.hh>
namespace spot
{
namespace
{
// forward declaration
struct safra_build;
}
class safra_state final
{
public:
// a helper method to check invariants
void
check() const
{
// do not refer to braces that do not exist
for (const auto& p : nodes_)
if (p.second >= 0)
if (((unsigned)p.second) >= braces_.size())
assert(false);
// braces_ describes the parenthood relation, -1 meaning toplevel
// so braces_[b] < b always, and -1 is the only negative number allowed
for (int b : braces_)
{
if (b < 0 && b != -1)
assert(false);
if (b >= 0 && braces_[b] > b)
assert(false);
}
// no unused braces
std::set<int> used_braces;
for (const auto& n : nodes_)
{
int b = n.second;
while (b >= 0)
{
used_braces.insert(b);
b = braces_[b];
}
}
assert(used_braces.size() == braces_.size());
}
public:
using state_t = unsigned;
using safra_node_t = std::pair<state_t, std::vector<int>>;
bool operator<(const safra_state&) const;
bool operator==(const safra_state&) const;
size_t hash() const;
// Print the number of states in each brace
// default constructor
safra_state(): safra_state(0) {}
safra_state(state_t state_number, bool acceptance_scc = false);
safra_state(safra_build&& s, unsigned c);
// Compute successor for transition ap
safra_state
compute_succ(const const_twa_graph_ptr& aut,
const bdd& ap,
const scc_info& scc,
const std::vector<bdd>& implications,
const std::vector<char>& is_connected,
bool use_scc,
bool use_simulation) const;
// each brace points to its parent.
// braces_[i] is the parent of i
// Note that braces_[i] < i, -1 stands for "no parent" (top-level)
std::vector<int> braces_;
std::vector<std::pair<state_t, int>> nodes_;
// FIXME this does not belong to the safra_state
unsigned color_;
};
namespace
{
struct hash_safra
{
size_t
operator()(const safra_state& s) const
{
return s.hash();
}
};
template<class T>
struct ref_wrap_equal
{
bool
operator()(const std::reference_wrapper<T>& x,
const std::reference_wrapper<T>& y) const
{
return std::equal_to<T>()(x.get(), y.get());
}
};
using power_set = std::unordered_map<safra_state, unsigned, hash_safra>;
std::string
nodes_to_string(const const_twa_graph_ptr& aut,
const safra_state& states);
// Given a certain transition_label, compute all the successors of a
// safra_state under that label, and return the new nodes in res.
class compute_succs final
{
const safra_state& src;
const const_twa_graph_ptr& aut;
const power_set& seen;
const scc_info& scc;
const std::vector<bdd>& implications;
const std::vector<char>& is_connected;
const std::vector<bdd>& all_bdds;
bool use_scc;
bool use_simulation;
bool use_stutter;
// to iterate on successors
std::vector<bdd>::const_iterator bddit;
safra_state ss;
public:
compute_succs(const safra_state& src,
const const_twa_graph_ptr& aut,
const power_set& seen,
const scc_info& scc,
const std::vector<bdd>& implications,
const std::vector<char>& is_connected,
const std::vector<bdd>& all_bdds,
bool use_scc,
bool use_simulation,
bool use_stutter)
: src(src)
, aut(aut)
, seen(seen)
, scc(scc)
, implications(implications)
, is_connected(is_connected)
, all_bdds(all_bdds)
, use_scc(use_scc)
, use_simulation(use_simulation)
, use_stutter(use_stutter)
{}
struct iterator
{
const compute_succs& cs_;
std::vector<bdd>::const_iterator bddit;
safra_state ss;
iterator(const compute_succs& c, std::vector<bdd>::const_iterator it)
: cs_(c)
, bddit(it)
{
compute_();
}
bool
operator!=(const iterator& other) const
{
return bddit != other.bddit;
}
iterator&
operator++()
{
++bddit;
compute_();
return *this;
}
// no need to implement postfix increment
bdd
cond() const
{
return *bddit;
}
const safra_state&
operator*() const
{
return ss;
}
const safra_state*
operator->() const
{
return &ss;
}
private:
void
compute_()
{
if (bddit == cs_.all_bdds.end())
return;
const bdd& ap = *bddit;
ss = cs_.src;
if (cs_.use_stutter && cs_.aut->prop_stutter_invariant())
{
bool stop = false;
std::deque<safra_state> path;
std::unordered_set<
std::reference_wrapper<const safra_state>,
hash_safra,
ref_wrap_equal<const safra_state>> states;
unsigned mincolor = -1U;
while (!stop)
{
path.emplace_back(std::move(ss));
auto i = states.insert(path.back());
assert(i.second);
ss = path.back().compute_succ(cs_.aut, ap, cs_.scc,
cs_.implications,
cs_.is_connected, cs_.use_scc,
cs_.use_simulation);
mincolor = std::min(ss.color_, mincolor);
stop = states.find(ss) != states.end();
}
// also insert last element (/!\ it thus appears twice in path)
path.emplace_back(std::move(ss));
const safra_state& loopstart = path.back();
bool in_seen = cs_.seen.find(ss) != cs_.seen.end();
unsigned tokeep = path.size()-1;
unsigned idx = path.size()-2;
// The loop is guaranteed to end, because path contains too
// occurrences of loopstart
while (!(loopstart == path[idx]))
{
// if path[tokeep] is already in seen, replace it with a
// smaller state also in seen.
if (in_seen && cs_.seen.find(path[idx]) != cs_.seen.end())
if (path[idx] < path[tokeep])
tokeep = idx;
// if path[tokeep] is not in seen, replace it either with a
// state in seen or with a smaller state
if (!in_seen)
{
if (cs_.seen.find(path[idx]) != cs_.seen.end())
{
tokeep = idx;
in_seen = true;
}
else if (path[idx] < path[tokeep])
tokeep = idx;
}
--idx;
}
// clean references to path before move (see next line)
states.clear();
// move is safe, no dangling references
ss = std::move(path[tokeep]);
ss.color_ = mincolor;
}
else
ss = cs_.src.compute_succ(cs_.aut, ap, cs_.scc, cs_.implications,
cs_.is_connected, cs_.use_scc,
cs_.use_simulation);
}
};
iterator
begin() const
{
return iterator(*this, all_bdds.begin());
}
iterator
end() const
{
return iterator(*this, all_bdds.end());
}
};
const char* const sub[10] =
{
"\u2080",
"\u2081",
"\u2082",
"\u2083",
"\u2084",
"\u2085",
"\u2086",
"\u2087",
"\u2088",
"\u2089",
};
std::string subscript(unsigned start)
{
std::string res;
do
{
res = sub[start % 10] + res;
start /= 10;
}
while (start);
return res;
}
// Returns true if lhs has a smaller nesting pattern than rhs
// If lhs and rhs are the same, return false.
// NB the nesting patterns are backwards.
bool nesting_cmp(const std::vector<int>& lhs,
const std::vector<int>& rhs)
{
unsigned m = std::min(lhs.size(), rhs.size());
auto lit = lhs.rbegin();
auto rit = rhs.rbegin();
for (unsigned i = 0; i != m; ++i)
{
if (*lit != *rit)
return *lit < *rit;
}
return lhs.size() > rhs.size();
}
// Used to remove all acceptance whose value is above or equal to max_acc
void remove_dead_acc(twa_graph_ptr& aut, unsigned max_acc)
{
assert(max_acc < 32);
unsigned mask = (1 << max_acc) - 1;
for (auto& t: aut->edges())
t.acc &= mask;
}
struct compare
{
bool
operator() (const safra_state::safra_node_t& lhs,
const safra_state::safra_node_t& rhs)
{
return lhs.second < rhs.second;
}
};
// Return the nodes sorted in ascending order
std::vector<safra_state::safra_node_t>
sorted_nodes(const safra_state& s)
{
std::vector<safra_state::safra_node_t> res;
for (const auto& n: s.nodes_)
{
int brace = n.second;
std::vector<int> tmp;
while (brace >= 0)
{
// FIXME is not there a smarter way?
tmp.insert(tmp.begin(), brace);
brace = s.braces_[brace];
}
res.emplace_back(n.first, std::move(tmp));
}
std::sort(res.begin(), res.end(), compare());
return res;
}
std::string
nodes_to_string(const const_twa_graph_ptr& aut,
const safra_state& states)
{
auto copy = sorted_nodes(states);
std::ostringstream os;
std::stack<int> s;
bool first = true;
for (const auto& n: copy)
{
auto it = n.second.begin();
// Find brace on top of stack in vector
// If brace is not present, then we close it as no other ones of that
// type will be found since we ordered our vector
while (!s.empty())
{
it = std::lower_bound(n.second.begin(), n.second.end(),
s.top());
if (it == n.second.end() || *it != s.top())
{
os << subscript(s.top()) << '}';
s.pop();
}
else
{
if (*it == s.top())
++it;
break;
}
}
// Add new braces
while (it != n.second.end())
{
os << '{' << subscript(*it);
s.push(*it);
++it;
first = true;
}
if (!first)
os << ' ';
os << aut->format_state(n.first);
first = false;
}
// Finish unwinding stack to print last braces
while (!s.empty())
{
os << subscript(s.top()) << '}';
s.pop();
}
return os.str();
}
std::vector<std::string>*
print_debug(const const_twa_graph_ptr& aut,
const power_set& states)
{
auto res = new std::vector<std::string>(states.size());
for (const auto& p: states)
(*res)[p.second] = nodes_to_string(aut, p.first);
return res;
}
// a helper class for building the successor of a safra_state
struct safra_build final
{
std::vector<int> braces_;
std::map<unsigned, int> nodes_;
safra_build(const std::vector<int>& b)
: braces_(b)
{}
bool
compare_braces(int a, int b)
{
std::vector<int> a_pattern;
std::vector<int> b_pattern;
a_pattern.reserve(a+1);
b_pattern.reserve(b+1);
while (a != b)
{
if (a > b)
{
a_pattern.emplace_back(a);
a = braces_[a];
}
else
{
b_pattern.emplace_back(b);
b = braces_[b];
}
}
return nesting_cmp(a_pattern, b_pattern);
}
// Used when creating the list of successors
// A new intermediate node is created with src's braces and with dst as id
// A merge is done if dst already existed in *this
void
update_succ(int brace, unsigned dst, const acc_cond::mark_t acc)
{
int newb = brace;
if (acc.count())
{
assert(acc.has(0) && acc.count() == 1 && "Only TBA are accepted");
// Accepting edges generate new braces: step A1
newb = braces_.size();
braces_.emplace_back(brace);
}
auto i = nodes_.emplace(dst, newb);
if (!i.second) // dst already exists
{
// Step A2: Only keep the smallest nesting pattern.
// Use nesting_cmp to compare nesting patterns.
if (compare_braces(newb, i.first->second))
{
i.first->second = newb;
}
else
{
if (newb != brace) // new brace was created but is not needed
braces_.pop_back();
}
}
}
// When a node a implies a node b, remove the node a.
void
merge_redundant_states(const std::vector<bdd>& implications,
const scc_info& scc,
const std::vector<char>& is_connected)
{
std::vector<unsigned> to_remove;
for (const auto& n1: nodes_)
for (const auto& n2: nodes_)
{
if (n1 == n2)
continue;
// index to see if there is a path from scc2 -> scc1
unsigned idx = scc.scc_count() * scc.scc_of(n2.first) +
scc.scc_of(n1.first);
if (!is_connected[idx] && bdd_implies(implications.at(n1.first),
implications.at(n2.first)))
to_remove.emplace_back(n1.first);
}
for (unsigned n: to_remove)
nodes_.erase(n);
}
// Return the emitted color, red or green
unsigned
finalize_construction()
{
unsigned red = -1U;
unsigned green = -1U;
// use std::vector<char> to avoid std::vector<bool>
// a char encodes several bools:
// * first bit says whether the brace is empty and red
// * second bit says whether the brace is green
// brackets removed from green pairs can be safely be marked as red,
// because their enclosing green has a lower number
// beware of pairs marked both as red and green: they are actually empty
constexpr char is_empty = 1;
constexpr char is_green = 2;
std::vector<char> empty_green(braces_.size(), 3);
for (const auto& n : nodes_)
if (n.second >= 0)
{
int brace = n.second;
// Step A4: For a brace to be green it must not contain states
// on its own.
empty_green[brace] &= ~is_green;
while (brace >= 0 && (empty_green[brace] & is_empty))
{
empty_green[brace] &= ~is_empty;
brace = braces_[brace];
}
}
// Step A4 Remove brackets within green pairs
// for each bracket, find its highest green ancestor
// 0 cannot be in a green pair, its highest green ancestor is itself
std::vector<unsigned> highest_green_ancestor(braces_.size(), 0);
for (unsigned b = 0; b != braces_.size(); ++b)
{
highest_green_ancestor[b] = b;
int ancestor = braces_[b];
// Note that ancestor strictly decreases
// FIXME is there a smarter way to do that?
while (ancestor >= 0)
{
if (empty_green[ancestor] & is_green)
highest_green_ancestor[b] = ancestor;
ancestor = braces_[ancestor];
}
if (highest_green_ancestor[b] != b)
empty_green[b] |= is_empty; // mark brace for removal
}
for (auto& n : nodes_)
if (n.second >= 0)
n.second = highest_green_ancestor[n.second];
// find red and green signals to emit
// also compute the number of braces to remove for renumbering
std::vector<unsigned> decr_by(braces_.size());
unsigned decr = 0;
for (unsigned i = 0; i != braces_.size(); ++i)
{
if (empty_green[i] & is_empty)
{
// Step A5 renumber braces
++decr;
// Step A3 emit red
red = std::min(red, 2*i);
}
else if (empty_green[i] & is_green)
{
// Step A4 emit green
green = std::min(green, 2*i+1);
}
decr_by[i] = decr;
}
std::vector<int> new_braces(braces_.size() - decr);
for (auto& n : nodes_)
{
if (n.second >= 0)
{
unsigned i = n.second;
int j = braces_[i] >=0 ? braces_[i] - decr_by[braces_[i]] : -1;
new_braces[i - decr_by[i]] = j;
n.second = i - decr_by[i];
}
}
std::swap(braces_, new_braces);
return std::min(red, green);
}
};
// Compute a vector of letters from a given support
std::vector<bdd>
letters(bdd allap)
{
std::vector<bdd> res;
bdd all = bddtrue;
while (all != bddfalse)
{
bdd one = bdd_satoneset(all, allap, bddfalse);
all -= one;
res.emplace_back(one);
}
return res;
}
class safra_support
{
const std::vector<bdd>& state_supports;
std::unordered_map<bdd, std::vector<bdd>, bdd_hash> cache;
public:
safra_support(const std::vector<bdd>& s): state_supports(s) {}
const std::vector<bdd>&
get(const safra_state& s)
{
bdd supp = bddtrue;
for (const auto& n : s.nodes_)
supp &= state_supports[n.first];
auto it = cache.find(supp);
if (it == cache.end())
it = cache.emplace(supp, letters(supp)).first;
return it->second;
}
};
}
std::vector<char> find_scc_paths(const scc_info& scc);
safra_state
safra_state::compute_succ(const const_twa_graph_ptr& aut,
const bdd& ap,
const scc_info& scc,
const std::vector<bdd>& implications,
const std::vector<char>& is_connected,
bool use_scc,
bool use_simulation) const
{
// FIXME: we manipulate one safra_build at a time: how can we optimize/avoid
// allocation/deallocation?
safra_build ss(braces_);
for (const auto& node: nodes_)
{
for (const auto& t: aut->out(node.first))
{
if (!bdd_implies(ap, t.cond))
continue;
// Check if we are leaving the SCC, if so we delete all the
// braces as no cycles can be found with that node
if (use_scc && scc.scc_of(node.first) != scc.scc_of(t.dst))
if (scc.is_accepting_scc(scc.scc_of(t.dst)))
// Entering accepting SCC so add brace
ss.update_succ(-1, t.dst, { 0 });
else
// When entering non accepting SCC don't create any braces
ss.update_succ(-1, t.dst, { /* empty */ });
else
ss.update_succ(node.second, t.dst, t.acc);
}
}
if (use_simulation)
ss.merge_redundant_states(implications, scc, is_connected);
unsigned color = ss.finalize_construction();
return safra_state(std::move(ss), color);
}
// Called only to initialize first state
safra_state::safra_state(state_t val, bool accepting_scc)
{
if (!accepting_scc)
{
// -1 means "no braces"
nodes_.emplace_back(val, -1);
}
else
{
braces_.emplace_back(-1);
nodes_.emplace_back(val, 0);
}
}
safra_state::safra_state(safra_build&& s, unsigned c)
: braces_(std::move(s.braces_))
, nodes_(std::make_move_iterator(s.nodes_.begin()),
std::make_move_iterator(s.nodes_.end()))
, color_(c)
{}
bool
safra_state::operator<(const safra_state& other) const
{
// FIXME what is the right, if any, comparison to perform?
return braces_ == other.braces_ ? nodes_ < other.nodes_
: braces_ < other.braces_;
}
size_t
safra_state::hash() const
{
size_t res = 0;
for (const auto& p : nodes_)
{
res ^= (res << 3) ^ p.first;
res ^= (res << 3) ^ p.second;
}
for (const auto& b : braces_)
res ^= (res << 3) ^ b;
return res;
}
bool
safra_state::operator==(const safra_state& other) const
{
return nodes_ == other.nodes_ && braces_ == other.braces_;
}
// res[i + scccount*j] = 1 iff SCC i is reachable from SCC j
std::vector<char>
find_scc_paths(const scc_info& scc)
{
unsigned scccount = scc.scc_count();
std::vector<char> res(scccount * scccount, 0);
for (unsigned i = 0; i != scccount; ++i)
res[i + scccount * i] = 1;
for (unsigned i = 0; i != scccount; ++i)
{
std::stack<unsigned> s;
s.push(i);
while (!s.empty())
{
unsigned src = s.top();
s.pop();
for (unsigned d: scc.succ(src))
{
s.push(d);
unsigned idx = scccount * i + d;
res[idx] = 1;
}
}
}
return res;
}
twa_graph_ptr
tgba_determinize(const const_twa_graph_ptr& a,
bool pretty_print, bool use_scc,
bool use_simulation, bool use_stutter)
{
if (!a->is_existential())
throw std::runtime_error
("tgba_determinize() does not support alternation");
if (is_universal(a))
return std::const_pointer_cast<twa_graph>(a);
// Degeneralize
const_twa_graph_ptr aut;
std::vector<bdd> implications;
{
twa_graph_ptr aut_tmp = spot::degeneralize_tba(a);
if (pretty_print)
aut_tmp->copy_state_names_from(a);
if (use_simulation)
{
aut_tmp = spot::scc_filter(aut_tmp);
auto aut2 = simulation(aut_tmp, &implications);
if (pretty_print)
aut2->copy_state_names_from(aut_tmp);
aut_tmp = aut2;
}
aut = aut_tmp;
}
scc_info_options scc_opt = scc_info_options::TRACK_SUCCS;
// We do need to track states in SCC for stutter invariance (see below how
// supports are computed in this case)
if (use_stutter && aut->prop_stutter_invariant())
scc_opt = scc_info_options::TRACK_SUCCS | scc_info_options::TRACK_STATES;
scc_info scc = scc_info(aut, scc_opt);
std::vector<char> is_connected = find_scc_paths(scc);
// Compute the support of each state
std::vector<bdd> support(aut->num_states());
if (use_stutter && aut->prop_stutter_invariant())
{
// FIXME this could be improved
// supports of states should account for possible stuttering if we plan
// to use stuttering invariance
for (unsigned c = 0; c != scc.scc_count(); ++c)
{
bdd c_supp = scc.scc_ap_support(c);
for (const auto& su: scc.succ(c))
c_supp &= support[scc.one_state_of(su)];
for (unsigned st: scc.states_of(c))
support[st] = c_supp;
}
}
else
{
for (unsigned i = 0; i != aut->num_states(); ++i)
{
bdd res = bddtrue;
for (const auto& e : aut->out(i))
res &= bdd_support(e.cond);
support[i] = res;
}
}
safra_support safra2letters(support);
auto res = make_twa_graph(aut->get_dict());
res->copy_ap_of(aut);
res->prop_copy(aut,
{ false, // state based
false, // inherently_weak
false, false, // deterministic
true, // complete
true // stutter inv
});
// Given a safra_state get its associated state in output automata.
// Required to create new edges from 2 safra-state
power_set seen;
// As per the standard, references to elements in a std::unordered_set or
// std::unordered_map are invalidated by erasure only.
std::deque<std::reference_wrapper<power_set::value_type>> todo;
auto get_state = [&res, &seen, &todo](const safra_state& s) -> unsigned
{
auto i = seen.emplace(std::make_pair(s, 0));
unsigned dst_num;
if (i.second) // insertion did take place
{
dst_num = res->new_state();
i.first->second = dst_num;
todo.emplace_back(*i.first);
}
else // element was already in seen
dst_num = i.first->second;
return dst_num;
};
{
unsigned init_state = aut->get_init_state_number();
bool start_accepting =
!use_scc || scc.is_accepting_scc(scc.scc_of(init_state));
safra_state init(init_state, start_accepting);
unsigned num = get_state(init); // inserts both in seen and in todo
res->set_init_state(num);
}
unsigned sets = 0;
while (!todo.empty())
{
const safra_state& curr = todo.front().get().first;
unsigned src_num = todo.front().get().second;
todo.pop_front();
compute_succs succs(curr, aut, seen, scc, implications, is_connected,
safra2letters.get(curr), use_scc, use_simulation,
use_stutter);
for (auto s = succs.begin(); s != succs.end(); ++s)
{
// Don't construct sink state as complete does a better job at this
if (s->nodes_.empty())
continue;
unsigned dst_num = get_state(*s);
if (s->color_ != -1U)
{
res->new_edge(src_num, dst_num, s.cond(), {s->color_});
// We only care about green acc which are odd
if (s->color_ % 2 == 1)
sets = std::max(s->color_ + 1, sets);
}
else
res->new_edge(src_num, dst_num, s.cond());
}
}
remove_dead_acc(res, sets);
// Acceptance is now min(odd) since we can emit Red on paths 0 with new opti
res->set_acceptance(sets, acc_cond::acc_code::parity(false, true, sets));
res->prop_universal(true);
res->prop_state_acc(false);
if (pretty_print)
res->set_named_prop("state-names", print_debug(aut, seen));
return res;
}
}