relabel_bse: rework to simplify more patterns

Rework the way we compute and use cut-points to catch more patterns we
can rewrite.  Also Use BDDs to check if a Boolean sub-expression is
false or true.   Fixes issue #540.

* spot/tl/relabel.hh: Update documentation
* spot/tl/relabel.cc (relabel_bse): Rework.
* tests/core/ltlfilt.test: Add more test cases.
* tests/python/_mealy.ipynb: Update.
* NEWS: Mention the change.
This commit is contained in:
Alexandre Duret-Lutz 2023-09-13 11:31:49 +02:00
parent cbb981ffd5
commit 7149521f48
5 changed files with 305 additions and 159 deletions

View file

@ -19,6 +19,7 @@
#include "config.h"
#include <spot/tl/relabel.hh>
#include <spot/tl/simplify.hh>
#include <sstream>
#include <spot/misc/hash.hh>
#include <map>
@ -82,7 +83,7 @@ namespace spot
// if subexp == false, matches APs
// if subexp == true, matches boolean subexps
template <bool subexp>
template <bool subexp, bool use_bdd = false>
class relabeler
{
public:
@ -90,6 +91,7 @@ namespace spot
map newname;
ap_generator* gen;
relabeling_map* oldnames;
tl_simplifier tl;
relabeler(ap_generator* gen, relabeling_map* m)
: gen(gen), oldnames(m)
@ -115,6 +117,16 @@ namespace spot
if (!r.second)
return r.first->second;
if constexpr (use_bdd)
if (!old.is(op::ap))
{
bdd b = tl.as_bdd(old);
if (b == bddtrue)
return r.first->second = formula::tt();
if (b == bddfalse)
return r.first->second = formula::ff();
}
formula res = gen->next();
r.first->second = res;
if (oldnames)
@ -269,7 +281,7 @@ namespace spot
// Furthermore, because we are already representing LTL formulas
// with sharing of identical sub-expressions we can easily rename
// a subexpression (such as c&d above) only once. However this
// scheme has two problems:
// scheme (done by relabel_overlapping_bse()) has two problems:
//
// A. It will not detect inter-dependent Boolean subexpressions.
// For instance it will mistakenly relabel "(a & b) U (a & !b)"
@ -278,53 +290,40 @@ namespace spot
// B. Because of our n-ary operators, it will fail to
// notice that (a & b) is a sub-expression of (a & b & c).
//
// The way we compute the subexpressions that can be relabeled is
// by transforming the formula syntax tree into an undirected
// graph, and computing the cut points of this graph. The cut
// points (or articulation points) are the nodes whose removal
// would split the graph in two components. To ensure that a
// Boolean operator is only considered as a cut point if it would
// separate all of its children from the rest of the graph, we
// connect all the children of Boolean operators.
// The way we compute the subexpressions that can be relabeled is by
// transforming the formula syntax tree into an undirected graph,
// and computing the cut-points of this graph. The cut-points (or
// articulation points) are the nodes whose removal would split the
// graph in two components; in our case, we extend this definition to
// also consider the leaves as cut-points.
//
// For instance (a & b) U (c & d) has two (Boolean) cut points
// corresponding to the two AND operators:
// For instance ((a|b)&c&d)U(!d&e&f) is represented by
// the following graph, were cut-points are marked with *.
//
// (a&b)U(c&d)
//
// a&b c&d
//
// a─────b c─────d
// ((a|b)&c&d)U(!d&e&f)
//
// ((a|b)&c&d)* (!d&e&f)*
// │ ╲ │ ╲
// a|b* │ ╲ ! │ ╲
// ╲ │ ╲ │ ╲
// a* b* c* d e* f*
//
// (The root node is also a cut point, but we only consider Boolean
// cut points for relabeling.)
// The relabeling of a formula is done in 3 passes:
// 1. Convert the formula's syntax tree into an undirected graph.
// 2. Compute the (Boolean) cut points of that graph, using the
// Hopcroft-Tarjan algorithm (see below for a reference).
// 3. Recursively scan the formula's tree until we reach
// a (Boolean) cut-point. If all the children of this node
// are cut-points, rename the node with a fresh label.
// If it's a n-ary operator, group all children that are
// and cut-points relabel them as a whole.
//
// On the other hand, (a & b) U (b & !c) has only one Boolean
// cut-point which corresponds to the NOT operator:
//
// (a&b)U(b&!c)
//
// a&b b&!c
//
// a─────b────!c
// │
// c
//
// Note that if the children of a&b and b&c were not connected,
// a&b and b&c would be considered as cut points because they
// separate "a" or "!c" from the rest of the graph.
//
// The relabeling of a formula is therefore done in 3 passes:
// 1. convert the formula's syntax tree into an undirected graph,
// adding links between children of Boolean operators
// 2. compute the (Boolean) cut points of that graph, using the
// Hopcroft-Tarjan algorithm (see below for a reference)
// 3. recursively scan the formula's tree until we reach
// either a (Boolean) cut point or an atomic proposition, and
// replace that node by a fresh atomic proposition.
//
// In the example above (a&b)U(b&!c), the last recursion
// stops on a, b, and !c, producing (p0&p1)U(p1&p2).
// On the above example, when processing the cut-point
// ((a|b)&c&d) we group its children that are cut-points
// (a|b)&c and rename this group as p0. Then d gets
// his own name p1, and when processing (!d&e&f) we group
// e&f because they are both cut-points, are rename them p1.
// The result is (p0 & p1) U (!p1 & p2).
//
// Problem #B above (handling of n-ary expression) need some
// additional tricks. Consider (a&b&c&d) U X(c&d), and assume
@ -343,24 +342,19 @@ namespace spot
// Convert the formula's syntax tree into an undirected graph
// labeled by subformulas.
class formula_to_fgraph
class formula_to_fgraph final
{
public:
fgraph& g;
std::stack<formula> s;
sub_formula_count_t& subcount;
formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount):
g(g), subcount(subcount)
{
}
formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount)
: g(g), subcount(subcount)
{
}
~formula_to_fgraph()
{
}
void
visit(formula f)
void visit(formula f)
{
{
// Connect to parent
@ -411,27 +405,6 @@ namespace spot
}
for (; i < sz; ++i)
visit(f[i]);
if (sz > 1 && f.is_boolean())
{
// For Boolean nodes, connect all children in a
// loop. This way the node can only be a cut point
// if it separates all children from the reset of
// the graph (not only one).
formula pred = f[0];
for (i = 1; i < sz; ++i)
{
formula next = f[i];
// Note that we add an edge in both directions,
// as the cut point algorithm really need undirected
// graphs. (We used to do only one direction, and
// that turned out to be a bug.)
g[pred].emplace_back(next);
g[next].emplace_back(pred);
pred = next;
}
g[pred].emplace_back(f[0]);
g[f[0]].emplace_back(pred);
}
done:
s.pop();
}
@ -465,11 +438,12 @@ namespace spot
// the ACM, 16 (6), June 1973.
//
// It differs from the original algorithm by returning only the
// Boolean cutpoints, and not dealing with the initial state
// Boolean cut-points, not dealing with the initial state
// properly (our initial state will always be considered as a
// cut-point, but since we only return Boolean cut-points it's
// OK: if the top-most formula is Boolean we want to replace it
// as a whole).
// as a whole), and considering the atomic propositions that
// are leaves as cutpoints too.
void cut_points(const fgraph& g, fset& c, formula start)
{
stack_t s;
@ -530,7 +504,14 @@ namespace spot
data_entry& dgrand_parent = data[grand_parent];
if (dparent.low >= dgrand_parent.num // cut-point
&& grand_parent.is_boolean())
c.insert(grand_parent);
{
c.insert(grand_parent);
// Also consider atomic propositions as
// cut-points if they are leaves.
if (parent.is(op::ap)
&& g.find(parent)->second.size() == 1)
c.insert(parent);
}
if (dparent.low < dgrand_parent.low)
dgrand_parent.low = dparent.low;
}
@ -539,7 +520,7 @@ namespace spot
}
class bse_relabeler final: public relabeler<false>
class bse_relabeler final: public relabeler<false, true>
{
public:
const fset& c;
@ -553,12 +534,45 @@ namespace spot
using relabeler::visit;
formula
visit(formula f)
formula visit(formula f)
{
if (f.is(op::ap) || (c.find(f) != c.end()))
if (f.is(op::ap))
return rename(f);
// This is Boolean cut-point?
// We can only relabel it if all its children are cut-points.
if (c.find(f) != c.end())
{
unsigned fsz = f.size();
assert(fsz > 0); // A cut point has children
if (fsz == 1
|| (fsz == 2
&& ((c.find(f[0]) != c.end())
== (c.find(f[1]) != c.end()))))
return rename(f);
if (fsz > 2)
{
// cp[0] will contains non cut-points
// cp[1] will contain cut-points or atomic propositions
std::vector<formula> cp[2];
cp[0].reserve(fsz);
cp[1].reserve(fsz);
for (unsigned i = 0; i < fsz; ++i)
{
formula cf = f[i];
cp[c.find(cf) != c.end()].push_back(cf);
}
if (cp[0].empty()
|| cp[1].empty())
// all children are cut-points or non-cut-points
return rename(f);
formula cp1group = rename(formula::multop(f.kind(), cp[1]));
formula cp0group = visit(formula::multop(f.kind(), cp[0]));
return formula::multop(f.kind(), {cp1group, cp0group});
}
}
// Not a cut-point, recurse
unsigned sz = f.size();
if (sz <= 2)
return f.map([this](formula f)
@ -566,24 +580,24 @@ namespace spot
return visit(f);
});
unsigned i = 0;
std::vector<formula> res;
if (f.is_boolean() && sz > 2)
{
// If we have a Boolean formula with more than two
// children, like (a & b & c & d) where some children
// (assume {a,b}) are used only once, but some other
// (assume {c,d}) are used multiple time in the formula,
// then split that into ((a & b) & (c & d)) to give
// (a & b) a chance to be relabeled as a whole.
auto pair = split_used_once(f, subcount);
if (pair.second)
return formula::multop(f.kind(), { visit(pair.first),
visit(pair.second) });
}
// If we have a Boolean formula with more than two
// children, like (a & b & c & d) where some children
// (assume {a,b}) are used only once, but some other
// (assume {c,d}) are used multiple time in the formula,
// then split that into ((a & b) & (c & d)) to give
// (a & b) a chance to be relabeled as a whole.
if (auto pair = split_used_once(f, subcount); pair.second)
{
formula left = visit(pair.first);
formula right = visit(pair.second);
return formula::multop(f.kind(), { left, right });
}
/// If we have a formula like (a & b & Xc), consider
/// it as ((a & b) & Xc) in the graph to isolate the
/// Boolean operands as a single node.
unsigned i = 0;
std::vector<formula> res;
formula b = f.boolean_operands(&i);
if (b && b != f)
{
@ -630,6 +644,10 @@ namespace spot
fset c;
cut_points(g, c, f);
// std::cerr << "cut-points\n";
// for (formula cp: c)
// std::cerr << " - " << cp << '\n';
// Relabel the formula recursively, stopping
// at cut-points or atomic propositions.
ap_generator* gen = nullptr;