* spot/tl/relabel.cc (formula_to_fgraph): Do not assume that n-ary operators are Boolean operators. * tests/python/relabel.py: Add a test case found while discussing some expression with Antoine Martin. * NEWS: Mention it.
624 lines
19 KiB
C++
624 lines
19 KiB
C++
// -*- coding: utf-8 -*-
|
||
// Copyright (C) 2012-2016, 2018-2020, 2022 Laboratoire de Recherche et
|
||
// Développement de l'Epita (LRDE).
|
||
//
|
||
// This file is part of Spot, a model checking library.
|
||
//
|
||
// Spot is free software; you can redistribute it and/or modify it
|
||
// under the terms of the GNU General Public License as published by
|
||
// the Free Software Foundation; either version 3 of the License, or
|
||
// (at your option) any later version.
|
||
//
|
||
// Spot is distributed in the hope that it will be useful, but WITHOUT
|
||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
// License for more details.
|
||
//
|
||
// You should have received a copy of the GNU General Public License
|
||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
#include "config.h"
|
||
#include <spot/tl/relabel.hh>
|
||
#include <sstream>
|
||
#include <spot/misc/hash.hh>
|
||
#include <map>
|
||
#include <set>
|
||
#include <stack>
|
||
#include <iostream>
|
||
|
||
namespace spot
|
||
{
|
||
//////////////////////////////////////////////////////////////////////
|
||
// Basic relabeler
|
||
//////////////////////////////////////////////////////////////////////
|
||
|
||
namespace
|
||
{
|
||
struct ap_generator
|
||
{
|
||
virtual formula next() = 0;
|
||
virtual ~ap_generator() {}
|
||
};
|
||
|
||
struct pnn_generator final: ap_generator
|
||
{
|
||
unsigned nn;
|
||
pnn_generator()
|
||
: nn(0)
|
||
{
|
||
}
|
||
|
||
virtual formula next() override
|
||
{
|
||
std::ostringstream s;
|
||
s << 'p' << nn++;
|
||
return formula::ap(s.str());
|
||
}
|
||
};
|
||
|
||
struct abc_generator final: ap_generator
|
||
{
|
||
public:
|
||
abc_generator()
|
||
: nn(0)
|
||
{
|
||
}
|
||
|
||
unsigned nn;
|
||
|
||
virtual formula next() override
|
||
{
|
||
std::string s;
|
||
unsigned n = nn++;
|
||
do
|
||
{
|
||
s.push_back('a' + (n % 26));
|
||
n /= 26;
|
||
}
|
||
while (n);
|
||
return formula::ap(s);
|
||
}
|
||
};
|
||
|
||
|
||
class relabeler
|
||
{
|
||
public:
|
||
typedef std::unordered_map<formula, formula> map;
|
||
map newname;
|
||
ap_generator* gen;
|
||
relabeling_map* oldnames;
|
||
|
||
relabeler(ap_generator* gen, relabeling_map* m)
|
||
: gen(gen), oldnames(m)
|
||
{
|
||
}
|
||
|
||
~relabeler()
|
||
{
|
||
delete gen;
|
||
}
|
||
|
||
formula rename(formula old)
|
||
{
|
||
auto r = newname.emplace(old, nullptr);
|
||
if (!r.second)
|
||
{
|
||
return r.first->second;
|
||
}
|
||
else
|
||
{
|
||
formula res = gen->next();
|
||
r.first->second = res;
|
||
if (oldnames)
|
||
(*oldnames)[res] = old;
|
||
return res;
|
||
}
|
||
}
|
||
|
||
formula
|
||
visit(formula f)
|
||
{
|
||
if (f.is(op::ap))
|
||
return rename(f);
|
||
else
|
||
return f.map([this](formula f)
|
||
{
|
||
return this->visit(f);
|
||
});
|
||
}
|
||
|
||
};
|
||
|
||
}
|
||
|
||
|
||
// Relabel all atomic propositions of F using the naming scheme
// selected by STYLE.  If M is non-null, it is filled by the
// relabeler with the new-name -> old-proposition mapping.
formula
relabel(formula f, relabeling_style style, relabeling_map* m)
{
  // The generator is handed over to the relabeler, which deletes it.
  ap_generator* names = nullptr;
  if (style == Pnn)
    names = new pnn_generator;
  else if (style == Abc)
    names = new abc_generator;

  relabeler rl(names, m);
  return rl.visit(f);
}
|
||
|
||
namespace
|
||
{
|
||
typedef std::map<formula, int> sub_formula_count_t;
|
||
|
||
static void
|
||
sub_formula_collect(formula f, sub_formula_count_t* s)
|
||
{
|
||
assert(s);
|
||
f.traverse([&](const formula& f)
|
||
{
|
||
auto p = s->emplace(f, 1);
|
||
if (p.second)
|
||
return false;
|
||
p.first->second += 1;
|
||
return true;
|
||
});
|
||
}
|
||
|
||
static std::pair<formula, formula>
|
||
split_used_once(formula f, const sub_formula_count_t& subcount)
|
||
{
|
||
assert(f.is_boolean());
|
||
unsigned sz = f.size();
|
||
if (sz <= 2)
|
||
return {f, nullptr};
|
||
// If we have a Boolean formula with more than two
|
||
// children, like (a & b & c & d) where some children
|
||
// (assume {a,b}) are used only once, but some other
|
||
// (assume {c,d}) are used multiple time in the formula,
|
||
// then split that into ((a & b) & (c & d)) to give
|
||
// (a & b) a chance to be relabeled as a whole.
|
||
bool has_once = false;
|
||
bool has_mult = false;
|
||
for (unsigned j = 0; j < sz; ++j)
|
||
{
|
||
auto p = subcount.find(f[j]);
|
||
assert(p != subcount.end());
|
||
unsigned sc = p->second;
|
||
assert(sc > 0);
|
||
if (sc == 1)
|
||
has_once = true;
|
||
else
|
||
has_mult = true;
|
||
if (has_once && has_mult)
|
||
{
|
||
std::vector<formula> once;
|
||
std::vector<formula> mult;
|
||
for (unsigned i = 0; i < j; ++i)
|
||
mult.push_back(f[i]);
|
||
once.push_back(f[j]);
|
||
if (sc > 1)
|
||
std::swap(once, mult);
|
||
for (++j; j < sz; ++j)
|
||
{
|
||
auto p = subcount.find(f[j]);
|
||
assert(p != subcount.end());
|
||
unsigned sc = p->second;
|
||
((sc == 1) ? once : mult).push_back(f[j]);
|
||
}
|
||
formula f1 = formula::multop(f.kind(), std::move(once));
|
||
formula f2 = formula::multop(f.kind(), std::move(mult));
|
||
return { f1, f2 };
|
||
}
|
||
}
|
||
return {f, nullptr};
|
||
}
|
||
}
|
||
|
||
|
||
//////////////////////////////////////////////////////////////////////
|
||
// Boolean-subexpression relabeler
|
||
//////////////////////////////////////////////////////////////////////
|
||
|
||
// Here we want to rewrite a formula such as
|
||
// "a & b & X(c & d) & GF(c & d)" into "p0 & Xp1 & GFp1"
|
||
// where Boolean subexpressions are replaced by fresh propositions.
|
||
//
|
||
// Detecting Boolean subexpressions is not a problem.
|
||
// Furthermore, because we are already representing LTL formulas
|
||
// with sharing of identical sub-expressions we can easily rename
|
||
// a subexpression (such as c&d above) only once. However this
|
||
// scheme has two problems:
|
||
//
|
||
// A. It will not detect inter-dependent Boolean subexpressions.
|
||
// For instance it will mistakenly relabel "(a & b) U (a & !b)"
|
||
// as "p0 U p1", hiding the dependency between a&b and a&!b.
|
||
//
|
||
// B. Because of our n-ary operators, it will fail to
|
||
// notice that (a & b) is a sub-expression of (a & b & c).
|
||
//
|
||
// The way we compute the subexpressions that can be relabeled is
|
||
// by transforming the formula syntax tree into an undirected
|
||
// graph, and computing the cut points of this graph. The cut
|
||
// points (or articulation points) are the nodes whose removal
|
||
// would split the graph in two components. To ensure that a
|
||
// Boolean operator is only considered as a cut point if it would
|
||
// separate all of its children from the rest of the graph, we
|
||
// connect all the children of Boolean operators.
|
||
//
|
||
// For instance (a & b) U (c & d) has two (Boolean) cut points
|
||
// corresponding to the two AND operators:
|
||
//
|
||
// (a&b)U(c&d)
|
||
// ╱ ╲
|
||
// a&b c&d
|
||
// ╱ ╲ ╱ ╲
|
||
// a─────b c─────d
|
||
//
|
||
// (The root node is also a cut point, but we only consider Boolean
|
||
// cut points for relabeling.)
|
||
//
|
||
// On the other hand, (a & b) U (b & !c) has only one Boolean
|
||
// cut-point which corresponds to the NOT operator:
|
||
//
|
||
// (a&b)U(b&!c)
|
||
// ╱ ╲
|
||
// a&b b&!c
|
||
// ╱ ╲ ╱ ╲
|
||
// a─────b────!c
|
||
// │
|
||
// c
|
||
//
|
||
// Note that if the children of a&b and b&!c were not connected,
|
||
// a&b and b&!c would be considered as cut points because they
|
||
// separate "a" or "!c" from the rest of the graph.
|
||
//
|
||
// The relabeling of a formula is therefore done in 3 passes:
|
||
// 1. convert the formula's syntax tree into an undirected graph,
|
||
// adding links between children of Boolean operators
|
||
// 2. compute the (Boolean) cut points of that graph, using the
|
||
// Hopcroft-Tarjan algorithm (see below for a reference)
|
||
// 3. recursively scan the formula's tree until we reach
|
||
// either a (Boolean) cut point or an atomic proposition, and
|
||
// replace that node by a fresh atomic proposition.
|
||
//
|
||
// In the example above (a&b)U(b&!c), the last recursion
|
||
// stops on a, b, and !c, producing (p0&p1)U(p1&p2).
|
||
//
|
||
// Problem #B above (handling of n-ary expression) needs some
|
||
// additional tricks. Consider (a&b&c&d) U X(c&d), and assume
|
||
// {a,b,c,d} are Boolean subformulas. The construction, as we have
|
||
// presented it, would interconnect all of {a,b,c,d}, preventing c&d
|
||
// from being relabeled together. To help with that, we count the
|
||
// number of times each subformula is used (or how many parents
|
||
// it has in the syntax DAG), and use that to split (a&b&c&d) into
|
||
// (a&b)&(c&d), separating subformulas that are used only once. The
|
||
// counting is done by sub_formula_collect(), and the split by
|
||
// split_used_once().
|
||
namespace
|
||
{
|
||
typedef std::vector<formula> succ_vec;
|
||
typedef std::map<formula, succ_vec> fgraph;
|
||
|
||
// Convert the formula's syntax tree into an undirected graph
|
||
// labeled by subformulas.
|
||
class formula_to_fgraph
|
||
{
|
||
public:
|
||
fgraph& g;
|
||
std::stack<formula> s;
|
||
sub_formula_count_t& subcount;
|
||
|
||
formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount):
|
||
g(g), subcount(subcount)
|
||
{
|
||
}
|
||
|
||
~formula_to_fgraph()
|
||
{
|
||
}
|
||
|
||
void
|
||
visit(formula f)
|
||
{
|
||
{
|
||
// Connect to parent
|
||
auto in = g.emplace(f, succ_vec());
|
||
if (!s.empty())
|
||
{
|
||
formula top = s.top();
|
||
in.first->second.emplace_back(top);
|
||
g[top].emplace_back(f);
|
||
if (!in.second)
|
||
return;
|
||
}
|
||
else
|
||
{
|
||
assert(in.second);
|
||
}
|
||
}
|
||
s.push(f);
|
||
|
||
unsigned sz = f.size();
|
||
unsigned i = 0;
|
||
if (sz > 2 && f.is_boolean())
|
||
{
|
||
// If we have a Boolean formula with more than two
|
||
// children, like (a & b & c & d) where some children
|
||
// (assume {a,b}) are used only once, but some other
|
||
// (assume {c,d}) are used multiple time in the formula,
|
||
// then split that into ((a & b) & (c & d)) to give
|
||
// (a & b) a chance to be relabeled as a whole.
|
||
auto pair = split_used_once(f, subcount);
|
||
if (pair.second)
|
||
{
|
||
visit(pair.first);
|
||
visit(pair.second);
|
||
g[pair.first].emplace_back(pair.second);
|
||
g[pair.second].emplace_back(pair.first);
|
||
goto done;
|
||
}
|
||
}
|
||
if (sz > 2 && !f.is_boolean() && f.is(op::And, op::Or))
|
||
{
|
||
/// If we have a formula like (a & b & Xc), consider
|
||
/// it as ((a & b) & Xc) in the graph to isolate the
|
||
/// Boolean operands as a single node.
|
||
formula b = f.boolean_operands(&i);
|
||
if (b)
|
||
visit(b);
|
||
}
|
||
for (; i < sz; ++i)
|
||
visit(f[i]);
|
||
if (sz > 1 && f.is_boolean())
|
||
{
|
||
// For Boolean nodes, connect all children in a
|
||
// loop. This way the node can only be a cut point
|
||
// if it separates all children from the reset of
|
||
// the graph (not only one).
|
||
formula pred = f[0];
|
||
for (i = 1; i < sz; ++i)
|
||
{
|
||
formula next = f[i];
|
||
// Note that we add an edge in both directions,
|
||
// as the cut point algorithm really need undirected
|
||
// graphs. (We used to do only one direction, and
|
||
// that turned out to be a bug.)
|
||
g[pred].emplace_back(next);
|
||
g[next].emplace_back(pred);
|
||
pred = next;
|
||
}
|
||
g[pred].emplace_back(f[0]);
|
||
g[f[0]].emplace_back(pred);
|
||
}
|
||
done:
|
||
s.pop();
|
||
}
|
||
};
|
||
|
||
|
||
using fset = std::set<formula>;
|
||
// Bookkeeping attached to each node of the graph by cut_points().
struct data_entry
{
  unsigned num;  // serial number, in pre-order
  unsigned low;  // lowest number accessible via unstacked descendants

  // Both fields default to 0.
  data_entry(unsigned serial = 0, unsigned lowest = 0)
    : num{serial}, low{lowest}
  {
  }
};
|
||
typedef std::unordered_map<formula, data_entry> fmap_t;
|
||
struct stack_entry
|
||
{
|
||
formula grand_parent;
|
||
formula parent; // current node
|
||
succ_vec::const_iterator current_child;
|
||
succ_vec::const_iterator last_child;
|
||
};
|
||
typedef std::stack<stack_entry> stack_t;
|
||
|
||
// Fill c with the Boolean cutpoints of g, starting from start.
|
||
//
|
||
// This is based no "Efficient Algorithms for Graph
|
||
// Manipulation", J. Hopcroft & R. Tarjan, in Communications of
|
||
// the ACM, 16 (6), June 1973.
|
||
//
|
||
// It differs from the original algorithm by returning only the
|
||
// Boolean cutpoints, and not dealing with the initial state
|
||
// properly (our initial state will always be considered as a
|
||
// cut-point, but since we only return Boolean cut-points it's
|
||
// OK: if the top-most formula is Boolean we want to replace it
|
||
// as a whole).
|
||
void cut_points(const fgraph& g, fset& c, formula start)
|
||
{
|
||
stack_t s;
|
||
|
||
unsigned num = 0;
|
||
fmap_t data;
|
||
data_entry d = { num, num };
|
||
data[start] = d;
|
||
++num;
|
||
const succ_vec& children = g.find(start)->second;
|
||
stack_entry e = { start, start, children.begin(), children.end() };
|
||
s.push(e);
|
||
|
||
while (!s.empty())
|
||
{
|
||
stack_entry& e = s.top();
|
||
if (e.current_child != e.last_child)
|
||
{
|
||
// Skip the edge if it is just the reverse of the one
|
||
// we took.
|
||
formula child = *e.current_child;
|
||
if (child == e.grand_parent)
|
||
{
|
||
++e.current_child;
|
||
continue;
|
||
}
|
||
auto i = data.emplace(std::piecewise_construct,
|
||
std::forward_as_tuple(child),
|
||
std::forward_as_tuple(num, num));
|
||
if (i.second) // New destination.
|
||
{
|
||
++num;
|
||
const succ_vec& children = g.find(child)->second;
|
||
stack_entry newe = { e.parent, child,
|
||
children.begin(), children.end() };
|
||
s.push(newe);
|
||
}
|
||
else // Destination exists.
|
||
{
|
||
data_entry& dparent = data[e.parent];
|
||
data_entry& dchild = i.first->second;
|
||
// If this is a back-edge, update
|
||
// the low field of the parent.
|
||
if (dchild.num <= dparent.num)
|
||
if (dparent.low > dchild.num)
|
||
dparent.low = dchild.num;
|
||
}
|
||
++e.current_child;
|
||
}
|
||
else
|
||
{
|
||
formula grand_parent = e.grand_parent;
|
||
formula parent = e.parent;
|
||
s.pop();
|
||
if (!s.empty())
|
||
{
|
||
data_entry& dparent = data[parent];
|
||
data_entry& dgrand_parent = data[grand_parent];
|
||
if (dparent.low >= dgrand_parent.num // cut-point
|
||
&& grand_parent.is_boolean())
|
||
c.insert(grand_parent);
|
||
if (dparent.low < dgrand_parent.low)
|
||
dgrand_parent.low = dparent.low;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
class bse_relabeler final: public relabeler
|
||
{
|
||
public:
|
||
const fset& c;
|
||
const sub_formula_count_t& subcount;
|
||
|
||
bse_relabeler(ap_generator* gen, const fset& c,
|
||
relabeling_map* m, const sub_formula_count_t& subcount)
|
||
: relabeler(gen, m), c(c), subcount(subcount)
|
||
{
|
||
}
|
||
|
||
using relabeler::visit;
|
||
|
||
formula
|
||
visit(formula f)
|
||
{
|
||
if (f.is(op::ap) || (c.find(f) != c.end()))
|
||
return rename(f);
|
||
|
||
unsigned sz = f.size();
|
||
if (sz <= 2)
|
||
return f.map([this](formula f)
|
||
{
|
||
return visit(f);
|
||
});
|
||
|
||
unsigned i = 0;
|
||
std::vector<formula> res;
|
||
if (f.is_boolean() && sz > 2)
|
||
{
|
||
// If we have a Boolean formula with more than two
|
||
// children, like (a & b & c & d) where some children
|
||
// (assume {a,b}) are used only once, but some other
|
||
// (assume {c,d}) are used multiple time in the formula,
|
||
// then split that into ((a & b) & (c & d)) to give
|
||
// (a & b) a chance to be relabeled as a whole.
|
||
auto pair = split_used_once(f, subcount);
|
||
if (pair.second)
|
||
return formula::multop(f.kind(), { visit(pair.first),
|
||
visit(pair.second) });
|
||
}
|
||
/// If we have a formula like (a & b & Xc), consider
|
||
/// it as ((a & b) & Xc) in the graph to isolate the
|
||
/// Boolean operands as a single node.
|
||
formula b = f.boolean_operands(&i);
|
||
if (b && b != f)
|
||
{
|
||
res.reserve(sz - i + 1);
|
||
res.emplace_back(visit(b));
|
||
}
|
||
else
|
||
{
|
||
i = 0;
|
||
res.reserve(sz);
|
||
}
|
||
for (; i < sz; ++i)
|
||
res.emplace_back(visit(f[i]));
|
||
return formula::multop(f.kind(), res);
|
||
}
|
||
};
|
||
}
|
||
|
||
|
||
// Relabel F by replacing atomic propositions and Boolean cut points
// with propositions named according to STYLE.  M, if non-null, is
// passed to the relabeler to record the new-name -> old-subformula
// mapping.
formula
relabel_bse(formula f, relabeling_style style, relabeling_map* m)
{
  // Scan f for sub-formulas used once.
  sub_formula_count_t subcount;
  sub_formula_collect(f, &subcount);

  // Build the graph g from the formula f.
  fgraph g;
  formula_to_fgraph(g, subcount).visit(f);

  //// Uncomment to print the graph.
  // for (auto& [f, sv]: g)
  //   {
  //     std::cerr << f << ":\n";
  //     for (auto& s: sv)
  //       std::cerr << "  " << s << '\n';
  //   }

  // Compute its cut-points
  fset c;
  cut_points(g, c, f);

  // Relabel the formula recursively, stopping at cut-points or
  // atomic propositions.  The generator is handed over to the
  // relabeler, which deletes it.
  ap_generator* names = nullptr;
  if (style == Pnn)
    names = new pnn_generator;
  else if (style == Abc)
    names = new abc_generator;

  bse_relabeler rel(names, c, m, subcount);
  return rel.visit(f);
}
|
||
|
||
// Replace each atomic proposition of F that is a key of *M by the
// formula it is mapped to.  Anything else, including atomic
// propositions absent from *M, is rebuilt through formula::map()
// with relabel_apply applied recursively.
formula
relabel_apply(formula f, relabeling_map* m)
{
  if (!f.is(op::ap))
    return f.map(relabel_apply, m);

  auto pos = m->find(f);
  if (pos == m->end())
    return f.map(relabel_apply, m);
  return pos->second;
}
|
||
|
||
}
|