determinize: various micro optimizations

* spot/twaalgos/determinize.cc: Implement various micro
optimizations.  Use robin-hood hashing in a few places.
* spot/priv/robin_hood.hh: New file.
* spot/priv/Makefile.am: Add it.
* tests/sanity/80columns.test, tests/sanity/includes.test,
tests/sanity/style.test: Adjust to skip robin_hood.hh.
Alexandre Duret-Lutz 2019-10-15 13:37:36 +02:00
parent 98dc1a5629
commit f0b52ce218
6 changed files with 2168 additions and 76 deletions
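
Note (annotation, not part of the commit): the robin-hood maps used below come from Martin Ankerl's single-header robin-hood-hashing library, which keeps the std::unordered_map interface but uses open addressing with robin-hood probing, trading per-bucket allocations for better cache locality. A minimal usage sketch, assuming the bundled header is reachable as <spot/priv/robin_hood.hh> when building inside the Spot tree:

    #include <cstdio>
    #include <string>
    #include <spot/priv/robin_hood.hh>   // single-header library added by this commit

    int main()
    {
      // Same interface as std::unordered_map, different layout underneath.
      robin_hood::unordered_flat_map<std::string, unsigned> count;
      for (const char* w : {"a", "b", "a"})
        ++count[w];                        // default-constructs to 0, then increments
      for (const auto& p : count)
        std::printf("%s: %u\n", p.first.c_str(), p.second);
    }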


spot/priv/Makefile.am

@@ -1,5 +1,5 @@
 ## -*- coding: utf-8 -*-
-## Copyright (C) 2013-2018 Laboratoire de Recherche et Développement
+## Copyright (C) 2013-2019 Laboratoire de Recherche et Développement
 ## de l'Epita (LRDE).
 ##
 ## This file is part of Spot, a model checking library.
@@ -29,9 +29,17 @@ libpriv_la_SOURCES = \
   bddalloc.hh \
   freelist.cc \
   freelist.hh \
+  robin_hood.hh \
   satcommon.hh\
   satcommon.cc\
   trim.cc \
   trim.hh \
   weight.cc \
   weight.hh
+
+GH = https://raw.githubusercontent.com/martinus
+RH = $(GH)/robin-hood-hashing/master/src/include/robin_hood.h
+.PHONY: update
+update:
+	wget $(RH) -O $(srcdir)/robin_hood.hh || \
+	  curl $(RH) -o $(srcdir)/robin_hood.hh

spot/priv/robin_hood.hh (new file, 2056 lines)

File diff suppressed because it is too large.


spot/twaalgos/determinize.cc

@@ -34,6 +34,7 @@
 #include <spot/twaalgos/simulation.hh>
 #include <spot/twaalgos/isdet.hh>
 #include <spot/twaalgos/parity.hh>
+#include <spot/priv/robin_hood.hh>
 
 namespace spot
 {
@@ -132,7 +133,8 @@ namespace spot
     }
   };
 
-  using power_set = std::unordered_map<safra_state, unsigned, hash_safra>;
+  using power_set =
+    robin_hood::unordered_node_map<safra_state, unsigned, hash_safra>;
 
   std::string
   nodes_to_string(const const_twa_graph_ptr& aut,
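
Note (annotation, not part of the commit): unordered_node_map is the node-based variant of the library: each element lives in its own allocation, so pointers and references to stored elements survive rehashing, just as with std::unordered_map; the flat variant used later for the BDD cache stores elements inline and does not give that guarantee. The node variant is presumably used here because the surrounding code keeps references to the stored safra_state keys. A small sketch of the stability property, with made-up keys and values:

    #include <cassert>
    #include <spot/priv/robin_hood.hh>

    int main()
    {
      // Node-based map: element addresses stay valid while inserting.
      robin_hood::unordered_node_map<int, unsigned> m;
      m[0] = 42;
      const unsigned* p = &m[0];
      for (int i = 1; i < 1000; ++i)   // force several rehashes
        m[i] = i;
      assert(*p == 42);                // still valid with the node map
    }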
@@ -191,7 +193,7 @@ namespace spot
     update_succ(int brace, unsigned dst, const acc_cond::mark_t& acc)
     {
       int newb = brace;
-      if (acc.count())
+      if (acc)
         {
           assert(acc.has(0) && acc.count() == 1 && "Only TBA are accepted");
           // Accepting edges generate new braces: step A1
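
Note (annotation, not part of the commit): the acc.count() → if (acc) change tests emptiness through the mark type's boolean conversion instead of counting every set bit. A generic illustration with a hypothetical stand-in for the mark type (not Spot's acc_cond::mark_t):

    #include <cassert>
    #include <bitset>

    // Hypothetical stand-in for an acceptance-mark bitset.
    struct mark
    {
      std::bitset<32> bits;
      unsigned count() const { return bits.count(); }        // tallies every bit
      explicit operator bool() const { return bits.any(); }  // stops at the first set bit
    };

    int main()
    {
      mark m{0b101};
      if (m)                       // cheap emptiness test
        assert(m.count() == 2);    // full popcount only when really needed
    }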
@@ -338,6 +340,8 @@ namespace spot
     }
 
   private:
+    std::vector<safra_state> stutter_path_;
+
     void
     compute_()
     {
@@ -346,60 +350,63 @@ namespace spot
           const bdd& ap = *bddit;
 
+          // In stutter-invariant automata, every time we follow a
+          // transition labeled by L, we can actually stutter the L
+          // label and jump further away. The following code performs
+          // this stuttering until a cycle is found, and select one
+          // state of the cycle as the destination to jump to.
           if (cs_.use_stutter && cs_.aut->prop_stutter_invariant())
             {
               ss = *cs_.src;
-              bool stop = false;
-              std::deque<safra_state> path;
-              std::unordered_set<
-                std::reference_wrapper<const safra_state>,
-                hash_safra,
-                ref_wrap_equal<const safra_state>> states;
+              // The path is usually quite small (3-4 states), so it's
+              // not worth setting up a hash table to detect a cycle.
+              stutter_path_.clear();
+              std::vector<safra_state>::iterator cycle_seed;
               unsigned mincolor = -1U;
-              while (!stop)
+              // stutter forward until we cycle
+              for (;;)
                 {
-                  path.emplace_back(std::move(ss));
-                  auto i = states.insert(path.back());
-                  SPOT_ASSUME(i.second);
-                  ss = path.back().compute_succ(cs_, ap, color_);
-                  mincolor = std::min(color_, mincolor);
-                  stop = states.find(ss) != states.end();
-                }
-              // also insert last element (/!\ it thus appears twice in path)
-              path.emplace_back(std::move(ss));
-              const safra_state& loopstart = path.back();
-              bool in_seen = cs_.seen.find(ss) != cs_.seen.end();
-              unsigned tokeep = path.size()-1;
-              unsigned idx = path.size()-2;
-              // The loop is guaranteed to end, because path contains too
-              // occurrences of loopstart
-              while (!(loopstart == path[idx]))
-                {
-                  // if path[tokeep] is already in seen, replace it with a
-                  // smaller state also in seen.
-                  if (in_seen && cs_.seen.find(path[idx]) != cs_.seen.end())
-                    if (path[idx] < path[tokeep])
-                      tokeep = idx;
-                  // if path[tokeep] is not in seen, replace it either with a
-                  // state in seen or with a smaller state
-                  if (!in_seen)
+                  // any duplicate value, if any, is usually close to
+                  // the end, so search backward.
+                  auto it = std::find(stutter_path_.rbegin(),
+                                      stutter_path_.rend(), ss);
+                  if (it != stutter_path_.rend())
                     {
-                      if (cs_.seen.find(path[idx]) != cs_.seen.end())
-                        {
-                          tokeep = idx;
-                          in_seen = true;
-                        }
-                      else if (path[idx] < path[tokeep])
-                        tokeep = idx;
+                      cycle_seed = (it + 1).base();
+                      break;
                     }
-                  --idx;
+                  stutter_path_.emplace_back(std::move(ss));
+                  ss = stutter_path_.back().compute_succ(cs_, ap, color_);
+                  mincolor = std::min(color_, mincolor);
                 }
-              // clean references to path before move (see next line)
-              states.clear();
-              // move is safe, no dangling references
-              ss = std::move(path[tokeep]);
+              bool in_seen = cs_.seen.find(*cycle_seed) != cs_.seen.end();
+              for (auto it = cycle_seed + 1; it < stutter_path_.end(); ++it)
+                {
+                  if (in_seen)
+                    {
+                      // if *cycle_seed is already in seen, replace
+                      // it with a smaller state also in seen.
+                      if (cs_.seen.find(*it) != cs_.seen.end()
+                          && *it < *cycle_seed)
+                        cycle_seed = it;
+                    }
+                  else
+                    {
+                      // if *cycle_seed is not in seen, replace it
+                      // either with a state in seen or with a smaller
+                      // state
+                      if (cs_.seen.find(*it) != cs_.seen.end())
+                        {
+                          cycle_seed = it;
+                          in_seen = true;
+                        }
+                      else if (*it < *cycle_seed)
+                        {
+                          cycle_seed = it;
+                        }
+                    }
+                }
+              ss = std::move(*cycle_seed);
               color_ = mincolor;
             }
           else
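
Note (annotation, not part of the commit): the rewritten block replaces a std::deque plus a std::unordered_set with one reused vector; successors are pushed until a value repeats, and the repeat is detected by a backward linear scan, which beats hashing when the path holds only a few states. A stripped-down sketch of the same cycle-finding pattern, using plain ints and a made-up successor function:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Hypothetical successor function standing in for compute_succ().
    static int step(int s) { return (s * 5 + 3) % 11; }

    int main()
    {
      std::vector<int> path;           // reused buffer, like stutter_path_
      int s = 0;
      std::vector<int>::iterator cycle_seed;
      for (;;)
        {
          // Any duplicate is usually near the end, so search backward.
          auto it = std::find(path.rbegin(), path.rend(), s);
          if (it != path.rend())
            {
              cycle_seed = (it + 1).base();  // forward iterator on the duplicate
              break;
            }
          path.push_back(s);
          s = step(s);
        }
      std::printf("cycle starts at index %td with value %d\n",
                  cycle_seed - path.begin(), *cycle_seed);
    }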
@@ -558,7 +565,7 @@ namespace spot
   class safra_support
   {
     const std::vector<bdd>& state_supports;
-    std::unordered_map<bdd, std::vector<bdd>, bdd_hash> cache;
+    robin_hood::unordered_flat_map<bdd, std::vector<bdd>, bdd_hash> cache;
 
   public:
     safra_support(const std::vector<bdd>& s): state_supports(s) {}
@@ -617,20 +624,19 @@ namespace spot
     while (it1 != nodes_.end())
       {
         const auto& imp1 = implies[it1->first];
-        bool erased = false;
+        auto old_it1 = it1++;
+        if (imp1.empty())
+          continue;
         for (auto it2 = nodes_.begin(); it2 != nodes_.end(); ++it2)
           {
-            if (it1 == it2)
+            if (old_it1 == it2)
               continue;
             if (imp1[it2->first])
               {
-                erased = true;
-                it1 = nodes_.erase(it1);
+                it1 = nodes_.erase(old_it1);
                 break;
               }
           }
-        if (!erased)
-          ++it1;
       }
   }
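
Note (annotation, not part of the commit): besides skipping rows of 'implies' that were cleared (see the hunk further down), the loop drops the erased flag in favour of the usual advance-then-erase idiom: the iterator is incremented before a possible erase, so the erase never needs to be compensated for afterwards. A generic sketch of that idiom on a std::map, with a made-up predicate:

    #include <cstdio>
    #include <map>

    int main()
    {
      std::map<int, int> m{{1, 10}, {2, 20}, {3, 30}, {4, 40}};
      // Erase-while-iterating: advance first, then erase through the saved
      // iterator, so no 'erased' flag and no second pass are needed.
      for (auto it = m.begin(); it != m.end();)
        {
          auto old_it = it++;
          if (old_it->first % 2 == 0)   // hypothetical predicate
            m.erase(old_it);
        }
      for (const auto& p : m)
        std::printf("%d -> %d\n", p.first, p.second);  // prints the odd keys: 1 and 3
    }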
@@ -758,13 +764,20 @@ namespace spot
   safra_state::hash() const
   {
     size_t res = 0;
+    //std::cerr << this << " [";
     for (const auto& p : nodes_)
      {
        res ^= (res << 3) ^ p.first;
        res ^= (res << 3) ^ p.second;
+       // std::cerr << '(' << p.first << ',' << p.second << ')';
      }
+    // std::cerr << "][ ";
    for (const auto& b : braces_)
-     res ^= (res << 3) ^ b;
+     {
+       res ^= (res << 3) ^ b;
+       // std::cerr << b << ' ';
+     }
+    // std::cerr << "]: " << std::hex << res << std::dec << '\n';
     return res;
   }
@@ -784,18 +797,15 @@ namespace spot
       res[i + scccount * i] = 1;
     for (unsigned i = 0; i != scccount; ++i)
       {
-        std::stack<unsigned> s;
-        s.push(i);
-        while (!s.empty())
+        unsigned ibase = i * scccount;
+        for (unsigned d: scc.succ(i))
           {
-            unsigned src = s.top();
-            s.pop();
-            for (unsigned d: scc.succ(src))
-              {
-                s.push(d);
-                unsigned idx = scccount * i + d;
-                res[idx] = 1;
-              }
+            // we necessarily have d < i because of the way SCCs are
+            // numbered, so we can build the transitive closure by
+            // just ORing any SCC reachable from d.
+            unsigned dbase = d * scccount;
+            for (unsigned j = 0; j != scccount; ++j)
+              res[ibase + j] |= res[dbase + j];
           }
       }
     return res;
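
Note (annotation, not part of the commit): the new find_scc_paths exploits the fact that scc_info numbers SCCs so that every successor has a smaller number than its predecessor; processing SCCs in increasing order therefore finds each successor's reachability row already complete, and the closure reduces to ORing rows. A self-contained sketch on a small DAG with made-up adjacency data:

    #include <cstdio>
    #include <vector>

    int main()
    {
      // DAG whose nodes are numbered in reverse topological order:
      // every edge goes from a larger number to a smaller one.
      unsigned n = 4;
      std::vector<std::vector<unsigned>> succ{{}, {0}, {0}, {1, 2}};

      std::vector<char> reach(n * n, 0);
      for (unsigned i = 0; i != n; ++i)
        reach[i * n + i] = 1;                 // every node reaches itself
      for (unsigned i = 0; i != n; ++i)
        {
          unsigned ibase = i * n;
          for (unsigned d : succ[i])          // d < i, so row d is final
            {
              unsigned dbase = d * n;
              for (unsigned j = 0; j != n; ++j)
                reach[ibase + j] |= reach[dbase + j];
            }
        }
      std::printf("3 reaches 0: %d\n", int(reach[3 * n + 0]));  // prints 1
    }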
@@ -843,26 +853,40 @@ namespace spot
                               std::vector<char>(implications.size(), 0));
    {
      std::vector<char> is_connected = find_scc_paths(scc);
+     unsigned sccs = scc.scc_count();
+     bool something_implies_something = false;
      for (unsigned i = 0; i != implications.size(); ++i)
        {
          // NB spot::simulation() does not remove unreachable states, as it
          // would invalidate the contents of 'implications'.
-         // so we need to explicitely test for unreachable states
+         // so we need to explicitly test for unreachable states
          // FIXME based on the scc_info, we could remove the unreachable
          // states, both in the input automaton and in 'implications'
          // to reduce the size of 'implies'.
          if (!scc.reachable_state(i))
            continue;
+         unsigned scc_of_i = scc.scc_of(i);
+         bool i_implies_something = false;
          for (unsigned j = 0; j != implications.size(); ++j)
            {
              if (!scc.reachable_state(j))
                continue;
-             // index to see if there is a path from scc2 -> scc1
-             unsigned idx = scc.scc_count() * scc.scc_of(j) + scc.scc_of(i);
-             implies[i][j] = !is_connected[idx]
-               && bdd_implies(implications[i], implications[j]);
+             bool i_implies_j = !is_connected[sccs * scc.scc_of(j) + scc_of_i]
+               && bdd_implies(implications[i], implications[j]);
+             implies[i][j] = i_implies_j;
+             i_implies_something |= i_implies_j;
            }
+         // Clear useless lines.
+         if (!i_implies_something)
+           implies[i].clear();
+         else
+           something_implies_something = true;
+       }
+     if (!something_implies_something)
+       {
+         implies.clear();
+         use_simulation = false;
        }
    }
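
Note (annotation, not part of the commit): rows of 'implies' in which state i implies nothing are cleared, so later passes can reject them with an O(1) empty() test (this is what the imp1.empty() check in the earlier hunk relies on), and if no state implies anything the whole table is dropped and use_simulation is switched off. A toy sketch of the row-clearing pattern:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
      // Implication matrix: implies[i][j] != 0 means state i implies state j.
      std::vector<std::vector<char>> implies{{0, 1, 0}, {0, 0, 0}, {0, 0, 0}};

      bool any = false;
      for (auto& row : implies)
        {
          bool row_any = false;
          for (char c : row)
            row_any |= (c != 0);
          if (!row_any)
            row.clear();      // release memory and mark the row as useless
          else
            any = true;
        }
      if (!any)
        implies.clear();      // nothing to gain: disable the optimization entirely
      for (std::size_t i = 0; i < implies.size(); ++i)
        std::printf("row %zu: %s\n", i, implies[i].empty() ? "cleared" : "kept");
    }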
@@ -962,8 +986,7 @@ namespace spot
       }
     // Green and red colors work in pairs, so the number of parity conditions is
     // necessarily even.
-    if (sets % 2 == 1)
-      sets += 1;
+    sets += sets & 1;
     // Acceptance is now min(odd) since we can emit Red on paths 0 with new opti
     res->set_acceptance(sets, acc_cond::acc_code::parity_min_odd(sets));
     res->prop_universal(true);
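
Note (annotation, not part of the commit): sets += sets & 1 rounds the number of colors up to the next even value without a branch, since the low bit is set exactly when the value is odd. A standalone check of the identity:

    #include <cassert>

    int main()
    {
      for (unsigned sets = 0; sets < 10; ++sets)
        {
          unsigned rounded = sets + (sets & 1);             // branch-free
          unsigned expected = (sets % 2 == 1) ? sets + 1 : sets;
          assert(rounded == expected && rounded % 2 == 0);
        }
    }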


tests/sanity/80columns.test

@@ -1,7 +1,7 @@
 #! /bin/sh
 # -*- coding: utf-8 -*-
-# Copyright (C) 2012, 2016, 2017 Laboratoire de Recherche et Développement de
-# l'Epita (LRDE).
+# Copyright (C) 2012, 2016-2017, 2019 Laboratoire de Recherche et
+# Développement de l'Epita (LRDE).
 # Copyright (C) 2004, 2005 Laboratoire d'Informatique de Paris 6
 # (LIP6), département Systèmes Répartis Coopératifs (SRC), Université
 # Pierre et Marie Curie.
@@ -54,6 +54,7 @@ for dir in "${srcdir-.}/../../spot" "${srcdir-.}/../../bin" "${srcdir-.}/.."; do
   while read file; do
     if (expand $file | grep -q $x) 2>/dev/null; then
       if grep 'GNU Bison' "$file" >/dev/null ||
+         grep '/robin-hood-hashing' "$file" >/dev/null ||
          grep 'generated by flex' "$file" >/dev/null ; then
         :
       else


tests/sanity/includes.test

@@ -1,7 +1,7 @@
 #! /bin/sh
 # -*- coding: utf-8 -*-
-# Copyright (C) 2008, 2011, 2012, 2016, 2018 Laboratoire de Recherche et
-# Développement de l'Epita (LRDE).
+# Copyright (C) 2008, 2011-2012, 2016, 2018-2019 Laboratoire de
+# Recherche et Développement de l'Epita (LRDE).
 # Copyright (C) 2004, 2005 Laboratoire d'Informatique de Paris 6
 # (LIP6), département Systèmes Répartis Coopératifs (SRC), Université
 # Pierre et Marie Curie.
@@ -39,6 +39,9 @@ for file in `find "$INCDIR" \( -name "${1-*}.hh" \
   elif grep -q 'made by GNU Bison' "$INCDIR/$file"; then
     # Those are not public headers, so we do not care
     continue
+  elif grep -q '/robin-hood-hashing' "$INCDIR/$file"; then
+    # Those are not public headers, so we do not care
+    continue
   else
     echo "FAIL: $file (missing #pragma once)"
     echo "      $file (missing #pragma once)" >> failures.inc


tests/sanity/style.test

@@ -68,6 +68,7 @@ for dir in "$TOP/spot" "$TOP/bin" "$TOP/tests"; do
        -a -type f -a -print |
   while read file; do
     if $GREP 'GNU Bison' "$file" >/dev/null ||
+       $GREP '/robin-hood-hashing' "$file" >/dev/null ||
        $GREP 'generated by flex' "$file" >/dev/null ; then
       continue
     fi