parseaut: improve parsing of HOA labels

On a debug build with the automaton from #476, the gain seems to be
about 33% of the parsing time.

* spot/parseaut/parseaut.yy, spot/parseaut/parsedecl.hh,
spot/parseaut/scanaut.ll: Share a hash map of string->BDD
between the scanner and parser so that [labels] can be looked
up by the scanner if they have already been parsed once.
* NEWS: Mention it.
This commit is contained in:
Alexandre Duret-Lutz 2021-09-13 17:21:49 +02:00
parent 9539fbcf4c
commit ce1cf5507f
4 changed files with 51 additions and 17 deletions

6
NEWS
View file

@ -232,6 +232,12 @@ New in spot 2.9.8.dev (not yet released)
that error messages should be emitted as if the next automaton
was read from "filename" starting on line 10. (Issue #232)
- The automaton parser for HOA will now be faster at parsing files
where a label is used multiple times (i.e., most automata): it
uses a hash table to avoid reconstructing BDDs corresponding to
label that have already been parsed. This trick was already used
in the never claim parser.
- spot::twa_graph::merge_states() will now sort the vector of edges
before attempting to detect mergeable states. When merging states
with identical outgoing transitions, it will now also consider

View file

@ -52,8 +52,8 @@ extern "C" int strverscmp(const char *s1, const char *s2);
#endif
// Work around Bison not letting us write
// %lex-param { res.h->errors }
#define PARSE_ERROR_LIST res.h->errors
// %lex-param { res.h->errors, res.fcache }
#define PARSE_ERROR_LIST res.h->errors, res.fcache
inline namespace hoayy_support
{
@ -237,26 +237,27 @@ extern "C" int strverscmp(const char *s1, const char *s2);
%token PROPERTIES "properties:"
%token BODY "--BODY--"
%token END "--END--"
%token STATE "State:";
%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:";
%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:";
%token SPOT_STATE_PLAYER "spot.state-player:";
%token <str> IDENTIFIER "identifier"; // also used by neverclaim
%token <str> HEADERNAME "header name";
%token <str> ANAME "alias name";
%token <str> STRING "string";
%token <num> INT "integer";
%token STATE "State:"
%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:"
%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:"
%token SPOT_STATE_PLAYER "spot.state-player:"
%token <str> IDENTIFIER "identifier" // also used by neverclaim
%token <str> HEADERNAME "header name"
%token <str> ANAME "alias name"
%token <str> STRING "string"
%token <num> INT "integer"
%token ENDOFFILE 0 "end of file"
%token <str> '['
%token DRA "DRA"
%token DSA "DSA"
%token V2 "v2"
%token EXPLICIT "explicit"
%token ACCPAIRS "Acceptance-Pairs:"
%token ACCSIG "Acc-Sig:";
%token ENDOFHEADER "---";
%token ACCSIG "Acc-Sig:"
%token ENDOFHEADER "---"
%token <str> LINEDIRECTIVE "#line"
%token <b> BDD
%left '|'
%left '&'
@ -1394,11 +1395,16 @@ label: '[' label-expr ']'
{
res.cur_label = bdd_from_int($2);
bdd_delref($2);
if ($1)
res.fcache[*$1] = res.cur_label;
delete $1;
}
| BDD { res.cur_label = bdd_from_int($1); }
| '[' error ']'
{
error(@$, "ignoring this invalid label");
res.cur_label = bddtrue;
delete $1;
}
state-label_opt: %empty { res.has_state_label = false; }
| label

View file

@ -1,5 +1,5 @@
// -*- coding: utf-8 -*-
// Copyright (C) 2014, 2015, 2017 Laboratoire de Recherche et
// Copyright (C) 2014, 2015, 2017, 2021 Laboratoire de Recherche et
// Développement de l'EPITA.
//
// This file is part of Spot, a model checking library.
@ -27,7 +27,8 @@
int hoayylex(hoayy::parser::semantic_type *yylval, \
spot::location *yylloc, \
void* yyscanner, \
spot::parse_aut_error_list& error_list)
spot::parse_aut_error_list& error_list, \
std::map<std::string, bdd>& fmap)
YY_DECL;
namespace spot

View file

@ -154,7 +154,28 @@ identifier [[:alpha:]_][[:alnum:]_.-]*
"--BODY--" return token::BODY;
"--END--" BEGIN(INITIAL); return token::END;
"State:" return token::STATE;
[tf{}()\[\]&|!] return *yytext;
"["[^\n\r\[\]]+"]" {
// For labels that do not span over several lines,
// we look them up in fmap to speed the construction
// of automata that use the same label multiple times.
std::string* s =
new std::string(yytext + 1, yyleng - 2);
if (auto i = fmap.find(*s); i != fmap.end())
{
delete s;
yylval->b = i->second.id();
return token::BDD;
}
yylval->str = s;
yylloc->end = yylloc->begin + 1;
yyless(1);
return *yytext;
}
"[" {
yylval->str = nullptr;
return *yytext;
}
[tf{}()\]&|!] return *yytext;
{identifier} {
yylval->str = new std::string(yytext, yyleng);