parseaut: improve parsing of HOA labels

On a debug build with the automaton from #476, the gain seems to be
about 33% of the parsing time.

* spot/parseaut/parseaut.yy, spot/parseaut/parsedecl.hh,
spot/parseaut/scanaut.ll: Share a hash map of string->BDD
between the scanner and parser so that [labels] can be looked
up by the scanner if they have already been parsed once.
* NEWS: Mention it.
This commit is contained in:
Alexandre Duret-Lutz 2021-09-13 17:21:49 +02:00
parent 9539fbcf4c
commit ce1cf5507f
4 changed files with 51 additions and 17 deletions

6
NEWS
View file

@ -232,6 +232,12 @@ New in spot 2.9.8.dev (not yet released)
that error messages should be emitted as if the next automaton that error messages should be emitted as if the next automaton
was read from "filename" starting on line 10. (Issue #232) was read from "filename" starting on line 10. (Issue #232)
- The automaton parser for HOA will now be faster at parsing files
where a label is used multiple times (i.e., most automata): it
uses a hash table to avoid reconstructing BDDs corresponding to
label that have already been parsed. This trick was already used
in the never claim parser.
- spot::twa_graph::merge_states() will now sort the vector of edges - spot::twa_graph::merge_states() will now sort the vector of edges
before attempting to detect mergeable states. When merging states before attempting to detect mergeable states. When merging states
with identical outgoing transitions, it will now also consider with identical outgoing transitions, it will now also consider

View file

@ -52,8 +52,8 @@ extern "C" int strverscmp(const char *s1, const char *s2);
#endif #endif
// Work around Bison not letting us write // Work around Bison not letting us write
// %lex-param { res.h->errors } // %lex-param { res.h->errors, res.fcache }
#define PARSE_ERROR_LIST res.h->errors #define PARSE_ERROR_LIST res.h->errors, res.fcache
inline namespace hoayy_support inline namespace hoayy_support
{ {
@ -237,26 +237,27 @@ extern "C" int strverscmp(const char *s1, const char *s2);
%token PROPERTIES "properties:" %token PROPERTIES "properties:"
%token BODY "--BODY--" %token BODY "--BODY--"
%token END "--END--" %token END "--END--"
%token STATE "State:"; %token STATE "State:"
%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:"; %token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:"
%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:"; %token SPOT_HIGHLIGHT_STATES "spot.highlight.states:"
%token SPOT_STATE_PLAYER "spot.state-player:"; %token SPOT_STATE_PLAYER "spot.state-player:"
%token <str> IDENTIFIER "identifier"; // also used by neverclaim %token <str> IDENTIFIER "identifier" // also used by neverclaim
%token <str> HEADERNAME "header name"; %token <str> HEADERNAME "header name"
%token <str> ANAME "alias name"; %token <str> ANAME "alias name"
%token <str> STRING "string"; %token <str> STRING "string"
%token <num> INT "integer"; %token <num> INT "integer"
%token ENDOFFILE 0 "end of file" %token ENDOFFILE 0 "end of file"
%token <str> '['
%token DRA "DRA" %token DRA "DRA"
%token DSA "DSA" %token DSA "DSA"
%token V2 "v2" %token V2 "v2"
%token EXPLICIT "explicit" %token EXPLICIT "explicit"
%token ACCPAIRS "Acceptance-Pairs:" %token ACCPAIRS "Acceptance-Pairs:"
%token ACCSIG "Acc-Sig:"; %token ACCSIG "Acc-Sig:"
%token ENDOFHEADER "---"; %token ENDOFHEADER "---"
%token <str> LINEDIRECTIVE "#line" %token <str> LINEDIRECTIVE "#line"
%token <b> BDD
%left '|' %left '|'
%left '&' %left '&'
@ -1394,11 +1395,16 @@ label: '[' label-expr ']'
{ {
res.cur_label = bdd_from_int($2); res.cur_label = bdd_from_int($2);
bdd_delref($2); bdd_delref($2);
if ($1)
res.fcache[*$1] = res.cur_label;
delete $1;
} }
| BDD { res.cur_label = bdd_from_int($1); }
| '[' error ']' | '[' error ']'
{ {
error(@$, "ignoring this invalid label"); error(@$, "ignoring this invalid label");
res.cur_label = bddtrue; res.cur_label = bddtrue;
delete $1;
} }
state-label_opt: %empty { res.has_state_label = false; } state-label_opt: %empty { res.has_state_label = false; }
| label | label

View file

@ -1,5 +1,5 @@
// -*- coding: utf-8 -*- // -*- coding: utf-8 -*-
// Copyright (C) 2014, 2015, 2017 Laboratoire de Recherche et // Copyright (C) 2014, 2015, 2017, 2021 Laboratoire de Recherche et
// Développement de l'EPITA. // Développement de l'EPITA.
// //
// This file is part of Spot, a model checking library. // This file is part of Spot, a model checking library.
@ -27,7 +27,8 @@
int hoayylex(hoayy::parser::semantic_type *yylval, \ int hoayylex(hoayy::parser::semantic_type *yylval, \
spot::location *yylloc, \ spot::location *yylloc, \
void* yyscanner, \ void* yyscanner, \
spot::parse_aut_error_list& error_list) spot::parse_aut_error_list& error_list, \
std::map<std::string, bdd>& fmap)
YY_DECL; YY_DECL;
namespace spot namespace spot

View file

@ -154,7 +154,28 @@ identifier [[:alpha:]_][[:alnum:]_.-]*
"--BODY--" return token::BODY; "--BODY--" return token::BODY;
"--END--" BEGIN(INITIAL); return token::END; "--END--" BEGIN(INITIAL); return token::END;
"State:" return token::STATE; "State:" return token::STATE;
[tf{}()\[\]&|!] return *yytext; "["[^\n\r\[\]]+"]" {
// For labels that do not span over several lines,
// we look them up in fmap to speed the construction
// of automata that use the same label multiple times.
std::string* s =
new std::string(yytext + 1, yyleng - 2);
if (auto i = fmap.find(*s); i != fmap.end())
{
delete s;
yylval->b = i->second.id();
return token::BDD;
}
yylval->str = s;
yylloc->end = yylloc->begin + 1;
yyless(1);
return *yytext;
}
"[" {
yylval->str = nullptr;
return *yytext;
}
[tf{}()\]&|!] return *yytext;
{identifier} { {identifier} {
yylval->str = new std::string(yytext, yyleng); yylval->str = new std::string(yytext, yyleng);