From ce1cf5507f2756124f8039e2164fdcc0030afc1b Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Mon, 13 Sep 2021 17:21:49 +0200 Subject: [PATCH] parseaut: improve parsing of HOA labels On a debug build with the automaton from #476, the gain seems to be about 33% of the parsing time. * spot/parseaut/parseaut.yy, spot/parseaut/parsedecl.hh, spot/parseaut/scanaut.ll: Share a hash map of string->BDD between the scanner and parser so that [labels] can be looked up by the scanner if they have already been parsed once. * NEWS: Mention it. --- NEWS | 6 ++++++ spot/parseaut/parseaut.yy | 34 ++++++++++++++++++++-------------- spot/parseaut/parsedecl.hh | 5 +++-- spot/parseaut/scanaut.ll | 23 ++++++++++++++++++++++- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index a425a352d..45607b1d1 100644 --- a/NEWS +++ b/NEWS @@ -232,6 +232,12 @@ New in spot 2.9.8.dev (not yet released) that error messages should be emitted as if the next automaton was read from "filename" starting on line 10. (Issue #232) + - The automaton parser for HOA will now be faster at parsing files + where a label is used multiple times (i.e., most automata): it + uses a hash table to avoid reconstructing BDDs corresponding to + label that have already been parsed. This trick was already used + in the never claim parser. + - spot::twa_graph::merge_states() will now sort the vector of edges before attempting to detect mergeable states. When merging states with identical outgoing transitions, it will now also consider diff --git a/spot/parseaut/parseaut.yy b/spot/parseaut/parseaut.yy index 2346bd242..3efa85047 100644 --- a/spot/parseaut/parseaut.yy +++ b/spot/parseaut/parseaut.yy @@ -52,8 +52,8 @@ extern "C" int strverscmp(const char *s1, const char *s2); #endif // Work around Bison not letting us write -// %lex-param { res.h->errors } -#define PARSE_ERROR_LIST res.h->errors +// %lex-param { res.h->errors, res.fcache } +#define PARSE_ERROR_LIST res.h->errors, res.fcache inline namespace hoayy_support { @@ -237,26 +237,27 @@ extern "C" int strverscmp(const char *s1, const char *s2); %token PROPERTIES "properties:" %token BODY "--BODY--" %token END "--END--" -%token STATE "State:"; -%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:"; -%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:"; -%token SPOT_STATE_PLAYER "spot.state-player:"; -%token IDENTIFIER "identifier"; // also used by neverclaim -%token HEADERNAME "header name"; -%token ANAME "alias name"; -%token STRING "string"; -%token INT "integer"; +%token STATE "State:" +%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:" +%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:" +%token SPOT_STATE_PLAYER "spot.state-player:" +%token IDENTIFIER "identifier" // also used by neverclaim +%token HEADERNAME "header name" +%token ANAME "alias name" +%token STRING "string" +%token INT "integer" %token ENDOFFILE 0 "end of file" +%token '[' %token DRA "DRA" %token DSA "DSA" %token V2 "v2" %token EXPLICIT "explicit" %token ACCPAIRS "Acceptance-Pairs:" -%token ACCSIG "Acc-Sig:"; -%token ENDOFHEADER "---"; +%token ACCSIG "Acc-Sig:" +%token ENDOFHEADER "---" %token LINEDIRECTIVE "#line" - +%token BDD %left '|' %left '&' @@ -1394,11 +1395,16 @@ label: '[' label-expr ']' { res.cur_label = bdd_from_int($2); bdd_delref($2); + if ($1) + res.fcache[*$1] = res.cur_label; + delete $1; } + | BDD { res.cur_label = bdd_from_int($1); } | '[' error ']' { error(@$, "ignoring this invalid label"); res.cur_label = bddtrue; + delete $1; } state-label_opt: %empty { res.has_state_label = false; } | label diff --git a/spot/parseaut/parsedecl.hh b/spot/parseaut/parsedecl.hh index 22ed90440..ad2d1ae78 100644 --- a/spot/parseaut/parsedecl.hh +++ b/spot/parseaut/parsedecl.hh @@ -1,5 +1,5 @@ // -*- coding: utf-8 -*- -// Copyright (C) 2014, 2015, 2017 Laboratoire de Recherche et +// Copyright (C) 2014, 2015, 2017, 2021 Laboratoire de Recherche et // Développement de l'EPITA. // // This file is part of Spot, a model checking library. @@ -27,7 +27,8 @@ int hoayylex(hoayy::parser::semantic_type *yylval, \ spot::location *yylloc, \ void* yyscanner, \ - spot::parse_aut_error_list& error_list) + spot::parse_aut_error_list& error_list, \ + std::map& fmap) YY_DECL; namespace spot diff --git a/spot/parseaut/scanaut.ll b/spot/parseaut/scanaut.ll index fdb6deb9b..d4d053381 100644 --- a/spot/parseaut/scanaut.ll +++ b/spot/parseaut/scanaut.ll @@ -154,7 +154,28 @@ identifier [[:alpha:]_][[:alnum:]_.-]* "--BODY--" return token::BODY; "--END--" BEGIN(INITIAL); return token::END; "State:" return token::STATE; - [tf{}()\[\]&|!] return *yytext; + "["[^\n\r\[\]]+"]" { + // For labels that do not span over several lines, + // we look them up in fmap to speed the construction + // of automata that use the same label multiple times. + std::string* s = + new std::string(yytext + 1, yyleng - 2); + if (auto i = fmap.find(*s); i != fmap.end()) + { + delete s; + yylval->b = i->second.id(); + return token::BDD; + } + yylval->str = s; + yylloc->end = yylloc->begin + 1; + yyless(1); + return *yytext; + } + "[" { + yylval->str = nullptr; + return *yytext; + } + [tf{}()\]&|!] return *yytext; {identifier} { yylval->str = new std::string(yytext, yyleng);