parseaut: improve parsing of HOA labels

On a debug build with the automaton from #476, the gain seems to be about 33% of the parsing time. * spot/parseaut/parseaut.yy, spot/parseaut/parsedecl.hh, spot/parseaut/scanaut.ll: Share a hash map of string->BDD between the scanner and parser so that [labels] can be looked up by the scanner if they have already been parsed once. * NEWS: Mention it.
2021-09-13 17:21:49 +02:00 · 2021-09-13 17:21:49 +02:00 · ce1cf5507f
commit ce1cf5507f
parent 9539fbcf4c
4 changed files with 51 additions and 17 deletions
--- a/6
+++ b/6
@ -232,6 +232,12 @@ New in spot 2.9.8.dev (not yet released)
    that error messages should be emitted as if the next automaton
    was read from "filename" starting on line 10.  (Issue #232)

+  - The automaton parser for HOA will now be faster at parsing files
+    where a label is used multiple times (i.e., most automata): it
+    uses a hash table to avoid reconstructing BDDs corresponding to
+    label that have already been parsed.  This trick was already used
+    in the never claim parser.
+
  - spot::twa_graph::merge_states() will now sort the vector of edges
    before attempting to detect mergeable states.  When merging states
    with identical outgoing transitions, it will now also consider
--- a/spot/parseaut/parseaut.yy
+++ b/spot/parseaut/parseaut.yy
@ -52,8 +52,8 @@ extern "C" int strverscmp(const char *s1, const char *s2);
 #endif

 // Work around Bison not letting us write
-//  %lex-param { res.h->errors }
-#define PARSE_ERROR_LIST res.h->errors
+//  %lex-param { res.h->errors, res.fcache }
+#define PARSE_ERROR_LIST res.h->errors, res.fcache

  inline namespace hoayy_support
  {
@ -237,26 +237,27 @@ extern "C" int strverscmp(const char *s1, const char *s2);
 %token PROPERTIES "properties:"
 %token BODY "--BODY--"
 %token END "--END--"
-%token STATE "State:";
-%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:";
-%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:";
-%token SPOT_STATE_PLAYER "spot.state-player:";
-%token <str> IDENTIFIER "identifier";  // also used by neverclaim
-%token <str> HEADERNAME "header name";
-%token <str> ANAME "alias name";
-%token <str> STRING "string";
-%token <num> INT "integer";
+%token STATE "State:"
+%token SPOT_HIGHLIGHT_EDGES "spot.highlight.edges:"
+%token SPOT_HIGHLIGHT_STATES "spot.highlight.states:"
+%token SPOT_STATE_PLAYER "spot.state-player:"
+%token <str> IDENTIFIER "identifier"  // also used by neverclaim
+%token <str> HEADERNAME "header name"
+%token <str> ANAME "alias name"
+%token <str> STRING "string"
+%token <num> INT "integer"
 %token ENDOFFILE 0 "end of file"
+%token <str> '['

 %token DRA "DRA"
 %token DSA "DSA"
 %token V2 "v2"
 %token EXPLICIT "explicit"
 %token ACCPAIRS "Acceptance-Pairs:"
-%token ACCSIG "Acc-Sig:";
-%token ENDOFHEADER "---";
+%token ACCSIG "Acc-Sig:"
+%token ENDOFHEADER "---"
 %token <str> LINEDIRECTIVE "#line"
-
+%token <b> BDD

 %left '|'
 %left '&'
@ -1394,11 +1395,16 @@ label: '[' label-expr ']'
 	   {
             res.cur_label = bdd_from_int($2);
             bdd_delref($2);
+             if ($1)
+               res.fcache[*$1] = res.cur_label;
+             delete $1;
 	   }
+     | BDD { res.cur_label = bdd_from_int($1); }
     | '[' error ']'
           {
 	     error(@$, "ignoring this invalid label");
 	     res.cur_label = bddtrue;
+             delete $1;
 	   }
 state-label_opt: %empty { res.has_state_label = false; }
               | label
--- a/spot/parseaut/parsedecl.hh
+++ b/spot/parseaut/parsedecl.hh
@ -1,5 +1,5 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2014, 2015, 2017 Laboratoire de Recherche et
+// Copyright (C) 2014, 2015, 2017, 2021 Laboratoire de Recherche et
 // Développement de l'EPITA.
 //
 // This file is part of Spot, a model checking library.
@ -27,7 +27,8 @@
  int hoayylex(hoayy::parser::semantic_type *yylval, \
               spot::location *yylloc, \
               void* yyscanner, \
-               spot::parse_aut_error_list& error_list)
+               spot::parse_aut_error_list& error_list, \
+               std::map<std::string, bdd>& fmap)
 YY_DECL;

 namespace spot
--- a/spot/parseaut/scanaut.ll
+++ b/spot/parseaut/scanaut.ll
@ -154,7 +154,28 @@ identifier  [[:alpha:]_][[:alnum:]_.-]*
  "--BODY--"		return token::BODY;
  "--END--"		BEGIN(INITIAL); return token::END;
  "State:"		return token::STATE;
-  [tf{}()\[\]&|!]	return *yytext;
+  "["[^\n\r\[\]]+"]"        {
+                          // For labels that do not span over several lines,
+                          // we look them up in fmap to speed the construction
+                          // of automata that use the same label multiple times.
+                          std::string* s =
+                              new std::string(yytext + 1, yyleng - 2);
+                          if (auto i = fmap.find(*s); i != fmap.end())
+                             {
+                                delete s;
+                                yylval->b = i->second.id();
+                                return token::BDD;
+                             }
+                          yylval->str = s;
+                          yylloc->end = yylloc->begin + 1;
+                          yyless(1);
+                          return *yytext;
+                        }
+  "["                   {
+                          yylval->str = nullptr;
+                          return *yytext;
+                        }
+  [tf{}()\]&|!]	        return *yytext;

  {identifier}          {
 			   yylval->str = new std::string(yytext, yyleng);