hoaparse: also accept LBTT input

This is probably the worse grammar I wrote: the LBTT format is designed to be scanned with scanf, and very inconvenient to parse with bison/flex. Here the scanner basically has to emulate a parser to classify the different INTs as tokens with different types. * src/hoaparse/hoaparse.yy, src/hoaparse/hoascan.ll: Add rules for LBTT. * src/hoaparse/parsedecl.hh: Add a way to reset the parser between each automata. * src/tgbatest/hoaparse.test, src/tgbatest/lbttparse.test: Add more tests.
2014-12-10 16:26:51 +01:00 · 2014-12-10 16:26:51 +01:00 · 6eb2b06fa7
commit 6eb2b06fa7
parent e4158c21ee
5 changed files with 340 additions and 72 deletions
--- a/src/hoaparse/hoaparse.yy
+++ b/src/hoaparse/hoaparse.yy
@ -37,6 +37,8 @@
 #include "ltlast/constant.hh"
 #include "tgba/formula2bdd.hh"
 #include "public.hh"
 #include "priv/accmap.hh"
 #include "ltlparse/public.hh"
  /* Cache parsed formulae.  Labels on arcs are frequently identical
   and it would be a waste of time to parse them to formula* over and
@ -58,6 +60,7 @@
    spot::ltl::environment* env;
    formula_cache fcache;
    named_tgba_t* namer = nullptr;
    spot::acc_mapper_int* acc_mapper = nullptr;
    std::vector<int> ap;
    std::vector<bdd> guards;
    std::vector<bdd>::const_iterator cur_guard;
@ -94,6 +97,7 @@
    ~result_()
    {
      delete namer;
      delete acc_mapper;
    }
  };
 }
@ -178,8 +182,16 @@
 %type <list> nc-transitions nc-transition-block
 %type <str> nc-one-ident nc-ident-list
-
+/**** LBTT tokens *****/
-
+ // Also using INT, STRING
 %token ENDAUT "-1"
 %token <num> LBTT "LBTT header"
 %token <num> INT_S "state acceptance"
 %token <num> LBTT_EMPTY "acceptance sets for empty automaton"
 %token <num> ACC "acceptance set"
 %token <num> STATE_NUM "state number"
 %token <num> DEST_NUM "destination number"
 %type <mark> lbtt-acc
 %destructor { delete $$; } <str>
 %destructor { bdd_delref($$); } <b>
@ -207,6 +219,7 @@ aut: aut-1     { res.h->loc = @$; YYACCEPT; }
 aut-1: hoa
     | never
     | lbtt
 /**********************************************************************/
@ -921,6 +934,93 @@ nc-transition:
      $$ = $3;
    }
 /**********************************************************************/
 /*                         Rules for LBTT                             */
 /**********************************************************************/
 lbtt: lbtt-header lbtt-body ENDAUT
    | lbtt-header-states LBTT_EMPTY
      {
        res.h->aut->acc().add_sets($2);
      }
 lbtt-header-states: LBTT
                  {
 		    res.states = $1;
 		    res.states_loc = @1;
 		    res.h->aut->new_states($1);
 		  }
 lbtt-header: lbtt-header-states INT_S
           {
 	     res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
 	     res.h->aut->prop_state_based_acc();
 	   }
           | lbtt-header-states INT
           {
 	     res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
 	   }
 lbtt-body: lbtt-states
 lbtt-states:
           | lbtt-states lbtt-state lbtt-transitions
 lbtt-state: STATE_NUM INT lbtt-acc
          {
 	    res.cur_state = $1;
 	    if ((int) res.cur_state >= res.states)
 	      {
 		error(@1, "state number is larger than state "
 		      "count...");
 		error(res.states_loc, "... declared here.");
 		res.cur_state = 0;
 	      }
 	    else if ($2)
 	      res.start.emplace_back(@1 + @2, $1);
 	    res.acc_state = $3;
 	  }
 lbtt-acc:               { $$ = 0U; }
        | lbtt-acc ACC
 	{
 	  $$  = $1;
 	  auto p = res.acc_mapper->lookup($2);
 	  if (p.first)
 	    $$ |= p.second;
 	  else
 	    error(@2, "more acceptance sets used than declared");
 	}
 lbtt-guard: STRING
          {
 	    spot::ltl::parse_error_list pel;
 	    auto* f = spot::ltl::parse_lbt(*$1, pel, *res.env);
 	    if (!f || !pel.empty())
 	      {
 		// FIXME: show pel.
 		error(@$, "failed to parse guard");
 		if (f)
 		  f->destroy();
 		res.cur_label = bddtrue;
 	      }
 	    else
 	      {
 		res.cur_label =
 		  formula_to_bdd(f, res.h->aut->get_dict(), res.h->aut);
 		f->destroy();
 	      }
 	    delete $1;
 	  }
 lbtt-transitions:
                | lbtt-transitions DEST_NUM lbtt-acc lbtt-guard
                {
 		  if ((int) $2 >= res.states)
 		    {
 		      error(@2, "state number is larger than state "
 			    "count...");
 		      error(res.states_loc, "... declared here.");
 		    }
 		  else
 		    res.h->aut->new_transition(res.cur_state, $2,
 					       res.cur_label,
 					       res.acc_state | $3);
 		}
 %%
 static void fill_guards(result_& r)
@ -1038,6 +1138,7 @@ namespace spot
    r.env = &env;
    hoayy::parser parser(error_list, r, last_loc);
    parser.set_debug_level(debug);
    hoayyreset();
    try
      {
 	if (parser.parse())
--- a/src/hoaparse/hoascan.ll
+++ b/src/hoaparse/hoascan.ll
@ -35,6 +35,9 @@ static unsigned comment_level = 0;
 static unsigned parent_level = 0;
 static int orig_cond = 0;
 static bool missing_parent = false;
 static bool lbtt_s = false;
 static bool lbtt_t = false;
 static unsigned lbtt_states = 0;
 %}
@ -43,13 +46,27 @@ eol2        (\n\r)+|(\r\n)+
 identifier  [[:alpha:]_][[:alnum:]_-]*
 %x in_COMMENT in_STRING in_NEVER_PAR
-%s in_HOA in_NEVER
+%s in_HOA in_NEVER in_LBTT_HEADER
-
+%s in_LBTT_STATE in_LBTT_INIT in_LBTT_TRANS
 %s in_LBTT_T_ACC in_LBTT_S_ACC in_LBTT_GUARD
 %%
 %{
  std::string s;
  yylloc->step();
  auto parse_int = [&](){
    errno = 0;
    char* end;
    unsigned long n = strtoul(yytext, &end, 10);
    yylval->num = n;
    if (errno || yylval->num != n)
      {
        error_list.push_back(spot::hoa_parse_error(*yylloc, "value too large"));
        yylval->num = 0;
      }
    return end;
  };
 %}
@ -62,14 +79,30 @@ identifier  [[:alpha:]_][[:alnum:]_-]*
 			  BEGIN(in_COMMENT);
 			  comment_level = 1;
 			}
-"\""                    {
+<INITIAL>"HOA:"           BEGIN(in_HOA); return token::HOA;
                          orig_cond = YY_START;
 			  BEGIN(in_STRING);
 			  comment_level = 1;
 			}
 "HOA:"                  BEGIN(in_HOA); return token::HOA;
 <INITIAL,in_HOA>"--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc};
-"never"			BEGIN(in_NEVER); return token::NEVER;
+<INITIAL>"never"	  BEGIN(in_NEVER); return token::NEVER;
 <INITIAL>[0-9]+[ \t][0-9]+[ts]?  {
 	                  BEGIN(in_LBTT_HEADER);
 			  char* end = 0;
 			  errno = 0;
 			  unsigned long n = strtoul(yytext, &end, 10);
 			  yylval->num = n;
 			  unsigned s = end - yytext;
 			  yylloc->end = yylloc->begin;
 			  yylloc->end.columns(s);
 			  yyless(s);
 			  if (errno || yylval->num != n)
 			    {
                              error_list.push_back(
 			        spot::hoa_parse_error(*yylloc,
 				  "value too large"));
 			      yylval->num = 0;
                            }
                          lbtt_states = yylval->num;
 			  return token::LBTT;
 			}
 <in_HOA>{
  "States:"		return token::STATES;
@ -98,19 +131,7 @@ identifier  [[:alpha:]_][[:alnum:]_-]*
 			   yylval->str = new std::string(yytext + 1, yyleng - 1);
 			   return token::ANAME;
 			}
-  [0-9]+		{
+  [0-9]+                parse_int(); return token::INT;
 			  errno = 0;
 			  unsigned long n = strtoul(yytext, 0, 10);
                          yylval->num = n;
 			  if (errno || yylval->num != n)
 			    {
                              error_list.push_back(
 			        spot::hoa_parse_error(*yylloc,
 				  "value too large"));
 			      yylval->num = 0;
                            }
                          return token::INT;
 			}
 }
 <in_NEVER>{
@ -140,7 +161,89 @@ identifier  [[:alpha:]_][[:alnum:]_-]*
 			  yylval->str = new std::string(yytext, yyleng);
 	                  return token::IDENTIFIER;
 		        }
 }
  /* Note: the LBTT format is scanf friendly, but not Bison-friendly.
     If we only tokenize it as a stream of INTs, the parser will have
     a very hard time recognizing what is a state from what is a
     transitions.  As a consequence we abuse the start conditions to
     maintain a state an return integers with different sementic types
     depending on the purpose of those integers. */
 <in_LBTT_HEADER>{
  [0-9]+[st]*           {
 			  BEGIN(in_LBTT_STATE);
                          auto end = parse_int();
 			  lbtt_s = false;
 			  lbtt_t = false;
 			  if (end)
 			    while (int c = *end++)
 			      if (c == 's')
 			        lbtt_s = true;
 			      else // c == 't'
 			        lbtt_t = true;
  		          if (!lbtt_t)
 			    lbtt_s = true;
 			  if (lbtt_states == 0)
 			    {
                              BEGIN(INITIAL);
                              return token::LBTT_EMPTY;
 			    }
 			  if (lbtt_s && !lbtt_t)
 			    return token::INT_S;
 			  else
 			    return token::INT;
 			}
 }
 <in_LBTT_STATE>[0-9]+   {
                           parse_int();
 			   BEGIN(in_LBTT_INIT);
 			   return token::STATE_NUM;
 			}
 <in_LBTT_INIT>[01]	{
                           yylval->num = *yytext - '0';
 			   if (lbtt_s)
 			      BEGIN(in_LBTT_S_ACC);
 			   else
 			      BEGIN(in_LBTT_TRANS);
 			   return token::INT;
 			}
 <in_LBTT_S_ACC>{
  [0-9]+		parse_int(); return token::ACC;
  "-1"                  BEGIN(in_LBTT_TRANS); yylloc->step();
 }
 <in_LBTT_TRANS>{
  [0-9+]                {
 			  parse_int();
 			  if (lbtt_t)
 			    BEGIN(in_LBTT_T_ACC);
 			  else
 			    BEGIN(in_LBTT_GUARD);
 			  return token::DEST_NUM;
 			}
  "-1"			{
                          if (--lbtt_states)
 			    {
 			       BEGIN(in_LBTT_STATE);
 			       yylloc->step();
 			    }
 			  else
 			    {
 			       BEGIN(INITIAL);
 			       return token::ENDAUT;
 			    }
 			}
 }
 <in_LBTT_T_ACC>{
  [0-9+]	        parse_int(); return token::ACC;
  "-1"			BEGIN(in_LBTT_GUARD); yylloc->step();
 }
 <in_LBTT_GUARD>{
  [^\n\r]*		{
  			  yylval->str = new std::string(yytext, yyleng);
 			  BEGIN(in_LBTT_TRANS);
 			  return token::STRING;
 			}
 }
@ -161,6 +264,13 @@ identifier  [[:alpha:]_][[:alnum:]_-]*
                        }
 }
  /* matched late, so that the in_LBTT_GUARD pattern has precedence */
 "\""                    {
                          orig_cond = YY_START;
 			  BEGIN(in_STRING);
 			  comment_level = 1;
 			}
 <in_STRING>{
  \"	                {
                           BEGIN(orig_cond);
@ -236,6 +346,15 @@ identifier  [[:alpha:]_][[:alnum:]_-]*
 namespace spot
 {
  void
  hoayyreset()
  {
    BEGIN(INITIAL);
    comment_level = 0;
    parent_level = 0;
    missing_parent = false;
  }
  int
  hoayyopen(const std::string &name)
  {
@ -253,10 +372,7 @@ namespace spot
    // Reset the lexer in case a previous parse
    // ended badly.
    YY_NEW_FILE;
-    BEGIN(INITIAL);
+    hoayyreset();
    comment_level = 0;
    parent_level = 0;
    missing_parent = false;
    return 0;
  }
--- a/src/hoaparse/parsedecl.hh
+++ b/src/hoaparse/parsedecl.hh
@ -32,6 +32,7 @@ YY_DECL;
 namespace spot
 {
  void hoayyreset();
  int hoayyopen(const std::string& name);
  void hoayyclose();
--- a/src/tgbatest/hoaparse.test
+++ b/src/tgbatest/hoaparse.test
@ -74,6 +74,14 @@ State: 0 {0}
 State: 1 {0}
 [t] 1
 --END--
 2 1t
 2 1
 1 -1 "b"
 0 -1 & "a" ! "b"
 -1
 1 0
 2 0 -1 t
 -1
 EOF
 expecterr input <<EOF
@ -83,6 +91,10 @@ input:9.8: state number is larger than state count...
 input:2.1-9: ... declared here.
 input:10.5: state number is larger than state count...
 input:2.1-9: ... declared here.
 input:13.1: state number is larger than state count...
 input:12.1: ... declared here.
 input:18.1: state number is larger than state count...
 input:12.1: ... declared here.
 EOF
 cat >input <<EOF
@ -482,7 +494,7 @@ EOF
 diff expected input.out
-# Mix HOA with neverclaims
+# Mix HOA with neverclaims and LBTT automata
 cat >input <<EOF
 HOA: v1
 States: 2
@ -500,6 +512,29 @@ State: 1
 [0] 0
 --END--
 /* some LBTT with mixed state/transition acceptance,
   just because we can */
 4 2ts
 0 1 -1
 1 -1 "c"
 2 -1 & "b" ! "c"
 3 -1 ! "c"
 -1
 1 0 1 -1
 1 0 -1 t
 -1
 2 0 -1
 1 -1 "c"
 2 -1 & "b" ! "c"
 -1
 3 0 -1
 3 0 1 -1 & "a" "b"
 3 1 -1 & "a" ! "b"
 3 0 -1 & ! "a" "b"
 3 -1 & ! "a" ! "b"
 -1
 never  {    /* a U b */
 T0_init:
        if
@ -539,6 +574,29 @@ State: 2 {0}
 [!1] 2
 --END--
 HOA: v1
 States: 4
 Start: 0
 AP: 3 "c" "b" "a"
 acc-name: generalized-Buchi 2
 Acceptance: 2 Inf(0)&Inf(1)
 properties: trans-labels explicit-labels trans-acc
 --BODY--
 State: 0
 [0] 1
 [!0&1] 2
 [!0] 3
 State: 1
 [t] 1 {0 1}
 State: 2
 [0] 1
 [!0&1] 2
 State: 3
 [1&2] 3 {0 1}
 [!1&2] 3 {0}
 [1&!2] 3 {1}
 [!1&!2] 3
 --END--
 HOA: v1
 States: 2
 Start: 0
 AP: 2 "b" "a"
@ -722,16 +780,17 @@ and even more garbage
 EOF
 se='syntax error, unexpected'  # this is just to keep lines short
 end='end of file or HOA: or never or LBTT header'
 expecterr input <<EOF
 input:8.11: $se identifier, expecting integer or '}'
 input:8.10-12: ignoring this invalid acceptance set
 input:11.2: $se identifier
 input:11.1-3: ignoring this invalid label
 input:21.5-7: $se string, expecting integer
-input:25.1: $se \$undefined, expecting end of file or HOA: or never
+input:25.1: $se \$undefined, expecting $end
 input:25.1-12: ignoring leading garbage
 input:32.1-5: $se header name, expecting --END-- or State:
-input:37.1: $se 't', expecting end of file or HOA: or never
+input:37.1: $se 't', expecting $end
 autfilt: failed to read automaton from input
 EOF
--- a/src/tgbatest/lbttparse.test
+++ b/src/tgbatest/lbttparse.test
@ -1,6 +1,6 @@
 #!/bin/sh
 # -*- coding: utf-8 -*-
-# Copyright (C) 2012, 2013 Laboratoire de Recherche et Développement
+# Copyright (C) 2012, 2013, 2014 Laboratoire de Recherche et Développement
 # de l'Epita (LRDE).
 #
 # This file is part of Spot, a model checking library.
@ -25,45 +25,41 @@ set -e
 for f in 'p0 U p1 U p2' 'Gp00 | Gp13 | Gp42' '{(1;1)*}[]->p1'
 do
  # Make sure Spot can read the LBTT it produces
-  run 0 ../ltl2tgba -t "$f" > out
+  run 0 ../../bin/ltl2tgba --lbtt "$f" > out
  s=`wc -l < out`
-  run 0 ../ltl2tgba -t -XL out > out2
+  head -n 1 out | grep t
  run 0 ../../bin/autfilt --lbtt out > out2
  s2=`wc -l < out2`
  test "$s" -eq "$s2"
  # The LBTT output use 2 lines par state, one line per transition,
  # and one extra line for header.
-  run 0 ../ltl2tgba -ks "$f" > size
+  run 0 ../../bin/ltl2tgba "$f" --stats 'expr %s \* 2 + %e + 1' > size
-  st=`cat size | sed -n 's/states: //p'`
+  l=$(eval "$(cat size)")
  tr=`cat size | sed -n 's/transitions: //p'`
  l=`expr $st \* 2 + $tr + 1`
  test "$s" -eq "$l"
  # Do the same with bin/ltl2tgba
  run 0 ../../bin/ltl2tgba --low --any --lbtt "$f" >out3
  cmp out out3
  head -n 1 out3 | grep t
  # Make sure we output the state-based format
  # for BA...
  run 0 ../../bin/ltl2tgba --ba --lbtt --low --any "$f" >out4
  head -n 1 out4 | grep t && exit 1
  s4=`wc -l < out4`
  test "$s" -eq "$s4"
-  run 0 ../ltl2tgba -t -XL out4 > out5
+  run 0 ../../bin/autfilt --lbtt out4 > out5
-  s5=`wc -l < out5`
+  run 0 ../../bin/autfilt out4 --are-isomorphic out5
  test "$s" -eq "$s5"
  # ... unless --lbtt=t is used.
  run 0 ../../bin/ltl2tgba --ba --lbtt=t --low --any "$f" >out6
  head -n 1 out6 | grep t
  s6=`wc -l < out6`
  test "$s" -eq "$s6"
-  run 0 ../ltl2tgba -t -XL out6 > out7
+  run 0 ../../bin/autfilt --lbtt out6 > out7
-  s7=`wc -l < out7`
+  run 0 ../../bin/autfilt out6 --are-isomorphic out7
  test "$s" -eq "$s7"
 done
-# This is the output of 'lbt' on the formula 'U p0 p1'.
+
-cat >Up0p1 <<EOF
+# multiple inputs (from different tools)
 cat >input <<EOF
 /* This is the output of 'lbt' on the formula 'U p0 p1'. */
 4 1
 0 1 -1
 1 p0
@ -79,16 +75,8 @@ cat >Up0p1 <<EOF
 3 0 0 -1
 3 t
 -1
-EOF
+/* This kind of output is returned by wring2lbtt, on the same formula.
-
+   (Newer versions of LBTT reject this input with missing new lines.) */
 run 0 ../ltl2tgba -ks -XL Up0p1 > size
 test "`cat size | sed -n 's/states: //p'`" = 4
 test "`cat size | sed -n 's/transitions: //p'`" = 6
 # This kind of output is returned by wring2lbtt, on the same formula.
 # (Newer versions of LBTT reject this input with missing new lines.)
 cat >wring2lbtt <<EOF
 4 1 0 1 -1 1 p0
 2 p1
 -1 1 0 -1 1 p0
@ -96,15 +84,12 @@ cat >wring2lbtt <<EOF
 -1 2 0 0 -1 3 t
 -1 3 0 0 -1 3 t
 -1
-EOF
+/* This is an automaton without state and three acceptance sets.
-
+   Spot is not able to deal with automata that do not have initial
-run 0 ../ltl2tgba -ks -XL wring2lbtt > size
+   state, so it will add a dummy state. */
-test "`cat size | sed -n 's/states: //p'`" = 4
+0 3
-test "`cat size | sed -n 's/transitions: //p'`" = 6
+/* Another example from wring2lbtt (or modella), showing that the
-
+   acceptance set of the states is not always numbered from 0. */
 # Another example from wring2lbtt (or modella), showing that the
 # acceptance set of the state is not always numbered from 0.
 cat >wring2lbtt2 <<EOF
 6 1 0 1 -1 1 | & ! p0 ! p1 & p0 ! p1
 2 & ! p0 ! p1
 3 | & p0 p1 & ! p0 p1
@ -117,6 +102,12 @@ cat >wring2lbtt2 <<EOF
 -1
 EOF
-run 0 ../ltl2tgba -ks -XL wring2lbtt2 > size
+run 0 ../../bin/autfilt --stats '%s %t %e %a' input > output
-test "`cat size | sed -n 's/states: //p'`" = 6
+cat >expected<<EOF
-test "`cat size | sed -n 's/transitions: //p'`" = 9
+4 16 6 1
 4 16 6 1
 1 0 0 3
 6 20 9 1
 EOF
 diff output expected