diff --git a/src/hoaparse/hoaparse.yy b/src/hoaparse/hoaparse.yy index 7b6b2c5cc..89db32899 100644 --- a/src/hoaparse/hoaparse.yy +++ b/src/hoaparse/hoaparse.yy @@ -37,6 +37,8 @@ #include "ltlast/constant.hh" #include "tgba/formula2bdd.hh" #include "public.hh" +#include "priv/accmap.hh" +#include "ltlparse/public.hh" /* Cache parsed formulae. Labels on arcs are frequently identical and it would be a waste of time to parse them to formula* over and @@ -58,6 +60,7 @@ spot::ltl::environment* env; formula_cache fcache; named_tgba_t* namer = nullptr; + spot::acc_mapper_int* acc_mapper = nullptr; std::vector ap; std::vector guards; std::vector::const_iterator cur_guard; @@ -94,6 +97,7 @@ ~result_() { delete namer; + delete acc_mapper; } }; } @@ -178,8 +182,16 @@ %type nc-transitions nc-transition-block %type nc-one-ident nc-ident-list - - +/**** LBTT tokens *****/ + // Also using INT, STRING +%token ENDAUT "-1" +%token LBTT "LBTT header" +%token INT_S "state acceptance" +%token LBTT_EMPTY "acceptance sets for empty automaton" +%token ACC "acceptance set" +%token STATE_NUM "state number" +%token DEST_NUM "destination number" +%type lbtt-acc %destructor { delete $$; } %destructor { bdd_delref($$); } @@ -207,6 +219,7 @@ aut: aut-1 { res.h->loc = @$; YYACCEPT; } aut-1: hoa | never + | lbtt /**********************************************************************/ @@ -921,6 +934,93 @@ nc-transition: $$ = $3; } +/**********************************************************************/ +/* Rules for LBTT */ +/**********************************************************************/ + +lbtt: lbtt-header lbtt-body ENDAUT + | lbtt-header-states LBTT_EMPTY + { + res.h->aut->acc().add_sets($2); + } + +lbtt-header-states: LBTT + { + res.states = $1; + res.states_loc = @1; + res.h->aut->new_states($1); + } +lbtt-header: lbtt-header-states INT_S + { + res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2); + res.h->aut->prop_state_based_acc(); + } + | lbtt-header-states INT + { + res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2); + } +lbtt-body: lbtt-states +lbtt-states: + | lbtt-states lbtt-state lbtt-transitions +lbtt-state: STATE_NUM INT lbtt-acc + { + res.cur_state = $1; + if ((int) res.cur_state >= res.states) + { + error(@1, "state number is larger than state " + "count..."); + error(res.states_loc, "... declared here."); + res.cur_state = 0; + } + else if ($2) + res.start.emplace_back(@1 + @2, $1); + res.acc_state = $3; + } +lbtt-acc: { $$ = 0U; } + | lbtt-acc ACC + { + $$ = $1; + auto p = res.acc_mapper->lookup($2); + if (p.first) + $$ |= p.second; + else + error(@2, "more acceptance sets used than declared"); + } +lbtt-guard: STRING + { + spot::ltl::parse_error_list pel; + auto* f = spot::ltl::parse_lbt(*$1, pel, *res.env); + if (!f || !pel.empty()) + { + // FIXME: show pel. + error(@$, "failed to parse guard"); + if (f) + f->destroy(); + res.cur_label = bddtrue; + } + else + { + res.cur_label = + formula_to_bdd(f, res.h->aut->get_dict(), res.h->aut); + f->destroy(); + } + delete $1; + } +lbtt-transitions: + | lbtt-transitions DEST_NUM lbtt-acc lbtt-guard + { + if ((int) $2 >= res.states) + { + error(@2, "state number is larger than state " + "count..."); + error(res.states_loc, "... declared here."); + } + else + res.h->aut->new_transition(res.cur_state, $2, + res.cur_label, + res.acc_state | $3); + } + %% static void fill_guards(result_& r) @@ -1038,6 +1138,7 @@ namespace spot r.env = &env; hoayy::parser parser(error_list, r, last_loc); parser.set_debug_level(debug); + hoayyreset(); try { if (parser.parse()) diff --git a/src/hoaparse/hoascan.ll b/src/hoaparse/hoascan.ll index 58a7b0a0b..1ab0390e4 100644 --- a/src/hoaparse/hoascan.ll +++ b/src/hoaparse/hoascan.ll @@ -35,6 +35,9 @@ static unsigned comment_level = 0; static unsigned parent_level = 0; static int orig_cond = 0; static bool missing_parent = false; +static bool lbtt_s = false; +static bool lbtt_t = false; +static unsigned lbtt_states = 0; %} @@ -43,13 +46,27 @@ eol2 (\n\r)+|(\r\n)+ identifier [[:alpha:]_][[:alnum:]_-]* %x in_COMMENT in_STRING in_NEVER_PAR -%s in_HOA in_NEVER - +%s in_HOA in_NEVER in_LBTT_HEADER +%s in_LBTT_STATE in_LBTT_INIT in_LBTT_TRANS +%s in_LBTT_T_ACC in_LBTT_S_ACC in_LBTT_GUARD %% %{ std::string s; yylloc->step(); + + auto parse_int = [&](){ + errno = 0; + char* end; + unsigned long n = strtoul(yytext, &end, 10); + yylval->num = n; + if (errno || yylval->num != n) + { + error_list.push_back(spot::hoa_parse_error(*yylloc, "value too large")); + yylval->num = 0; + } + return end; + }; %} @@ -62,14 +79,30 @@ identifier [[:alpha:]_][[:alnum:]_-]* BEGIN(in_COMMENT); comment_level = 1; } -"\"" { - orig_cond = YY_START; - BEGIN(in_STRING); - comment_level = 1; - } -"HOA:" BEGIN(in_HOA); return token::HOA; +"HOA:" BEGIN(in_HOA); return token::HOA; "--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc}; -"never" BEGIN(in_NEVER); return token::NEVER; +"never" BEGIN(in_NEVER); return token::NEVER; + +[0-9]+[ \t][0-9]+[ts]? { + BEGIN(in_LBTT_HEADER); + char* end = 0; + errno = 0; + unsigned long n = strtoul(yytext, &end, 10); + yylval->num = n; + unsigned s = end - yytext; + yylloc->end = yylloc->begin; + yylloc->end.columns(s); + yyless(s); + if (errno || yylval->num != n) + { + error_list.push_back( + spot::hoa_parse_error(*yylloc, + "value too large")); + yylval->num = 0; + } + lbtt_states = yylval->num; + return token::LBTT; + } { "States:" return token::STATES; @@ -98,19 +131,7 @@ identifier [[:alpha:]_][[:alnum:]_-]* yylval->str = new std::string(yytext + 1, yyleng - 1); return token::ANAME; } - [0-9]+ { - errno = 0; - unsigned long n = strtoul(yytext, 0, 10); - yylval->num = n; - if (errno || yylval->num != n) - { - error_list.push_back( - spot::hoa_parse_error(*yylloc, - "value too large")); - yylval->num = 0; - } - return token::INT; - } + [0-9]+ parse_int(); return token::INT; } { @@ -140,7 +161,89 @@ identifier [[:alpha:]_][[:alnum:]_-]* yylval->str = new std::string(yytext, yyleng); return token::IDENTIFIER; } +} + /* Note: the LBTT format is scanf friendly, but not Bison-friendly. + If we only tokenize it as a stream of INTs, the parser will have + a very hard time recognizing what is a state from what is a + transitions. As a consequence we abuse the start conditions to + maintain a state an return integers with different sementic types + depending on the purpose of those integers. */ +{ + [0-9]+[st]* { + BEGIN(in_LBTT_STATE); + auto end = parse_int(); + lbtt_s = false; + lbtt_t = false; + if (end) + while (int c = *end++) + if (c == 's') + lbtt_s = true; + else // c == 't' + lbtt_t = true; + if (!lbtt_t) + lbtt_s = true; + if (lbtt_states == 0) + { + BEGIN(INITIAL); + return token::LBTT_EMPTY; + } + if (lbtt_s && !lbtt_t) + return token::INT_S; + else + return token::INT; + } +} + +[0-9]+ { + parse_int(); + BEGIN(in_LBTT_INIT); + return token::STATE_NUM; + } +[01] { + yylval->num = *yytext - '0'; + if (lbtt_s) + BEGIN(in_LBTT_S_ACC); + else + BEGIN(in_LBTT_TRANS); + return token::INT; + } +{ + [0-9]+ parse_int(); return token::ACC; + "-1" BEGIN(in_LBTT_TRANS); yylloc->step(); +} +{ + [0-9+] { + parse_int(); + if (lbtt_t) + BEGIN(in_LBTT_T_ACC); + else + BEGIN(in_LBTT_GUARD); + return token::DEST_NUM; + } + "-1" { + if (--lbtt_states) + { + BEGIN(in_LBTT_STATE); + yylloc->step(); + } + else + { + BEGIN(INITIAL); + return token::ENDAUT; + } + } +} +{ + [0-9+] parse_int(); return token::ACC; + "-1" BEGIN(in_LBTT_GUARD); yylloc->step(); +} +{ + [^\n\r]* { + yylval->str = new std::string(yytext, yyleng); + BEGIN(in_LBTT_TRANS); + return token::STRING; + } } @@ -161,6 +264,13 @@ identifier [[:alpha:]_][[:alnum:]_-]* } } + /* matched late, so that the in_LBTT_GUARD pattern has precedence */ +"\"" { + orig_cond = YY_START; + BEGIN(in_STRING); + comment_level = 1; + } + { \" { BEGIN(orig_cond); @@ -236,6 +346,15 @@ identifier [[:alpha:]_][[:alnum:]_-]* namespace spot { + void + hoayyreset() + { + BEGIN(INITIAL); + comment_level = 0; + parent_level = 0; + missing_parent = false; + } + int hoayyopen(const std::string &name) { @@ -253,10 +372,7 @@ namespace spot // Reset the lexer in case a previous parse // ended badly. YY_NEW_FILE; - BEGIN(INITIAL); - comment_level = 0; - parent_level = 0; - missing_parent = false; + hoayyreset(); return 0; } diff --git a/src/hoaparse/parsedecl.hh b/src/hoaparse/parsedecl.hh index 232b196a1..e3ddf4ccf 100644 --- a/src/hoaparse/parsedecl.hh +++ b/src/hoaparse/parsedecl.hh @@ -32,6 +32,7 @@ YY_DECL; namespace spot { + void hoayyreset(); int hoayyopen(const std::string& name); void hoayyclose(); diff --git a/src/tgbatest/hoaparse.test b/src/tgbatest/hoaparse.test index 9ba078ab0..85497be07 100755 --- a/src/tgbatest/hoaparse.test +++ b/src/tgbatest/hoaparse.test @@ -74,6 +74,14 @@ State: 0 {0} State: 1 {0} [t] 1 --END-- +2 1t +2 1 +1 -1 "b" +0 -1 & "a" ! "b" +-1 +1 0 +2 0 -1 t +-1 EOF expecterr input <input <input <p1' do # Make sure Spot can read the LBTT it produces - run 0 ../ltl2tgba -t "$f" > out + run 0 ../../bin/ltl2tgba --lbtt "$f" > out s=`wc -l < out` - run 0 ../ltl2tgba -t -XL out > out2 + head -n 1 out | grep t + run 0 ../../bin/autfilt --lbtt out > out2 s2=`wc -l < out2` test "$s" -eq "$s2" # The LBTT output use 2 lines par state, one line per transition, # and one extra line for header. - run 0 ../ltl2tgba -ks "$f" > size - st=`cat size | sed -n 's/states: //p'` - tr=`cat size | sed -n 's/transitions: //p'` - l=`expr $st \* 2 + $tr + 1` + run 0 ../../bin/ltl2tgba "$f" --stats 'expr %s \* 2 + %e + 1' > size + l=$(eval "$(cat size)") test "$s" -eq "$l" - # Do the same with bin/ltl2tgba - run 0 ../../bin/ltl2tgba --low --any --lbtt "$f" >out3 - cmp out out3 - head -n 1 out3 | grep t # Make sure we output the state-based format # for BA... run 0 ../../bin/ltl2tgba --ba --lbtt --low --any "$f" >out4 head -n 1 out4 | grep t && exit 1 s4=`wc -l < out4` test "$s" -eq "$s4" - run 0 ../ltl2tgba -t -XL out4 > out5 - s5=`wc -l < out5` - test "$s" -eq "$s5" + run 0 ../../bin/autfilt --lbtt out4 > out5 + run 0 ../../bin/autfilt out4 --are-isomorphic out5 # ... unless --lbtt=t is used. run 0 ../../bin/ltl2tgba --ba --lbtt=t --low --any "$f" >out6 head -n 1 out6 | grep t s6=`wc -l < out6` test "$s" -eq "$s6" - run 0 ../ltl2tgba -t -XL out6 > out7 - s7=`wc -l < out7` - test "$s" -eq "$s7" + run 0 ../../bin/autfilt --lbtt out6 > out7 + run 0 ../../bin/autfilt out6 --are-isomorphic out7 done -# This is the output of 'lbt' on the formula 'U p0 p1'. -cat >Up0p1 <input <Up0p1 < size -test "`cat size | sed -n 's/states: //p'`" = 4 -test "`cat size | sed -n 's/transitions: //p'`" = 6 - - -# This kind of output is returned by wring2lbtt, on the same formula. -# (Newer versions of LBTT reject this input with missing new lines.) -cat >wring2lbtt <wring2lbtt < size -test "`cat size | sed -n 's/states: //p'`" = 4 -test "`cat size | sed -n 's/transitions: //p'`" = 6 - -# Another example from wring2lbtt (or modella), showing that the -# acceptance set of the state is not always numbered from 0. -cat >wring2lbtt2 <wring2lbtt2 < size -test "`cat size | sed -n 's/states: //p'`" = 6 -test "`cat size | sed -n 's/transitions: //p'`" = 9 +run 0 ../../bin/autfilt --stats '%s %t %e %a' input > output +cat >expected<