hoaparse: also accept LBTT input

This is probably the worse grammar I wrote: the LBTT format is designed
to be scanned with scanf, and very inconvenient to parse with
bison/flex.  Here the scanner basically has to emulate a parser to
classify the different INTs as tokens with different types.

* src/hoaparse/hoaparse.yy, src/hoaparse/hoascan.ll: Add rules for LBTT.
* src/hoaparse/parsedecl.hh: Add a way to reset the parser between each
automata.
* src/tgbatest/hoaparse.test, src/tgbatest/lbttparse.test: Add more
tests.
This commit is contained in:
Alexandre Duret-Lutz 2014-12-10 16:26:51 +01:00
parent e4158c21ee
commit 6eb2b06fa7
5 changed files with 340 additions and 72 deletions

View file

@ -37,6 +37,8 @@
#include "ltlast/constant.hh" #include "ltlast/constant.hh"
#include "tgba/formula2bdd.hh" #include "tgba/formula2bdd.hh"
#include "public.hh" #include "public.hh"
#include "priv/accmap.hh"
#include "ltlparse/public.hh"
/* Cache parsed formulae. Labels on arcs are frequently identical /* Cache parsed formulae. Labels on arcs are frequently identical
and it would be a waste of time to parse them to formula* over and and it would be a waste of time to parse them to formula* over and
@ -58,6 +60,7 @@
spot::ltl::environment* env; spot::ltl::environment* env;
formula_cache fcache; formula_cache fcache;
named_tgba_t* namer = nullptr; named_tgba_t* namer = nullptr;
spot::acc_mapper_int* acc_mapper = nullptr;
std::vector<int> ap; std::vector<int> ap;
std::vector<bdd> guards; std::vector<bdd> guards;
std::vector<bdd>::const_iterator cur_guard; std::vector<bdd>::const_iterator cur_guard;
@ -94,6 +97,7 @@
~result_() ~result_()
{ {
delete namer; delete namer;
delete acc_mapper;
} }
}; };
} }
@ -178,8 +182,16 @@
%type <list> nc-transitions nc-transition-block %type <list> nc-transitions nc-transition-block
%type <str> nc-one-ident nc-ident-list %type <str> nc-one-ident nc-ident-list
/**** LBTT tokens *****/
// Also using INT, STRING
%token ENDAUT "-1"
%token <num> LBTT "LBTT header"
%token <num> INT_S "state acceptance"
%token <num> LBTT_EMPTY "acceptance sets for empty automaton"
%token <num> ACC "acceptance set"
%token <num> STATE_NUM "state number"
%token <num> DEST_NUM "destination number"
%type <mark> lbtt-acc
%destructor { delete $$; } <str> %destructor { delete $$; } <str>
%destructor { bdd_delref($$); } <b> %destructor { bdd_delref($$); } <b>
@ -207,6 +219,7 @@ aut: aut-1 { res.h->loc = @$; YYACCEPT; }
aut-1: hoa aut-1: hoa
| never | never
| lbtt
/**********************************************************************/ /**********************************************************************/
@ -921,6 +934,93 @@ nc-transition:
$$ = $3; $$ = $3;
} }
/**********************************************************************/
/* Rules for LBTT */
/**********************************************************************/
lbtt: lbtt-header lbtt-body ENDAUT
| lbtt-header-states LBTT_EMPTY
{
res.h->aut->acc().add_sets($2);
}
lbtt-header-states: LBTT
{
res.states = $1;
res.states_loc = @1;
res.h->aut->new_states($1);
}
lbtt-header: lbtt-header-states INT_S
{
res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
res.h->aut->prop_state_based_acc();
}
| lbtt-header-states INT
{
res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
}
lbtt-body: lbtt-states
lbtt-states:
| lbtt-states lbtt-state lbtt-transitions
lbtt-state: STATE_NUM INT lbtt-acc
{
res.cur_state = $1;
if ((int) res.cur_state >= res.states)
{
error(@1, "state number is larger than state "
"count...");
error(res.states_loc, "... declared here.");
res.cur_state = 0;
}
else if ($2)
res.start.emplace_back(@1 + @2, $1);
res.acc_state = $3;
}
lbtt-acc: { $$ = 0U; }
| lbtt-acc ACC
{
$$ = $1;
auto p = res.acc_mapper->lookup($2);
if (p.first)
$$ |= p.second;
else
error(@2, "more acceptance sets used than declared");
}
lbtt-guard: STRING
{
spot::ltl::parse_error_list pel;
auto* f = spot::ltl::parse_lbt(*$1, pel, *res.env);
if (!f || !pel.empty())
{
// FIXME: show pel.
error(@$, "failed to parse guard");
if (f)
f->destroy();
res.cur_label = bddtrue;
}
else
{
res.cur_label =
formula_to_bdd(f, res.h->aut->get_dict(), res.h->aut);
f->destroy();
}
delete $1;
}
lbtt-transitions:
| lbtt-transitions DEST_NUM lbtt-acc lbtt-guard
{
if ((int) $2 >= res.states)
{
error(@2, "state number is larger than state "
"count...");
error(res.states_loc, "... declared here.");
}
else
res.h->aut->new_transition(res.cur_state, $2,
res.cur_label,
res.acc_state | $3);
}
%% %%
static void fill_guards(result_& r) static void fill_guards(result_& r)
@ -1038,6 +1138,7 @@ namespace spot
r.env = &env; r.env = &env;
hoayy::parser parser(error_list, r, last_loc); hoayy::parser parser(error_list, r, last_loc);
parser.set_debug_level(debug); parser.set_debug_level(debug);
hoayyreset();
try try
{ {
if (parser.parse()) if (parser.parse())

View file

@ -35,6 +35,9 @@ static unsigned comment_level = 0;
static unsigned parent_level = 0; static unsigned parent_level = 0;
static int orig_cond = 0; static int orig_cond = 0;
static bool missing_parent = false; static bool missing_parent = false;
static bool lbtt_s = false;
static bool lbtt_t = false;
static unsigned lbtt_states = 0;
%} %}
@ -43,13 +46,27 @@ eol2 (\n\r)+|(\r\n)+
identifier [[:alpha:]_][[:alnum:]_-]* identifier [[:alpha:]_][[:alnum:]_-]*
%x in_COMMENT in_STRING in_NEVER_PAR %x in_COMMENT in_STRING in_NEVER_PAR
%s in_HOA in_NEVER %s in_HOA in_NEVER in_LBTT_HEADER
%s in_LBTT_STATE in_LBTT_INIT in_LBTT_TRANS
%s in_LBTT_T_ACC in_LBTT_S_ACC in_LBTT_GUARD
%% %%
%{ %{
std::string s; std::string s;
yylloc->step(); yylloc->step();
auto parse_int = [&](){
errno = 0;
char* end;
unsigned long n = strtoul(yytext, &end, 10);
yylval->num = n;
if (errno || yylval->num != n)
{
error_list.push_back(spot::hoa_parse_error(*yylloc, "value too large"));
yylval->num = 0;
}
return end;
};
%} %}
@ -62,14 +79,30 @@ identifier [[:alpha:]_][[:alnum:]_-]*
BEGIN(in_COMMENT); BEGIN(in_COMMENT);
comment_level = 1; comment_level = 1;
} }
"\"" { <INITIAL>"HOA:" BEGIN(in_HOA); return token::HOA;
orig_cond = YY_START;
BEGIN(in_STRING);
comment_level = 1;
}
"HOA:" BEGIN(in_HOA); return token::HOA;
<INITIAL,in_HOA>"--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc}; <INITIAL,in_HOA>"--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc};
"never" BEGIN(in_NEVER); return token::NEVER; <INITIAL>"never" BEGIN(in_NEVER); return token::NEVER;
<INITIAL>[0-9]+[ \t][0-9]+[ts]? {
BEGIN(in_LBTT_HEADER);
char* end = 0;
errno = 0;
unsigned long n = strtoul(yytext, &end, 10);
yylval->num = n;
unsigned s = end - yytext;
yylloc->end = yylloc->begin;
yylloc->end.columns(s);
yyless(s);
if (errno || yylval->num != n)
{
error_list.push_back(
spot::hoa_parse_error(*yylloc,
"value too large"));
yylval->num = 0;
}
lbtt_states = yylval->num;
return token::LBTT;
}
<in_HOA>{ <in_HOA>{
"States:" return token::STATES; "States:" return token::STATES;
@ -98,19 +131,7 @@ identifier [[:alpha:]_][[:alnum:]_-]*
yylval->str = new std::string(yytext + 1, yyleng - 1); yylval->str = new std::string(yytext + 1, yyleng - 1);
return token::ANAME; return token::ANAME;
} }
[0-9]+ { [0-9]+ parse_int(); return token::INT;
errno = 0;
unsigned long n = strtoul(yytext, 0, 10);
yylval->num = n;
if (errno || yylval->num != n)
{
error_list.push_back(
spot::hoa_parse_error(*yylloc,
"value too large"));
yylval->num = 0;
}
return token::INT;
}
} }
<in_NEVER>{ <in_NEVER>{
@ -140,7 +161,89 @@ identifier [[:alpha:]_][[:alnum:]_-]*
yylval->str = new std::string(yytext, yyleng); yylval->str = new std::string(yytext, yyleng);
return token::IDENTIFIER; return token::IDENTIFIER;
} }
}
/* Note: the LBTT format is scanf friendly, but not Bison-friendly.
If we only tokenize it as a stream of INTs, the parser will have
a very hard time recognizing what is a state from what is a
transitions. As a consequence we abuse the start conditions to
maintain a state an return integers with different sementic types
depending on the purpose of those integers. */
<in_LBTT_HEADER>{
[0-9]+[st]* {
BEGIN(in_LBTT_STATE);
auto end = parse_int();
lbtt_s = false;
lbtt_t = false;
if (end)
while (int c = *end++)
if (c == 's')
lbtt_s = true;
else // c == 't'
lbtt_t = true;
if (!lbtt_t)
lbtt_s = true;
if (lbtt_states == 0)
{
BEGIN(INITIAL);
return token::LBTT_EMPTY;
}
if (lbtt_s && !lbtt_t)
return token::INT_S;
else
return token::INT;
}
}
<in_LBTT_STATE>[0-9]+ {
parse_int();
BEGIN(in_LBTT_INIT);
return token::STATE_NUM;
}
<in_LBTT_INIT>[01] {
yylval->num = *yytext - '0';
if (lbtt_s)
BEGIN(in_LBTT_S_ACC);
else
BEGIN(in_LBTT_TRANS);
return token::INT;
}
<in_LBTT_S_ACC>{
[0-9]+ parse_int(); return token::ACC;
"-1" BEGIN(in_LBTT_TRANS); yylloc->step();
}
<in_LBTT_TRANS>{
[0-9+] {
parse_int();
if (lbtt_t)
BEGIN(in_LBTT_T_ACC);
else
BEGIN(in_LBTT_GUARD);
return token::DEST_NUM;
}
"-1" {
if (--lbtt_states)
{
BEGIN(in_LBTT_STATE);
yylloc->step();
}
else
{
BEGIN(INITIAL);
return token::ENDAUT;
}
}
}
<in_LBTT_T_ACC>{
[0-9+] parse_int(); return token::ACC;
"-1" BEGIN(in_LBTT_GUARD); yylloc->step();
}
<in_LBTT_GUARD>{
[^\n\r]* {
yylval->str = new std::string(yytext, yyleng);
BEGIN(in_LBTT_TRANS);
return token::STRING;
}
} }
@ -161,6 +264,13 @@ identifier [[:alpha:]_][[:alnum:]_-]*
} }
} }
/* matched late, so that the in_LBTT_GUARD pattern has precedence */
"\"" {
orig_cond = YY_START;
BEGIN(in_STRING);
comment_level = 1;
}
<in_STRING>{ <in_STRING>{
\" { \" {
BEGIN(orig_cond); BEGIN(orig_cond);
@ -236,6 +346,15 @@ identifier [[:alpha:]_][[:alnum:]_-]*
namespace spot namespace spot
{ {
void
hoayyreset()
{
BEGIN(INITIAL);
comment_level = 0;
parent_level = 0;
missing_parent = false;
}
int int
hoayyopen(const std::string &name) hoayyopen(const std::string &name)
{ {
@ -253,10 +372,7 @@ namespace spot
// Reset the lexer in case a previous parse // Reset the lexer in case a previous parse
// ended badly. // ended badly.
YY_NEW_FILE; YY_NEW_FILE;
BEGIN(INITIAL); hoayyreset();
comment_level = 0;
parent_level = 0;
missing_parent = false;
return 0; return 0;
} }

View file

@ -32,6 +32,7 @@ YY_DECL;
namespace spot namespace spot
{ {
void hoayyreset();
int hoayyopen(const std::string& name); int hoayyopen(const std::string& name);
void hoayyclose(); void hoayyclose();

View file

@ -74,6 +74,14 @@ State: 0 {0}
State: 1 {0} State: 1 {0}
[t] 1 [t] 1
--END-- --END--
2 1t
2 1
1 -1 "b"
0 -1 & "a" ! "b"
-1
1 0
2 0 -1 t
-1
EOF EOF
expecterr input <<EOF expecterr input <<EOF
@ -83,6 +91,10 @@ input:9.8: state number is larger than state count...
input:2.1-9: ... declared here. input:2.1-9: ... declared here.
input:10.5: state number is larger than state count... input:10.5: state number is larger than state count...
input:2.1-9: ... declared here. input:2.1-9: ... declared here.
input:13.1: state number is larger than state count...
input:12.1: ... declared here.
input:18.1: state number is larger than state count...
input:12.1: ... declared here.
EOF EOF
cat >input <<EOF cat >input <<EOF
@ -482,7 +494,7 @@ EOF
diff expected input.out diff expected input.out
# Mix HOA with neverclaims # Mix HOA with neverclaims and LBTT automata
cat >input <<EOF cat >input <<EOF
HOA: v1 HOA: v1
States: 2 States: 2
@ -500,6 +512,29 @@ State: 1
[0] 0 [0] 0
--END-- --END--
/* some LBTT with mixed state/transition acceptance,
just because we can */
4 2ts
0 1 -1
1 -1 "c"
2 -1 & "b" ! "c"
3 -1 ! "c"
-1
1 0 1 -1
1 0 -1 t
-1
2 0 -1
1 -1 "c"
2 -1 & "b" ! "c"
-1
3 0 -1
3 0 1 -1 & "a" "b"
3 1 -1 & "a" ! "b"
3 0 -1 & ! "a" "b"
3 -1 & ! "a" ! "b"
-1
never { /* a U b */ never { /* a U b */
T0_init: T0_init:
if if
@ -539,6 +574,29 @@ State: 2 {0}
[!1] 2 [!1] 2
--END-- --END--
HOA: v1 HOA: v1
States: 4
Start: 0
AP: 3 "c" "b" "a"
acc-name: generalized-Buchi 2
Acceptance: 2 Inf(0)&Inf(1)
properties: trans-labels explicit-labels trans-acc
--BODY--
State: 0
[0] 1
[!0&1] 2
[!0] 3
State: 1
[t] 1 {0 1}
State: 2
[0] 1
[!0&1] 2
State: 3
[1&2] 3 {0 1}
[!1&2] 3 {0}
[1&!2] 3 {1}
[!1&!2] 3
--END--
HOA: v1
States: 2 States: 2
Start: 0 Start: 0
AP: 2 "b" "a" AP: 2 "b" "a"
@ -722,16 +780,17 @@ and even more garbage
EOF EOF
se='syntax error, unexpected' # this is just to keep lines short se='syntax error, unexpected' # this is just to keep lines short
end='end of file or HOA: or never or LBTT header'
expecterr input <<EOF expecterr input <<EOF
input:8.11: $se identifier, expecting integer or '}' input:8.11: $se identifier, expecting integer or '}'
input:8.10-12: ignoring this invalid acceptance set input:8.10-12: ignoring this invalid acceptance set
input:11.2: $se identifier input:11.2: $se identifier
input:11.1-3: ignoring this invalid label input:11.1-3: ignoring this invalid label
input:21.5-7: $se string, expecting integer input:21.5-7: $se string, expecting integer
input:25.1: $se \$undefined, expecting end of file or HOA: or never input:25.1: $se \$undefined, expecting $end
input:25.1-12: ignoring leading garbage input:25.1-12: ignoring leading garbage
input:32.1-5: $se header name, expecting --END-- or State: input:32.1-5: $se header name, expecting --END-- or State:
input:37.1: $se 't', expecting end of file or HOA: or never input:37.1: $se 't', expecting $end
autfilt: failed to read automaton from input autfilt: failed to read automaton from input
EOF EOF

View file

@ -1,6 +1,6 @@
#!/bin/sh #!/bin/sh
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2012, 2013 Laboratoire de Recherche et Développement # Copyright (C) 2012, 2013, 2014 Laboratoire de Recherche et Développement
# de l'Epita (LRDE). # de l'Epita (LRDE).
# #
# This file is part of Spot, a model checking library. # This file is part of Spot, a model checking library.
@ -25,45 +25,41 @@ set -e
for f in 'p0 U p1 U p2' 'Gp00 | Gp13 | Gp42' '{(1;1)*}[]->p1' for f in 'p0 U p1 U p2' 'Gp00 | Gp13 | Gp42' '{(1;1)*}[]->p1'
do do
# Make sure Spot can read the LBTT it produces # Make sure Spot can read the LBTT it produces
run 0 ../ltl2tgba -t "$f" > out run 0 ../../bin/ltl2tgba --lbtt "$f" > out
s=`wc -l < out` s=`wc -l < out`
run 0 ../ltl2tgba -t -XL out > out2 head -n 1 out | grep t
run 0 ../../bin/autfilt --lbtt out > out2
s2=`wc -l < out2` s2=`wc -l < out2`
test "$s" -eq "$s2" test "$s" -eq "$s2"
# The LBTT output use 2 lines par state, one line per transition, # The LBTT output use 2 lines par state, one line per transition,
# and one extra line for header. # and one extra line for header.
run 0 ../ltl2tgba -ks "$f" > size run 0 ../../bin/ltl2tgba "$f" --stats 'expr %s \* 2 + %e + 1' > size
st=`cat size | sed -n 's/states: //p'` l=$(eval "$(cat size)")
tr=`cat size | sed -n 's/transitions: //p'`
l=`expr $st \* 2 + $tr + 1`
test "$s" -eq "$l" test "$s" -eq "$l"
# Do the same with bin/ltl2tgba
run 0 ../../bin/ltl2tgba --low --any --lbtt "$f" >out3
cmp out out3
head -n 1 out3 | grep t
# Make sure we output the state-based format # Make sure we output the state-based format
# for BA... # for BA...
run 0 ../../bin/ltl2tgba --ba --lbtt --low --any "$f" >out4 run 0 ../../bin/ltl2tgba --ba --lbtt --low --any "$f" >out4
head -n 1 out4 | grep t && exit 1 head -n 1 out4 | grep t && exit 1
s4=`wc -l < out4` s4=`wc -l < out4`
test "$s" -eq "$s4" test "$s" -eq "$s4"
run 0 ../ltl2tgba -t -XL out4 > out5 run 0 ../../bin/autfilt --lbtt out4 > out5
s5=`wc -l < out5` run 0 ../../bin/autfilt out4 --are-isomorphic out5
test "$s" -eq "$s5"
# ... unless --lbtt=t is used. # ... unless --lbtt=t is used.
run 0 ../../bin/ltl2tgba --ba --lbtt=t --low --any "$f" >out6 run 0 ../../bin/ltl2tgba --ba --lbtt=t --low --any "$f" >out6
head -n 1 out6 | grep t head -n 1 out6 | grep t
s6=`wc -l < out6` s6=`wc -l < out6`
test "$s" -eq "$s6" test "$s" -eq "$s6"
run 0 ../ltl2tgba -t -XL out6 > out7 run 0 ../../bin/autfilt --lbtt out6 > out7
s7=`wc -l < out7` run 0 ../../bin/autfilt out6 --are-isomorphic out7
test "$s" -eq "$s7"
done done
# This is the output of 'lbt' on the formula 'U p0 p1'.
cat >Up0p1 <<EOF # multiple inputs (from different tools)
cat >input <<EOF
/* This is the output of 'lbt' on the formula 'U p0 p1'. */
4 1 4 1
0 1 -1 0 1 -1
1 p0 1 p0
@ -79,16 +75,8 @@ cat >Up0p1 <<EOF
3 0 0 -1 3 0 0 -1
3 t 3 t
-1 -1
EOF /* This kind of output is returned by wring2lbtt, on the same formula.
(Newer versions of LBTT reject this input with missing new lines.) */
run 0 ../ltl2tgba -ks -XL Up0p1 > size
test "`cat size | sed -n 's/states: //p'`" = 4
test "`cat size | sed -n 's/transitions: //p'`" = 6
# This kind of output is returned by wring2lbtt, on the same formula.
# (Newer versions of LBTT reject this input with missing new lines.)
cat >wring2lbtt <<EOF
4 1 0 1 -1 1 p0 4 1 0 1 -1 1 p0
2 p1 2 p1
-1 1 0 -1 1 p0 -1 1 0 -1 1 p0
@ -96,15 +84,12 @@ cat >wring2lbtt <<EOF
-1 2 0 0 -1 3 t -1 2 0 0 -1 3 t
-1 3 0 0 -1 3 t -1 3 0 0 -1 3 t
-1 -1
EOF /* This is an automaton without state and three acceptance sets.
Spot is not able to deal with automata that do not have initial
run 0 ../ltl2tgba -ks -XL wring2lbtt > size state, so it will add a dummy state. */
test "`cat size | sed -n 's/states: //p'`" = 4 0 3
test "`cat size | sed -n 's/transitions: //p'`" = 6 /* Another example from wring2lbtt (or modella), showing that the
acceptance set of the states is not always numbered from 0. */
# Another example from wring2lbtt (or modella), showing that the
# acceptance set of the state is not always numbered from 0.
cat >wring2lbtt2 <<EOF
6 1 0 1 -1 1 | & ! p0 ! p1 & p0 ! p1 6 1 0 1 -1 1 | & ! p0 ! p1 & p0 ! p1
2 & ! p0 ! p1 2 & ! p0 ! p1
3 | & p0 p1 & ! p0 p1 3 | & p0 p1 & ! p0 p1
@ -117,6 +102,12 @@ cat >wring2lbtt2 <<EOF
-1 -1
EOF EOF
run 0 ../ltl2tgba -ks -XL wring2lbtt2 > size run 0 ../../bin/autfilt --stats '%s %t %e %a' input > output
test "`cat size | sed -n 's/states: //p'`" = 6 cat >expected<<EOF
test "`cat size | sed -n 's/transitions: //p'`" = 9 4 16 6 1
4 16 6 1
1 0 0 3
6 20 9 1
EOF
diff output expected