hoaparse: also accept LBTT input
This is probably the worst grammar I have written: the LBTT format is designed to be scanned with scanf, and is very inconvenient to parse with bison/flex. Here the scanner basically has to emulate a parser to classify the different INTs as tokens with different types. * src/hoaparse/hoaparse.yy, src/hoaparse/hoascan.ll: Add rules for LBTT. * src/hoaparse/parsedecl.hh: Add a way to reset the parser between automata. * src/tgbatest/hoaparse.test, src/tgbatest/lbttparse.test: Add more tests.
This commit is contained in:
parent
e4158c21ee
commit
6eb2b06fa7
5 changed files with 340 additions and 72 deletions
|
|
@ -37,6 +37,8 @@
|
|||
#include "ltlast/constant.hh"
|
||||
#include "tgba/formula2bdd.hh"
|
||||
#include "public.hh"
|
||||
#include "priv/accmap.hh"
|
||||
#include "ltlparse/public.hh"
|
||||
|
||||
/* Cache parsed formulae. Labels on arcs are frequently identical
|
||||
and it would be a waste of time to parse them to formula* over and
|
||||
|
|
@ -58,6 +60,7 @@
|
|||
spot::ltl::environment* env;
|
||||
formula_cache fcache;
|
||||
named_tgba_t* namer = nullptr;
|
||||
spot::acc_mapper_int* acc_mapper = nullptr;
|
||||
std::vector<int> ap;
|
||||
std::vector<bdd> guards;
|
||||
std::vector<bdd>::const_iterator cur_guard;
|
||||
|
|
@ -94,6 +97,7 @@
|
|||
~result_()
|
||||
{
|
||||
delete namer;
|
||||
delete acc_mapper;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -178,8 +182,16 @@
|
|||
%type <list> nc-transitions nc-transition-block
|
||||
%type <str> nc-one-ident nc-ident-list
|
||||
|
||||
|
||||
|
||||
/**** LBTT tokens *****/
|
||||
// Also using INT, STRING
|
||||
%token ENDAUT "-1"
|
||||
%token <num> LBTT "LBTT header"
|
||||
%token <num> INT_S "state acceptance"
|
||||
%token <num> LBTT_EMPTY "acceptance sets for empty automaton"
|
||||
%token <num> ACC "acceptance set"
|
||||
%token <num> STATE_NUM "state number"
|
||||
%token <num> DEST_NUM "destination number"
|
||||
%type <mark> lbtt-acc
|
||||
|
||||
%destructor { delete $$; } <str>
|
||||
%destructor { bdd_delref($$); } <b>
|
||||
|
|
@ -207,6 +219,7 @@ aut: aut-1 { res.h->loc = @$; YYACCEPT; }
|
|||
|
||||
aut-1: hoa
|
||||
| never
|
||||
| lbtt
|
||||
|
||||
|
||||
/**********************************************************************/
|
||||
|
|
@ -921,6 +934,93 @@ nc-transition:
|
|||
$$ = $3;
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
/* Rules for LBTT */
|
||||
/**********************************************************************/
|
||||
|
||||
/* An LBTT automaton is either a complete header followed by the state
   descriptions and the closing "-1", or -- when the scanner reports an
   empty automaton -- the state count followed only by the number of
   acceptance sets.  */
lbtt: lbtt-header lbtt-body ENDAUT
    | lbtt-header-states LBTT_EMPTY
      {
        // No state description follows: only the acceptance sets
        // have to be declared on the automaton.
        res.h->aut->acc().add_sets($2);
      }
|
||||
|
||||
/* First field of the header: the declared number of states.  */
lbtt-header-states: LBTT
      {
        // Keep the count and its location around: both are used later
        // to range-check state numbers and to report errors.
        res.states_loc = @1;
        res.states = $1;
        res.h->aut->new_states($1);
      }
|
||||
/* Second field of the header: the number of acceptance sets.  The
   scanner returns it as INT_S when the acceptance is purely
   state-based, and as a plain INT otherwise.  */
lbtt-header: lbtt-header-states INT_S
      {
        res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
        // Record on the automaton that acceptance is state-based.
        res.h->aut->prop_state_based_acc();
      }
    | lbtt-header-states INT
      {
        res.acc_mapper = new spot::acc_mapper_int(res.h->aut, $2);
      }
|
||||
/* The body is a possibly empty sequence of states, each one followed
   by its outgoing transitions.  */
lbtt-body: lbtt-states

lbtt-states: /* empty */
    | lbtt-states lbtt-state lbtt-transitions
|
||||
/* A state: its number, an initial-state flag, and (for state-based
   acceptance) the acceptance sets it belongs to.  */
lbtt-state: STATE_NUM INT lbtt-acc
      {
        // Acceptance marks of the state are merged into each of its
        // outgoing transitions by the lbtt-transitions rule.
        res.acc_state = $3;
        res.cur_state = $1;
        if ((int) res.cur_state < res.states)
          {
            // A non-zero flag marks an initial state.
            if ($2)
              res.start.emplace_back(@1 + @2, $1);
          }
        else
          {
            error(@1, "state number is larger than state "
                  "count...");
            error(res.states_loc, "... declared here.");
            // Fall back to state 0 so the transitions that follow
            // still have a valid source.
            res.cur_state = 0;
          }
      }
|
||||
/* A possibly empty list of acceptance-set numbers, accumulated into a
   single mark.  */
lbtt-acc: { $$ = 0U; }
    | lbtt-acc ACC
      {
        const auto found = res.acc_mapper->lookup($2);
        $$ = $1;
        // lookup() returns a (success, mark) pair; a failed lookup
        // leaves the accumulated mark unchanged.
        if (!found.first)
          error(@2, "more acceptance sets used than declared");
        else
          $$ |= found.second;
      }
|
||||
/* The guard of a transition: the rest of the line, parsed as a
   formula in LBT syntax and converted to a BDD in res.cur_label.  */
lbtt-guard: STRING
      {
        spot::ltl::parse_error_list pel;
        auto* f = spot::ltl::parse_lbt(*$1, pel, *res.env);
        const bool parsed_ok = f && pel.empty();
        if (parsed_ok)
          {
            res.cur_label =
              formula_to_bdd(f, res.h->aut->get_dict(), res.h->aut);
          }
        else
          {
            // FIXME: show pel.
            error(@$, "failed to parse guard");
            // Use "true" as the label of an unparsable guard.
            res.cur_label = bddtrue;
          }
        // The formula is only needed to build the BDD.
        if (f)
          f->destroy();
        delete $1;
      }
|
||||
/* A possibly empty list of transitions leaving the current state:
   destination, transition-based acceptance sets, then the guard.  */
lbtt-transitions: /* empty */
    | lbtt-transitions DEST_NUM lbtt-acc lbtt-guard
      {
        if ((int) $2 < res.states)
          {
            // Combine the acceptance marks of the source state with
            // those given on the transition itself.
            res.h->aut->new_transition(res.cur_state, $2,
                                       res.cur_label,
                                       res.acc_state | $3);
          }
        else
          {
            error(@2, "state number is larger than state "
                  "count...");
            error(res.states_loc, "... declared here.");
          }
      }
|
||||
|
||||
%%
|
||||
|
||||
static void fill_guards(result_& r)
|
||||
|
|
@ -1038,6 +1138,7 @@ namespace spot
|
|||
r.env = &env;
|
||||
hoayy::parser parser(error_list, r, last_loc);
|
||||
parser.set_debug_level(debug);
|
||||
hoayyreset();
|
||||
try
|
||||
{
|
||||
if (parser.parse())
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ static unsigned comment_level = 0;
|
|||
static unsigned parent_level = 0;
|
||||
static int orig_cond = 0;
|
||||
static bool missing_parent = false;
|
||||
static bool lbtt_s = false;
|
||||
static bool lbtt_t = false;
|
||||
static unsigned lbtt_states = 0;
|
||||
|
||||
%}
|
||||
|
||||
|
|
@ -43,13 +46,27 @@ eol2 (\n\r)+|(\r\n)+
|
|||
identifier [[:alpha:]_][[:alnum:]_-]*
|
||||
|
||||
%x in_COMMENT in_STRING in_NEVER_PAR
|
||||
%s in_HOA in_NEVER
|
||||
|
||||
%s in_HOA in_NEVER in_LBTT_HEADER
|
||||
%s in_LBTT_STATE in_LBTT_INIT in_LBTT_TRANS
|
||||
%s in_LBTT_T_ACC in_LBTT_S_ACC in_LBTT_GUARD
|
||||
%%
|
||||
|
||||
%{
|
||||
std::string s;
|
||||
yylloc->step();
|
||||
|
||||
// Convert the decimal number at the start of yytext into yylval->num.
// A value that strtoul rejects, or that does not survive the
// conversion to the type of yylval->num, is reported as "value too
// large" and replaced by 0.  Returns the pointer to the first
// character that strtoul did not consume.
auto parse_int = [&]() -> char* {
  char* stop;
  errno = 0;
  const unsigned long parsed = strtoul(yytext, &stop, 10);
  yylval->num = parsed;
  const bool too_large = errno || yylval->num != parsed;
  if (too_large)
    {
      error_list.push_back(spot::hoa_parse_error(*yylloc, "value too large"));
      yylval->num = 0;
    }
  return stop;
};
|
||||
%}
|
||||
|
||||
|
||||
|
|
@ -62,14 +79,30 @@ identifier [[:alpha:]_][[:alnum:]_-]*
|
|||
BEGIN(in_COMMENT);
|
||||
comment_level = 1;
|
||||
}
|
||||
"\"" {
|
||||
orig_cond = YY_START;
|
||||
BEGIN(in_STRING);
|
||||
comment_level = 1;
|
||||
}
|
||||
"HOA:" BEGIN(in_HOA); return token::HOA;
|
||||
<INITIAL>"HOA:" BEGIN(in_HOA); return token::HOA;
|
||||
<INITIAL,in_HOA>"--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc};
|
||||
"never" BEGIN(in_NEVER); return token::NEVER;
|
||||
<INITIAL>"never" BEGIN(in_NEVER); return token::NEVER;
|
||||
|
||||
<INITIAL>[0-9]+[ \t][0-9]+[ts]? {
    // Two integers on one line (the second optionally suffixed by
    // 't' or 's') start an LBTT automaton.  Only the first integer,
    // the number of states, is consumed here; the rest is pushed
    // back and rescanned in the in_LBTT_HEADER start condition.
    char* stop = nullptr;
    errno = 0;
    const unsigned long parsed = strtoul(yytext, &stop, 10);
    yylval->num = parsed;

    // Shrink both the location and the match to the first integer.
    const unsigned len = stop - yytext;
    yylloc->end = yylloc->begin;
    yylloc->end.columns(len);
    yyless(len);

    if (errno || yylval->num != parsed)
      {
        error_list.push_back(
          spot::hoa_parse_error(*yylloc,
                                "value too large"));
        yylval->num = 0;
      }
    // The "-1" rule of in_LBTT_TRANS counts states down from this
    // value to detect the end of the automaton.
    lbtt_states = yylval->num;
    BEGIN(in_LBTT_HEADER);
    return token::LBTT;
}
|
||||
|
||||
<in_HOA>{
|
||||
"States:" return token::STATES;
|
||||
|
|
@ -98,19 +131,7 @@ identifier [[:alpha:]_][[:alnum:]_-]*
|
|||
yylval->str = new std::string(yytext + 1, yyleng - 1);
|
||||
return token::ANAME;
|
||||
}
|
||||
[0-9]+ {
|
||||
errno = 0;
|
||||
unsigned long n = strtoul(yytext, 0, 10);
|
||||
yylval->num = n;
|
||||
if (errno || yylval->num != n)
|
||||
{
|
||||
error_list.push_back(
|
||||
spot::hoa_parse_error(*yylloc,
|
||||
"value too large"));
|
||||
yylval->num = 0;
|
||||
}
|
||||
return token::INT;
|
||||
}
|
||||
[0-9]+ parse_int(); return token::INT;
|
||||
}
|
||||
|
||||
<in_NEVER>{
|
||||
|
|
@ -140,7 +161,89 @@ identifier [[:alpha:]_][[:alnum:]_-]*
|
|||
yylval->str = new std::string(yytext, yyleng);
|
||||
return token::IDENTIFIER;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note: the LBTT format is scanf friendly, but not Bison-friendly.
|
||||
If we only tokenize it as a stream of INTs, the parser will have
|
||||
a very hard time recognizing what is a state from what is a
|
||||
transition. As a consequence we abuse the start conditions to
|
||||
maintain a state and return integers with different semantic types
|
||||
depending on the purpose of those integers. */
|
||||
<in_LBTT_HEADER>{
  [0-9]+[st]* {
      // Second header field: the number of acceptance sets,
      // optionally followed by 's' and/or 't' flags that are
      // recorded in lbtt_s and lbtt_t.
      const char* flags = parse_int();
      lbtt_s = false;
      lbtt_t = false;
      if (flags)
        for (; *flags; ++flags)
          {
            if (*flags == 's')
              lbtt_s = true;
            else // *flags == 't'
              lbtt_t = true;
          }
      // Without an explicit 't', acceptance is state-based.
      if (!lbtt_t)
        lbtt_s = true;

      // An automaton declared with zero states has no body.
      if (lbtt_states == 0)
        {
          BEGIN(INITIAL);
          return token::LBTT_EMPTY;
        }
      BEGIN(in_LBTT_STATE);
      return (lbtt_s && !lbtt_t) ? token::INT_S : token::INT;
  }
}
|
||||
|
||||
<in_LBTT_STATE>[0-9]+ {
    // A state number; the initial-state flag is scanned next.
    BEGIN(in_LBTT_INIT);
    parse_int();
    return token::STATE_NUM;
}
|
||||
<in_LBTT_INIT>[01] {
    // Initial-state flag, a single '0' or '1'.  What follows is the
    // list of acceptance sets of the state when lbtt_s is set, and
    // the first transition otherwise.
    yylval->num = *yytext - '0';
    BEGIN(lbtt_s ? in_LBTT_S_ACC : in_LBTT_TRANS);
    return token::INT;
}
|
||||
<in_LBTT_S_ACC>{
  [0-9]+ {
      // One acceptance set the current state belongs to.
      parse_int();
      return token::ACC;
  }
  "-1" {
      // End of the acceptance list: transitions come next.  No
      // token is returned for this terminator.
      BEGIN(in_LBTT_TRANS);
      yylloc->step();
  }
}
|
||||
<in_LBTT_TRANS>{
  [0-9]+ {
      // Destination state of a transition.  The pattern has to be
      // "[0-9]+" (one or more digits): "[0-9+]" is a character class
      // that matches a single digit or a literal '+', which splits
      // any multi-digit state number into several tokens.
      parse_int();
      // The destination is followed by acceptance sets only when the
      // header carried the 't' flag; otherwise the guard comes next.
      if (lbtt_t)
        BEGIN(in_LBTT_T_ACC);
      else
        BEGIN(in_LBTT_GUARD);
      return token::DEST_NUM;
  }
  "-1" {
      // End of the transition list of the current state.  Count the
      // states down so that the last one ends the automaton.
      if (--lbtt_states)
        {
          BEGIN(in_LBTT_STATE);
          yylloc->step();
        }
      else
        {
          BEGIN(INITIAL);
          return token::ENDAUT;
        }
  }
}
|
||||
<in_LBTT_T_ACC>{
  [0-9]+ {
      // One acceptance set of the current transition.  The pattern
      // has to be "[0-9]+", as in in_LBTT_S_ACC: "[0-9+]" is a
      // character class matching a single digit or a literal '+',
      // so set numbers above 9 would be split into several tokens.
      parse_int();
      return token::ACC;
  }
  "-1" {
      // End of the acceptance list: the guard comes next.  No token
      // is returned for this terminator.
      BEGIN(in_LBTT_GUARD);
      yylloc->step();
  }
}
|
||||
<in_LBTT_GUARD>{
  [^\n\r]* {
      // The guard is everything up to the end of the line; it is
      // handed to the parser as one string.  The next token is
      // scanned as another transition (or the closing "-1").
      BEGIN(in_LBTT_TRANS);
      yylval->str = new std::string(yytext, yyleng);
      return token::STRING;
  }
}
|
||||
|
||||
|
||||
|
|
@ -161,6 +264,13 @@ identifier [[:alpha:]_][[:alnum:]_-]*
|
|||
}
|
||||
}
|
||||
|
||||
/* matched late, so that the in_LBTT_GUARD pattern has precedence */
|
||||
"\"" {
    // Opening quote of a string.  Save the current start condition
    // so that the closing quote can restore it with
    // BEGIN(orig_cond).
    comment_level = 1;
    orig_cond = YY_START;
    BEGIN(in_STRING);
}
|
||||
|
||||
<in_STRING>{
|
||||
\" {
|
||||
BEGIN(orig_cond);
|
||||
|
|
@ -236,6 +346,15 @@ identifier [[:alpha:]_][[:alnum:]_-]*
|
|||
|
||||
namespace spot
|
||||
{
|
||||
void
|
||||
hoayyreset()
|
||||
{
|
||||
BEGIN(INITIAL);
|
||||
comment_level = 0;
|
||||
parent_level = 0;
|
||||
missing_parent = false;
|
||||
}
|
||||
|
||||
int
|
||||
hoayyopen(const std::string &name)
|
||||
{
|
||||
|
|
@ -253,10 +372,7 @@ namespace spot
|
|||
// Reset the lexer in case a previous parse
|
||||
// ended badly.
|
||||
YY_NEW_FILE;
|
||||
BEGIN(INITIAL);
|
||||
comment_level = 0;
|
||||
parent_level = 0;
|
||||
missing_parent = false;
|
||||
hoayyreset();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ YY_DECL;
|
|||
|
||||
namespace spot
|
||||
{
|
||||
// Reset the scanner to its INITIAL start condition and clear its
// comment and parenthesis counters.
void hoayyreset();
|
||||
int hoayyopen(const std::string& name);
|
||||
void hoayyclose();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue