ltlparse: add a lenient parsing mode

Spin 6 supports formulas such as []<>(a < b) so that atomic properties
need not be specified using #define.  Of course we don't want to
implement all the syntax of Spin in our LTL parser because other tools
may have different syntaxes for their atomic propositions.  The
lenient mode tells the scanner to return any (...), {...}, or {...}!
block as a single token.  The parser will try to recursively parse
this block as an LTL/SERE formula, and if this fails, it will consider
the block to be an atomic proposition.  The drawback is that most
syntax errors will now be considered to be atomic propositions.  For
instance (a U b U) is a single atomic proposition in lenient mode, and
a syntax error in default mode.

* src/ltlparse/ltlparse.yy, src/ltlparse/ltlscan.ll,
src/ltlparse/parsedecl.hh, src/ltlparse/public.hh: Add a
lenient parsing mode.  Simplify the lexer using yy_scan_string.
* src/bin/common_finput.cc: Add a --lenient option.
* src/ltltest/lenient.test: New file.
* src/ltltest/Makefile.am: Add it.
* src/neverparse/neverclaimparse.yy: Parse the guards in lenient mode.
* src/tgbatest/neverclaimread.test: Adjust.
* src/ltlvisit/tostring.cc: When outputting a formula in Spin's syntax,
output (a < b) instead of "a < b".
* src/misc/escape.cc, src/misc/escape.hh (trim): New helper function.
This commit is contained in:
Alexandre Duret-Lutz 2012-10-16 19:29:40 +02:00
parent d9ceb4adc4
commit 86dac4aadf
12 changed files with 355 additions and 60 deletions

View file

@ -86,6 +86,53 @@ using namespace spot::ltl;
} \
while (0);
// Interpret the text of a (...), {...} or {...}! block returned by
// the lexer in lenient mode.
//
// Users may write atomic propositions with a syntax we know nothing
// about: "a U (b U c)" must give two until operators over a, b and
// c, while "a U (b == c)" must give a single until operator over
// the atomic propositions "a" and "b == c".  Since the lexer cannot
// tell these two cases apart, it hands us the raw block and we
// proceed in two steps:
//
//   1. Parse STR recursively (as a SERE when SERE is set, as an LTL
//      formula otherwise), still in lenient mode.  If no error was
//      recorded, the result of that parse is returned.
//   2. Otherwise discard the partial result and ask ENV to turn the
//      whole string into an atomic proposition.  It is up to the
//      environment to check the syntax of that proposition, and
//      possibly to reject it.
//
// Errors of the failed recursive parse are kept in a local list and
// dropped.  The only diagnostic added to ERROR_LIST (at LOCATION) is
// the rejection of STR by the environment, in which case the
// function returns 0.
const formula*
try_recursive_parse(const std::string& str,
		    const ltlyy::location& location,
		    spot::ltl::environment& env,
		    bool debug,
		    bool sere,
		    spot::ltl::parse_error_list& error_list)
{
  // Step 1: recursive parse, with its own private error list.
  spot::ltl::parse_error_list suberror;
  const spot::ltl::formula* parsed =
    sere
    ? spot::ltl::parse_sere(str, suberror, env, debug, true)
    : spot::ltl::parse(str, suberror, env, debug, true);

  if (suberror.empty())
    return parsed;

  // The parser may have built something despite the errors; we do
  // not want it.
  if (parsed)
    parsed->destroy();

  // Step 2: the whole block becomes an atomic proposition.
  const spot::ltl::formula* ap = env.require(str);
  if (ap)
    return ap;

  std::string msg("atomic proposition `");
  msg.append(str);
  msg.append("' rejected by environment `");
  msg += env.name();
  msg.append("'");
  error_list.push_back(parse_error(location, msg));
  return ap;
}
}
@ -95,6 +142,9 @@ using namespace spot::ltl;
%token START_LBT "LBT start marker"
%token START_SERE "SERE start marker"
%token PAR_OPEN "opening parenthesis" PAR_CLOSE "closing parenthesis"
%token <str> PAR_BLOCK "(...) block"
%token <str> BRA_BLOCK "{...} block"
%token <str> BRA_BANG_BLOCK "{...}! block"
%token BRACE_OPEN "opening brace" BRACE_CLOSE "closing brace"
%token BRACE_BANG_CLOSE "closing brace-bang"
%token OP_OR "or operator" OP_XOR "xor operator"
@ -378,6 +428,14 @@ sere: booleanatom
}
}
| bracedsere
| PAR_BLOCK
{
/* A whole (...) block returned as one token by the scanner in
   lenient mode.  Try to read its text as a SERE (fifth argument
   is true); try_recursive_parse falls back to an atomic
   proposition when that fails. */
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
/* The scanner allocated the string with new; this action owns it. */
delete $1;
/* A null result is turned into a syntax error. */
if (!$$)
YYERROR;
}
| PAR_OPEN sere PAR_CLOSE
{ $$ = $2; }
| PAR_OPEN error PAR_CLOSE
@ -590,8 +648,24 @@ bracedsere: BRACE_OPEN sere BRACE_CLOSE
"treating this brace block as false"));
$$ = constant::false_instance();
}
| BRA_BLOCK
{
/* A whole brace block returned as one token by the scanner in
   lenient mode.  Its text is parsed as a SERE (fifth argument is
   true), or taken as an atomic proposition if that fails. */
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
/* The scanner allocated the string with new; this action owns it. */
delete $1;
/* A null result is turned into a syntax error. */
if (!$$)
YYERROR;
}
parenthesedsubformula: PAR_OPEN subformula PAR_CLOSE
parenthesedsubformula: PAR_BLOCK
{
/* A whole (...) block returned as one token by the scanner in
   lenient mode.  Here the text is parsed as an LTL formula (fifth
   argument is false), or taken as an atomic proposition if that
   fails. */
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), false, error_list);
/* The scanner allocated the string with new; this action owns it. */
delete $1;
/* A null result is turned into a syntax error. */
if (!$$)
YYERROR;
}
| PAR_OPEN subformula PAR_CLOSE
{ $$ = $2; }
| PAR_OPEN subformula error PAR_CLOSE
{ error_list.push_back(parse_error(@3, "ignoring this"));
@ -721,16 +795,25 @@ subformula: booleanatom
/* {SERE}! = {SERE} <>-> 1 */
{ $$ = binop::instance(binop::EConcat, $2,
constant::true_instance()); }
;
| BRA_BANG_BLOCK
{
/* A whole brace block followed by a bang, returned as one token
   by the scanner in lenient mode.  The text between the braces is
   parsed as a SERE (fifth argument is true), or taken as an atomic
   proposition if that fails. */
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
/* The scanner allocated the string with new; this action owns it. */
delete $1;
/* A null result is turned into a syntax error; the code below is
   only reached with a valid operand. */
if (!$$)
YYERROR;
/* Same construction as the non-lenient rule for this operator:
   an EConcat of the SERE with the true constant. */
$$ = binop::instance(binop::EConcat, $$,
constant::true_instance());
}
lbtformula: ATOMIC_PROP
{
$$ = parse_environment.require(*$1);
if (! $$)
{
std::string s = "unknown atomic proposition `";
std::string s = "atomic proposition `";
s += *$1;
s += "' in environment `";
s += "' rejected by environment `";
s += parse_environment.name();
s += "'";
error_list.push_back(parse_error(@1, s));
@ -790,14 +873,16 @@ namespace spot
parse(const std::string& ltl_string,
parse_error_list& error_list,
environment& env,
bool debug)
bool debug, bool lenient)
{
const formula* result = 0;
flex_set_buffer(ltl_string.c_str(),
ltlyy::parser::token::START_LTL);
ltlyy::parser::token::START_LTL,
lenient);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}
@ -809,25 +894,30 @@ namespace spot
{
const formula* result = 0;
flex_set_buffer(ltl_string.c_str(),
ltlyy::parser::token::START_LBT);
ltlyy::parser::token::START_LBT,
false);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}
const formula*
parse_sere(const std::string& sere_string,
parse_error_list& error_list,
environment& env,
bool debug)
parse_error_list& error_list,
environment& env,
bool debug,
bool lenient)
{
const formula* result = 0;
flex_set_buffer(sere_string.c_str(),
ltlyy::parser::token::START_SERE);
ltlyy::parser::token::START_SERE,
lenient);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}

View file

@ -22,41 +22,29 @@
%option noyywrap warn 8bit batch
%option prefix="ltlyy"
%option outfile="lex.yy.c"
%option stack
%{
#include <string>
#include <boost/lexical_cast.hpp>
#include "ltlparse/parsedecl.hh"
/* Hack Flex so we read from a string instead of reading from a file. */
#define YY_INPUT(buf, result, max_size) \
do { \
result = (max_size < to_parse_size) ? max_size : to_parse_size; \
memcpy(buf, to_parse, result); \
to_parse_size -= result; \
to_parse += result; \
} while (0);
#include "misc/escape.hh"
#define YY_USER_ACTION \
yylloc->columns(yyleng);
static const char* to_parse = 0;
static size_t to_parse_size = 0;
static int start_token = 0;
static int parent_level = 0;
static bool missing_parent = false;
static bool lenient_mode = false;
typedef ltlyy::parser::token token;
void
flex_set_buffer(const char* buf, int start_tok)
{
to_parse = buf;
to_parse_size = strlen(to_parse);
start_token = start_tok;
}
%}
%s not_prop
%x in_par
%x in_bra
%x sqbracket
%x lbt
@ -78,18 +66,103 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
{
int t = start_token;
start_token = 0;
if (t == token::START_LBT)
BEGIN(lbt);
else
BEGIN(0);
return t;
}
yylloc->step();
%}
"(" BEGIN(0); return token::PAR_OPEN;
"(" {
/* Default mode: an opening parenthesis is an ordinary token. */
if (!lenient_mode)
{
BEGIN(0);
return token::PAR_OPEN;
}
/* Parse any (...) block as a single block,
taking care of nested parentheses. The
parser will then try to parse this block
recursively. */
BEGIN(in_par);
/* The parenthesis just read is the first open level. */
parent_level = 1;
/* Filled by the in_par rules; the parser action that receives
   the PAR_BLOCK token deletes it. */
yylval->str = new std::string();
}
<in_par>{
"(" ++parent_level; yylval->str->append(yytext, yyleng);
")" {
/* Rules of the in_par state: accumulate in yylval->str the text
   of a parenthesized block, using parent_level to find the
   closing parenthesis that matches the one that opened it. */
if (--parent_level)
{
/* Closes a nested level: it belongs to the block text. */
yylval->str->append(yytext, yyleng);
}
else
{
/* Closes the block itself: the outer parentheses are not part
   of the token text, which is trimmed before being returned. */
BEGIN(not_prop);
spot::trim(*yylval->str);
return token::PAR_BLOCK;
}
}
[^()]+ yylval->str->append(yytext, yyleng);
<<EOF>> {
/* Unterminated block: push back the missing closing parenthesis
   so that the rule above eventually returns a PAR_BLOCK.  Each
   level still open reaches this rule again; missing_parent makes
   sure the error is reported only once. */
unput(')');
if (!missing_parent)
error_list.push_back(
spot::ltl::parse_error(*yylloc,
"missing closing parenthese"));
missing_parent = true;
}
}
"{" {
/* Default mode: an opening brace is an ordinary token. */
if (!lenient_mode)
{
BEGIN(0);
return token::BRACE_OPEN;
}
/* Parse any {...} block as a single block,
taking care of nested braces. The
parser will then try to parse this block
recursively. */
BEGIN(in_bra);
/* The brace just read is the first open level. */
parent_level = 1;
/* Filled by the in_bra rules; the parser action that receives
   the BRA_BLOCK or BRA_BANG_BLOCK token deletes it. */
yylval->str = new std::string();
}
<in_bra>{
  "{"			{
			  /* Rules of the in_bra state: accumulate in
			     yylval->str the text of a brace block,
			     using parent_level to find the closing
			     brace that matches the one that opened
			     it.  A nested opening brace belongs to
			     the block text. */
			  ++parent_level;
			  yylval->str->append(yytext, yyleng);
			}
  "}"[ \t\n]*"!"	{
			  if (--parent_level)
			    {
			      /* Closes a nested level: keep it (and
				 the bang) in the block text. */
			      yylval->str->append(yytext, yyleng);
			    }
			  else
			    {
			      /* Closes the block itself.  The outer
				 braces and the bang are not part of
				 the token text. */
			      BEGIN(not_prop);
			      spot::trim(*yylval->str);
			      return token::BRA_BANG_BLOCK;
			    }
			}
  "}"			{
			  if (--parent_level)
			    {
			      /* Closes a nested level. */
			      yylval->str->append(yytext, yyleng);
			    }
			  else
			    {
			      /* Closes the block itself. */
			      BEGIN(not_prop);
			      spot::trim(*yylval->str);
			      return token::BRA_BLOCK;
			    }
			}
  [^{}]+		yylval->str->append(yytext, yyleng);
  <<EOF>>		{
			  /* Unterminated block: push back the missing
			     closing brace so that the rule above
			     eventually returns a BRA_BLOCK.  It has
			     to be a brace: a closing parenthesis
			     would be swallowed by the catch-all rule
			     of this state without ever decreasing
			     parent_level, and we would reach the end
			     of input again, forever.  Each level
			     still open reaches this rule again;
			     missing_parent makes sure the error is
			     reported only once. */
			  unput('}');
			  if (!missing_parent)
			    error_list.push_back(
			      spot::ltl::parse_error(*yylloc,
				"missing closing brace"));
			  missing_parent = true;
			}
}
")" BEGIN(not_prop); return token::PAR_CLOSE;
"{" BEGIN(0); return token::BRACE_OPEN;
"}"[ \t\n]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
"}" BEGIN(not_prop); return token::BRACE_CLOSE;
@ -225,7 +298,26 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
<<EOF>> return token::END_OF_INPUT;
%{
/* Dummy use of yyunput to shut up a gcc warning. */
(void) &yyunput;
%}
%%
// Make the scanner read BUF instead of whatever it was reading.
//
// START_TOK is stored in start_token, which the scanner rules read
// (and reset) to hand a start marker to the parser.  LENIENT selects
// whether (...) and {...} blocks are returned as single tokens.
//
// The current buffer and the current start condition are both saved
// on Flex's stacks, so every call must be matched by a call to
// flex_unset_buffer().
void
flex_set_buffer(const char* buf, int start_tok, bool lenient)
{
  lenient_mode = lenient;
  start_token = start_tok;

  // Keep the buffer in use on the stack, then scan BUF.
  yypush_buffer_state(YY_CURRENT_BUFFER);
  yy_scan_string(buf);

  // LBT formulas have their own start condition; everything else
  // starts in the initial one.
  yy_push_state(start_tok == token::START_LBT ? lbt : 0);
}
// Undo the effect of flex_set_buffer(): restore the start condition
// and the input buffer that were saved, and forget that a missing
// closing delimiter has been reported.
void
flex_unset_buffer()
{
  // yy_top_state is otherwise unused; taking its address keeps g++
  // from warning about it.
  (void) &yy_top_state;

  missing_parent = false;
  yy_pop_state();
  yypop_buffer_state();
}

View file

@ -1,4 +1,4 @@
// Copyright (C) 2010 Laboratoire de Recherche et Développement de
// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement de
// l'Epita (LRDE)
// Copyright (C) 2003, 2005 Laboratoire d'Informatique de Paris 6 (LIP6),
// département Systèmes Répartis Coopératifs (SRC), Université Pierre
@ -31,6 +31,7 @@
spot::ltl::parse_error_list& error_list)
YY_DECL;
void flex_set_buffer(const char *buf, int start_tok);
void flex_set_buffer(const char *buf, int start_tok, bool lenient);
void flex_unset_buffer();
#endif // SPOT_LTLPARSE_PARSEDECL_HH

View file

@ -56,6 +56,9 @@ namespace spot
/// parse errors that occurred during parsing.
/// \param env The environment into which parsing should take place.
/// \param debug When true, causes the parser to trace its execution.
/// \param lenient When true, parenthesized blocks that cannot be
/// parsed as subformulas will be considered as
/// atomic propositions.
/// \return A pointer to the formula built from \a ltl_string, or
/// 0 if the input was unparsable.
///
@ -68,7 +71,8 @@ namespace spot
const formula* parse(const std::string& ltl_string,
parse_error_list& error_list,
environment& env = default_environment::instance(),
bool debug = false);
bool debug = false,
bool lenient = false);
/// \brief Build a formula from an LTL string in LBT's format.
/// \param ltl_string The string to parse.
@ -100,6 +104,9 @@ namespace spot
/// parse errors that occurred during parsing.
/// \param env The environment into which parsing should take place.
/// \param debug When true, causes the parser to trace its execution.
/// \param lenient When true, parenthesized blocks that cannot be
/// parsed as subformulas will be considered as
/// atomic propositions.
/// \return A pointer to the formula built from \a sere_string, or
/// 0 if the input was unparsable.
///
@ -113,7 +120,8 @@ namespace spot
parse_error_list& error_list,
environment& env =
default_environment::instance(),
bool debug = false);
bool debug = false,
bool lenient = false);
/// \brief Format diagnostics produced by spot::ltl::parse
/// or spot::ltl::ratexp