From 86dac4aadf75706c60f912e336041a3316bb23ab Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Tue, 16 Oct 2012 19:29:40 +0200 Subject: [PATCH] ltlparse: add a lenient parsing mode Spin 6 supports formulas such as []<>(a < b) so that atomic properties need not be specified using #define. Of course we don't want to implement all the syntax of Spin in our LTL parser because other tools may have different syntaxes for their atomic propositions. The lenient mode tells the scanner to return any (...), {...}, or {...}! block as a single token. The parser will try to recursively parse this block as a LTL/SERE formula, and if this fails, it will consider the block to be an atomic proposition. The drawback is that most syntax errors will no be considered to be atomic propositions. For instance (a U b U) is a single atomic proposition in lenient mode, and a syntax error in default mode. * src/ltlparse/ltlparse.yy, src/ltlparse/ltlscan.ll, src/ltlparse/parsedecl.hh, src/ltlparse/public.hh: Add a lenient parsing mode. Simplify the lexer using yy_scan_string. * src/bin/common_finput.cc: Add a --lenient option. * src/ltltest/lenient.test: New file. * src/ltltest/Makefile.am: Add it. * src/neverparse/neverclaimparse.yy: Parse the guards in lenient mode. * src/tgbatest/neverclaimread.test: Adjust. * src/ltlvisit/tostring.cc: When outputing a formula in Spin's syntax, output (a < b) instead of "a < b". * src/misc/escape.cc, src/misc/escape.hh (trim): New helper function. --- src/bin/common_finput.cc | 12 ++- src/ltlparse/ltlparse.yy | 112 +++++++++++++++++++--- src/ltlparse/ltlscan.ll | 150 ++++++++++++++++++++++++------ src/ltlparse/parsedecl.hh | 5 +- src/ltlparse/public.hh | 12 ++- src/ltltest/Makefile.am | 1 + src/ltltest/lenient.test | 50 ++++++++++ src/ltlvisit/tostring.cc | 9 +- src/misc/escape.cc | 21 ++++- src/misc/escape.hh | 4 +- src/neverparse/neverclaimparse.yy | 4 +- src/tgbatest/neverclaimread.test | 35 ++++--- 12 files changed, 355 insertions(+), 60 deletions(-) create mode 100755 src/ltltest/lenient.test diff --git a/src/bin/common_finput.cc b/src/bin/common_finput.cc index baa42143b..094004713 100644 --- a/src/bin/common_finput.cc +++ b/src/bin/common_finput.cc @@ -24,9 +24,11 @@ #include #define OPT_LBT 1 +#define OPT_LENIENT 2 jobs_t jobs; bool lbt_input = false; +static bool lenient = false; static const argp_option options[] = { @@ -36,6 +38,9 @@ static const argp_option options[] = "process each line of FILENAME as a formula", 0 }, { "lbt-input", OPT_LBT, 0, 0, "read all formulas using LBT's prefix syntax", 0 }, + { "lenient", OPT_LENIENT, 0, 0, + "parenthesized blocks that cannot be parsed as subformulas " + "are considered as atomic properties", 0 }, { 0, 0, 0, 0, 0, 0 } }; @@ -56,6 +61,9 @@ parse_opt_finput(int key, char* arg, struct argp_state*) case OPT_LBT: lbt_input = true; break; + case OPT_LENIENT: + lenient = true; + break; default: return ARGP_ERR_UNKNOWN; } @@ -68,7 +76,9 @@ parse_formula(const std::string& s, spot::ltl::parse_error_list& pel) if (lbt_input) return spot::ltl::parse_lbt(s, pel); else - return spot::ltl::parse(s, pel); + return spot::ltl::parse(s, pel, + spot::ltl::default_environment::instance(), + false, lenient); } diff --git a/src/ltlparse/ltlparse.yy b/src/ltlparse/ltlparse.yy index 986f10eed..e99c36f2f 100644 --- a/src/ltlparse/ltlparse.yy +++ b/src/ltlparse/ltlparse.yy @@ -86,6 +86,53 @@ using namespace spot::ltl; } \ while (0); + const formula* + try_recursive_parse(const std::string& str, + const ltlyy::location& location, + spot::ltl::environment& env, + bool debug, + bool sere, + spot::ltl::parse_error_list& error_list) + { + // We want to parse a U (b U c) as two until operators applied + // to the atomic propositions a, b, and c. We also want to + // parse a U (b == c) as one until operator applied to the + // atomic propositions "a" and "b == c". The only problem is + // that we do not know anything about "==" or in general about + // the syntax of atomic proposition of our users. + // + // To support that, the lexer will return "b U c" and "b == c" + // as PAR_BLOCK tokens. We then try to parse such tokens + // recursively. If, as in the case of "b U c", the block is + // successfully parsed as a formula, we return this formula. + // Otherwise, we convert the string into an atomic proposition + // (it's up to the environment to check the syntax of this + // proposition, and maybe reject it). + spot::ltl::parse_error_list suberror; + const spot::ltl::formula* f; + if (sere) + f = spot::ltl::parse_sere(str, suberror, env, debug, true); + else + f = spot::ltl::parse(str, suberror, env, debug, true); + + if (suberror.empty()) + return f; + + if (f) + f->destroy(); + + f = env.require(str); + if (!f) + { + std::string s = "atomic proposition `"; + s += str; + s += "' rejected by environment `"; + s += env.name(); + s += "'"; + error_list.push_back(parse_error(location, s)); + } + return f; + } } @@ -95,6 +142,9 @@ using namespace spot::ltl; %token START_LBT "LBT start marker" %token START_SERE "SERE start marker" %token PAR_OPEN "opening parenthesis" PAR_CLOSE "closing parenthesis" +%token PAR_BLOCK "(...) block" +%token BRA_BLOCK "{...} block" +%token BRA_BANG_BLOCK "{...}! block" %token BRACE_OPEN "opening brace" BRACE_CLOSE "closing brace" %token BRACE_BANG_CLOSE "closing brace-bang" %token OP_OR "or operator" OP_XOR "xor operator" @@ -378,6 +428,14 @@ sere: booleanatom } } | bracedsere + | PAR_BLOCK + { + $$ = try_recursive_parse(*$1, @1, parse_environment, + debug_level(), true, error_list); + delete $1; + if (!$$) + YYERROR; + } | PAR_OPEN sere PAR_CLOSE { $$ = $2; } | PAR_OPEN error PAR_CLOSE @@ -590,8 +648,24 @@ bracedsere: BRACE_OPEN sere BRACE_CLOSE "treating this brace block as false")); $$ = constant::false_instance(); } + | BRA_BLOCK + { + $$ = try_recursive_parse(*$1, @1, parse_environment, + debug_level(), true, error_list); + delete $1; + if (!$$) + YYERROR; + } -parenthesedsubformula: PAR_OPEN subformula PAR_CLOSE +parenthesedsubformula: PAR_BLOCK + { + $$ = try_recursive_parse(*$1, @1, parse_environment, + debug_level(), false, error_list); + delete $1; + if (!$$) + YYERROR; + } + | PAR_OPEN subformula PAR_CLOSE { $$ = $2; } | PAR_OPEN subformula error PAR_CLOSE { error_list.push_back(parse_error(@3, "ignoring this")); @@ -721,16 +795,25 @@ subformula: booleanatom /* {SERE}! = {SERE} <>-> 1 */ { $$ = binop::instance(binop::EConcat, $2, constant::true_instance()); } -; + | BRA_BANG_BLOCK + { + $$ = try_recursive_parse(*$1, @1, parse_environment, + debug_level(), true, error_list); + delete $1; + if (!$$) + YYERROR; + $$ = binop::instance(binop::EConcat, $$, + constant::true_instance()); + } lbtformula: ATOMIC_PROP { $$ = parse_environment.require(*$1); if (! $$) { - std::string s = "unknown atomic proposition `"; + std::string s = "atomic proposition `"; s += *$1; - s += "' in environment `"; + s += "' rejected by environment `"; s += parse_environment.name(); s += "'"; error_list.push_back(parse_error(@1, s)); @@ -790,14 +873,16 @@ namespace spot parse(const std::string& ltl_string, parse_error_list& error_list, environment& env, - bool debug) + bool debug, bool lenient) { const formula* result = 0; flex_set_buffer(ltl_string.c_str(), - ltlyy::parser::token::START_LTL); + ltlyy::parser::token::START_LTL, + lenient); ltlyy::parser parser(error_list, env, result); parser.set_debug_level(debug); parser.parse(); + flex_unset_buffer(); return result; } @@ -809,25 +894,30 @@ namespace spot { const formula* result = 0; flex_set_buffer(ltl_string.c_str(), - ltlyy::parser::token::START_LBT); + ltlyy::parser::token::START_LBT, + false); ltlyy::parser parser(error_list, env, result); parser.set_debug_level(debug); parser.parse(); + flex_unset_buffer(); return result; } const formula* parse_sere(const std::string& sere_string, - parse_error_list& error_list, - environment& env, - bool debug) + parse_error_list& error_list, + environment& env, + bool debug, + bool lenient) { const formula* result = 0; flex_set_buffer(sere_string.c_str(), - ltlyy::parser::token::START_SERE); + ltlyy::parser::token::START_SERE, + lenient); ltlyy::parser parser(error_list, env, result); parser.set_debug_level(debug); parser.parse(); + flex_unset_buffer(); return result; } diff --git a/src/ltlparse/ltlscan.ll b/src/ltlparse/ltlscan.ll index 7da6ec6d7..db883d42c 100644 --- a/src/ltlparse/ltlscan.ll +++ b/src/ltlparse/ltlscan.ll @@ -22,41 +22,29 @@ %option noyywrap warn 8bit batch %option prefix="ltlyy" %option outfile="lex.yy.c" +%option stack %{ #include #include #include "ltlparse/parsedecl.hh" - -/* Hack Flex so we read from a string instead of reading from a file. */ -#define YY_INPUT(buf, result, max_size) \ - do { \ - result = (max_size < to_parse_size) ? max_size : to_parse_size; \ - memcpy(buf, to_parse, result); \ - to_parse_size -= result; \ - to_parse += result; \ - } while (0); +#include "misc/escape.hh" #define YY_USER_ACTION \ yylloc->columns(yyleng); -static const char* to_parse = 0; -static size_t to_parse_size = 0; static int start_token = 0; +static int parent_level = 0; +static bool missing_parent = false; +static bool lenient_mode = false; typedef ltlyy::parser::token token; -void -flex_set_buffer(const char* buf, int start_tok) -{ - to_parse = buf; - to_parse_size = strlen(to_parse); - start_token = start_tok; -} - %} %s not_prop +%x in_par +%x in_bra %x sqbracket %x lbt @@ -78,18 +66,103 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇" { int t = start_token; start_token = 0; - if (t == token::START_LBT) - BEGIN(lbt); - else - BEGIN(0); return t; } yylloc->step(); %} -"(" BEGIN(0); return token::PAR_OPEN; + +"(" { + if (!lenient_mode) + { + BEGIN(0); + return token::PAR_OPEN; + } + /* Parse any (...) block as a single block, + taking care of nested parentheses. The + parser will then try to parse this block + recursively. */ + BEGIN(in_par); + parent_level = 1; + yylval->str = new std::string(); + } +{ + "(" ++parent_level; yylval->str->append(yytext, yyleng); + ")" { + if (--parent_level) + { + yylval->str->append(yytext, yyleng); + } + else + { + BEGIN(not_prop); + spot::trim(*yylval->str); + return token::PAR_BLOCK; + } + } + [^()]+ yylval->str->append(yytext, yyleng); + <> { + unput(')'); + if (!missing_parent) + error_list.push_back( + spot::ltl::parse_error(*yylloc, + "missing closing parenthese")); + missing_parent = true; + } +} + +"{" { + if (!lenient_mode) + { + BEGIN(0); + return token::BRACE_OPEN; + } + /* Parse any {...} block as a single block, + taking care of nested parentheses. The + parser will then try to parse this block + recursively. */ + BEGIN(in_bra); + parent_level = 1; + yylval->str = new std::string(); + } +{ + "{" ++parent_level; yylval->str->append(yytext, yyleng); + "}"[ \t\n]*"!" { + if (--parent_level) + { + yylval->str->append(yytext, yyleng); + } + else + { + BEGIN(not_prop); + spot::trim(*yylval->str); + return token::BRA_BANG_BLOCK; + } + } + "}" { + if (--parent_level) + { + yylval->str->append(yytext, yyleng); + } + else + { + BEGIN(not_prop); + spot::trim(*yylval->str); + return token::BRA_BLOCK; + } + } + [^{}]+ yylval->str->append(yytext, yyleng); + <> { + unput(')'); + if (!missing_parent) + error_list.push_back( + spot::ltl::parse_error(*yylloc, + "missing closing brace")); + missing_parent = true; + } +} + ")" BEGIN(not_prop); return token::PAR_CLOSE; -"{" BEGIN(0); return token::BRACE_OPEN; "}"[ \t\n]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE; "}" BEGIN(not_prop); return token::BRACE_CLOSE; @@ -225,7 +298,26 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇" <> return token::END_OF_INPUT; -%{ - /* Dummy use of yyunput to shut up a gcc warning. */ - (void) &yyunput; -%} +%% + +void +flex_set_buffer(const char* buf, int start_tok, bool lenient) +{ + yypush_buffer_state(YY_CURRENT_BUFFER); + yy_scan_string(buf); + start_token = start_tok; + if (start_tok == token::START_LBT) + yy_push_state(lbt); + else + yy_push_state(0); + lenient_mode = lenient; +} + +void +flex_unset_buffer() +{ + (void)&yy_top_state; // shut up a g++ warning. + yy_pop_state(); + yypop_buffer_state(); + missing_parent = false; +} diff --git a/src/ltlparse/parsedecl.hh b/src/ltlparse/parsedecl.hh index db425765d..7b0f809af 100644 --- a/src/ltlparse/parsedecl.hh +++ b/src/ltlparse/parsedecl.hh @@ -1,4 +1,4 @@ -// Copyright (C) 2010 Laboratoire de Recherche et Développement de +// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement de // l'Epita (LRDE) // Copyright (C) 2003, 2005 Laboratoire d'Informatique de Paris 6 (LIP6), // département Systèmes Répartis Coopératifs (SRC), Université Pierre @@ -31,6 +31,7 @@ spot::ltl::parse_error_list& error_list) YY_DECL; -void flex_set_buffer(const char *buf, int start_tok); +void flex_set_buffer(const char *buf, int start_tok, bool lenient); +void flex_unset_buffer(); #endif // SPOT_LTLPARSE_PARSEDECL_HH diff --git a/src/ltlparse/public.hh b/src/ltlparse/public.hh index 09607b492..9e1cebaaa 100644 --- a/src/ltlparse/public.hh +++ b/src/ltlparse/public.hh @@ -56,6 +56,9 @@ namespace spot /// parse errors that occured during parsing. /// \param env The environment into which parsing should take place. /// \param debug When true, causes the parser to trace its execution. + /// \param lenient When true, parenthesized blocks that cannot be + /// parsed as subformulas will be considered as + /// atomic propositions. /// \return A pointer to the formula built from \a ltl_string, or /// 0 if the input was unparsable. /// @@ -68,7 +71,8 @@ namespace spot const formula* parse(const std::string& ltl_string, parse_error_list& error_list, environment& env = default_environment::instance(), - bool debug = false); + bool debug = false, + bool lenient = false); /// \brief Build a formula from an LTL string in LBT's format. /// \param ltl_string The string to parse. @@ -100,6 +104,9 @@ namespace spot /// parse errors that occured during parsing. /// \param env The environment into which parsing should take place. /// \param debug When true, causes the parser to trace its execution. + /// \param lenient When true, parenthesized blocks that cannot be + /// parsed as subformulas will be considered as + /// atomic propositions. /// \return A pointer to the formula built from \a sere_string, or /// 0 if the input was unparsable. /// @@ -113,7 +120,8 @@ namespace spot parse_error_list& error_list, environment& env = default_environment::instance(), - bool debug = false); + bool debug = false, + bool lenient = false); /// \brief Format diagnostics produced by spot::ltl::parse /// or spot::ltl::ratexp diff --git a/src/ltltest/Makefile.am b/src/ltltest/Makefile.am index af685fc12..20ffac85a 100644 --- a/src/ltltest/Makefile.am +++ b/src/ltltest/Makefile.am @@ -85,6 +85,7 @@ TESTS = \ tunenoform.test \ consterm.test \ kind.test \ + lenient.test \ syntimpl.test \ reduc.test \ reducpsl.test \ diff --git a/src/ltltest/lenient.test b/src/ltltest/lenient.test new file mode 100755 index 000000000..b6dc8b6ff --- /dev/null +++ b/src/ltltest/lenient.test @@ -0,0 +1,50 @@ +#!/bin/sh +# -*- coding: utf-8 -*- +# Copyright (C) 2012 Laboratoire de Recherche et Développement de +# l'Epita (LRDE). +# +# This file is part of Spot, a model checking library. +# +# Spot is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Spot is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. ./defs || exit 1 + +set -e + +ltlfilt=../../bin/ltlfilt + +cat >input <expected < output +cmp output expected diff --git a/src/ltlvisit/tostring.cc b/src/ltlvisit/tostring.cc index d92887218..35233e3e7 100644 --- a/src/ltlvisit/tostring.cc +++ b/src/ltlvisit/tostring.cc @@ -249,7 +249,14 @@ namespace spot os_ << "("; if (!is_bare_word(str.c_str())) { - os_ << '"' << str << '"'; + // Spin 6 supports atomic propositions such as (a == 0) + // as long as they are enclosed in parentheses. + if (kw_ != spin_kw) + os_ << '"' << str << '"'; + else if (!full_parent_) + os_ << '(' << str << ')'; + else + os_ << str; } else { diff --git a/src/misc/escape.cc b/src/misc/escape.cc index ad100b95f..0eb164072 100644 --- a/src/misc/escape.cc +++ b/src/misc/escape.cc @@ -1,5 +1,8 @@ +// -*- coding: utf-8 -*- +// Copyright (C) 2012 Laboratoire de Recherche et Developpement de +// l'Epita (LRDE) // Copyright (C) 2004 Laboratoire d'Informatique de Paris 6 (LIP6), -// département Systèmes Répartis Coopératifs (SRC), Université Pierre +// département Systèmes Répartis Coopératifs (SRC), Université Pierre // et Marie Curie. // // This file is part of Spot, a model checking library. @@ -19,6 +22,10 @@ #include #include +#include +#include +#include +#include #include "escape.hh" namespace spot @@ -52,4 +59,16 @@ namespace spot escape_str(os, str); return os.str(); } + + void + trim(std::string& str) + { + str.erase(std::find_if(str.rbegin(), str.rend(), + std::not1(std::ptr_fun + (std::isspace))).base(), + str.end()); + str.erase(str.begin(), + std::find_if(str.begin(), str.end(), + std::not1(std::ptr_fun(std::isspace)))); + } } diff --git a/src/misc/escape.hh b/src/misc/escape.hh index 64ae2909a..cff0108fc 100644 --- a/src/misc/escape.hh +++ b/src/misc/escape.hh @@ -1,4 +1,4 @@ -// Copyright (C) 2011 Laboratoire de Recherche et Developpement de +// Copyright (C) 2011, 2012 Laboratoire de Recherche et Developpement de // l'Epita (LRDE). // Copyright (C) 2004 Laboratoire d'Informatique de Paris 6 (LIP6), // département Systèmes Répartis Coopératifs (SRC), Université Pierre @@ -38,6 +38,8 @@ namespace spot /// \\n in \a str. std::string escape_str(const std::string& str); + /// \brief Remove spaces at the front and back of \a str. + void trim(std::string& str); /// @} } diff --git a/src/neverparse/neverclaimparse.yy b/src/neverparse/neverclaimparse.yy index b0f009657..646362dec 100644 --- a/src/neverparse/neverclaimparse.yy +++ b/src/neverparse/neverclaimparse.yy @@ -201,7 +201,9 @@ transition: else { spot::ltl::parse_error_list pel; - const spot::ltl::formula* f = spot::ltl::parse(*$3, pel); + const spot::ltl::formula* f = spot::ltl::parse(*$3, pel, + parse_environment, + false, true); delete $3; for(spot::ltl::parse_error_list::const_iterator i = pel.begin(); i != pel.end(); ++i) diff --git a/src/tgbatest/neverclaimread.test b/src/tgbatest/neverclaimread.test index cd6590bd4..e643587d5 100755 --- a/src/tgbatest/neverclaimread.test +++ b/src/tgbatest/neverclaimread.test @@ -86,10 +86,14 @@ skip } EOF +# We used to catch the following error: +# input:5.11: syntax error, unexpected closing parenthesis +# input:5.8-9: missing right operand for "and operator" +# but since the guard parser is more lenient, we just assume +# that "p1 && " is an atomic property. + run 2 ../ltl2tgba -XN input > stdout 2>stderr cat >expected <formulae<[] a +FG a X false -([] a) U X b +(G a) U X b (a U b) U (c U d) true -([]a && XXXX!a) +(Ga && XXXX!a) +"a > b" U "process@foo" +GF("(a + b) == 42" U "process@foo") EOF while read f do run 0 ../ltl2tgba -b -f "!($f)" > f.tgba + sf=`../../bin/ltlfilt -sf "$f"` if test -n "$SPIN"; then - $SPIN -f "$f" > f.spin - run 0 ../ltl2tgba -E -Pf.tgba -XN f.spin - fi - if test -n "$LTL2BA"; then - $LTL2BA -f "$f" > f.ltl2ba - run 0 ../ltl2tgba -E -Pf.tgba -XN f.ltl2ba + # Old spin versions cannot parse formulas such as ((a + b) == 42). + if $SPIN -f "$sf" > f.spin; then + run 0 ../ltl2tgba -E -Pf.tgba -XN f.spin + fi fi + case $f in + *\"*);; + *) + if test -n "$LTL2BA"; then + $LTL2BA -f "$sf" > f.ltl2ba + run 0 ../ltl2tgba -E -Pf.tgba -XN f.ltl2ba + fi + esac run 0 ../ltl2tgba -DS -NN -f "$f" > f.spot # Make sure there is no `!x' occurring in the # output. Because `x' is usually #define'd, we