spot/spot/parsetl/scantl.ll
Alexandre Duret-Lutz 9c6a09890e parsetl: speedup parsing of n-ary operators with many operands
Issue #500, reported by Yann Thierry-Mieg.

* spot/parsetl/parsetl.yy, spot/parsetl/scantl.ll: Use variant
to store a new pnode objects that delays the construction of n-ary
operators.
* spot/parsetl/Makefile.am: Do not distribute stack.hh anymore.
* spot/tl/formula.cc: Fix detection of overflow in Star and FStar.
* HACKING: Update Bison requirements to 3.3.
* tests/core/500.test: New test case.
* tests/Makefile.am: Add it.
* tests/core/ltl2tgba2.test, tests/core/ltlsynt.test,
tests/core/tostring.test: Adjust to new expected order.
* NEWS: Mention the change.
2022-03-28 09:00:18 +02:00

466 lines
16 KiB
LLVM
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* -*- coding: utf-8 -*-
** Copyright (C) 2010-2015, 2017-2019, 2021-2022, Laboratoire de
** Recherche et Développement de l'Epita (LRDE).
** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6
** (LIP6), département Systèmes Répartis Coopératifs (SRC), Université
** Pierre et Marie Curie.
**
** This file is part of Spot, a model checking library.
**
** Spot is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
** Spot is distributed in the hope that it will be useful, but WITHOUT
** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
** or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
** License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
%option noyywrap warn 8bit batch
%option prefix="tlyy"
%option outfile="lex.yy.c"
%option stack
%option never-interactive
%top{
#include "config.h"
/* Flex 2.6.4's test for <inttypes.h> relies on __STDC_VERSION__
which is undefined in C++. So without that, it will define
its own integer types, including a broken SIZE_MAX definition.
So let's define __STDC_VERSION__ to make sure <inttypes.h> gets
included. */
#if HAVE_INTTYPES_H && !(defined __STDC_VERSION__)
# define __STDC_VERSION__ 199901L
#endif
}
%{
#include <cstdlib>
#include <string>
#include <spot/parsetl/parsedecl.hh>
#include "spot/priv/trim.hh"
#include "spot/tl/formula.hh"
#define YY_USER_ACTION \
yylloc->columns(yyleng);
static int start_token = 0;
static int parent_level = 0;
static bool missing_parent = false;
static bool lenient_mode = false;
static int orig_cond = 0;
static unsigned comment_level = 0;
typedef tlyy::parser::token token;
%}
%s not_prop
%x in_par
%x in_bra
%x sqbracket
%x lbt
%x in_COMMENT in_STRING
BOX "[]"|"□"|"⬜"|"◻"
DIAMOND "<>"|"◇"|"⋄"|"♢"
ARROWL "->"|"-->"|"→"|"⟶"
DARROWL "=>"|"⇒"|"⟹"
ARROWLR "<->"|"<-->"|"↔"
DARROWLR "<=>"|"⇔"
CIRCLE "()"|"○"|"◯"
CIRCLEX "Ⓧ"
NOT "!"|"~"|"¬"
BOXARROW {BOX}{ARROWL}|"|"{ARROWL}|"↦"
BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
eol \n+|\r+
eol2 (\n\r)+|(\r\n)+
%%
%{
if (start_token)
{
int t = start_token;
start_token = 0;
return t;
}
yylloc->step();
std::string s;
%}
<*>"/""*"+ {
if (YY_START != in_COMMENT)
{
orig_cond = YY_START;
BEGIN(in_COMMENT);
comment_level = 0;
}
++comment_level;
}
<in_COMMENT>{
[^*/\n\r]* continue;
"/"[^*\n\r]* continue;
"*"+[^*/\n\r]* continue;
{eol} yylloc->lines(yyleng); yylloc->end.column = 1;
{eol2} yylloc->lines(yyleng / 2); yylloc->end.column = 1;
"*"+"/" if (--comment_level == 0) BEGIN(orig_cond);
<<EOF>> {
BEGIN(orig_cond);
error_list.push_back(
spot::one_parse_error(*yylloc,
"unclosed comment"));
return 0;
}
}
"(" {
if (!lenient_mode)
{
BEGIN(0);
return token::PAR_OPEN;
}
/* Parse any (...) block as a single block,
taking care of nested parentheses. The
parser will then try to parse this block
recursively. */
BEGIN(in_par);
parent_level = 1;
yylval->emplace<std::string>();
}
<in_par>{
"(" {
++parent_level;
yylval->as<std::string>().append(yytext, yyleng);
}
")" {
if (--parent_level)
{
yylval->as<std::string>().append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(yylval->as<std::string>());
return token::PAR_BLOCK;
}
}
[^()]+ yylval->as<std::string>().append(yytext, yyleng);
<<EOF>> {
unput(')');
if (!missing_parent)
error_list.push_back(
spot::one_parse_error(*yylloc,
"missing closing parenthesis"));
missing_parent = true;
}
}
"{" {
if (!lenient_mode)
{
BEGIN(0);
return token::BRACE_OPEN;
}
/* Parse any {...} block as a single block,
taking care of nested parentheses. The
parser will then try to parse this block
recursively. */
BEGIN(in_bra);
parent_level = 1;
yylval->emplace<std::string>();
}
<in_bra>{
"{" {
++parent_level;
yylval->as<std::string>().append(yytext, yyleng);
}
"}"[ \t]*"!" {
if (--parent_level)
{
yylval->as<std::string>().append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(yylval->as<std::string>());
return token::BRA_BANG_BLOCK;
}
}
"}" {
if (--parent_level)
{
yylval->as<std::string>().append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(yylval->as<std::string>());
return token::BRA_BLOCK;
}
}
[^{}]+ yylval->as<std::string>().append(yytext, yyleng);
<<EOF>> {
unput('}');
if (!missing_parent)
error_list.push_back(
spot::one_parse_error(*yylloc,
"missing closing brace"));
missing_parent = true;
}
}
")" BEGIN(not_prop); return token::PAR_CLOSE;
"}"[ \t]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
"}" BEGIN(not_prop); return token::BRACE_CLOSE;
/* Must go before the other operators, because the F of FALSE
should not be mistaken with a unary F. */
"1"|[tT][rR][uU][eE] BEGIN(0); return token::CONST_TRUE;
"0"|[fF][aA][lL][sS][eE] BEGIN(0); return token::CONST_FALSE;
/* ~ comes from Goal, ! from everybody else */
{NOT} BEGIN(0); return token::OP_NOT;
"first_match" BEGIN(0); return token::OP_FIRST_MATCH;
/* SVA operators */
"##"[0-9] {
yylval->emplace<unsigned>(yytext[2] - '0');
return token::OP_DELAY_N;
}
"##"[0-9][0-9] {
yylval->emplace<unsigned>(yytext[2] * 10
+ yytext[3]
- '0' * 11);
return token::OP_DELAY_N;
}
"##"[0-9]{3,} {
errno = 0;
unsigned long n = strtoul(yytext + 2, 0, 10);
yylval->emplace<unsigned>(n);
if (errno || yylval->as<unsigned>() != n)
{
error_list.push_back(
spot::one_parse_error(*yylloc,
"value too large ignored"));
yylval->emplace<unsigned>(1);
}
if (yylval->as<unsigned>() >= spot::fnode::unbounded())
{
auto max = spot::fnode::unbounded() - 1;
std::ostringstream s;
s << yylval->as<unsigned>()
<< (" exceeds maximum supported "
"repetition (")
<< max << ")";
error_list.emplace_back(*yylloc, s.str());
yylval->emplace<unsigned>(max);
}
return token::OP_DELAY_N;
}
"##[+]" BEGIN(0); return token::OP_DELAY_PLUS;
"##[*]" BEGIN(0); return token::OP_DELAY_STAR;
"##[" BEGIN(sqbracket); return token::OP_DELAY_OPEN;
/* PSL operators */
{BOXARROW} BEGIN(0); return token::OP_UCONCAT;
{DIAMOND}{ARROWL} BEGIN(0); return token::OP_ECONCAT;
{BOXDARROW} BEGIN(0); return token::OP_UCONCAT_NONO;
{DIAMOND}{DARROWL} BEGIN(0); return token::OP_ECONCAT_NONO;
";" BEGIN(0); return token::OP_CONCAT;
":" BEGIN(0); return token::OP_FUSION;
"*" BEGIN(0); return token::OP_STAR;
"[*]" BEGIN(0); return token::OP_BSTAR;
"[+]" BEGIN(0); return token::OP_PLUS;
"[*" BEGIN(sqbracket); return token::OP_STAR_OPEN;
"[:*]" BEGIN(0); return token::OP_BFSTAR;
"[:+]" BEGIN(0); return token::OP_FPLUS;
"[:*" BEGIN(sqbracket); return token::OP_FSTAR_OPEN;
"[=" BEGIN(sqbracket); return token::OP_EQUAL_OPEN;
"["{ARROWL} BEGIN(sqbracket); return token::OP_GOTO_OPEN;
<sqbracket>"]" BEGIN(0); return token::OP_SQBKT_CLOSE;
<sqbracket>"!]" BEGIN(0); return token::OP_SQBKT_STRONG_CLOSE;
<sqbracket>[0-9]+ {
errno = 0;
unsigned long n = strtoul(yytext, 0, 10);
yylval->emplace<unsigned>(n);
if (errno || yylval->as<unsigned>() != n)
{
error_list.push_back(
spot::one_parse_error(*yylloc,
"value too large ignored"));
// Skip this number and read next token
yylloc->step();
}
else
{
return token::OP_SQBKT_NUM;
}
}
/* .. is from PSL and EDL
: is from Verilog and PSL
to is from VHDL
, is from Perl */
<sqbracket>","|".."|":"|"to" return token::OP_SQBKT_SEP;
/* In SVA you use [=1:$] instead of [=1..]. We will also accept
[=1..$] and [=1:]. The PSL LRM shows examples like [=1:inf]
instead, so will accept this too. */
<sqbracket>"$"|"inf" return token::OP_UNBOUNDED;
/* & and | come from Spin. && and || from LTL2BA.
/\, \/, and xor are from LBTT.
--> and <--> come from Goal.
+,*,^ are from Wring. */
"||"|"|"|"+"|"\\/"|""|"" BEGIN(0); return token::OP_OR;
"&&"|"/\\"|"∧"|"∩" BEGIN(0); return token::OP_AND;
"&" BEGIN(0); return token::OP_SHORT_AND;
"^"|"xor"|"⊕" BEGIN(0); return token::OP_XOR;
{ARROWL}|{DARROWL} BEGIN(0); return token::OP_IMPLIES;
{ARROWLR}|{DARROWLR} BEGIN(0); return token::OP_EQUIV;
/* TSLF-like syntactic sugar:
X[3]f = XXXf
F[2..4]f = XX(f | X(f | Xf))
G[2..4]f = XX(f & X(f & Xf))
We also have to deal with the Spin-like notation for G.
X[]f = XGf
*/
"X"[ \t\n\r]*"[]" yyless(1); return token::OP_X;
"F"[ \t\n\r]*"[]" yyless(1); return token::OP_F;
"G"[ \t\n\r]*"[]" yyless(1); return token::OP_G;
"X"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_XREP;
"F"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_FREP;
"G"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_GREP;
/* <> (DIAMOND) and [] (BOX), are used in Spin.
() (CIRCLE) is not, but would make sense. */
"F"|{DIAMOND} BEGIN(0); return token::OP_F;
"G"|{BOX} BEGIN(0); return token::OP_G;
"U" BEGIN(0); return token::OP_U;
"R"|"V" BEGIN(0); return token::OP_R;
"X"|{CIRCLE} BEGIN(0); return token::OP_X;
{CIRCLEX} BEGIN(0); return token::OP_STRONG_X;
"W" BEGIN(0); return token::OP_W;
"M" BEGIN(0); return token::OP_M;
/* The combining overline or macron (overbar) should normally
occur only after a single letter, but we do not check that. */
"=0"|"̅"|"̄" return token::OP_POST_NEG;
"=1" return token::OP_POST_POS;
<*>[ \t\n\r]+ /* discard whitespace */ yylloc->step ();
/* An Atomic proposition cannot start with the letter
used by a unary operator (F,G,X), unless this
letter is followed by a digit in which case we assume
it's an ATOMIC_PROP (even though F0 could be seen as Ffalse, we
don't, because Ffalse is never used in practice).
*/
<INITIAL>[a-zA-EH-WYZ_.][a-zA-Z0-9_.]* |
<INITIAL>[FGX][0-9][a-zA-Z0-9_.]* |
/*
However if we have just parsed an atomic proposition, then we are
not expecting another atomic proposition, so we can be stricter
and disallow propositions that start with M, U, R, V, and W. If
you wonder why we do this, consider the Wring formula `p=0Uq=1'.
When p is parsed, we enter the not_prop start condition, we
remain into this condition when `=0' is processed, and then
because we are in this condition we will not consider `Uq' as an
atomic proposition but as a `U' operator followed by a `q' atomic
proposition.
We also disable atomic proposition that may look like a combination
of a binary operator followed by several unary operators.
E.g. UFXp. This way, `p=0UFXp=1' will be parsed as `(p=0)U(F(X(p=1)))'.
*/
<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.]* |
<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.][a-zA-Z0-9_.]* {
yylval->emplace<std::string>(yytext, yyleng);
BEGIN(not_prop);
return token::ATOMIC_PROP;
}
/* Atomic propositions can also be enclosed in double quotes.
orig_cond stores the parsing condition to use after the
string has been parsed. */
"\"" {
orig_cond = not_prop;
BEGIN(in_STRING);
}
<lbt>"\"" {
orig_cond = lbt;
BEGIN(in_STRING);
}
<in_STRING>{
\" {
BEGIN(orig_cond);
yylval->emplace<std::string>(s);
return token::ATOMIC_PROP;
}
{eol} {
s.append(yytext, yyleng);
yylloc->lines(yyleng); yylloc->end.column = 1;
}
{eol2} {
s.append(yytext, yyleng);
yylloc->lines(yyleng / 2); yylloc->end.column = 1;
}
\\. s += yytext[1];
[^\\\"\n\r]+ s.append(yytext, yyleng);
<<EOF>> {
error_list.push_back(
spot::one_parse_error(*yylloc,
"unclosed string"));
BEGIN(orig_cond);
yylval->emplace<std::string>(s);
return token::ATOMIC_PROP;
}
}
/* these are operators */
<lbt>[eitfXFGUVRWM] return *yytext;
/* in LBT's format, atomic proposition look like p0 or p3141592, but
for compatibility with ltl2dstar we also accept any alphanumeric
string that is not an operator. */
<lbt>[a-zA-Z._][a-zA-Z0-9._]* {
yylval->emplace<std::string>(yytext, yyleng);
return token::ATOMIC_PROP;
}
<*>. return *yytext;
<<EOF>> return token::END_OF_INPUT;
%%
void
flex_set_buffer(const std::string& buf,
int start_tok, bool lenient)
{
yypush_buffer_state(YY_CURRENT_BUFFER);
(void) yy_scan_bytes(buf.c_str(), buf.size());
start_token = start_tok;
if (start_tok == token::START_LBT)
yy_push_state(lbt);
else
yy_push_state(0);
lenient_mode = lenient;
}
void
flex_unset_buffer()
{
(void)&yy_top_state; // shut up a g++ warning.
yy_pop_state();
yypop_buffer_state();
missing_parent = false;
}