* spot/parseaut/public.hh, spot/parseaut/scanaut.ll: When parsing automata read from some given FD, do not close the file descriptor, as the caller is likely to already close it, and closing FDs twice is very bad. This seems to have be the root of some issue we had with recent jupyter versions, were 0MQ would crash with "Bad file descriptor" messages. Also do not close stdin while we are at it. * NEWS: Mention the bug.
480 lines
12 KiB
LLVM
480 lines
12 KiB
LLVM
/* -*- coding: utf-8 -*-
|
|
** Copyright (C) 2014, 2015, 2016 Laboratoire de Recherche et Développement
|
|
** de l'Epita (LRDE).
|
|
**
|
|
** This file is part of Spot, a model checking library.
|
|
**
|
|
** Spot is free software; you can redistribute it and/or modify it
|
|
** under the terms of the GNU General Public License as published by
|
|
** the Free Software Foundation; either version 3 of the License, or
|
|
** (at your option) any later version.
|
|
**
|
|
** Spot is distributed in the hope that it will be useful, but WITHOUT
|
|
** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
** or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
** License for more details.
|
|
**
|
|
** You should have received a copy of the GNU General Public License
|
|
** along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
%option noyywrap
|
|
%option prefix="hoayy"
|
|
%option outfile="lex.yy.c"
|
|
/* %option debug */
|
|
|
|
%{
|
|
#include <string>
|
|
#include <sys/stat.h>
|
|
#include <spot/parseaut/parsedecl.hh>
|
|
#include "spot/priv/trim.hh"
|
|
|
|
#define YY_USER_ACTION yylloc->columns(yyleng);
|
|
#define YY_NEVER_INTERACTIVE 1
|
|
|
|
typedef hoayy::parser::token token;
|
|
static unsigned comment_level = 0;
|
|
static unsigned parent_level = 0;
|
|
static int orig_cond = 0;
|
|
static bool lbtt_s = false;
|
|
static bool lbtt_t = false;
|
|
static unsigned lbtt_states = 0;
|
|
static bool yyin_close = true;
|
|
|
|
%}
|
|
|
|
eol \n+|\r+
|
|
eol2 (\n\r)+|(\r\n)+
|
|
eols ({eol}|{eol2})*
|
|
identifier [[:alpha:]_][[:alnum:]_.-]*
|
|
|
|
%x in_COMMENT in_STRING in_NEVER_PAR
|
|
%s in_HOA in_NEVER in_LBTT_HEADER
|
|
%s in_LBTT_STATE in_LBTT_INIT in_LBTT_TRANS
|
|
%s in_LBTT_T_ACC in_LBTT_S_ACC in_LBTT_GUARD
|
|
%s in_DSTAR
|
|
%%
|
|
|
|
%{
|
|
std::string s;
|
|
yylloc->step();
|
|
|
|
auto parse_int = [&](){
|
|
errno = 0;
|
|
char* end;
|
|
unsigned long n = strtoul(yytext, &end, 10);
|
|
yylval->num = n;
|
|
if (errno || yylval->num != n)
|
|
{
|
|
error_list.push_back(spot::parse_aut_error(*yylloc, "value too large"));
|
|
yylval->num = 0;
|
|
}
|
|
return end;
|
|
};
|
|
%}
|
|
|
|
|
|
/* skip blanks and comments */
|
|
{eol} yylloc->lines(yyleng); yylloc->step();
|
|
{eol2} yylloc->lines(yyleng / 2); yylloc->step();
|
|
[ \t\v\f]+ yylloc->step();
|
|
"/*" {
|
|
orig_cond = YY_START;
|
|
BEGIN(in_COMMENT);
|
|
comment_level = 1;
|
|
}
|
|
<INITIAL>"HOA:" BEGIN(in_HOA); return token::HOA;
|
|
<INITIAL,in_HOA>"--ABORT--" BEGIN(INITIAL); throw spot::hoa_abort{*yylloc};
|
|
<INITIAL>"never" BEGIN(in_NEVER); return token::NEVER;
|
|
<INITIAL>"DSA" BEGIN(in_DSTAR); return token::DSA;
|
|
<INITIAL>"DRA" BEGIN(in_DSTAR); return token::DRA;
|
|
|
|
<INITIAL>[0-9]+[ \t][0-9]+[ts]? {
|
|
BEGIN(in_LBTT_HEADER);
|
|
char* end = 0;
|
|
errno = 0;
|
|
unsigned long n = strtoul(yytext, &end, 10);
|
|
yylval->num = n;
|
|
unsigned s = end - yytext;
|
|
yylloc->end = yylloc->begin;
|
|
yylloc->end.columns(s);
|
|
yyless(s);
|
|
if (errno || yylval->num != n)
|
|
{
|
|
error_list.push_back(
|
|
spot::parse_aut_error(*yylloc,
|
|
"value too large"));
|
|
yylval->num = 0;
|
|
}
|
|
lbtt_states = yylval->num;
|
|
return token::LBTT;
|
|
}
|
|
|
|
<in_HOA>{
|
|
"States:" return token::STATES;
|
|
"Start:" return token::START;
|
|
"AP:" return token::AP;
|
|
"Alias:" return token::ALIAS;
|
|
"Acceptance:" return token::ACCEPTANCE;
|
|
"acc-name:" return token::ACCNAME;
|
|
"tool:" return token::TOOL;
|
|
"name:" return token::NAME;
|
|
"properties:" return token::PROPERTIES;
|
|
"spot.highlight.states:" return token::SPOT_HIGHLIGHT_STATES;
|
|
"spot.highlight.edges:" return token::SPOT_HIGHLIGHT_EDGES;
|
|
"--BODY--" return token::BODY;
|
|
"--END--" BEGIN(INITIAL); return token::END;
|
|
"State:" return token::STATE;
|
|
[tf{}()\[\]&|!] return *yytext;
|
|
|
|
{identifier} {
|
|
yylval->str = new std::string(yytext, yyleng);
|
|
return token::IDENTIFIER;
|
|
}
|
|
{identifier}":" {
|
|
yylval->str = new std::string(yytext, yyleng - 1);
|
|
return token::HEADERNAME;
|
|
}
|
|
"@"[[:alnum:]_.-]+ {
|
|
yylval->str = new std::string(yytext + 1, yyleng - 1);
|
|
return token::ANAME;
|
|
}
|
|
[0-9]+ parse_int(); return token::INT;
|
|
}
|
|
|
|
<in_DSTAR>{
|
|
"States:" return token::STATES;
|
|
"State:" return token::STATE;
|
|
"Start:" return token::START;
|
|
"AP:" return token::AP;
|
|
"v2" return token::V2;
|
|
"explicit" return token::EXPLICIT;
|
|
"Comment:".* yylloc->step();
|
|
"//".* yylloc->step();
|
|
"Acceptance-Pairs:" return token::ACCPAIRS;
|
|
"Acc-Sig:" return token::ACCSIG;
|
|
"---" return token::ENDOFHEADER;
|
|
[0-9]+ parse_int(); return token::INT;
|
|
/* The start of any automaton is the end of this one.
|
|
We do not try to detect LBTT automata, as that would
|
|
be too hard to distinguish from state numbers. */
|
|
{eols}("HOA:"|"never"|"DSA"|"DRA") {
|
|
yylloc->end = yylloc->begin;
|
|
yyless(0);
|
|
BEGIN(INITIAL);
|
|
return token::ENDDSTAR;
|
|
}
|
|
<<EOF>> return token::ENDDSTAR;
|
|
}
|
|
|
|
<in_NEVER>{
|
|
"skip" return token::SKIP;
|
|
"if" return token::IF;
|
|
"fi" return token::FI;
|
|
"do" return token::DO;
|
|
"od" return token::OD;
|
|
"->" return token::ARROW;
|
|
"goto" return token::GOTO;
|
|
"false"|"0" return token::FALSE;
|
|
"atomic" return token::ATOMIC;
|
|
"assert" return token::ASSERT;
|
|
|
|
("!"[ \t]*)?"(" {
|
|
parent_level = 1;
|
|
BEGIN(in_NEVER_PAR);
|
|
yylval->str = new std::string(yytext, yyleng);
|
|
}
|
|
|
|
"true"|"1" {
|
|
yylval->str = new std::string(yytext, yyleng);
|
|
return token::FORMULA;
|
|
}
|
|
|
|
[a-zA-Z][a-zA-Z0-9_]* {
|
|
yylval->str = new std::string(yytext, yyleng);
|
|
return token::IDENTIFIER;
|
|
}
|
|
}
|
|
|
|
/* Note: the LBTT format is scanf friendly, but not Bison-friendly.
|
|
If we only tokenize it as a stream of INTs, the parser will have
|
|
a very hard time recognizing what is a state from what is a
|
|
transitions. As a consequence we abuse the start conditions to
|
|
maintain a state an return integers with different semantic types
|
|
depending on the purpose of those integers. */
|
|
<in_LBTT_HEADER>{
|
|
[0-9]+[st]* {
|
|
BEGIN(in_LBTT_STATE);
|
|
auto end = parse_int();
|
|
lbtt_s = false;
|
|
lbtt_t = false;
|
|
if (end)
|
|
while (int c = *end++)
|
|
{
|
|
if (c == 's')
|
|
lbtt_s = true;
|
|
else // c == 't'
|
|
lbtt_t = true;
|
|
}
|
|
if (!lbtt_t)
|
|
lbtt_s = true;
|
|
if (lbtt_states == 0)
|
|
{
|
|
BEGIN(INITIAL);
|
|
return token::LBTT_EMPTY;
|
|
}
|
|
if (lbtt_s && !lbtt_t)
|
|
return token::INT_S;
|
|
else
|
|
return token::INT;
|
|
}
|
|
}
|
|
|
|
<in_LBTT_STATE>[0-9]+ {
|
|
parse_int();
|
|
BEGIN(in_LBTT_INIT);
|
|
return token::STATE_NUM;
|
|
}
|
|
<in_LBTT_INIT>[01] {
|
|
yylval->num = *yytext - '0';
|
|
if (lbtt_s)
|
|
BEGIN(in_LBTT_S_ACC);
|
|
else
|
|
BEGIN(in_LBTT_TRANS);
|
|
return token::INT;
|
|
}
|
|
<in_LBTT_S_ACC>{
|
|
[0-9]+ parse_int(); return token::ACC;
|
|
"-1" BEGIN(in_LBTT_TRANS); yylloc->step();
|
|
}
|
|
<in_LBTT_TRANS>{
|
|
[0-9]+ {
|
|
parse_int();
|
|
if (lbtt_t)
|
|
BEGIN(in_LBTT_T_ACC);
|
|
else
|
|
BEGIN(in_LBTT_GUARD);
|
|
return token::DEST_NUM;
|
|
}
|
|
"-1" {
|
|
if (--lbtt_states)
|
|
{
|
|
BEGIN(in_LBTT_STATE);
|
|
yylloc->step();
|
|
}
|
|
else
|
|
{
|
|
BEGIN(INITIAL);
|
|
return token::ENDAUT;
|
|
}
|
|
}
|
|
}
|
|
<in_LBTT_T_ACC>{
|
|
[0-9]+ parse_int(); return token::ACC;
|
|
"-1" BEGIN(in_LBTT_GUARD); yylloc->step();
|
|
}
|
|
<in_LBTT_GUARD>{
|
|
[^\n\r]* {
|
|
yylval->str = new std::string(yytext, yyleng);
|
|
BEGIN(in_LBTT_TRANS);
|
|
return token::STRING;
|
|
}
|
|
}
|
|
|
|
|
|
<in_COMMENT>{
|
|
"/*" ++comment_level;
|
|
[^*/\n\r]* continue;
|
|
"/"[^*\n\r]* continue;
|
|
"*" continue;
|
|
{eol} yylloc->lines(yyleng); yylloc->end.column = 1;
|
|
{eol2} yylloc->lines(yyleng / 2); yylloc->end.column = 1;
|
|
"*/" {
|
|
if (--comment_level == 0)
|
|
{
|
|
yylloc->step();
|
|
BEGIN(orig_cond);
|
|
}
|
|
}
|
|
<<EOF>> {
|
|
BEGIN(orig_cond);
|
|
error_list.push_back(
|
|
spot::parse_aut_error(*yylloc,
|
|
"unclosed comment"));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* matched late, so that the in_LBTT_GUARD pattern has precedence */
|
|
"\"" {
|
|
orig_cond = YY_START;
|
|
BEGIN(in_STRING);
|
|
comment_level = 1;
|
|
}
|
|
|
|
<in_STRING>{
|
|
\" {
|
|
BEGIN(orig_cond);
|
|
yylval->str = new std::string(s);
|
|
return token::STRING;
|
|
}
|
|
{eol} {
|
|
s.append(yytext, yyleng);
|
|
yylloc->lines(yyleng); yylloc->end.column = 1;
|
|
}
|
|
{eol2} {
|
|
s.append(yytext, yyleng);
|
|
yylloc->lines(yyleng / 2); yylloc->end.column = 1;
|
|
}
|
|
\\. s += yytext[1];
|
|
[^\\\"\n\r]+ s.append(yytext, yyleng);
|
|
<<EOF>> {
|
|
error_list.push_back(
|
|
spot::parse_aut_error(*yylloc,
|
|
"unclosed string"));
|
|
BEGIN(orig_cond);
|
|
yylval->str = new std::string(s);
|
|
return token::STRING;
|
|
}
|
|
}
|
|
|
|
<in_NEVER_PAR>{
|
|
"(" {
|
|
++parent_level;
|
|
yylval->str->append(yytext, yyleng);
|
|
}
|
|
/* if we match ")&&(" or ")||(", stay in <in_NEVER_PAR> mode */
|
|
")"[ \t]*("&&"|"||")[ \t!]*"(" {
|
|
yylval->str->append(yytext, yyleng);
|
|
}
|
|
")" {
|
|
yylval->str->append(yytext, yyleng);
|
|
if (!--parent_level)
|
|
{
|
|
BEGIN(in_NEVER);
|
|
spot::trim(*yylval->str);
|
|
return token::FORMULA;
|
|
}
|
|
}
|
|
{eol} {
|
|
yylval->str->append(yytext, yyleng);
|
|
yylloc->lines(yyleng); yylloc->end.column = 1;
|
|
}
|
|
{eol2} {
|
|
yylval->str->append(yytext, yyleng);
|
|
yylloc->lines(yyleng / 2); yylloc->end.column = 1;
|
|
}
|
|
[^()\n\r]+ yylval->str->append(yytext, yyleng);
|
|
<<EOF>> {
|
|
error_list.push_back(
|
|
spot::parse_aut_error(*yylloc,
|
|
"missing closing parenthese"));
|
|
yylval->str->append(parent_level, ')');
|
|
BEGIN(in_NEVER);
|
|
spot::trim(*yylval->str);
|
|
return token::FORMULA;
|
|
}
|
|
}
|
|
|
|
. return *yytext;
|
|
|
|
%{
|
|
/* Dummy use of yyunput to shut up a gcc warning. */
|
|
(void) &yyunput;
|
|
%}
|
|
|
|
%%
|
|
|
|
namespace spot
|
|
{
|
|
void
|
|
hoayyreset()
|
|
{
|
|
BEGIN(INITIAL);
|
|
comment_level = 0;
|
|
parent_level = 0;
|
|
}
|
|
|
|
int
|
|
hoayyopen(const std::string &name)
|
|
{
|
|
bool want_interactive = false;
|
|
|
|
// yy_flex_debug = 1;
|
|
if (name == "-")
|
|
{
|
|
// If the input is a pipe, make the scanner
|
|
// interactive so that it does not wait for the input
|
|
// buffer to be full to process automata.
|
|
struct stat s;
|
|
if (fstat(fileno(stdin), &s) < 0)
|
|
throw std::runtime_error("fstat failed");
|
|
if (S_ISFIFO(s.st_mode))
|
|
want_interactive = true;
|
|
|
|
yyin = stdin;
|
|
yyin_close = false;
|
|
}
|
|
else
|
|
{
|
|
yyin = fopen(name.c_str(), "r");
|
|
if (!yyin)
|
|
return 1;
|
|
yyin_close = true;
|
|
}
|
|
// Reset the lexer in case a previous parse
|
|
// ended badly.
|
|
YY_NEW_FILE;
|
|
hoayyreset();
|
|
if (want_interactive)
|
|
yy_set_interactive(1);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
hoayystring(const char* data)
|
|
{
|
|
yy_scan_string(data);
|
|
hoayyreset();
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
hoayyopen(int fd)
|
|
{
|
|
bool want_interactive = false;
|
|
yyin_close = false;
|
|
|
|
yyin = fdopen(fd, "r");
|
|
|
|
if (!yyin)
|
|
throw std::runtime_error("fdopen failed");
|
|
|
|
// If the input is a pipe, make the scanner
|
|
// interactive so that it does not wait for the input
|
|
// buffer to be full to process automata.
|
|
struct stat s;
|
|
if (fstat(fd, &s) < 0)
|
|
throw std::runtime_error("fstat failed");
|
|
if (S_ISFIFO(s.st_mode))
|
|
want_interactive = true;
|
|
|
|
// Reset the lexer in case a previous parse
|
|
// ended badly.
|
|
YY_NEW_FILE;
|
|
hoayyreset();
|
|
if (want_interactive)
|
|
yy_set_interactive(1);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
hoayyclose()
|
|
{
|
|
if (yyin)
|
|
{
|
|
if (yyin_close)
|
|
fclose(yyin);
|
|
yyin = NULL;
|
|
}
|
|
}
|
|
}
|