From ea9d894d016b147e7de15b42e87cfe562f3991b3 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Sat, 2 Feb 2019 11:39:03 +0100 Subject: [PATCH] parsetl: better handling of MS-DOS line endings * spot/parsetl/scantl.ll: Ignore \r. * tests/core/lbt.test: Add a test. * NEWS: Mention the issue. --- NEWS | 4 ++++ spot/parsetl/scantl.ll | 26 +++++++++++++------------- tests/core/lbt.test | 7 +++++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 282288b19..5ca9b86a3 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,10 @@ New in spot 2.7.0.dev (not yet release) converted into IndexError Python exceptions (instead of aborting the program). + - The LTL parser would choke on carriage returns when command-line + tools such as ltlfilt, ltlcross, or ltl2tgba were run on files of + formulas with MS-DOS line endings. + New in spot 2.7 (2018-12-11) Command-line tools: diff --git a/spot/parsetl/scantl.ll b/spot/parsetl/scantl.ll index 6aea2b237..2d5f8b8ac 100644 --- a/spot/parsetl/scantl.ll +++ b/spot/parsetl/scantl.ll @@ -1,9 +1,9 @@ /* -*- coding: utf-8 -*- -** Copyright (C) 2010-2015, 2017, 2018, Laboratoire de Recherche et +** Copyright (C) 2010-2015, 2017-2019, Laboratoire de Recherche et ** Développement de l'Epita (LRDE). -** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6), -** département Systèmes Répartis Coopératifs (SRC), Université Pierre -** et Marie Curie. +** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 +** (LIP6), département Systèmes Répartis Coopératifs (SRC), Université +** Pierre et Marie Curie. ** ** This file is part of Spot, a model checking library. ** @@ -169,7 +169,7 @@ eol2 (\n\r)+|(\r\n)+ ++parent_level; yylval->str->append(yytext, yyleng); } - "}"[ \t\n]*"!" { + "}"[ \t]*"!" { if (--parent_level) { yylval->str->append(yytext, yyleng); @@ -205,7 +205,7 @@ eol2 (\n\r)+|(\r\n)+ } ")" BEGIN(not_prop); return token::PAR_CLOSE; -"}"[ \t\n]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE; +"}"[ \t]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE; "}" BEGIN(not_prop); return token::BRACE_CLOSE; /* Must go before the other operators, because the F of FALSE @@ -280,12 +280,12 @@ eol2 (\n\r)+|(\r\n)+ We also have to deal with the Spin-like notation for G. X[]f = XGf */ -"X"[ \t\n]*"[]" yyless(1); return token::OP_X; -"F"[ \t\n]*"[]" yyless(1); return token::OP_F; -"G"[ \t\n]*"[]" yyless(1); return token::OP_G; -"X"[ \t\n]*"[" BEGIN(sqbracket); return token::OP_XREP; -"F"[ \t\n]*"[" BEGIN(sqbracket); return token::OP_FREP; -"G"[ \t\n]*"[" BEGIN(sqbracket); return token::OP_GREP; +"X"[ \t\n\r]*"[]" yyless(1); return token::OP_X; +"F"[ \t\n\r]*"[]" yyless(1); return token::OP_F; +"G"[ \t\n\r]*"[]" yyless(1); return token::OP_G; +"X"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_XREP; +"F"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_FREP; +"G"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_GREP; /* <> (DIAMOND) and [] (BOX), are used in Spin. () (CIRCLE) is not, but would make sense. */ "F"|{DIAMOND} BEGIN(0); return token::OP_F; @@ -301,7 +301,7 @@ eol2 (\n\r)+|(\r\n)+ "=0"|"̅"|"̄" return token::OP_POST_NEG; "=1" return token::OP_POST_POS; -<*>[ \t\n]+ /* discard whitespace */ yylloc->step (); +<*>[ \t\n\r]+ /* discard whitespace */ yylloc->step (); /* An Atomic proposition cannot start with the letter used by a unary operator (F,G,X), unless this diff --git a/tests/core/lbt.test b/tests/core/lbt.test index fce309208..7602e1945 100755 --- a/tests/core/lbt.test +++ b/tests/core/lbt.test @@ -1,6 +1,6 @@ #!/bin/sh # -*- coding: utf-8 -*- -# Copyright (C) 2013, 2016, 2017 Laboratoire de Recherche et +# Copyright (C) 2013, 2016, 2017, 2019 Laboratoire de Recherche et # Développement de l'Epita (LRDE). # # This file is part of Spot, a model checking library. @@ -97,10 +97,13 @@ test `wc -l < formulas.2` -eq 168 test `wc -l < formulas.3` -eq 168 test `wc -l < formulas.4` -eq 168 +# Add some carriage returns to simulate MS-DOS files and +# make sure our parser does not mind. +sed 's/$/\r/' formulas.2 > formulas.2ms # The --csv-escape option is now obsolete and replaced by double # quotes in the format string. So eventually the first two lines # should disappear. -run 0 ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5 +run 0 ltlfilt formulas.2ms --csv-escape --format='%L,%f' > formulas.5 run 0 ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6 run 0 ltlfilt formulas.2 --format='%L,"%f"' > formulas.5a run 0 ltlfilt formulas.5/2 --format='%L,"%f"' > formulas.6a