From ea9d894d016b147e7de15b42e87cfe562f3991b3 Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Sat, 2 Feb 2019 11:39:03 +0100
Subject: [PATCH] parsetl: better handling of MS-DOS line endings

* spot/parsetl/scantl.ll: Ignore \r.
* tests/core/lbt.test: Add a test.
* NEWS: Mention the issue.
---
 NEWS                   |  4 ++++
 spot/parsetl/scantl.ll | 26 +++++++++++++-------------
 tests/core/lbt.test    |  7 +++++--
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index 282288b19..5ca9b86a3 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,10 @@ New in spot 2.7.0.dev (not yet release)
     converted into IndexError Python exceptions (instead of aborting
     the program).
 
+  - The LTL parser would choke on carriage returns when command-line
+    tools such as ltlfilt, ltlcross, or ltl2tgba were run on files of
+    formulas with MS-DOS line endings.
+
 New in spot 2.7 (2018-12-11)
 
   Command-line tools:
diff --git a/spot/parsetl/scantl.ll b/spot/parsetl/scantl.ll
index 6aea2b237..2d5f8b8ac 100644
--- a/spot/parsetl/scantl.ll
+++ b/spot/parsetl/scantl.ll
@@ -1,9 +1,9 @@
 /* -*- coding: utf-8 -*-
-** Copyright (C) 2010-2015, 2017, 2018, Laboratoire de Recherche et
+** Copyright (C) 2010-2015, 2017-2019, Laboratoire de Recherche et
 ** Développement de l'Epita (LRDE).
-** Copyright (C) 2003, 2004  Laboratoire d'Informatique de Paris 6 (LIP6),
-** département Systèmes Répartis Coopératifs (SRC), Université Pierre
-** et Marie Curie.
+** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6
+** (LIP6), département Systèmes Répartis Coopératifs (SRC), Université
+** Pierre et Marie Curie.
 **
 ** This file is part of Spot, a model checking library.
 **
@@ -169,7 +169,7 @@ eol2        (\n\r)+|(\r\n)+
 				  ++parent_level;
 				  yylval->str->append(yytext, yyleng);
 				}
-         "}"[ \t\n]*"!"         {
+         "}"[ \t]*"!"           {
 				  if (--parent_level)
 				    {
                                       yylval->str->append(yytext, yyleng);
@@ -205,7 +205,7 @@ eol2        (\n\r)+|(\r\n)+
 }
 
 ")"				BEGIN(not_prop); return token::PAR_CLOSE;
-"}"[ \t\n]*"!"			BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
+"}"[ \t]*"!"			BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
 "}"				BEGIN(not_prop); return token::BRACE_CLOSE;
 
   /* Must go before the other operators, because the F of FALSE
@@ -280,12 +280,12 @@ eol2        (\n\r)+|(\r\n)+
     We also have to deal with the Spin-like notation for G.
     X[]f = XGf
  */
-"X"[ \t\n]*"[]"                 yyless(1); return token::OP_X;
-"F"[ \t\n]*"[]"                 yyless(1); return token::OP_F;
-"G"[ \t\n]*"[]"                 yyless(1); return token::OP_G;
-"X"[ \t\n]*"["                  BEGIN(sqbracket); return token::OP_XREP;
-"F"[ \t\n]*"["                  BEGIN(sqbracket); return token::OP_FREP;
-"G"[ \t\n]*"["                  BEGIN(sqbracket); return token::OP_GREP;
+"X"[ \t\n\r]*"[]"               yyless(1); return token::OP_X;
+"F"[ \t\n\r]*"[]"               yyless(1); return token::OP_F;
+"G"[ \t\n\r]*"[]"               yyless(1); return token::OP_G;
+"X"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_XREP;
+"F"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_FREP;
+"G"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_GREP;
   /* <> (DIAMOND) and [] (BOX), are used in Spin.
      () (CIRCLE) is not, but would make sense.  */
 "F"|{DIAMOND}			BEGIN(0); return token::OP_F;
@@ -301,7 +301,7 @@ eol2        (\n\r)+|(\r\n)+
 "=0"|"̅"|"̄"			return token::OP_POST_NEG;
 "=1"				return token::OP_POST_POS;
 
-<*>[ \t\n]+			/* discard whitespace */ yylloc->step ();
+<*>[ \t\n\r]+			/* discard whitespace */ yylloc->step ();
 
   /* An Atomic proposition cannot start with the letter
      used by a unary operator (F,G,X), unless this
diff --git a/tests/core/lbt.test b/tests/core/lbt.test
index fce309208..7602e1945 100755
--- a/tests/core/lbt.test
+++ b/tests/core/lbt.test
@@ -1,6 +1,6 @@
 #!/bin/sh
 # -*- coding: utf-8 -*-
-# Copyright (C) 2013, 2016, 2017 Laboratoire de Recherche et
+# Copyright (C) 2013, 2016, 2017, 2019 Laboratoire de Recherche et
 # Développement de l'Epita (LRDE).
 #
 # This file is part of Spot, a model checking library.
@@ -97,10 +97,13 @@ test `wc -l < formulas.2` -eq 168
 test `wc -l < formulas.3` -eq 168
 test `wc -l < formulas.4` -eq 168
 
+# Add some carriage returns to simulate MS-DOS files and
+# make sure our parser does not mind.
+sed 's/$/\r/' formulas.2 > formulas.2ms
 # The --csv-escape option is now obsolete and replaced by double
 # quotes in the format string.  So eventually the first two lines
 # should disappear.
-run 0 ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5
+run 0 ltlfilt formulas.2ms --csv-escape --format='%L,%f' > formulas.5
 run 0 ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6
 run 0 ltlfilt formulas.2 --format='%L,"%f"' > formulas.5a
 run 0 ltlfilt formulas.5/2 --format='%L,"%f"' > formulas.6a