Fix error reporting in utf8-encoded LTL formulae.

* src/ltlparse/public.hh (fix_utf8_locations): New function. * src/ltlparse/fmterror.cc (fix_utf8_locations): Implement it. (format_parse_errors): Rename as ... (format_parse_errors_aux): ... this. (format_parse_errors): New implementation that call fix_utf8_locations() before format_parse_errors_aux() on valid utf8 strings. * src/ltlparse/Makefile.am: Include $(top_srcdir). * src/ltltest/utf8.test: New file. * src/ltltest/Makefile.am: Add it. * src/ltltest/parse.test: Fix header.
2012-03-29 14:14:56 +02:00 · 2012-03-29 14:14:56 +02:00 · 7e4787da22
commit 7e4787da22
parent 403170f5c8
6 changed files with 192 additions and 27 deletions
--- a/src/ltlparse/Makefile.am
+++ b/src/ltlparse/Makefile.am
@ -1,5 +1,5 @@
-## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et
+## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche
-## Développement de l'Epita (LRDE).
+## et Développement de l'Epita (LRDE).
 ## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
 ## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
 ## Université Pierre et Marie Curie.
@ -21,7 +21,7 @@
 ## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 ## 02111-1307, USA.
-AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT
+AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT
 # Disable -Werror because too many versions of flex yield warnings.
 AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)
--- a/src/ltlparse/fmterror.cc
+++ b/src/ltlparse/fmterror.cc
@ -1,5 +1,5 @@
-// Copyright (C) 2010 Laboratoire de Recherche et Développement de
+// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement
-// l'Epita (LRDE).
+// de l'Epita (LRDE).
 // Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
 // 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
 // Université Pierre et Marie Curie.
@ -23,23 +23,64 @@
 #include "public.hh"
 #include <ostream>
 #include <iterator>
 #include <vector>
 #include "utf8/utf8.h"
 namespace spot
 {
  namespace ltl
  {
-    bool
+    void
-    format_parse_errors(std::ostream& os,
+    fix_utf8_locations(const std::string& ltl_string,
 			const std::string& ltl_string,
 		       parse_error_list& error_list)
    {
      // LUT to convert byte positions to utf8 positions.
      // (The +2 is to account for position 0, not used,
      // and position ltl_string.size()+1 denoting EOS.)
      std::vector<unsigned> b2u(ltl_string.size() + 2);
      // i will iterate over all utf8 characters between b and e
      std::string::const_iterator b = ltl_string.begin();
      std::string::const_iterator i = b;
      std::string::const_iterator e = ltl_string.end();
      unsigned n = 0;		// number of current utf8 character
      unsigned prev = 0;	// last byte of previous utf8 character
      while (i != e)
 	{
 	  utf8::next(i, e);
 	  ++n;
 	  unsigned d = std::distance(b, i);
 	  while (prev < d)
 	    b2u[++prev] = n;
 	}
      b2u[++prev] = ++n;
      // use b2u to update error_list
      parse_error_list::iterator it;
      for (it = error_list.begin(); it != error_list.end(); ++it)
 	{
 	  ltlyy::location& l = it->first;
 	  l.begin.column = b2u[l.begin.column];
 	  l.end.column = b2u[l.end.column];
 	}
    }
    namespace
    {
      bool
      format_parse_errors_aux(std::ostream& os,
 			      const std::string& ltl_string,
 			      const parse_error_list& error_list)
      {
 	bool printed = false;
-      spot::ltl::parse_error_list::iterator it;
+	parse_error_list::const_iterator it;
 	for (it = error_list.begin(); it != error_list.end(); ++it)
 	  {
 	    os << ">>> " << ltl_string << std::endl;
-	  ltlyy::location& l = it->first;
+	    const ltlyy::location& l = it->first;
 	    unsigned n = 1;
 	    for (; n < 4 + l.begin.column; ++n)
@ -54,6 +95,24 @@ namespace spot
 	  }
 	return printed;
      }
    }
    bool
    format_parse_errors(std::ostream& os,
 			const std::string& ltl_string,
 			const parse_error_list& error_list)
    {
      if (utf8::is_valid(ltl_string.begin(), ltl_string.end()))
 	{
 	  parse_error_list fixed = error_list;
 	  fix_utf8_locations(ltl_string, fixed);
 	  return format_parse_errors_aux(os, ltl_string, fixed);
 	}
      else
 	{
 	  return format_parse_errors_aux(os, ltl_string, error_list);
 	}
    }
  }
 }
--- a/src/ltlparse/public.hh
+++ b/src/ltlparse/public.hh
@ -1,5 +1,5 @@
-// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de
+// Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et
-// l'Epita (LRDE).
+// Développement de l'Epita (LRDE).
 // Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
 // Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
 // Université Pierre et Marie Curie.
@ -94,6 +94,12 @@ namespace spot
    /// \brief Format diagnostics produced by spot::ltl::parse
    ///        or spot::ltl::ratexp
    ///
    /// If the string is utf8 encoded, spot::ltl::fix_utf8_locations()
    /// will be used to report correct utf8 locations (assuming the
    /// output is utf8 aware).  Nonetheless, the supplied \a
    /// error_list will not be modified.
    ///
    /// \param os Where diagnostics should be output.
    /// \param input_string The string that were parsed.
    /// \param error_list The error list filled by spot::ltl::parse
@ -101,6 +107,33 @@ namespace spot
    /// \return \c true iff any diagnostic was output.
    bool format_parse_errors(std::ostream& os,
 			     const std::string& input_string,
 			     const parse_error_list& error_list);
    /// \brief Fix location of diagnostics assuming the input is utf8.
    ///
    /// The spot::ltl::parse() and spot::ltl::parse_sere() function
    /// return a parse_error_list that contain locations specified at
    /// the byte level.  Although these parser recognize some
    /// utf8 characters they only work byte by byte and will report
    /// positions by counting byte.
    ///
    /// This function fixes the positions returned by the parser to
    /// look correct when the string is interpreted as a utf8-encoded
    /// string.
    ///
    /// It is invalid to call this function on a string that is not
    /// valid utf8.
    ///
    /// You should NOT call this function before calling
    /// spot::ltl::format_parse_errors() because it is already called
    /// inside if needed.  You may need this function only if you want
    /// to write your own error reporting code.
    ///
    /// \param input_string The string that were parsed.
    /// \param error_list The error list filled by spot::ltl::parse
    ///        or spot::ltl::parse_sere while parsing \a input_string.
    void
    fix_utf8_locations(const std::string& ltl_string,
 		       parse_error_list& error_list);
    /// @}
--- a/src/ltltest/Makefile.am
+++ b/src/ltltest/Makefile.am
@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS)
 TESTS = \
  parse.test \
  parseerr.test \
  utf8.test \
  length.test \
  equals.test \
  tostring.test \
--- a/src/ltltest/parse.test
+++ b/src/ltltest/parse.test
@ -2,7 +2,7 @@
 # Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
 # de l'Epita (LRDE).
 # Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
-# d<EFBFBD>partement Syst<73>mes R<>partis Coop<6F>ratifs (SRC), Universit<69> Pierre
+# département Systèmes Répartis Coopératifs (SRC), Université Pierre
 # et Marie Curie.
 #
 # This file is part of Spot, a model checking library.
--- a/src/ltltest/utf8.test
+++ b/src/ltltest/utf8.test
@ -0,0 +1,72 @@
 #! /bin/sh
 # Copyright (C) 2012 Laboratoire de Recherche et Developpement
 # de l'Epita (LRDE).
 #
 # This file is part of Spot, a model checking library.
 #
 # Spot is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # Spot is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 # License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with Spot; see the file COPYING.  If not, write to the Free
 # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 # 02111-1307, USA.
 # Make sure
 . ./defs || exit 1
 # ----
 run 0 ../ltl2text '□◯a' >out
 echo 'unop(G, unop(X, AP(a)))' > exp
 cmp out exp
 # ----
 run 0 ../ltl2text '□◯"αβγ"' >out
 echo 'unop(G, unop(X, AP(αβγ)))' > exp
 cmp out exp
 # ----
 set +x
 run 1 ../ltl2text '□)◯a' 2>err
 set -x
 cat >exp <<EOF
 >>> □)◯a
     ^
 syntax error, unexpected closing parenthesis
 >>> □)◯a
    ^
 missing right operand for "always operator"
 >>> □)◯a
     ^^^
 ignoring trailing garbage
 EOF
 cmp exp err
 # ----
 set +x
 run 1 ../ltl2text '"αβγ"X' 2>err
 set -x
 cat >exp <<EOF
 >>> "αβγ"X
         ^
 syntax error, unexpected next operator
 >>> "αβγ"X
         ^
 ignoring trailing garbage
 EOF
 cmp exp err