diff --git a/src/ltlparse/Makefile.am b/src/ltlparse/Makefile.am index 39a222881..90ff5b5ce 100644 --- a/src/ltlparse/Makefile.am +++ b/src/ltlparse/Makefile.am @@ -1,5 +1,5 @@ -## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et -## Développement de l'Epita (LRDE). +## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche +## et Développement de l'Epita (LRDE). ## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris ## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), ## Université Pierre et Marie Curie. @@ -21,7 +21,7 @@ ## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ## 02111-1307, USA. -AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT +AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT # Disable -Werror because too many versions of flex yield warnings. AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=) diff --git a/src/ltlparse/fmterror.cc b/src/ltlparse/fmterror.cc index 64eb99723..598ac956a 100644 --- a/src/ltlparse/fmterror.cc +++ b/src/ltlparse/fmterror.cc @@ -1,5 +1,5 @@ -// Copyright (C) 2010 Laboratoire de Recherche et Développement de -// l'Epita (LRDE). +// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement +// de l'Epita (LRDE). // Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris // 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), // Université Pierre et Marie Curie. @@ -23,36 +23,95 @@ #include "public.hh" #include +#include +#include +#include "utf8/utf8.h" namespace spot { namespace ltl { + void + fix_utf8_locations(const std::string& ltl_string, + parse_error_list& error_list) + { + // LUT to convert byte positions to utf8 positions. + // (The +2 is to account for position 0, not used, + // and position ltl_string.size()+1 denoting EOS.) + std::vector b2u(ltl_string.size() + 2); + + // i will iterate over all utf8 characters between b and e + std::string::const_iterator b = ltl_string.begin(); + std::string::const_iterator i = b; + std::string::const_iterator e = ltl_string.end(); + + unsigned n = 0; // number of current utf8 character + unsigned prev = 0; // last byte of previous utf8 character + while (i != e) + { + utf8::next(i, e); + ++n; + unsigned d = std::distance(b, i); + while (prev < d) + b2u[++prev] = n; + } + b2u[++prev] = ++n; + + // use b2u to update error_list + parse_error_list::iterator it; + for (it = error_list.begin(); it != error_list.end(); ++it) + { + ltlyy::location& l = it->first; + l.begin.column = b2u[l.begin.column]; + l.end.column = b2u[l.end.column]; + } + } + + namespace + { + bool + format_parse_errors_aux(std::ostream& os, + const std::string& ltl_string, + const parse_error_list& error_list) + { + bool printed = false; + parse_error_list::const_iterator it; + for (it = error_list.begin(); it != error_list.end(); ++it) + { + os << ">>> " << ltl_string << std::endl; + const ltlyy::location& l = it->first; + + unsigned n = 1; + for (; n < 4 + l.begin.column; ++n) + os << ' '; + // Write at least one '^', even if begin==end. + os << '^'; + ++n; + for (; n < 4 + l.end.column; ++n) + os << '^'; + os << std::endl << it->second << std::endl << std::endl; + printed = true; + } + return printed; + } + } + bool format_parse_errors(std::ostream& os, const std::string& ltl_string, - parse_error_list& error_list) + const parse_error_list& error_list) { - bool printed = false; - spot::ltl::parse_error_list::iterator it; - for (it = error_list.begin(); it != error_list.end(); ++it) + if (utf8::is_valid(ltl_string.begin(), ltl_string.end())) { - os << ">>> " << ltl_string << std::endl; - ltlyy::location& l = it->first; - - unsigned n = 1; - for (; n < 4 + l.begin.column; ++n) - os << ' '; - // Write at least one '^', even if begin==end. - os << '^'; - ++n; - for (; n < 4 + l.end.column; ++n) - os << '^'; - os << std::endl << it->second << std::endl << std::endl; - printed = true; + parse_error_list fixed = error_list; + fix_utf8_locations(ltl_string, fixed); + return format_parse_errors_aux(os, ltl_string, fixed); + } + else + { + return format_parse_errors_aux(os, ltl_string, error_list); } - return printed; } } diff --git a/src/ltlparse/public.hh b/src/ltlparse/public.hh index 89085b1c6..6b0d0f33d 100644 --- a/src/ltlparse/public.hh +++ b/src/ltlparse/public.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de -// l'Epita (LRDE). +// Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et +// Développement de l'Epita (LRDE). // Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de // Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), // Université Pierre et Marie Curie. @@ -94,6 +94,12 @@ namespace spot /// \brief Format diagnostics produced by spot::ltl::parse /// or spot::ltl::ratexp + /// + /// If the string is utf8 encoded, spot::ltl::fix_utf8_locations() + /// will be used to report correct utf8 locations (assuming the + /// output is utf8 aware). Nonetheless, the supplied \a + /// error_list will not be modified. + /// /// \param os Where diagnostics should be output. /// \param input_string The string that were parsed. /// \param error_list The error list filled by spot::ltl::parse @@ -101,7 +107,34 @@ namespace spot /// \return \c true iff any diagnostic was output. bool format_parse_errors(std::ostream& os, const std::string& input_string, - parse_error_list& error_list); + const parse_error_list& error_list); + + /// \brief Fix location of diagnostics assuming the input is utf8. + /// + /// The spot::ltl::parse() and spot::ltl::parse_sere() function + /// return a parse_error_list that contain locations specified at + /// the byte level. Although these parser recognize some + /// utf8 characters they only work byte by byte and will report + /// positions by counting byte. + /// + /// This function fixes the positions returned by the parser to + /// look correct when the string is interpreted as a utf8-encoded + /// string. + /// + /// It is invalid to call this function on a string that is not + /// valid utf8. + /// + /// You should NOT call this function before calling + /// spot::ltl::format_parse_errors() because it is already called + /// inside if needed. You may need this function only if you want + /// to write your own error reporting code. + /// + /// \param input_string The string that were parsed. + /// \param error_list The error list filled by spot::ltl::parse + /// or spot::ltl::parse_sere while parsing \a input_string. + void + fix_utf8_locations(const std::string& ltl_string, + parse_error_list& error_list); /// @} } diff --git a/src/ltltest/Makefile.am b/src/ltltest/Makefile.am index 9222e95ac..b1b174665 100644 --- a/src/ltltest/Makefile.am +++ b/src/ltltest/Makefile.am @@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS) TESTS = \ parse.test \ parseerr.test \ + utf8.test \ length.test \ equals.test \ tostring.test \ diff --git a/src/ltltest/parse.test b/src/ltltest/parse.test index 0f4351218..a16a9e0c1 100755 --- a/src/ltltest/parse.test +++ b/src/ltltest/parse.test @@ -2,7 +2,7 @@ # Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement # de l'Epita (LRDE). # Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6), -# d�partement Syst�mes R�partis Coop�ratifs (SRC), Universit� Pierre +# département Systèmes Répartis Coopératifs (SRC), Université Pierre # et Marie Curie. # # This file is part of Spot, a model checking library. diff --git a/src/ltltest/utf8.test b/src/ltltest/utf8.test new file mode 100755 index 000000000..9527cec09 --- /dev/null +++ b/src/ltltest/utf8.test @@ -0,0 +1,72 @@ +#! /bin/sh +# Copyright (C) 2012 Laboratoire de Recherche et Developpement +# de l'Epita (LRDE). +# +# This file is part of Spot, a model checking library. +# +# Spot is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Spot is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Spot; see the file COPYING. If not, write to the Free +# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + + +# Make sure + +. ./defs || exit 1 + +# ---- +run 0 ../ltl2text 'â–¡â—¯a' >out +echo 'unop(G, unop(X, AP(a)))' > exp +cmp out exp + +# ---- +run 0 ../ltl2text 'â–¡â—¯"αβγ"' >out +echo 'unop(G, unop(X, AP(αβγ)))' > exp +cmp out exp + + +# ---- +set +x +run 1 ../ltl2text 'â–¡)â—¯a' 2>err +set -x +cat >exp <>> â–¡)â—¯a + ^ +syntax error, unexpected closing parenthesis + +>>> â–¡)â—¯a + ^ +missing right operand for "always operator" + +>>> â–¡)â—¯a + ^^^ +ignoring trailing garbage + +EOF +cmp exp err + +# ---- +set +x +run 1 ../ltl2text '"αβγ"X' 2>err +set -x +cat >exp <>> "αβγ"X + ^ +syntax error, unexpected next operator + +>>> "αβγ"X + ^ +ignoring trailing garbage + +EOF +cmp exp err