Fix error reporting in utf8-encoded LTL formulae.

* src/ltlparse/public.hh (fix_utf8_locations): New function.
* src/ltlparse/fmterror.cc (fix_utf8_locations): Implement it.
(format_parse_errors): Rename as ...
(format_parse_errors_aux): ... this.
(format_parse_errors): New implementation that call fix_utf8_locations()
before format_parse_errors_aux() on valid utf8 strings.
* src/ltlparse/Makefile.am: Include $(top_srcdir).
* src/ltltest/utf8.test: New file.
* src/ltltest/Makefile.am: Add it.
* src/ltltest/parse.test: Fix header.
This commit is contained in:
Alexandre Duret-Lutz 2012-03-29 14:14:56 +02:00
parent 403170f5c8
commit 7e4787da22
6 changed files with 192 additions and 27 deletions

View file

@ -1,5 +1,5 @@
## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et ## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche
## Développement de l'Epita (LRDE). ## et Développement de l'Epita (LRDE).
## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris ## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), ## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
## Université Pierre et Marie Curie. ## Université Pierre et Marie Curie.
@ -21,7 +21,7 @@
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA. ## 02111-1307, USA.
AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT
# Disable -Werror because too many versions of flex yield warnings. # Disable -Werror because too many versions of flex yield warnings.
AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=) AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)

View file

@ -1,5 +1,5 @@
// Copyright (C) 2010 Laboratoire de Recherche et Développement de // Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement
// l'Epita (LRDE). // de l'Epita (LRDE).
// Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris // Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
// 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), // 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
// Université Pierre et Marie Curie. // Université Pierre et Marie Curie.
@ -23,23 +23,64 @@
#include "public.hh" #include "public.hh"
#include <ostream> #include <ostream>
#include <iterator>
#include <vector>
#include "utf8/utf8.h"
namespace spot namespace spot
{ {
namespace ltl namespace ltl
{ {
bool void
format_parse_errors(std::ostream& os, fix_utf8_locations(const std::string& ltl_string,
const std::string& ltl_string,
parse_error_list& error_list) parse_error_list& error_list)
{
// LUT to convert byte positions to utf8 positions.
// (The +2 is to account for position 0, not used,
// and position ltl_string.size()+1 denoting EOS.)
std::vector<unsigned> b2u(ltl_string.size() + 2);
// i will iterate over all utf8 characters between b and e
std::string::const_iterator b = ltl_string.begin();
std::string::const_iterator i = b;
std::string::const_iterator e = ltl_string.end();
unsigned n = 0; // number of current utf8 character
unsigned prev = 0; // last byte of previous utf8 character
while (i != e)
{
utf8::next(i, e);
++n;
unsigned d = std::distance(b, i);
while (prev < d)
b2u[++prev] = n;
}
b2u[++prev] = ++n;
// use b2u to update error_list
parse_error_list::iterator it;
for (it = error_list.begin(); it != error_list.end(); ++it)
{
ltlyy::location& l = it->first;
l.begin.column = b2u[l.begin.column];
l.end.column = b2u[l.end.column];
}
}
namespace
{
bool
format_parse_errors_aux(std::ostream& os,
const std::string& ltl_string,
const parse_error_list& error_list)
{ {
bool printed = false; bool printed = false;
spot::ltl::parse_error_list::iterator it; parse_error_list::const_iterator it;
for (it = error_list.begin(); it != error_list.end(); ++it) for (it = error_list.begin(); it != error_list.end(); ++it)
{ {
os << ">>> " << ltl_string << std::endl; os << ">>> " << ltl_string << std::endl;
ltlyy::location& l = it->first; const ltlyy::location& l = it->first;
unsigned n = 1; unsigned n = 1;
for (; n < 4 + l.begin.column; ++n) for (; n < 4 + l.begin.column; ++n)
@ -54,6 +95,24 @@ namespace spot
} }
return printed; return printed;
} }
}
bool
format_parse_errors(std::ostream& os,
const std::string& ltl_string,
const parse_error_list& error_list)
{
if (utf8::is_valid(ltl_string.begin(), ltl_string.end()))
{
parse_error_list fixed = error_list;
fix_utf8_locations(ltl_string, fixed);
return format_parse_errors_aux(os, ltl_string, fixed);
}
else
{
return format_parse_errors_aux(os, ltl_string, error_list);
}
}
} }
} }

View file

@ -1,5 +1,5 @@
// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de // Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et
// l'Epita (LRDE). // Développement de l'Epita (LRDE).
// Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de // Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
// Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC), // Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
// Université Pierre et Marie Curie. // Université Pierre et Marie Curie.
@ -94,6 +94,12 @@ namespace spot
/// \brief Format diagnostics produced by spot::ltl::parse /// \brief Format diagnostics produced by spot::ltl::parse
/// or spot::ltl::ratexp /// or spot::ltl::ratexp
///
/// If the string is utf8 encoded, spot::ltl::fix_utf8_locations()
/// will be used to report correct utf8 locations (assuming the
/// output is utf8 aware). Nonetheless, the supplied \a
/// error_list will not be modified.
///
/// \param os Where diagnostics should be output. /// \param os Where diagnostics should be output.
/// \param input_string The string that were parsed. /// \param input_string The string that were parsed.
/// \param error_list The error list filled by spot::ltl::parse /// \param error_list The error list filled by spot::ltl::parse
@ -101,6 +107,33 @@ namespace spot
/// \return \c true iff any diagnostic was output. /// \return \c true iff any diagnostic was output.
bool format_parse_errors(std::ostream& os, bool format_parse_errors(std::ostream& os,
const std::string& input_string, const std::string& input_string,
const parse_error_list& error_list);
/// \brief Fix location of diagnostics assuming the input is utf8.
///
/// The spot::ltl::parse() and spot::ltl::parse_sere() function
/// return a parse_error_list that contain locations specified at
/// the byte level. Although these parser recognize some
/// utf8 characters they only work byte by byte and will report
/// positions by counting byte.
///
/// This function fixes the positions returned by the parser to
/// look correct when the string is interpreted as a utf8-encoded
/// string.
///
/// It is invalid to call this function on a string that is not
/// valid utf8.
///
/// You should NOT call this function before calling
/// spot::ltl::format_parse_errors() because it is already called
/// inside if needed. You may need this function only if you want
/// to write your own error reporting code.
///
/// \param input_string The string that were parsed.
/// \param error_list The error list filled by spot::ltl::parse
/// or spot::ltl::parse_sere while parsing \a input_string.
void
fix_utf8_locations(const std::string& ltl_string,
parse_error_list& error_list); parse_error_list& error_list);
/// @} /// @}

View file

@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS)
TESTS = \ TESTS = \
parse.test \ parse.test \
parseerr.test \ parseerr.test \
utf8.test \
length.test \ length.test \
equals.test \ equals.test \
tostring.test \ tostring.test \

View file

@ -2,7 +2,7 @@
# Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement # Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
# de l'Epita (LRDE). # de l'Epita (LRDE).
# Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6), # Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
# d<EFBFBD>partement Syst<73>mes R<>partis Coop<6F>ratifs (SRC), Universit<69> Pierre # département Systèmes Répartis Coopératifs (SRC), Université Pierre
# et Marie Curie. # et Marie Curie.
# #
# This file is part of Spot, a model checking library. # This file is part of Spot, a model checking library.

72
src/ltltest/utf8.test Executable file
View file

@ -0,0 +1,72 @@
#! /bin/sh
# Copyright (C) 2012 Laboratoire de Recherche et Developpement
# de l'Epita (LRDE).
#
# This file is part of Spot, a model checking library.
#
# Spot is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Spot is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Spot; see the file COPYING. If not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# Make sure
. ./defs || exit 1
# ----
run 0 ../ltl2text '□◯a' >out
echo 'unop(G, unop(X, AP(a)))' > exp
cmp out exp
# ----
run 0 ../ltl2text '□◯"αβγ"' >out
echo 'unop(G, unop(X, AP(αβγ)))' > exp
cmp out exp
# ----
set +x
run 1 ../ltl2text '□)◯a' 2>err
set -x
cat >exp <<EOF
>>> □)◯a
^
syntax error, unexpected closing parenthesis
>>> □)◯a
^
missing right operand for "always operator"
>>> □)◯a
^^^
ignoring trailing garbage
EOF
cmp exp err
# ----
set +x
run 1 ../ltl2text '"αβγ"X' 2>err
set -x
cat >exp <<EOF
>>> "αβγ"X
^
syntax error, unexpected next operator
>>> "αβγ"X
^
ignoring trailing garbage
EOF
cmp exp err