Fix error reporting in utf8-encoded LTL formulae.
* src/ltlparse/public.hh (fix_utf8_locations): New function. * src/ltlparse/fmterror.cc (fix_utf8_locations): Implement it. (format_parse_errors): Rename as ... (format_parse_errors_aux): ... this. (format_parse_errors): New implementation that call fix_utf8_locations() before format_parse_errors_aux() on valid utf8 strings. * src/ltlparse/Makefile.am: Include $(top_srcdir). * src/ltltest/utf8.test: New file. * src/ltltest/Makefile.am: Add it. * src/ltltest/parse.test: Fix header.
This commit is contained in:
parent
403170f5c8
commit
7e4787da22
6 changed files with 192 additions and 27 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et
|
## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche
|
||||||
## Développement de l'Epita (LRDE).
|
## et Développement de l'Epita (LRDE).
|
||||||
## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
|
## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
|
||||||
## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||||
## Université Pierre et Marie Curie.
|
## Université Pierre et Marie Curie.
|
||||||
|
|
@ -21,7 +21,7 @@
|
||||||
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
## 02111-1307, USA.
|
## 02111-1307, USA.
|
||||||
|
|
||||||
AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT
|
AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT
|
||||||
# Disable -Werror because too many versions of flex yield warnings.
|
# Disable -Werror because too many versions of flex yield warnings.
|
||||||
AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)
|
AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright (C) 2010 Laboratoire de Recherche et Développement de
|
// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement
|
||||||
// l'Epita (LRDE).
|
// de l'Epita (LRDE).
|
||||||
// Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
|
// Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
|
||||||
// 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
// 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||||
// Université Pierre et Marie Curie.
|
// Université Pierre et Marie Curie.
|
||||||
|
|
@ -23,23 +23,64 @@
|
||||||
|
|
||||||
#include "public.hh"
|
#include "public.hh"
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include <iterator>
|
||||||
|
#include <vector>
|
||||||
|
#include "utf8/utf8.h"
|
||||||
|
|
||||||
namespace spot
|
namespace spot
|
||||||
{
|
{
|
||||||
namespace ltl
|
namespace ltl
|
||||||
{
|
{
|
||||||
|
|
||||||
bool
|
void
|
||||||
format_parse_errors(std::ostream& os,
|
fix_utf8_locations(const std::string& ltl_string,
|
||||||
const std::string& ltl_string,
|
|
||||||
parse_error_list& error_list)
|
parse_error_list& error_list)
|
||||||
|
{
|
||||||
|
// LUT to convert byte positions to utf8 positions.
|
||||||
|
// (The +2 is to account for position 0, not used,
|
||||||
|
// and position ltl_string.size()+1 denoting EOS.)
|
||||||
|
std::vector<unsigned> b2u(ltl_string.size() + 2);
|
||||||
|
|
||||||
|
// i will iterate over all utf8 characters between b and e
|
||||||
|
std::string::const_iterator b = ltl_string.begin();
|
||||||
|
std::string::const_iterator i = b;
|
||||||
|
std::string::const_iterator e = ltl_string.end();
|
||||||
|
|
||||||
|
unsigned n = 0; // number of current utf8 character
|
||||||
|
unsigned prev = 0; // last byte of previous utf8 character
|
||||||
|
while (i != e)
|
||||||
|
{
|
||||||
|
utf8::next(i, e);
|
||||||
|
++n;
|
||||||
|
unsigned d = std::distance(b, i);
|
||||||
|
while (prev < d)
|
||||||
|
b2u[++prev] = n;
|
||||||
|
}
|
||||||
|
b2u[++prev] = ++n;
|
||||||
|
|
||||||
|
// use b2u to update error_list
|
||||||
|
parse_error_list::iterator it;
|
||||||
|
for (it = error_list.begin(); it != error_list.end(); ++it)
|
||||||
|
{
|
||||||
|
ltlyy::location& l = it->first;
|
||||||
|
l.begin.column = b2u[l.begin.column];
|
||||||
|
l.end.column = b2u[l.end.column];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
bool
|
||||||
|
format_parse_errors_aux(std::ostream& os,
|
||||||
|
const std::string& ltl_string,
|
||||||
|
const parse_error_list& error_list)
|
||||||
{
|
{
|
||||||
bool printed = false;
|
bool printed = false;
|
||||||
spot::ltl::parse_error_list::iterator it;
|
parse_error_list::const_iterator it;
|
||||||
for (it = error_list.begin(); it != error_list.end(); ++it)
|
for (it = error_list.begin(); it != error_list.end(); ++it)
|
||||||
{
|
{
|
||||||
os << ">>> " << ltl_string << std::endl;
|
os << ">>> " << ltl_string << std::endl;
|
||||||
ltlyy::location& l = it->first;
|
const ltlyy::location& l = it->first;
|
||||||
|
|
||||||
unsigned n = 1;
|
unsigned n = 1;
|
||||||
for (; n < 4 + l.begin.column; ++n)
|
for (; n < 4 + l.begin.column; ++n)
|
||||||
|
|
@ -54,6 +95,24 @@ namespace spot
|
||||||
}
|
}
|
||||||
return printed;
|
return printed;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
format_parse_errors(std::ostream& os,
|
||||||
|
const std::string& ltl_string,
|
||||||
|
const parse_error_list& error_list)
|
||||||
|
{
|
||||||
|
if (utf8::is_valid(ltl_string.begin(), ltl_string.end()))
|
||||||
|
{
|
||||||
|
parse_error_list fixed = error_list;
|
||||||
|
fix_utf8_locations(ltl_string, fixed);
|
||||||
|
return format_parse_errors_aux(os, ltl_string, fixed);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return format_parse_errors_aux(os, ltl_string, error_list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de
|
// Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et
|
||||||
// l'Epita (LRDE).
|
// Développement de l'Epita (LRDE).
|
||||||
// Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
|
// Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
|
||||||
// Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
// Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||||
// Université Pierre et Marie Curie.
|
// Université Pierre et Marie Curie.
|
||||||
|
|
@ -94,6 +94,12 @@ namespace spot
|
||||||
|
|
||||||
/// \brief Format diagnostics produced by spot::ltl::parse
|
/// \brief Format diagnostics produced by spot::ltl::parse
|
||||||
/// or spot::ltl::ratexp
|
/// or spot::ltl::ratexp
|
||||||
|
///
|
||||||
|
/// If the string is utf8 encoded, spot::ltl::fix_utf8_locations()
|
||||||
|
/// will be used to report correct utf8 locations (assuming the
|
||||||
|
/// output is utf8 aware). Nonetheless, the supplied \a
|
||||||
|
/// error_list will not be modified.
|
||||||
|
///
|
||||||
/// \param os Where diagnostics should be output.
|
/// \param os Where diagnostics should be output.
|
||||||
/// \param input_string The string that were parsed.
|
/// \param input_string The string that were parsed.
|
||||||
/// \param error_list The error list filled by spot::ltl::parse
|
/// \param error_list The error list filled by spot::ltl::parse
|
||||||
|
|
@ -101,6 +107,33 @@ namespace spot
|
||||||
/// \return \c true iff any diagnostic was output.
|
/// \return \c true iff any diagnostic was output.
|
||||||
bool format_parse_errors(std::ostream& os,
|
bool format_parse_errors(std::ostream& os,
|
||||||
const std::string& input_string,
|
const std::string& input_string,
|
||||||
|
const parse_error_list& error_list);
|
||||||
|
|
||||||
|
/// \brief Fix location of diagnostics assuming the input is utf8.
|
||||||
|
///
|
||||||
|
/// The spot::ltl::parse() and spot::ltl::parse_sere() function
|
||||||
|
/// return a parse_error_list that contain locations specified at
|
||||||
|
/// the byte level. Although these parser recognize some
|
||||||
|
/// utf8 characters they only work byte by byte and will report
|
||||||
|
/// positions by counting byte.
|
||||||
|
///
|
||||||
|
/// This function fixes the positions returned by the parser to
|
||||||
|
/// look correct when the string is interpreted as a utf8-encoded
|
||||||
|
/// string.
|
||||||
|
///
|
||||||
|
/// It is invalid to call this function on a string that is not
|
||||||
|
/// valid utf8.
|
||||||
|
///
|
||||||
|
/// You should NOT call this function before calling
|
||||||
|
/// spot::ltl::format_parse_errors() because it is already called
|
||||||
|
/// inside if needed. You may need this function only if you want
|
||||||
|
/// to write your own error reporting code.
|
||||||
|
///
|
||||||
|
/// \param input_string The string that were parsed.
|
||||||
|
/// \param error_list The error list filled by spot::ltl::parse
|
||||||
|
/// or spot::ltl::parse_sere while parsing \a input_string.
|
||||||
|
void
|
||||||
|
fix_utf8_locations(const std::string& ltl_string,
|
||||||
parse_error_list& error_list);
|
parse_error_list& error_list);
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS)
|
||||||
TESTS = \
|
TESTS = \
|
||||||
parse.test \
|
parse.test \
|
||||||
parseerr.test \
|
parseerr.test \
|
||||||
|
utf8.test \
|
||||||
length.test \
|
length.test \
|
||||||
equals.test \
|
equals.test \
|
||||||
tostring.test \
|
tostring.test \
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
|
# Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
|
||||||
# de l'Epita (LRDE).
|
# de l'Epita (LRDE).
|
||||||
# Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
|
# Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
|
||||||
# d<EFBFBD>partement Syst<73>mes R<>partis Coop<6F>ratifs (SRC), Universit<69> Pierre
|
# département Systèmes Répartis Coopératifs (SRC), Université Pierre
|
||||||
# et Marie Curie.
|
# et Marie Curie.
|
||||||
#
|
#
|
||||||
# This file is part of Spot, a model checking library.
|
# This file is part of Spot, a model checking library.
|
||||||
|
|
|
||||||
72
src/ltltest/utf8.test
Executable file
72
src/ltltest/utf8.test
Executable file
|
|
@ -0,0 +1,72 @@
|
||||||
|
#! /bin/sh
|
||||||
|
# Copyright (C) 2012 Laboratoire de Recherche et Developpement
|
||||||
|
# de l'Epita (LRDE).
|
||||||
|
#
|
||||||
|
# This file is part of Spot, a model checking library.
|
||||||
|
#
|
||||||
|
# Spot is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Spot is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||||||
|
# License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Spot; see the file COPYING. If not, write to the Free
|
||||||
|
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
|
# 02111-1307, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# Make sure
|
||||||
|
|
||||||
|
. ./defs || exit 1
|
||||||
|
|
||||||
|
# ----
|
||||||
|
run 0 ../ltl2text '□◯a' >out
|
||||||
|
echo 'unop(G, unop(X, AP(a)))' > exp
|
||||||
|
cmp out exp
|
||||||
|
|
||||||
|
# ----
|
||||||
|
run 0 ../ltl2text '□◯"αβγ"' >out
|
||||||
|
echo 'unop(G, unop(X, AP(αβγ)))' > exp
|
||||||
|
cmp out exp
|
||||||
|
|
||||||
|
|
||||||
|
# ----
|
||||||
|
set +x
|
||||||
|
run 1 ../ltl2text '□)◯a' 2>err
|
||||||
|
set -x
|
||||||
|
cat >exp <<EOF
|
||||||
|
>>> □)◯a
|
||||||
|
^
|
||||||
|
syntax error, unexpected closing parenthesis
|
||||||
|
|
||||||
|
>>> □)◯a
|
||||||
|
^
|
||||||
|
missing right operand for "always operator"
|
||||||
|
|
||||||
|
>>> □)◯a
|
||||||
|
^^^
|
||||||
|
ignoring trailing garbage
|
||||||
|
|
||||||
|
EOF
|
||||||
|
cmp exp err
|
||||||
|
|
||||||
|
# ----
|
||||||
|
set +x
|
||||||
|
run 1 ../ltl2text '"αβγ"X' 2>err
|
||||||
|
set -x
|
||||||
|
cat >exp <<EOF
|
||||||
|
>>> "αβγ"X
|
||||||
|
^
|
||||||
|
syntax error, unexpected next operator
|
||||||
|
|
||||||
|
>>> "αβγ"X
|
||||||
|
^
|
||||||
|
ignoring trailing garbage
|
||||||
|
|
||||||
|
EOF
|
||||||
|
cmp exp err
|
||||||
Loading…
Add table
Add a link
Reference in a new issue