Fix error reporting in utf8-encoded LTL formulae.
* src/ltlparse/public.hh (fix_utf8_locations): New function. * src/ltlparse/fmterror.cc (fix_utf8_locations): Implement it. (format_parse_errors): Rename as ... (format_parse_errors_aux): ... this. (format_parse_errors): New implementation that call fix_utf8_locations() before format_parse_errors_aux() on valid utf8 strings. * src/ltlparse/Makefile.am: Include $(top_srcdir). * src/ltltest/utf8.test: New file. * src/ltltest/Makefile.am: Add it. * src/ltltest/parse.test: Fix header.
This commit is contained in:
parent
403170f5c8
commit
7e4787da22
6 changed files with 192 additions and 27 deletions
|
|
@ -1,5 +1,5 @@
|
|||
## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et
|
||||
## Développement de l'Epita (LRDE).
|
||||
## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche
|
||||
## et Développement de l'Epita (LRDE).
|
||||
## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
|
||||
## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||
## Université Pierre et Marie Curie.
|
||||
|
|
@ -21,7 +21,7 @@
|
|||
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||
## 02111-1307, USA.
|
||||
|
||||
AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT
|
||||
AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT
|
||||
# Disable -Werror because too many versions of flex yield warnings.
|
||||
AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// Copyright (C) 2010 Laboratoire de Recherche et Développement de
|
||||
// l'Epita (LRDE).
|
||||
// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement
|
||||
// de l'Epita (LRDE).
|
||||
// Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
|
||||
// 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||
// Université Pierre et Marie Curie.
|
||||
|
|
@ -23,36 +23,95 @@
|
|||
|
||||
#include "public.hh"
|
||||
#include <ostream>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
namespace spot
|
||||
{
|
||||
namespace ltl
|
||||
{
|
||||
|
||||
void
|
||||
fix_utf8_locations(const std::string& ltl_string,
|
||||
parse_error_list& error_list)
|
||||
{
|
||||
// LUT to convert byte positions to utf8 positions.
|
||||
// (The +2 is to account for position 0, not used,
|
||||
// and position ltl_string.size()+1 denoting EOS.)
|
||||
std::vector<unsigned> b2u(ltl_string.size() + 2);
|
||||
|
||||
// i will iterate over all utf8 characters between b and e
|
||||
std::string::const_iterator b = ltl_string.begin();
|
||||
std::string::const_iterator i = b;
|
||||
std::string::const_iterator e = ltl_string.end();
|
||||
|
||||
unsigned n = 0; // number of current utf8 character
|
||||
unsigned prev = 0; // last byte of previous utf8 character
|
||||
while (i != e)
|
||||
{
|
||||
utf8::next(i, e);
|
||||
++n;
|
||||
unsigned d = std::distance(b, i);
|
||||
while (prev < d)
|
||||
b2u[++prev] = n;
|
||||
}
|
||||
b2u[++prev] = ++n;
|
||||
|
||||
// use b2u to update error_list
|
||||
parse_error_list::iterator it;
|
||||
for (it = error_list.begin(); it != error_list.end(); ++it)
|
||||
{
|
||||
ltlyy::location& l = it->first;
|
||||
l.begin.column = b2u[l.begin.column];
|
||||
l.end.column = b2u[l.end.column];
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
bool
|
||||
format_parse_errors_aux(std::ostream& os,
|
||||
const std::string& ltl_string,
|
||||
const parse_error_list& error_list)
|
||||
{
|
||||
bool printed = false;
|
||||
parse_error_list::const_iterator it;
|
||||
for (it = error_list.begin(); it != error_list.end(); ++it)
|
||||
{
|
||||
os << ">>> " << ltl_string << std::endl;
|
||||
const ltlyy::location& l = it->first;
|
||||
|
||||
unsigned n = 1;
|
||||
for (; n < 4 + l.begin.column; ++n)
|
||||
os << ' ';
|
||||
// Write at least one '^', even if begin==end.
|
||||
os << '^';
|
||||
++n;
|
||||
for (; n < 4 + l.end.column; ++n)
|
||||
os << '^';
|
||||
os << std::endl << it->second << std::endl << std::endl;
|
||||
printed = true;
|
||||
}
|
||||
return printed;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
format_parse_errors(std::ostream& os,
|
||||
const std::string& ltl_string,
|
||||
parse_error_list& error_list)
|
||||
const parse_error_list& error_list)
|
||||
{
|
||||
bool printed = false;
|
||||
spot::ltl::parse_error_list::iterator it;
|
||||
for (it = error_list.begin(); it != error_list.end(); ++it)
|
||||
if (utf8::is_valid(ltl_string.begin(), ltl_string.end()))
|
||||
{
|
||||
os << ">>> " << ltl_string << std::endl;
|
||||
ltlyy::location& l = it->first;
|
||||
|
||||
unsigned n = 1;
|
||||
for (; n < 4 + l.begin.column; ++n)
|
||||
os << ' ';
|
||||
// Write at least one '^', even if begin==end.
|
||||
os << '^';
|
||||
++n;
|
||||
for (; n < 4 + l.end.column; ++n)
|
||||
os << '^';
|
||||
os << std::endl << it->second << std::endl << std::endl;
|
||||
printed = true;
|
||||
parse_error_list fixed = error_list;
|
||||
fix_utf8_locations(ltl_string, fixed);
|
||||
return format_parse_errors_aux(os, ltl_string, fixed);
|
||||
}
|
||||
else
|
||||
{
|
||||
return format_parse_errors_aux(os, ltl_string, error_list);
|
||||
}
|
||||
return printed;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de
|
||||
// l'Epita (LRDE).
|
||||
// Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et
|
||||
// Développement de l'Epita (LRDE).
|
||||
// Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
|
||||
// Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
|
||||
// Université Pierre et Marie Curie.
|
||||
|
|
@ -94,6 +94,12 @@ namespace spot
|
|||
|
||||
/// \brief Format diagnostics produced by spot::ltl::parse
|
||||
/// or spot::ltl::ratexp
|
||||
///
|
||||
/// If the string is utf8 encoded, spot::ltl::fix_utf8_locations()
|
||||
/// will be used to report correct utf8 locations (assuming the
|
||||
/// output is utf8 aware). Nonetheless, the supplied \a
|
||||
/// error_list will not be modified.
|
||||
///
|
||||
/// \param os Where diagnostics should be output.
|
||||
/// \param input_string The string that were parsed.
|
||||
/// \param error_list The error list filled by spot::ltl::parse
|
||||
|
|
@ -101,7 +107,34 @@ namespace spot
|
|||
/// \return \c true iff any diagnostic was output.
|
||||
bool format_parse_errors(std::ostream& os,
|
||||
const std::string& input_string,
|
||||
parse_error_list& error_list);
|
||||
const parse_error_list& error_list);
|
||||
|
||||
/// \brief Fix location of diagnostics assuming the input is utf8.
|
||||
///
|
||||
/// The spot::ltl::parse() and spot::ltl::parse_sere() function
|
||||
/// return a parse_error_list that contain locations specified at
|
||||
/// the byte level. Although these parser recognize some
|
||||
/// utf8 characters they only work byte by byte and will report
|
||||
/// positions by counting byte.
|
||||
///
|
||||
/// This function fixes the positions returned by the parser to
|
||||
/// look correct when the string is interpreted as a utf8-encoded
|
||||
/// string.
|
||||
///
|
||||
/// It is invalid to call this function on a string that is not
|
||||
/// valid utf8.
|
||||
///
|
||||
/// You should NOT call this function before calling
|
||||
/// spot::ltl::format_parse_errors() because it is already called
|
||||
/// inside if needed. You may need this function only if you want
|
||||
/// to write your own error reporting code.
|
||||
///
|
||||
/// \param input_string The string that were parsed.
|
||||
/// \param error_list The error list filled by spot::ltl::parse
|
||||
/// or spot::ltl::parse_sere while parsing \a input_string.
|
||||
void
|
||||
fix_utf8_locations(const std::string& ltl_string,
|
||||
parse_error_list& error_list);
|
||||
|
||||
/// @}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS)
|
|||
TESTS = \
|
||||
parse.test \
|
||||
parseerr.test \
|
||||
utf8.test \
|
||||
length.test \
|
||||
equals.test \
|
||||
tostring.test \
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
|
||||
# de l'Epita (LRDE).
|
||||
# Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
|
||||
# d<EFBFBD>partement Syst<73>mes R<>partis Coop<6F>ratifs (SRC), Universit<69> Pierre
|
||||
# département Systèmes Répartis Coopératifs (SRC), Université Pierre
|
||||
# et Marie Curie.
|
||||
#
|
||||
# This file is part of Spot, a model checking library.
|
||||
|
|
|
|||
72
src/ltltest/utf8.test
Executable file
72
src/ltltest/utf8.test
Executable file
|
|
@ -0,0 +1,72 @@
|
|||
#! /bin/sh
|
||||
# Copyright (C) 2012 Laboratoire de Recherche et Developpement
|
||||
# de l'Epita (LRDE).
|
||||
#
|
||||
# This file is part of Spot, a model checking library.
|
||||
#
|
||||
# Spot is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Spot is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||||
# License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Spot; see the file COPYING. If not, write to the Free
|
||||
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||
# 02111-1307, USA.
|
||||
|
||||
|
||||
# Make sure
|
||||
|
||||
. ./defs || exit 1
|
||||
|
||||
# ----
|
||||
run 0 ../ltl2text '□◯a' >out
|
||||
echo 'unop(G, unop(X, AP(a)))' > exp
|
||||
cmp out exp
|
||||
|
||||
# ----
|
||||
run 0 ../ltl2text '□◯"αβγ"' >out
|
||||
echo 'unop(G, unop(X, AP(αβγ)))' > exp
|
||||
cmp out exp
|
||||
|
||||
|
||||
# ----
|
||||
set +x
|
||||
run 1 ../ltl2text '□)◯a' 2>err
|
||||
set -x
|
||||
cat >exp <<EOF
|
||||
>>> □)◯a
|
||||
^
|
||||
syntax error, unexpected closing parenthesis
|
||||
|
||||
>>> □)◯a
|
||||
^
|
||||
missing right operand for "always operator"
|
||||
|
||||
>>> □)◯a
|
||||
^^^
|
||||
ignoring trailing garbage
|
||||
|
||||
EOF
|
||||
cmp exp err
|
||||
|
||||
# ----
|
||||
set +x
|
||||
run 1 ../ltl2text '"αβγ"X' 2>err
|
||||
set -x
|
||||
cat >exp <<EOF
|
||||
>>> "αβγ"X
|
||||
^
|
||||
syntax error, unexpected next operator
|
||||
|
||||
>>> "αβγ"X
|
||||
^
|
||||
ignoring trailing garbage
|
||||
|
||||
EOF
|
||||
cmp exp err
|
||||
Loading…
Add table
Add a link
Reference in a new issue