formater: add support for double-quoted fields
Part of #91. * spot/misc/formater.cc, spot/misc/formater.hh: Here. * bin/common_output.cc: Adjust automatic output format. * doc/org/csv.org: Adjust. * tests/core/lbt.test, tests/core/ltlfilt.test: More tests. * NEWS: Mention the changes.
This commit is contained in:
parent
6ed0830f87
commit
0d753048ce
7 changed files with 97 additions and 28 deletions
15
NEWS
15
NEWS
|
|
@ -94,6 +94,19 @@ New in spot 2.0.3a (not yet released)
|
|||
* genltl learned two options, --positive and --negative, to control
|
||||
wether formulas should be output after negation or not (or both).
|
||||
|
||||
* The formater used by --format (for ltlfilt, ltlgrind, genltl,
|
||||
randltl) or --stats (for autfilt, dstar2tgba, ltl2tgba, ltldo,
|
||||
randaut) learned to recognize double-quoted fields and double the
|
||||
double-quotes output inbetween as expected from RFC4180-compliant
|
||||
CSV files. For instance
|
||||
ltl2tgba -f 'a U "b+c"' --stats='"%f",%s'
|
||||
will output
|
||||
"a U ""b+c""",2
|
||||
|
||||
* The --csv-escape option of genltl, ltlfilt, ltlgrind, and randltl
|
||||
is now deprecated. The option is still here, but hidden and
|
||||
undocumented.
|
||||
|
||||
* Arguments passed to -x (in ltl2tgba, ltl2tgta, autfilt, dstar2tgba)
|
||||
that are not used are now reported as they might be typos.
|
||||
This ocurred a couple of times in our test-suite. A similar
|
||||
|
|
@ -1696,7 +1709,7 @@ New in spot 1.2.1 (2013-12-11)
|
|||
columns before the formula) and %> (text after) can be used
|
||||
with the --format option to alter this output.
|
||||
|
||||
- ltlfile, genltl, randltl, and ltl2tgba have a --csv-escape option
|
||||
- ltlfilt, genltl, randltl, and ltl2tgba have a --csv-escape option
|
||||
to help escape formulas in CSV files.
|
||||
|
||||
- Please check
|
||||
|
|
|
|||
|
|
@ -50,7 +50,9 @@ static const argp_option options[] =
|
|||
{ "wring", OPT_WRING, nullptr, 0, "output in Wring's syntax", -20 },
|
||||
{ "utf8", '8', nullptr, 0, "output using UTF-8 characters", -20 },
|
||||
{ "latex", OPT_LATEX, nullptr, 0, "output using LaTeX macros", -20 },
|
||||
{ "csv-escape", OPT_CSV, nullptr, 0,
|
||||
// --csv-escape was deprecated in Spot 2.1, we can remove it at
|
||||
// some point
|
||||
{ "csv-escape", OPT_CSV, nullptr, OPTION_HIDDEN,
|
||||
"quote the formula for use in a CSV file", -20 },
|
||||
{ "format", OPT_FORMAT, "FORMAT", 0,
|
||||
"specify how each line should be output (default: \"%f\")", -20 },
|
||||
|
|
@ -268,7 +270,30 @@ output_formula(std::ostream& out,
|
|||
{
|
||||
if (prefix)
|
||||
out << prefix << ',';
|
||||
// For backward compatibility, we still run
|
||||
// stream_escapable_formula when --csv-escape has been given.
|
||||
// But eventually --csv-escape should be removed, and the
|
||||
// formula printed raw.
|
||||
if ((prefix || suffix) && !escape_csv)
|
||||
{
|
||||
std::ostringstream tmp;
|
||||
stream_formula(tmp, f, filename, linenum);
|
||||
std::string tmpstr = tmp.str();
|
||||
if (tmpstr.find_first_of("\",") != std::string::npos)
|
||||
{
|
||||
out << '"';
|
||||
spot::escape_rfc4180(out, tmpstr);
|
||||
out << '"';
|
||||
}
|
||||
else
|
||||
{
|
||||
out << tmpstr;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stream_escapable_formula(out, f, filename, linenum);
|
||||
}
|
||||
if (suffix)
|
||||
out << ',' << suffix;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,17 +74,18 @@ ltl2tgba -f Xa -f 'G("switch == on" -> F"tab[3,5] < 12")' --stats '%f,%s,%e'
|
|||
|
||||
The second line of this input does no conform to [[https://www.rfc-editor.org/rfc/rfc4180.txt][RFC 4180]] because
|
||||
non-escaped fields are not allowed to contain comma or double-quotes.
|
||||
To fix this, use =ltl2tgba='s =--csv-escape= option: this causes
|
||||
"=%f=" to produce a double-quoted string properly escaped.
|
||||
To fix this, simply double-quote the =%f= in the argument to =--stats=:
|
||||
|
||||
#+BEGIN_SRC sh :results verbatim :exports both
|
||||
ltl2tgba -f Xa -f 'G("switch == on" -> F"tab[3,5] < 12")' --stats '%f,%s,%e' --csv-escape
|
||||
ltl2tgba -f Xa -f 'G("switch == on" -> F"tab[3,5] < 12")' --stats '"%f",%s,%e'
|
||||
#+END_SRC
|
||||
|
||||
#+RESULTS:
|
||||
: "Xa",3,3
|
||||
: "G(!""switch == on"" | F""tab[3,5] < 12"")",2,4
|
||||
|
||||
The formater will detect your double-quote and automatically double
|
||||
any double quote output between them, as per [[https://www.rfc-editor.org/rfc/rfc4180.txt][RFC 4180]].
|
||||
|
||||
The tool [[file:ltlcross.org][=ltlcross=]] has its own =--csv=FILENAME= option to format the
|
||||
statistics it gathers in a CSV file, but you have very little control
|
||||
|
|
@ -150,18 +151,6 @@ ltlfilt -F gen.csv/3 --size-min=8 --relabel=abc
|
|||
: and-gf,5,GFa & GFb & GFc & GFd & GFe
|
||||
: u-left,5,(((a U b) U c) U d) U e
|
||||
|
||||
For security, in case a formula may contain double-quotes or
|
||||
commas, you should use the =--csv-escape= option:
|
||||
|
||||
#+BEGIN_SRC sh :results verbatim :exports both
|
||||
ltlfilt -F gen.csv/3 --size-min=8 --relabel=abc --csv-escape
|
||||
#+END_SRC
|
||||
#+RESULTS:
|
||||
: and-gf,3,"GFa & GFb & GFc"
|
||||
: and-gf,4,"GFa & GFb & GFc & GFd"
|
||||
: and-gf,5,"GFa & GFb & GFc & GFd & GFe"
|
||||
: u-left,5,"(((a U b) U c) U d) U e"
|
||||
|
||||
The preservation in the output of the text before and after the
|
||||
selected column can be altered using the =--format= option. The =%<=
|
||||
escape sequence represent the (comma-separated) data of all the
|
||||
|
|
@ -173,7 +162,7 @@ string.
|
|||
For instance this moves the first two columns after the formulas.
|
||||
|
||||
#+BEGIN_SRC sh :results verbatim :exports both
|
||||
ltlfilt -F gen.csv/3 --size-min=8 --csv-escape --format='%f,%<'
|
||||
ltlfilt -F gen.csv/3 --size-min=8 --format='"%f",%<'
|
||||
#+END_SRC
|
||||
#+RESULTS:
|
||||
: "GFp1 & GFp2 & GFp3",and-gf,3
|
||||
|
|
@ -181,14 +170,19 @@ ltlfilt -F gen.csv/3 --size-min=8 --csv-escape --format='%f,%<'
|
|||
: "GFp1 & GFp2 & GFp3 & GFp4 & GFp5",and-gf,5
|
||||
: "(((p1 U p2) U p3) U p4) U p5",u-left,5
|
||||
|
||||
Note that if the =--format= option is not specified, the default
|
||||
format is one of: =%f=, =%<,%f=, =%f,%>=, or =%<,%f,%>= depending on
|
||||
whether the input CSV had column before and after the selected one.
|
||||
Furthermore, the formula field is automatically double-quoted if the
|
||||
formula actually use double-quotes, and the input CSV file had more
|
||||
than one column.
|
||||
|
||||
Typical uses of =ltlfilt= on CSV file include:
|
||||
- Filtering lines based on an LTL criterion, as above.
|
||||
- Changing the syntax of LTL formulas. For instance =ltl2tgba='s
|
||||
=--stats= option, and =ltlcross='s =--csv= option always output
|
||||
formulas in Spot's format. If that is inappropriate, simply
|
||||
use =ltlfilt= to rewrite the relevant column in your prefered
|
||||
syntax.
|
||||
formulas in Spot's format. If that is inappropriate, simply use
|
||||
=ltlfilt= to rewrite the relevant column in your preferred syntax.
|
||||
|
||||
* Dealing with header lines
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include "config.h"
|
||||
#include <spot/misc/formater.hh>
|
||||
#include <spot/misc/escape.hh>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
|
||||
namespace spot
|
||||
{
|
||||
|
|
@ -68,6 +70,20 @@ namespace spot
|
|||
formater::format(const char* fmt)
|
||||
{
|
||||
for (const char* pos = fmt; *pos; ++pos)
|
||||
{
|
||||
if (*pos == '"')
|
||||
{
|
||||
*output_ << '"';
|
||||
const char* end = strchr(pos + 1, '"');
|
||||
if (!end)
|
||||
continue;
|
||||
std::string tmp(pos + 1, end - (pos + 1));
|
||||
std::ostringstream os;
|
||||
format(os, tmp);
|
||||
escape_rfc4180(*output_, os.str());
|
||||
pos = end;
|
||||
// the end double-quote will be printed below
|
||||
}
|
||||
if (*pos != '%')
|
||||
{
|
||||
*output_ << *pos;
|
||||
|
|
@ -95,6 +111,7 @@ namespace spot
|
|||
break;
|
||||
pos = next;
|
||||
}
|
||||
}
|
||||
return *output_;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,8 +173,11 @@ namespace spot
|
|||
std::ostream&
|
||||
format(std::ostream& output, const char* fmt)
|
||||
{
|
||||
std::ostream* tmp = output_;
|
||||
set_output(output);
|
||||
return format(fmt);
|
||||
format(fmt);
|
||||
set_output(*tmp);
|
||||
return output;
|
||||
}
|
||||
|
||||
/// Expand the %-sequences in \a fmt, write the result on \a output_.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/sh
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2013 Laboratoire de Recherche et
|
||||
# Copyright (C) 2013, 2016 Laboratoire de Recherche et
|
||||
# Développement de l'Epita (LRDE).
|
||||
#
|
||||
# This file is part of Spot, a model checking library.
|
||||
|
|
@ -101,9 +101,16 @@ test `wc -l < formulas.2` -eq 168
|
|||
test `wc -l < formulas.3` -eq 168
|
||||
test `wc -l < formulas.4` -eq 168
|
||||
|
||||
# The --csv-escape option is now obsolete and replaced by double
|
||||
# quotes in the format string. So eventually the first two lines
|
||||
# should disappear.
|
||||
run 0 $ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5
|
||||
run 0 $ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6
|
||||
run 0 $ltlfilt formulas.2 --format='%L,"%f"' > formulas.5a
|
||||
run 0 $ltlfilt formulas.5/2 --format='%L,"%f"' > formulas.6a
|
||||
cmp formulas.5 formulas.6
|
||||
cmp formulas.5 formulas.5a
|
||||
cmp formulas.5a formulas.6a
|
||||
|
||||
# Make sure ltl2dstar-style litterals always get quoted.
|
||||
test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"'
|
||||
|
|
|
|||
|
|
@ -348,4 +348,14 @@ test "`cat out`" = 'G F "a\"\\b"'
|
|||
$ltlfilt --size=foo=2..3 2>stderr && exit 1
|
||||
grep 'invalid range.*should start with' stderr
|
||||
|
||||
|
||||
cat >out <<EOF
|
||||
test1,"""a b"" U c"
|
||||
"""a b"" U c",test2
|
||||
EOF
|
||||
ltlfilt out/1 > out.1
|
||||
ltlfilt out/2 > out.2
|
||||
diff out out.1
|
||||
diff out out.2
|
||||
|
||||
true
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue