bin: add support for reading formulas from CSV files.
* NEWS: Mention it. * src/bin/common_finput.cc, src/bin/common_finput.hh: Implement it. * src/bin/common_output.cc, src/bin/common_output.hh: Add the %< and %> escapes. * src/bin/ltlfilt.cc: Connect %< and %> to the prefix andsuffix of the input, and document them. * src/tgbatest/det.test, src/tgbatest/nondet.test: Simplify these tests that read CSV files.
This commit is contained in:
parent
8c5875314c
commit
0faea814da
8 changed files with 236 additions and 22 deletions
9
NEWS
9
NEWS
|
|
@ -22,6 +22,15 @@ New in spot 1.2a (not released)
|
||||||
output in CSV or JSON will have some extra columns to report
|
output in CSV or JSON will have some extra columns to report
|
||||||
the size of these input automata before ltlcross converts them
|
the size of these input automata before ltlcross converts them
|
||||||
into TGBA to perform its regular checks.
|
into TGBA to perform its regular checks.
|
||||||
|
- ltlfilt, ltl2tgba, ltl2tgta, and ltlcross can now read formulas
|
||||||
|
from CSV files. Use option -F FILE/COL to read formulas from
|
||||||
|
column COL of FILE. Use -F FILE/-COL if the first line of
|
||||||
|
FILE be ignored.
|
||||||
|
- when ltlfilt processes formulas from a CSV file, it will output
|
||||||
|
each CSV line whose formula matches the given constraints, with
|
||||||
|
the rewriten formula. The new escape sequence %< (text in
|
||||||
|
columns before the formula) and %> (text after) can be used
|
||||||
|
with the --format option to alter this output.
|
||||||
* Bug fixes:
|
* Bug fixes:
|
||||||
- ltlcross' CSV output now stricly follows RFC 4180.
|
- ltlcross' CSV output now stricly follows RFC 4180.
|
||||||
- ltlcross failed to report missing input or output escape sequences
|
- ltlcross failed to report missing input or output escape sequences
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@
|
||||||
#include "ltlparse/public.hh"
|
#include "ltlparse/public.hh"
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
#define OPT_LBT 1
|
#define OPT_LBT 1
|
||||||
#define OPT_LENIENT 2
|
#define OPT_LENIENT 2
|
||||||
|
|
@ -34,8 +35,10 @@ static const argp_option options[] =
|
||||||
{
|
{
|
||||||
{ 0, 0, 0, 0, "Input options:", 1 },
|
{ 0, 0, 0, 0, "Input options:", 1 },
|
||||||
{ "formula", 'f', "STRING", 0, "process the formula STRING", 0 },
|
{ "formula", 'f', "STRING", 0, "process the formula STRING", 0 },
|
||||||
{ "file", 'F', "FILENAME", 0,
|
{ "file", 'F', "FILENAME[/COL]", 0,
|
||||||
"process each line of FILENAME as a formula", 0 },
|
"process each line of FILENAME as a formula; if COL is a "
|
||||||
|
"positive integer, assume a CSV file and read column COL; use "
|
||||||
|
"a negative COL to drop the first line of the CSV file", 0 },
|
||||||
{ "lbt-input", OPT_LBT, 0, 0,
|
{ "lbt-input", OPT_LBT, 0, 0,
|
||||||
"read all formulas using LBT's prefix syntax", 0 },
|
"read all formulas using LBT's prefix syntax", 0 },
|
||||||
{ "lenient", OPT_LENIENT, 0, 0,
|
{ "lenient", OPT_LENIENT, 0, 0,
|
||||||
|
|
@ -82,10 +85,22 @@ parse_formula(const std::string& s, spot::ltl::parse_error_list& pel)
|
||||||
}
|
}
|
||||||
|
|
||||||
job_processor::job_processor()
|
job_processor::job_processor()
|
||||||
: abort_run(false)
|
: abort_run(false), real_filename(0),
|
||||||
|
col_to_read(0), prefix(0), suffix(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
job_processor::~job_processor()
|
||||||
|
{
|
||||||
|
if (real_filename)
|
||||||
|
free(real_filename);
|
||||||
|
if (prefix)
|
||||||
|
free(prefix);
|
||||||
|
if (suffix)
|
||||||
|
free(suffix);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
job_processor::process_string(const std::string& input,
|
job_processor::process_string(const std::string& input,
|
||||||
const char* filename,
|
const char* filename,
|
||||||
|
|
@ -113,9 +128,139 @@ job_processor::process_stream(std::istream& is,
|
||||||
int error = 0;
|
int error = 0;
|
||||||
int linenum = 0;
|
int linenum = 0;
|
||||||
std::string line;
|
std::string line;
|
||||||
|
|
||||||
|
// Discard the first line of a CSV file if requested.
|
||||||
|
if (col_to_read < 0)
|
||||||
|
{
|
||||||
|
std::getline(is, line);
|
||||||
|
col_to_read = -col_to_read;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Each line of the file and send them to process_string,
|
||||||
|
// optionally extracting a column of a CSV file.
|
||||||
while (!abort_run && std::getline(is, line))
|
while (!abort_run && std::getline(is, line))
|
||||||
if (!line.empty())
|
if (!line.empty())
|
||||||
error |= process_string(line, filename, ++linenum);
|
{
|
||||||
|
if (col_to_read == 0)
|
||||||
|
{
|
||||||
|
error |= process_string(line, filename, ++linenum);
|
||||||
|
}
|
||||||
|
else // We are reading column COL_TO_READ in a CSV file.
|
||||||
|
{
|
||||||
|
// FIXME: This code assumes an entire CSV row was been
|
||||||
|
// fetched by getline(). This is incorrect for processing
|
||||||
|
// CSV files with fields that contain newlines inside
|
||||||
|
// double-quoted strings. Patching this code to deal with
|
||||||
|
// such files is left as an exercise for the first user
|
||||||
|
// who encounters the issue.
|
||||||
|
const char* str = line.c_str();
|
||||||
|
const char* col1_start = str;
|
||||||
|
// Delimiters for the extracted column.
|
||||||
|
const char* coln_start = str;
|
||||||
|
const char* coln_end = 0;
|
||||||
|
// The current column. (1-based)
|
||||||
|
int colnum = 1;
|
||||||
|
// Whether we are parsing a double-quoted string.
|
||||||
|
bool instring = false;
|
||||||
|
// Note that RFC 4180 has strict rules about
|
||||||
|
// double-quotes: ① if a field is double-quoted, the first
|
||||||
|
// and last characters of the field should be
|
||||||
|
// double-quotes; ② if a field contains a double-quote
|
||||||
|
// then it should be double quoted, and the occurrences
|
||||||
|
// of double-quotes should be doubled. Therefore a CSV file
|
||||||
|
// may no contain a line such as:
|
||||||
|
// foo,bar"ba""z",12
|
||||||
|
// Tools have different interpretation of such a line.
|
||||||
|
// For instance Python's pandas.read_csv() function will
|
||||||
|
// load the second field verbatim as the string 'bar"ba""z"',
|
||||||
|
// while R's read.csv() function will load it as the
|
||||||
|
// string 'barba"z'. We use this second interpretation, because
|
||||||
|
// it also makes it possible to parse CSVs fields formatted
|
||||||
|
// with leading spaces (often for cosmetic purpose). When
|
||||||
|
// extracting the second field of
|
||||||
|
// foo, "ba""z", 12
|
||||||
|
// we will return ' baz' and the leading space will be ignored
|
||||||
|
// by our LTL formula parser.
|
||||||
|
while (*str)
|
||||||
|
{
|
||||||
|
switch (*str)
|
||||||
|
{
|
||||||
|
case '"':
|
||||||
|
// Doubled double-quotes are used to escape
|
||||||
|
// double-quotes.
|
||||||
|
if (instring && str[1] == '"')
|
||||||
|
++str;
|
||||||
|
else
|
||||||
|
instring = !instring;
|
||||||
|
break;
|
||||||
|
case ',':
|
||||||
|
if (!instring)
|
||||||
|
{
|
||||||
|
if (col_to_read == colnum)
|
||||||
|
coln_end = str;
|
||||||
|
++colnum;
|
||||||
|
if (col_to_read == colnum)
|
||||||
|
coln_start = str + 1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Once we have the end delimiter for our target
|
||||||
|
// column, we have all we need.
|
||||||
|
if (coln_end)
|
||||||
|
break;
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
if (!*str)
|
||||||
|
{
|
||||||
|
if (colnum != col_to_read)
|
||||||
|
// Skip this line as it has no enough columns.
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
// The target columns ends at the end of the line.
|
||||||
|
coln_end = str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip the line if it has an empty field.
|
||||||
|
if (coln_start == coln_end)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// save the contents before and after that columns for the
|
||||||
|
// %< and %> escapes (ignoring the trailing and leading
|
||||||
|
// commas).
|
||||||
|
prefix = (col_to_read != 1) ?
|
||||||
|
strndup(col1_start, coln_start - col1_start - 1) : 0;
|
||||||
|
suffix = (*coln_end != 0) ? strdup(coln_end + 1) : 0;
|
||||||
|
std::string field(coln_start, coln_end);
|
||||||
|
// Remove double-quotes if any.
|
||||||
|
if (field.find('"') != std::string::npos)
|
||||||
|
{
|
||||||
|
unsigned dst = 0;
|
||||||
|
bool instring = false;
|
||||||
|
for (; coln_start != coln_end; ++coln_start)
|
||||||
|
if (*coln_start == '"')
|
||||||
|
// A doubled double-quote instead a double-quoted
|
||||||
|
// string is an escaped double-quote.
|
||||||
|
if (instring && coln_start[1] == '"')
|
||||||
|
field[dst++] = *++coln_start;
|
||||||
|
else
|
||||||
|
instring = !instring;
|
||||||
|
else
|
||||||
|
field[dst++] = *coln_start;
|
||||||
|
field.resize(dst);
|
||||||
|
}
|
||||||
|
error |= process_string(field, filename, ++linenum);
|
||||||
|
if (prefix)
|
||||||
|
{
|
||||||
|
free(prefix);
|
||||||
|
prefix = 0;
|
||||||
|
}
|
||||||
|
if (suffix)
|
||||||
|
{
|
||||||
|
free(suffix);
|
||||||
|
suffix = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -128,9 +273,46 @@ job_processor::process_file(const char* filename)
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
std::ifstream input(filename);
|
std::ifstream input(filename);
|
||||||
if (!input)
|
if (input)
|
||||||
error(2, errno, "cannot open '%s'", filename);
|
return process_stream(input, filename);
|
||||||
return process_stream(input, filename);
|
int saved_errno = errno;
|
||||||
|
|
||||||
|
// If we have a filename like "foo/NN" such
|
||||||
|
// that:
|
||||||
|
// ① foo/NN is not a file (already the case),
|
||||||
|
// ② NN is a number > 0,
|
||||||
|
// ③ foo is a file,
|
||||||
|
// then it means we want to open foo as
|
||||||
|
// a CSV file and process column NN.
|
||||||
|
|
||||||
|
if (const char* slash = strrchr(filename, '/'))
|
||||||
|
{
|
||||||
|
char* end;
|
||||||
|
errno = 0;
|
||||||
|
long int col = strtol(slash + 1, &end, 10);
|
||||||
|
// strtol ate all remaining characters and NN is positive
|
||||||
|
if (errno == 0 && !*end && col != 0)
|
||||||
|
{
|
||||||
|
col_to_read = col;
|
||||||
|
if (real_filename)
|
||||||
|
free(real_filename);
|
||||||
|
real_filename = strndup(filename, slash - filename);
|
||||||
|
|
||||||
|
// Special case for stdin.
|
||||||
|
if (real_filename[0] == '-' && real_filename[1] == 0)
|
||||||
|
return process_stream(std::cin, real_filename);
|
||||||
|
|
||||||
|
std::ifstream input(real_filename);
|
||||||
|
if (input)
|
||||||
|
return process_stream(input, real_filename);
|
||||||
|
|
||||||
|
error(2, errno, "cannot open '%s' nor '%s'",
|
||||||
|
filename, real_filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
error(2, saved_errno, "cannot open '%s'", filename);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// -*- coding: utf-8 -*-
|
// -*- coding: utf-8 -*-
|
||||||
// Copyright (C) 2012 Laboratoire de Recherche et Développement de
|
// Copyright (C) 2012, 2013 Laboratoire de Recherche et Développement
|
||||||
// l'Epita (LRDE).
|
// de l'Epita (LRDE).
|
||||||
//
|
//
|
||||||
// This file is part of Spot, a model checking library.
|
// This file is part of Spot, a model checking library.
|
||||||
//
|
//
|
||||||
|
|
@ -56,9 +56,7 @@ protected:
|
||||||
public:
|
public:
|
||||||
job_processor();
|
job_processor();
|
||||||
|
|
||||||
virtual ~job_processor()
|
virtual ~job_processor();
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual int
|
virtual int
|
||||||
process_formula(const spot::ltl::formula* f,
|
process_formula(const spot::ltl::formula* f,
|
||||||
|
|
@ -75,6 +73,11 @@ public:
|
||||||
|
|
||||||
virtual int
|
virtual int
|
||||||
run();
|
run();
|
||||||
|
|
||||||
|
char* real_filename;
|
||||||
|
long int col_to_read;
|
||||||
|
char* prefix;
|
||||||
|
char* suffix;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,8 @@ namespace
|
||||||
const spot::ltl::formula* f;
|
const spot::ltl::formula* f;
|
||||||
const char* filename;
|
const char* filename;
|
||||||
int line;
|
int line;
|
||||||
|
const char* prefix;
|
||||||
|
const char* suffix;
|
||||||
};
|
};
|
||||||
|
|
||||||
class printable_formula:
|
class printable_formula:
|
||||||
|
|
@ -141,6 +143,8 @@ namespace
|
||||||
declare('f', &fl_);
|
declare('f', &fl_);
|
||||||
declare('F', &filename_);
|
declare('F', &filename_);
|
||||||
declare('L', &line_);
|
declare('L', &line_);
|
||||||
|
declare('<', &prefix_);
|
||||||
|
declare('>', &suffix_);
|
||||||
set_output(os);
|
set_output(os);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -150,6 +154,8 @@ namespace
|
||||||
fl_ = &fl;
|
fl_ = &fl;
|
||||||
filename_ = fl.filename ? fl.filename : "";
|
filename_ = fl.filename ? fl.filename : "";
|
||||||
line_ = fl.line;
|
line_ = fl.line;
|
||||||
|
prefix_ = fl.prefix ? fl.prefix : "";
|
||||||
|
suffix_ = fl.suffix ? fl.suffix : "";
|
||||||
return format(format_);
|
return format(format_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -158,6 +164,8 @@ namespace
|
||||||
printable_formula fl_;
|
printable_formula fl_;
|
||||||
spot::printable_value<const char*> filename_;
|
spot::printable_value<const char*> filename_;
|
||||||
spot::printable_value<int> line_;
|
spot::printable_value<int> line_;
|
||||||
|
spot::printable_value<const char*> prefix_;
|
||||||
|
spot::printable_value<const char*> suffix_;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,15 +210,20 @@ parse_opt_output(int key, char* arg, struct argp_state*)
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
output_formula(const spot::ltl::formula* f, const char* filename, int linenum)
|
output_formula(const spot::ltl::formula* f, const char* filename, int linenum,
|
||||||
|
const char* prefix, const char* suffix)
|
||||||
{
|
{
|
||||||
if (!format)
|
if (!format)
|
||||||
{
|
{
|
||||||
|
if (prefix)
|
||||||
|
std::cout << prefix << ",";
|
||||||
stream_formula(std::cout, f, filename, linenum);
|
stream_formula(std::cout, f, filename, linenum);
|
||||||
|
if (suffix)
|
||||||
|
std::cout << "," << suffix;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
formula_with_location fl = { f, filename, linenum };
|
formula_with_location fl = { f, filename, linenum, prefix, suffix };
|
||||||
format->print(fl);
|
format->print(fl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ extern const struct argp output_argp;
|
||||||
int parse_opt_output(int key, char* arg, struct argp_state* state);
|
int parse_opt_output(int key, char* arg, struct argp_state* state);
|
||||||
|
|
||||||
void output_formula(const spot::ltl::formula* f,
|
void output_formula(const spot::ltl::formula* f,
|
||||||
const char* filename = 0, int linenum = 0);
|
const char* filename = 0, int linenum = 0,
|
||||||
|
const char* prefix = 0, const char* suffix = 0);
|
||||||
|
|
||||||
#endif // SPOT_BIN_COMMON_OUTPUT_HH
|
#endif // SPOT_BIN_COMMON_OUTPUT_HH
|
||||||
|
|
|
||||||
|
|
@ -165,6 +165,12 @@ static const argp_option options[] =
|
||||||
"the name of the input file", 0 },
|
"the name of the input file", 0 },
|
||||||
{ "%L", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
{ "%L", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
||||||
"the original line number in the input file", 0 },
|
"the original line number in the input file", 0 },
|
||||||
|
{ "%<", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
||||||
|
"the part of the line before the formula if it "
|
||||||
|
"comes from a column extracted from a CSV file", 0 },
|
||||||
|
{ "%>", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
||||||
|
"the part of the line after the formula if it "
|
||||||
|
"comes from a column extracted from a CSV file", 0 },
|
||||||
{ "%%", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
{ "%%", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
|
||||||
"a single %", 0 },
|
"a single %", 0 },
|
||||||
{ 0, 0, 0, 0, "Miscellaneous options:", -1 },
|
{ 0, 0, 0, 0, "Miscellaneous options:", -1 },
|
||||||
|
|
@ -571,7 +577,7 @@ namespace
|
||||||
if (matched)
|
if (matched)
|
||||||
{
|
{
|
||||||
one_match = true;
|
one_match = true;
|
||||||
output_formula(f, filename, linenum);
|
output_formula(f, filename, linenum, prefix, suffix);
|
||||||
}
|
}
|
||||||
f->destroy();
|
f->destroy();
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -584,7 +590,7 @@ main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
setup(argv);
|
setup(argv);
|
||||||
|
|
||||||
const argp ap = { options, parse_opt, "[FILENAME...]",
|
const argp ap = { options, parse_opt, "[FILENAME[/COL]...]",
|
||||||
argp_program_doc, children, 0, 0 };
|
argp_program_doc, children, 0, 0 };
|
||||||
|
|
||||||
if (int err = argp_parse(&ap, argc, argv, ARGP_NO_HELP, 0, 0))
|
if (int err = argp_parse(&ap, argc, argv, ARGP_NO_HELP, 0, 0))
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ cat >formulas <<'EOF'
|
||||||
1,4,G(G!a | F!c | G!b)
|
1,4,G(G!a | F!c | G!b)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
cut -d, -f3 <formulas | $ltl2tgba -x tba-det --det --stats '%d,%s,%f' -F - > out
|
$ltl2tgba -x tba-det --det --stats '%d,%s,%f' -F formulas/3 > out
|
||||||
diff formulas out
|
diff formulas out
|
||||||
|
|
||||||
cat >in.tgba <<'EOF'
|
cat >in.tgba <<'EOF'
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,8 @@ a U b, 1 0
|
||||||
G(!r | Fa) | Fx, 0 1
|
G(!r | Fa) | Fx, 0 1
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
cut -d, -f1 expected.1 |
|
# also test the filename/COL syntax
|
||||||
../../bin/ltl2tgba -F- --stats='%f, %d %p' >out.1
|
../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
|
||||||
diff out.1 expected.1
|
diff out.1 expected.1
|
||||||
|
|
||||||
cat >expected.2<<EOF
|
cat >expected.2<<EOF
|
||||||
|
|
@ -39,8 +39,8 @@ a U b, 1 1
|
||||||
G(!r | Fa) | Fx, 0 1
|
G(!r | Fa) | Fx, 0 1
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
cut -d, -f1 expected.2 |
|
# filename/COL should also work when filename=-
|
||||||
../../bin/ltl2tgba -C -F- --stats='%f, %d %p' >out.2
|
../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
|
||||||
diff out.2 expected.2
|
diff out.2 expected.2
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue