bin: support multi-line CSV fields.
* src/bin/common_finput.cc (job_processor::process_stream): Read multi-line CSV fields.
* src/ltltest/lbt.test, src/tgbatest/nondet.test: Add tests.
This commit is contained in:
parent
6c21089599
commit
f0bcab4add
3 changed files with 53 additions and 14 deletions
|
|
@ -126,7 +126,7 @@ job_processor::process_stream(std::istream& is,
|
|||
const char* filename)
|
||||
{
|
||||
int error = 0;
|
||||
int linenum = 0;
|
||||
int linenum = 1;
|
||||
std::string line;
|
||||
|
||||
// Discard the first line of a CSV file if requested.
|
||||
|
|
@ -134,6 +134,7 @@ job_processor::process_stream(std::istream& is,
|
|||
{
|
||||
std::getline(is, line);
|
||||
col_to_read = -col_to_read;
|
||||
++linenum;
|
||||
}
|
||||
|
||||
// Read each line of the file and send it to process_string,
|
||||
|
|
@ -143,17 +144,39 @@ job_processor::process_stream(std::istream& is,
|
|||
{
|
||||
if (col_to_read == 0)
|
||||
{
|
||||
error |= process_string(line, filename, ++linenum);
|
||||
error |= process_string(line, filename, linenum++);
|
||||
}
|
||||
else // We are reading column COL_TO_READ in a CSV file.
|
||||
{
|
||||
// FIXME: This code assumes an entire CSV row has been
|
||||
// fetched by getline(). This is incorrect for processing
|
||||
// CSV files with fields that contain newlines inside
|
||||
// double-quoted strings. Patching this code to deal with
|
||||
// such files is left as an exercise for the first user
|
||||
// who encounters the issue.
|
||||
const char* str = line.c_str();
|
||||
// If the line we have read contains an odd number
|
||||
// of double-quotes, then it is an incomplete CSV line
|
||||
// that should be completed by the next lines.
|
||||
unsigned dquotes = 0;
|
||||
std::string fullline;
|
||||
unsigned csvlines = 0;
|
||||
do
|
||||
{
|
||||
++csvlines;
|
||||
size_t s = line.size();
|
||||
for (unsigned i = 0; i < s; ++i)
|
||||
dquotes += line[i] == '"';
|
||||
if (fullline.empty())
|
||||
fullline = line;
|
||||
else
|
||||
(fullline += '\n') += line;
|
||||
if (!(dquotes &= 1))
|
||||
break;
|
||||
}
|
||||
while (std::getline(is, line));
|
||||
if (dquotes)
|
||||
error_at_line(2, errno, filename, linenum,
|
||||
"mismatched double-quote, "
|
||||
"reached EOF while parsing this line");
|
||||
|
||||
// Now that we have a full CSV line, extract the right
|
||||
// column.
|
||||
|
||||
const char* str = fullline.c_str();
|
||||
const char* col1_start = str;
|
||||
// Delimiters for the extracted column.
|
||||
const char* coln_start = str;
|
||||
|
|
@ -248,7 +271,8 @@ job_processor::process_stream(std::istream& is,
|
|||
field[dst++] = *coln_start;
|
||||
field.resize(dst);
|
||||
}
|
||||
error |= process_string(field, filename, ++linenum);
|
||||
error |= process_string(field, filename, linenum);
|
||||
linenum += csvlines;
|
||||
if (prefix)
|
||||
{
|
||||
free(prefix);
|
||||
|
|
|
|||
|
|
@ -101,6 +101,9 @@ test `wc -l < formulas.2` -eq 168
|
|||
test `wc -l < formulas.3` -eq 168
|
||||
test `wc -l < formulas.4` -eq 168
|
||||
|
||||
run 0 $ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5
|
||||
run 0 $ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6
|
||||
cmp formulas.5 formulas.6
|
||||
|
||||
# Make sure ltl2dstar-style literals always get quoted.
|
||||
test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"'
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ G(!r | Fa) | Fx, 0 1
|
|||
EOF
|
||||
|
||||
# also test the filename/COL syntax
|
||||
../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
|
||||
run 0 ../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
|
||||
diff out.1 expected.1
|
||||
|
||||
cat >expected.2<<EOF
|
||||
|
|
@ -40,11 +40,23 @@ G(!r | Fa) | Fx, 0 1
|
|||
EOF
|
||||
|
||||
# filename/COL should also work when filename=-
|
||||
../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
|
||||
run 0 ../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
|
||||
diff out.2 expected.2
|
||||
|
||||
# Test multi-line CSV fields.
|
||||
cat >in.2b<<EOF
|
||||
FGa, 0 1
|
||||
GFa, 1 1
|
||||
a U b, 1 1
|
||||
"G(!r | Fa)
|
||||
|
|
||||
Fx", 0 1
|
||||
EOF
|
||||
run 0 ../../bin/ltl2tgba -C -Fin.2b/1 --stats='%f, %d %p' >out.2b
|
||||
diff out.2b expected.2
|
||||
|
||||
../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
|
||||
|
||||
run 0 ../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
|
||||
cat >expected.3<<EOF
|
||||
FGa 0 1 2 0
|
||||
GFa 1 0 1 1
|
||||
|
|
@ -53,7 +65,7 @@ EOF
|
|||
diff out.3 expected.3
|
||||
|
||||
|
||||
../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4
|
||||
run 0 ../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4
|
||||
cat >expected.4<<EOF
|
||||
FGa 0 1 3 1
|
||||
GFa 1 0 1 1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue