bin: support multi-line CSV fields.

* src/bin/common_finput.cc (job_processor::process_stream): Read
multi-line CSV fields.
* src/ltltest/lbt.test, src/tgbatest/nondet.test: Add tests.
This commit is contained in:
Alexandre Duret-Lutz 2013-12-06 13:45:55 +01:00
parent 6c21089599
commit f0bcab4add
3 changed files with 53 additions and 14 deletions

View file

@ -126,7 +126,7 @@ job_processor::process_stream(std::istream& is,
const char* filename) const char* filename)
{ {
int error = 0; int error = 0;
int linenum = 0; int linenum = 1;
std::string line; std::string line;
// Discard the first line of a CSV file if requested. // Discard the first line of a CSV file if requested.
@ -134,6 +134,7 @@ job_processor::process_stream(std::istream& is,
{ {
std::getline(is, line); std::getline(is, line);
col_to_read = -col_to_read; col_to_read = -col_to_read;
++linenum;
} }
// Read each line of the file and send it to process_string, // Read each line of the file and send it to process_string,
@ -143,17 +144,39 @@ job_processor::process_stream(std::istream& is,
{ {
if (col_to_read == 0) if (col_to_read == 0)
{ {
error |= process_string(line, filename, ++linenum); error |= process_string(line, filename, linenum++);
} }
else // We are reading column COL_TO_READ in a CSV file. else // We are reading column COL_TO_READ in a CSV file.
{ {
// FIXME: This code assumes an entire CSV row has been // If the line we have read contains an odd number
// fetched by getline(). This is incorrect for processing // of double-quotes, then it is an incomplete CSV line
// CSV files with fields that contain newlines inside // that should be completed by the next lines.
// double-quoted strings. Patching this code to deal with unsigned dquotes = 0;
// such files is left as an exercise for the first user std::string fullline;
// who encounters the issue. unsigned csvlines = 0;
const char* str = line.c_str(); do
{
++csvlines;
size_t s = line.size();
for (unsigned i = 0; i < s; ++i)
dquotes += line[i] == '"';
if (fullline.empty())
fullline = line;
else
(fullline += '\n') += line;
if (!(dquotes &= 1))
break;
}
while (std::getline(is, line));
if (dquotes)
error_at_line(2, errno, filename, linenum,
"mismatched double-quote, "
"reached EOF while parsing this line");
// Now that we have a full CSV line, extract the right
// column.
const char* str = fullline.c_str();
const char* col1_start = str; const char* col1_start = str;
// Delimiters for the extracted column. // Delimiters for the extracted column.
const char* coln_start = str; const char* coln_start = str;
@ -248,7 +271,8 @@ job_processor::process_stream(std::istream& is,
field[dst++] = *coln_start; field[dst++] = *coln_start;
field.resize(dst); field.resize(dst);
} }
error |= process_string(field, filename, ++linenum); error |= process_string(field, filename, linenum);
linenum += csvlines;
if (prefix) if (prefix)
{ {
free(prefix); free(prefix);

View file

@ -101,6 +101,9 @@ test `wc -l < formulas.2` -eq 168
test `wc -l < formulas.3` -eq 168 test `wc -l < formulas.3` -eq 168
test `wc -l < formulas.4` -eq 168 test `wc -l < formulas.4` -eq 168
run 0 $ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5
run 0 $ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6
cmp formulas.5 formulas.6
# Make sure ltl2dstar-style literals always get quoted. # Make sure ltl2dstar-style literals always get quoted.
test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"' test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"'

View file

@ -29,7 +29,7 @@ G(!r | Fa) | Fx, 0 1
EOF EOF
# also test the filename/COL syntax # also test the filename/COL syntax
../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1 run 0 ../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
diff out.1 expected.1 diff out.1 expected.1
cat >expected.2<<EOF cat >expected.2<<EOF
@ -40,11 +40,23 @@ G(!r | Fa) | Fx, 0 1
EOF EOF
# filename/COL should also work when filename=- # filename/COL should also work when filename=-
../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2 run 0 ../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
diff out.2 expected.2 diff out.2 expected.2
# Test multi-line CSV fields.
cat >in.2b<<EOF
FGa, 0 1
GFa, 1 1
a U b, 1 1
"G(!r | Fa)
|
Fx", 0 1
EOF
run 0 ../../bin/ltl2tgba -C -Fin.2b/1 --stats='%f, %d %p' >out.2b
diff out.2b expected.2
../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
run 0 ../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
cat >expected.3<<EOF cat >expected.3<<EOF
FGa 0 1 2 0 FGa 0 1 2 0
GFa 1 0 1 1 GFa 1 0 1 1
@ -53,7 +65,7 @@ EOF
diff out.3 expected.3 diff out.3 expected.3
../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4 run 0 ../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4
cat >expected.4<<EOF cat >expected.4<<EOF
FGa 0 1 3 1 FGa 0 1 3 1
GFa 1 0 1 1 GFa 1 0 1 1