From f0bcab4addda1417b3c6dadf122059b5d32d4f1b Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Fri, 6 Dec 2013 13:45:55 +0100 Subject: [PATCH] bin: support multi-line CSV fields. * src/bin/common_finput.cc (job_processor::process_stream): Read multi-line CSV fields. * src/ltltest/lbt.test, src/tgbatest/nondet.test: Add tests. --- src/bin/common_finput.cc | 44 +++++++++++++++++++++++++++++++--------- src/ltltest/lbt.test | 3 +++ src/tgbatest/nondet.test | 20 ++++++++++++++---- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/bin/common_finput.cc b/src/bin/common_finput.cc index af9bee438..82716cae3 100644 --- a/src/bin/common_finput.cc +++ b/src/bin/common_finput.cc @@ -126,7 +126,7 @@ job_processor::process_stream(std::istream& is, const char* filename) { int error = 0; - int linenum = 0; + int linenum = 1; std::string line; // Discard the first line of a CSV file if requested. @@ -134,6 +134,7 @@ job_processor::process_stream(std::istream& is, { std::getline(is, line); col_to_read = -col_to_read; + ++linenum; } // Each line of the file and send them to process_string, @@ -143,17 +144,39 @@ job_processor::process_stream(std::istream& is, { if (col_to_read == 0) { - error |= process_string(line, filename, ++linenum); + error |= process_string(line, filename, linenum++); } else // We are reading column COL_TO_READ in a CSV file. { - // FIXME: This code assumes an entire CSV row was been - // fetched by getline(). This is incorrect for processing - // CSV files with fields that contain newlines inside - // double-quoted strings. Patching this code to deal with - // such files is left as an exercise for the first user - // who encounters the issue. - const char* str = line.c_str(); + // If the line we have read contains an odd number + // of double-quotes, then it is an incomplete CSV line + // that should be completed by the next lines. + unsigned dquotes = 0; + std::string fullline; + unsigned csvlines = 0; + do + { + ++csvlines; + size_t s = line.size(); + for (unsigned i = 0; i < s; ++i) + dquotes += line[i] == '"'; + if (fullline.empty()) + fullline = line; + else + (fullline += '\n') += line; + if (!(dquotes &= 1)) + break; + } + while (std::getline(is, line)); + if (dquotes) + error_at_line(2, errno, filename, linenum, + "mismatched double-quote, " + "reached EOF while parsing this line"); + + // Now that we have a full CSV line, extract the right + // column. + + const char* str = fullline.c_str(); const char* col1_start = str; // Delimiters for the extracted column. const char* coln_start = str; @@ -248,7 +271,8 @@ job_processor::process_stream(std::istream& is, field[dst++] = *coln_start; field.resize(dst); } - error |= process_string(field, filename, ++linenum); + error |= process_string(field, filename, linenum); + linenum += csvlines; if (prefix) { free(prefix); diff --git a/src/ltltest/lbt.test b/src/ltltest/lbt.test index 8d18408bf..15f7bfebf 100755 --- a/src/ltltest/lbt.test +++ b/src/ltltest/lbt.test @@ -101,6 +101,9 @@ test `wc -l < formulas.2` -eq 168 test `wc -l < formulas.3` -eq 168 test `wc -l < formulas.4` -eq 168 +run 0 $ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5 +run 0 $ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6 +cmp formulas.5 formulas.6 # Make sure ltl2dstar-style litterals always get quoted. test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"' diff --git a/src/tgbatest/nondet.test b/src/tgbatest/nondet.test index 8057f7558..b9911a48b 100755 --- a/src/tgbatest/nondet.test +++ b/src/tgbatest/nondet.test @@ -29,7 +29,7 @@ G(!r | Fa) | Fx, 0 1 EOF # also test the filename/COL syntax -../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1 +run 0 ../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1 diff out.1 expected.1 cat >expected.2<out.2 +run 0 ../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' out.2 diff out.2 expected.2 +# Test multi-line CSV fields. +cat >in.2b<out.2b +diff out.2b expected.2 -../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3 + +run 0 ../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3 cat >expected.3<out.4 +run 0 ../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4 cat >expected.4<