bin: support multi-line CSV fields.

* src/bin/common_finput.cc (job_processor::process_stream): Read multi-line CSV fields.
* src/ltltest/lbt.test, src/tgbatest/nondet.test: Add tests.
2013-12-06 13:45:55 +01:00 · 2013-12-06 13:45:55 +01:00 · f0bcab4add
commit f0bcab4add
parent 6c21089599
3 changed files with 53 additions and 14 deletions
--- a/src/bin/common_finput.cc
+++ b/src/bin/common_finput.cc
@ -126,7 +126,7 @@ job_processor::process_stream(std::istream& is,
 			      const char* filename)
 {
  int error = 0;
-  int linenum = 0;
+  int linenum = 1;
  std::string line;
  // Discard the first line of a CSV file if requested.
@ -134,6 +134,7 @@ job_processor::process_stream(std::istream& is,
    {
      std::getline(is, line);
      col_to_read = -col_to_read;
      ++linenum;
    }
  // Each line of the file and send them to process_string,
@ -143,17 +144,39 @@ job_processor::process_stream(std::istream& is,
      {
 	if (col_to_read == 0)
 	  {
-	    error |= process_string(line, filename, ++linenum);
+	    error |= process_string(line, filename, linenum++);
 	  }
 	else // We are reading column COL_TO_READ in a CSV file.
 	  {
-	    // FIXME: This code assumes an entire CSV row was been
+	    // If the line we have read contains an odd number
-	    // fetched by getline().  This is incorrect for processing
+	    // of double-quotes, then it is an incomplete CSV line
-	    // CSV files with fields that contain newlines inside
+	    // that should be completed by the next lines.
-	    // double-quoted strings.  Patching this code to deal with
+	    unsigned dquotes = 0;
-	    // such files is left as an exercise for the first user
+	    std::string fullline;
-	    // who encounters the issue.
+	    unsigned csvlines = 0;
-	    const char* str = line.c_str();
+	    do
 	      {
 		++csvlines;
 		size_t s = line.size();
 		for (unsigned i = 0; i < s; ++i)
 		  dquotes += line[i] == '"';
 		if (fullline.empty())
 		  fullline = line;
 		else
 		  (fullline += '\n') += line;
 		if (!(dquotes &= 1))
 		  break;
 	      }
 	    while (std::getline(is, line));
 	    if (dquotes)
 	      error_at_line(2, errno, filename, linenum,
 			    "mismatched double-quote, "
 			    "reached EOF while parsing this line");
 	    // Now that we have a full CSV line, extract the right
 	    // column.
 	    const char* str = fullline.c_str();
 	    const char* col1_start = str;
 	    // Delimiters for the extracted column.
 	    const char* coln_start = str;
@ -248,7 +271,8 @@ job_processor::process_stream(std::istream& is,
 		    field[dst++] = *coln_start;
 		field.resize(dst);
 	      }
-	    error |= process_string(field, filename, ++linenum);
+	    error |= process_string(field, filename, linenum);
 	    linenum += csvlines;
 	    if (prefix)
 	      {
 		free(prefix);
--- a/src/ltltest/lbt.test
+++ b/src/ltltest/lbt.test
@ -101,6 +101,9 @@ test `wc -l < formulas.2` -eq 168
 test `wc -l < formulas.3` -eq 168
 test `wc -l < formulas.4` -eq 168
 run 0 $ltlfilt formulas.2 --csv-escape --format='%L,%f' > formulas.5
 run 0 $ltlfilt formulas.5/2 --csv-escape --format='%L,%f' > formulas.6
 cmp formulas.5 formulas.6
 # Make sure ltl2dstar-style literals always get quoted.
 test "`$ltlfilt -l --lbt-input -f 'G F a'`" = 'G F "a"'
--- a/src/tgbatest/nondet.test
+++ b/src/tgbatest/nondet.test
@ -29,7 +29,7 @@ G(!r | Fa) | Fx, 0 1
 EOF
 # also test the filename/COL syntax
-../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
+run 0 ../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
 diff out.1 expected.1
 cat >expected.2<<EOF
@ -40,11 +40,23 @@ G(!r | Fa) | Fx, 0 1
 EOF
 # filename/COL should also work when filename=-
-../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
+run 0 ../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
 diff out.2 expected.2
 # Test multi-line CSV fields.
 cat >in.2b<<EOF
 FGa, 0 1
 GFa, 1 1
 a U b, 1 1
 "G(!r | Fa)
 |
 Fx", 0 1
 EOF
 run 0 ../../bin/ltl2tgba -C -Fin.2b/1 --stats='%f, %d %p' >out.2b
 diff out.2b expected.2
-../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
+
 run 0 ../../bin/ltl2tgba FGa GFa --stats='%f %d %n %s %p' >out.3
 cat >expected.3<<EOF
 FGa 0 1 2 0
 GFa 1 0 1 1
@ -53,7 +65,7 @@ EOF
 diff out.3 expected.3
-../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4
+run 0 ../../bin/ltl2tgba -DC FGa GFa --stats='%f %d %n %s %p' >out.4
 cat >expected.4<<EOF
 FGa 0 1 3 1
 GFa 1 0 1 1