From 64df4fbccc3cfbf985f586ca030870a09f10d271 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Wed, 21 Oct 2015 17:29:37 +0200 Subject: [PATCH] fix "input buffer overflow, can't enlarge buffer..." This occured when parsing the HOA automaton generated by Rabinizer3 for a very long LTL formula with many nested U. State labels could easily be more than 40k characters. * src/parseaut/scanaut.ll: Fix that. * src/tests/parseaut.test: New test case. * NEWS: Mention the fix. --- NEWS | 3 +++ src/parseaut/scanaut.ll | 16 ++++++++++++---- src/tests/parseaut.test | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 41bba4198..9e9749541 100644 --- a/NEWS +++ b/NEWS @@ -71,6 +71,9 @@ New in spot 1.99.4a (not yet released) * unabbreviate could easily use forbidden operators. * "autfilt --is-unambiguous" could fail to detect the nonambiguity of some automata with empty languages. + * When parsing long tokens (e.g, state labels representing + very large strings) the automaton parser could die with + "input buffer overflow, can't enlarge buffer because scanner uses REJECT" New in spot 1.99.4 (2015-10-01) diff --git a/src/parseaut/scanaut.ll b/src/parseaut/scanaut.ll index ea28676fa..2d845912f 100644 --- a/src/parseaut/scanaut.ll +++ b/src/parseaut/scanaut.ll @@ -145,8 +145,8 @@ identifier [[:alpha:]_][[:alnum:]_-]* "AP:" return token::AP; "v2" return token::V2; "explicit" return token::EXPLICIT; - "Comment:".* continue; - "//".* continue; + "Comment:".* yylloc->step(); + "//".* yylloc->step(); "Acceptance-Pairs:" return token::ACCPAIRS; "Acc-Sig:" return token::ACCSIG; "---" return token::ENDOFHEADER; @@ -154,7 +154,9 @@ identifier [[:alpha:]_][[:alnum:]_-]* /* The start of any automaton is the end of this one. We do not try to detect LBTT automata, as that would be too hard to distinguish from state numbers. */ - ""/{eols}("HOA:"|"never"|"DSA"|"DRA") { + {eols}("HOA:"|"never"|"DSA"|"DRA") { + yylloc->end = yylloc->begin; + yyless(0); BEGIN(INITIAL); return token::ENDDSTAR; } @@ -281,7 +283,13 @@ identifier [[:alpha:]_][[:alnum:]_-]* "*"+[^*/\n\r]* continue; {eol} yylloc->lines(yyleng); yylloc->end.column = 1; {eol2} yylloc->lines(yyleng / 2); yylloc->end.column = 1; - "*"+"/" if (--comment_level == 0) BEGIN(orig_cond); + "*"+"/" { + if (--comment_level == 0) + { + yylloc->step(); + BEGIN(orig_cond); + } + } <> { BEGIN(orig_cond); error_list.push_back( diff --git a/src/tests/parseaut.test b/src/tests/parseaut.test index b4a94800c..a185be4b7 100755 --- a/src/tests/parseaut.test +++ b/src/tests/parseaut.test @@ -2365,3 +2365,21 @@ State: 3 "[{8}]" [!1] 0 {0 1 2} --END-- EOF + +# A 50000-char word +BIGLABEL=`yes y | sed 25000q | tr 'y\n' 'xx'` +cat >bigaut <