From c6605e951dab2db3cc25be184433c3f9c6f1fe16 Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Wed, 8 May 2019 15:05:47 +0200
Subject: [PATCH] tl: add some simplifications for first_match

Following a discussion with Victor Khomenko.

* doc/tl/tl.tex: Document those rules.
* spot/tl/simplify.cc: Implement them.
* tests/core/reduccmp.test: Test them.
---
 doc/tl/tl.tex            | 28 ++++++++-------------
 spot/tl/simplify.cc      | 53 +++++++++++++++++++++++++++++++++++++++-
 tests/core/reduccmp.test | 12 ++++++++-
 3 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/doc/tl/tl.tex b/doc/tl/tl.tex
index 8c244814b..941a39305 100644
--- a/doc/tl/tl.tex
+++ b/doc/tl/tl.tex
@@ -801,7 +801,8 @@ $b_1$, $b_2$ are assumed to be Boolean formulas.
   &&
   f\FSTAR{\mvar{i}..\mvar{j}}\FSTAR{\mvar{k}..\mvar{l}} &\equiv f\FSTAR{\mvar{ik}..\mvar{jl}}\text{~if~}i(k+1)\le jk+1 \\
   f\FSTAR{0}&\equiv \1 & f\FSTAR{1}&\equiv f\text{~if~}\varepsilon\nVDash f\\
-  \FIRSTMATCH\code(b\code) &\equiv b & \FIRSTMATCH\code(f\code) &\equiv \eword\text{~if~}\varepsilon\VDash f
+  \FIRSTMATCH\code(b\code) &\equiv b & \FIRSTMATCH\code(f\code) &\equiv \eword\text{~if~}\varepsilon\VDash f \\
+                                    && \FIRSTMATCH\code(\FIRSTMATCH\code(f\code)\code) &\equiv \FIRSTMATCH\code(f\code)
 \end{align*}
 
 \noindent
@@ -995,22 +996,6 @@ operator, even if the operator has multiple synonyms (like \samp{|},
 \end{align*}
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 \section{Operator precedence}
 
 The following operator precedence describes the current parser of
@@ -1600,7 +1585,14 @@ SERE.
   \sere{b_1\FUSION r_1}\ANDALT\sere{b_2\FUSION r_2} &\equiv \sere{b_1\ANDALT b_2}\FUSION\sere{r_1\ANDALT r_2} &
   \sere{r_1\FUSION b_1}\ANDALT\sere{r_2\FUSION b_2} &\equiv \sere{r_1\ANDALT r_2}\FUSION\sere{b_1\ANDALT b_2} \\
   \sere{b_1\CONCAT r_1}\AND \sere{b_2\CONCAT r_2} &\equiv \sere{b_1\ANDALT b_2}\CONCAT\sere{r_1\AND r_2} \\
-  \sere{b_1\FUSION r_1}\AND \sere{b_2\FUSION r_2} &\equiv \sere{b_1\ANDALT b_2}\FUSION\sere{r_1\AND r_2} \mathrlap{\quad\text{if~}\varepsilon\nVDash r_1\land\varepsilon\nVDash r_2}\\
+  \sere{b_1\FUSION r_1}\AND \sere{b_2\FUSION r_2} &\equiv \sere{b_1\ANDALT b_2}\FUSION\sere{r_1\AND r_2} \mathrlap{\quad\text{if~}\varepsilon\nVDash r_1\land\varepsilon\nVDash r_2}
+\end{align*}
+\begin{align*}
+  \FIRSTMATCH\code(b\STAR{\mvar{i}..\mvar{j}}\code) &\equiv b\STAR{\mvar{i}} \\
+  \FIRSTMATCH\code(r\STAR{\mvar{i}..\mvar{j}}\code) &\equiv \FIRSTMATCH\code(r\STAR{\mvar{i}}\code) \\
+  \FIRSTMATCH\code(r_1\CONCAT{}r_2\STAR{\mvar{i}..\mvar{j}}\code) &\equiv \FIRSTMATCH(r_1\CONCAT\mathrlap{r_2\STAR{\mvar{i}})} \\
+  \FIRSTMATCH\code(b\CONCAT{}r\code) &\equiv b\CONCAT\FIRSTMATCH\code(r\code) \\
+  \FIRSTMATCH\code(r_1\CONCAT{}r_2\code) &\equiv\FIRSTMATCH(r_1)\mathrlap{\quad\text{if~} \varepsilon\VDash r_2}
 \end{align*}
 
 Starred subformulas are rewritten in Star Normal
diff --git a/spot/tl/simplify.cc b/spot/tl/simplify.cc
index 2d82ba983..7fbbda912 100644
--- a/spot/tl/simplify.cc
+++ b/spot/tl/simplify.cc
@@ -856,7 +856,6 @@ namespace spot
           case op::ap:
           case op::Not:
           case op::FStar:
-          case op::first_match:
             return f;
           case op::X:
             {
@@ -1429,6 +1428,58 @@ namespace spot
                 }
               return formula::Star(h, min, f.max());
             }
+          case op::first_match:
+            {
+              formula h = f[0];
+              if (h.is(op::Star))
+                {
+                  // first_match(b[*i..j]) = b[*i]
+                  // first_match(f[*i..j]) = first_match(f[*i])
+                  unsigned m = h.min();
+                  formula s = formula::Star(h[0], m, m);
+                  return h[0].is_boolean() ? s : formula::first_match(s);
+                }
+              else if (h.is(op::Concat))
+                {
+                  // If b is Boolean and e accepts [*0], we have
+                  // first_match(b;f) = b;first_match(f)
+                  // first_match(f;e) = first_match(f)
+                  // first_match(f;g[*i..j]) = first_match(f;g[*i])
+                  // the first two rules can be repeated, so we will loop
+                  // to save the recursion and intermediate caching.
+
+                  // Extract Boolean formulas at the beginning.
+                  int i = 0;
+                  int n = h.size();
+                  while (i < n && h[i].is_boolean())
+                    ++i;
+                  vec prefix;
+                  prefix.reserve(i + 1); // +1 to append first_match()
+                  for (int ii = 0; ii < i; ++ii)
+                    prefix.push_back(h[ii]);
+                  // Extract suffix, minus trailing formulas that accept [*0].
+                  // "i" is the start of the suffix.
+                  do
+                    --n;
+                  while (i <= n && h[n].accepts_eword());
+                  vec suffix;
+                  suffix.reserve(n + 1 - i);
+                  for (int ii = i; ii <= n; ++ii)
+                    suffix.push_back(h[ii]);
+                  if (!suffix.empty() && suffix.back().is(op::Star))
+                    {
+                      formula s = suffix.back();
+                      unsigned smin = s.min();
+                      suffix.back() = formula::Star(s[0], smin, smin);
+                    }
+                  prefix.push_back(formula::first_match
+                                   (formula::Concat(suffix)));
+                  formula res = formula::Concat(prefix);
+                  if (res != f)
+                    return recurse(res);
+                }
+              return f;
+            }
           }
         SPOT_UNREACHABLE();
       }
diff --git a/tests/core/reduccmp.test b/tests/core/reduccmp.test
index b14677585..d263fc77a 100755
--- a/tests/core/reduccmp.test
+++ b/tests/core/reduccmp.test
@@ -1,6 +1,6 @@
 #! /bin/sh
 # -*- coding: utf-8 -*-
-# Copyright (C) 2009-2014, 2016-2018 Laboratoire de Recherche et
+# Copyright (C) 2009-2014, 2016-2019 Laboratoire de Recherche et
 # Developpement de l'Epita (LRDE).
 # Copyright (C) 2004, 2006 Laboratoire d'Informatique de Paris 6 (LIP6),
 # département Systèmes Répartis Coopératifs (SRC), Université Pierre
@@ -436,6 +436,16 @@ GF(a && GF(b) && c), G(F(a & c) & Fb)
 # not reduced
 {a;(b[*2..4];c*;([*0]+{d;e}))*}!, {a;(b[*2..4];c*;([*0]+{d;e}))*}!
 {c[*];e[*]}[]-> a, {c[*];e[*]}[]-> a
+# first_match
+{first_match(a*);b}, b
+{first_match(a)}, a
+{first_match(a;b*)}, a
+{first_match(a;c;b*)}, a & Xc
+{first_match(a;c;b*;e)}, a & X(c & X{first_match(b[*];e)})
+{first_match(a;c;b*;(e | [*0]))}, a & Xc
+{first_match(a;c;b[+];(e | [*0]))}, a & X(c & Xb)
+{first_match(b[+];(e | [*0]))}, b
+{first_match(b[*2..3])}, b & Xb
 EOF
 
 run 0 ../reduccmp nottau.txt