From 05abed6d2f231239180d0594ef1f74c3b8bc3dcf Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Tue, 20 Jun 2017 13:42:58 +0200
Subject: [PATCH] translate: use relabel_bse() to speed translations with a lot
 of APs

Fixes #268, reported by Yann Thierry-Mieg.

* spot/twaalgos/translate.cc, spot/twaalgos/translate.hh: Call
relabel_bse() + relabel_here().
* tests/core/format.test: Add a test case.
* bin/spot-x.cc, NEWS: Document the change.
---
 NEWS                       | 11 ++++++++
 bin/spot-x.cc              |  8 +++++-
 spot/twaalgos/translate.cc | 52 ++++++++++++++++++++++++++++++++++++--
 spot/twaalgos/translate.hh |  1 +
 tests/core/format.test     |  4 +++
 5 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index e5ec6ca6b..aab20cc2f 100644
--- a/NEWS
+++ b/NEWS
@@ -123,6 +123,17 @@ New in spot 2.3.4.dev (not yet released)
     names from another automaton, honoring "original-states" if
     present.
 
+  - Building automata for LTL formula with a large number N of atomic
+    propositions can be costly, because several loops and
+    data-structures are exponential into N.  However a formula like
+      ((a & b & c) | (d & e & f)) U ((d & e & f) | (g & h & i))
+    can be translated more efficiently by first building an automaton
+    for (p0 | p1) U (p1 | p2), and then substituting p0, p1, p2 by the
+    appropriate Boolean formula.  Such a trick is now attempted
+    for translation of formulas with 4 atomic propositions or
+    more (this threshold can be changed, see -x relabel-bool=N in
+    the spot-x(7) man page).
+
   Python:
 
   - The 'spot.gen' package exports the functions from libspotgen.
diff --git a/bin/spot-x.cc b/bin/spot-x.cc
index d191d9a19..698ca22ed 100644
--- a/bin/spot-x.cc
+++ b/bin/spot-x.cc
@@ -1,5 +1,5 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2013, 2014, 2015, 2016 Laboratoire de Recherche et
+// Copyright (C) 2013, 2014, 2015, 2016, 2017 Laboratoire de Recherche et
 // Développement de l'Epita (LRDE).
 //
 // This file is part of Spot, a model checking library.
@@ -107,6 +107,12 @@ on the Büchi automaton (i.e., after degeneralization has been performed). \
 Set to 1 to use only direct simulation.  Set to 2 to use only reverse \
 simulation.  Set to 3 to iterate both direct and reverse simulations.   \
 The default is 3 in --high mode, and 0 otherwise.") },
+    { DOC("relabel-bool", "If set to a positive integer N, a formula \
+with N atomic propositions or more will have its Boolean subformulas \
+abstracted as atomic propositions during the translation to automaton. \
+This relabeling can speeds the translation if a few Boolean subformulas \
+use a large number of atomic propositions.  By default N=4.  Setting \
+this value to 0 will disable the rewriting.") },
     { DOC("wdba-minimize", "Set to 0 to disable WDBA-minimization.  \
 Enabled by default.") },
     { DOC("tba-det", "Set to 1 to attempt a powerset determinization \
diff --git a/spot/twaalgos/translate.cc b/spot/twaalgos/translate.cc
index dc117490d..10f1947fa 100644
--- a/spot/twaalgos/translate.cc
+++ b/spot/twaalgos/translate.cc
@@ -21,6 +21,8 @@
 #include <spot/twaalgos/ltl2tgba_fm.hh>
 #include <spot/twaalgos/compsusp.hh>
 #include <spot/misc/optionmap.hh>
+#include <spot/tl/relabel.hh>
+#include <spot/twaalgos/relabel.hh>
 
 namespace spot
 {
@@ -28,10 +30,12 @@ namespace spot
   void translator::setup_opt(const option_map* opt)
   {
     comp_susp_ = early_susp_ = skel_wdba_ = skel_simul_ = 0;
+    relabel_bool_ = -1;
 
     if (!opt)
       return;
 
+    relabel_bool_ = opt->get("relabel-bool", 4);
     comp_susp_ = opt->get("comp-susp", 0);
     if (comp_susp_ == 1)
       {
@@ -71,8 +75,48 @@ namespace spot
         set_pref(pref_ | postprocessor::Deterministic);
       }
 
-    formula r = simpl_->simplify(*f);
-    *f = r;
+    // Do we want to relabel Boolean subformulas?
+    // If we have a huge formula such as
+    //  (a1 & a2 & ... & an) U (b1 | b2 | ... | bm)
+    // then it is more efficient to translate
+    //  a U b
+    // and then fix the automaton.  We use relabel_bse() to find
+    // sub-formulas that are Boolean but do not have common terms.
+    //
+    // This rewriting is enabled only if the formula
+    //  1) has some Boolean subformula
+    //  2) has more than relabel_bool_ atomic propisition (the default
+    //     is 4, but this can be changed)
+    //  3) relabel_bse() actually reduces the number of atomic
+    //     propositions.
+    relabeling_map m;
+    formula to_work_on = *f;
+    if (relabel_bool_ > 0)
+      {
+        bool has_boolean_sub = false; // that is not atomic
+        std::set<formula> aps;
+        to_work_on.traverse([&](const formula& f)
+                            {
+                              if (f.is(op::ap))
+                                aps.insert(f);
+                              else if (f.is_boolean())
+                                has_boolean_sub = true;
+                              return false;
+                            });
+        unsigned atomic_props = aps.size();
+        if (has_boolean_sub && (atomic_props >= (unsigned) relabel_bool_))
+          {
+            formula relabeled = relabel_bse(to_work_on, Pnn, &m);
+            if (m.size() < atomic_props)
+              to_work_on = relabeled;
+            else
+              m.clear();
+          }
+      }
+
+    formula r = simpl_->simplify(to_work_on);
+    if (to_work_on == *f)
+      *f = r;
 
     // This helps ltl_to_tgba_fm() to order BDD variables in a more
     // natural way (improving the degeneralization).
@@ -97,7 +141,11 @@ namespace spot
                              true, false, false, nullptr, nullptr,
                              unambiguous);
       }
+
     aut = this->postprocessor::run(aut, r);
+
+    if (!m.empty())
+      relabel_here(aut, &m);
     return aut;
   }
 
diff --git a/spot/twaalgos/translate.hh b/spot/twaalgos/translate.hh
index 569c87218..a7bf80e9d 100644
--- a/spot/twaalgos/translate.hh
+++ b/spot/twaalgos/translate.hh
@@ -146,6 +146,7 @@ namespace spot
     int early_susp_;
     int skel_wdba_;
     int skel_simul_;
+    int relabel_bool_;
   };
   /// @}
 }
diff --git a/tests/core/format.test b/tests/core/format.test
index 8f178b3ba..b312496a1 100644
--- a/tests/core/format.test
+++ b/tests/core/format.test
@@ -123,6 +123,10 @@ jb) | (c & jb) | (d & jb) | (e & jb) | (f & m) | (f & h) | (f & i) |
 (r & y) | (r & t) | (r & u) | (r & v) | (r & w) | (fb & x) | (x & z) |
 (ab & x) | (bb & x) | (cb & x)))'
 test 3,5 = `ltl2tgba --low --any --stats=%s,%e "$f"`
+# While we are at it, make sure we can translate this beast.  Dispite
+# the huge number of atomic propositiions, there is only one Boolean
+# subformula and it is used twice.
+test 3,4 = `ltl2tgba --stats=%s,%e "$f"`
 
 cat >foo <<EOF
 HOA: v1