From a6f79c6211020d074fecb7871613ca597a5aef02 Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Thu, 8 Feb 2024 22:43:44 +0100
Subject: [PATCH] more doc handling of prop_universal for fused initial states

Fixes #560.

* spot/parseaut/parseaut.yy: Add more comments about handling
of prop_universal in present of multiple initial states.  It took
me time to figure out that it was done correctly.  Also only
reset prop_complete() in the case an initial state is reused.
* tests/core/det.test: Add a test case for the deterministic property.
* tests/python/parsetgba.py: Add tests for complete.
* doc/org/hoa.org: Add more text about the effect of fusing initial
states.
* doc/org/concepts.org (properties): Replace "deterministic" by
"universal".  The former was obsoleted in Spot 2.4.
---
 doc/org/concepts.org      |   6 +--
 doc/org/hoa.org           | 108 ++++++++++++++++++++++++++------------
 spot/parseaut/parseaut.yy |  40 ++++++++------
 tests/core/det.test       |  28 ++++++++++
 tests/python/parsetgba.py |  31 +++++++++++
 5 files changed, 159 insertions(+), 54 deletions(-)

diff --git a/doc/org/concepts.org b/doc/org/concepts.org
index 64f982eb8..7dca74b54 100644
--- a/doc/org/concepts.org
+++ b/doc/org/concepts.org
@@ -1034,7 +1034,7 @@ layers.
     distributed with the rest of Spot, their source-code is publicly
     available (in case you want to contribute or run a local version).
     The [[https://spot-sandbox.lrde.epita.fr/][=spot-sandbox=]] website runs from a Docker container whose
-    configuration can be found in [[https://gitlab.lre.epita.fr/spot/sandbox/tree/master=][this repository]].  The client and
+    configuration can be found in [[https://gitlab.lre.epita.fr/spot/sandbox/tree/master][this repository]].  The client and
     server parts of the [[https://spot.lrde.epita.fr/app/][online LTL translator]] can be found in [[https://gitlab.lre.epita.fr/spot/spot-web-app/][this
     repository]].
 
@@ -1065,9 +1065,9 @@ automaton, and that can be queried or set by algorithms:
 | =very_weak=          | weak automaton where all SCCs have size 1                                                    |
 | =terminal=           | automaton is weak, accepting SCCs are complete, accepting edges may not go to rejecting SCCs |
 | =complete=           | for any letter ℓ, each state has is at least one outgoing transition compatible with ℓ       |
-| =deterministic=      | there is at most one run *recognizing* a word, but not necessarily accepting it              |
+| =universal=          | there is at most one run *recognizing* each word, but not necessarily accepting it           |
 | =semi_deterministic= | any nondeterminism occurs before entering an accepting SCC                                   |
-| =unambiguous=        | there is at most one run *accepting* a word (but it might be recognized several time)        |
+| =unambiguous=        | there is at most one run *accepting* each word (but it might be recognized several time)     |
 | =stutter_invariant=  | the property recognized by the automaton is [[https://www.lrde.epita.fr/~adl/dl/adl/michaud.15.spin.pdf][stutter-invariant]]                                |
 
 For each flag =flagname=, the =twa= class has a method
diff --git a/doc/org/hoa.org b/doc/org/hoa.org
index 6994abdc5..2efb72537 100644
--- a/doc/org/hoa.org
+++ b/doc/org/hoa.org
@@ -8,7 +8,7 @@
 The [[http://adl.github.io/hoaf/][Hanoi Omega-Automata format]] is a textual representation of
 ω-automata labeled by Boolean formulas over a set of atomic
 propositions, and using an arbitrary acceptance condition. The
-typical acceptances conditions like Büchi, generalized-Büchi,
+typical acceptance conditions like Büchi, generalized-Büchi,
 co-Büchi, Rabin, Streett, parity, ... are all supported, but the main
 advantage of this format is that any arbitrary acceptance condition
 can be defined.  The HOA format has support for many features such as
@@ -20,7 +20,7 @@ automaton.
 The HOA format is already supported in [[http://adl.github.io/hoaf/support.html][several tools]].  The goal of
 this page is to detail the support of this format in Spot.  It contains
 some information that are useful to better understand the behavior
-of the tools distributed by Spot, and it also look at some lower-level,
+of the tools distributed by Spot, and it also looks at some lower-level,
 discussing details that are interesting when programming with Spot.
 
 Spot can read files written using either version 1 or version 1.1 of
@@ -69,21 +69,6 @@ the HOA format, the output may not be exactly the same as the input.
   using option =--enable-max-accsets=N=, but doing so will consume
   more memory and time.
 
-- Multiple (or missing) initial states are emulated.
-
-  The internal TωA representation used by Spot supports only a single
-  initial state.  When an HOA with multiple initial states is read, it
-  is transformed into an equivalent TωA by merging the initial states
-  into a single one.  The merged state can either be one of the
-  original initial states (if one of those has no incoming edge) or a
-  new state introduced for that purpose.  This "conversion" may change
-  the completeness property of the automaton.
-
-  Similarly, when an automaton with no initial state is loaded (this
-  includes the case where the automaton has no state), a disconnected
-  initial state is added.  As a consequence, Spot's HOA output always
-  contains at least one state, even when the input had no state.
-
 - =Fin(!x)= and =Inf(!x)= are rewritten away.
 
   Internally Spot only deals with acceptance conditions involving the
@@ -100,6 +85,58 @@ the HOA format, the output may not be exactly the same as the input.
   =1=, that stores the complement of set =0=, and using
   =Inf(0)&Inf(1)=.
 
+- Multiple (or missing) initial states are emulated.
+
+  The internal TωA representation used by Spot supports only a single
+  initial state.  To make it possible to still process HOA files
+  that declare multiple initial states, Spot's automaton parser
+  will automatically transform the parsed automaton into an equivalent
+  TωA by merging the initial states into a single initial state that
+  has no incoming edge.  This can be achieved in two ways:
+
+  - If one of the original initial states has no incoming edge, it can
+    serve as the new initial state.  The parse simply has to duplicate
+    the edges leaving the other initial states onto this one.  (As an
+    optimization, if the other initial state alsos do not have any
+    incoming edge, the source of its outgoing edges are changed to
+    avoid copying them.)
+
+  - If all of the original initial states has incoming edges, a new
+    state is created to serve as the new initial states, with a copy
+    of all the outgoing edges of the original initial states.
+
+  This process is done in such a way that the states are not
+  renumbered: states are kept even if they become useless after the
+  above change, and if a new state is added, it is added after all
+  declared states.
+
+  This fusing of initial states may have puzzling consequences if you
+  are unaware of it:
+
+  - An automaton with multiple initial states is considered
+    non-deterministic and can be declared as so in the HOA 1.1 format.
+    However, fusing its initial states could potentially turn in into a
+    deterministic automaton if the only nondeterminism was in the
+    choice of the initial state.  For this reason Spot's parser will
+    keep the [[file:concepts.org::#property-flags][prop_universal()]] set to =maybe()= when multiple initial
+    states are declared, regardless of any determinism property
+    specified in the HOA file.
+
+  - Similarly, fusing all initial states into an existing one
+    can change the [[file:concepts.org::#property-flags][prop_complete()]] property of the automaton.
+
+  - Finally, if an automaton that declares state-based acceptance, and
+    uses different acceptance marks on their initial states.  Only one
+    of these acceptance marks will be kept on the single initial
+    state.  Remember that the new initial state has no incoming edge,
+    so it cannot be part of any loop and its acceptance mark is
+    actually irrelevant.
+
+  Similarly, when an automaton with no initial state is loaded (this
+  includes the case where the automaton has no state), a disconnected
+  initial state is added.  As a consequence, Spot's HOA output always
+  contains at least one state, even when the input had no state.
+
 * Internal representations of some features
 
 In this section we discuss features of the format that are fully
@@ -194,7 +231,7 @@ State: 2
 #+END_SRC
 
 Even if an input HOA file uses only state-based acceptance, Spot
-internally stores it using transition-based acceptance.  However in
+internally stores it using transition-based acceptance.  However, in
 that case the TωA will have a property flag indicating that it actually
 represents an automaton with the "state-based acceptance" property:
 this implies that transitions leaving one state all belong to the same
@@ -207,7 +244,7 @@ the automaton satisfies this property.  In that case, it outputs the
 automaton with state-based acceptance.
 
 For instance in the following automaton, the outgoing transitions of
-each states belong to the same sets:
+each state belong to the same sets:
 
 #+NAME: state-based-example
 #+BEGIN_SRC sh :wrap SRC hoa
@@ -258,7 +295,7 @@ State: 2 {0 1}
 --END--
 #+END_SRC
 
-The rational for this automatic switch to state-based acceptance is as follows:
+The rationale for this automatic switch to state-based acceptance is as follows:
 - Tools that support transition-based acceptance can easily see
   state-based acceptance as syntactic sugar, so they should be
   able to process state-based or transition-based acceptance
@@ -302,7 +339,7 @@ State: 2
 #+END_SRC
 
 By default, the output uses either state-based acceptance, or
-transition-based acceptance.  However there is no restriction in the
+transition-based acceptance.  However, there is no restriction in the
 format to prevents mixing the two: if you use =-Hm=, the decision of
 using state or transition-based acceptance will be made for each state
 separately.  For instance:
@@ -391,7 +428,7 @@ Whenever an HOA file is output, Spot attempts to recognize the
 acceptance condition to give it a suitable =acc-name:= (even if Spot
 does not use this line, it is useful to tools that only deal with one
 specific acceptance condition and that do not want to parse the
-=Acceptance:= line).  However the HOA output routine has no idea of
+=Acceptance:= line).  However, the HOA printer has no idea of
 what type of automata you are trying to output: it is only looking at
 the acceptance condition and trying to name it as precisely as
 possible.  This could be a problem when a given condition accepts
@@ -402,7 +439,7 @@ format]] the condition =Inf(0)= could be called =Buchi=, or
 =generalized-Buchi 1=, or (why not?) =parity min even 1= or =parity
 max even 1=.  Spot will always call this acceptance condition =Buchi=.
 
-Similarly the acceptance condition =t= is always called =all= (not
+Similarly, the acceptance condition =t= is always called =all= (not
 =generalized-Buchi 0= or =Rabin 0=, etc.), and while =f= is always
 named =none=.
 
@@ -595,7 +632,7 @@ instance it is easier to complement a deterministic automaton that is
 known to be inherently weak.
 
 Spot stores the properties that matters to its algorithms as
-[[file:concepts.org::#property-flags][additional bits attached to each automaton]].  Currently the HOA parser
+[[file:concepts.org::#property-flags][additional bits attached to each automaton]].  Currently, the HOA parser
 ignores all the properties that are unused by Spot.
 
 Some of the supported properties are double-checked when the automaton
@@ -636,7 +673,7 @@ redundant and useless properties.  For instance =deterministic=
 automata are necessarily =unambiguous=, and people interested in
 unambiguous automata know that, so Spot only outputs the =unambiguous=
 property if an unambiguous automaton is non-deterministic.  Similarly,
-while Spot may output alternating automata, it does not output
+while Spot may produce alternating automata, it does not output
 the =no-univ-branch= property because we cannot think of a situation
 where this would be useful.  This decision can be overridden by
 passing the =-Hv= (or =--hoa=v=) option to the command-line tools:
@@ -754,13 +791,13 @@ State: 2 "so am I" {0 1}
 --END--
 #+end_SRC
 
-However when Spot performs some transformation, and actually has to
+However, when Spot performs some transformation, and actually has to
 construct a new automaton, those properties will not be quarried over
 to the new automaton.  First because it is not obvious that the new
 automaton should have the same name, and second because if a new
 automaton is created, there might not be clear correspondence between
 the old states and the new ones.  =autfilt= tries to preserve aliases
-by reintroducing them to the automaton before it is outputs it (unless
+by reintroducing them to the automaton before it prints it (unless
 option =--aliases=drop= is used).
 
 Here is for instance the result when =autfilt= is instructed to
@@ -938,7 +975,7 @@ by tools that produce automata in a stream to cancel the current one.
 This makes sense for instance when the automaton is constructed
 on-the-fly, while it is being output.  This scenario does not occur in
 Spot (automata are constructed before they are output), so it does not
-emit =--ABORT--=.  However the input parser is fully aware of this
+emit ~--ABORT--~.  However, the input parser is fully aware of this
 token.  Tools like =autfilt= will diagnose aborted automata in the
 input, and continue processing with the next automaton.  The Python
 bindings for the HOA parser can be configured in two modes: skip
@@ -1013,7 +1050,7 @@ autfilt decorate.hoa -d'.#'
 
 
 On the above example, we call =autfilt= with option =-d#= to display
-edges numbers, which helps identifying the edges to highlight.  The
+edges numbers, which helps identify the edges to highlight.  The
 headers ~spot.highlight.states:~ and ~spot.highlight.edges:~ are both
 followed by a list of alternating state/edges numbers and color numbers.
 
@@ -1040,7 +1077,7 @@ acceptance sets.  This might change in the future.
 
 The automaton parser will not complain if these headers are used in
 some =HOA: v1= file, even if =v1= disallows dots in header names.
-However [[https://en.wikipedia.org/wiki/Robustness_principle][the automaton printer is more rigorous]] and will only output
+However, [[https://en.wikipedia.org/wiki/Robustness_principle][the automaton printer is more rigorous]] and will only output
 these lines when version 1.1 is selected.
 
 Compare:
@@ -1155,11 +1192,6 @@ State: 2 {0}
 rm -f stvstracc.hoa sba.hoa stvstrlab.hoa decorate.hoa
 #+END_SRC
 
-#  LocalWords:  html args Büchi accsets BDD SRC stvstracc EOF sed sba
-#  LocalWords:  acc Buchi Hm tgba GFa Fb encodings parametered ary Hk
-#  LocalWords:  bitsets randaut stvstrlab aut Hv hw bigwedge mathsf
-#  LocalWords:  genltl gf GFp Fp parser's rankdir br labelloc ffffa
-#  LocalWords:  fillcolor fontname svg txt Xa
 ** Arenas for two-player games
 
 An automaton can be seen as a two-player game by simply annotating
@@ -1240,3 +1272,9 @@ $txt
 
 #+RESULTS:
 [[file:exgame.svg]]
+
+#  LocalWords:  html args Büchi accsets BDD SRC stvstracc EOF sed sba
+#  LocalWords:  acc Buchi Hm tgba GFa Fb encodings parametered ary Hk
+#  LocalWords:  bitsets randaut stvstrlab aut Hv hw bigwedge mathsf
+#  LocalWords:  genltl gf GFp Fp parser's rankdir br labelloc ffffa
+#  LocalWords:  fillcolor fontname svg txt Xa
diff --git a/spot/parseaut/parseaut.yy b/spot/parseaut/parseaut.yy
index 6410d00de..acfc92276 100644
--- a/spot/parseaut/parseaut.yy
+++ b/spot/parseaut/parseaut.yy
@@ -559,19 +559,18 @@ header: format-version header-items
 	    if (ss > 1)
 	      {
 		if (det)
-		  {
-		    error(det.loc,
-			  "deterministic automata should have at most "
-			  "one initial state");
-                    res.universal = spot::trival::maybe();
-		  }
+                  error(det.loc,
+                        "deterministic automata should have at most "
+                        "one initial state");
                 else if (no_exist)
-                  {
-		    error(no_exist.loc,
-			  "universal automata should have at most "
-			  "one initial state");
-                    res.universal = spot::trival::maybe();
-                  }
+                  error(no_exist.loc,
+                        "universal automata should have at most "
+                        "one initial state");
+                // res.universal defaults to maybe() and this is what
+                // we need here.  In presence of multiple initial
+                // state, fix_initial_state() will have to fuse them,
+                // and this could turn a non-deterministic automaton
+                // into a deterministic one.
 	      }
 	    else
 	      {
@@ -2662,10 +2661,6 @@ static void fix_initial_state(result_& r)
 	  return;
 	}
       auto& aut = r.h->aut;
-      // Fiddling with initial state may turn an incomplete automaton
-      // into a complete one.
-      if (aut->prop_complete().is_false())
-        aut->prop_complete(spot::trival::maybe());
       // Multiple initial states.  We might need to add a fake one,
       // unless one of the actual initial state has no incoming edge.
       std::vector<unsigned char> has_incoming(aut->num_states(), 0);
@@ -2695,6 +2690,19 @@ static void fix_initial_state(result_& r)
       if (!found || init_alternation)
 	// We do need a fake initial state
 	init = aut->new_state();
+      else
+        // Modifying one existing initial state may turn an incomplete
+        // automaton into a complete one.  For instance if the state
+        // that we elected as the future initial state was the only
+        // incomplete state of the automaton.  Similarly this could
+        // also turn a non-deterministic automaton into a
+        // deterministic one, but we don't have to deal with this are
+        // automata with multiple initial states have prop_universal()
+        // set to maybe() already in prevision of what this function
+        // will do.
+        if (aut->prop_complete().is_false())
+          aut->prop_complete(spot::trival::maybe());
+
       aut->set_init_state(init);
 
       // The non-alternating case is the easiest, we simply declare
diff --git a/tests/core/det.test b/tests/core/det.test
index f3249ca27..76125ae7e 100755
--- a/tests/core/det.test
+++ b/tests/core/det.test
@@ -293,3 +293,31 @@ State: 1
 EOF
 autfilt -q --is-deterministic in.hoa && exit 1
 autfilt --merge-transitions in.hoa | autfilt --is-deterministic
+
+
+
+# This is a peculiarity of Spot: because it supports only one initial
+# state, but the HOA format allows for more, Spot's automaton parser
+# will fuse multiple initial states to fit into the Spot definition of
+# an automaton.  While this is preserving the original language, this
+# fusing of initial states may turn a non-deterministic automaton
+# (because of the multiple declared initial states) into a
+# deterministic one.  (Issue #560.)
+cat >560.hoa <<EOF
+HOA: v1.1
+States: 3
+Start: 1
+Start: 2
+AP: 1 "a"
+Acceptance: 0 t
+properties: !deterministic
+--BODY--
+State: 0
+[t] 0
+State: 1
+[0] 0
+State: 2
+[!0] 0
+--END--
+EOF
+test "1 2 1" = "`autfilt --stats='%d %[r]s %[u]s' 560.hoa`"
diff --git a/tests/python/parsetgba.py b/tests/python/parsetgba.py
index 493d2341f..8118c63ac 100755
--- a/tests/python/parsetgba.py
+++ b/tests/python/parsetgba.py
@@ -191,3 +191,34 @@ State: 3
 [!0&2] 0
 [!0&!2] 3
 --END--""")
+
+
+# Spot supports only one initial state, but the HOA format allows for
+# more.  Spot's automaton parser will fuse multiple initial states to
+# fit into the Spot definition of an automaton.  Depending on how the
+# fusing is done, this can turn a complete automaton into a incomplete
+# one, and vice-versa.  See Issue #560.
+a = spot.automaton("""HOA: v1 States: 8 Start: 0 Start: 1 AP: 0
+acc-name: generalized-Buchi 2 properties: complete Acceptance: 2
+Inf(0)&Inf(1) --BODY-- State: 0 {0} [t] 2 [t] 3 [t] 0 State: 1 {0} [t]
+4 [t] 5 State: 2 {0} [t] 2 [t] 3 State: 3 {0} [t] 6 [t] 7 State: 4 {1}
+[t] 4 [t] 5 State: 5 {1} [t] 6 [t] 7 State: 6 [t] 6 [t] 7 State: 7 [t]
+6 [t] 7 --END--""")
+tc.assertTrue(a.prop_complete().is_true())
+
+a = spot.automaton("""HOA: v1 States: 8 Start: 0 Start: 1 AP: 0
+acc-name: generalized-Buchi 2 properties: complete Acceptance: 2
+Inf(0)&Inf(1) --BODY-- State: 0 {0} [t] 2 [t] 3 State: 1 {0} [t] 4 [t]
+5 State: 2 {0} [t] 2 [t] 3 State: 3 {0} [t] 6 [t] 7 State: 4 {1} [t] 4
+[t] 5 State: 5 {1} [t] 6 [t] 7 State: 6 [t] 6 [t] 7 State: 7 [t] 6 [t]
+7 --END--""")
+tc.assertTrue(a.prop_complete().is_false())
+
+a = spot.automaton("""HOA: v1.1 States: 8 Start: 0 Start: 1 AP: 1 "a"
+acc-name: generalized-Buchi 2 properties: !complete Acceptance: 2
+Inf(0)&Inf(1) --BODY-- State: 0 {0} [0] 2 [0] 3 State: 1 {0} [t] 1 [t]
+5 State: 2 {0} [t] 2 [t] 3 State: 3 {0} [t] 6 [t] 7 State: 4 {1} [t] 4
+[t] 5 State: 5 {1} [t] 6 [t] 7 State: 6 [t] 6 [t] 7 State: 7 [t] 6 [t]
+7 --END--""")
+tc.assertTrue(a.prop_complete().is_maybe())
+tc.assertTrue(spot.is_complete(a))