From d1b8495510c354ff635b171af3e9ecc8078bd6fc Mon Sep 17 00:00:00 2001
From: Alexandre Duret-Lutz <adl@lrde.epita.fr>
Date: Fri, 12 Aug 2022 14:56:45 +0200
Subject: [PATCH] do not use a global variable to define the number of
 available threads

* python/spot/impl.i: Make parallel_policy implicitly contractible.
* spot/graph/graph.hh (sort_edges_srcfirst_): Pass a parallel_policy
explicitly.
* spot/twa/twagraph.hh, spot/twa/twagraph.cc (merge_states): Likewise.
* spot/misc/common.cc: Remove file.
* spot/misc/common.hh (set_nthreads, get_nthreads): Remove, and
replace with...
(parallel_policy): ... this.
* spot/misc/Makefile.am, tests/python/mergedge.py, NEWS: Adjust.
---
 NEWS                     |  8 ++--
 python/spot/impl.i       |  1 +
 spot/graph/graph.hh      | 47 +++++++++++-----------
 spot/misc/Makefile.am    |  3 +-
 spot/misc/common.cc      | 33 ---------------
 spot/misc/common.hh      | 31 ++++++++++-----
 spot/twa/twagraph.cc     | 86 ++++++++++++++++++++--------------------
 spot/twa/twagraph.hh     |  8 +++-
 tests/python/mergedge.py | 74 +++++++++++++++++-----------------
 9 files changed, 137 insertions(+), 154 deletions(-)
 delete mode 100644 spot/misc/common.cc
diff --git a/NEWS b/NEWS
index 77c4e081f..5b55ebbdd 100644
--- a/NEWS
+++ b/NEWS
@@ -38,9 +38,6 @@ New in spot 2.10.6.dev  (not yet released)
 
   Library:
 
-  - A global variable, together with its setters and getters to define the
-    maximal number of threads is added to common.hh/common.cc
-
   - The new function suffix_operator_normal_form() implements
     transformation of formulas to Suffix Operator Normal Form,
     described in [cimatti.06.fmcad].
@@ -117,6 +114,11 @@ New in spot 2.10.6.dev  (not yet released)
     to obtain a simple model checker (that returns true or false,
     without counterexample).
 
+  - spot::parallel_policy is an object that can be passed to some
+    algorithm to specify how many threads can be used if Spot has been
+    compiled with --enable-pthread.  Currently, only
+    twa_graph::merge_states() support it.
+
  Python bindings:
 
   - The to_str() method of automata can now export a parity game into
diff --git a/python/spot/impl.i b/python/spot/impl.i
index a07709005..23c07c4e8 100644
--- a/python/spot/impl.i
+++ b/python/spot/impl.i
@@ -486,6 +486,7 @@ static void handle_any_exception()
   }
 }
 
+%implicitconv spot::parallel_policy;
 %include <spot/misc/common.hh>
 %include <spot/misc/version.hh>
 %include <spot/misc/minato.hh>
diff --git a/spot/graph/graph.hh b/spot/graph/graph.hh
index 06ddf0997..531426244 100644
--- a/spot/graph/graph.hh
+++ b/spot/graph/graph.hh
@@ -1255,56 +1255,57 @@ namespace spot
     /// and make a temporary copy of the edges (needs more ram)
     /// \pre This needs the edge_vector to be in a coherent state when called
     template<class Predicate = std::less<edge_storage_t>>
-    void sort_edges_srcfirst_(Predicate p = Predicate())
+    void sort_edges_srcfirst_(Predicate p = Predicate(),
+                              parallel_policy ppolicy = parallel_policy())
     {
-      //std::cerr << "\nbefore\n";
-      //dump_storage(std::cerr);
-      const auto N = num_states();
-
-      auto idx_list = std::vector<unsigned>(N+1);
-      auto new_edges = edge_vector_t();
+      SPOT_ASSERT(!edges_.empty());
+      const unsigned ns = num_states();
+      std::vector<unsigned> idx_list(ns+1);
+      edge_vector_t new_edges;
       new_edges.reserve(edges_.size());
-      if (SPOT_UNLIKELY(edges_.empty()))
-        throw std::runtime_error("Empty edge vector!");
       new_edges.resize(1);
       // This causes edge 0 to be considered as dead.
       new_edges[0].next_succ = 0;
-      // Copy the edges such that they are sorted by src
-      for (auto s = 0u; s < N; ++s)
+      // Copy all edges so that they are sorted by src
+      for (unsigned s = 0; s < ns; ++s)
         {
           idx_list[s] = new_edges.size();
           for (const auto& e : out(s))
             new_edges.push_back(e);
         }
-      idx_list[N] = new_edges.size();
+      idx_list[ns] = new_edges.size();
       // New edge sorted by source
       // If we have few edge or only one threads
       // Benchmark few?
       auto bne = new_edges.begin();
-#ifdef SPOT_ENABLE_PTHREAD
-      const unsigned nthreads = get_nthreads();
-      if (nthreads == 1 || edges_.size() < 1000)
+#ifndef SPOT_ENABLE_PTHREAD
+      (void) ppolicy;
+#else
+      unsigned nthreads = ppolicy.nthreads();
+      if (nthreads <= 1)
 #endif
         {
-          for (auto s = 0u; s < N; ++s)
+          for (unsigned s = 0u; s < ns; ++s)
             std::stable_sort(bne + idx_list[s],
-                             bne + idx_list[s+1],
-                             p);
+                             bne + idx_list[s+1], p);
         }
 #ifdef SPOT_ENABLE_PTHREAD
       else
         {
-          static auto tv = std::vector<std::thread>();
+          static std::vector<std::thread> tv;
           SPOT_ASSERT(tv.empty());
           tv.resize(nthreads);
+          // FIXME: Due to the way these thread advence into the sate
+          // vectors, they access very close memory location.  It
+          // would seems more cache friendly to have thread work on
+          // blocks of continuous states.
           for (unsigned id = 0; id < nthreads; ++id)
             tv[id] = std::thread(
-              [bne, id, N, &idx_list, p, nthreads]()
+              [bne, id, ns, &idx_list, p, nthreads]()
               {
-                for (auto s = id; s < N; s+=nthreads)
+                for (unsigned s = id; s < ns; s += nthreads)
                   std::stable_sort(bne + idx_list[s],
-                                   bne + idx_list[s+1],
-                                   p);
+                                   bne + idx_list[s+1], p);
                 return;
               });
           for (auto& t : tv)
diff --git a/spot/misc/Makefile.am b/spot/misc/Makefile.am
index 623a13c87..6b771dbb5 100644
--- a/spot/misc/Makefile.am
+++ b/spot/misc/Makefile.am
@@ -1,5 +1,5 @@
 ## -*- coding: utf-8 -*-
-## Copyright (C) 2011-2014, 2016-2018, 2020-2021 Laboratoire de
+## Copyright (C) 2011-2014, 2016-2018, 2020-2022 Laboratoire de
 ## Recherche et Développement de l'Epita (LRDE).
 ## Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
 ## Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
@@ -63,7 +63,6 @@ libmisc_la_SOURCES = \
   bareword.cc \
   bitset.cc \
   bitvect.cc \
-  common.cc \
   escape.cc \
   formater.cc \
   intvcomp.cc \
diff --git a/spot/misc/common.cc b/spot/misc/common.cc
deleted file mode 100644
index adf9f2da0..000000000
--- a/spot/misc/common.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-// -*- coding: utf-8 -*-
-// Copyright (C) 2018 Laboratoire de Recherche et Développement
-// de l'Epita (LRDE).
-//
-// This file is part of Spot, a model checking library.
-//
-// Spot is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 3 of the License, or
-// (at your option) any later version.
-//
-// Spot is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-// License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#include "config.h"
-#include <spot/misc/common.hh>
-
-static unsigned N_MAX_THREADS = 1;
-
-void set_nthreads(unsigned nthreads)
-{
-  N_MAX_THREADS = nthreads;
-}
-
-unsigned get_nthreads()
-{
-  return N_MAX_THREADS;
-}
\ No newline at end of file
diff --git a/spot/misc/common.hh b/spot/misc/common.hh
index fc74a8ee7..8b066b0a5 100644
--- a/spot/misc/common.hh
+++ b/spot/misc/common.hh
@@ -1,5 +1,5 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2013-2021 Laboratoire de Recherche et Développement
+// Copyright (C) 2013-2022 Laboratoire de Recherche et Développement
 // de l'Epita (LRDE).
 //
 // This file is part of Spot, a model checking library.
@@ -145,6 +145,27 @@ namespace spot
       {
       }
   };
+
+  /// \brief This class is used to tell parallel algorithms what
+  /// resources they may use.
+  ///
+  /// Currently, this simply stores an integer indicating the number
+  /// of threads that the algorithm may create, but in the future it
+  /// will probably do more.
+  class SPOT_API parallel_policy
+  {
+    unsigned nthreads_;
+  public:
+    parallel_policy(unsigned nthreads = 1) : nthreads_(nthreads)
+    {
+    }
+
+    unsigned nthreads() const
+    {
+      return nthreads_;
+    }
+  };
+
 }
 
 // This is a workaround for the issue described in GNU GCC bug 89303.
@@ -169,11 +190,3 @@ namespace spot
 #  define SPOT_make_shared_enabled__(TYPE, ...)  \
    std::make_shared<TYPE>(__VA_ARGS__)
 #endif
-
-
-// Global variable to determine the maximal number of threads
-SPOT_API void
-set_nthreads(unsigned nthreads);
-
-SPOT_API unsigned
-get_nthreads();
diff --git a/spot/twa/twagraph.cc b/spot/twa/twagraph.cc
index 5b4da10a3..64f229b03 100644
--- a/spot/twa/twagraph.cc
+++ b/spot/twa/twagraph.cc
@@ -366,15 +366,16 @@ namespace spot
     g_.chain_edges_();
   }
 
-  unsigned twa_graph::merge_states()
+  unsigned twa_graph::merge_states(parallel_policy ppolicy)
   {
     if (!is_existential())
       throw std::runtime_error(
           "twa_graph::merge_states() does not work on alternating automata");
 
 #ifdef ENABLE_PTHREAD
-    const unsigned nthreads = get_nthreads();
+    const unsigned nthreads = ppolicy.nthreads();
 #else
+    (void) ppolicy;
     constexpr unsigned nthreads = 1;
 #endif
 
@@ -392,10 +393,10 @@ namespace spot
       if (rhs.cond != lhs.cond)
         return false;
       return lhs.dst < rhs.dst;
-    });
+    }, nthreads);
     g_.chain_edges_();
 
-    const auto n_states = num_states();
+    const unsigned n_states = num_states();
 
     // Edges are nicely chained and there are no erased edges
     // -> We can work with the edge_vector
@@ -404,30 +405,28 @@ namespace spot
     // if so, the graph alternates between env and player vertices,
     // so there are, by definition, no self-loops
     auto sp = get_named_prop<std::vector<bool>>("state-player");
-    const auto spe = (bool) sp;
 
     // The hashing is a bit delicat: We may only use the dst
     // if it has no self-loop
-    auto use_for_hash = spe ? std::vector<bool>()
-                            : std::vector<bool>(n_states);
+    auto use_for_hash = sp ? std::vector<bool>()
+                           : std::vector<bool>(n_states);
 
     const auto& e_vec = edge_vector();
-    const auto n_edges = e_vec.size();
+    unsigned n_edges = e_vec.size();
 
     // For each state we need 4 indices of the edge vector
     // [first, first_non_sfirst_selflooplfloop, first_selfloop, end]
     // The init value makes sure nothing is done for dead end states
-    auto e_idx =
-      std::vector<std::array<unsigned, 4>>(n_states, {-1u, -1u,
-                                                      -1u, -1u});
+    std::vector<std::array<unsigned, 4>> e_idx(n_states, {-1u, -1u,
+                                                          -1u, -1u});
     // Like a linked list holding the non-selfloop and selfloop transitions
-    auto e_chain = std::vector<unsigned>(e_vec.size(), -1u);
+    std::vector<unsigned> e_chain(n_edges, -1u);
 
     unsigned idx = 1;
 
     // Edges are sorted with repected to src first
     const unsigned n_high = e_vec.back().src;
-    if (spe)
+    if (sp)
       for (auto s = 0u; s < n_high; ++s)
         treat<true, false>(e_idx, e_vec, e_chain,
                            use_for_hash, idx, s, n_edges);
@@ -436,7 +435,7 @@ namespace spot
         treat<false, false>(e_idx, e_vec, e_chain,
                             use_for_hash, idx, s, n_edges);
     // Last one
-    if (spe)
+    if (sp)
       treat<true, true>(e_idx, e_vec, e_chain,
                         use_for_hash, idx, n_high, n_edges);
     else
@@ -445,7 +444,7 @@ namespace spot
 
     assert(idx == e_vec.size() && "Something went wrong during indexing");
 
-    auto n_players = 0u;
+    unsigned n_players = 0u;
     if (sp)
       n_players = std::accumulate(sp->begin(), sp->end(), 0u);
 
@@ -454,14 +453,12 @@ namespace spot
     // hash_linked_list is like a linked list structure
     // of fake pointers
 
-    auto hash_linked_list = std::vector<unsigned>(n_states, -1u);
-    auto s_to_hash = std::vector<size_t>(n_states, 0);
-    auto env_map =
-      robin_hood::unordered_flat_map<size_t,
-                                     std::pair<unsigned, unsigned>>();
-    auto player_map =
-      robin_hood::unordered_flat_map<size_t,
-                                     std::pair<unsigned, unsigned>>();
+    std::vector<unsigned> hash_linked_list(n_states, -1u);
+    std::vector<size_t> s_to_hash(n_states, 0);
+    robin_hood::unordered_flat_map<size_t,
+                                   std::pair<unsigned, unsigned>> env_map;
+    robin_hood::unordered_flat_map<size_t,
+                                   std::pair<unsigned, unsigned>> player_map;
     env_map.reserve(n_states - n_players);
     player_map.reserve(n_players);
 
@@ -476,7 +473,7 @@ namespace spot
         else
           {
             // "tail"
-            auto idx = it->second.second;
+            unsigned idx = it->second.second;
             assert(idx < s && "Must be monotone");
             hash_linked_list[idx] = s;
             it->second.second = s;
@@ -484,19 +481,19 @@ namespace spot
       };
 
     // Hash all states
-    constexpr auto SHIFT = sizeof(size_t)/2 * CHAR_BIT;
+    constexpr unsigned shift = sizeof(size_t)/2 * CHAR_BIT;
     for (auto s = 0u; s != n_states; ++s)
       {
-        auto h = fnv<size_t>::init;
-        const auto e = e_idx[s][3];
-        for (auto i = e_idx[s][0]; i != e; ++i)
+        size_t h = fnv<size_t>::init;
+        const unsigned e = e_idx[s][3];
+        for (unsigned i = e_idx[s][0]; i != e; ++i)
           {
             // If size_t has 8byte and unsigned has 4byte
             // then this works fine, otherwise there might be more collisions
-            size_t hh = spe || use_for_hash[e_vec[i].dst]
+            size_t hh = sp || use_for_hash[e_vec[i].dst]
                           ? e_vec[i].dst
                           : fnv<unsigned>::init;
-            hh <<= SHIFT;
+            hh <<= shift;
             hh += e_vec[i].cond.id();
             h ^= hh;
             h *= fnv<size_t>::prime;
@@ -504,7 +501,7 @@ namespace spot
             h *= fnv<size_t>::prime;
           }
         s_to_hash[s] = h;
-        if (spe && (*sp)[s])
+        if (sp && (*sp)[s])
           emplace(player_map, h, s);
         else
           emplace(env_map, h, s);
@@ -538,20 +535,20 @@ namespace spot
         auto [i1, nsl1, sl1, e1] = e_idx[s1];
         auto [i2, nsl2, sl2, e2] = e_idx[s2];
 
-        if ((e2-i2) != (e1-i1))
+        if ((e2 - i2) != (e1 - i1))
           return false; // Different number of outgoing trans
 
         // checked1/2 is one element larger than necessary
         // the last element is always false
         // and acts like a nulltermination
-        checked1.resize(e1-i1+1);
+        checked1.resize(e1 - i1 + 1);
         std::fill(checked1.begin(), checked1.end(), false);
-        checked2.resize(e2-i2+1);
+        checked2.resize(e2 - i2 + 1);
         std::fill(checked2.begin(), checked2.end(), false);
 
         // Try to match self-loops
         // Not entirely sure when this helps exactly
-        while ((sl1 < e1) & (sl2 < e2))
+        while ((sl1 < e1) && (sl2 < e2))
           {
             // Like a search in ordered array
             if (e_vec[sl1].data() == e_vec[sl2].data())
@@ -576,12 +573,12 @@ namespace spot
         // Check if all have been correctly treated
         if ((nsl1 > e1)
             && std::all_of(checked1.begin(), checked1.end(),
-                           [](const auto& e){return e; }))
+                           [](const auto& e){return e;}))
           return true;
 
         // The remaining edges need to match exactly
-        auto idx1 = i1;
-        auto idx2 = i2;
+        unsigned idx1 = i1;
+        unsigned idx2 = i2;
         while (((idx1 < e1) & (idx2 < e2)))
           {
             // More efficient version?
@@ -600,7 +597,7 @@ namespace spot
 
 
             if  ((e_vec[idx1].dst != e_vec[idx2].dst)
-                || !(e_vec[idx1].data() == e_vec[idx2].data()))
+                 || !(e_vec[idx1].data() == e_vec[idx2].data()))
               return false;
 
             // Advance
@@ -620,7 +617,7 @@ namespace spot
                         std::vector<char>& checked2)
       {
         v.clear();
-        for (auto i = ix; i != -1U; i = hash_linked_list[i])
+        for (unsigned i = ix; i != -1U; i = hash_linked_list[i])
           v.push_back(i);
         const unsigned N = v.size();
 
@@ -699,8 +696,10 @@ namespace spot
       auto bege = env_map.begin();
       auto ende = env_map.end();
 
-#ifdef ENABLE_PTHREAD
-      if ((nthreads == 1) & (num_states() > 1000)) // Bound?
+#ifndef ENABLE_PTHREAD
+      (void) nthreads;
+#else
+      if (nthreads <= 1)
         {
 #endif // ENABLE_PTHREAD
           worker(0, begp, endp, bege, ende);
@@ -728,12 +727,11 @@ namespace spot
     for (auto& e: edges())
       if (remap[e.dst] != -1U)
         {
-          assert((!spe || (sp->at(e.dst) == sp->at(remap[e.dst])))
+          assert((!sp || (sp->at(e.dst) == sp->at(remap[e.dst])))
                  && "States do not have the same owner");
           e.dst = remap[e.dst];
         }
 
-
     if (remap[get_init_state_number()] != -1U)
       set_init_state(remap[get_init_state_number()]);
 
diff --git a/spot/twa/twagraph.hh b/spot/twa/twagraph.hh
index c6222871e..742a4d69a 100644
--- a/spot/twa/twagraph.hh
+++ b/spot/twa/twagraph.hh
@@ -1,5 +1,5 @@
 // -*- coding: utf-8 -*-
-// Copyright (C) 2014-2021 Laboratoire de Recherche et Développement
+// Copyright (C) 2014-2022 Laboratoire de Recherche et Développement
 // de l'Epita.
 //
 // This file is part of Spot, a model checking library.
@@ -589,8 +589,12 @@ namespace spot
     /// (1)-a->(1) and (1)-a->(1) if (1), (2) and (3) are merged into
     /// (1).
     ///
+    /// On large automaton, it might be worthwhile to use multiple
+    /// threads to find states that can be merged.  This can be
+    /// requested with the \a ppolicy argument.
+    ///
     /// \return the number of states that have been merged and removed.
-    unsigned merge_states();
+    unsigned merge_states(parallel_policy ppolicy = parallel_policy());
 
     /// \brief Like merge states, but one can chose which states are
     /// candidates for merging.
diff --git a/tests/python/mergedge.py b/tests/python/mergedge.py
index 2be4d4984..b3e934946 100644
--- a/tests/python/mergedge.py
+++ b/tests/python/mergedge.py
@@ -23,42 +23,40 @@ import spot
 from unittest import TestCase
 tc = TestCase()
 
+aut = spot.automaton("""HOA: v1 States: 1 Start: 0 AP: 1 "a"
+Acceptance: 1 Inf(0) --BODY-- State: 0 [0] 0 [0] 0 {0} --END--""")
+tc.assertEqual(aut.num_edges(), 2)
+aut.merge_edges()
+tc.assertEqual(aut.num_edges(), 1)
+
+aut = spot.automaton("""
+HOA: v1
+States: 2
+Start: 0
+AP: 2 "p0" "p1"
+acc-name: Buchi
+Acceptance: 1 Inf(0)
+properties: trans-labels explicit-labels trans-acc complete
+--BODY--
+State: 0
+[!0] 0 {0}
+[0] 1 {0}
+State: 1
+[!0&!1] 0 {0}
+[0 | 1] 1
+[0&!1] 1 {0}
+--END--""")
+tc.assertEqual(aut.num_edges(), 5)
+aut.merge_edges()
+tc.assertEqual(aut.num_edges(), 5)
+tc.assertFalse(spot.is_deterministic(aut))
+aut = spot.split_edges(aut)
+tc.assertEqual(aut.num_edges(), 9)
+aut.merge_edges()
+tc.assertEqual(aut.num_edges(), 5)
+tc.assertTrue(spot.is_deterministic(aut))
+
 for nthread in range(1, 16, 2):
-    spot.set_nthreads(nthread)
-    tc.assertEqual(spot.get_nthreads(), nthread)
-    aut = spot.automaton("""HOA: v1 States: 1 Start: 0 AP: 1 "a"
-    Acceptance: 1 Inf(0) --BODY-- State: 0 [0] 0 [0] 0 {0} --END--""")
-    tc.assertEqual(aut.num_edges(), 2)
-    aut.merge_edges()
-    tc.assertEqual(aut.num_edges(), 1)
-
-    aut = spot.automaton("""
-    HOA: v1
-    States: 2
-    Start: 0
-    AP: 2 "p0" "p1"
-    acc-name: Buchi
-    Acceptance: 1 Inf(0)
-    properties: trans-labels explicit-labels trans-acc complete
-    --BODY--
-    State: 0
-    [!0] 0 {0}
-    [0] 1 {0}
-    State: 1
-    [!0&!1] 0 {0}
-    [0 | 1] 1
-    [0&!1] 1 {0}
-    --END--""")
-    tc.assertEqual(aut.num_edges(), 5)
-    aut.merge_edges()
-    tc.assertEqual(aut.num_edges(), 5)
-    tc.assertFalse(spot.is_deterministic(aut))
-    aut = spot.split_edges(aut)
-    tc.assertEqual(aut.num_edges(), 9)
-    aut.merge_edges()
-    tc.assertEqual(aut.num_edges(), 5)
-    tc.assertTrue(spot.is_deterministic(aut))
-
     aut = spot.automaton("""
     HOA: v1
     States: 3
@@ -78,12 +76,12 @@ for nthread in range(1, 16, 2):
     [!0] 2 {0}
     [0] 1
     --END--""")
-    aut.merge_states()
+    aut.merge_states(nthread)
     tc.assertEqual(aut.num_edges(), 4)
     tc.assertEqual(aut.num_states(), 2)
     tc.assertTrue(spot.is_deterministic(aut))
     tc.assertTrue(aut.prop_complete())
-    aut.merge_states()
+    aut.merge_states(nthread)
     tc.assertEqual(aut.num_edges(), 4)
     tc.assertEqual(aut.num_states(), 2)
     tc.assertTrue(spot.is_deterministic(aut))
@@ -168,6 +166,6 @@ for nthread in range(1, 16, 2):
     State: 40 [0&1&!2] 7 {3} [0&1&2] 8 {3} [0&!1&!2] 11 {1} [0&!1&2] 12
     {1} [!0&1&!2] 31 {1} [!0&1&2] 32 {1} [!0&!1&2] 34 {1} [!0&!1&!2] 40
     {1} --END--""")
-    aa.merge_states()
+    aa.merge_states(nthread)
     # This used to cause a segfault reported by Philipp.
     print(aa.to_str())