Preliminary implementation of an int array compressor.

* src/misc/intvcomp.hh: New file. * src/misc/Makefile.am: Add it. * src/tgbatest/intvcomp.cc, src/tgbatest/intvcomp.test: New files. * src/tgbatest/Makefile.am: Add them.
2011-04-07 19:39:20 +02:00 · 2011-04-07 19:39:20 +02:00 · bc1275455c
commit bc1275455c
parent 9ad062b247
6 changed files with 584 additions and 0 deletions
--- a/src/misc/Makefile.am
+++ b/src/misc/Makefile.am
@ -37,6 +37,7 @@ misc_HEADERS = \
  freelist.hh \
  hash.hh \
  hashfunc.hh \
+  intvcomp.hh \
  ltstr.hh \
  minato.hh \
  memusage.hh \
--- a/src/misc/intvcomp.hh
+++ b/src/misc/intvcomp.hh
@ -0,0 +1,457 @@
+// Copyright (C) 2011 Laboratoire de Recherche et Developpement de
+// l'Epita (LRDE).
+//
+// This file is part of Spot, a model checking library.
+//
+// Spot is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// Spot is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+// License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Spot; see the file COPYING.  If not, write to the Free
+// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+// 02111-1307, USA.
+
+#ifndef SPOT_MISC_INTVCOMP_HH
+# define SPOT_MISC_INTVCOMP_HH
+
+#include <cstddef>
+#include <vector>
+#include <cassert>
+
+namespace spot
+{
+
+  //  Compression scheme
+  //  ------------------
+  //
+  // Assumptions:
+  //  - small and positive values are more frequent than negative
+  //    and large values.
+  //  - 0 is the most frequent value.
+  //  - repeated values (esp. repeated 0s) occur often.
+  //
+  //  00  encodes "value 0"
+  //  010 encodes "value 1"
+  //  011 encodes "a value in [2..5]" followed by 2 bits
+  //  100 encodes "a value in [6..21]" followed by 4 bits
+  //  101 encodes "repeat prev. value [1..8] times" followed by 3 bits count
+  //  110 encodes "repeat prev. value [9..40] times" followed by 5 bits count
+  //  111 encodes "an int value" followed by 32 bits
+  //
+  // If 101 or 110 occur at the start, the value to repeat is 0.
+
+  // CRTP base class implementing the bit-level encoder.  Codes are
+  // packed most-significant-bit first into unsigned int words.
+  //
+  // Self must provide:
+  //  - bool have_data() const       -- is there input left?
+  //  - unsigned int next_data()     -- consume and return the next value
+  //  - bool skip_if(unsigned int v) -- consume the next value iff it is v
+  //  - void push_data(unsigned int) -- receive one complete output word
+  template <class Self>
+  class stream_compression_base
+  {
+    static const unsigned int max_bits = sizeof(unsigned int) * 8;
+    // Longest run a single repeat code can describe: the 110 code
+    // stores (count - 9) on 5 bits.
+    static const unsigned int max_repeat = 9 + 0x1f;
+
+  public:
+    stream_compression_base()
+      : cur_(0), bits_left_(max_bits)
+    {
+    }
+
+    // Emit the code of a single value, without any run-length handling.
+    void emit(unsigned int val)
+    {
+      if (val == 0)
+        {
+          self().push_bits(0x0, 2, 0x3);
+        }
+      else if (val == 1)
+        {
+          self().push_bits(0x2, 3, 0x7);
+        }
+      else if (val >= 2 && val <= 5)
+        {
+          self().push_bits(0x3, 3, 0x7);
+          self().push_bits(val - 2, 2, 0x3);
+        }
+      else if (val >= 6 && val <= 21)
+        {
+          // The payload is 4 bits wide, so only the 16 offsets 0..15
+          // (values 6..21) fit.  22 must use the 32-bit code below,
+          // otherwise its offset (16) is truncated to 0.
+          self().push_bits(0x4, 3, 0x7);
+          self().push_bits(val - 6, 4, 0xf);
+        }
+      else
+        {
+          assert(val > 21);
+          self().push_bits(0x7, 3, 0x7);
+          self().push_bits(val, 32, -1U);
+        }
+    }
+
+    // Encode all the input supplied by Self, then flush the last
+    // (possibly partial) word.
+    void run()
+    {
+      // The decoder assumes the value preceding the stream is 0, so a
+      // repeat code may appear first.
+      unsigned int last_val = 0;
+
+      while (self().have_data())
+        {
+          unsigned int val = self().next_data();
+          if (val != last_val)
+            {
+              emit(val);
+              last_val = val;
+              continue;
+            }
+
+          // Repeated value: gather as many occurrences as one repeat
+          // code can hold.  Longer runs are continued by the next
+          // iteration, since last_val is unchanged.
+          unsigned int count = 1;
+          while (count < max_repeat && self().skip_if(val))
+            ++count;
+
+          if ((val == 0 && count < 3) || (val == 1 && count == 1))
+            {
+              // It is more efficient to emit 0 once or twice directly
+              // (e.g., 00 00 vs. 101 001), and to emit a single 1
+              // directly (010 vs. 101 000).
+              while (count--)
+                emit(val);
+            }
+          else if (count < 9)
+            {
+              self().push_bits(0x5, 3, 0x7);
+              self().push_bits(count - 1, 3, 0x7);
+            }
+          else
+            {
+              self().push_bits(0x6, 3, 0x7);
+              self().push_bits(count - 9, 5, 0x1f);
+            }
+        }
+      flush();
+    }
+
+    // Append the n low bits of (bits & mask) to the current word.
+    // This version assumes there is at least n bits free in cur_.
+    void
+    push_bits_unchecked(unsigned int bits, unsigned int n, unsigned int mask)
+    {
+      assert(n <= bits_left_);
+      // Shifting by the full width of the type is undefined.  A
+      // full-width push is only possible when the word is empty, so
+      // resetting cur_ is equivalent.
+      if (n < max_bits)
+        cur_ <<= n;
+      else
+        cur_ = 0;
+      cur_ |= (bits & mask);
+      if (bits_left_ -= n)
+        return;
+
+      // The word is full: hand it over and start a new one.
+      self().push_data(cur_);
+      cur_ = 0;
+      bits_left_ = max_bits;
+    }
+
+    // Append the n low bits of (bits & mask), splitting them over two
+    // words when the current one does not have enough room.
+    void
+    push_bits(unsigned int bits, unsigned int n, unsigned int mask)
+    {
+      if (n <= bits_left_)
+        {
+          push_bits_unchecked(bits, n, mask);
+          return;
+        }
+
+      // bits_left_ < n: the high part completes the current word, the
+      // low part starts the next one.  Both parts are at most 31 bits
+      // wide; the masks are built from 1U because 1 << 31 overflows a
+      // signed int.
+      unsigned int right_bit_count = n - bits_left_;
+      unsigned int left = bits >> right_bit_count;
+      push_bits_unchecked(left, bits_left_, (1U << bits_left_) - 1U);
+      push_bits_unchecked(bits, right_bit_count,
+                          (1U << right_bit_count) - 1U);
+    }
+
+    // Output the pending partial word, if any, padded with 0 bits on
+    // the right.
+    void flush()
+    {
+      if (bits_left_ == max_bits)
+        return;
+      cur_ <<= bits_left_;
+      self().push_data(cur_);
+    }
+
+  protected:
+    Self& self()
+    {
+      return static_cast<Self&>(*this);
+    }
+
+    const Self& self() const
+    {
+      return static_cast<const Self&>(*this);
+    }
+
+    unsigned int cur_;		// Word being assembled.
+    unsigned int bits_left_;	// Number of free bits in cur_.
+  };
+
+  // Compressor reading its input from a plain int array and collecting
+  // the output words in a heap-allocated vector.
+  //
+  // The vector is allocated by the constructor and is not freed by
+  // this class: the pointer returned by result() has to be deleted by
+  // whoever retrieves it.
+  class int_array_compression:
+    public stream_compression_base<int_array_compression>
+  {
+  public:
+    // ARRAY holds the N values to compress.
+    int_array_compression(int* array, size_t n)
+      : array_(array), n_(n), pos_(0), result_(new std::vector<unsigned int>)
+    {
+    }
+
+    // Append one compressed word to the output.
+    void push_data(unsigned int i)
+    {
+      result_->push_back(i);
+    }
+
+    // The words produced so far.
+    const std::vector<unsigned int>*
+    result() const
+    {
+      return result_;
+    }
+
+    // Whether some input values have not been consumed yet.
+    bool have_data() const
+    {
+      return pos_ < n_;
+    }
+
+    // Consume the next input value and return it as an unsigned int.
+    unsigned int next_data()
+    {
+      const int value = array_[pos_];
+      ++pos_;
+      return static_cast<unsigned int>(value);
+    }
+
+    // Consume the next input value only if it is equal to VAL.
+    // Return true iff a value was consumed.
+    bool skip_if(unsigned int val)
+    {
+      if (have_data() && static_cast<unsigned int>(array_[pos_]) == val)
+        {
+          ++pos_;
+          return true;
+        }
+      return false;
+    }
+
+  protected:
+    int* array_;
+    size_t n_;
+    size_t pos_;
+    std::vector<unsigned int>* result_;
+  };
+
+  // Compress the N values of ARRAY.
+  //
+  // Return a newly allocated vector holding the compressed words; the
+  // caller is responsible for deleting it.
+  //
+  // Declared inline because it is defined in a header: without it,
+  // including this file from two translation units yields multiple
+  // definitions at link time.
+  inline const std::vector<unsigned int>*
+  int_array_compress(int* array, unsigned int n)
+  {
+    int_array_compression c(array, n);
+    c.run();
+    return c.result();
+  }
+
+  //////////////////////////////////////////////////////////////////////
+
+  // CRTP base class implementing the bit-level decoder.
+  //
+  // Bits are consumed most-significant-bit first from a look-ahead
+  // word (look_), which is refilled from a second word (buffer_).
+  //
+  // Self must provide:
+  //  - bool complete() const          -- has all the output been produced?
+  //  - void push_data(unsigned int)   -- output one value
+  //  - void repeat(unsigned int n)    -- output the previous value n times
+  //  - bool have_comp_data() const    -- is there a compressed word left?
+  //  - unsigned int next_comp_data()  -- consume the next compressed word
+  template<class Self>
+  class stream_decompression_base
+  {
+    static const unsigned int max_bits = sizeof(unsigned int) * 8;
+
+    // The three helpers below exist because shifting an unsigned int
+    // by max_bits or more is undefined.  (For instance the Intel "shl"
+    // instruction only considers the low 5 bits of the count, so
+    // "shl 32" behaves like "shl 0".)
+
+    // Mask selecting the n low bits of a word.
+    static unsigned int low_mask(unsigned int n)
+    {
+      return n >= max_bits ? -1U : (1U << n) - 1U;
+    }
+
+    // v << n, yielding 0 when n is the word width or more.
+    static unsigned int shift_left(unsigned int v, unsigned int n)
+    {
+      return n >= max_bits ? 0U : v << n;
+    }
+
+    // v >> n, yielding 0 when n is the word width or more.
+    static unsigned int shift_right(unsigned int v, unsigned int n)
+    {
+      return n >= max_bits ? 0U : v >> n;
+    }
+
+  public:
+    // Start with empty look-ahead and buffer words, so that every
+    // member has a defined value before run() is called.
+    stream_decompression_base()
+      : look_(0), look_bits_(0), look_mask_(0),
+        buffer_(0), buffer_bits_(0), buffer_mask_(0)
+    {
+    }
+
+    // Move as many bits as possible from buffer_ (reloading it from
+    // the compressed input when it gets empty) into look_.
+    void refill()
+    {
+      for (;;)
+        {
+          unsigned int fill_size = max_bits - look_bits_;
+          if (fill_size > buffer_bits_)
+            fill_size = buffer_bits_;
+
+          // Transfer the fill_size high bits of the buffer to the
+          // low end of the look-ahead word.
+          look_ = shift_left(look_, fill_size)
+            | shift_right(buffer_, buffer_bits_ - fill_size);
+          buffer_bits_ -= fill_size;
+          buffer_mask_ = low_mask(buffer_bits_);
+          buffer_ &= buffer_mask_;
+          look_bits_ += fill_size;
+
+          if (buffer_bits_ != 0 || !self().have_comp_data())
+            break;
+
+          buffer_ = self().next_comp_data();
+          buffer_bits_ = max_bits;
+          buffer_mask_ = -1U;
+          if (look_bits_ == max_bits)
+            break;
+        }
+
+      look_mask_ = low_mask(look_bits_);
+    }
+
+    // Return the next n bits without consuming them.
+    //
+    // If the input is exhausted and fewer than n bits remain, the
+    // missing low bits read as 0.  This happens legitimately when the
+    // stream ends with the 2-bit code 00 in the last two bits of a
+    // word while run() peeks at 3 bits.
+    unsigned int look_n_bits(unsigned int n)
+    {
+      if (look_bits_ < n)
+        refill();
+      if (look_bits_ < n)
+        return shift_left(look_, n - look_bits_);
+      return shift_right(look_, look_bits_ - n);
+    }
+
+    // Consume n bits that are already in the look-ahead word.
+    void skip_n_bits(unsigned int n)
+    {
+      assert(n <= look_bits_);
+      look_bits_ -= n;
+      look_mask_ = low_mask(look_bits_);
+      look_ &= look_mask_;
+    }
+
+    // Consume and return the next n bits.
+    unsigned int get_n_bits(unsigned int n)
+    {
+      if (look_bits_ < n)
+        refill();
+      assert(n <= look_bits_);
+      look_bits_ -= n;
+      unsigned int val = shift_right(look_, look_bits_);
+      look_mask_ = low_mask(look_bits_);
+      look_ &= look_mask_;
+      return val;
+    }
+
+    // Decode the compressed input until Self reports completion.
+    // Nothing is done if there is no compressed input at all.
+    void run()
+    {
+      if (!self().have_comp_data())
+        return;
+
+      // Preload the look-ahead word, and the buffer word if the input
+      // has more than one word.
+      look_ = self().next_comp_data();
+      look_bits_ = max_bits;
+      look_mask_ = -1U;
+      if (self().have_comp_data())
+        {
+          buffer_ = self().next_comp_data();
+          buffer_bits_ = max_bits;
+          buffer_mask_ = -1U;
+        }
+      else
+        {
+          buffer_ = 0;
+          buffer_bits_ = 0;
+          buffer_mask_ = 0;
+        }
+
+      while (!self().complete())
+        {
+          // Every code but 00 is 3 bits long; for 00 the third bit
+          // belongs to the next code and is left in place.
+          unsigned int token = look_n_bits(3);
+          switch (token)
+            {
+            case 0x0: // 00[0]
+            case 0x1: // 00[1]
+              skip_n_bits(2);
+              self().push_data(0);
+              break;
+            case 0x2: // 010
+              skip_n_bits(3);
+              self().push_data(1);
+              break;
+            case 0x3: // 011
+              skip_n_bits(3);
+              self().push_data(2 + get_n_bits(2));
+              break;
+            case 0x4: // 100
+              skip_n_bits(3);
+              self().push_data(6 + get_n_bits(4));
+              break;
+            case 0x5: // 101
+              skip_n_bits(3);
+              self().repeat(1 + get_n_bits(3));
+              break;
+            case 0x6: // 110
+              skip_n_bits(3);
+              self().repeat(9 + get_n_bits(5));
+              break;
+            case 0x7: // 111
+              skip_n_bits(3);
+              self().push_data(get_n_bits(32));
+              break;
+            default:
+              assert(0);
+            }
+        }
+    }
+
+
+  protected:
+    Self& self()
+    {
+      return static_cast<Self&>(*this);
+    }
+
+    const Self& self() const
+    {
+      return static_cast<const Self&>(*this);
+    }
+
+    unsigned int look_;		// Look-ahead word, right-aligned.
+    unsigned int look_bits_;	// Number of valid bits in look_.
+    unsigned int look_mask_;	// Mask of the valid bits of look_.
+    unsigned int buffer_;	// Next bits to move into look_.
+    unsigned int buffer_bits_;	// Number of valid bits in buffer_.
+    unsigned int buffer_mask_;	// Mask of the valid bits of buffer_.
+  };
+
+  // Decompressor reading compressed words from a vector and writing
+  // the decoded values into a caller-supplied int array.
+  class int_array_decompression:
+    public stream_decompression_base<int_array_decompression>
+  {
+  public:
+    // ARRAY holds the compressed words (it must not be null), RES is
+    // where the decoded values are written, and SIZE is the number of
+    // values to decode, i.e., the room available in RES.
+    int_array_decompression(const std::vector<unsigned int>* array, int* res,
+                            size_t size)
+      : prev_(0), array_(array), n_(array->size()), pos_(0), result_(res),
+        size_(size)
+    {
+    }
+
+    // Whether all the expected values have been written.
+    bool complete() const
+    {
+      return size_ == 0;
+    }
+
+    // Write one value, and remember it for later repetitions.
+    void push_data(int i)
+    {
+      assert(size_ > 0);
+      prev_ = i;
+      *result_++ = i;
+      --size_;
+    }
+
+    // Write the previous value I more times.
+    void repeat(unsigned int i)
+    {
+      // A well-formed stream never repeats past the end of the
+      // output.  Clamp anyway: otherwise size_ would wrap around,
+      // complete() would stay false, and decoding would keep writing
+      // beyond the result array.
+      assert(i <= size_);
+      if (i > size_)
+        i = static_cast<unsigned int>(size_);
+      size_ -= i;
+      while (i--)
+        *result_++ = prev_;
+    }
+
+    // Whether some compressed words have not been consumed yet.
+    bool have_comp_data() const
+    {
+      return pos_ < n_;
+    }
+
+    // Consume and return the next compressed word.
+    unsigned int next_comp_data()
+    {
+      return (*array_)[pos_++];
+    }
+
+  protected:
+    int prev_;			// Last value written by push_data().
+    const std::vector<unsigned int>* array_;
+    size_t n_;
+    size_t pos_;
+    int* result_;		// Next slot to write.
+    size_t size_;		// Number of values still to write.
+  };
+
+  // Decode SIZE values from the compressed words of ARRAY into RES.
+  // RES must have room for SIZE ints.
+  //
+  // Declared inline because it is defined in a header: without it,
+  // including this file from two translation units yields multiple
+  // definitions at link time.
+  inline void
+  int_array_decompress(const std::vector<unsigned int>* array, int* res,
+                       size_t size)
+  {
+    int_array_decompression c(array, res, size);
+    c.run();
+  }
+
+
+}
+
+#endif // SPOT_MISC_INTVCOMP_HH
--- a/src/tgbatest/Makefile.am
+++ b/src/tgbatest/Makefile.am
@ -37,6 +37,7 @@ check_PROGRAMS = \
  explicit \
  expldot \
  explprod \
+  intvcomp \
  ltlprod \
  mixprod \
  powerset \
@ -54,6 +55,7 @@ explicit_SOURCES = explicit.cc
 expldot_SOURCES  = powerset.cc
 expldot_CXXFLAGS = -DDOTTY
 explprod_SOURCES = explprod.cc
+intvcomp_SOURCES = intvcomp.cc
 ltl2tgba_SOURCES = ltl2tgba.cc
 ltlprod_SOURCES  = ltlprod.cc
 mixprod_SOURCES  = mixprod.cc
@ -69,6 +71,7 @@ tripprod_SOURCES = tripprod.cc
 # Keep this sorted by STRENGTH.  Test basic things first,
 # because such failures will be easier to diagnose and fix.
 TESTS = \
+  intvcomp.test \
  eltl2tgba.test \
  explicit.test \
  taatgba.test \
--- a/src/tgbatest/intvcomp.cc
+++ b/src/tgbatest/intvcomp.cc
@ -0,0 +1,87 @@
+#include <iostream>
+#include "misc/intvcomp.hh"
+#include <cstring>
+
+// Compress the SIZE ints of COMP, decompress the result, and print
+// the compressed words ("C[..]") and the decoded values ("D[..]").
+//
+// Return 1 if the decoded values differ from COMP, or if EXPECTED is
+// nonzero and differs from the size in bytes of the compressed data.
+// Return 0 otherwise.
+int check(int* comp, int size, unsigned expected = 0)
+{
+  const std::vector<unsigned int>* packed =
+    spot::int_array_compress(comp, size);
+
+  std::cout << "C[" << packed->size() << "] ";
+  for (std::vector<unsigned int>::const_iterator it = packed->begin();
+       it != packed->end(); ++it)
+    std::cout << *it << " ";
+  std::cout << std::endl;
+
+  int* unpacked = new int[size];
+  spot::int_array_decompress(packed, unpacked, size);
+
+  std::cout << "D[" << size << "] ";
+  {
+    int k = 0;
+    while (k < size)
+      std::cout << unpacked[k++] << " ";
+  }
+  std::cout << std::endl;
+
+  // Nonzero as soon as one of the two verifications fails.
+  int status = memcmp(comp, unpacked, size * sizeof(int));
+
+  if (status != 0)
+    {
+      std::cout << "*** cmp error *** " << status << std::endl;
+      std::cout << "E[" << size << "] ";
+      int k = 0;
+      while (k < size)
+        std::cout << comp[k++] << " ";
+      std::cout << std::endl;
+    }
+
+  if (expected != 0 && packed->size() * sizeof(int) != expected)
+    {
+      std::cout << "*** size error *** (expected "
+                << expected << " bytes, got " << packed->size() * sizeof(int)
+                << " bytes)" << std::endl;
+      status = 1;
+    }
+
+  std::cout << std::endl;
+
+  delete packed;
+  delete[] unpacked;
+  return status ? 1 : 0;
+}
+
+// Round-trip a series of sample arrays through check().  The exit
+// status is the number of arrays for which check() reported a failure.
+int main()
+{
+  int failures = 0;
+
+  // Small values with runs of zeros.
+  int mixed[] = { 1, 0, 0, 0, 0, 0, 3, 3, 4, 0, 0, 0 };
+  failures += check(mixed, sizeof(mixed) / sizeof(mixed[0]));
+
+  // Small values with no run.
+  int digits[] = { 3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3, 1 };
+  failures += check(digits, sizeof(digits) / sizeof(digits[0]));
+
+  // Only zeros and ones.
+  int binary[] = { 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
+                   0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0,
+                   0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
+                   0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+                   0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1,
+                   0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0 };
+  failures += check(binary, sizeof(binary) / sizeof(binary[0]));
+
+  // The next three inputs are expected to fill whole words exactly.
+  int one_word[] = { 1, 2, 1, 2, 1, 2, 2, 0 }; // 32 bits
+  failures += check(one_word, sizeof(one_word) / sizeof(one_word[0]), 4);
+
+  int two_words[] = { 1, 2, 1, 2, 1, 2, 2, 0,
+                      1, 2, 1, 2, 1, 2, 2, 0 }; // 64 bits
+  failures += check(two_words, sizeof(two_words) / sizeof(two_words[0]), 8);
+
+  int four_words[] = { 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 2, 0,
+                       1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 2, 0 }; // 128 bits
+  failures += check(four_words, sizeof(four_words) / sizeof(four_words[0]), 16);
+
+  // Negative and larger values.
+  int with_negatives[] = { -4, -8, -10, 3, 49, 50, 0, 20, 13 };
+  failures += check(with_negatives,
+                    sizeof(with_negatives) / sizeof(with_negatives[0]));
+
+  // Only large values.
+  int large[] = { 4959, 6754, 8133, 10985, 11121, 14413, 17335, 20754,
+                  21317, 30008, 30381, 33494, 34935, 41210, 41417 };
+  failures += check(large, sizeof(large) / sizeof(large[0]));
+
+  // Runs of zeros at both ends.
+  int zero_runs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
+                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  failures += check(zero_runs, sizeof(zero_runs) / sizeof(zero_runs[0]));
+
+  return failures;
+}
--- a/src/tgbatest/intvcomp.test
+++ b/src/tgbatest/intvcomp.test
@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright (C) 2011 Laboratoire de Recherche et Développement
+# de l'Epita (LRDE).
+#
+# This file is part of Spot, a model checking library.
+#
+# Spot is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Spot is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Spot; see the file COPYING.  If not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+
+# Source the test-suite helper file from the current directory
+# (presumably where `run' is defined -- confirm in ./defs).
+. ./defs
+
+# Abort the script as soon as a command fails.
+set -e
+
+# Launch the round-trip test program; NOTE(review): the leading 0 is
+# presumably the exit status `run' expects -- confirm in ./defs.
+run 0 ../intvcomp