spot/src/misc/intvcmp2.cc

// -*- coding: utf-8 -*-
// Copyright (C) 2011, 2013, 2014, 2015 Laboratoire de Recherche et
// Développement de l'Epita (LRDE).
//
// This file is part of Spot, a model checking library.
//
// Spot is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// Spot is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
// License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

#include "config.h"
#include "common.hh"
#include <cstddef>
#include <cassert>
#include <stdexcept>
#include "intvcmp2.hh"

namespace spot
{
  namespace
  {
    // This implements integer compression inspired by "Simple-9".
    //
    // The first bits of an integer tell how the rest of the integer is coded:
    // 00:   30 1-bit values              id=0
    // 01:   10 3-bit values              id=1
    // 10:    6 5-bit values              id=2
    // 1100:  4 7-bit values              id=3
    // 1101:  3 9-bit values (1 bit lost) id=4
    // 1110:  2 14-bit values             id=5
    // 1111:  1 28-bit value              id=6

    // Packing engine shared by all compressors (CRTP base class).
    //
    // run() repeatedly inspects the pending input, selects a layout
    // wide enough for the next group of values, and emits one 32-bit
    // word per group.  The Self class must provide:
    //
    //   unsigned data_at(size_t i);  // i-th pending (unconsumed) value
    //   void push_data(unsigned w);  // output one packed word
    //   void forward(size_t n);      // drop the n first pending values
    //
    // Values must fit in 28 bits; run() throws std::runtime_error
    // when it meets a wider one, since no layout can represent it.
    template <class Self>
    class stream_compression_base
    {
    public:
      // \a size is the number of input values that run() has to pack.
      explicit stream_compression_base(size_t size)
        : size_(size)
      {
      }

      // Pack the size_ pending values, calling push_data() once per
      // output word.  If the last group does not fill a whole word, the
      // unused low-order slots of that word are left at zero.
      void run()
      {
        // Width (in bits) of one value, for each layout id.
        static const unsigned bits_width[7] = { 1, 3, 5, 7, 9, 14, 28 };
        // Number of values held by one word, for each layout id.  The
        // extra entry (id == 7) is a sentinel for values that fit in
        // no layout.
        static const unsigned max_count[8] = { 30, 10, 6, 4, 3, 2, 1, 0 };
        // Largest value representable by each layout id (with the
        // same sentinel entry).
        static const unsigned max_allowed[8] = { 1,
                                                 (1 << 3) - 1,
                                                 (1 << 5) - 1,
                                                 (1 << 7) - 1,
                                                 (1 << 9) - 1,
                                                 (1 << 14) - 1,
                                                 (1 << 28) - 1,
                                                 -1U };
        // If we have only X data to compress and they fit with the
        // current bit width, the following table tells us we should
        // use bits_width[count_to_level[X - 1]] to limit the number
        // of trailing zeros we encode.  E.g.  count_to_level[5 - 1]
        // is 2, which means that 5 values should be encoded with
        // bits_width[2] == 5 bits.
        static const unsigned count_to_level[30] =
          {
            6, // 1
            5, // 2
            4, // 3
            3, // 4
            2, // 5
            2, // 6
            1, // 7
            1, // 8
            1, // 9
            1, // 10
            0, 0, 0, 0, 0, // 11-15
            0, 0, 0, 0, 0, // 16-20
            0, 0, 0, 0, 0, // 21-25
            0, 0, 0, 0, 0, // 26-30
          };

        while (size_ > 0)
          {
            unsigned id = 0;    // Current level in the above tables.
            unsigned curmax_allowed = max_allowed[id];
            unsigned compressable = 0; // Number of integers ready to pack.
            do
              {
                unsigned int val = self().data_at(compressable);
                ++compressable;
                // Widen the layout until the new value fits.
                while (val > curmax_allowed)
                  {
                    curmax_allowed = max_allowed[++id];

                    // The wider layout holds fewer values than we
                    // have already scanned: emit a full word of the
                    // values that precede this one.
                    if (compressable > max_count[id])
                      goto fast_encode;
                  }
                // We have exactly enough values to fill a word.
                if (compressable >= max_count[id])
                  goto fast_encode;
              }
            while (SPOT_LIKELY(compressable < size_));

            // Here the input is exhausted before a word could be filled.
            assert(compressable < max_count[id]);

            // Since we have less data than the current "id" allows,
            // try to use more bits so we can encode faster.

            id = count_to_level[compressable - 1];

            if (compressable == max_count[id])
              goto fast_encode;

            // Slow compression for situations where we have
            // compressable < max_count[id].  We can only be in
            // one of the 3 first "id" (1, 3, or 5 bits);
            {
              assert(id <= 2);
              unsigned bits = bits_width[id];
              // Number of bits taken by the slots that stay empty.
              unsigned finalshifts = (max_count[id] - compressable) * bits;
              size_t pos = 0;
              unsigned output = self().data_at(pos);
              while (--compressable)
                {
                  output <<= bits;
                  output += self().data_at(++pos);
                }
              output <<= finalshifts;
              // For id <= 2 the layout tag is the two leading bits.
              output += id << 30;
              self().push_data(output);
              // This partial word consumed all the remaining input.
              return;
            }

          fast_encode:
            switch (id)
              {
              case 0: // 30 1-bit values
                {
                  // This code has been tuned so that the compiler can
                  // efficiently encode it as a series of MOV+LEA
                  // instructions, without shifts.  For instance
                  //
                  //   output <<= 1;
                  //   output += self().data_at(4);
                  //
                  // translates to (assuming %eax points to the input,
                  // and %edx holds the output) the following:
                  //
                  //   mov ecx, [eax+16]
                  //   lea edx, [ecx+edx*2]
                  //
                  // This optimization is the reason why we use 'output +='
                  // instead of the more intuitive 'output |=' everywhere in
                  // this file.

                  unsigned int output = 0x00 << 1; // 00
                  output += self().data_at(0);
                  output <<= 1;
                  output += self().data_at(1);
                  output <<= 1;
                  output += self().data_at(2);
                  output <<= 1;
                  output += self().data_at(3);
                  output <<= 1;
                  output += self().data_at(4);
                  output <<= 1;
                  output += self().data_at(5);
                  output <<= 1;
                  output += self().data_at(6);
                  output <<= 1;
                  output += self().data_at(7);
                  output <<= 1;
                  output += self().data_at(8);
                  output <<= 1;
                  output += self().data_at(9);
                  output <<= 1;
                  output += self().data_at(10);
                  output <<= 1;
                  output += self().data_at(11);
                  output <<= 1;
                  output += self().data_at(12);
                  output <<= 1;
                  output += self().data_at(13);
                  output <<= 1;
                  output += self().data_at(14);
                  output <<= 1;
                  output += self().data_at(15);
                  output <<= 1;
                  output += self().data_at(16);
                  output <<= 1;
                  output += self().data_at(17);
                  output <<= 1;
                  output += self().data_at(18);
                  output <<= 1;
                  output += self().data_at(19);
                  output <<= 1;
                  output += self().data_at(20);
                  output <<= 1;
                  output += self().data_at(21);
                  output <<= 1;
                  output += self().data_at(22);
                  output <<= 1;
                  output += self().data_at(23);
                  output <<= 1;
                  output += self().data_at(24);
                  output <<= 1;
                  output += self().data_at(25);
                  output <<= 1;
                  output += self().data_at(26);
                  output <<= 1;
                  output += self().data_at(27);
                  output <<= 1;
                  output += self().data_at(28);
                  output <<= 1;
                  output += self().data_at(29);
                  self().push_data(output);
                }
                break;
              case 1: // 10 3-bit values
                {
                  // This code has been tuned so that the compiler can
                  // efficiently encode it as a series of MOV+LEA
                  // instructions, without shifts.  For instance
                  //
                  //   output <<= 3;
                  //   output += self().data_at(4);
                  //
                  // translates to (assuming %eax points to the input,
                  // and %edx holds the output) the following:
                  //
                  //   mov ecx, [eax+16]
                  //   lea edx, [ecx+edx*8]

                  // The leading 1 ends up in bit 30 after the nine
                  // 3-bit shifts below, giving the "01" tag.
                  unsigned int output = 0x01 << 3; // 01
                  output += self().data_at(0);
                  output <<= 3;
                  output += self().data_at(1);
                  output <<= 3;
                  output += self().data_at(2);
                  output <<= 3;
                  output += self().data_at(3);
                  output <<= 3;
                  output += self().data_at(4);
                  output <<= 3;
                  output += self().data_at(5);
                  output <<= 3;
                  output += self().data_at(6);
                  output <<= 3;
                  output += self().data_at(7);
                  output <<= 3;
                  output += self().data_at(8);
                  output <<= 3;
                  output += self().data_at(9);
                  self().push_data(output);
                }
                break;
              case 2: // 6 5-bit values
                {
                  unsigned int output = 0x02U << 30; // 10
                  output += self().data_at(0) << 25;
                  output += self().data_at(1) << 20;
                  output += self().data_at(2) << 15;
                  output += self().data_at(3) << 10;
                  output += self().data_at(4) << 5;
                  output += self().data_at(5);
                  self().push_data(output);
                }
                break;
              case 3: // 4 7-bit values
                {
                  unsigned int output = 0x0CU << 28; // 1100
                  output += self().data_at(0) << 21;
                  output += self().data_at(1) << 14;
                  output += self().data_at(2) << 7;
                  output += self().data_at(3);
                  self().push_data(output);
                }
                break;
              case 4: // 3 9-bit values
                {
                  unsigned int output = 0x0DU << 28; // 1101x (1 bit lost)
                  output += self().data_at(0) << 18;
                  output += self().data_at(1) << 9;
                  output += self().data_at(2);
                  self().push_data(output);
                }
                break;
              case 5: // 2 14-bit values
                {
                  unsigned int output = 0x0EU << 28; // 1110
                  output += self().data_at(0) << 14;
                  output += self().data_at(1);
                  self().push_data(output);
                }
                break;
              case 6: // one 28-bit value
                {
                  unsigned int output = 0x0FU << 28; // 1111
                  output += self().data_at(0);
                  self().push_data(output);
                }
                break;
              default:
                // id == 7: the first pending value needs more than 28
                // bits, so no layout can hold it.  Since max_count[7]
                // is 0, nothing would be consumed below and this loop
                // would spin forever; report the problem instead.
                throw std::runtime_error
                  ("intvcmp2: cannot compress a value wider than 28 bits");
              }
            // Skip the values that were just packed.
            self().forward(max_count[id]);
            size_ -= max_count[id];
          }
      }

    protected:

      // Number of input values still to be packed.
      size_t size_;

      // CRTP accessors to the derived class.
      Self& self()
      {
        return static_cast<Self&>(*this);
      }

      const Self& self() const
      {
        return static_cast<const Self&>(*this);
      }

    };


    // Compressor reading its values from an int array and storing the
    // packed words into another int array.
    class int_array_array_compression:
      public stream_compression_base<int_array_array_compression>
    {
    public:
      // \a array holds the \a n values to pack.  \a dest receives the
      // packed words; \a dest_n gives its capacity on entry (only used
      // to bound the assertion in push_data()) and is then reused as
      // the counter of words written, starting from zero.
      int_array_array_compression(const int* array, size_t n,
                                  int* dest, size_t& dest_n)
        : stream_compression_base<int_array_array_compression>(n),
          array_(array),
          result_size_(dest_n),
          result_(dest),
          result_end_(dest + dest_n) // Read the capacity before the reset.
      {
        // result_size_ aliases dest_n, so this clears the caller's counter.
        result_size_ = 0;
      }

      // Append one packed word to the destination array.
      void push_data(unsigned int i)
      {
        assert(result_ < result_end_);
        *result_ = static_cast<int>(i);
        ++result_;
        ++result_size_;
      }

      // Return the pending input value located \a offset positions
      // after the current read position.
      unsigned int data_at(size_t offset)
      {
        const int raw = *(array_ + offset);
        return static_cast<unsigned int>(raw);
      }

      // Advance the read position by \a offset values.
      void forward(size_t offset)
      {
        array_ = array_ + offset;
      }

    protected:
      const int* array_;        // Next input value to read.
      size_t& result_size_;     // Caller's count of words written.
      int* result_;             // Next free slot in the destination.
      int* result_end_;         // One past the end of the destination.
    };

  } // anonymous


  void
  int_array_array_compress2(const int* array, size_t n,
			    int* dest, size_t& dest_size)
  {
    int_array_array_compression c(array, n, dest, dest_size);
    c.run();
  }


  namespace
  {

    // Unpacking engine shared by all decompressors (CRTP base class).
    //
    // run() reads packed words one at a time and expands every slot of
    // each word, including slots that are just zero padding.  The Self
    // class must provide:
    //
    //   bool have_comp_data() const;            // any word left?
    //   unsigned next_comp_data();              // fetch the next word
    //   void write_data_at(size_t i, unsigned); // store the i-th value
    //                                           // of the current group
    //   void forward(size_t n);                 // group of n values done
    template<class Self>
    class stream_decompression_base
    {
    public:

      // Expand all the packed words supplied by Self.
      void run()
      {
        while (SPOT_LIKELY(self().have_comp_data()))
          {
            const unsigned word = self().next_comp_data();

            // The four leading bits are enough to tell the layouts
            // apart; layouts with a two-bit tag own four case labels.
            switch (word >> 28)
              {
              case 0x00: // 00xx - 30 1-bit values.
              case 0x01:
              case 0x02:
              case 0x03:
                self().write_data_at(0,  (word >> 29) & 1u);
                self().write_data_at(1,  (word >> 28) & 1u);
                self().write_data_at(2,  (word >> 27) & 1u);
                self().write_data_at(3,  (word >> 26) & 1u);
                self().write_data_at(4,  (word >> 25) & 1u);
                self().write_data_at(5,  (word >> 24) & 1u);
                self().write_data_at(6,  (word >> 23) & 1u);
                self().write_data_at(7,  (word >> 22) & 1u);
                self().write_data_at(8,  (word >> 21) & 1u);
                self().write_data_at(9,  (word >> 20) & 1u);
                self().write_data_at(10, (word >> 19) & 1u);
                self().write_data_at(11, (word >> 18) & 1u);
                self().write_data_at(12, (word >> 17) & 1u);
                self().write_data_at(13, (word >> 16) & 1u);
                self().write_data_at(14, (word >> 15) & 1u);
                self().write_data_at(15, (word >> 14) & 1u);
                self().write_data_at(16, (word >> 13) & 1u);
                self().write_data_at(17, (word >> 12) & 1u);
                self().write_data_at(18, (word >> 11) & 1u);
                self().write_data_at(19, (word >> 10) & 1u);
                self().write_data_at(20, (word >>  9) & 1u);
                self().write_data_at(21, (word >>  8) & 1u);
                self().write_data_at(22, (word >>  7) & 1u);
                self().write_data_at(23, (word >>  6) & 1u);
                self().write_data_at(24, (word >>  5) & 1u);
                self().write_data_at(25, (word >>  4) & 1u);
                self().write_data_at(26, (word >>  3) & 1u);
                self().write_data_at(27, (word >>  2) & 1u);
                self().write_data_at(28, (word >>  1) & 1u);
                self().write_data_at(29, word & 1u);
                self().forward(30);
                break;
              case 0x04: // 01xx - 10 3-bit values.
              case 0x05:
              case 0x06:
              case 0x07:
                self().write_data_at(0, (word >> 27) & 7u);
                self().write_data_at(1, (word >> 24) & 7u);
                self().write_data_at(2, (word >> 21) & 7u);
                self().write_data_at(3, (word >> 18) & 7u);
                self().write_data_at(4, (word >> 15) & 7u);
                self().write_data_at(5, (word >> 12) & 7u);
                self().write_data_at(6, (word >>  9) & 7u);
                self().write_data_at(7, (word >>  6) & 7u);
                self().write_data_at(8, (word >>  3) & 7u);
                self().write_data_at(9, word & 7u);
                self().forward(10);
                break;
              case 0x08: // 10xx - 6 5-bit values.
              case 0x09:
              case 0x0A:
              case 0x0B:
                self().write_data_at(0, (word >> 25) & 0x1Fu);
                self().write_data_at(1, (word >> 20) & 0x1Fu);
                self().write_data_at(2, (word >> 15) & 0x1Fu);
                self().write_data_at(3, (word >> 10) & 0x1Fu);
                self().write_data_at(4, (word >>  5) & 0x1Fu);
                self().write_data_at(5, word & 0x1Fu);
                self().forward(6);
                break;
              case 0x0C: // 1100 - 4 7-bit values.
                self().write_data_at(0, (word >> 21) & 0x7Fu);
                self().write_data_at(1, (word >> 14) & 0x7Fu);
                self().write_data_at(2, (word >>  7) & 0x7Fu);
                self().write_data_at(3, word & 0x7Fu);
                self().forward(4);
                break;
              case 0x0D: // 1101x - 3 9-bit values (bit 27 is unused).
                self().write_data_at(0, (word >> 18) & 0x1FFu);
                self().write_data_at(1, (word >>  9) & 0x1FFu);
                self().write_data_at(2, word & 0x1FFu);
                self().forward(3);
                break;
              case 0x0E: // 1110 - 2 14-bit values.
                self().write_data_at(0, (word >> 14) & 0x3FFFu);
                self().write_data_at(1, word & 0x3FFFu);
                self().forward(2);
                break;
              case 0x0F: // 1111 - 1 28-bit value.
                self().write_data_at(0, word & 0xFFFFFFFu);
                self().forward(1);
                break;
              }
          }
      }


    protected:
      // CRTP accessors to the derived class.
      Self& self()
      {
        return static_cast<Self&>(*this);
      }

      const Self& self() const
      {
        return static_cast<const Self&>(*this);
      }
    };


    // Decompressor reading packed words from an int array and storing
    // the expanded values into another int array.
    class int_array_array_decompression:
      public stream_decompression_base<int_array_array_decompression>
    {
    public:
      // \a array holds \a array_size packed words; the expanded values
      // are written to \a res.  No bound is kept for \a res, so writes
      // are not range-checked.
      int_array_array_decompression(const int* array,
                                    size_t array_size,
                                    int* res)
        : array_(array),
          n_(array_size),
          pos_(0),
          result_(res)
      {
      }

      // Store value \a i at index \a pos of the group being expanded.
      void write_data_at(size_t pos, unsigned int i)
      {
        *(result_ + pos) = static_cast<int>(i);
      }

      // The current group held \a i values: move the output cursor
      // past them.
      void forward(size_t i)
      {
        result_ = result_ + i;
      }

      // Whether at least one packed word remains to be read.
      bool have_comp_data() const
      {
        return n_ > pos_;
      }

      // Return the next packed word and step over it.
      unsigned int next_comp_data()
      {
        const int packed = array_[pos_];
        ++pos_;
        return static_cast<unsigned int>(packed);
      }

    protected:
      const int* array_;        // Packed input words.
      size_t n_;                // Number of packed words.
      size_t pos_;              // Index of the next word to read.
      int* result_;             // Start of the group being written.
    };

  }


  void
  int_array_array_decompress2(const int* array, size_t array_size, int* res,
			      size_t)
  {
    int_array_array_decompression c(array, array_size, res);
    c.run();
  }


} // spot