From 4ec936387e177a67bfc263124c88047b31d369f1 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Sat, 9 Apr 2011 15:06:10 +0200 Subject: [PATCH] Move the compression routines into their own *.cc file. * src/misc/intvcomp.hh: Move all code... * src/misc/intvcomp.cc: ... in this new file. * src/misc/Makefile.am: Add invcomp.cc --- ChangeLog | 8 + src/misc/Makefile.am | 1 + src/misc/intvcomp.cc | 579 +++++++++++++++++++++++++++++++++++++++++++ src/misc/intvcomp.hh | 561 ++--------------------------------------- 4 files changed, 613 insertions(+), 536 deletions(-) create mode 100644 src/misc/intvcomp.cc diff --git a/ChangeLog b/ChangeLog index 0ef1b8d0e..e8e4d876b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2011-04-09 Alexandre Duret-Lutz + + Move the compression routines into their own *.cc file. + + * src/misc/intvcomp.hh: Move all code... + * src/misc/intvcomp.cc: ... in this new file. + * src/misc/Makefile.am: Add invcomp.cc + 2011-04-09 Alexandre Duret-Lutz DVE2: Use mspool for compressed states. diff --git a/src/misc/Makefile.am b/src/misc/Makefile.am index a14f5af78..a9dbf66f7 100644 --- a/src/misc/Makefile.am +++ b/src/misc/Makefile.am @@ -55,6 +55,7 @@ libmisc_la_SOURCES = \ bddop.cc \ escape.cc \ freelist.cc \ + intvcomp.cc \ memusage.cc \ minato.cc \ modgray.cc \ diff --git a/src/misc/intvcomp.cc b/src/misc/intvcomp.cc new file mode 100644 index 000000000..e89cef1be --- /dev/null +++ b/src/misc/intvcomp.cc @@ -0,0 +1,579 @@ +// Copyright (C) 2011 Laboratoire de Recherche et Developpement de +// l'Epita (LRDE). +// +// This file is part of Spot, a model checking library. +// +// Spot is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// Spot is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +// License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Spot; see the file COPYING. If not, write to the Free +// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +// 02111-1307, USA. + +#include +#include +#include "intvcomp.hh" + +namespace spot +{ + + // Compression scheme + // ------------------ + // + // Assumptions: + // - small and positive values are more frequent than negative + // and large values. + // - 0 is the most frequent value + // - repeated values (esp. repeated 0s occur often). + // + // 00 encodes "value 0" + // 010 encodes "value 1" + // 011 encodes "a value in [2..5]" followed by 2 bits + // 100 encodes "a value in [6..22]" followed by 4 bits + // 101 encodes "repeat prev. value [1..8] times" followed by 3 bits count + // 110 encodes "repeat prev. value [9..40] times" followed by 5 bits count + // 111 encodes "an int value" followed by 32 bits + // + // If 101 or 110 occur at the start, the value to repeat is 0. + + namespace + { + + template + class stream_compression_base + { + static const unsigned int max_bits = sizeof(unsigned int) * 8; + + public: + stream_compression_base() + : cur_(0), bits_left_(max_bits) + { + } + + void emit(unsigned int val) + { + if (val == 0) + { + self().push_bits(0x0, 2, 0x3); + } + else if (val == 1) + { + self().push_bits(0x2, 3, 0x7); + } + else if (val >= 2 && val <= 5) + { + self().push_bits(0x3, 3, 0x7); + self().push_bits(val - 2, 2, 0x3); + } + else if (val >= 6 && val <= 22) + { + self().push_bits(0x4, 3, 0x7); + self().push_bits(val - 6, 4, 0xf); + } + else + { + assert(val > 22); + self().push_bits(0x7, 3, 0x7); + self().push_bits(val, 32, -1U); + } + } + + void run() + { + unsigned int last_val = 0; + + while (self().have_data()) + { + unsigned int val = self().next_data(); + // Repeated value? Try to find more. + if (val == last_val) + { + unsigned int count = 1; + while (count <= 41 && self().skip_if(val)) + ++count; + + if ((val == 0 && count < 3) || (val == 1 && count == 1)) + { + // it is more efficient to emit 0 once or twice directly + // (e.g., 00 00 vs. 011 11) + // for value 1, repetition is worthwhile for count > 1 + // (e.g., 010 010 vs. 011 00) + while (count--) + emit(val); + } + else if (count < 9) + { + self().push_bits(0x5, 3, 0x7); + self().push_bits(count - 1, 3, 0x7); + } + else + { + self().push_bits(0x6, 3, 0x7); + self().push_bits(count - 9, 5, 0x1f); + } + } + else + { + emit(val); + last_val = val; + } + } + flush(); + } + + // This version assumes there is at least n bits free in cur_. + void + push_bits_unchecked(unsigned int bits, unsigned int n, unsigned int mask) + { + cur_ <<= n; + cur_ |= (bits & mask); + if (bits_left_ -= n) + return; + + self().push_data(cur_); + cur_ = 0; + bits_left_ = max_bits; + } + + void + push_bits(unsigned int bits, unsigned int n, unsigned int mask) + { + if (n <= bits_left_) + { + push_bits_unchecked(bits, n, mask); + return; + } + + // bits_left_ < n + + unsigned int right_bit_count = n - bits_left_; + unsigned int left = bits >> right_bit_count; + push_bits_unchecked(left, bits_left_, (1 << bits_left_) - 1); + push_bits_unchecked(bits, right_bit_count, (1 << right_bit_count) - 1); + } + + void flush() + { + if (bits_left_ == max_bits) + return; + cur_ <<= bits_left_; + self().push_data(cur_); + } + + protected: + Self& self() + { + return static_cast(*this); + } + + const Self& self() const + { + return static_cast(*this); + } + + unsigned int cur_; + unsigned int bits_left_; + }; + + class int_array_vector_compression: + public stream_compression_base + { + public: + int_array_vector_compression(const int* array, size_t n) + : array_(array), n_(n), pos_(0), result_(new std::vector) + { + } + + void push_data(unsigned int i) + { + result_->push_back(i); + } + + const std::vector* + result() const + { + return result_; + } + + bool have_data() const + { + return pos_ < n_; + } + + unsigned int next_data() + { + return static_cast(array_[pos_++]); + } + + bool skip_if(unsigned int val) + { + if (!have_data()) + return false; + + if (static_cast(array_[pos_]) != val) + return false; + + ++pos_; + return true; + } + + protected: + const int* array_; + size_t n_; + size_t pos_; + std::vector* result_; + }; + + class int_array_array_compression: + public stream_compression_base + { + public: + int_array_array_compression(const int* array, size_t n, + int* dest, size_t& dest_n) + : array_(array), n_(n), pos_(0), + result_size_(dest_n), result_(dest), result_end_(dest + dest_n) + { + result_size_ = 0; // this resets dest_n. + } + + void push_data(unsigned int i) + { + assert(result_ < result_end_); + ++result_size_; + *result_++ = static_cast(i); + } + + bool have_data() const + { + return pos_ < n_; + } + + unsigned int next_data() + { + return static_cast(array_[pos_++]); + } + + bool skip_if(unsigned int val) + { + if (!have_data()) + return false; + + if (static_cast(array_[pos_]) != val) + return false; + + ++pos_; + return true; + } + + protected: + const int* array_; + size_t n_; + size_t pos_; + size_t& result_size_; + int* result_; + int* result_end_; + }; + } + + const std::vector* + int_array_vector_compress(const int* array, size_t n) + { + int_array_vector_compression c(array, n); + c.run(); + return c.result(); + } + + void + int_array_array_compress(const int* array, size_t n, + int* dest, size_t& dest_size) + { + int_array_array_compression c(array, n, dest, dest_size); + c.run(); + } + + ////////////////////////////////////////////////////////////////////// + + namespace + { + + template + class stream_decompression_base + { + static const unsigned int max_bits = sizeof(unsigned int) * 8; + + public: + void refill() + { + oncemore: + unsigned int fill_size = max_bits - look_bits_; + if (fill_size > buffer_bits_) + fill_size = buffer_bits_; + + look_ <<= fill_size; + look_ |= (buffer_ >> (buffer_bits_ - fill_size)); + buffer_bits_ -= fill_size; + buffer_mask_ >>= fill_size; + buffer_ &= buffer_mask_; + look_bits_ += fill_size; + + if (buffer_bits_ == 0 && self().have_comp_data()) + { + buffer_ = self().next_comp_data(); + buffer_bits_ = max_bits; + buffer_mask_ = -1U; + if (look_bits_ != max_bits) + goto oncemore; + } + + // Do not fold these two cases, because we cannot write + // "x<> (look_bits_ - n); + } + + void skip_n_bits(unsigned int n) + { + assert (n <= look_bits_); + look_bits_ -= n; + look_mask_ >>= n; + look_ &= look_mask_; + } + + unsigned int get_n_bits(unsigned int n) + { + if (look_bits_ < n) + refill(); + look_bits_ -= n; + unsigned int val = look_ >> look_bits_; + look_mask_ >>= n; + look_ &= look_mask_; + return val; + } + + void run() + { + if (!self().have_comp_data()) + return; + + look_ = self().next_comp_data(); + look_bits_ = max_bits; + look_mask_ = -1U; + if (self().have_comp_data()) + { + buffer_ = self().next_comp_data(); + buffer_bits_ = max_bits; + buffer_mask_ = -1U; + } + else + { + buffer_ = 0; + buffer_bits_ = 0; + buffer_mask_ = 0; + } + + while (!self().complete()) + { + unsigned int token = look_n_bits(3); + switch (token) + { + case 0x0: // 00[0] + case 0x1: // 00[1] + skip_n_bits(2); + self().push_data(0); + break; + case 0x2: // 010 + skip_n_bits(3); + self().push_data(1); + break; + case 0x3: // 011 + skip_n_bits(3); + self().push_data(2 + get_n_bits(2)); + break; + case 0x4: // 100 + skip_n_bits(3); + self().push_data(6 + get_n_bits(4)); + break; + case 0x5: // 101 + skip_n_bits(3); + self().repeat(1 + get_n_bits(3)); + break; + case 0x6: // 110 + skip_n_bits(3); + self().repeat(9 + get_n_bits(5)); + break; + case 0x7: // 111 + skip_n_bits(3); + self().push_data(get_n_bits(32)); + break; + default: + assert(0); + } + } + } + + + protected: + Self& self() + { + return static_cast(*this); + } + + const Self& self() const + { + return static_cast(*this); + } + + unsigned int look_; + unsigned int look_bits_; + unsigned int look_mask_; + unsigned int buffer_; + unsigned int buffer_bits_; + unsigned int buffer_mask_; + }; + + class int_vector_array_decompression: + public stream_decompression_base + { + public: + int_vector_array_decompression(const std::vector* array, + int* res, + size_t size) + : prev_(0), array_(array), n_(array->size()), pos_(0), result_(res), + size_(size) + { + } + + bool complete() const + { + return size_ == 0; + } + + void push_data(int i) + { + prev_ = i; + *result_++ = i; + --size_; + } + + void repeat(unsigned int i) + { + size_ -= i; + while (i--) + *result_++ = prev_; + } + + bool have_comp_data() const + { + return pos_ < n_; + } + + unsigned int next_comp_data() + { + return (*array_)[pos_++]; + } + + protected: + int prev_; + const std::vector* array_; + size_t n_; + size_t pos_; + int* result_; + size_t size_; + }; + + class int_array_array_decompression: + public stream_decompression_base + { + public: + int_array_array_decompression(const int* array, + size_t array_size, + int* res, + size_t size) + : prev_(0), array_(array), n_(array_size), pos_(0), result_(res), + size_(size) + { + } + + bool complete() const + { + return size_ == 0; + } + + void push_data(int i) + { + prev_ = i; + *result_++ = i; + --size_; + } + + void repeat(unsigned int i) + { + size_ -= i; + while (i--) + *result_++ = prev_; + } + + bool have_comp_data() const + { + return pos_ < n_; + } + + unsigned int next_comp_data() + { + return array_[pos_++]; + } + + protected: + int prev_; + const int* array_; + size_t n_; + size_t pos_; + int* result_; + size_t size_; + }; + + } + + void + int_vector_array_decompress(const std::vector* array, int* res, + size_t size) + { + int_vector_array_decompression c(array, res, size); + c.run(); + } + + void + int_array_array_decompress(const int* array, size_t array_size, + int* res, size_t size) + { + int_array_array_decompression c(array, array_size, res, size); + c.run(); + } + + +} diff --git a/src/misc/intvcomp.hh b/src/misc/intvcomp.hh index 1b18dd42e..056274478 100644 --- a/src/misc/intvcomp.hh +++ b/src/misc/intvcomp.hh @@ -21,555 +21,44 @@ #ifndef SPOT_MISC_INTVCOMP_HH # define SPOT_MISC_INTVCOMP_HH -#include #include -#include namespace spot { + /// \addtogroup misc_tools + /// @{ - // Compression scheme - // ------------------ - // - // Assumptions: - // - small and positive values are more frequent than negative - // and large values. - // - 0 is the most frequent value - // - repeated values (esp. repeated 0s occur often). - // - // 00 encodes "value 0" - // 010 encodes "value 1" - // 011 encodes "a value in [2..5]" followed by 2 bits - // 100 encodes "a value in [6..22]" followed by 4 bits - // 101 encodes "repeat prev. value [1..8] times" followed by 3 bits count - // 110 encodes "repeat prev. value [9..40] times" followed by 5 bits count - // 111 encodes "an int value" followed by 32 bits - // - // If 101 or 110 occur at the start, the value to repeat is 0. - - template - class stream_compression_base - { - static const unsigned int max_bits = sizeof(unsigned int) * 8; - - public: - stream_compression_base() - : cur_(0), bits_left_(max_bits) - { - } - - void emit(unsigned int val) - { - if (val == 0) - { - self().push_bits(0x0, 2, 0x3); - } - else if (val == 1) - { - self().push_bits(0x2, 3, 0x7); - } - else if (val >= 2 && val <= 5) - { - self().push_bits(0x3, 3, 0x7); - self().push_bits(val - 2, 2, 0x3); - } - else if (val >= 6 && val <= 22) - { - self().push_bits(0x4, 3, 0x7); - self().push_bits(val - 6, 4, 0xf); - } - else - { - assert(val > 22); - self().push_bits(0x7, 3, 0x7); - self().push_bits(val, 32, -1U); - } - } - - void run() - { - unsigned int last_val = 0; - - while (self().have_data()) - { - unsigned int val = self().next_data(); - // Repeated value? Try to find more. - if (val == last_val) - { - unsigned int count = 1; - while (count <= 41 && self().skip_if(val)) - ++count; - - if ((val == 0 && count < 3) || (val == 1 && count == 1)) - { - // it is more efficient to emit 0 once or twice directly - // (e.g., 00 00 vs. 011 11) - // for value 1, repetition is worthwhile for count > 1 - // (e.g., 010 010 vs. 011 00) - while (count--) - emit(val); - } - else if (count < 9) - { - self().push_bits(0x5, 3, 0x7); - self().push_bits(count - 1, 3, 0x7); - } - else - { - self().push_bits(0x6, 3, 0x7); - self().push_bits(count - 9, 5, 0x1f); - } - } - else - { - emit(val); - last_val = val; - } - } - flush(); - } - - // This version assumes there is at least n bits free in cur_. - void - push_bits_unchecked(unsigned int bits, unsigned int n, unsigned int mask) - { - cur_ <<= n; - cur_ |= (bits & mask); - if (bits_left_ -= n) - return; - - self().push_data(cur_); - cur_ = 0; - bits_left_ = max_bits; - } - - void - push_bits(unsigned int bits, unsigned int n, unsigned int mask) - { - if (n <= bits_left_) - { - push_bits_unchecked(bits, n, mask); - return; - } - - // bits_left_ < n - - unsigned int right_bit_count = n - bits_left_; - unsigned int left = bits >> right_bit_count; - push_bits_unchecked(left, bits_left_, (1 << bits_left_) - 1); - push_bits_unchecked(bits, right_bit_count, (1 << right_bit_count) - 1); - } - - void flush() - { - if (bits_left_ == max_bits) - return; - cur_ <<= bits_left_; - self().push_data(cur_); - } - - protected: - Self& self() - { - return static_cast(*this); - } - - const Self& self() const - { - return static_cast(*this); - } - - unsigned int cur_; - unsigned int bits_left_; - }; - - class int_array_vector_compression: - public stream_compression_base - { - public: - int_array_vector_compression(const int* array, size_t n) - : array_(array), n_(n), pos_(0), result_(new std::vector) - { - } - - void push_data(unsigned int i) - { - result_->push_back(i); - } - - const std::vector* - result() const - { - return result_; - } - - bool have_data() const - { - return pos_ < n_; - } - - unsigned int next_data() - { - return static_cast(array_[pos_++]); - } - - bool skip_if(unsigned int val) - { - if (!have_data()) - return false; - - if (static_cast(array_[pos_]) != val) - return false; - - ++pos_; - return true; - } - - protected: - const int* array_; - size_t n_; - size_t pos_; - std::vector* result_; - }; - - class int_array_array_compression: - public stream_compression_base - { - public: - int_array_array_compression(const int* array, size_t n, - int* dest, size_t& dest_n) - : array_(array), n_(n), pos_(0), - result_size_(dest_n), result_(dest), result_end_(dest + dest_n) - { - result_size_ = 0; // this resets dest_n. - } - - void push_data(unsigned int i) - { - assert(result_ < result_end_); - ++result_size_; - *result_++ = static_cast(i); - } - - bool have_data() const - { - return pos_ < n_; - } - - unsigned int next_data() - { - return static_cast(array_[pos_++]); - } - - bool skip_if(unsigned int val) - { - if (!have_data()) - return false; - - if (static_cast(array_[pos_]) != val) - return false; - - ++pos_; - return true; - } - - protected: - const int* array_; - size_t n_; - size_t pos_; - size_t& result_size_; - int* result_; - int* result_end_; - }; - + /// Compress an int array if size \a n into a vector of unsigned int. const std::vector* - int_array_vector_compress(const int* array, size_t n) - { - int_array_vector_compression c(array, n); - c.run(); - return c.result(); - } + int_array_vector_compress(const int* array, size_t n); + /// \brief Uncompress a vector of unsigned int into an int array if + /// size \a size. + /// + /// \a size must be the exact expected size of uncompressed array. + void + int_vector_array_decompress(const std::vector* array, + int* res, size_t size); + + /// \brief Compress an int array of size \a n into a int array. + /// + /// The destination array should be at least \a dest_size large An + /// assert will be triggered if \a dest_size is too small. On + /// return, \a dest_size will be set to the actually number of int + /// filled in \a dest void int_array_array_compress(const int* array, size_t n, - int* dest, size_t& dest_size) - { - int_array_array_compression c(array, n, dest, dest_size); - c.run(); - } - - ////////////////////////////////////////////////////////////////////// - - template - class stream_decompression_base - { - static const unsigned int max_bits = sizeof(unsigned int) * 8; - - public: - void refill() - { - oncemore: - unsigned int fill_size = max_bits - look_bits_; - if (fill_size > buffer_bits_) - fill_size = buffer_bits_; - - look_ <<= fill_size; - look_ |= (buffer_ >> (buffer_bits_ - fill_size)); - buffer_bits_ -= fill_size; - buffer_mask_ >>= fill_size; - buffer_ &= buffer_mask_; - look_bits_ += fill_size; - - if (buffer_bits_ == 0 && self().have_comp_data()) - { - buffer_ = self().next_comp_data(); - buffer_bits_ = max_bits; - buffer_mask_ = -1U; - if (look_bits_ != max_bits) - goto oncemore; - } - - // Do not fold these two cases, because we cannot write - // "x<> (look_bits_ - n); - } - - void skip_n_bits(unsigned int n) - { - assert (n <= look_bits_); - look_bits_ -= n; - look_mask_ >>= n; - look_ &= look_mask_; - } - - unsigned int get_n_bits(unsigned int n) - { - if (look_bits_ < n) - refill(); - look_bits_ -= n; - unsigned int val = look_ >> look_bits_; - look_mask_ >>= n; - look_ &= look_mask_; - return val; - } - - void run() - { - if (!self().have_comp_data()) - return; - - look_ = self().next_comp_data(); - look_bits_ = max_bits; - look_mask_ = -1U; - if (self().have_comp_data()) - { - buffer_ = self().next_comp_data(); - buffer_bits_ = max_bits; - buffer_mask_ = -1U; - } - else - { - buffer_ = 0; - buffer_bits_ = 0; - buffer_mask_ = 0; - } - - while (!self().complete()) - { - unsigned int token = look_n_bits(3); - switch (token) - { - case 0x0: // 00[0] - case 0x1: // 00[1] - skip_n_bits(2); - self().push_data(0); - break; - case 0x2: // 010 - skip_n_bits(3); - self().push_data(1); - break; - case 0x3: // 011 - skip_n_bits(3); - self().push_data(2 + get_n_bits(2)); - break; - case 0x4: // 100 - skip_n_bits(3); - self().push_data(6 + get_n_bits(4)); - break; - case 0x5: // 101 - skip_n_bits(3); - self().repeat(1 + get_n_bits(3)); - break; - case 0x6: // 110 - skip_n_bits(3); - self().repeat(9 + get_n_bits(5)); - break; - case 0x7: // 111 - skip_n_bits(3); - self().push_data(get_n_bits(32)); - break; - default: - assert(0); - } - } - } - - - protected: - Self& self() - { - return static_cast(*this); - } - - const Self& self() const - { - return static_cast(*this); - } - - unsigned int look_; - unsigned int look_bits_; - unsigned int look_mask_; - unsigned int buffer_; - unsigned int buffer_bits_; - unsigned int buffer_mask_; - }; - - class int_vector_array_decompression: - public stream_decompression_base - { - public: - int_vector_array_decompression(const std::vector* array, - int* res, - size_t size) - : prev_(0), array_(array), n_(array->size()), pos_(0), result_(res), - size_(size) - { - } - - bool complete() const - { - return size_ == 0; - } - - void push_data(int i) - { - prev_ = i; - *result_++ = i; - --size_; - } - - void repeat(unsigned int i) - { - size_ -= i; - while (i--) - *result_++ = prev_; - } - - bool have_comp_data() const - { - return pos_ < n_; - } - - unsigned int next_comp_data() - { - return (*array_)[pos_++]; - } - - protected: - int prev_; - const std::vector* array_; - size_t n_; - size_t pos_; - int* result_; - size_t size_; - }; - - class int_array_array_decompression: - public stream_decompression_base - { - public: - int_array_array_decompression(const int* array, - size_t array_size, - int* res, - size_t size) - : prev_(0), array_(array), n_(array_size), pos_(0), result_(res), - size_(size) - { - } - - bool complete() const - { - return size_ == 0; - } - - void push_data(int i) - { - prev_ = i; - *result_++ = i; - --size_; - } - - void repeat(unsigned int i) - { - size_ -= i; - while (i--) - *result_++ = prev_; - } - - bool have_comp_data() const - { - return pos_ < n_; - } - - unsigned int next_comp_data() - { - return array_[pos_++]; - } - - protected: - int prev_; - const int* array_; - size_t n_; - size_t pos_; - int* result_; - size_t size_; - }; - - void - int_vector_array_decompress(const std::vector* array, int* res, - size_t size) - { - int_vector_array_decompression c(array, res, size); - c.run(); - } + int* dest, size_t& dest_size); + /// \brief Uncompress an int array of size \a array_size into a int + /// array of size \size. + /// + /// \a size must be the exact expected size of uncompressed array. void int_array_array_decompress(const int* array, size_t array_size, - int* res, size_t size) - { - int_array_array_decompression c(array, array_size, res, size); - c.run(); - } - + int* res, size_t size); + /// @} } #endif // SPOT_MISC_INTVCOMP_HH