From bc1275455cd43b4535f2cde4e006b8d966f5c0a9 Mon Sep 17 00:00:00 2001 From: Alexandre Duret-Lutz Date: Thu, 7 Apr 2011 19:39:20 +0200 Subject: [PATCH] Preliminary implementation of an int array compressor. * src/misc/intvcomp.hh: New file. * src/misc/Makefile.am: Add it. * src/tgbatest/intvcomp.cc, src/tgbatest/intvcomp.test: New files. * src/tgbatest/Makefile.am: Add them. --- ChangeLog | 9 + src/misc/Makefile.am | 1 + src/misc/intvcomp.hh | 457 +++++++++++++++++++++++++++++++++++++ src/tgbatest/Makefile.am | 3 + src/tgbatest/intvcomp.cc | 87 +++++++ src/tgbatest/intvcomp.test | 27 +++ 6 files changed, 584 insertions(+) create mode 100644 src/misc/intvcomp.hh create mode 100644 src/tgbatest/intvcomp.cc create mode 100755 src/tgbatest/intvcomp.test diff --git a/ChangeLog b/ChangeLog index 8797f2da5..80cf1ea52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2011-04-08 Alexandre Duret-Lutz + + Preliminary implementation of an int array compressor. + + * src/misc/intvcomp.hh: New file. + * src/misc/Makefile.am: Add it. + * src/tgbatest/intvcomp.cc, src/tgbatest/intvcomp.test: New files. + * src/tgbatest/Makefile.am: Add them. + 2011-04-09 Alexandre Duret-Lutz Fix two spurious segfaults in test cases for the Python interface. diff --git a/src/misc/Makefile.am b/src/misc/Makefile.am index d62b31fb7..fbc01c255 100644 --- a/src/misc/Makefile.am +++ b/src/misc/Makefile.am @@ -37,6 +37,7 @@ misc_HEADERS = \ freelist.hh \ hash.hh \ hashfunc.hh \ + intvcomp.hh \ ltstr.hh \ minato.hh \ memusage.hh \ diff --git a/src/misc/intvcomp.hh b/src/misc/intvcomp.hh new file mode 100644 index 000000000..e00f4b666 --- /dev/null +++ b/src/misc/intvcomp.hh @@ -0,0 +1,457 @@ +// Copyright (C) 2011 Laboratoire de Recherche et Developpement de +// l'Epita (LRDE). +// +// This file is part of Spot, a model checking library. 
//
// Spot is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// Spot is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
// License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Spot; see the file COPYING.  If not, write to the Free
// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
// 02111-1307, USA.

#ifndef SPOT_MISC_INTVCOMP_HH
# define SPOT_MISC_INTVCOMP_HH

#include <cassert>
#include <cstddef>
#include <vector>

namespace spot
{

  // Compression scheme
  // ------------------
  //
  // Assumptions:
  //  - small and positive values are more frequent than negative
  //    and large values.
  //  - 0 is the most frequent value
  //  - repeated values (esp. repeated 0s) occur often.
  //
  // 00 encodes "value 0"
  // 010 encodes "value 1"
  // 011 encodes "a value in [2..5]" followed by 2 bits
  // 100 encodes "a value in [6..21]" followed by 4 bits
  // 101 encodes "repeat prev. value [1..8] times" followed by 3 bits count
  // 110 encodes "repeat prev. value [9..40] times" followed by 5 bits count
  // 111 encodes "an int value" followed by 32 bits
  //
  // If 101 or 110 occur at the start, the value to repeat is 0.
  //
  // The bit stream is packed most-significant-bit first into
  // "unsigned int" words; the last word is padded with 0 bits.

  // Bit-level encoder implementing the scheme above.
  //
  // Self is the derived class (it is reached through a static_cast of
  // *this).  It must provide:
  //   bool have_data() const;          is there an input value left?
  //   unsigned int next_data();        consume and return the next value
  //   bool skip_if(unsigned int v);    consume the next value iff it equals v
  //   void push_data(unsigned int w);  receive one packed output word
  template <class Self>
  class stream_compression_base
  {
    static const unsigned int max_bits = sizeof(unsigned int) * 8;

  public:
    stream_compression_base()
      : cur_(0), bits_left_(max_bits)
    {
    }

    // Encode a single value (never a repetition).
    void emit(unsigned int val)
    {
      if (val == 0)
        {
          self().push_bits(0x0, 2, 0x3);
        }
      else if (val == 1)
        {
          self().push_bits(0x2, 3, 0x7);
        }
      else if (val <= 5)
        {
          self().push_bits(0x3, 3, 0x7);
          self().push_bits(val - 2, 2, 0x3);
        }
      else if (val <= 21)
        {
          // Only 16 values fit in the 4-bit payload: 6..21.  (22 used
          // to be accepted here; 22 - 6 == 16 was then truncated to 0
          // and decoded as 6.)
          self().push_bits(0x4, 3, 0x7);
          self().push_bits(val - 6, 4, 0xf);
        }
      else
        {
          // Everything else, including negative ints seen as large
          // unsigned values, is stored verbatim.
          self().push_bits(0x7, 3, 0x7);
          self().push_bits(val, 32, -1U);
        }
    }

    // Encode the whole input provided by Self, then flush the last
    // (partial) word.
    void run()
    {
      unsigned int last_val = 0;

      while (self().have_data())
        {
          unsigned int val = self().next_data();

          if (val != last_val)
            {
              emit(val);
              last_val = val;
              continue;
            }

          // Repeated value: gather as many occurrences as one repeat
          // token can describe.  The largest count that fits in the
          // 5-bit payload of token 110 is 9 + 31 == 40; longer runs
          // are simply split over several tokens.
          unsigned int count = 1;
          while (count < 40 && self().skip_if(val))
            ++count;

          if ((val == 0 && count < 3) || (val == 1 && count == 1))
            {
              // It is more efficient to emit 0 once or twice directly
              // (e.g., 00 00 vs. 101 001).  For value 1, a repeat
              // token only breaks even from count == 2
              // (e.g., 010 010 vs. 101 001).
              while (count--)
                emit(val);
            }
          else if (count < 9)
            {
              self().push_bits(0x5, 3, 0x7);
              self().push_bits(count - 1, 3, 0x7);
            }
          else
            {
              self().push_bits(0x6, 3, 0x7);
              self().push_bits(count - 9, 5, 0x1f);
            }
        }
      flush();
    }

    // Append the N low bits of BITS (selected by MASK) to the current
    // word.  This version assumes there are at least N bits free in
    // cur_.  The word is handed to Self::push_data() once it is full.
    void
    push_bits_unchecked(unsigned int bits, unsigned int n, unsigned int mask)
    {
      if (n == max_bits)
        {
          // n <= bits_left_ <= max_bits, so the current word is empty.
          // Shifting it by its full width would be undefined.
          cur_ = bits & mask;
        }
      else
        {
          cur_ <<= n;
          cur_ |= (bits & mask);
        }

      if (bits_left_ -= n)
        return;

      self().push_data(cur_);
      cur_ = 0;
      bits_left_ = max_bits;
    }

    // Append the N low bits of BITS (selected by MASK), splitting them
    // over two words when they do not fit in the current one.
    void
    push_bits(unsigned int bits, unsigned int n, unsigned int mask)
    {
      if (n <= bits_left_)
        {
          push_bits_unchecked(bits, n, mask);
          return;
        }

      // bits_left_ < n <= max_bits, and bits_left_ is never 0 here, so
      // both parts are in [1, max_bits - 1] and the unsigned shifts
      // below are well defined.
      unsigned int right_bit_count = n - bits_left_;
      unsigned int left = bits >> right_bit_count;
      push_bits_unchecked(left, bits_left_, (1U << bits_left_) - 1U);
      push_bits_unchecked(bits, right_bit_count,
                          (1U << right_bit_count) - 1U);
    }

    // Output the current word, left-aligned and padded with 0 bits,
    // unless it is empty.
    void flush()
    {
      if (bits_left_ == max_bits)
        return;
      cur_ <<= bits_left_;
      self().push_data(cur_);
    }

  protected:
    Self& self()
    {
      return static_cast<Self&>(*this);
    }

    const Self& self() const
    {
      return static_cast<const Self&>(*this);
    }

    unsigned int cur_;          // Word being assembled.
    unsigned int bits_left_;    // Free bits in cur_, in [1, max_bits].
  };

  // Compress an array of int into a freshly allocated vector of words.
  // The vector is allocated with new and is NOT released by this
  // class: whoever retrieves it with result() has to delete it.
  class int_array_compression:
    public stream_compression_base<int_array_compression>
  {
  public:
    int_array_compression(int* array, size_t n)
      : array_(array), n_(n), pos_(0), result_(new std::vector<unsigned int>)
    {
    }

    void push_data(unsigned int i)
    {
      result_->push_back(i);
    }

    const std::vector<unsigned int>*
    result() const
    {
      return result_;
    }

    bool have_data() const
    {
      return pos_ < n_;
    }

    unsigned int next_data()
    {
      return static_cast<unsigned int>(array_[pos_++]);
    }

    bool skip_if(unsigned int val)
    {
      if (!have_data())
        return false;

      if (static_cast<unsigned int>(array_[pos_]) != val)
        return false;

      ++pos_;
      return true;
    }

  protected:
    int* array_;
    size_t n_;
    size_t pos_;
    std::vector<unsigned int>* result_;
  };

  // Compress the N ints of ARRAY.  The caller owns (and must delete)
  // the returned vector.
  //
  // Declared inline because it is defined in a header: without it,
  // including this file from two translation units breaks the link.
  inline const std::vector<unsigned int>*
  int_array_compress(int* array, unsigned int n)
  {
    int_array_compression c(array, n);
    c.run();
    return c.result();
  }

  //////////////////////////////////////////////////////////////////////

  // Bit-level decoder for the scheme described at the top of this file.
  //
  // Self is the derived class.  It must provide:
  //   bool have_comp_data() const;     is there a packed word left?
  //   unsigned int next_comp_data();   consume and return the next word
  //   bool complete() const;           has all the output been produced?
  //   void push_data(...);             receive one decoded value
  //   void repeat(unsigned int n);     repeat the previous value n times
  //
  // Two words are kept around: look_ holds the look_bits_ bits that
  // are decoded next, buffer_ holds the buffer_bits_ bits that follow.
  // Both are right-aligned and all their unused high bits are 0.
  template <class Self>
  class stream_decompression_base
  {
    static const unsigned int max_bits = sizeof(unsigned int) * 8;

    // Mask selecting the N low bits, for N in [0, max_bits].  The two
    // cases cannot be folded because "1U << max_bits" is undefined.
    static unsigned int low_mask(unsigned int n)
    {
      return n >= max_bits ? -1U : (1U << n) - 1U;
    }

  public:
    stream_decompression_base()
      : look_(0), look_bits_(0), look_mask_(0),
        buffer_(0), buffer_bits_(0), buffer_mask_(0)
    {
    }

    // Move as many bits as possible from buffer_ (and from the
    // following input words) into look_.  At the end of the input,
    // look_ may remain partially filled.
    void refill()
    {
      for (;;)
        {
          unsigned int fill_size = max_bits - look_bits_;
          if (fill_size > buffer_bits_)
            fill_size = buffer_bits_;

          if (fill_size != 0)
            {
              // rest <= max_bits - 1, so the shift is well defined.
              const unsigned int rest = buffer_bits_ - fill_size;
              const unsigned int chunk = buffer_ >> rest;
              if (fill_size == max_bits)
                look_ = chunk;  // look_ was empty; do not shift by max_bits
              else
                look_ = (look_ << fill_size) | chunk;
              look_bits_ += fill_size;
              buffer_bits_ = rest;
              buffer_mask_ = low_mask(rest);
              buffer_ &= buffer_mask_;
            }

          if (buffer_bits_ != 0 || !self().have_comp_data())
            break;

          // The buffer is exhausted: fetch the next word right away,
          // and loop if look_ still has room for some of it.
          buffer_ = self().next_comp_data();
          buffer_bits_ = max_bits;
          buffer_mask_ = -1U;
          if (look_bits_ == max_bits)
            break;
        }
      look_mask_ = low_mask(look_bits_);
    }

    // Return the next N bits (1 <= N <= max_bits) without consuming
    // them.  If the stream holds fewer than N bits, the missing ones
    // read as 0: the encoder may legitimately end a stream with the
    // 2-bit token 00 flush against a word boundary, while the decoder
    // always peeks at 3 bits.
    unsigned int look_n_bits(unsigned int n)
    {
      assert(n > 0 && n <= max_bits);
      if (look_bits_ < n)
        refill();
      if (look_bits_ >= n)
        return look_ >> (look_bits_ - n);

      const unsigned int missing = n - look_bits_;
      return missing < max_bits ? look_ << missing : 0U;
    }

    // Consume N bits that are already available in look_.
    void skip_n_bits(unsigned int n)
    {
      assert (n <= look_bits_);
      look_bits_ -= n;
      look_mask_ = low_mask(look_bits_);
      look_ &= look_mask_;
    }

    // Consume and return the next N bits (1 <= N <= max_bits).
    unsigned int get_n_bits(unsigned int n)
    {
      assert(n > 0 && n <= max_bits);
      if (look_bits_ < n)
        refill();
      // A payload cut short can only come from a corrupted stream.
      assert(n <= look_bits_);
      look_bits_ -= n;
      unsigned int val = look_ >> look_bits_;
      // Recompute the mask instead of shifting it by n: n may be
      // max_bits (token 111), and such a shift is undefined.
      look_mask_ = low_mask(look_bits_);
      look_ &= look_mask_;
      return val;
    }

    // Decode tokens until Self reports that the output is complete.
    // Nothing is done if there is no input at all.
    void run()
    {
      if (!self().have_comp_data())
        return;

      look_ = self().next_comp_data();
      look_bits_ = max_bits;
      look_mask_ = -1U;
      if (self().have_comp_data())
        {
          buffer_ = self().next_comp_data();
          buffer_bits_ = max_bits;
          buffer_mask_ = -1U;
        }
      else
        {
          buffer_ = 0;
          buffer_bits_ = 0;
          buffer_mask_ = 0;
        }

      while (!self().complete())
        {
          unsigned int token = look_n_bits(3);
          switch (token)
            {
            case 0x0: // 00[0]
            case 0x1: // 00[1]
              skip_n_bits(2);
              self().push_data(0);
              break;
            case 0x2: // 010
              skip_n_bits(3);
              self().push_data(1);
              break;
            case 0x3: // 011
              skip_n_bits(3);
              self().push_data(2 + get_n_bits(2));
              break;
            case 0x4: // 100
              skip_n_bits(3);
              self().push_data(6 + get_n_bits(4));
              break;
            case 0x5: // 101
              skip_n_bits(3);
              self().repeat(1 + get_n_bits(3));
              break;
            case 0x6: // 110
              skip_n_bits(3);
              self().repeat(9 + get_n_bits(5));
              break;
            case 0x7: // 111
              skip_n_bits(3);
              self().push_data(get_n_bits(32));
              break;
            default:
              assert(0);
            }
        }
    }


  protected:
    Self& self()
    {
      return static_cast<Self&>(*this);
    }

    const Self& self() const
    {
      return static_cast<const Self&>(*this);
    }

    unsigned int look_;
    unsigned int look_bits_;
    unsigned int look_mask_;
    unsigned int buffer_;
    unsigned int buffer_bits_;
    unsigned int buffer_mask_;
  };

  // Decompress a vector of words into a caller-provided array of SIZE
  // ints.  Neither the input vector nor the output array is owned.
  class int_array_decompression:
    public stream_decompression_base<int_array_decompression>
  {
  public:
    int_array_decompression(const std::vector<unsigned int>* array, int* res,
                            size_t size)
      : prev_(0), array_(array), n_(array->size()), pos_(0), result_(res),
        size_(size)
    {
    }

    bool complete() const
    {
      return size_ == 0;
    }

    void push_data(int i)
    {
      prev_ = i;
      *result_++ = i;
      --size_;
    }

    void repeat(unsigned int i)
    {
      // A well-formed stream never repeats past the end of the output.
      // Clamp anyway, so that a corrupted stream can neither wrap
      // size_ around nor write outside the caller's buffer.
      assert(i <= size_);
      if (i > size_)
        i = static_cast<unsigned int>(size_);
      size_ -= i;
      while (i--)
        *result_++ = prev_;
    }

    bool have_comp_data() const
    {
      return pos_ < n_;
    }

    unsigned int next_comp_data()
    {
      return (*array_)[pos_++];
    }

  protected:
    int prev_;
    const std::vector<unsigned int>* array_;
    size_t n_;
    size_t pos_;
    int* result_;
    size_t size_;
  };

  // Decompress ARRAY into RES, which must have room for SIZE ints
  // (SIZE is the number of values that were originally compressed).
  //
  // Declared inline because it is defined in a header.
  inline void
  int_array_decompress(const std::vector<unsigned int>* array, int* res,
                       size_t size)
  {
    int_array_decompression c(array, res, size);
    c.run();
  }


}

#endif // SPOT_MISC_INTVCOMP_HH

/* The text below is not part of intvcomp.hh: it is the start of the
   next hunk of the patch this file was extracted from, kept verbatim.

diff --git a/src/tgbatest/Makefile.am b/src/tgbatest/Makefile.am
index 9597f7356..81609e46c 100644
--- a/src/tgbatest/Makefile.am
+++ b/src/tgbatest/Makefile.am
@@ -37,6 +37,7 @@ check_PROGRAMS = \
   explicit \
   expldot \
   explprod \
+  intvcomp \
   ltlprod \
   mixprod \
   powerset \
@@ -54,6 +55,7 @@ explicit_SOURCES = explicit.cc
 expldot_SOURCES = powerset.cc
 expldot_CXXFLAGS = -DDOTTY
 explprod_SOURCES = explprod.cc
+intvcomp_SOURCES = intvcomp.cc
 ltl2tgba_SOURCES = ltl2tgba.cc
 ltlprod_SOURCES = ltlprod.cc
 mixprod_SOURCES = mixprod.cc
@@ -69,6 +71,7 @@ tripprod_SOURCES = tripprod.cc
 # Keep this sorted by STRENGTH.  Test basic things first,
 # because such failures will be easier to diagnose and fix.
*/
TESTS = \ + intvcomp.test \ eltl2tgba.test \ explicit.test \ taatgba.test \ diff --git a/src/tgbatest/intvcomp.cc b/src/tgbatest/intvcomp.cc new file mode 100644 index 000000000..6462b081c --- /dev/null +++ b/src/tgbatest/intvcomp.cc @@ -0,0 +1,87 @@ +#include +#include "misc/intvcomp.hh" +#include + +int check(int* comp, int size, unsigned expected = 0) +{ + const std::vector* v = spot::int_array_compress(comp, size); + + std::cout << "C[" << v->size() << "] "; + for (size_t i = 0; i < v->size(); ++i) + std::cout << (*v)[i] << " "; + std::cout << std::endl; + + int* decomp = new int[size]; + spot::int_array_decompress(v, decomp, size); + + std::cout << "D[" << size << "] "; + for (int i = 0; i < size; ++i) + std::cout << decomp[i] << " "; + std::cout << std::endl; + + int res = memcmp(comp, decomp, size * sizeof(int)); + + if (res) + { + std::cout << "*** cmp error *** " << res << std::endl; + std::cout << "E[" << size << "] "; + for (int i = 0; i < size; ++i) + std::cout << comp[i] << " "; + std::cout << std::endl; + } + + if (expected && (v->size() * sizeof(int) != expected)) + { + std::cout << "*** size error *** (expected " + << expected << " bytes, got " << v->size() * sizeof(int) + << " bytes)" << std::endl; + res = 1; + } + + std::cout << std::endl; + + delete v; + delete[] decomp; + return !!res; +} + +int main() +{ + int errors = 0; + + int comp1[] = { 1, 0, 0, 0, 0, 0, 3, 3, 4, 0, 0, 0 }; + errors += check(comp1, sizeof(comp1) / sizeof(*comp1)); + + int comp2[] = { 3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3, 1 }; + errors += check(comp2, sizeof(comp2) / sizeof(*comp2)); + + int comp3[] = { 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, + 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0 }; + errors += check(comp3, sizeof(comp3) / sizeof(*comp3)); + + 
int comp4[] = { 1, 2, 1, 2, 1, 2, 2, 0 }; // 32 bits + errors += check(comp4, sizeof(comp4) / sizeof(*comp4), 4); + + int comp5[] = { 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 2, 0 }; // 64 bits + errors += check(comp5, sizeof(comp5) / sizeof(*comp5), 8); + + int comp6[] = { 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 2, 0, + 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 2, 0 }; // 128 bits + errors += check(comp6, sizeof(comp6) / sizeof(*comp6), 16); + + int comp7[] = { -4, -8, -10, 3, 49, 50, 0, 20, 13 }; + errors += check(comp7, sizeof(comp7) / sizeof(*comp7)); + + int comp8[] = { 4959, 6754, 8133, 10985, 11121, 14413, 17335, 20754, + 21317, 30008, 30381, 33494, 34935, 41210, 41417 }; + errors += check(comp8, sizeof(comp8) / sizeof(*comp8)); + + int comp9[] = { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + errors += check(comp9, sizeof(comp9) / sizeof(*comp9)); + + return errors; +} diff --git a/src/tgbatest/intvcomp.test b/src/tgbatest/intvcomp.test new file mode 100755 index 000000000..b1892568b --- /dev/null +++ b/src/tgbatest/intvcomp.test @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright (C) 2011 Laboratoire de Recherche et Développement +# de l'Epita (LRDE). +# +# This file is part of Spot, a model checking library. +# +# Spot is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Spot is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Spot; see the file COPYING. If not, write to the Free +# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + + +. 
./defs + +set -e + +run 0 ../intvcomp