// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "storage/sparse_array_image.h"
#include "base/base.h"
using mozc::sparse_array_image::BitArray;
using mozc::sparse_array_image::BitTreeNode;
using mozc::sparse_array_image::ByteStream;
namespace mozc {
namespace sparse_array_image {
// Trie node. Used to build sparse array image.
struct BitTreeNode {
~BitTreeNode();
vector<struct BitTreeNode *> children;
int mask;
};
BitTreeNode::~BitTreeNode() {
for (int i = 0; i < children.size(); ++i) {
delete children[i];
}
}
// Bit array with rank operation.
class BitArray {
public:
BitArray(const char *image, int size);
~BitArray();
int Rank(int n);
char GetByte(int idx);
private:
int PopCount(unsigned int p);
const char *image_;
int size_;
// Pre computed rank value for each 4 bytes.
int *rank_array_;
};
BitArray::BitArray(const char *image, int size) : image_(image), size_(size) {
int rank_array_len = (size + 3) / 4;
rank_array_ = new int[rank_array_len];
int r = 0;
const unsigned int *p = reinterpret_cast<const unsigned int *>(image_);
for (int i = 0; i < rank_array_len; ++i) {
rank_array_[i] = r;
r += PopCount(*p);
++p;
}
}
BitArray::~BitArray() {
delete [] rank_array_;
}
int BitArray::Rank(int n) {
int idx = n / 32;
int rem = n % 32;
const unsigned int *p = reinterpret_cast<const unsigned int *>(image_);
int rank = rank_array_[idx];
if (rem) {
rank += PopCount(p[idx] << (32 - rem));
}
return rank;
}
char BitArray::GetByte(int idx) {
return image_[idx];
}
int BitArray::PopCount(unsigned int x) {
x = ((x & 0xaaaaaaaa) >> 1) + (x & 0x55555555);
x = ((x & 0xcccccccc) >> 2) + (x & 0x33333333);
x = ((x >> 4) + x) & 0x0f0f0f0f;
x = (x >> 8) + x;
x = ((x >> 16) + x) & 0x3f;
return x;
}
// Byte stream to store partially built sparse array image.
class ByteStream {
public:
ByteStream();
void PushByte(uint8 b);
void PushInt(int n);
void PushString(const string &s);
void PushPadding(int pad);
int GetSize() const;
const char *GetImage() const;
const string &GetString() const;
private:
string str_;
};
ByteStream::ByteStream() {
}
void ByteStream::PushByte(uint8 b) {
str_ += static_cast<char>(b);
}
void ByteStream::PushInt(int n) {
for (int i = 0; i < 4; ++i) {
PushByte(n & 255);
n >>= 8;
}
}
void ByteStream::PushString(const string &s) {
str_ += s;
}
void ByteStream::PushPadding(int pad) {
int rem = pad - (str_.size() % pad);
if (rem != pad) {
for (int i = 0; i < rem; ++i) {
PushByte(0);
}
}
}
int ByteStream::GetSize() const {
return str_.size();
}
const char *ByteStream::GetImage() const {
return str_.data();
}
const string &ByteStream::GetString() const {
return str_;
}
} // namespace sparse_array_image
SparseArrayBuilder::SparseArrayBuilder() : use_1byte_value_(false),
root_node_(NULL),
value_stream_(new ByteStream),
main_stream_(new ByteStream) {
num_bits_per_node_ = (1 << kNumBitsPerLevel);
tree_depth_ = 32 / kNumBitsPerLevel;
if (32 % kNumBitsPerLevel) {
++tree_depth_;
}
}
SparseArrayBuilder::~SparseArrayBuilder() {
delete root_node_;
for (size_t i = 0; i < streams_.size(); ++i) {
delete streams_[i];
}
}
void SparseArrayBuilder::SetUse1ByteValue(bool use_1byte_value) {
use_1byte_value_ = use_1byte_value;
}
void SparseArrayBuilder::AddValue(uint32 key, int val) {
values_[key] = val;
}
void SparseArrayBuilder::Build() {
CHECK_EQ(main_stream_->GetSize(), 0)
<< "sparse array was already built.";
LOG(INFO) << "Building sparse array with "
<< values_.size() << " values";
root_node_ = AllocNode();
num_nodes_ = 0;
for (map<uint32, int>::const_iterator it = values_.begin();
it != values_.end(); ++it) {
AddNode(it->first);
const int value = it->second;
if (use_1byte_value_) {
CHECK_LT(value, 256) << "value should be within a byte.";
value_stream_->PushByte(value);
} else {
value_stream_->PushByte(value & 0xff);
value_stream_->PushByte((value >> 8) & 0xff);
}
}
LOG(INFO) << "allocated " << num_nodes_ << " nodes";
Serialize();
Concatenate();
LOG(INFO) << "image size=" << main_stream_->GetSize() << "bytes";
const int value_width = use_1byte_value_ ? 1 : 2;
const float ratio =
static_cast<float>(main_stream_->GetSize() -
(values_.size() * value_width))
/ values_.size();
LOG(INFO) << "trie over head per each value=" << ratio << "bytes";
}
int SparseArrayBuilder::GetSize() const {
return main_stream_->GetSize();
}
const char *SparseArrayBuilder::GetImage() const {
return main_stream_->GetImage();
}
void SparseArrayBuilder::AddNode(uint32 key) {
BitTreeNode *current_node = root_node_;
for (int i = 0; i < tree_depth_; ++i) {
const int shift_count = kNumBitsPerLevel * (tree_depth_ - i - 1);
const int idx = (key >> shift_count) % (1 << kNumBitsPerLevel);
if (!current_node->children[idx]) {
current_node->children[idx] = AllocNode();
current_node->mask |= (1 << idx);
}
current_node = current_node->children[idx];
}
}
BitTreeNode *SparseArrayBuilder::AllocNode() {
BitTreeNode *node = new BitTreeNode;
node->children.resize(num_bits_per_node_);
for (int i = 0; i < num_bits_per_node_; ++i) {
node->children[i] = NULL;
}
node->mask = 0;
++num_nodes_;
return node;
}
void SparseArrayBuilder::Serialize() {
vector<BitTreeNode *> queue;
vector<BitTreeNode *> child_queue;
queue.push_back(root_node_);
for (int level = 0; level < 11; ++level) {
ByteStream *stream = new ByteStream;
streams_.push_back(stream);
child_queue.clear();
for (size_t i = 0; i < queue.size(); ++i) {
BitTreeNode *node = queue[i];
for (size_t j = 0; j < node->children.size(); ++j) {
if (node->children[j]) {
child_queue.push_back(node->children[j]);
}
}
stream->PushByte(node->mask);
}
queue = child_queue;
}
for (size_t i = 0; i < streams_.size(); ++i) {
ByteStream *stream = streams_[i];
stream->PushPadding(4);
}
}
void SparseArrayBuilder::Concatenate() {
// num bits per level.
main_stream_->PushInt(kNumBitsPerLevel);
if (use_1byte_value_) {
main_stream_->PushInt(1);
} else {
main_stream_->PushInt(2);
}
main_stream_->PushInt(value_stream_->GetSize());
// write streams.
for (size_t i = 0; i < streams_.size(); ++i) {
ByteStream *stream = streams_[i];
main_stream_->PushInt(stream->GetSize());
}
for (size_t i = 0; i < streams_.size(); ++i) {
ByteStream *stream = streams_[i];
main_stream_->PushString(stream->GetString());
}
main_stream_->PushString(value_stream_->GetString());
// trailer for debug
main_stream_->PushInt(kTrailerMagic);
}
SparseArrayImage::SparseArrayImage(const char *image, int size)
: image_(image), size_(size) {
DCHECK(image) << "got empty image";
const char *p = image_;
num_bits_per_level_ = ReadInt(p);
p += 4;
const int use_1byte_value_flag = ReadInt(p);
use_1byte_value_ = (use_1byte_value_flag == 1);
p += 4;
values_size_ = ReadInt(p);
p += 4;
num_levels_ = 32 / num_bits_per_level_;
if (32 % num_bits_per_level_ != 0) {
++num_levels_;
}
const char *bytes = p + (num_levels_ * 4);
for (int i = 0; i < num_levels_; ++i) {
const int level_size = ReadInt(p);
p += 4;
BitArray *array = new BitArray(bytes, level_size);
bytes += level_size;
arrays_.push_back(array);
}
values_ = bytes;
bytes += values_size_;
const int magic = ReadInt(bytes);
CHECK(magic == SparseArrayBuilder::kTrailerMagic)
<< "trailer magic mismatch";
VLOG(1) << "SparseArrayImage: "
<< values_size_ / 2 << " values";
}
SparseArrayImage::~SparseArrayImage() {
for (int i = 0; i < arrays_.size(); ++i) {
delete arrays_[i];
}
}
int SparseArrayImage::ReadInt(const char *p) const {
const int *n = reinterpret_cast<const int *>(p);
return *n;
}
int SparseArrayImage::Peek(uint32 index) const {
int byte_offset = 0;
for (int level = 0; level < num_levels_; ++level) {
int shift_count = num_bits_per_level_ * (num_levels_ - level - 1);
int idx = (index >> shift_count) % (1 << num_bits_per_level_);
BitArray *array = arrays_[level];
// TODO(tabata): fix more than 8 bits.
char mask = array->GetByte(byte_offset);
if (!(mask & (1 << idx))) {
return kInvalidValueIndex;
}
byte_offset = array->Rank(byte_offset * 8 + idx);
}
return byte_offset;
}
int SparseArrayImage::GetValue(int nth) const {
if (use_1byte_value_) {
const uint8 *v = reinterpret_cast<const uint8 *>(&values_[nth]);
return *v;
}
const uint16 *v = reinterpret_cast<const uint16 *>(&values_[nth * 2]);
return *v;
}
} // namespace mozc