Codebase list mozc / 7306a9d dictionary / system / value_dictionary.cc
7306a9d

Tree @7306a9d (Download .tar.gz)

value_dictionary.cc @7306a9draw · history · blame

// Copyright 2010-2012, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "dictionary/system/value_dictionary.h"

#include <algorithm>
#include <climits>
#include <string>

#include "base/base.h"
#include "base/util.h"
#include "base/flags.h"
#include "converter/node.h"
#include "dictionary/file/dictionary_file.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/rx/rx_trie.h"
#include "dictionary/system/codec_interface.h"

namespace mozc {

// Builds an unopened dictionary: allocates an empty rx trie and an empty
// dictionary file holder, and fetches the codec from
// SystemDictionaryCodecFactory. No dictionary data is loaded here.
ValueDictionary::ValueDictionary()
    : value_trie_(new rx::RxTrie),
      dictionary_file_(new DictionaryFile),
      codec_(dictionary::SystemDictionaryCodecFactory::GetCodec()) {}

// Nothing is released explicitly in the destructor body.
ValueDictionary::~ValueDictionary() {
}

// static
// Creates a ValueDictionary backed by the dictionary file at |filename|.
//
// Returns a heap-allocated instance on success (created with new; presumably
// the caller takes ownership), or NULL when the file cannot be opened or the
// value trie cannot be loaded from it. On every failure path the partially
// constructed instance is deleted before returning.
ValueDictionary *ValueDictionary::CreateValueDictionaryFromFile(
    const string &filename) {
  ValueDictionary *instance = new ValueDictionary();
  DCHECK(instance);
  if (!instance->dictionary_file_->OpenFromFile(filename)) {
    LOG(ERROR) << "Failed to open system dictionary file";
    // Release the instance here as well; otherwise it would be leaked.
    delete instance;
    return NULL;
  }
  if (!instance->OpenDictionaryFile()) {
    LOG(ERROR) << "Failed to create value dictionary";
    delete instance;
    return NULL;
  }
  return instance;
}

// static
// Creates a ValueDictionary from an in-memory dictionary image of |len| bytes
// starting at |ptr|.
//
// Returns a heap-allocated instance on success (created with new; presumably
// the caller takes ownership), or NULL when the image cannot be opened or the
// value trie cannot be loaded from it. On every failure path the partially
// constructed instance is deleted before returning.
ValueDictionary *ValueDictionary::CreateValueDictionaryFromImage(
    const char *ptr, int len) {
  // Try to keep the dictionary from being paged out.
  // We don't check the return value because the process doesn't necessarily
  // have the privilege to mlock.
  // Note that we don't munlock the space because it's always better to keep
  // the singleton system dictionary paged in as long as the process runs.
#ifndef OS_WINDOWS
  mlock(ptr, len);
#endif  // OS_WINDOWS
  ValueDictionary *instance = new ValueDictionary();
  DCHECK(instance);
  if (!instance->dictionary_file_->OpenFromImage(ptr, len)) {
    LOG(ERROR) << "Failed to open system dictionary file";
    // Release the instance here as well; otherwise it would be leaked.
    delete instance;
    return NULL;
  }
  if (!instance->OpenDictionaryFile()) {
    LOG(ERROR) << "Failed to create value dictionary";
    delete instance;
    return NULL;
  }
  return instance;
}

// Locates the value section in the already opened dictionary file and loads
// it into the value trie.
//
// Aborts via CHECK when the section is missing. Returns true when the trie
// accepted the image, false otherwise.
bool ValueDictionary::OpenDictionaryFile() {
  // The section length is reported by GetSection but is not needed afterwards.
  int section_length = 0;
  const unsigned char *section_image =
      reinterpret_cast<const unsigned char *>(dictionary_file_->GetSection(
          codec_->GetSectionNameForValue(), &section_length));
  CHECK(section_image) << "can not find value section";

  if (value_trie_->OpenImage(section_image)) {
    return true;
  }
  DLOG(ERROR) << "Cannot open value trie";
  return false;
}

// Runs a predictive search on the value trie for the first |size| bytes of
// |str| and returns the hits as a list of nodes linked through bnext.
//
// Each hit is prepended to the list, so the list order is the reverse of the
// search result order. When |allocator| is non-NULL, nodes come from it and
// its max_nodes_size() caps both the search and the number of nodes created.
// When |allocator| is NULL (test usage), nodes are created with new and no
// limit applies. Returns NULL when nothing is found.
Node *ValueDictionary::LookupPredictive(
    const char *str, int size,
    NodeAllocatorInterface *allocator) const {
  string encoded_key;
  codec_->EncodeValue(string(str, size), &encoded_key);

  DCHECK(value_trie_.get() != NULL);

  const bool has_allocator = (allocator != NULL);
  vector<rx::RxEntry> entries;
  int remaining = -1;  // negative means "no limit"
  if (has_allocator) {
    remaining = allocator->max_nodes_size();
    value_trie_->PredictiveSearchWithLimit(encoded_key, remaining, &entries);
  } else {
    value_trie_->PredictiveSearch(encoded_key, &entries);
  }

  Node *head = NULL;
  // Stop as soon as the node budget is used up; a negative budget never
  // reaches zero because it is only decremented while positive.
  for (size_t i = 0; i < entries.size() && remaining != 0; ++i) {
    // Without an allocator (test only) the node is allocated directly.
    Node *node = has_allocator ? allocator->NewNode() : new Node();

    // The value dictionary is meant for suggestion, so the token information
    // is faked: both ids are the suggest-only word id and the cost is a fixed
    // value that is not looked up.
    // TODO(toshiyuki): If necessary, implement simple cost lookup.
    // Bloom filter may be one option.
    node->lid = POSMatcher::GetSuggestOnlyWordId();
    node->rid = POSMatcher::GetSuggestOnlyWordId();
    node->wcost = 10000;

    // The decoded value is used as both key and value of the node.
    string decoded;
    codec_->DecodeValue(entries[i].key, &decoded);
    node->key = decoded;
    node->value = decoded;
    node->node_type = Node::NOR_NODE;

    // Prepend to the result list.
    node->bnext = head;
    head = node;

    if (remaining > 0) {
      --remaining;
    }
  }
  return head;
}

// The value dictionary is intended to be used for prediction only, so prefix
// lookup is not supported: all arguments are ignored and NULL is always
// returned.
Node *ValueDictionary::LookupPrefixWithLimit(
    const char *str, int size,
    const Limit &limit,
    NodeAllocatorInterface *allocator) const {
  return NULL;  // unsupported operation
}

// Prefix lookup is not supported by the value dictionary: all arguments are
// ignored and NULL is always returned.
Node *ValueDictionary::LookupPrefix(
    const char *str, int size,
    NodeAllocatorInterface *allocator) const {
  return NULL;  // unsupported operation
}

// Reverse lookup is not implemented by the value dictionary: all arguments
// are ignored and NULL is always returned.
Node *ValueDictionary::LookupReverse(const char *str, int size,
                                     NodeAllocatorInterface *allocator) const {
  return NULL;  // unsupported operation
}

}  // namespace mozc