Codebase list mozc / debian/0.12.410.102-1 dictionary / user_dictionary_storage.cc
debian/0.12.410.102-1

Tree @debian/0.12.410.102-1 (Download .tar.gz)

user_dictionary_storage.cc @debian/0.12.410.102-1raw · history · blame

// Copyright 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "dictionary/user_dictionary_storage.h"

#include <algorithm>
#include <string>
#include <vector>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>

#include "base/base.h"
#include "base/file_stream.h"
#include "base/mutex.h"
#include "base/process_mutex.h"
#include "base/protobuf/descriptor.h"
#include "base/protobuf/message.h"
#include "base/protobuf/protobuf.h"
#include "base/protobuf/repeated_field.h"
#include "base/util.h"

namespace mozc {
namespace {
// Name handed to ProcessMutex when a UserDictionaryStorage is constructed.
const char kUserDictionaryStorageMutex[] = "user_dictionary_storage";

// Maximum number of entries per dictionary
// (reported by UserDictionaryStorage::max_entry_size()).
const size_t kMaxEntrySize = 1000000;

// Maximum number of dictionaries a single storage may hold.
const size_t kMaxDictionarySize = 100;

// Maximum dictionary name length, compared against string::size().
const size_t kMaxDictionaryNameSize = 300;

// 512MByte.
// Load() raises the limit of a serialized message from the protobuf
// default (64MB) to this value.
const size_t kDefaultTotalBytesLimit = 512 * 1024 * 1024;

// 256MByte.
// When the file written by Save() reaches this size, Save() reports
// TOO_BIG_FILE_BYTES so that a warning dialog can tell the user
// "All words will not be saved correctly. Please make the dictionary
// size smaller".
const size_t kDefaultWarningTotalBytesLimit = 256 * 1024 * 1024;

// Returns a random, non-zero 64-bit id for a dictionary.
// Tries Util::GetSecureRandomSequence() first and falls back to rand()
// when that call reports failure.
uint64 CreateID() {
  uint64 result = 0;

  // An id of 0 is used as a magic number, so keep drawing until the
  // candidate is something else.
  do {
    char *raw_bytes = reinterpret_cast<char *>(&result);
    const bool secure_ok =
        Util::GetSecureRandomSequence(raw_bytes, sizeof(result));
    if (!secure_ok) {
      LOG(ERROR) << "GetSecureRandomSequence() failed. use rand()";
      result = static_cast<uint64>(rand());
    }
  } while (result == 0);

  return result;
}
}  // namespace

// Creates a storage bound to |file_name|. Nothing is read from disk here;
// call Load() to populate the message. The storage starts out unlocked and
// with no error recorded.
UserDictionaryStorage::UserDictionaryStorage(const string &file_name)
    : file_name_(file_name),
      last_error_type_(USER_DICTIONARY_STORAGE_NO_ERROR),
      mutex_(new ProcessMutex(kUserDictionaryStorageMutex)) {
  // Save() inspects locked_ and may be called before Lock() has ever run,
  // so the flag must have a defined value from the start. It is assigned
  // in the body rather than in the initializer list so that this does not
  // depend on the member declaration order in the header.
  locked_ = false;
}

// Calls UnLock() so that the process mutex is released when the storage
// is destroyed.
UserDictionaryStorage::~UserDictionaryStorage() {
  // UnLock() always returns true, so the result is intentionally dropped.
  static_cast<void>(UnLock());
}

// Returns the path this storage was constructed with.
const string &UserDictionaryStorage::filename() const {
  return this->file_name_;
}

// Returns the result of Util::FileExists() for the storage file.
bool UserDictionaryStorage::Exists() const {
  const bool file_present = Util::FileExists(file_name_);
  return file_present;
}

bool UserDictionaryStorage::Load() {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  InputFileStream ifs(file_name_.c_str(), ios::binary);
  if (!ifs) {
    LOG(ERROR) << "cannot open file: " << file_name_;
    last_error_type_ = FILE_NOT_EXISTS;
    return false;
  }

  // Increase the maximum capacity of file size
  // from 64MB (default) to 512MB.
  // This is a tentative bug fix for http://b/2498675
  // TODO(taku): we have to introduce a restriction to
  // the file size and let user know "import failure" if user
  // wants to use more than 512MB.
  google::protobuf::io::IstreamInputStream zero_copy_input(&ifs);
  google::protobuf::io::CodedInputStream decoder(&zero_copy_input);
  decoder.SetTotalBytesLimit(kDefaultTotalBytesLimit, -1);
  if (!ParseFromCodedStream(&decoder) ||
      !decoder.ConsumedEntireMessage() ||
      !ifs.eof()) {
    LOG(ERROR) << "ParseFromStream failed: file seems broken";
    last_error_type_ = BROKEN_FILE;
    return false;
  }

  return true;
}

bool UserDictionaryStorage::Save() {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  if (!locked_) {
    LOG(ERROR) << "Dictionary is not locked. "
               << "Call Lock() before saving the dictionary";
    last_error_type_ = SYNC_FAILURE;
    return false;
  }

  const string tmp_file_name = file_name_ + ".tmp";
  {
    OutputFileStream ofs(tmp_file_name.c_str(),
                         ios::out|ios::binary|ios::trunc);
    if (!ofs) {
      LOG(ERROR) << "cannot open file: " << tmp_file_name;
      last_error_type_ = SYNC_FAILURE;
      return false;
    }

    if (!SerializeToOstream(&ofs)) {
      LOG(ERROR) << "SerializeToString failed";
      last_error_type_ = SYNC_FAILURE;
      return false;
    }

    if (static_cast<size_t>(ofs.tellp()) >= kDefaultWarningTotalBytesLimit) {
      LOG(ERROR) << "The file size exceeds " << kDefaultWarningTotalBytesLimit;
      // continue "AtomicRename"
      last_error_type_ = TOO_BIG_FILE_BYTES;
    }
  }

  if (!Util::AtomicRename(tmp_file_name, file_name_)) {
    LOG(ERROR) << "AtomicRename failed";
    last_error_type_ = SYNC_FAILURE;
    return false;
  }

  if (last_error_type_ == TOO_BIG_FILE_BYTES) {
    return false;
  }

  return true;
}

bool UserDictionaryStorage::Lock() {
  locked_ = mutex_->Lock();
  LOG_IF(ERROR, !locked_) << "Lock() failed";
  return locked_;
}

// Releases the process mutex and clears locked_.
// Always returns true.
bool UserDictionaryStorage::UnLock() {
  mutex_->UnLock();
  locked_ = false;

  const bool always_succeeds = true;
  return always_succeeds;
}

// Writes the dictionary identified by |dic_id| to |file_name| as text,
// one entry per line: key, value, pos and comment separated by tabs.
// Returns true on success. On failure returns false and records
//   INVALID_DICTIONARY_ID - no dictionary has the id |dic_id|.
//   EXPORT_FAILURE        - the file could not be opened or written.
bool UserDictionaryStorage::ExportDictionary(
    uint64 dic_id, const string &file_name) {
  // Start from a clean error state, like the other mutating/IO methods,
  // so that GetLastError() does not report a stale error after a
  // successful export.
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  const int index = GetUserDictionaryIndex(dic_id);
  if (index < 0) {
    last_error_type_ = INVALID_DICTIONARY_ID;
    LOG(ERROR) << "Invalid dictionary id: " << dic_id;
    return false;
  }

  OutputFileStream ofs(file_name.c_str());
  if (!ofs) {
    last_error_type_ = EXPORT_FAILURE;
    LOG(ERROR) << "Cannot open export file: " << file_name;
    return false;
  }

  const UserDictionary &dic = dictionaries(index);
  // entries_size() is an int; use an int index to avoid a signed/unsigned
  // comparison.
  for (int i = 0; i < dic.entries_size(); ++i) {
    const UserDictionaryEntry &entry = dic.entries(i);
    // "\n" instead of endl: no need to flush the stream after every entry.
    ofs << entry.key() << "\t"
        << entry.value() << "\t"
        << entry.pos() << "\t"
        << entry.comment() << "\n";
  }

  // Flush once and verify that everything reached the file, so that a
  // write error (e.g. disk full) is not reported as success.
  ofs.flush();
  if (!ofs) {
    last_error_type_ = EXPORT_FAILURE;
    LOG(ERROR) << "Cannot write export file: " << file_name;
    return false;
  }

  return true;
}

bool UserDictionaryStorage::CreateDictionary(
    const string &dic_name, uint64 *new_dic_id) {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  if (!UserDictionaryStorage::IsValidDictionaryName(dic_name)) {
    LOG(ERROR) << "Invalid dictionary name is passed";
    return false;
  }

  if (dictionaries_size() >= kMaxDictionarySize) {
    last_error_type_ = TOO_MANY_DICTIONARIES;
    LOG(ERROR) << "too many dictionaries";
    return false;
  }

  if (new_dic_id == NULL) {
    last_error_type_ = UNKNOWN_ERROR;
    LOG(ERROR) << "new_dic_id is NULL";
    return false;
  }

  UserDictionary *dic = add_dictionaries();
  if (dic == NULL) {
    last_error_type_ = UNKNOWN_ERROR;
    LOG(ERROR) << "add_dictionaries() failed";
    return false;
  }

  *new_dic_id = CreateID();

  dic->set_id(*new_dic_id);
  dic->set_name(dic_name);
  dic->clear_entries();

  return true;
}

// Appends a copy of the dictionary identified by |dic_id|. The copy gets
// the name |dic_name| and a freshly generated id, which is stored in
// |*new_dic_id|. The source dictionary is left untouched.
// Returns true on success. On failure returns false and records
//   (name error set by IsValidDictionaryName) - |dic_name| is invalid.
//   TOO_MANY_DICTIONARIES - kMaxDictionarySize dictionaries already exist.
//   UNKNOWN_ERROR         - |new_dic_id| is NULL.
//   INVALID_DICTIONARY_ID - no dictionary has the id |dic_id|.
bool UserDictionaryStorage::CopyDictionary(uint64 dic_id,
                                           const string &dic_name,
                                           uint64 *new_dic_id) {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  // IsValidDictionaryName() records the specific error type itself.
  if (!UserDictionaryStorage::IsValidDictionaryName(dic_name)) {
    LOG(ERROR) << "Invalid dictionary name is passed";
    return false;
  }

  if (dictionaries_size() >= kMaxDictionarySize) {
    last_error_type_ = TOO_MANY_DICTIONARIES;
    LOG(ERROR) << "too many dictionaries";
    return false;
  }

  if (new_dic_id == NULL) {
    last_error_type_ = UNKNOWN_ERROR;
    LOG(ERROR) << "new_dic_id is NULL";
    return false;
  }

  const UserDictionary *dic = GetUserDictionary(dic_id);
  if (dic == NULL) {
    last_error_type_ = INVALID_DICTIONARY_ID;
    LOG(ERROR) << "Invalid dictionary id: " << dic_id;
    return false;
  }

  UserDictionary *new_dic = add_dictionaries();
  new_dic->CopyFrom(*dic);

  // Give the new identity to the copy, not to the source dictionary.
  // Setting it on |dic| would silently re-id and rename the original and
  // leave the appended copy carrying the old id and name.
  *new_dic_id = CreateID();
  new_dic->set_id(*new_dic_id);
  new_dic->set_name(dic_name);

  return true;
}

// Removes the dictionary identified by |dic_id|, keeping the relative
// order of the remaining dictionaries.
// Returns false and records INVALID_DICTIONARY_ID when no dictionary has
// that id.
bool UserDictionaryStorage::DeleteDictionary(uint64 dic_id) {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  const int victim = GetUserDictionaryIndex(dic_id);
  if (victim == -1) {
    last_error_type_ = INVALID_DICTIONARY_ID;
    LOG(ERROR) << "Invalid dictionary id: " << dic_id;
    return false;
  }

  google::protobuf::RepeatedPtrField<UserDictionary> *all_dics =
      mutable_dictionaries();
  UserDictionary **slots = all_dics->mutable_data();

  // RepeatedPtrField can only drop its last element, so move the victim
  // to the end by rotating the tail [victim, size) left by one position.
  std::rotate(slots + victim, slots + victim + 1,
              slots + dictionaries_size());

  all_dics->RemoveLast();

  return true;
}

// Changes the name of the dictionary identified by |dic_id| to |dic_name|.
// Returns false when |dic_name| is invalid (error type recorded by
// IsValidDictionaryName) or when no dictionary has that id
// (INVALID_DICTIONARY_ID).
bool UserDictionaryStorage::RenameDictionary(uint64 dic_id,
                                             const string &dic_name) {
  last_error_type_ = USER_DICTIONARY_STORAGE_NO_ERROR;

  const bool name_ok = UserDictionaryStorage::IsValidDictionaryName(dic_name);
  if (!name_ok) {
    LOG(ERROR) << "Invalid dictionary name is passed";
    return false;
  }

  UserDictionary *target = GetUserDictionary(dic_id);
  if (target != NULL) {
    target->set_name(dic_name);
    return true;
  }

  last_error_type_ = INVALID_DICTIONARY_ID;
  LOG(ERROR) << "Invalid dictionary id: " << dic_id;
  return false;
}

// Returns the index of the first dictionary whose id equals |dic_id|,
// or -1 (after logging an error) when there is none.
int UserDictionaryStorage::GetUserDictionaryIndex(uint64 dic_id) const {
  int position = 0;
  while (position < dictionaries_size()) {
    if (dictionaries(position).id() == dic_id) {
      return position;
    }
    ++position;
  }

  LOG(ERROR) << "Cannot find dictionary id: " << dic_id;
  return -1;
}

// Returns a mutable pointer to the dictionary identified by |dic_id|,
// or NULL (after logging an error) when no dictionary has that id.
UserDictionaryStorage::UserDictionary *
UserDictionaryStorage::GetUserDictionary(uint64 dic_id) {
  const int found_at = GetUserDictionaryIndex(dic_id);
  if (found_at >= 0) {
    return mutable_dictionaries(found_at);
  }

  LOG(ERROR) << "Invalid dictionary id: " << dic_id;
  return NULL;
}

// Returns the error type recorded by the most recent operation that
// updates last_error_type_.
UserDictionaryStorage::UserDictionaryStorageErrorType
UserDictionaryStorage::GetLastError() const {
  return this->last_error_type_;
}

// static
// Returns kMaxEntrySize.
size_t UserDictionaryStorage::max_entry_size() {
  const size_t limit = kMaxEntrySize;
  return limit;
}

// static
// Returns kMaxDictionarySize.
size_t UserDictionaryStorage::max_dictionary_size() {
  const size_t limit = kMaxDictionarySize;
  return limit;
}

// Checks whether |name| may be used as a dictionary name.
// Returns true when it is acceptable. Otherwise returns false and records
// one of the following in last_error_type_:
//   EMPTY_DICTIONARY_NAME                 - |name| is empty.
//   TOO_LONG_DICTIONARY_NAME              - name.size() exceeds
//                                           kMaxDictionaryNameSize.
//   INVALID_CHARACTERS_IN_DICTIONARY_NAME - |name| contains '\n', '\r'
//                                           or '\t'.
// last_error_type_ is left untouched when the name is valid.
bool UserDictionaryStorage::IsValidDictionaryName(const string &name) {
  if (name.empty()) {
    last_error_type_ = EMPTY_DICTIONARY_NAME;
    VLOG(1) << "Empty dictionary name.";
    return false;
  }

  if (name.size() > kMaxDictionaryNameSize) {
    last_error_type_ = TOO_LONG_DICTIONARY_NAME;
    VLOG(1) << "Too long dictionary name";
    return false;
  }

  const bool has_control_char = name.find_first_of("\n\r\t") != string::npos;
  if (has_control_char) {
    last_error_type_ = INVALID_CHARACTERS_IN_DICTIONARY_NAME;
    VLOG(1) << "Invalid character in dictionary name: " << name;
    return false;
  }

  return true;
}
}  // namespace mozc