Codebase list mozc / debian/0.12.410.102-1 dictionary / user_dictionary_util_test.cc
debian/0.12.410.102-1

Tree @debian/0.12.410.102-1 (Download .tar.gz)

user_dictionary_util_test.cc @debian/0.12.410.102-1raw · history · blame

// Copyright 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "base/util.h"
#include "dictionary/user_dictionary_util.h"

#include "testing/base/public/googletest.h"
#include "testing/base/public/gunit.h"

namespace mozc {

static void TestNormalizeReading(const string &golden, const string &input) {
  string output;
  UserDictionaryUtil::NormalizeReading(input, &output);
  EXPECT_EQ(golden, output);
}

TEST(UserDictionaryUtilTest, TestIsValidReading) {
  EXPECT_TRUE(UserDictionaryUtil::IsValidReading("ABYZabyz0189"));
  // "〜「」"
  EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
                  "\xe3\x80\x9c\xe3\x80\x8c\xe3\x80\x8d"));
  // "あいうわをんゔ"
  EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
                  "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x8f\xe3\x82\x92"
                  "\xe3\x82\x93\xe3\x82\x94"));
  // "アイウワヲンヴ"
  EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
                  "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xaf\xe3\x83\xb2"
                  "\xe3\x83\xb3\xe3\x83\xb4"));
  // "水雲"
  EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xe6\xb0\xb4\xe9\x9b\xb2"));

  // COMBINING KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u3099, u309A)
  EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xE3\x82\x99\xE3\x82\x9A"));

  // KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u309B, u309C)
  EXPECT_TRUE(UserDictionaryUtil::IsValidReading("\xE3\x82\x9B\xE3\x82\x9C"));
}

TEST(UserDictionaryUtilTest, TestNormalizeReading) {
  // "あいうゔゎ", "アイウヴヮ"
  TestNormalizeReading(
      "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x94\xe3\x82\x8e",
      "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xb4\xe3\x83\xae");
  // "あいうゃ", "アイウャ"
  TestNormalizeReading(
      "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x83",
      "\xef\xbd\xb1\xef\xbd\xb2\xef\xbd\xb3\xef\xbd\xac");
  // "ABab01@&=|"
  TestNormalizeReading(
      "ABab01@&=|",
      "\xef\xbc\xa1\xef\xbc\xa2\xef\xbd\x81\xef\xbd\x82\xef\xbc\x90\xef\xbc\x91"
      "\xef\xbc\xa0\xef\xbc\x86\xef\xbc\x9d\xef\xbd\x9c");
}

namespace {
struct UserDictionaryEntryData {
  const char *key;
  const char *value;
  const char *pos;
  const char *comment;
};

void ConvertUserDictionaryEntry(
    const UserDictionaryEntryData &input,
    UserDictionaryStorage::UserDictionaryEntry *output) {
  output->set_key(input.key);
  output->set_value(input.value);
  output->set_pos(input.pos);
  output->set_comment(input.comment);
}
}  // namespace

TEST(UserDictionaryUtilTest, TestSanitizeEntry) {
  const UserDictionaryEntryData golden_data = {
    "abc",
    "abc",
    "abc",
    "abc",
  };

  UserDictionaryStorage::UserDictionaryEntry golden, input;
  ConvertUserDictionaryEntry(golden_data, &golden);

  {
    UserDictionaryEntryData input_data = { "abc", "abc", "abc", "abc" };
    ConvertUserDictionaryEntry(input_data, &input);
    EXPECT_FALSE(UserDictionaryUtil::SanitizeEntry(&input));
    EXPECT_EQ(golden.DebugString(), input.DebugString());
  }

  {
    UserDictionaryEntryData input_data = { "ab\tc", "abc", "abc", "abc" };
    ConvertUserDictionaryEntry(input_data, &input);
    EXPECT_TRUE(UserDictionaryUtil::SanitizeEntry(&input));
    EXPECT_EQ(golden.DebugString(), input.DebugString());
  }

  {
    UserDictionaryEntryData input_data = { "abc", "ab\tc", "abc", "ab\tc" };
    ConvertUserDictionaryEntry(input_data, &input);
    EXPECT_TRUE(UserDictionaryUtil::SanitizeEntry(&input));
    EXPECT_EQ(golden.DebugString(), input.DebugString());
  }

  {
    UserDictionaryEntryData input_data = { "ab\tc", "ab\tc", "ab\tc", "ab\tc" };
    ConvertUserDictionaryEntry(input_data, &input);
    EXPECT_TRUE(UserDictionaryUtil::SanitizeEntry(&input));
    EXPECT_EQ(golden.DebugString(), input.DebugString());
  }
}

TEST(UserDictionaryUtilTest, TestSanitize) {
  string str('\t', 10);
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 5));
  EXPECT_EQ("", str);

  str = "ab\tc";
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 10));
  EXPECT_EQ("abc", str);

  str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B";  // "かしゆか"
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 3));
  EXPECT_EQ("\xE3\x81\x8B", str);  // "か"

  str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B";  // "かしゆか"
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 4));
  EXPECT_EQ("\xE3\x81\x8B", str);  // "か"

  str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B";  // "かしゆか"
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 5));
  EXPECT_EQ("\xE3\x81\x8B", str);  // "か"

  str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B";  // "かしゆか"
  EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 6));
  EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97", str);  // "かし"

  str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B";  // "かしゆか"
  EXPECT_FALSE(UserDictionaryUtil::Sanitize(&str, 100));
  // "かしゆか"
  EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B", str);
}
}  // namespace mozc