// Codebase list: mozc / upstream/1.1.758.102 / converter / converter_test.cc
// upstream/1.1.758.102
//
// Tree @upstream/1.1.758.102 (Download .tar.gz)
//
// converter_test.cc @upstream/1.1.758.102 (raw · history · blame)

// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <string>
#include <vector>
#include "base/base.h"
#include "base/util.h"
#include "converter/converter_interface.h"
#include "converter/node_allocator.h"
#include "converter/segments.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suffix_dictionary.h"
#include "dictionary/suppression_dictionary.h"
#include "session/config.pb.h"
#include "session/config_handler.h"
#include "testing/base/public/gunit.h"

DECLARE_string(test_tmpdir);

namespace mozc {

class ConverterTest : public testing::Test {
 public:
  virtual void SetUp() {
    // set default user profile directory
    Util::SetUserProfileDirectory(FLAGS_test_tmpdir);
    config::Config config;
    config::ConfigHandler::GetDefaultConfig(&config);
    config::ConfigHandler::SetConfig(config);
  }

  virtual void TearDown() {
    // just in case, reset the config in test_tmpdir
    config::Config config;
    config::ConfigHandler::GetDefaultConfig(&config);
    config::ConfigHandler::SetConfig(config);
    SuffixDictionaryFactory::SetSuffixDictionary(NULL);
  }
};

// Regression test for issue 2209644.
// Only checks that converting these keys does not crash (segmentation fault)
// and that StartConversion() reports success; the produced candidates are
// not inspected.
// TODO(toshiyuki): make dictionary mock and test strictly.
TEST_F(ConverterTest, CanConvertTest) {
  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);

  const string kHyphenKey("-");
  // "おきておきて"
  const string kRepeatedKey(
      "\xe3\x81\x8a\xe3\x81\x8d\xe3\x81\xa6\xe3\x81\x8a"
      "\xe3\x81\x8d\xe3\x81\xa6");

  // Each key is converted into its own, freshly constructed Segments.
  {
    Segments hyphen_segments;
    EXPECT_TRUE(converter->StartConversion(&hyphen_segments, kHyphenKey));
  }
  {
    Segments repeated_segments;
    EXPECT_TRUE(converter->StartConversion(&repeated_segments, kRepeatedKey));
  }
}

namespace {
// Runs two conversions back to back on the same Segments object and returns
// the top candidate of the second one.
//
// Steps:
//   1. Clear the user history and convert |first_key|.
//   2. Find the candidate of segment 0 whose value equals |first_value|,
//      commit it and finish the conversion.
//   3. Convert |second_key| reusing the same Segments (presumably so that the
//      committed segment acts as context for the second conversion -- the
//      result is expected to contain exactly two segments).
//
// Returns the value of candidate 0 of segment 1 after step 3.
//
// Every violated expectation is reported through gtest (non-fatal). When a
// later step could only proceed by indexing out of range -- no segment was
// produced, |first_value| is not among the candidates, or the second
// conversion yields fewer than two segments / no candidate -- an empty string
// is returned instead, so the caller's comparison fails cleanly.
string ContextAwareConvert(const string &first_key,
                           const string &first_value,
                           const string &second_key) {
  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);
  converter->ClearUserHistory();

  Segments segments;
  EXPECT_TRUE(converter->StartConversion(&segments, first_key));
  EXPECT_TRUE(segments.segments_size() > 0) << first_key;
  if (segments.segments_size() == 0) {
    // Nothing to search in; segment(0) would be out of range.
    return string();
  }

  // Locate the candidate that carries the requested value.
  int position = -1;
  for (size_t i = 0; i < segments.segment(0).candidates_size(); ++i) {
    if (first_value == segments.segment(0).candidate(i).value) {
      position = static_cast<int>(i);
      break;
    }
  }
  EXPECT_GE(position, 0) << first_value;
  if (position < 0) {
    // Do not commit with an invalid candidate index.
    return string();
  }

  EXPECT_TRUE(converter->CommitSegmentValue(&segments, 0, position))
      << first_value;
  EXPECT_TRUE(converter->FinishConversion(&segments));

  EXPECT_TRUE(converter->StartConversion(&segments, second_key));
  EXPECT_EQ(2, segments.segments_size());
  if (segments.segments_size() < 2 ||
      segments.segment(1).candidates_size() == 0) {
    // The expected segment or its top candidate does not exist.
    return string();
  }

  return segments.segment(1).candidate(0).value;
}
}  // anonymous namespace

// Checks that the top candidate for a key depends on what was committed just
// before it. Each row is (expected, first_key, first_value, second_key) and
// is evaluated through ContextAwareConvert().
TEST_F(ConverterTest, ContextAwareConversionTest) {
  struct TestCase {
    const char *expected;
    const char *first_key;
    const char *first_value;
    const char *second_key;
  };

  const TestCase kCases[] = {
    // "一髪" <- ("きき", "危機", "いっぱつ")
    { "\xE4\xB8\x80\xE9\xAB\xAA",
      "\xE3\x81\x8D\xE3\x81\x8D",
      "\xE5\x8D\xB1\xE6\xA9\x9F",
      "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\xB1\xE3\x81\xA4" },
    // "大" <- ("きょうと", "京都", "だい")
    { "\xE5\xA4\xA7",
      "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8",
      "\xE4\xBA\xAC\xE9\x83\xBD",
      "\xE3\x81\xA0\xE3\x81\x84" },
    // "点" <- ("もんだい", "問題", "てん")
    { "\xE7\x82\xB9",
      "\xE3\x82\x82\xE3\x82\x93\xE3\x81\xA0\xE3\x81\x84",
      "\xE5\x95\x8F\xE9\xA1\x8C",
      "\xE3\x81\xA6\xE3\x82\x93" },
    // "來未" <- ("こうだ", "倖田", "くみ")
    { "\xE4\xBE\x86\xE6\x9C\xAA",
      "\xE3\x81\x93\xE3\x81\x86\xE3\x81\xA0",
      "\xE5\x80\x96\xE7\x94\xB0",
      "\xE3\x81\x8F\xE3\x81\xBF" },
    // "々" <- ("ねん", "年", "ねん")
    { "\xE3\x80\x85",
      "\xE3\x81\xAD\xE3\x82\x93",
      "\xE5\xB9\xB4",
      "\xE3\x81\xAD\xE3\x82\x93" },
    // "々" <- ("ひと", "人", "びと")
    { "\xE3\x80\x85",
      "\xE3\x81\xB2\xE3\x81\xA8",
      "\xE4\xBA\xBA",
      "\xE3\x81\xB3\xE3\x81\xA8" }
  };

  // Rows are evaluated in table order; a failing row does not stop the rest.
  for (size_t i = 0; i < sizeof(kCases) / sizeof(kCases[0]); ++i) {
    const TestCase &test_case = kCases[i];
    EXPECT_EQ(test_case.expected,
              ContextAwareConvert(test_case.first_key,
                                  test_case.first_value,
                                  test_case.second_key));
  }
}

// Verifies the key and content_key of the top candidate: converting
// "わたしは" must give one segment whose first candidate has the full input
// as its key and "わたし" as its content_key.
TEST_F(ConverterTest, CandidateKeyTest) {
  // "わたしは"
  const char kKey[] = "\xE3\x82\x8F\xE3\x81\x9F\xE3\x81\x97\xE3\x81\xAF";
  // "わたし"
  const char kContentKey[] = "\xE3\x82\x8F\xE3\x81\x9F\xE3\x81\x97";

  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);

  Segments segments;
  EXPECT_TRUE(converter->StartConversion(&segments, string(kKey)));
  EXPECT_EQ(1, segments.segments_size());

  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ(kKey, top.key);
  EXPECT_EQ(kContentKey, top.content_key);
}

// Feeds the converter a key that mixes hiragana with a stray ASCII 'm' and
// expects StartConversion() to report success.
TEST_F(ConverterTest, QueryOfDeathTest) {
  // "りゅきゅけmぽ"
  const char kQuery[] =
      "\xE3\x82\x8A\xE3\x82\x85"
      "\xE3\x81\x8D\xE3\x82\x85"
      "\xE3\x81\x91"
      "m"
      "\xE3\x81\xBD";

  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);

  Segments segments;
  EXPECT_TRUE(converter->StartConversion(&segments, kQuery));
}

// Regression test for issue 3323108.
// Converts "ここではきものをぬぐ" (expected to split into three conversion
// segments), calls ResizeSegment(&segments, 1, 2) and checks that two
// conversion segments remain, the second of which has the key "きものをぬぐ".
TEST_F(ConverterTest, Regression3323108) {
  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);
  Segments segments;

  // "ここではきものをぬぐ"
  const char kKey[] =
      "\xE3\x81\x93\xE3\x81\x93\xE3\x81\xA7"
      "\xE3\x81\xAF\xE3\x81\x8D\xE3\x82\x82"
      "\xE3\x81\xAE\xE3\x82\x92\xE3\x81\xAC"
      "\xE3\x81\x90";
  // "きものをぬぐ"
  const char kResizedKey[] =
      "\xE3\x81\x8D\xE3\x82\x82"
      "\xE3\x81\xAE\xE3\x82\x92"
      "\xE3\x81\xAC\xE3\x81\x90";

  EXPECT_TRUE(converter->StartConversion(&segments, kKey));
  EXPECT_EQ(3, segments.conversion_segments_size());

  EXPECT_TRUE(converter->ResizeSegment(&segments, 1, 2));
  // Fatal on mismatch: conversion_segment(1) below must exist before it is
  // dereferenced.
  ASSERT_EQ(2, segments.conversion_segments_size());

  EXPECT_EQ(kResizedKey, segments.conversion_segment(1).key());
}

// Regression test for issue 3437022.
// 1. "しっこうゆうよ" converts to a single segment whose top candidate is
//    "執行猶予".
// 2. After (key, value) is added to the suppression dictionary, the same key
//    no longer converts to a single segment.
// 3. After ResizeSegment(&segments, 0, 3) there is one conversion segment
//    again, and its top candidate must not be the suppressed value.
//
// Only non-fatal EXPECT_* checks are used after the entry is added, so the
// suppression dictionary is always cleared at the end of the test.
TEST_F(ConverterTest, Regression3437022) {
  ConverterInterface *converter = ConverterFactory::GetConverter();
  CHECK(converter);
  Segments segments;

  // "しっこうゆうよ"
  const char kKey[] = "\xE3\x81\x97\xE3\x81\xA3\xE3\x81\x93"
      "\xE3\x81\x86\xE3\x82\x86\xE3\x81\x86\xE3\x82\x88";

  // "執行猶予"
  const char kValue[] = "\xE5\x9F\xB7\xE8\xA1\x8C"
      "\xE7\x8C\xB6\xE4\xBA\x88";

  // Baseline: without suppression the value is the top candidate.
  EXPECT_TRUE(converter->StartConversion(
      &segments, kKey));
  EXPECT_EQ(1, segments.conversion_segments_size());
  EXPECT_EQ(kValue, segments.conversion_segment(0).candidate(0).value);

  // Suppress the (key, value) pair.
  SuppressionDictionary *dic
      = SuppressionDictionary::GetSuppressionDictionary();
  CHECK(dic);
  dic->Lock();
  dic->AddEntry(kKey, kValue);
  dic->UnLock();

  EXPECT_TRUE(converter->StartConversion(
      &segments, kKey));

  EXPECT_NE(1, segments.conversion_segments_size());
  EXPECT_TRUE(converter->ResizeSegment(
      &segments, 0, 3));

  EXPECT_EQ(1, segments.conversion_segments_size());
  EXPECT_NE(kValue, segments.conversion_segment(0).candidate(0).value);

  // Remove the entry so later tests see an empty suppression dictionary.
  dic->Lock();
  dic->Clear();
  dic->UnLock();
}

namespace {
// One entry of the fake suffix dictionary used by the tests in this file.
// TestDictionary::LookupPredictive() copies each field into the Node it
// returns. The field order matters: kTestTokens initializes entries
// positionally.
struct Token {
  const char *key;    // copied to Node::key
  const char *value;  // copied to Node::value
  uint16 rid;         // copied to Node::rid
  int32 cost;         // copied to Node::cost
};

// Entries served by TestDictionary, as { key, value, rid, cost }.
// "です" appears twice with different rid/cost (1/100 and 4/50); the
// CompletePOSIds test expects the lower-cost entry (rid 4) to be chosen.
const Token kTestTokens[] = {
  // Human-readable form of the UTF-8 byte strings below:
  // { "です", "です", 1, 100 },
  // { "そうです", "そうです", 2, 100 },
  // { "す", "す", 3, 100 },
  // { "です", "です", 4, 50 }
  { "\xE3\x81\xA7\xE3\x81\x99", "\xE3\x81\xA7\xE3\x81\x99", 1, 100 },
  { "\xE3\x81\x9D\xE3\x81\x86\xE3\x81\xA7\xE3\x81\x99",
    "\xE3\x81\x9D\xE3\x81\x86\xE3\x81\xA7\xE3\x81\x99", 2, 100 },
  { "\xE3\x81\x99", "\xE3\x81\x99", 3, 100 },
  { "\xE3\x81\xA7\xE3\x81\x99", "\xE3\x81\xA7\xE3\x81\x99", 4, 50 }
};

// Fake DictionaryInterface backed by kTestTokens. The CompletePOSIds test
// installs it as the suffix dictionary.
class TestDictionary : public DictionaryInterface {
 public:
  TestDictionary() {}
  virtual ~TestDictionary() {}

  // Returns one node per kTestTokens entry regardless of the query: |str|
  // and |size| are ignored. Nodes are obtained from |allocator| and chained
  // through Node::bnext; each new node is pushed to the front, so the list
  // is in reverse table order (the last table entry is the head).
  // lid is always 0; key, value, rid and cost come from the table.
  virtual Node *LookupPredictive(const char *str, int size,
                                 NodeAllocatorInterface *allocator) const {
    Node *head = NULL;
    // size_t index: avoids a signed/unsigned comparison with arraysize().
    for (size_t i = 0; i < arraysize(kTestTokens); ++i) {
      const Token &token = kTestTokens[i];
      Node *node = allocator->NewNode();
      node->Init();
      node->value = token.value;
      node->key = token.key;
      node->lid = 0;
      node->rid = token.rid;
      node->cost = token.cost;
      node->bnext = head;
      head = node;
    }

    return head;
  }

  // Prefix lookup always returns NULL in this fake.
  virtual Node *LookupPrefix(const char *str, int size,
                             NodeAllocatorInterface *allocator) const {
    return NULL;
  }

  // Reverse lookup always returns NULL in this fake.
  virtual Node *LookupReverse(const char *str, int size,
                              NodeAllocatorInterface *allocator) const {
    return NULL;
  }
};
}   // namespace

// Exercises ConverterUtil::CompletePOSIds() with TestDictionary installed as
// the suffix dictionary.
//
// - A candidate that already has non-zero lid/rid is left untouched.
// - For a candidate with lid == rid == 0, lid becomes the unknown POS id and
//   rid is taken from the matching kTestTokens entry.
// - If nothing matches, both ids become the unknown POS id.
//
// The same Candidate object is reused for every case, in the order below.
TEST_F(ConverterTest, CompletePOSIds) {
  Segment::Candidate candidate;

  TestDictionary test_dictionary;
  SuffixDictionaryFactory::SetSuffixDictionary(&test_dictionary);

  // NO rewrite
  {
    candidate.lid = 1;
    candidate.rid = 2;
    candidate.value = "test";
    ConverterUtil::CompletePOSIds(&candidate);
    EXPECT_EQ(1, candidate.lid);
    EXPECT_EQ(2, candidate.rid);
  }

  // Values whose rid is expected to come from the test dictionary.
  struct SuffixCase {
    const char *value;
    int expected_rid;
  };
  const SuffixCase kSuffixCases[] = {
    // "たべそうです": "そうです" is the longest match.
    { "\xE3\x81\x9F\xE3\x81\xB9\xE3\x81\x9D\xE3\x81\x86"
      "\xE3\x81\xA7\xE3\x81\x99", 2 },
    // "おす"
    { "\xE3\x81\x8A\xE3\x81\x99", 3 },
    // "です": there are two "です" entries; the one with the minimum cost
    // is selected.
    { "\xE3\x81\xA7\xE3\x81\x99", 4 },
    // "うごくです"
    { "\xE3\x81\x86\xE3\x81\x94\xE3\x81\x8F"
      "\xE3\x81\xA7\xE3\x81\x99", 4 }
  };
  for (size_t i = 0; i < sizeof(kSuffixCases) / sizeof(kSuffixCases[0]); ++i) {
    candidate.lid = 0;
    candidate.rid = 0;
    candidate.value = kSuffixCases[i].value;
    ConverterUtil::CompletePOSIds(&candidate);
    EXPECT_EQ(POSMatcher::GetUnknownId(), candidate.lid) << "case " << i;
    EXPECT_EQ(kSuffixCases[i].expected_rid, candidate.rid) << "case " << i;
  }

  // completely unknown.
  {
    candidate.lid = 0;
    candidate.rid = 0;
    // candidate.value = "京都";
    candidate.value = "\xE4\xBA\xAC\xE9\x83\xBD";
    ConverterUtil::CompletePOSIds(&candidate);
    EXPECT_EQ(POSMatcher::GetUnknownId(), candidate.lid);
    EXPECT_EQ(POSMatcher::GetUnknownId(), candidate.rid);
  }

  // |test_dictionary| lives on this stack frame: unregister it before it is
  // destroyed so the factory never holds a dangling pointer.
  SuffixDictionaryFactory::SetSuffixDictionary(NULL);
}
}  // namespace mozc