Codebase list mozc / aca5be7
Stop escaping non-ASCII string literals under src/dictionary/. BUG=#385 REF_BUG=31204285 REF_CL=173076141 REF_TIME=2017-10-23T15:18:48+09:00 REF_TIME_RAW=1508739528 +0900 Noriyuki Takahashi 6 years ago
17 changed file(s) with 424 addition(s) and 832 deletion(s). Raw diff Collapse all Expand all
2929
3030 MAJOR=2
3131 MINOR=23
32 BUILD=2747
32 BUILD=2748
3333 REVISION=102
3434 # This version represents the version of Mozc IME engine (converter, predictor,
3535 # etc.). This version info is included both in the Mozc server and in the Mozc
9090 convreq_.set_config(&config_);
9191 }
9292
93 virtual void SetUp() {
93 void SetUp() override {
9494 config::ConfigHandler::GetDefaultConfig(&config_);
9595 }
9696
9999 CheckKeyValueExistenceCallback(StringPiece key, StringPiece value)
100100 : key_(key), value_(value), found_(false) {}
101101
102 virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
103 const Token &token) {
102 ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */,
103 const Token &token) override {
104104 if (token.key == key_ && token.value == value_) {
105105 found_ = true;
106106 return TRAVERSE_DONE;
120120 CheckSpellingExistenceCallback(StringPiece key, StringPiece value)
121121 : key_(key), value_(value), found_(false) {}
122122
123 virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
124 const Token &token) {
123 ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */,
124 const Token &token) override {
125125 if (token.key == key_ && token.value == value_ &&
126126 (token.attributes & Token::SPELLING_CORRECTION)) {
127127 found_ = true;
143143 const POSMatcher *pos_matcher)
144144 : key_(key), value_(value), pos_matcher_(pos_matcher), found_(false) {}
145145
146 virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
147 const Token &token) {
146 ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */,
147 const Token &token) override {
148148 if (token.key == key_ && token.value == value_ &&
149149 pos_matcher_->IsZipcode(token.lid)) {
150150 found_ = true;
166166 CheckEnglishT13nCallback(StringPiece key, StringPiece value)
167167 : key_(key), value_(value), found_(false) {}
168168
169 virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
170 const Token &token) {
169 ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */,
170 const Token &token) override {
171171 if (token.key == key_ && token.value == value_ &&
172172 Util::IsEnglishTransliteration(token.value)) {
173173 found_ = true;
201201 DictionaryInterface *d = data->dictionary.get();
202202 SuppressionDictionary *s = data->suppression_dictionary.get();
203203
204 const char kKey[] =
205 "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B"; // "ぐーぐる"
206 const char kValue[] =
207 "\xE3\x82\xB0\xE3\x83\xBC\xE3\x82\xB0\xE3\x83\xAB"; // "グーグル"
204 const char kKey[] = "ぐーぐる";
205 const char kValue[] = "グーグル";
208206
209207 const LookupMethodAndQuery kTestPair[] = {
210 // "ぐーぐるは"
211 {&DictionaryInterface::LookupPrefix,
212 "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B\xE3\x81\xAF"},
213 // "ぐーぐ"
214 {&DictionaryInterface::LookupPredictive,
215 "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90"},
208 {&DictionaryInterface::LookupPrefix, "ぐーぐるは"},
209 {&DictionaryInterface::LookupPredictive, "ぐーぐ"},
216210 };
217211
218212 // First add (kKey, kValue) to the suppression dictionary; thus it should not
243237 DictionaryInterface *d = data->dictionary.get();
244238
245239 // "あぼがど" -> "アボカド", which is in the test dictionary.
246 const char kKey[] = "\xE3\x81\x82\xE3\x81\xBC\xE3\x81\x8C\xE3\x81\xA9";
247 const char kValue[] = "\xE3\x82\xA2\xE3\x83\x9C\xE3\x82\xAB\xE3\x83\x89";
240 const char kKey[] = "あぼがど";
241 const char kValue[] = "アボカド";
248242
249243 const LookupMethodAndQuery kTestPair[] = {
250 // "あぼがど"
251244 {&DictionaryInterface::LookupPrefix, kKey},
252 // "あぼ"
253 {&DictionaryInterface::LookupPredictive, "\xE3\x81\x82\xE3\x81\xBC"},
245 {&DictionaryInterface::LookupPredictive, "あぼ"},
254246 };
255247
256248 // The spelling correction entry (kKey, kValue) should be found if spelling
277269
278270 // "100-0000" -> "東京都千代田区", which is in the test dictionary.
279271 const char kKey[] = "100-0000";
280 const char kValue[] = "\xE6\x9D\xB1\xE4\xBA\xAC\xE9\x83\xBD\xE5\x8D"
281 "\x83\xE4\xBB\xA3\xE7\x94\xB0\xE5\x8C\xBA";
272 const char kValue[] = "東京都千代田区";
282273
283274 const LookupMethodAndQuery kTestPair[] = {
284275 {&DictionaryInterface::LookupPrefix, kKey},
308299 DictionaryInterface *d = data->dictionary.get();
309300 NodeAllocator allocator;
310301
311 // "ぐーぐる" -> "Google"
312 const char kKey[] =
313 "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B";
302 const char kKey[] = "ぐーぐる";
314303 const char kValue[] = "Google";
315304
316305 const LookupMethodAndQuery kTestPair[] = {
317306 {&DictionaryInterface::LookupPrefix, kKey},
318 // "ぐー"
319 {&DictionaryInterface::LookupPredictive, "\xE3\x81\x90\xE3\x83\xBC"},
307 {&DictionaryInterface::LookupPredictive, "ぐー"},
320308 };
321309
322310 // The T13N entry (kKey, kValue) should be found if the flag is set in the
3939 #include "request/conversion_request.h"
4040 #include "testing/base/public/gunit.h"
4141
42 using std::unique_ptr;
43
4442 namespace mozc {
4543 namespace dictionary {
4644 namespace {
4745
46 using std::unique_ptr;
47
4848 class DictionaryMockTest : public ::testing::Test {
4949 protected:
50 virtual void SetUp() {
50 void SetUp() override {
5151 mock_.reset(new DictionaryMock);
5252 }
5353
126126 TEST_F(DictionaryMockTest, LookupPrefix) {
127127 DictionaryMock *dic = GetMock();
128128
129 unique_ptr<Token> t0(CreateToken(
130 "\xe3\x81\xaf", // "は"
131 "v0", Token::NONE));
132 unique_ptr<Token> t1(CreateToken(
133 // "はひふへほ"
134 "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81\xbb",
135 "v1", Token::NONE));
129 unique_ptr<Token> t0(CreateToken("は", "v0", Token::NONE));
130 unique_ptr<Token> t1(CreateToken("はひふへほ", "v1", Token::NONE));
136131
137132 dic->AddLookupPrefix(t0->key, t0->key, t0->value, Token::NONE);
138133 dic->AddLookupPrefix(t1->key, t1->key, t1->value, Token::NONE);
156151 TEST_F(DictionaryMockTest, LookupReverse) {
157152 DictionaryInterface *dic = GetMock();
158153
159 // "今"/"いま"
160 const string k0 = "\xE4\xBB\x8A";
161 const string v0 = "\xE3\x81\x84\xE3\x81\xBE";
162 // "今日"/"きょう"
163 const string k1 = "\xE4\xBB\x8A\xE6\x97\xA5";
164 const string v1 = "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86";
154 const string k0 = "今";
155 const string v0 = "いま";
156 const string k1 = "今日";
157 const string v1 = "きょう";
165158
166159 std::vector<Token> source_tokens;
167160 unique_ptr<Token> t0(CreateToken(k0, v0));
186179
187180 TEST_F(DictionaryMockTest, LookupPredictive) {
188181 DictionaryInterface *dic = GetMock();
189 // "は"
190 const string k0 = "\xe3\x81\xaf";
191 // "はひふ"
192 const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5";
193 // "はひふへほ"
194 const string k2 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81"
195 "\xbb";
182
183 const string k0 = "は";
184 const string k1 = "はひふ";
185 const string k2 = "はひふへほ";
196186
197187 std::vector<Token> tokens;
198188 unique_ptr<Token> t1(CreateToken(k1, "v0", Token::NONE));
215205 TEST_F(DictionaryMockTest, LookupExact) {
216206 DictionaryInterface *dic = GetMock();
217207
218 const char *kKey = "\xE3\x81\xBB\xE3\x81\x92"; // "ほげ"
208 const char kKey[] = "ほげ";
219209
220210 unique_ptr<Token> t0(CreateToken(kKey, "value1", Token::NONE));
221211 unique_ptr<Token> t1(CreateToken(kKey, "value2", Token::NONE));
234224 EXPECT_TRUE(callback.tokens().empty());
235225
236226 callback.Clear();
237 dic->LookupExact("\xE3\x81\xBB", // "ほ"
238 convreq_,
239 &callback);
227 dic->LookupExact("ほ", convreq_, &callback);
240228 EXPECT_TRUE(callback.tokens().empty());
241229 }
242230
3939
4040 namespace mozc {
4141 namespace dictionary {
42 namespace {
4243
4344 TEST(SuffixDictionaryTest, LookupPredictive) {
4445 // Test SuffixDictionary with mock data.
7273 }
7374 {
7475 // Non-empty prefix.
75 const string kPrefix = "\xE3\x81\x9F"; // "た"
76 const string kPrefix = "た";
7677 CollectTokenCallback callback;
7778 dic->LookupPredictive(kPrefix, convreq, &callback);
7879 EXPECT_FALSE(callback.tokens().empty());
8788 }
8889 }
8990
91 } // namespace
9092 } // namespace dictionary
9193 } // namespace mozc
4040 #include "testing/base/public/googletest.h"
4141 #include "testing/base/public/gunit.h"
4242
43 using std::unique_ptr;
44
4543 namespace mozc {
4644 namespace dictionary {
4745 namespace {
46
47 using std::unique_ptr;
4848
4949 ::testing::AssertionResult MakeAssertResult(
5050 bool success, char32 c, const char *message) {
7272 namespace mozc {
7373 namespace dictionary {
7474
75 using mozc::storage::louds::BitVectorBasedArray;
76 using mozc::storage::louds::LoudsTrie;
75 using ::mozc::storage::louds::BitVectorBasedArray;
76 using ::mozc::storage::louds::LoudsTrie;
7777
7878 namespace {
7979
104104 // be mixed.
105105 // TODO(hidehiko): Clean up this hacky implementation.
106106 const char *kHiraganaExpansionTable[] = {
107 "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x81", // "ああぁ"
108 "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x83", // "いいぃ"
109 "\xe3\x81\x86\xe3\x81\x86\xe3\x81\x85\xe3\x82\x94", // "ううぅゔ"
110 "\xe3\x81\x88\xe3\x81\x88\xe3\x81\x87", // "ええぇ"
111 "\xe3\x81\x8a\xe3\x81\x8a\xe3\x81\x89", // "おおぉ"
112 "\xe3\x81\x8b\xe3\x81\x8b\xe3\x81\x8c", // "かかが"
113 "\xe3\x81\x8d\xe3\x81\x8d\xe3\x81\x8e", // "ききぎ"
114 "\xe3\x81\x8f\xe3\x81\x8f\xe3\x81\x90", // "くくぐ"
115 "\xe3\x81\x91\xe3\x81\x91\xe3\x81\x92", // "けけげ"
116 "\xe3\x81\x93\xe3\x81\x93\xe3\x81\x94", // "ここご"
117 "\xe3\x81\x95\xe3\x81\x95\xe3\x81\x96", // "ささざ"
118 "\xe3\x81\x97\xe3\x81\x97\xe3\x81\x98", // "ししじ"
119 "\xe3\x81\x99\xe3\x81\x99\xe3\x81\x9a", // "すすず"
120 "\xe3\x81\x9b\xe3\x81\x9b\xe3\x81\x9c", // "せせぜ"
121 "\xe3\x81\x9d\xe3\x81\x9d\xe3\x81\x9e", // "そそぞ"
122 "\xe3\x81\x9f\xe3\x81\x9f\xe3\x81\xa0", // "たただ"
123 "\xe3\x81\xa1\xe3\x81\xa1\xe3\x81\xa2", // "ちちぢ"
124 "\xe3\x81\xa4\xe3\x81\xa4\xe3\x81\xa3\xe3\x81\xa5", // "つつっづ"
125 "\xe3\x81\xa6\xe3\x81\xa6\xe3\x81\xa7", // "ててで"
126 "\xe3\x81\xa8\xe3\x81\xa8\xe3\x81\xa9", // "ととど"
127 "\xe3\x81\xaf\xe3\x81\xaf\xe3\x81\xb0\xe3\x81\xb1", // "ははばぱ"
128 "\xe3\x81\xb2\xe3\x81\xb2\xe3\x81\xb3\xe3\x81\xb4", // "ひひびぴ"
129 "\xe3\x81\xb5\xe3\x81\xb5\xe3\x81\xb6\xe3\x81\xb7", // "ふふぶぷ"
130 "\xe3\x81\xb8\xe3\x81\xb8\xe3\x81\xb9\xe3\x81\xba", // "へへべぺ"
131 "\xe3\x81\xbb\xe3\x81\xbb\xe3\x81\xbc\xe3\x81\xbd", // "ほほぼぽ"
132 "\xe3\x82\x84\xe3\x82\x84\xe3\x82\x83", // "ややゃ"
133 "\xe3\x82\x86\xe3\x82\x86\xe3\x82\x85", // "ゆゆゅ"
134 "\xe3\x82\x88\xe3\x82\x88\xe3\x82\x87", // "よよょ"
135 "\xe3\x82\x8f\xe3\x82\x8f\xe3\x82\x8e", // "わわゎ"
107 "ああぁ",
108 "いいぃ",
109 "ううぅゔ",
110 "ええぇ",
111 "おおぉ",
112 "かかが",
113 "ききぎ",
114 "くくぐ",
115 "けけげ",
116 "ここご",
117 "ささざ",
118 "ししじ",
119 "すすず",
120 "せせぜ",
121 "そそぞ",
122 "たただ",
123 "ちちぢ",
124 "つつっづ",
125 "ててで",
126 "ととど",
127 "ははばぱ",
128 "ひひびぴ",
129 "ふふぶぷ",
130 "へへべぺ",
131 "ほほぼぽ",
132 "ややゃ",
133 "ゆゆゅ",
134 "よよょ",
135 "わわゎ",
136136 };
137137
138138 const uint32 kAsciiRange = 0x80;
5656 #include "testing/base/public/gunit.h"
5757 #include "testing/base/public/mozctest.h"
5858
59 using std::unique_ptr;
60
61 using mozc::dictionary::CollectTokenCallback;
62
6359 DEFINE_int32(dictionary_test_size, 100000,
6460 "Dictionary size for this test.");
6561 DEFINE_int32(dictionary_reverse_lookup_test_size, 1000,
6864
6965 namespace mozc {
7066 namespace dictionary {
67 namespace {
68 using std::unique_ptr;
69 } // namespace
7170
7271 class SystemDictionaryTest : public ::testing::Test {
7372 protected:
176175 std::vector<Token *> tokens;
177176 for (int i = 0; i < 4; ++i) {
178177 Token *token = new Token;
179 // "きー%d"
180 token->key = Util::StringPrintf("\xE3\x81\x8D\xE3\x83\xBC%d", i);
181 // "バリュー%d"
182 token->value = Util::StringPrintf(
183 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC%d", i);
178 token->key = Util::StringPrintf("きー%d", i);
179 token->value = Util::StringPrintf("バリュー%d", i);
184180 tokens.push_back(token);
185181 }
186182
198194 tokens.push_back(token);
199195 }
200196
201 // "full"
202 const string kFull = "\xEF\xBD\x86\xEF\xBD\x95\xEF\xBD\x8C\xEF\xBD\x8C";
203 // "ひらがな"
204 const string kHiragana = "\xE3\x81\xB2\xE3\x82\x89\xE3\x81\x8C\xE3\x81\xAA";
205 // "かたかな"
206 const string kKatakanaKey =
207 "\xE3\x81\x8B\xE3\x81\x9F\xE3\x81\x8B\xE3\x81\xAA";
208 // "カタカナ"
209 const string kKatakanaValue =
210 "\xE3\x82\xAB\xE3\x82\xBF\xE3\x82\xAB\xE3\x83\x8A";
197 const string kFull = "full";
198 const string kHiragana = "ひらがな";
199 const string kKatakanaKey = "かたかな";
200 const string kKatakanaValue = "カタカナ";
211201
212202 { // Alphabet full width
213203 Token *token = new Token;
214204 token->key = "full";
215 token->value = kFull; // "full"
205 token->value = kFull;
216206 tokens.push_back(token);
217207 }
218208
219209 { // Hiragana
220210 Token *token = new Token;
221 token->key = kHiragana; // "ひらがな"
222 token->value = kHiragana; // "ひらがな"
211 token->key = kHiragana;
212 token->value = kHiragana;
223213 tokens.push_back(token);
224214 }
225215
226216 { // Katakana
227217 Token *token = new Token;
228 token->key = kKatakanaKey; // "かたかな"
229 token->value = kKatakanaValue; // "カタカナ"
218 token->key = kKatakanaKey;
219 token->value = kKatakanaValue;
230220 tokens.push_back(token);
231221 }
232222
237227 ASSERT_TRUE(system_dic.get() != NULL)
238228 << "Failed to open dictionary source:" << dic_fn_;
239229
240 EXPECT_TRUE(system_dic->HasValue(
241 // "バリュー0"
242 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x30"));
243 EXPECT_TRUE(system_dic->HasValue(
244 // "バリュー1"
245 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x31"));
246 EXPECT_TRUE(system_dic->HasValue(
247 // "バリュー2"
248 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x32"));
249 EXPECT_TRUE(system_dic->HasValue(
250 // "バリュー3"
251 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x33"));
252 EXPECT_FALSE(system_dic->HasValue(
253 // "バリュー4"
254 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x34"));
255 EXPECT_FALSE(system_dic->HasValue(
256 // "バリュー5"
257 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x35"));
258 EXPECT_FALSE(system_dic->HasValue(
259 // "バリュー6"
260 "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x36"));
230 EXPECT_TRUE(system_dic->HasValue("バリュー0"));
231 EXPECT_TRUE(system_dic->HasValue("バリュー1"));
232 EXPECT_TRUE(system_dic->HasValue("バリュー2"));
233 EXPECT_TRUE(system_dic->HasValue("バリュー3"));
234 EXPECT_FALSE(system_dic->HasValue("バリュー4"));
235 EXPECT_FALSE(system_dic->HasValue("バリュー5"));
236 EXPECT_FALSE(system_dic->HasValue("バリュー6"));
261237
262238 EXPECT_TRUE(system_dic->HasValue("Mozc"));
263239 EXPECT_FALSE(system_dic->HasValue("mozc"));
265241 EXPECT_TRUE(system_dic->HasValue("UPPER"));
266242 EXPECT_FALSE(system_dic->HasValue("upper"));
267243
268 EXPECT_TRUE(system_dic->HasValue(kFull)); // "full"
244 EXPECT_TRUE(system_dic->HasValue(kFull));
269245 EXPECT_FALSE(system_dic->HasValue("full"));
270246
271 EXPECT_TRUE(system_dic->HasValue(kHiragana)); //"ひらがな"
272 EXPECT_FALSE(system_dic->HasValue(
273 "\xE3\x83\x92\xE3\x83\xA9\xE3\x82\xAC\xE3\x83\x8A\x0A")); // "ヒラガナ"
274
275 EXPECT_TRUE(system_dic->HasValue(kKatakanaValue)); // "カタカナ"
276 EXPECT_FALSE(system_dic->HasValue(kKatakanaKey)); // "かたかな"
247 EXPECT_TRUE(system_dic->HasValue(kHiragana));
248 EXPECT_FALSE(system_dic->HasValue("ヒラガナ\n"));
249
250 EXPECT_TRUE(system_dic->HasValue(kKatakanaValue));
251 EXPECT_FALSE(system_dic->HasValue(kKatakanaKey));
277252
278253 STLDeleteElements(&tokens);
279254 }
281256 TEST_F(SystemDictionaryTest, NormalWord) {
282257 std::vector<Token *> source_tokens;
283258 unique_ptr<Token> t0(new Token);
284 // "あ"
285 t0->key = "\xe3\x81\x82";
286 // "亜"
287 t0->value = "\xe4\xba\x9c";
259 t0->key = "あ";
260 t0->value = "亜";
288261 t0->cost = 100;
289262 t0->lid = 50;
290263 t0->rid = 70;
305278
306279 // Look up by prefix.
307280 callback.Clear();
308 system_dic->LookupPrefix(
309 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", // "あいう"
310 convreq_, &callback);
281 system_dic->LookupPrefix("あいう", convreq_, &callback);
311282 ASSERT_EQ(1, callback.tokens().size());
312283 EXPECT_TOKEN_EQ(*t0, callback.tokens().front());
313284
314285 // Nothing should be looked up.
315286 callback.Clear();
316 system_dic->LookupPrefix(
317 "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", // "かきく"
318 convreq_, &callback);
287 system_dic->LookupPrefix("かきく", convreq_, &callback);
319288 EXPECT_TRUE(callback.tokens().empty());
320289 }
321290
322291 TEST_F(SystemDictionaryTest, SameWord) {
323292 std::vector<Token> tokens(4);
324293
325 tokens[0].key = "\xe3\x81\x82"; // "あ"
326 tokens[0].value = "\xe4\xba\x9c"; // "亜"
294 tokens[0].key = "あ";
295 tokens[0].value = "亜";
327296 tokens[0].cost = 100;
328297 tokens[0].lid = 50;
329298 tokens[0].rid = 70;
330299
331 tokens[1].key = "\xe3\x81\x82"; // "あ"
332 tokens[1].value = "\xe4\xba\x9c"; // "亜"
300 tokens[1].key = "あ";
301 tokens[1].value = "亜";
333302 tokens[1].cost = 150;
334303 tokens[1].lid = 100;
335304 tokens[1].rid = 200;
336305
337 tokens[2].key = "\xe3\x81\x82"; // "あ"
338 tokens[2].value = "\xe3\x81\x82"; // "あ"
306 tokens[2].key = "あ";
307 tokens[2].value = "あ";
339308 tokens[2].cost = 100;
340309 tokens[2].lid = 1000;
341310 tokens[2].rid = 2000;
342311
343 tokens[3].key = "\xe3\x81\x82"; // "あ"
344 tokens[3].value = "\xe4\xba\x9c"; // "亜"
312 tokens[3].key = "あ";
313 tokens[3].value = "亜";
345314 tokens[3].cost = 1000;
346315 tokens[3].lid = 2000;
347316 tokens[3].rid = 3000;
359328
360329 // All the tokens should be looked up.
361330 CollectTokenCallback callback;
362 system_dic->LookupPrefix("\xe3\x81\x82", // "あ"
363 convreq_, &callback);
331 system_dic->LookupPrefix("あ", convreq_, &callback);
364332 EXPECT_TOKENS_EQ_UNORDERED(source_tokens, callback.tokens());
365333 }
366334
383351 }
384352
385353 TEST_F(SystemDictionaryTest, SimpleLookupPrefix) {
386 // "は"
387 const string k0 = "\xe3\x81\xaf";
388 // "はひふへほ"
389 const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81"
390 "\xbb";
354 const string k0 = "は";
355 const string k1 = "はひふへほ";
391356 unique_ptr<Token> t0(CreateToken(k0, "aa"));
392357 unique_ptr<Token> t1(CreateToken(k1, "bb"));
393358
413378 class LookupPrefixTestCallback : public SystemDictionary::Callback {
414379 public:
415380 virtual ResultType OnKey(StringPiece key) {
416 if (key == "\xE3\x81\x8B\xE3\x81\x8D") { // key == "かき"
381 if (key == "かき") {
417382 return TRAVERSE_CULL;
418 } else if (key == "\xE3\x81\x95") { // key == "さ"
383 } else if (key == "さ") {
419384 return TRAVERSE_NEXT_KEY;
420 } else if (key == "\xE3\x81\x9F") { // key == "た"
385 } else if (key == "た") {
421386 return TRAVERSE_DONE;
422387 }
423388 return TRAVERSE_CONTINUE;
445410 const char *key;
446411 const char *value;
447412 } kKeyValues[] = {
448 // "あ", "亜"
449 { "\xE3\x81\x82", "\xE4\xBA\x9C" },
450 // "あ", "安"
451 { "\xE3\x81\x82", "\xE5\xAE\x89" },
452 // "あ", "在"
453 { "\xE3\x81\x82", "\xE5\x9C\xA8" },
454 // "あい", "愛"
455 { "\xE3\x81\x82\xE3\x81\x84", "\xE6\x84\x9B" },
456 // "あい", "藍"
457 { "\xE3\x81\x82\xE3\x81\x84", "\xE8\x97\x8D" },
458 // "あいう", "藍雨"
459 { "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", "\xE8\x97\x8D\xE9\x9B\xA8" },
460 // "か", "可"
461 { "\xE3\x81\x8B", "\xE5\x8F\xAF" },
462 // "かき", "牡蠣"
463 { "\xE3\x81\x8B\xE3\x81\x8D", "\xE7\x89\xA1\xE8\xA0\xA3" },
464 // "かき", "夏季"
465 { "\xE3\x81\x8B\xE3\x81\x8D", "\xE5\xA4\x8F\xE5\xAD\xA3" },
466 // "かきく", "柿久"
467 { "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", "\xE6\x9F\xBF\xE4\xB9\x85" },
468 // "さ", "差"
469 { "\xE3\x81\x95", "\xE5\xB7\xAE" },
470 // "さ", "左"
471 { "\xE3\x81\x95", "\xE5\xB7\xA6" },
472 // "さし", "刺"
473 { "\xE3\x81\x95\xE3\x81\x97", "\xE5\x88\xBA" },
474 // "た", "田"
475 { "\xE3\x81\x9F", "\xE7\x94\xB0" },
476 // "た", "多"
477 { "\xE3\x81\x9F", "\xE5\xA4\x9A" },
478 // "たち", 多値"
479 { "\xE3\x81\x9F\xE3\x81\xA1", "\xE5\xA4\x9A\xE5\x80\xA4" },
480 // "たちつ", "タチツ"
481 { "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4",
482 "\xE3\x82\xBF\xE3\x83\x81\xE3\x83\x84" },
483 // "は", "葉"
484 { "\xE3\x81\xAF", "\xE8\x91\x89" },
485 // "は", "歯"
486 { "\xE3\x81\xAF", "\xE6\xAD\xAF" },
487 // "はひ", "ハヒ"
488 { "\xE3\x81\xAF\xE3\x81\xB2", "\xE3\x83\x8F\xE3\x83\x92" },
489 // "ば", "場"
490 { "\xE3\x81\xB0", "\xE5\xA0\xB4" },
491 // "はび", "波美"
492 { "\xE3\x81\xAF\xE3\x81\xB3", "\xE6\xB3\xA2\xE7\xBE\x8E" },
493 // "ばび", "馬尾"
494 { "\xE3\x81\xB0\xE3\x81\xB3", "\xE9\xA6\xAC\xE5\xB0\xBE" },
495 // "ばびぶ", "バビブ"
496 { "\xE3\x81\xB0\xE3\x81\xB3\xE3\x81\xB6",
497 "\xE3\x83\x90\xE3\x83\x93\xE3\x83\x96" },
413 { "あ", "亜" },
414 { "あ", "安" },
415 { "あ", "在" },
416 { "あい", "愛" },
417 { "あい", "藍" },
418 { "あいう", "藍雨" },
419 { "か", "可" },
420 { "かき", "牡蠣" },
421 { "かき", "夏季" },
422 { "かきく", "柿久" },
423 { "さ", "差" },
424 { "さ", "左" },
425 { "さし", "刺" },
426 { "た", "田" },
427 { "た", "多" },
428 { "たち", "多値" },
429 { "たちつ", "タチツ" },
430 { "は", "葉" },
431 { "は", "歯" },
432 { "はひ", "ハヒ" },
433 { "ば", "場" },
434 { "はび", "波美" },
435 { "ばび", "馬尾" },
436 { "ばびぶ", "バビブ" },
498437 };
499438 const size_t kKeyValuesSize = arraysize(kKeyValues);
500439 unique_ptr<Token> tokens[kKeyValuesSize];
513452 // Test for normal prefix lookup without key expansion.
514453 {
515454 LookupPrefixTestCallback callback;
516 system_dic->LookupPrefix("\xE3\x81\x82\xE3\x81\x84", // "あい"
455 system_dic->LookupPrefix("あい", // "あい"
517456 convreq_, &callback);
518457 const std::set<std::pair<string, string>> &result = callback.result();
519458 // "あ" -- "あい" should be found.
534473 // feature.
535474 {
536475 LookupPrefixTestCallback callback;
537 system_dic->LookupPrefix(
538 "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", //"かきく"
539 convreq_,
540 &callback);
476 system_dic->LookupPrefix("かきく", convreq_, &callback);
541477 const std::set<std::pair<string, string>> &result = callback.result();
542478 // Only "か" should be found as the callback doesn't traverse the subtree of
543479 // "かき" due to culling request from LookupPrefixTestCallback::OnKey().
544480 for (size_t i = 0; i < kKeyValuesSize; ++i) {
545 const std::pair<string, string> entry(
546 kKeyValues[i].key, kKeyValues[i].value);
547 EXPECT_EQ(entry.first == "\xE3\x81\x8B", // "か"
548 result.find(entry) != result.end());
481 const std::pair<string, string> entry(kKeyValues[i].key,
482 kKeyValues[i].value);
483 EXPECT_EQ(entry.first == "か", result.find(entry) != result.end());
549484 }
550485 }
551486
552487 // Test for TRAVERSE_NEXT_KEY.
553488 {
554489 LookupPrefixTestCallback callback;
555 system_dic->LookupPrefix(
556 "\xE3\x81\x95\xE3\x81\x97\xE3\x81\x99", // "さしす"
557 convreq_,
558 &callback);
490 system_dic->LookupPrefix("さしす", convreq_, &callback);
559491 const std::set<std::pair<string, string>> &result = callback.result();
560492 // Only "さし" should be found as tokens for "さ" is skipped (see
561493 // LookupPrefixTestCallback::OnKey()).
562494 for (size_t i = 0; i < kKeyValuesSize; ++i) {
563 const std::pair<string, string> entry(
564 kKeyValues[i].key, kKeyValues[i].value);
565 EXPECT_EQ(entry.first == "\xE3\x81\x95\xE3\x81\x97", // "さし"
566 result.find(entry) != result.end());
495 const std::pair<string, string> entry(kKeyValues[i].key,
496 kKeyValues[i].value);
497 EXPECT_EQ(entry.first == "さし", result.find(entry) != result.end());
567498 }
568499 }
569500
570501 // Test for TRAVERSE_DONE.
571502 {
572503 LookupPrefixTestCallback callback;
573 system_dic->LookupPrefix(
574 "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4", // "たちつ"
575 convreq_,
576 &callback);
504 system_dic->LookupPrefix("たちつ", convreq_, &callback);
577505 const std::set<std::pair<string, string>> &result = callback.result();
578506 // Nothing should be found as the traversal is immediately done after seeing
579507 // "た"; see LookupPrefixTestCallback::OnKey().
586514 // Use kana modifier insensitive lookup
587515 request_.set_kana_modifier_insensitive_conversion(true);
588516 config_.set_use_kana_modifier_insensitive_conversion(true);
589 system_dic->LookupPrefix(
590 "\xE3\x81\xAF\xE3\x81\xB2", // "はひ"
591 convreq_,
592 &callback);
517 system_dic->LookupPrefix("はひ", convreq_, &callback);
593518 const std::set<std::pair<string, string>> &result = callback.result();
594519 const char *kExpectedKeys[] = {
595 "\xE3\x81\xAF", // "は"
596 "\xE3\x81\xB0", // "ば"
597 "\xE3\x81\xAF\xE3\x81\xB2", // "はひ"
598 "\xE3\x81\xB0\xE3\x81\xB2", // "ばひ"
599 "\xE3\x81\xAF\xE3\x81\xB3", // "はび"
600 "\xE3\x81\xB0\xE3\x81\xB3", // "ばび"
520 "は",
521 "ば",
522 "はひ",
523 "ばひ",
524 "はび",
525 "ばび",
601526 };
602527 const std::set<string> expected(kExpectedKeys,
603 kExpectedKeys + arraysize(kExpectedKeys));
528 kExpectedKeys + arraysize(kExpectedKeys));
604529 for (size_t i = 0; i < kKeyValuesSize; ++i) {
605530 const bool to_be_found =
606531 expected.find(kKeyValues[i].key) != expected.end();
607 const std::pair<string, string> entry(
608 kKeyValues[i].key, kKeyValues[i].value);
532 const std::pair<string, string> entry(kKeyValues[i].key,
533 kKeyValues[i].value);
609534 EXPECT_EQ(to_be_found, result.find(entry) != result.end());
610535 }
611536 }
615540 std::vector<Token *> tokens;
616541 ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens);
617542
618 // "まみむめもや" -> "value0"
619 tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80"
620 "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84",
621 "value0"));
622 // "まみむめもやゆよ" -> "value1"
623 tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80"
624 "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84"
625 "\xe3\x82\x86\xe3\x82\x88",
626 "value1"));
543 tokens.push_back(CreateToken("まみむめもや", "value0"));
544 tokens.push_back(CreateToken("まみむめもやゆよ", "value1"));
627545 // Build a dictionary with the above two tokens plus those from test data.
628546 {
629547 std::vector<Token *> source_tokens = tokens;
636554 << "Failed to open dictionary source: " << dic_fn_;
637555
638556 // All the tokens in |tokens| should be looked up by "まみむめも".
639 const char *kMamimumemo =
640 "\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80\xe3\x82\x81\xe3\x82\x82";
557 const char kMamimumemo[] = "まみむめも";
641558 CheckMultiTokensExistenceCallback callback(tokens);
642559 system_dic->LookupPredictive(kMamimumemo, convreq_, &callback);
643560 EXPECT_TRUE(callback.AreAllFound());
647564 std::vector<Token *> tokens;
648565 ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens);
649566
650 // "がっこう" -> "学校"
651 tokens.push_back(CreateToken(
652 "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86",
653 "\xE5\xAD\xA6\xE6\xA0\xA1"));
654 // "かっこう" -> "格好"
655 tokens.push_back(CreateToken(
656 "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86",
657 "\xE6\xA0\xBC\xE5\xA5\xBD"));
567 tokens.push_back(CreateToken("がっこう", "学校"));
568 tokens.push_back(CreateToken("かっこう", "格好"));
658569
659570 BuildSystemDictionary(tokens, 100);
660571 unique_ptr<SystemDictionary> system_dic(
662573 ASSERT_TRUE(system_dic.get() != NULL)
663574 << "Failed to open dictionary source: " << dic_fn_;
664575
665 // "かつこう"
666 const string kKey = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86";
576 const string kKey = "かつこう";
667577
668578 // Without Kana modifier insensitive lookup flag, nothing is looked up.
669579 CollectTokenCallback callback;
684594 std::vector<Token *> tokens;
685595 ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens);
686596
687 // "あい" -> "ai"
688 tokens.push_back(CreateToken("\xe3\x81\x82\xe3\x81\x84", "ai"));
689 // "あいうえお" -> "aiueo"
690 tokens.push_back(CreateToken(
691 "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a",
692 "aiueo"));
597 tokens.push_back(CreateToken("あい", "ai"));
598 tokens.push_back(CreateToken("あいうえお", "aiueo"));
693599 // Build a dictionary with the above two tokens plus those from test data.
694600 {
695601 std::vector<Token *> source_tokens = tokens;
705611 // expected that "あいうえお" is not looked up because of longer key cut-off
706612 // mechanism. However, "あい" is looked up as it's short.
707613 CheckMultiTokensExistenceCallback callback(tokens);
708 system_dic->LookupPredictive("\xe3\x81\x82", // "あ"
709 convreq_, &callback);
614 system_dic->LookupPredictive("あ", convreq_, &callback);
710615 EXPECT_TRUE(callback.IsFound(tokens[0]));
711616 EXPECT_FALSE(callback.IsFound(tokens[1]));
712617 }
714619 TEST_F(SystemDictionaryTest, LookupExact) {
715620 std::vector<Token *> source_tokens;
716621
717 // "は"
718 const string k0 = "\xe3\x81\xaf";
719 // "はひふへほ"
720 const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81"
721 "\xbb";
622 const string k0 = "は";
623 const string k1 = "はひふへほ";
722624
723625 unique_ptr<Token> t0(CreateToken(k0, "aa"));
724626 unique_ptr<Token> t1(CreateToken(k1, "bb"));
749651
750652 TEST_F(SystemDictionaryTest, LookupReverse) {
751653 unique_ptr<Token> t0(new Token);
752 // "ど"
753 t0->key = "\xe3\x81\xa9";
754 // "ド"
755 t0->value = "\xe3\x83\x89";
654 t0->key = "ど";
655 t0->value = "ド";
756656 t0->cost = 1;
757657 t0->lid = 2;
758658 t0->rid = 3;
759659 unique_ptr<Token> t1(new Token);
760 // "どらえもん"
761 t1->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
762 // "ドラえもん"
763 t1->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
660 t1->key = "どらえもん";
661 t1->value = "ドラえもん";
764662 t1->cost = 1;
765663 t1->lid = 2;
766664 t1->rid = 3;
767665 unique_ptr<Token> t2(new Token);
768 // "といざらす®"
769 t2->key = "\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x96\xe3\x82\x89\xe3\x81\x99\xc2"
770 "\xae";
771 // "トイザらス®"
772 t2->value = "\xe3\x83\x88\xe3\x82\xa4\xe3\x82\xb6\xe3\x82\x89\xe3\x82\xb9\xc2"
773 "\xae";
666 t2->key = "といざらす®";
667 t2->value = "トイザらス®";
774668 t2->cost = 1;
775669 t2->lid = 2;
776670 t2->rid = 3;
777671 unique_ptr<Token> t3(new Token);
778 // "ああああああ"
779672 // Both t3 and t4 will be encoded into 3 bytes.
780 t3->key = "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82"
781 "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82";
673 t3->key = "ああああああ";
782674 t3->value = t3->key;
783675 t3->cost = 32000;
784676 t3->lid = 1;
788680 t4->lid = 1;
789681 t4->rid = 2;
790682 unique_ptr<Token> t5(new Token);
791 // "いいいいいい"
792683 // t5 will be encoded into 3 bytes.
793 t5->key = "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84"
794 "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84";
684 t5->key = "いいいいいい";
795685 t5->value = t5->key;
796686 t5->cost = 32000;
797687 t5->lid = 1;
798688 t5->rid = 1;
799689 // spelling correction token should not be retrieved by reverse lookup.
800690 unique_ptr<Token> t6(new Token);
801 // "どらえもん"
802 t6->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
803 // "ドラえもん"
804 t6->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
691 t6->key = "どらえもん";
692 t6->value = "ドラえもん";
805693 t6->cost = 1;
806694 t6->lid = 2;
807695 t6->rid = 3;
808696 t6->attributes = Token::SPELLING_CORRECTION;
809697 unique_ptr<Token> t7(new Token);
810 // "こんさーと"
811 t7->key = "\xe3\x81\x93\xe3\x82\x93\xe3\x81\x95\xe3\x83\xbc\xe3\x81\xa8";
812 // "コンサート"
813 t7->value = "\xe3\x82\xb3\xe3\x83\xb3\xe3\x82\xb5\xe3\x83\xbc\xe3\x83\x88";
698 t7->key = "こんさーと";
699 t7->value = "コンサート";
814700 t7->cost = 1;
815701 t7->lid = 1;
816702 t7->rid = 1;
817703 // "バージョン" should not return a result with the key "ヴァージョン".
818704 unique_ptr<Token> t8(new Token);
819 // "ばーじょん"
820 t8->key = "\xE3\x81\xB0\xE3\x83\xBC\xE3\x81\x98\xE3\x82\x87\xE3\x82\x93";
821 // "バージョン"
822 t8->value = "\xE3\x83\x90\xE3\x83\xBC\xE3\x82\xB8\xE3\x83\xA7\xE3\x83\xB3";
705 t8->key = "ばーじょん";
706 t8->value = "バージョン";
823707 t8->cost = 1;
824708 t8->lid = 1;
825709 t8->rid = 1;
885769 {
886770 // test for non exact transliterated index string.
887771 // append "が"
888 const string key = t7->value + "\xe3\x81\x8c";
772 const string key = t7->value + "が";
889773 CollectTokenCallback callback;
890774 system_dic->LookupReverse(key, convreq_, &callback);
891775 const std::vector<Token> &tokens = callback.tokens();
936820 }
937821
938822 TEST_F(SystemDictionaryTest, LookupReverseWithCache) {
939 const string kDoraemon =
940 "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
823 const string kDoraemon = "ドラえもん";
941824
942825 Token source_token;
943 // "どらえもん"
944 source_token.key =
945 "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
946 // "ドラえもん"
826 source_token.key = "どらえもん";
947827 source_token.value = kDoraemon;
948828 source_token.cost = 1;
949829 source_token.lid = 2;
971851 TEST_F(SystemDictionaryTest, SpellingCorrectionTokens) {
972852 std::vector<Token> tokens(3);
973853
974 // "あぼがど"
975 tokens[0].key = "\xe3\x81\x82\xe3\x81\xbc\xe3\x81\x8c\xe3\x81\xa9";
976 // "アボカド"
977 tokens[0].value = "\xe3\x82\xa2\xe3\x83\x9c\xe3\x82\xab\xe3\x83\x89";
854 tokens[0].key = "あぼがど";
855 tokens[0].value = "アボカド";
978856 tokens[0].cost = 1;
979857 tokens[0].lid = 0;
980858 tokens[0].rid = 2;
981859 tokens[0].attributes = Token::SPELLING_CORRECTION;
982860
983 // "しゅみれーしょん"
984 tokens[1].key =
985 "\xe3\x81\x97\xe3\x82\x85\xe3\x81\xbf\xe3\x82\x8c"
986 "\xe3\x83\xbc\xe3\x81\x97\xe3\x82\x87\xe3\x82\x93";
987 // "シミュレーション"
988 tokens[1].value =
989 "\xe3\x82\xb7\xe3\x83\x9f\xe3\x83\xa5\xe3\x83\xac"
990 "\xe3\x83\xbc\xe3\x82\xb7\xe3\x83\xa7\xe3\x83\xb3";
861 tokens[1].key = "しゅみれーしょん";
862 tokens[1].value = "シミュレーション";
991863 tokens[1].cost = 1;
992864 tokens[1].lid = 100;
993865 tokens[1].rid = 3;
994866 tokens[1].attributes = Token::SPELLING_CORRECTION;
995867
996 // "あきはばら"
997 tokens[2].key =
998 "\xe3\x81\x82\xe3\x81\x8d\xe3\x81\xaf\xe3\x81\xb0\xe3\x82\x89";
999 // "秋葉原"
1000 tokens[2].value = "\xe7\xa7\x8b\xe8\x91\x89\xe5\x8e\x9f";
868 tokens[2].key = "あきはばら";
869 tokens[2].value = "秋葉原";
1001870 tokens[2].cost = 1000;
1002871 tokens[2].lid = 1;
1003872 tokens[2].rid = 2;
1022891 }
1023892
1024893 TEST_F(SystemDictionaryTest, EnableNoModifierTargetWithLoudsTrie) {
1025 // "かつ"
1026 const string k0 = "\xE3\x81\x8B\xE3\x81\xA4";
1027 // "かっこ"
1028 const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93";
1029 // "かつこう"
1030 const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86";
1031 // "かっこう"
1032 const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86";
1033 // "がっこう"
1034 const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86";
894 const string k0 = "かつ";
895 const string k1 = "かっこ";
896 const string k2 = "かつこう";
897 const string k3 = "かっこう";
898 const string k4 = "がっこう";
1035899
1036900 unique_ptr<Token> tokens[5];
1037901 tokens[0].reset(CreateToken(k0, "aa"));
1088952 }
1089953
1090954 TEST_F(SystemDictionaryTest, NoModifierForKanaEntries) {
1091 // "ていすてぃんぐ", "テイスティング"
1092 unique_ptr<Token> t0(CreateToken(
1093 "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6"
1094 "\xe3\x81\x83\xe3\x82\x93\xe3\x81\x90",
1095 "\xe3\x83\x86\xe3\x82\xa4\xe3\x82\xb9\xe3\x83\x86"
1096 "\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0"));
1097 // "てすとです", "てすとです"
1098 unique_ptr<Token> t1(CreateToken(
1099 "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99",
1100 "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99"));
955 unique_ptr<Token> t0(CreateToken("ていすてぃんぐ", "テイスティング"));
956 unique_ptr<Token> t1(CreateToken("てすとです", "てすとです"));
1101957
1102958 std::vector<Token *> source_tokens;
1103959 source_tokens.push_back(t0.get());
1112968 << "Failed to open dictionary source:" << dic_fn_;
1113969
1114970 // Lookup |t0| from "ていすていんぐ"
1115 const string k = "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6"
1116 "\xe3\x81\x84\xe3\x82\x93\xe3\x81\x90";
971 const string k = "ていすていんぐ";
1117972 request_.set_kana_modifier_insensitive_conversion(true);
1118973 config_.set_use_kana_modifier_insensitive_conversion(true);
1119974 CheckTokenExistenceCallback callback(t0.get());
1122977 }
1123978
1124979 TEST_F(SystemDictionaryTest, DoNotReturnNoModifierTargetWithLoudsTrie) {
1125 // "かつ"
1126 const string k0 = "\xE3\x81\x8B\xE3\x81\xA4";
1127 // "かっこ"
1128 const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93";
1129 // "かつこう"
1130 const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86";
1131 // "かっこう"
1132 const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86";
1133 // "がっこう"
1134 const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86";
980 const string k0 = "かつ";
981 const string k1 = "かっこ";
982 const string k2 = "かつこう";
983 const string k3 = "かっこう";
984 const string k4 = "がっこう";
1135985
1136986 unique_ptr<Token> t0(CreateToken(k0, "aa"));
1137987 unique_ptr<Token> t1(CreateToken(k1, "bb"));
8585 } // namespace
8686
8787 TEST(UserDictionaryImporter, ImportFromNormalTextTest) {
88 // "きょうと\t京都\t名詞\n"
89 // "おおさか\t大阪\t地名\n"
90 // "とうきょう\t東京\t地名\tコメント\n"
91 // "すずき\t鈴木\t人名\n"
9288 const char kInput[] =
93 "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t"
94 "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n"
95 "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t"
96 "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n"
97 "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3"
98 "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5"
99 "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n"
100 "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4"
101 "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n";
89 "きょうと\t京都\t名詞\n"
90 "おおさか\t大阪\t地名\n"
91 "とうきょう\t東京\t地名\tコメント\n"
92 "すずき\t鈴木\t人名\n";
10293
10394 UserDictionaryImporter::StringTextLineIterator iter(kInput);
10495 UserDictionaryStorage::UserDictionary user_dic;
10596
10697 EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR,
10798 UserDictionaryImporter::ImportFromTextLineIterator(
108 UserDictionaryImporter::MOZC,
109 &iter,
110 &user_dic));
99 UserDictionaryImporter::MOZC, &iter, &user_dic));
111100
112101 ASSERT_EQ(4, user_dic.entries_size());
113102
114 // EXPECT_EQ("きょうと", user_dic.entries(0).key());
115 // EXPECT_EQ("京都", user_dic.entries(0).value());
116 // EXPECT_EQ("名詞", user_dic.entries(0).pos());
117 // EXPECT_EQ("", user_dic.entries(0).comment());
118 EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8",
119 user_dic.entries(0).key());
120 EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value());
103 EXPECT_EQ("きょうと", user_dic.entries(0).key());
104 EXPECT_EQ("京都", user_dic.entries(0).value());
121105 EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos());
122106 EXPECT_EQ("", user_dic.entries(0).comment());
123107
124 // EXPECT_EQ("おおさか", user_dic.entries(1).key());
125 // EXPECT_EQ("大阪", user_dic.entries(1).value());
126 // EXPECT_EQ("地名", user_dic.entries(1).pos());
127 // EXPECT_EQ("", user_dic.entries(1).comment());
128 EXPECT_EQ("\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B",
129 user_dic.entries(1).key());
130 EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value());
108 EXPECT_EQ("おおさか", user_dic.entries(1).key());
109 EXPECT_EQ("大阪", user_dic.entries(1).value());
131110 EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME,
132111 user_dic.entries(1).pos());
133112 EXPECT_EQ("", user_dic.entries(1).comment());
134113
135
136 // EXPECT_EQ("とうきょう", user_dic.entries(2).key());
137 // EXPECT_EQ("東京", user_dic.entries(2).value());
138 // EXPECT_EQ("地名", user_dic.entries(2).pos());
139 // EXPECT_EQ("コメント", user_dic.entries(2).comment());
140 EXPECT_EQ("\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86",
141 user_dic.entries(2).key());
142 EXPECT_EQ("\xE6\x9D\xB1\xE4\xBA\xAC", user_dic.entries(2).value());
114 EXPECT_EQ("とうきょう", user_dic.entries(2).key());
115 EXPECT_EQ("東京", user_dic.entries(2).value());
143116 EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME,
144117 user_dic.entries(2).pos());
145 EXPECT_EQ("\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88",
146 user_dic.entries(2).comment());
147
148 // EXPECT_EQ("すずき", user_dic.entries(3).key());
149 // EXPECT_EQ("鈴木", user_dic.entries(3).value());
150 // EXPECT_EQ("人名", user_dic.entries(3).pos());
151 // EXPECT_EQ("", user_dic.entries(3).comment());
152 EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", user_dic.entries(3).key());
153 EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(3).value());
118 EXPECT_EQ("コメント", user_dic.entries(2).comment());
119
120 EXPECT_EQ("すずき", user_dic.entries(3).key());
121 EXPECT_EQ("鈴木", user_dic.entries(3).value());
154122 EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME,
155123 user_dic.entries(3).pos());
156124 EXPECT_EQ("", user_dic.entries(3).comment());
157125 }
158126
159127 TEST(UserDictionaryImporter, ImportFromKotoeriTextTest) {
160 // "\"きょうと\",\"京都\",\"名詞\"\n"
161 // "\"おおさか\",\"大阪\",\"地名\"\n"
162 // "// last line"
163128 const char kInput[] =
164 "\"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\","
165 "\"\xE4\xBA\xAC\xE9\x83\xBD\",\"\xE5\x90\x8D\xE8\xA9\x9E\"\n"
166 "\"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\","
167 "\"\xE5\xA4\xA7\xE9\x98\xAA\",\"\xE5\x9C\xB0\xE5\x90\x8D\"\n"
129 "\"きょうと\","
130 "\"京都\",\"名詞\"\n"
131 "\"おおさか\","
132 "\"大阪\",\"地名\"\n"
168133 "// last line";
169
170134 {
171135 UserDictionaryImporter::StringTextLineIterator iter(kInput);
172136 UserDictionaryStorage::UserDictionary user_dic;
173137
174138 EXPECT_EQ(UserDictionaryImporter::IMPORT_NOT_SUPPORTED,
175139 UserDictionaryImporter::ImportFromTextLineIterator(
176 UserDictionaryImporter::MOZC,
177 &iter,
178 &user_dic));
140 UserDictionaryImporter::MOZC, &iter, &user_dic));
179141
180142 EXPECT_EQ(0, user_dic.entries_size());
181143 }
182
183144 {
184145 UserDictionaryImporter::StringTextLineIterator iter(kInput);
185146 UserDictionaryStorage::UserDictionary user_dic;
186147
187148 EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR,
188149 UserDictionaryImporter::ImportFromTextLineIterator(
189 UserDictionaryImporter::KOTOERI,
190 &iter,
191 &user_dic));
150 UserDictionaryImporter::KOTOERI, &iter, &user_dic));
192151
193152 ASSERT_EQ(2, user_dic.entries_size());
194153
195 // EXPECT_EQ("きょうと", user_dic.entries(0).key());
196 // EXPECT_EQ("京都", user_dic.entries(0).value());
197 // EXPECT_EQ("名詞", user_dic.entries(0).pos());
198 EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8",
199 user_dic.entries(0).key());
200 EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value());
201 EXPECT_EQ(user_dictionary::UserDictionary::NOUN,
202 user_dic.entries(0).pos());
203
204 // EXPECT_EQ("おおさか", user_dic.entries(1).key());
205 // EXPECT_EQ("大阪", user_dic.entries(1).value());
206 // EXPECT_EQ("地名", user_dic.entries(1).pos());
207 EXPECT_EQ("\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B",
208 user_dic.entries(1).key());
209 EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value());
154 EXPECT_EQ("きょうと", user_dic.entries(0).key());
155 EXPECT_EQ("京都", user_dic.entries(0).value());
156 EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos());
157
158 EXPECT_EQ("おおさか", user_dic.entries(1).key());
159 EXPECT_EQ("大阪", user_dic.entries(1).value());
210160 EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME,
211161 user_dic.entries(1).pos());
212162 }
213163 }
214164
215165 TEST(UserDictionaryImporter, ImportFromCommentTextTest) {
216 // "きょうと\t京都\t名詞\n"
217 // "!おおさか\t大阪\t地名\n"
218 // "\n"
219 // "#とうきょう\t東京\t地名\tコメント\n"
220 // "すずき\t鈴木\t人名\n";
221166 const char kInput[] =
222 "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t"
223 "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n"
224 "!\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t"
225 "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n"
167 "きょうと\t京都\t名詞\n"
168 "!おおさか\t大阪\t地名\n"
226169 "\n"
227 "#\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\t"
228 "\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5\x90\x8D\t"
229 "\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n"
230 "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t"
231 "\xE9\x88\xB4\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n";
232
170 "#とうきょう\t東京\t地名\tコメント\n"
171 "すずき\t鈴木\t人名\n";
233172 {
234173 const string kMsImeInput(string("!Microsoft IME\n") + kInput);
235174 UserDictionaryImporter::StringTextLineIterator iter(kMsImeInput);
237176
238177 EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR,
239178 UserDictionaryImporter::ImportFromTextLineIterator(
240 UserDictionaryImporter::MSIME,
241 &iter,
242 &user_dic));
179 UserDictionaryImporter::MSIME, &iter, &user_dic));
243180
244181 ASSERT_EQ(3, user_dic.entries_size());
245182
246 // EXPECT_EQ("きょうと", user_dic.entries(0).key());
247 // EXPECT_EQ("京都", user_dic.entries(0).value());
248 // EXPECT_EQ("名詞", user_dic.entries(0).pos());
249 EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8",
250 user_dic.entries(0).key());
251 EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value());
252 EXPECT_EQ(user_dictionary::UserDictionary::NOUN,
253 user_dic.entries(0).pos());
254
255 // EXPECT_EQ("#とうきょう", user_dic.entries(1).key());
256 // EXPECT_EQ("東京", user_dic.entries(1).value());
257 // EXPECT_EQ("地名", user_dic.entries(1).pos());
258 EXPECT_EQ("#\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86",
259 user_dic.entries(1).key());
260 EXPECT_EQ("\xE6\x9D\xB1\xE4\xBA\xAC", user_dic.entries(1).value());
183 EXPECT_EQ("きょうと", user_dic.entries(0).key());
184 EXPECT_EQ("京都", user_dic.entries(0).value());
185 EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos());
186
187 EXPECT_EQ("#とうきょう", user_dic.entries(1).key());
188 EXPECT_EQ("東京", user_dic.entries(1).value());
261189 EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME,
262190 user_dic.entries(1).pos());
263191
264 // EXPECT_EQ("すずき", user_dic.entries(2).key());
265 // EXPECT_EQ("鈴木", user_dic.entries(2).value());
266 // EXPECT_EQ("人名", user_dic.entries(2).pos());
267 EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D",
268 user_dic.entries(2).key());
269 EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(2).value());
192 EXPECT_EQ("すずき", user_dic.entries(2).key());
193 EXPECT_EQ("鈴木", user_dic.entries(2).value());
270194 EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME,
271195 user_dic.entries(2).pos());
272196 }
273
274197 {
275198 UserDictionaryImporter::StringTextLineIterator iter(kInput);
276199 UserDictionaryStorage::UserDictionary user_dic;
277200
278201 EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR,
279202 UserDictionaryImporter::ImportFromTextLineIterator(
280 UserDictionaryImporter::MOZC,
281 &iter,
282 &user_dic));
203 UserDictionaryImporter::MOZC, &iter, &user_dic));
283204
284205 ASSERT_EQ(3, user_dic.entries_size());
285206
286 // EXPECT_EQ("きょうと", user_dic.entries(0).key());
287 // EXPECT_EQ("京都", user_dic.entries(0).value());
288 // EXPECT_EQ("名詞", user_dic.entries(0).pos());
289 EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8",
290 user_dic.entries(0).key());
291 EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value());
292 EXPECT_EQ(user_dictionary::UserDictionary::NOUN,
293 user_dic.entries(0).pos());
294
295 // EXPECT_EQ("!おおさか", user_dic.entries(1).key());
296 // EXPECT_EQ("大阪", user_dic.entries(1).value());
297 // EXPECT_EQ("地名", user_dic.entries(1).pos());
298 EXPECT_EQ("!\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B",
299 user_dic.entries(1).key());
300 EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value());
207 EXPECT_EQ("きょうと", user_dic.entries(0).key());
208 EXPECT_EQ("京都", user_dic.entries(0).value());
209 EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos());
210
211 EXPECT_EQ("!おおさか", user_dic.entries(1).key());
212 EXPECT_EQ("大阪", user_dic.entries(1).value());
301213 EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME,
302214 user_dic.entries(1).pos());
303215
304 // EXPECT_EQ("すずき", user_dic.entries(2).key());
305 // EXPECT_EQ("鈴木", user_dic.entries(2).value());
306 // EXPECT_EQ("人名", user_dic.entries(2).pos());
307 EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D",
308 user_dic.entries(2).key());
309 EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(2).value());
216 EXPECT_EQ("すずき", user_dic.entries(2).key());
217 EXPECT_EQ("鈴木", user_dic.entries(2).value());
310218 EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME,
311219 user_dic.entries(2).pos());
312220 }
313221 }
314222
315223 TEST(UserDictionaryImporter, ImportFromInvalidTextTest) {
316 // "a"
317 // "\n"
318 // "東京\t\t地名\tコメント\n"
319 // "すずき\t鈴木\t人名\n";
320224 const char kInput[] =
321225 "a"
322226 "\n"
323 "\xE6\x9D\xB1\xE4\xBA\xAC\t\t\xE5\x9C\xB0\xE5\x90\x8D\t"
324 "\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n"
325 "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t"
326 "\xE9\x88\xB4\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n";
227 "東京\t\t地名\tコメント\n"
228 "すずき\t鈴木\t人名\n";
327229
328230 UserDictionaryImporter::StringTextLineIterator iter(kInput);
329231 UserDictionaryStorage::UserDictionary user_dic;
330232
331233 EXPECT_EQ(UserDictionaryImporter::IMPORT_INVALID_ENTRIES,
332234 UserDictionaryImporter::ImportFromTextLineIterator(
333 UserDictionaryImporter::MOZC,
334 &iter,
335 &user_dic));
235 UserDictionaryImporter::MOZC, &iter, &user_dic));
336236
337237 ASSERT_EQ(1, user_dic.entries_size());
338238
339 // EXPECT_EQ("すずき", user_dic.entries(0).key());
340 // EXPECT_EQ("鈴木", user_dic.entries(0).value());
341 // EXPECT_EQ("人名", user_dic.entries(0).pos());
342 EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", user_dic.entries(0).key());
343 EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(0).value());
239 EXPECT_EQ("すずき", user_dic.entries(0).key());
240 EXPECT_EQ("鈴木", user_dic.entries(0).value());
344241 EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME,
345242 user_dic.entries(0).pos());
346243 }
363260 UserDictionaryImporter::RawEntry entry;
364261 entry.key = "aa";
365262 entry.value = "aa";
366 // entry.pos = "名詞";
367 entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E";
263 entry.pos = "名詞";
368264 entries.push_back(entry);
369265 }
370266
400296 std::to_string(static_cast<uint32>(j)));
401297 entry.key = key;
402298 entry.value = value;
403 // entry.pos = "名詞";
404 entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E";
299 entry.pos = "名詞";
405300 entries.push_back(entry);
406301 }
407302
442337 entry.key = key;
443338 entry.value = value;
444339 if (j % 2 == 0) {
445 // entry.pos = "名詞";
446 entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E";
340 entry.pos = "名詞";
447341 }
448342 entries.push_back(entry);
449343 }
467361 = user_dic.add_entries();
468362 entry->set_key("aa");
469363 entry->set_value("aa");
470 // entry->set_pos("名詞");
471364 entry->set_pos(user_dictionary::UserDictionary::NOUN);
472365 }
473366
477370 UserDictionaryImporter::RawEntry entry;
478371 entry.key = "aa";
479372 entry.value = "aa";
480 // entry.pos = "名詞";
481 entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E";
373 entry.pos = "名詞";
482374 entries.push_back(entry);
483375 }
484376
493385 UserDictionaryImporter::RawEntry entry;
494386 entry.key = "bb";
495387 entry.value = "bb";
496 // entry.pos = "名詞";
497 entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E";
388 entry.pos = "名詞";
498389 entries.push_back(entry);
499390 }
500391
618509
619510 TEST(UserDictionaryImporter, GuessEncodingTypeTest) {
620511 {
621 // "これはテストです。"
622 const char str[] = "\xE3\x81\x93\xE3\x82\x8C\xE3\x81\xAF\xE3\x83\x86"
623 "\xE3\x82\xB9\xE3\x83\x88\xE3\x81\xA7\xE3\x81\x99"
624 "\xE3\x80\x82";
512 const char str[] = "これはテストです。";
625513 EXPECT_EQ(UserDictionaryImporter::UTF8,
626514 UserDictionaryImporter::GuessEncodingType(str));
627515 }
628
629 {
630 // "私の名前は中野ですABC"
631 const char str[] = "\xE7\xA7\x81\xE3\x81\xAE\xE5\x90\x8D\xE5\x89\x8D"
632 "\xE3\x81\xAF\xE4\xB8\xAD\xE9\x87\x8E\xE3\x81\xA7"
633 "\xE3\x81\x99" "ABC";
516 {
517 const char str[] = "私の名前は中野ですABC";
634518 EXPECT_EQ(UserDictionaryImporter::UTF8,
635519 UserDictionaryImporter::GuessEncodingType(str));
636520 }
637
638521 {
639522 const char str[] = "ABCDEFG abcdefg";
640523 EXPECT_EQ(UserDictionaryImporter::UTF8,
641524 UserDictionaryImporter::GuessEncodingType(str));
642525 }
643
644 {
645 // "ハロー"
646 const char str[] = "\xE3\x83\x8F\xE3\x83\xAD\xE3\x83\xBC";
526 {
527 const char str[] = "ハロー";
647528 EXPECT_EQ(UserDictionaryImporter::UTF8,
648529 UserDictionaryImporter::GuessEncodingType(str));
649530 }
5151 using ::mozc::user_dictionary::UserDictionaryCommandStatus;
5252 using ::mozc::user_dictionary::UserDictionarySessionHandler;
5353
54 // "きょうと\t京都\t名詞\n"
55 // "!おおさか\t大阪\t地名\n"
56 // "\n"
57 // "#とうきょう\t東京\t地名\tコメント\n"
58 // "すずき\t鈴木\t人名\n";
5954 const char kDictionaryData[] =
60 "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t"
61 "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n"
62 "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t"
63 "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n"
64 "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3"
65 "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5"
66 "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n"
67 "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4"
68 "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n";
55 "きょうと\t京都\t名詞\n"
56 "おおさか\t大阪\t地名\n"
57 "とうきょう\t東京\t地名\tコメント\n"
58 "すずき\t鈴木\t人名\n";
6959
7060 // 0 means invalid dictionary id.
7161 // c.f., UserDictionaryUtil::CreateNewDictionaryId()
7363
7464 class UserDictionarySessionHandlerTest : public ::testing::Test {
7565 protected:
76 virtual void SetUp() {
66 void SetUp() override {
7767 original_user_profile_directory_ = SystemUtil::GetUserProfileDirectory();
7868 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
7969 FileUtil::Unlink(GetUserDictionaryFile());
8575 handler_->set_dictionary_path(GetUserDictionaryFile());
8676 }
8777
88 virtual void TearDown() {
78 void TearDown() override {
8979 FileUtil::Unlink(GetUserDictionaryFile());
9080 SystemUtil::SetUserProfileDirectory(original_user_profile_directory_);
9181 }
927917 const uint64 session_id = CreateSession();
928918
929919 string data = kDictionaryData;
930 // "☻\tEMOTICON\t名詞\n": Invalid symbol reading.
931 data.append("\xE2\x98\xBB\tEMOTICON\t\xE5\x90\x8D\xE8\xA9\x9E\n");
932 // "読み\tYOMI\t名詞\n": Invalid Kanji reading.
933 data.append("\xE8\xAA\xAD\xE3\x81\xBF\tYOMI\t\xE5\x90\x8D\xE8\xA9\x9E\n");
920 data.append("☻\tEMOTICON\t名詞\n"); // Invalid symbol reading.
921 data.append("読み\tYOMI\t名詞\n"); // Invalid Kanji reading.
934922
935923 // Import data to a new dictionary.
936924 Clear();
4343
4444 namespace {
4545
46 // "きょうと\t京都\t名詞\n"
47 // "!おおさか\t大阪\t地名\n"
48 // "\n"
49 // "#とうきょう\t東京\t地名\tコメント\n"
50 // "すずき\t鈴木\t人名\n";
5146 const char kDictionaryData[] =
52 "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t"
53 "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n"
54 "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t"
55 "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n"
56 "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3"
57 "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5"
58 "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n"
59 "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4"
60 "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n";
47 "きょうと\t京都\t名詞\n"
48 "おおさか\t大阪\t地名\n"
49 "とうきょう\t東京\t地名\tコメント\n"
50 "すずき\t鈴木\t人名\n";
6151
6252 using ::mozc::FileUtil;
6353 using ::mozc::SystemUtil;
6858
6959 class UserDictionarySessionTest : public ::testing::Test {
7060 protected:
71 virtual void SetUp() {
61 void SetUp() override {
7262 original_user_profile_directory_ = SystemUtil::GetUserProfileDirectory();
7363 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
7464 FileUtil::Unlink(GetUserDictionaryFile());
7565 }
7666
77 virtual void TearDown() {
67 void TearDown() override {
7868 FileUtil::Unlink(GetUserDictionaryFile());
7969 SystemUtil::SetUserProfileDirectory(original_user_profile_directory_);
8070 }
616606 EXPECT_PROTO_PEQ(
617607 "dictionaries: <\n"
618608 " entries: <\n"
619 " key: \"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\"\n"
620 " value: \"\xE4\xBA\xAC\xE9\x83\xBD\"\n"
609 " key: \"きょうと\"\n"
610 " value: \"京都\"\n"
621611 " pos: NOUN\n"
622612 " >\n"
623613 " entries: <\n"
624 " key: \"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\"\n"
625 " value: \"\xE5\xA4\xA7\xE9\x98\xAA\"\n"
614 " key: \"おおさか\"\n"
615 " value: \"大阪\"\n"
626616 " pos: PLACE_NAME\n"
627617 " >\n"
628618 " entries: <\n"
629 " key: \"\xE3\x81\xA8\xE3\x81\x86\xE3"
630 "\x81\x8D\xE3\x82\x87\xE3\x81\x86\"\n"
631 " value: \"\xE6\x9D\xB1\xE4\xBA\xAC\"\n"
619 " key: \"とうきょう\"\n"
620 " value: \"東京\"\n"
632621 " pos: PLACE_NAME\n"
633 " comment: \"\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\"\n"
622 " comment: \"コメント\"\n"
634623 " >\n"
635624 " entries: <\n"
636 " key: \"\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\"\n"
637 " value: \"\xE9\x88\xB4\xE6\x9C\xA8\"\n"
625 " key: \"すずき\"\n"
626 " value: \"鈴木\"\n"
638627 " pos: PERSONAL_NAME\n"
639628 " >\n"
640629 ">",
664653 "dictionaries: <\n"
665654 " name: \"user dictionary\"\n"
666655 " entries: <\n"
667 " key: \"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\"\n"
668 " value: \"\xE4\xBA\xAC\xE9\x83\xBD\"\n"
656 " key: \"きょうと\"\n"
657 " value: \"京都\"\n"
669658 " pos: NOUN\n"
670659 " >\n"
671660 " entries: <\n"
672 " key: \"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\"\n"
673 " value: \"\xE5\xA4\xA7\xE9\x98\xAA\"\n"
661 " key: \"おおさか\"\n"
662 " value: \"大阪\"\n"
674663 " pos: PLACE_NAME\n"
675664 " >\n"
676665 " entries: <\n"
677 " key: \"\xE3\x81\xA8\xE3\x81\x86\xE3"
678 "\x81\x8D\xE3\x82\x87\xE3\x81\x86\"\n"
679 " value: \"\xE6\x9D\xB1\xE4\xBA\xAC\"\n"
666 " key: \"とうきょう\"\n"
667 " value: \"東京\"\n"
680668 " pos: PLACE_NAME\n"
681 " comment: \"\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\"\n"
669 " comment: \"コメント\"\n"
682670 " >\n"
683671 " entries: <\n"
684 " key: \"\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\"\n"
685 " value: \"\xE9\x88\xB4\xE6\x9C\xA8\"\n"
672 " key: \"すずき\"\n"
673 " value: \"鈴木\"\n"
686674 " pos: PERSONAL_NAME\n"
687675 " >\n"
688676 ">",
5959 // saved correctly. Please make the dictionary size smaller"
6060 const size_t kDefaultWarningTotalBytesLimit = 256 << 20;
6161
62 // "自動登録単語";
63 const char kAutoRegisteredDictionaryName[] =
64 "\xE8\x87\xAA\xE5\x8B\x95\xE7\x99\xBB\xE9\x8C\xB2\xE5\x8D\x98\xE8\xAA\x9E";
65
62 const char kAutoRegisteredDictionaryName[] = "自動登録単語";
6663 const char kDefaultSyncDictionaryName[] = "Sync Dictionary";
67 // "同期用辞書"
68 const char *kDictionaryNameConvertedFromSyncableDictionary =
69 "\xE5\x90\x8C\xE6\x9C\x9F\xE7\x94\xA8\xE8\xBE\x9E\xE6\x9B\xB8";
64 const char *kDictionaryNameConvertedFromSyncableDictionary = "同期用辞書";
7065
7166 } // namespace
7267
4545 DECLARE_string(test_tmpdir);
4646
4747 namespace mozc {
48 namespace {
4849
4950 using user_dictionary::UserDictionary;
50
51 namespace {
5251
5352 string GenRandomString(int size) {
5453 string result;
373372
374373 ASSERT_TRUE(storage.ConvertSyncDictionariesToNormalDictionaries());
375374
376 // "同期用辞書"
377 const char *kDictionaryNameConvertedFromSyncableDictionary =
378 "\xE5\x90\x8C\xE6\x9C\x9F\xE7\x94\xA8\xE8\xBE\x9E\xE6\x9B\xB8";
375 const char kDictionaryNameConvertedFromSyncableDictionary[] = "同期用辞書";
379376 const struct ExpectedData {
380377 bool has_normal_entry;
381378 string dictionary_name;
492489
493490 // Make sure the exported format, especially that the pos is exported in
494491 // Japanese.
495 // "key value 名詞 comment" separted by a tab character.
496492 #ifdef OS_WIN
497 EXPECT_EQ("key\tvalue\t\xE5\x90\x8D\xE8\xA9\x9E\tcomment\r\n",
493 EXPECT_EQ("key\tvalue\t名詞\tcomment\r\n",
498494 string(mapped_data.begin(), mapped_data.size()));
499495 #else
500 EXPECT_EQ("key\tvalue\t\xE5\x90\x8D\xE8\xA9\x9E\tcomment\n",
496 EXPECT_EQ("key\tvalue\t名詞\tcomment\n",
501497 string(mapped_data.begin(), mapped_data.size()));
502498 #endif // OS_WIN
503499 }
5757 #include "usage_stats/usage_stats.h"
5858 #include "usage_stats/usage_stats_testing_util.h"
5959
60 using std::unique_ptr;
61
6260 namespace mozc {
6361 namespace dictionary {
6462 namespace {
63
64 using std::unique_ptr;
6565
6666 const char kUserDictionary0[] =
6767 "start\tstart\tverb\n"
7272 "smile\tsmile\tverb\n"
7373 "smog\tsmog\tnoun\n"
7474 // invalid characters "水雲" in reading
75 "\xE6\xB0\xB4\xE9\x9B\xB2\tvalue\tnoun\n"
75 "水雲\tvalue\tnoun\n"
7676
7777 // Empty key
7878 "\tvalue\tnoun\n"
145145 if (key.empty() ||
146146 value.empty() ||
147147 pos.empty() ||
148 tokens == NULL) {
148 tokens == nullptr) {
149149 return false;
150150 }
151151
172172 private:
173173 DISALLOW_COPY_AND_ASSIGN(UserPOSMock);
174174 };
175 // "名詞"
176 const char *UserPOSMock::kNoun = "\xE5\x90\x8D\xE8\xA9\x9E";
177 // "動詞ワ行五段"
178 const char *UserPOSMock::kVerb =
179 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5";
175
176 const char *UserPOSMock::kNoun = "名詞";
177 const char *UserPOSMock::kVerb = "動詞ワ行五段";
180178
181179 string GenRandomAlphabet(int size) {
182180 string result;
264262 EntryCollector collector;
265263 dic.LookupPredictive(key, convreq_, &collector);
266264
267 if (expected == NULL || expected_size == 0) {
265 if (expected == nullptr || expected_size == 0) {
268266 EXPECT_TRUE(collector.entries().empty());
269267 } else {
270268 ASSERT_FALSE(collector.entries().empty());
280278 EntryCollector collector;
281279 dic.LookupPrefix(StringPiece(key, key_size), convreq_, &collector);
282280
283 if (expected == NULL || expected_size == 0) {
281 if (expected == nullptr || expected_size == 0) {
284282 EXPECT_TRUE(collector.entries().empty());
285283 } else {
286284 ASSERT_FALSE(collector.entries().empty());
296294 EntryCollector collector;
297295 dic.LookupExact(StringPiece(key, key_size), convreq_, &collector);
298296
299 if (expected == NULL || expected_size == 0) {
297 if (expected == nullptr || expected_size == 0) {
300298 EXPECT_TRUE(collector.entries().empty());
301299 } else {
302300 ASSERT_FALSE(collector.entries().empty());
400398 { "starting", "starting", 220, 220 },
401399 };
402400 TestLookupPredictiveHelper(kExpected0, arraysize(kExpected0),
403 "start", *dic.get());
401 "start", *dic);
404402
405403 // Another normal lookup operation.
406404 const Entry kExpected1[] = {
415413 { "starting", "starting", 220, 220 },
416414 };
417415 TestLookupPredictiveHelper(kExpected1, arraysize(kExpected1),
418 "st", *dic.get());
416 "st", *dic);
419417
420418 // Invalid input values should be just ignored.
421 TestLookupPredictiveHelper(NULL, 0, "", *dic.get());
422 TestLookupPredictiveHelper(NULL, 0,
423 "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲"
424 *dic.get());
419 TestLookupPredictiveHelper(nullptr, 0, "", *dic);
420 TestLookupPredictiveHelper(nullptr, 0, "水雲", *dic);
425421
426422 // Make a change to the dictionary file and load it again.
427423 {
437433 { "ending", "ending", 220, 220 },
438434 };
439435 TestLookupPredictiveHelper(kExpected2, arraysize(kExpected2),
440 "end", *dic.get());
436 "end", *dic);
441437
442438 // Entries in the dictionary before reloading cannot be looked up.
443 TestLookupPredictiveHelper(NULL, 0, "start", *dic.get());
444 TestLookupPredictiveHelper(NULL, 0, "st", *dic.get());
439 TestLookupPredictiveHelper(nullptr, 0, "start", *dic);
440 TestLookupPredictiveHelper(nullptr, 0, "st", *dic);
445441 }
446442
447443 TEST_F(UserDictionaryTest, TestLookupPrefix) {
462458 { "started", "started", 210, 210 },
463459 };
464460 TestLookupPrefixHelper(kExpected0, arraysize(kExpected0),
465 "started", 7, *dic.get());
461 "started", 7, *dic);
466462
467463 // Another normal lookup operation.
468464 const Entry kExpected1[] = {
471467 { "starting", "starting", 100, 100 },
472468 { "starting", "starting", 220, 220 },
473469 };
474 TestLookupPrefixHelper(kExpected1, arraysize(kExpected1),
475 "starting", 8, *dic.get());
470 TestLookupPrefixHelper(kExpected1, arraysize(kExpected1), "starting", 8,
471 *dic);
476472
477473 // Invalid input values should be just ignored.
478 TestLookupPrefixHelper(NULL, 0, "", 0, *dic.get());
479 TestLookupPrefixHelper(
480 NULL, 0, "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲"
481 strlen("\xE6\xB0\xB4\xE9\x9B\xB2"), *dic.get());
474 TestLookupPrefixHelper(nullptr, 0, "", 0, *dic);
475 TestLookupPrefixHelper(nullptr, 0, "水雲", strlen("水雲"), *dic);
482476
483477 // Make a change to the dictionary file and load it again.
484478 {
493487 { "ending", "ending", 220, 220 },
494488 };
495489 TestLookupPrefixHelper(kExpected2, arraysize(kExpected2),
496 "ending", 6, *dic.get());
490 "ending", 6, *dic);
497491
498492 // Lookup for entries which are gone should returns empty result.
499 TestLookupPrefixHelper(NULL, 0, "started", 7, *dic.get());
500 TestLookupPrefixHelper(NULL, 0, "starting", 8, *dic.get());
493 TestLookupPrefixHelper(nullptr, 0, "started", 7, *dic);
494 TestLookupPrefixHelper(nullptr, 0, "starting", 8, *dic);
501495 }
502496
503497 TEST_F(UserDictionaryTest, TestLookupExact) {
516510 { "start", "start", 200, 200 },
517511 };
518512 TestLookupExactHelper(kExpected0, arraysize(kExpected0),
519 "start", 5, *dic.get());
513 "start", 5, *dic);
520514
521515 // Another normal lookup operation.
522516 const Entry kExpected1[] = {
524518 { "starting", "starting", 220, 220 },
525519 };
526520 TestLookupExactHelper(kExpected1, arraysize(kExpected1),
527 "starting", 8, *dic.get());
521 "starting", 8, *dic);
528522
529523 // Invalid input values should be just ignored.
530 TestLookupPrefixHelper(NULL, 0, "", 0, *dic.get());
531 TestLookupPrefixHelper(NULL, 0, "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲"
532 strlen("\xE6\xB0\xB4\xE9\x9B\xB2"), *dic.get());
524 TestLookupPrefixHelper(nullptr, 0, "", 0, *dic);
525 TestLookupPrefixHelper(nullptr, 0, "水雲", strlen("水雲"), *dic);
533526 }
534527
535528 TEST_F(UserDictionaryTest, TestLookupExactWithSuggestionOnlyWords) {
585578 dic->Load(storage);
586579 }
587580
588 TestLookupPrefixHelper(NULL, 0, "start", 4, *dic);
589 TestLookupPredictiveHelper(NULL, 0, "s", *dic);
581 TestLookupPrefixHelper(nullptr, 0, "start", 4, *dic);
582 TestLookupPredictiveHelper(nullptr, 0, "s", *dic);
590583
591584 config_.set_incognito_mode(false);
592585 {
296296 // The index of each element must match the actual value of the enum.
297297 // See also user_dictionary_storage.proto for the definition of the enum.
298298 // Note that the '0' is invalid in the definition, so the corresponding
299 // element is NULL.
299 // element is nullptr.
300300 const char *kPosTypeStringTable[] = {
301 NULL,
302 "\xE5\x90\x8D\xE8\xA9\x9E", // "名詞"
303 "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF", // "短縮よみ"
304 "\xE3\x82\xB5\xE3\x82\xB8\xE3\x82\xA7\xE3\x82\xB9\xE3\x83\x88"
305 "\xE3\x81\xAE\xE3\x81\xBF", // "サジェストのみ"
306 "\xE5\x9B\xBA\xE6\x9C\x89\xE5\x90\x8D\xE8\xA9\x9E", // "固有名詞"
307 "\xE4\xBA\xBA\xE5\x90\x8D", // "人名"
308 "\xE5\xA7\x93", // "姓"
309 "\xE5\x90\x8D", // "名"
310 "\xE7\xB5\x84\xE7\xB9\x94", // "組織"
311 "\xE5\x9C\xB0\xE5\x90\x8D", // "地名"
312 "\xE5\x90\x8D\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "名詞サ変"
313 "\xE5\x90\x8D\xE8\xA9\x9E\xE5\xBD\xA2\xE5\x8B\x95", // "名詞形動"
314 "\xE6\x95\xB0", // "数"
315 "\xE3\x82\xA2\xE3\x83\xAB\xE3\x83\x95\xE3\x82\xA1"
316 "\xE3\x83\x99\xE3\x83\x83\xE3\x83\x88", // "アルファベット"
317 "\xE8\xA8\x98\xE5\x8F\xB7", // "記号"
318 "\xE9\xA1\x94\xE6\x96\x87\xE5\xAD\x97", // "顔文字"
319
320 "\xE5\x89\xAF\xE8\xA9\x9E", // "副詞"
321 "\xE9\x80\xA3\xE4\xBD\x93\xE8\xA9\x9E", // "連体詞"
322 "\xE6\x8E\xA5\xE7\xB6\x9A\xE8\xA9\x9E", // "接続詞"
323 "\xE6\x84\x9F\xE5\x8B\x95\xE8\xA9\x9E", // "感動詞"
324 "\xE6\x8E\xA5\xE9\xA0\xAD\xE8\xAA\x9E", // "接頭語"
325 "\xE5\x8A\xA9\xE6\x95\xB0\xE8\xA9\x9E", // "助数詞"
326 "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xB8\x80\xE8\x88\xAC", // "接尾一般"
327 "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xBA\xBA\xE5\x90\x8D", // "接尾人名"
328 "\xE6\x8E\xA5\xE5\xB0\xBE\xE5\x9C\xB0\xE5\x90\x8D", // "接尾地名"
329 "\xE5\x8B\x95\xE8\xA9\x9E"
330 "\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ワ行五段"
331 "\xE5\x8B\x95\xE8\xA9\x9E"
332 "\xE3\x82\xAB\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞カ行五段"
333 "\xE5\x8B\x95\xE8\xA9\x9E"
334 "\xE3\x82\xB5\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞サ行五段"
335 "\xE5\x8B\x95\xE8\xA9\x9E"
336 "\xE3\x82\xBF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞タ行五段"
337 "\xE5\x8B\x95\xE8\xA9\x9E"
338 "\xE3\x83\x8A\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ナ行五段"
339 "\xE5\x8B\x95\xE8\xA9\x9E"
340 "\xE3\x83\x9E\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞マ行五段"
341 "\xE5\x8B\x95\xE8\xA9\x9E"
342 "\xE3\x83\xA9\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ラ行五段"
343 "\xE5\x8B\x95\xE8\xA9\x9E"
344 "\xE3\x82\xAC\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ガ行五段"
345 "\xE5\x8B\x95\xE8\xA9\x9E"
346 "\xE3\x83\x90\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞バ行五段"
347 "\xE5\x8B\x95\xE8\xA9\x9E"
348 "\xE3\x83\x8F\xE8\xA1\x8C\xE5\x9B\x9B\xE6\xAE\xB5", // "動詞ハ行四段"
349 "\xE5\x8B\x95\xE8\xA9\x9E\xE4\xB8\x80\xE6\xAE\xB5", // "動詞一段"
350 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xAB\xE5\xA4\x89", // "動詞カ変"
351 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "動詞サ変"
352 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB6\xE5\xA4\x89", // "動詞ザ変"
353 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xA9\xE5\xA4\x89", // "動詞ラ変"
354 "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E", // "形容詞"
355 "\xE7\xB5\x82\xE5\x8A\xA9\xE8\xA9\x9E", // "終助詞"
356 "\xE5\x8F\xA5\xE8\xAA\xAD\xE7\x82\xB9", // "句読点"
357 "\xE7\x8B\xAC\xE7\xAB\x8B\xE8\xAA\x9E", // "独立語"
358 "\xE6\x8A\x91\xE5\x88\xB6\xE5\x8D\x98\xE8\xAA\x9E", // "抑制単語"
301 nullptr,
302 "名詞",
303 "短縮よみ",
304 "サジェストのみ",
305 "固有名詞",
306 "人名",
307 "姓",
308 "名",
309 "組織",
310 "地名",
311 "名詞サ変",
312 "名詞形動",
313 "数",
314 "アルファベット",
315 "記号",
316 "顔文字",
317
318 "副詞",
319 "連体詞",
320 "接続詞",
321 "感動詞",
322 "接頭語",
323 "助数詞",
324 "接尾一般",
325 "接尾人名",
326 "接尾地名",
327 "動詞ワ行五段",
328 "動詞カ行五段",
329 "動詞サ行五段",
330 "動詞タ行五段",
331 "動詞ナ行五段",
332 "動詞マ行五段",
333 "動詞ラ行五段",
334 "動詞ガ行五段",
335 "動詞バ行五段",
336 "動詞ハ行四段",
337 "動詞一段",
338 "動詞カ変",
339 "動詞サ変",
340 "動詞ザ変",
341 "動詞ラ変",
342 "形容詞",
343 "終助詞",
344 "句読点",
345 "独立語",
346 "抑制単語",
359347 };
360348 } // namespace
361349
4646
4747 TEST(UserDictionaryUtilTest, TestIsValidReading) {
4848 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("ABYZabyz0189"));
49 // "〜「」"
50 EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
51 "\xe3\x80\x9c\xe3\x80\x8c\xe3\x80\x8d"));
52 // "あいうわをんゔ"
53 EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
54 "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x8f\xe3\x82\x92"
55 "\xe3\x82\x93\xe3\x82\x94"));
56 // "アイウワヲンヴ"
57 EXPECT_TRUE(UserDictionaryUtil::IsValidReading(
58 "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xaf\xe3\x83\xb2"
59 "\xe3\x83\xb3\xe3\x83\xb4"));
60 // "水雲"
61 EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xe6\xb0\xb4\xe9\x9b\xb2"));
49 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("〜「」"));
50 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("あいうわをんゔ"));
51 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("アイウワヲンヴ"));
52 EXPECT_FALSE(UserDictionaryUtil::IsValidReading("水雲"));
6253
6354 // COMBINING KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u3099, u309A)
64 EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xE3\x82\x99\xE3\x82\x9A"));
55 EXPECT_FALSE(UserDictionaryUtil::IsValidReading("゙゚"));
6556
6657 // KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u309B, u309C)
67 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("\xE3\x82\x9B\xE3\x82\x9C"));
58 EXPECT_TRUE(UserDictionaryUtil::IsValidReading("゛゜"));
6859 }
6960
7061 TEST(UserDictionaryUtilTest, TestNormalizeReading) {
71 // "あいうゔゎ", "アイウヴヮ"
72 TestNormalizeReading(
73 "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x94\xe3\x82\x8e",
74 "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xb4\xe3\x83\xae");
75 // "あいうゃ", "アイウャ"
76 TestNormalizeReading(
77 "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x83",
78 "\xef\xbd\xb1\xef\xbd\xb2\xef\xbd\xb3\xef\xbd\xac");
79 // "ABab01@&=|"
80 TestNormalizeReading(
81 "ABab01@&=|",
82 "\xef\xbc\xa1\xef\xbc\xa2\xef\xbd\x81\xef\xbd\x82\xef\xbc\x90\xef\xbc\x91"
83 "\xef\xbc\xa0\xef\xbc\x86\xef\xbc\x9d\xef\xbd\x9c");
62 TestNormalizeReading("あいうゔゎ", "アイウヴヮ");
63 TestNormalizeReading("あいうゃ", "ｱｲｳｬ");
64 TestNormalizeReading("ABab01@&=|", "ＡＢａｂ０１＠＆＝｜");
8465 }
8566
8667 namespace {
150131 EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 10));
151132 EXPECT_EQ("abc", str);
152133
153 str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか"
134 str = "かしゆか";
154135 EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 3));
155 EXPECT_EQ("\xE3\x81\x8B", str); // "か"
156
157 str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか"
136 EXPECT_EQ("か", str);
137
138 str = "かしゆか";
158139 EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 4));
159 EXPECT_EQ("\xE3\x81\x8B", str); // "か"
160
161 str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか"
140 EXPECT_EQ("か", str);
141
142 str = "かしゆか";
162143 EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 5));
163 EXPECT_EQ("\xE3\x81\x8B", str); // "か"
164
165 str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか"
144 EXPECT_EQ("か", str);
145
146 str = "かしゆか";
166147 EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 6));
167 EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97", str); // "かし"
168
169 str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか"
148 EXPECT_EQ("かし", str);
149
150 str = "かしゆか";
170151 EXPECT_FALSE(UserDictionaryUtil::Sanitize(&str, 100));
171 // "かしゆか"
172 EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B", str);
152 EXPECT_EQ("かしゆか", str);
173153 }
174154
175155 TEST(UserDictionaryUtilTest, ValidateEntry) {
176156 // Create a valid entry.
177157 UserDictionary::Entry base_entry;
178 // "よみ"
179 base_entry.set_key("\xE3\x82\x88\xE3\x81\xBF");
180
181 // "単語"
182 base_entry.set_value("\xE5\x8D\x98\xE8\xAA\x9E");
183
184 // "名詞"
158 base_entry.set_key("よみ");
159 base_entry.set_value("単語");
185160 base_entry.set_pos(UserDictionary::NOUN);
186
187 // "コメント"
188 base_entry.set_comment("\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88");
189
161 base_entry.set_comment("コメント");
190162
191163 UserDictionary::Entry entry;
192164 entry.CopyFrom(base_entry);
108108 // Set a smaller cost for "短縮よみ" so that
109109 // the word ranks higher than others.
110110 const int16 kIsolatedWordCost = 200;
111 const char kIsolatedWordPOS[] =
112 "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF";
111 const char kIsolatedWordPOS[] = "短縮よみ";
113112
114113 if (size == 1) { // no conjugation
115114 const auto &token_iter = range.first;
7575 user_pos_->GetPOSList(&pos_list);
7676
7777 std::vector<UserPOS::Token> tokens;
78 EXPECT_FALSE(user_pos_->GetTokens("", "test",
79 pos_list[0],
80 &tokens));
81 EXPECT_FALSE(user_pos_->GetTokens("test", "",
82 pos_list[0],
83 &tokens));
84 EXPECT_FALSE(user_pos_->GetTokens("test", "test",
85 "",
86 &tokens));
87 EXPECT_TRUE(user_pos_->GetTokens("test", "test",
88 pos_list[0],
89 &tokens));
78 EXPECT_FALSE(user_pos_->GetTokens("", "test", pos_list[0], &tokens));
79 EXPECT_FALSE(user_pos_->GetTokens("test", "", pos_list[0], &tokens));
80 EXPECT_FALSE(user_pos_->GetTokens("test", "test", "", &tokens));
81 EXPECT_TRUE(user_pos_->GetTokens("test", "test", pos_list[0], &tokens));
9082
9183 // http://b/2674666
92 // "あか,赤,形容詞"
93 EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x82\xE3\x81\x8B",
94 "\xE8\xB5\xA4",
95 "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E",
96 &tokens));
84 EXPECT_TRUE(user_pos_->GetTokens("あか", "赤", "形容詞", &tokens));
9785
9886 for (size_t i = 0; i < pos_list.size(); ++i) {
99 EXPECT_TRUE(user_pos_->GetTokens("test", "test",
100 pos_list[i],
101 &tokens));
87 EXPECT_TRUE(user_pos_->GetTokens("test", "test", pos_list[i], &tokens));
10288 }
10389 }
10490
10591 TEST_F(UserPOSTest, ConjugationTest) {
10692 std::vector<UserPOS::Token> tokens1, tokens2;
107 // EXPECT_TRUE(user_pos_->GetTokens("わら", "嗤",
108 // "動詞ワ行五段", &tokens1));
109 // EXPECT_TRUE(user_pos_->GetTokens("わらう", "嗤う",
110 // "動詞ワ行五段", &tokens2));
111 EXPECT_TRUE(user_pos_->GetTokens("\xE3\x82\x8F\xE3\x82\x89", "\xE5\x97\xA4",
112 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF"
113 "\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",
114 &tokens1));
115 EXPECT_TRUE(user_pos_->GetTokens("\xE3\x82\x8F\xE3\x82\x89\xE3\x81\x86",
116 "\xE5\x97\xA4\xE3\x81\x86",
117 "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF"
118 "\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",
119 &tokens2));
93 EXPECT_TRUE(user_pos_->GetTokens("わら", "嗤", "動詞ワ行五段", &tokens1));
94 EXPECT_TRUE(user_pos_->GetTokens("わらう", "嗤う", "動詞ワ行五段", &tokens2));
12095 EXPECT_EQ(tokens1.size(), tokens2.size());
12196 for (size_t i = 0; i < tokens1.size(); ++i) {
12297 EXPECT_EQ(tokens1[i].key, tokens2[i].key);
125100 EXPECT_EQ(tokens1[i].cost, tokens2[i].cost);
126101 }
127102
128 // EXPECT_TRUE(user_pos_->GetTokens("おそれ", "惧れ",
129 // "動詞一段", &tokens1));
130 // EXPECT_TRUE(user_pos_->GetTokens("おそれる", "惧れる",
131 // "動詞一段", &tokens2));
132 EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x8A\xE3\x81\x9D\xE3\x82\x8C",
133 "\xE6\x83\xA7\xE3\x82\x8C",
134 "\xE5\x8B\x95\xE8\xA9\x9E"
135 "\xE4\xB8\x80\xE6\xAE\xB5", &tokens1));
136 EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x8A\xE3\x81\x9D"
137 "\xE3\x82\x8C\xE3\x82\x8B",
138 "\xE6\x83\xA7\xE3\x82\x8C\xE3\x82\x8B",
139 "\xE5\x8B\x95\xE8\xA9\x9E"
140 "\xE4\xB8\x80\xE6\xAE\xB5", &tokens2));
103 EXPECT_TRUE(user_pos_->GetTokens("おそれ", "惧れ", "動詞一段", &tokens1));
104 EXPECT_TRUE(user_pos_->GetTokens("おそれる", "惧れる", "動詞一段", &tokens2));
141105 EXPECT_EQ(tokens1.size(), tokens2.size());
142106 for (size_t i = 0; i < tokens1.size(); ++i) {
143107 EXPECT_EQ(tokens1[i].key, tokens2[i].key);