Stop escaping non-ASCII string literals under src/dictionary/.
BUG=#385
REF_BUG=31204285
REF_CL=173076141
REF_TIME=2017-10-23T15:18:48+09:00
REF_TIME_RAW=1508739528 +0900
Noriyuki Takahashi
6 years ago
29 | 29 | |
30 | 30 | MAJOR=2 |
31 | 31 | MINOR=23 |
32 | BUILD=2747 | |
32 | BUILD=2748 | |
33 | 33 | REVISION=102 |
34 | 34 | # This version represents the version of Mozc IME engine (converter, predictor, |
35 | 35 | # etc.). This version info is included both in the Mozc server and in the Mozc |
90 | 90 | convreq_.set_config(&config_); |
91 | 91 | } |
92 | 92 | |
93 | virtual void SetUp() { | |
93 | void SetUp() override { | |
94 | 94 | config::ConfigHandler::GetDefaultConfig(&config_); |
95 | 95 | } |
96 | 96 | |
99 | 99 | CheckKeyValueExistenceCallback(StringPiece key, StringPiece value) |
100 | 100 | : key_(key), value_(value), found_(false) {} |
101 | 101 | |
102 | virtual ResultType OnToken(StringPiece key, StringPiece actual_key, | |
103 | const Token &token) { | |
102 | ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */, | |
103 | const Token &token) override { | |
104 | 104 | if (token.key == key_ && token.value == value_) { |
105 | 105 | found_ = true; |
106 | 106 | return TRAVERSE_DONE; |
120 | 120 | CheckSpellingExistenceCallback(StringPiece key, StringPiece value) |
121 | 121 | : key_(key), value_(value), found_(false) {} |
122 | 122 | |
123 | virtual ResultType OnToken(StringPiece key, StringPiece actual_key, | |
124 | const Token &token) { | |
123 | ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */, | |
124 | const Token &token) override { | |
125 | 125 | if (token.key == key_ && token.value == value_ && |
126 | 126 | (token.attributes & Token::SPELLING_CORRECTION)) { |
127 | 127 | found_ = true; |
143 | 143 | const POSMatcher *pos_matcher) |
144 | 144 | : key_(key), value_(value), pos_matcher_(pos_matcher), found_(false) {} |
145 | 145 | |
146 | virtual ResultType OnToken(StringPiece key, StringPiece actual_key, | |
147 | const Token &token) { | |
146 | ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */, | |
147 | const Token &token) override { | |
148 | 148 | if (token.key == key_ && token.value == value_ && |
149 | 149 | pos_matcher_->IsZipcode(token.lid)) { |
150 | 150 | found_ = true; |
166 | 166 | CheckEnglishT13nCallback(StringPiece key, StringPiece value) |
167 | 167 | : key_(key), value_(value), found_(false) {} |
168 | 168 | |
169 | virtual ResultType OnToken(StringPiece key, StringPiece actual_key, | |
170 | const Token &token) { | |
169 | ResultType OnToken(StringPiece /* key */, StringPiece /* actual_key */, | |
170 | const Token &token) override { | |
171 | 171 | if (token.key == key_ && token.value == value_ && |
172 | 172 | Util::IsEnglishTransliteration(token.value)) { |
173 | 173 | found_ = true; |
201 | 201 | DictionaryInterface *d = data->dictionary.get(); |
202 | 202 | SuppressionDictionary *s = data->suppression_dictionary.get(); |
203 | 203 | |
204 | const char kKey[] = | |
205 | "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B"; // "ぐーぐる" | |
206 | const char kValue[] = | |
207 | "\xE3\x82\xB0\xE3\x83\xBC\xE3\x82\xB0\xE3\x83\xAB"; // "グーグル" | |
204 | const char kKey[] = "ぐーぐる"; | |
205 | const char kValue[] = "グーグル"; | |
208 | 206 | |
209 | 207 | const LookupMethodAndQuery kTestPair[] = { |
210 | // "ぐーぐるは" | |
211 | {&DictionaryInterface::LookupPrefix, | |
212 | "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B\xE3\x81\xAF"}, | |
213 | // "ぐーぐ" | |
214 | {&DictionaryInterface::LookupPredictive, | |
215 | "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90"}, | |
208 | {&DictionaryInterface::LookupPrefix, "ぐーぐるは"}, | |
209 | {&DictionaryInterface::LookupPredictive, "ぐーぐ"}, | |
216 | 210 | }; |
217 | 211 | |
218 | 212 | // First add (kKey, kValue) to the suppression dictionary; thus it should not |
243 | 237 | DictionaryInterface *d = data->dictionary.get(); |
244 | 238 | |
245 | 239 | // "あぼがど" -> "アボカド", which is in the test dictionary. |
246 | const char kKey[] = "\xE3\x81\x82\xE3\x81\xBC\xE3\x81\x8C\xE3\x81\xA9"; | |
247 | const char kValue[] = "\xE3\x82\xA2\xE3\x83\x9C\xE3\x82\xAB\xE3\x83\x89"; | |
240 | const char kKey[] = "あぼがど"; | |
241 | const char kValue[] = "アボカド"; | |
248 | 242 | |
249 | 243 | const LookupMethodAndQuery kTestPair[] = { |
250 | // "あぼがど" | |
251 | 244 | {&DictionaryInterface::LookupPrefix, kKey}, |
252 | // "あぼ" | |
253 | {&DictionaryInterface::LookupPredictive, "\xE3\x81\x82\xE3\x81\xBC"}, | |
245 | {&DictionaryInterface::LookupPredictive, "あぼ"}, | |
254 | 246 | }; |
255 | 247 | |
256 | 248 | // The spelling correction entry (kKey, kValue) should be found if spelling |
277 | 269 | |
278 | 270 | // "100-0000" -> "東京都千代田区", which is in the test dictionary. |
279 | 271 | const char kKey[] = "100-0000"; |
280 | const char kValue[] = "\xE6\x9D\xB1\xE4\xBA\xAC\xE9\x83\xBD\xE5\x8D" | |
281 | "\x83\xE4\xBB\xA3\xE7\x94\xB0\xE5\x8C\xBA"; | |
272 | const char kValue[] = "東京都千代田区"; | |
282 | 273 | |
283 | 274 | const LookupMethodAndQuery kTestPair[] = { |
284 | 275 | {&DictionaryInterface::LookupPrefix, kKey}, |
308 | 299 | DictionaryInterface *d = data->dictionary.get(); |
309 | 300 | NodeAllocator allocator; |
310 | 301 | |
311 | // "ぐーぐる" -> "Google" | |
312 | const char kKey[] = | |
313 | "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B"; | |
302 | const char kKey[] = "ぐーぐる"; | |
314 | 303 | const char kValue[] = "Google"; |
315 | 304 | |
316 | 305 | const LookupMethodAndQuery kTestPair[] = { |
317 | 306 | {&DictionaryInterface::LookupPrefix, kKey}, |
318 | // "ぐー" | |
319 | {&DictionaryInterface::LookupPredictive, "\xE3\x81\x90\xE3\x83\xBC"}, | |
307 | {&DictionaryInterface::LookupPredictive, "ぐー"}, | |
320 | 308 | }; |
321 | 309 | |
322 | 310 | // The T13N entry (kKey, kValue) should be found if the flag is set in the |
39 | 39 | #include "request/conversion_request.h" |
40 | 40 | #include "testing/base/public/gunit.h" |
41 | 41 | |
42 | using std::unique_ptr; | |
43 | ||
44 | 42 | namespace mozc { |
45 | 43 | namespace dictionary { |
46 | 44 | namespace { |
47 | 45 | |
46 | using std::unique_ptr; | |
47 | ||
48 | 48 | class DictionaryMockTest : public ::testing::Test { |
49 | 49 | protected: |
50 | virtual void SetUp() { | |
50 | void SetUp() override { | |
51 | 51 | mock_.reset(new DictionaryMock); |
52 | 52 | } |
53 | 53 | |
126 | 126 | TEST_F(DictionaryMockTest, LookupPrefix) { |
127 | 127 | DictionaryMock *dic = GetMock(); |
128 | 128 | |
129 | unique_ptr<Token> t0(CreateToken( | |
130 | "\xe3\x81\xaf", // "は" | |
131 | "v0", Token::NONE)); | |
132 | unique_ptr<Token> t1(CreateToken( | |
133 | // "はひふへほ" | |
134 | "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81\xbb", | |
135 | "v1", Token::NONE)); | |
129 | unique_ptr<Token> t0(CreateToken("は", "v0", Token::NONE)); | |
130 | unique_ptr<Token> t1(CreateToken("はひふへほ", "v1", Token::NONE)); | |
136 | 131 | |
137 | 132 | dic->AddLookupPrefix(t0->key, t0->key, t0->value, Token::NONE); |
138 | 133 | dic->AddLookupPrefix(t1->key, t1->key, t1->value, Token::NONE); |
156 | 151 | TEST_F(DictionaryMockTest, LookupReverse) { |
157 | 152 | DictionaryInterface *dic = GetMock(); |
158 | 153 | |
159 | // "今"/"いま" | |
160 | const string k0 = "\xE4\xBB\x8A"; | |
161 | const string v0 = "\xE3\x81\x84\xE3\x81\xBE"; | |
162 | // "今日"/"きょう" | |
163 | const string k1 = "\xE4\xBB\x8A\xE6\x97\xA5"; | |
164 | const string v1 = "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86"; | |
154 | const string k0 = "今"; | |
155 | const string v0 = "いま"; | |
156 | const string k1 = "今日"; | |
157 | const string v1 = "きょう"; | |
165 | 158 | |
166 | 159 | std::vector<Token> source_tokens; |
167 | 160 | unique_ptr<Token> t0(CreateToken(k0, v0)); |
186 | 179 | |
187 | 180 | TEST_F(DictionaryMockTest, LookupPredictive) { |
188 | 181 | DictionaryInterface *dic = GetMock(); |
189 | // "は" | |
190 | const string k0 = "\xe3\x81\xaf"; | |
191 | // "はひふ" | |
192 | const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5"; | |
193 | // "はひふへほ" | |
194 | const string k2 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81" | |
195 | "\xbb"; | |
182 | ||
183 | const string k0 = "は"; | |
184 | const string k1 = "はひふ"; | |
185 | const string k2 = "はひふへほ"; | |
196 | 186 | |
197 | 187 | std::vector<Token> tokens; |
198 | 188 | unique_ptr<Token> t1(CreateToken(k1, "v0", Token::NONE)); |
215 | 205 | TEST_F(DictionaryMockTest, LookupExact) { |
216 | 206 | DictionaryInterface *dic = GetMock(); |
217 | 207 | |
218 | const char *kKey = "\xE3\x81\xBB\xE3\x81\x92"; // "ほげ" | |
208 | const char kKey[] = "ほげ"; | |
219 | 209 | |
220 | 210 | unique_ptr<Token> t0(CreateToken(kKey, "value1", Token::NONE)); |
221 | 211 | unique_ptr<Token> t1(CreateToken(kKey, "value2", Token::NONE)); |
234 | 224 | EXPECT_TRUE(callback.tokens().empty()); |
235 | 225 | |
236 | 226 | callback.Clear(); |
237 | dic->LookupExact("\xE3\x81\xBB", // "ほ" | |
238 | convreq_, | |
239 | &callback); | |
227 | dic->LookupExact("ほ", convreq_, &callback); | |
240 | 228 | EXPECT_TRUE(callback.tokens().empty()); |
241 | 229 | } |
242 | 230 |
39 | 39 | |
40 | 40 | namespace mozc { |
41 | 41 | namespace dictionary { |
42 | namespace { | |
42 | 43 | |
43 | 44 | TEST(SuffixDictionaryTest, LookupPredictive) { |
44 | 45 | // Test SuffixDictionary with mock data. |
72 | 73 | } |
73 | 74 | { |
74 | 75 | // Non-empty prefix. |
75 | const string kPrefix = "\xE3\x81\x9F"; // "た" | |
76 | const string kPrefix = "た"; | |
76 | 77 | CollectTokenCallback callback; |
77 | 78 | dic->LookupPredictive(kPrefix, convreq, &callback); |
78 | 79 | EXPECT_FALSE(callback.tokens().empty()); |
87 | 88 | } |
88 | 89 | } |
89 | 90 | |
91 | } // namespace | |
90 | 92 | } // namespace dictionary |
91 | 93 | } // namespace mozc |
40 | 40 | #include "testing/base/public/googletest.h" |
41 | 41 | #include "testing/base/public/gunit.h" |
42 | 42 | |
43 | using std::unique_ptr; | |
44 | ||
45 | 43 | namespace mozc { |
46 | 44 | namespace dictionary { |
47 | 45 | namespace { |
46 | ||
47 | using std::unique_ptr; | |
48 | 48 | |
49 | 49 | ::testing::AssertionResult MakeAssertResult( |
50 | 50 | bool success, char32 c, const char *message) { |
72 | 72 | namespace mozc { |
73 | 73 | namespace dictionary { |
74 | 74 | |
75 | using mozc::storage::louds::BitVectorBasedArray; | |
76 | using mozc::storage::louds::LoudsTrie; | |
75 | using ::mozc::storage::louds::BitVectorBasedArray; | |
76 | using ::mozc::storage::louds::LoudsTrie; | |
77 | 77 | |
78 | 78 | namespace { |
79 | 79 | |
104 | 104 | // be mixed. |
105 | 105 | // TODO(hidehiko): Clean up this hacky implementation. |
106 | 106 | const char *kHiraganaExpansionTable[] = { |
107 | "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x81", // "ああぁ" | |
108 | "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x83", // "いいぃ" | |
109 | "\xe3\x81\x86\xe3\x81\x86\xe3\x81\x85\xe3\x82\x94", // "ううぅゔ" | |
110 | "\xe3\x81\x88\xe3\x81\x88\xe3\x81\x87", // "ええぇ" | |
111 | "\xe3\x81\x8a\xe3\x81\x8a\xe3\x81\x89", // "おおぉ" | |
112 | "\xe3\x81\x8b\xe3\x81\x8b\xe3\x81\x8c", // "かかが" | |
113 | "\xe3\x81\x8d\xe3\x81\x8d\xe3\x81\x8e", // "ききぎ" | |
114 | "\xe3\x81\x8f\xe3\x81\x8f\xe3\x81\x90", // "くくぐ" | |
115 | "\xe3\x81\x91\xe3\x81\x91\xe3\x81\x92", // "けけげ" | |
116 | "\xe3\x81\x93\xe3\x81\x93\xe3\x81\x94", // "ここご" | |
117 | "\xe3\x81\x95\xe3\x81\x95\xe3\x81\x96", // "ささざ" | |
118 | "\xe3\x81\x97\xe3\x81\x97\xe3\x81\x98", // "ししじ" | |
119 | "\xe3\x81\x99\xe3\x81\x99\xe3\x81\x9a", // "すすず" | |
120 | "\xe3\x81\x9b\xe3\x81\x9b\xe3\x81\x9c", // "せせぜ" | |
121 | "\xe3\x81\x9d\xe3\x81\x9d\xe3\x81\x9e", // "そそぞ" | |
122 | "\xe3\x81\x9f\xe3\x81\x9f\xe3\x81\xa0", // "たただ" | |
123 | "\xe3\x81\xa1\xe3\x81\xa1\xe3\x81\xa2", // "ちちぢ" | |
124 | "\xe3\x81\xa4\xe3\x81\xa4\xe3\x81\xa3\xe3\x81\xa5", // "つつっづ" | |
125 | "\xe3\x81\xa6\xe3\x81\xa6\xe3\x81\xa7", // "ててで" | |
126 | "\xe3\x81\xa8\xe3\x81\xa8\xe3\x81\xa9", // "ととど" | |
127 | "\xe3\x81\xaf\xe3\x81\xaf\xe3\x81\xb0\xe3\x81\xb1", // "ははばぱ" | |
128 | "\xe3\x81\xb2\xe3\x81\xb2\xe3\x81\xb3\xe3\x81\xb4", // "ひひびぴ" | |
129 | "\xe3\x81\xb5\xe3\x81\xb5\xe3\x81\xb6\xe3\x81\xb7", // "ふふぶぷ" | |
130 | "\xe3\x81\xb8\xe3\x81\xb8\xe3\x81\xb9\xe3\x81\xba", // "へへべぺ" | |
131 | "\xe3\x81\xbb\xe3\x81\xbb\xe3\x81\xbc\xe3\x81\xbd", // "ほほぼぽ" | |
132 | "\xe3\x82\x84\xe3\x82\x84\xe3\x82\x83", // "ややゃ" | |
133 | "\xe3\x82\x86\xe3\x82\x86\xe3\x82\x85", // "ゆゆゅ" | |
134 | "\xe3\x82\x88\xe3\x82\x88\xe3\x82\x87", // "よよょ" | |
135 | "\xe3\x82\x8f\xe3\x82\x8f\xe3\x82\x8e", // "わわゎ" | |
107 | "ああぁ", | |
108 | "いいぃ", | |
109 | "ううぅゔ", | |
110 | "ええぇ", | |
111 | "おおぉ", | |
112 | "かかが", | |
113 | "ききぎ", | |
114 | "くくぐ", | |
115 | "けけげ", | |
116 | "ここご", | |
117 | "ささざ", | |
118 | "ししじ", | |
119 | "すすず", | |
120 | "せせぜ", | |
121 | "そそぞ", | |
122 | "たただ", | |
123 | "ちちぢ", | |
124 | "つつっづ", | |
125 | "ててで", | |
126 | "ととど", | |
127 | "ははばぱ", | |
128 | "ひひびぴ", | |
129 | "ふふぶぷ", | |
130 | "へへべぺ", | |
131 | "ほほぼぽ", | |
132 | "ややゃ", | |
133 | "ゆゆゅ", | |
134 | "よよょ", | |
135 | "わわゎ", | |
136 | 136 | }; |
137 | 137 | |
138 | 138 | const uint32 kAsciiRange = 0x80; |
56 | 56 | #include "testing/base/public/gunit.h" |
57 | 57 | #include "testing/base/public/mozctest.h" |
58 | 58 | |
59 | using std::unique_ptr; | |
60 | ||
61 | using mozc::dictionary::CollectTokenCallback; | |
62 | ||
63 | 59 | DEFINE_int32(dictionary_test_size, 100000, |
64 | 60 | "Dictionary size for this test."); |
65 | 61 | DEFINE_int32(dictionary_reverse_lookup_test_size, 1000, |
68 | 64 | |
69 | 65 | namespace mozc { |
70 | 66 | namespace dictionary { |
67 | namespace { | |
68 | using std::unique_ptr; | |
69 | } // namespace | |
71 | 70 | |
72 | 71 | class SystemDictionaryTest : public ::testing::Test { |
73 | 72 | protected: |
176 | 175 | std::vector<Token *> tokens; |
177 | 176 | for (int i = 0; i < 4; ++i) { |
178 | 177 | Token *token = new Token; |
179 | // "きー%d" | |
180 | token->key = Util::StringPrintf("\xE3\x81\x8D\xE3\x83\xBC%d", i); | |
181 | // "バリュー%d" | |
182 | token->value = Util::StringPrintf( | |
183 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC%d", i); | |
178 | token->key = Util::StringPrintf("きー%d", i); | |
179 | token->value = Util::StringPrintf("バリュー%d", i); | |
184 | 180 | tokens.push_back(token); |
185 | 181 | } |
186 | 182 | |
198 | 194 | tokens.push_back(token); |
199 | 195 | } |
200 | 196 | |
201 | // "full" | |
202 | const string kFull = "\xEF\xBD\x86\xEF\xBD\x95\xEF\xBD\x8C\xEF\xBD\x8C"; | |
203 | // "ひらがな" | |
204 | const string kHiragana = "\xE3\x81\xB2\xE3\x82\x89\xE3\x81\x8C\xE3\x81\xAA"; | |
205 | // "かたかな" | |
206 | const string kKatakanaKey = | |
207 | "\xE3\x81\x8B\xE3\x81\x9F\xE3\x81\x8B\xE3\x81\xAA"; | |
208 | // "カタカナ" | |
209 | const string kKatakanaValue = | |
210 | "\xE3\x82\xAB\xE3\x82\xBF\xE3\x82\xAB\xE3\x83\x8A"; | |
197 | const string kFull = "full"; | |
198 | const string kHiragana = "ひらがな"; | |
199 | const string kKatakanaKey = "かたかな"; | |
200 | const string kKatakanaValue = "カタカナ"; | |
211 | 201 | |
212 | 202 | { // Alphabet full width |
213 | 203 | Token *token = new Token; |
214 | 204 | token->key = "full"; |
215 | token->value = kFull; // "full" | |
205 | token->value = kFull; | |
216 | 206 | tokens.push_back(token); |
217 | 207 | } |
218 | 208 | |
219 | 209 | { // Hiragana |
220 | 210 | Token *token = new Token; |
221 | token->key = kHiragana; // "ひらがな" | |
222 | token->value = kHiragana; // "ひらがな" | |
211 | token->key = kHiragana; | |
212 | token->value = kHiragana; | |
223 | 213 | tokens.push_back(token); |
224 | 214 | } |
225 | 215 | |
226 | 216 | { // Katakana |
227 | 217 | Token *token = new Token; |
228 | token->key = kKatakanaKey; // "かたかな" | |
229 | token->value = kKatakanaValue; // "カタカナ" | |
218 | token->key = kKatakanaKey; | |
219 | token->value = kKatakanaValue; | |
230 | 220 | tokens.push_back(token); |
231 | 221 | } |
232 | 222 | |
237 | 227 | ASSERT_TRUE(system_dic.get() != NULL) |
238 | 228 | << "Failed to open dictionary source:" << dic_fn_; |
239 | 229 | |
240 | EXPECT_TRUE(system_dic->HasValue( | |
241 | // "バリュー0" | |
242 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x30")); | |
243 | EXPECT_TRUE(system_dic->HasValue( | |
244 | // "バリュー1" | |
245 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x31")); | |
246 | EXPECT_TRUE(system_dic->HasValue( | |
247 | // "バリュー2" | |
248 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x32")); | |
249 | EXPECT_TRUE(system_dic->HasValue( | |
250 | // "バリュー3" | |
251 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x33")); | |
252 | EXPECT_FALSE(system_dic->HasValue( | |
253 | // "バリュー4" | |
254 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x34")); | |
255 | EXPECT_FALSE(system_dic->HasValue( | |
256 | // "バリュー5" | |
257 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x35")); | |
258 | EXPECT_FALSE(system_dic->HasValue( | |
259 | // "バリュー6" | |
260 | "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x36")); | |
230 | EXPECT_TRUE(system_dic->HasValue("バリュー0")); | |
231 | EXPECT_TRUE(system_dic->HasValue("バリュー1")); | |
232 | EXPECT_TRUE(system_dic->HasValue("バリュー2")); | |
233 | EXPECT_TRUE(system_dic->HasValue("バリュー3")); | |
234 | EXPECT_FALSE(system_dic->HasValue("バリュー4")); | |
235 | EXPECT_FALSE(system_dic->HasValue("バリュー5")); | |
236 | EXPECT_FALSE(system_dic->HasValue("バリュー6")); | |
261 | 237 | |
262 | 238 | EXPECT_TRUE(system_dic->HasValue("Mozc")); |
263 | 239 | EXPECT_FALSE(system_dic->HasValue("mozc")); |
265 | 241 | EXPECT_TRUE(system_dic->HasValue("UPPER")); |
266 | 242 | EXPECT_FALSE(system_dic->HasValue("upper")); |
267 | 243 | |
268 | EXPECT_TRUE(system_dic->HasValue(kFull)); // "full" | |
244 | EXPECT_TRUE(system_dic->HasValue(kFull)); | |
269 | 245 | EXPECT_FALSE(system_dic->HasValue("full")); |
270 | 246 | |
271 | EXPECT_TRUE(system_dic->HasValue(kHiragana)); //"ひらがな" | |
272 | EXPECT_FALSE(system_dic->HasValue( | |
273 | "\xE3\x83\x92\xE3\x83\xA9\xE3\x82\xAC\xE3\x83\x8A\x0A")); // "ヒラガナ" | |
274 | ||
275 | EXPECT_TRUE(system_dic->HasValue(kKatakanaValue)); // "カタカナ" | |
276 | EXPECT_FALSE(system_dic->HasValue(kKatakanaKey)); // "かたかな" | |
247 | EXPECT_TRUE(system_dic->HasValue(kHiragana)); | |
248 | EXPECT_FALSE(system_dic->HasValue("ヒラガナ\n")); | |
249 | ||
250 | EXPECT_TRUE(system_dic->HasValue(kKatakanaValue)); | |
251 | EXPECT_FALSE(system_dic->HasValue(kKatakanaKey)); | |
277 | 252 | |
278 | 253 | STLDeleteElements(&tokens); |
279 | 254 | } |
281 | 256 | TEST_F(SystemDictionaryTest, NormalWord) { |
282 | 257 | std::vector<Token *> source_tokens; |
283 | 258 | unique_ptr<Token> t0(new Token); |
284 | // "あ" | |
285 | t0->key = "\xe3\x81\x82"; | |
286 | // "亜" | |
287 | t0->value = "\xe4\xba\x9c"; | |
259 | t0->key = "あ"; | |
260 | t0->value = "亜"; | |
288 | 261 | t0->cost = 100; |
289 | 262 | t0->lid = 50; |
290 | 263 | t0->rid = 70; |
305 | 278 | |
306 | 279 | // Look up by prefix. |
307 | 280 | callback.Clear(); |
308 | system_dic->LookupPrefix( | |
309 | "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", // "あいう" | |
310 | convreq_, &callback); | |
281 | system_dic->LookupPrefix("あいう", convreq_, &callback); | |
311 | 282 | ASSERT_EQ(1, callback.tokens().size()); |
312 | 283 | EXPECT_TOKEN_EQ(*t0, callback.tokens().front()); |
313 | 284 | |
314 | 285 | // Nothing should be looked up. |
315 | 286 | callback.Clear(); |
316 | system_dic->LookupPrefix( | |
317 | "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", // "かきく" | |
318 | convreq_, &callback); | |
287 | system_dic->LookupPrefix("かきく", convreq_, &callback); | |
319 | 288 | EXPECT_TRUE(callback.tokens().empty()); |
320 | 289 | } |
321 | 290 | |
322 | 291 | TEST_F(SystemDictionaryTest, SameWord) { |
323 | 292 | std::vector<Token> tokens(4); |
324 | 293 | |
325 | tokens[0].key = "\xe3\x81\x82"; // "あ" | |
326 | tokens[0].value = "\xe4\xba\x9c"; // "亜" | |
294 | tokens[0].key = "あ"; | |
295 | tokens[0].value = "亜"; | |
327 | 296 | tokens[0].cost = 100; |
328 | 297 | tokens[0].lid = 50; |
329 | 298 | tokens[0].rid = 70; |
330 | 299 | |
331 | tokens[1].key = "\xe3\x81\x82"; // "あ" | |
332 | tokens[1].value = "\xe4\xba\x9c"; // "亜" | |
300 | tokens[1].key = "あ"; | |
301 | tokens[1].value = "亜"; | |
333 | 302 | tokens[1].cost = 150; |
334 | 303 | tokens[1].lid = 100; |
335 | 304 | tokens[1].rid = 200; |
336 | 305 | |
337 | tokens[2].key = "\xe3\x81\x82"; // "あ" | |
338 | tokens[2].value = "\xe3\x81\x82"; // "あ" | |
306 | tokens[2].key = "あ"; | |
307 | tokens[2].value = "あ"; | |
339 | 308 | tokens[2].cost = 100; |
340 | 309 | tokens[2].lid = 1000; |
341 | 310 | tokens[2].rid = 2000; |
342 | 311 | |
343 | tokens[3].key = "\xe3\x81\x82"; // "あ" | |
344 | tokens[3].value = "\xe4\xba\x9c"; // "亜" | |
312 | tokens[3].key = "あ"; | |
313 | tokens[3].value = "亜"; | |
345 | 314 | tokens[3].cost = 1000; |
346 | 315 | tokens[3].lid = 2000; |
347 | 316 | tokens[3].rid = 3000; |
359 | 328 | |
360 | 329 | // All the tokens should be looked up. |
361 | 330 | CollectTokenCallback callback; |
362 | system_dic->LookupPrefix("\xe3\x81\x82", // "あ" | |
363 | convreq_, &callback); | |
331 | system_dic->LookupPrefix("あ", convreq_, &callback); | |
364 | 332 | EXPECT_TOKENS_EQ_UNORDERED(source_tokens, callback.tokens()); |
365 | 333 | } |
366 | 334 | |
383 | 351 | } |
384 | 352 | |
385 | 353 | TEST_F(SystemDictionaryTest, SimpleLookupPrefix) { |
386 | // "は" | |
387 | const string k0 = "\xe3\x81\xaf"; | |
388 | // "はひふへほ" | |
389 | const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81" | |
390 | "\xbb"; | |
354 | const string k0 = "は"; | |
355 | const string k1 = "はひふへほ"; | |
391 | 356 | unique_ptr<Token> t0(CreateToken(k0, "aa")); |
392 | 357 | unique_ptr<Token> t1(CreateToken(k1, "bb")); |
393 | 358 | |
413 | 378 | class LookupPrefixTestCallback : public SystemDictionary::Callback { |
414 | 379 | public: |
415 | 380 | virtual ResultType OnKey(StringPiece key) { |
416 | if (key == "\xE3\x81\x8B\xE3\x81\x8D") { // key == "かき" | |
381 | if (key == "かき") { | |
417 | 382 | return TRAVERSE_CULL; |
418 | } else if (key == "\xE3\x81\x95") { // key == "さ" | |
383 | } else if (key == "さ") { | |
419 | 384 | return TRAVERSE_NEXT_KEY; |
420 | } else if (key == "\xE3\x81\x9F") { // key == "た" | |
385 | } else if (key == "た") { | |
421 | 386 | return TRAVERSE_DONE; |
422 | 387 | } |
423 | 388 | return TRAVERSE_CONTINUE; |
445 | 410 | const char *key; |
446 | 411 | const char *value; |
447 | 412 | } kKeyValues[] = { |
448 | // "あ", "亜" | |
449 | { "\xE3\x81\x82", "\xE4\xBA\x9C" }, | |
450 | // "あ", "安" | |
451 | { "\xE3\x81\x82", "\xE5\xAE\x89" }, | |
452 | // "あ", "在" | |
453 | { "\xE3\x81\x82", "\xE5\x9C\xA8" }, | |
454 | // "あい", "愛" | |
455 | { "\xE3\x81\x82\xE3\x81\x84", "\xE6\x84\x9B" }, | |
456 | // "あい", "藍" | |
457 | { "\xE3\x81\x82\xE3\x81\x84", "\xE8\x97\x8D" }, | |
458 | // "あいう", "藍雨" | |
459 | { "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", "\xE8\x97\x8D\xE9\x9B\xA8" }, | |
460 | // "か", "可" | |
461 | { "\xE3\x81\x8B", "\xE5\x8F\xAF" }, | |
462 | // "かき", "牡蠣" | |
463 | { "\xE3\x81\x8B\xE3\x81\x8D", "\xE7\x89\xA1\xE8\xA0\xA3" }, | |
464 | // "かき", "夏季" | |
465 | { "\xE3\x81\x8B\xE3\x81\x8D", "\xE5\xA4\x8F\xE5\xAD\xA3" }, | |
466 | // "かきく", "柿久" | |
467 | { "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", "\xE6\x9F\xBF\xE4\xB9\x85" }, | |
468 | // "さ", "差" | |
469 | { "\xE3\x81\x95", "\xE5\xB7\xAE" }, | |
470 | // "さ", "左" | |
471 | { "\xE3\x81\x95", "\xE5\xB7\xA6" }, | |
472 | // "さし", "刺" | |
473 | { "\xE3\x81\x95\xE3\x81\x97", "\xE5\x88\xBA" }, | |
474 | // "た", "田" | |
475 | { "\xE3\x81\x9F", "\xE7\x94\xB0" }, | |
476 | // "た", "多" | |
477 | { "\xE3\x81\x9F", "\xE5\xA4\x9A" }, | |
478 | // "たち", 多値" | |
479 | { "\xE3\x81\x9F\xE3\x81\xA1", "\xE5\xA4\x9A\xE5\x80\xA4" }, | |
480 | // "たちつ", "タチツ" | |
481 | { "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4", | |
482 | "\xE3\x82\xBF\xE3\x83\x81\xE3\x83\x84" }, | |
483 | // "は", "葉" | |
484 | { "\xE3\x81\xAF", "\xE8\x91\x89" }, | |
485 | // "は", "歯" | |
486 | { "\xE3\x81\xAF", "\xE6\xAD\xAF" }, | |
487 | // "はひ", "ハヒ" | |
488 | { "\xE3\x81\xAF\xE3\x81\xB2", "\xE3\x83\x8F\xE3\x83\x92" }, | |
489 | // "ば", "場" | |
490 | { "\xE3\x81\xB0", "\xE5\xA0\xB4" }, | |
491 | // "はび", "波美" | |
492 | { "\xE3\x81\xAF\xE3\x81\xB3", "\xE6\xB3\xA2\xE7\xBE\x8E" }, | |
493 | // "ばび", "馬尾" | |
494 | { "\xE3\x81\xB0\xE3\x81\xB3", "\xE9\xA6\xAC\xE5\xB0\xBE" }, | |
495 | // "ばびぶ", "バビブ" | |
496 | { "\xE3\x81\xB0\xE3\x81\xB3\xE3\x81\xB6", | |
497 | "\xE3\x83\x90\xE3\x83\x93\xE3\x83\x96" }, | |
413 | { "あ", "亜" }, | |
414 | { "あ", "安" }, | |
415 | { "あ", "在" }, | |
416 | { "あい", "愛" }, | |
417 | { "あい", "藍" }, | |
418 | { "あいう", "藍雨" }, | |
419 | { "か", "可" }, | |
420 | { "かき", "牡蠣" }, | |
421 | { "かき", "夏季" }, | |
422 | { "かきく", "柿久" }, | |
423 | { "さ", "差" }, | |
424 | { "さ", "左" }, | |
425 | { "さし", "刺" }, | |
426 | { "た", "田" }, | |
427 | { "た", "多" }, | |
428 | { "たち", "多値" }, | |
429 | { "たちつ", "タチツ" }, | |
430 | { "は", "葉" }, | |
431 | { "は", "歯" }, | |
432 | { "はひ", "ハヒ" }, | |
433 | { "ば", "場" }, | |
434 | { "はび", "波美" }, | |
435 | { "ばび", "馬尾" }, | |
436 | { "ばびぶ", "バビブ" }, | |
498 | 437 | }; |
499 | 438 | const size_t kKeyValuesSize = arraysize(kKeyValues); |
500 | 439 | unique_ptr<Token> tokens[kKeyValuesSize]; |
513 | 452 | // Test for normal prefix lookup without key expansion. |
514 | 453 | { |
515 | 454 | LookupPrefixTestCallback callback; |
516 | system_dic->LookupPrefix("\xE3\x81\x82\xE3\x81\x84", // "あい" | |
455 | system_dic->LookupPrefix("あい", // "あい" | |
517 | 456 | convreq_, &callback); |
518 | 457 | const std::set<std::pair<string, string>> &result = callback.result(); |
519 | 458 | // "あ" -- "あい" should be found. |
534 | 473 | // feature. |
535 | 474 | { |
536 | 475 | LookupPrefixTestCallback callback; |
537 | system_dic->LookupPrefix( | |
538 | "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", //"かきく" | |
539 | convreq_, | |
540 | &callback); | |
476 | system_dic->LookupPrefix("かきく", convreq_, &callback); | |
541 | 477 | const std::set<std::pair<string, string>> &result = callback.result(); |
542 | 478 | // Only "か" should be found as the callback doesn't traverse the subtree of |
543 | 479 | // "かき" due to culling request from LookupPrefixTestCallback::OnKey(). |
544 | 480 | for (size_t i = 0; i < kKeyValuesSize; ++i) { |
545 | const std::pair<string, string> entry( | |
546 | kKeyValues[i].key, kKeyValues[i].value); | |
547 | EXPECT_EQ(entry.first == "\xE3\x81\x8B", // "か" | |
548 | result.find(entry) != result.end()); | |
481 | const std::pair<string, string> entry(kKeyValues[i].key, | |
482 | kKeyValues[i].value); | |
483 | EXPECT_EQ(entry.first == "か", result.find(entry) != result.end()); | |
549 | 484 | } |
550 | 485 | } |
551 | 486 | |
552 | 487 | // Test for TRAVERSE_NEXT_KEY. |
553 | 488 | { |
554 | 489 | LookupPrefixTestCallback callback; |
555 | system_dic->LookupPrefix( | |
556 | "\xE3\x81\x95\xE3\x81\x97\xE3\x81\x99", // "さしす" | |
557 | convreq_, | |
558 | &callback); | |
490 | system_dic->LookupPrefix("さしす", convreq_, &callback); | |
559 | 491 | const std::set<std::pair<string, string>> &result = callback.result(); |
560 | 492 | // Only "さし" should be found as tokens for "さ" is skipped (see |
561 | 493 | // LookupPrefixTestCallback::OnKey()). |
562 | 494 | for (size_t i = 0; i < kKeyValuesSize; ++i) { |
563 | const std::pair<string, string> entry( | |
564 | kKeyValues[i].key, kKeyValues[i].value); | |
565 | EXPECT_EQ(entry.first == "\xE3\x81\x95\xE3\x81\x97", // "さし" | |
566 | result.find(entry) != result.end()); | |
495 | const std::pair<string, string> entry(kKeyValues[i].key, | |
496 | kKeyValues[i].value); | |
497 | EXPECT_EQ(entry.first == "さし", result.find(entry) != result.end()); | |
567 | 498 | } |
568 | 499 | } |
569 | 500 | |
570 | 501 | // Test for TRAVERSE_DONE. |
571 | 502 | { |
572 | 503 | LookupPrefixTestCallback callback; |
573 | system_dic->LookupPrefix( | |
574 | "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4", // "たちつ" | |
575 | convreq_, | |
576 | &callback); | |
504 | system_dic->LookupPrefix("たちつ", convreq_, &callback); | |
577 | 505 | const std::set<std::pair<string, string>> &result = callback.result(); |
578 | 506 | // Nothing should be found as the traversal is immediately done after seeing |
579 | 507 | // "た"; see LookupPrefixTestCallback::OnKey(). |
586 | 514 | // Use kana modifier insensitive lookup |
587 | 515 | request_.set_kana_modifier_insensitive_conversion(true); |
588 | 516 | config_.set_use_kana_modifier_insensitive_conversion(true); |
589 | system_dic->LookupPrefix( | |
590 | "\xE3\x81\xAF\xE3\x81\xB2", // "はひ" | |
591 | convreq_, | |
592 | &callback); | |
517 | system_dic->LookupPrefix("はひ", convreq_, &callback); | |
593 | 518 | const std::set<std::pair<string, string>> &result = callback.result(); |
594 | 519 | const char *kExpectedKeys[] = { |
595 | "\xE3\x81\xAF", // "は" | |
596 | "\xE3\x81\xB0", // "ば" | |
597 | "\xE3\x81\xAF\xE3\x81\xB2", // "はひ" | |
598 | "\xE3\x81\xB0\xE3\x81\xB2", // "ばひ" | |
599 | "\xE3\x81\xAF\xE3\x81\xB3", // "はび" | |
600 | "\xE3\x81\xB0\xE3\x81\xB3", // "ばび" | |
520 | "は", | |
521 | "ば", | |
522 | "はひ", | |
523 | "ばひ", | |
524 | "はび", | |
525 | "ばび", | |
601 | 526 | }; |
602 | 527 | const std::set<string> expected(kExpectedKeys, |
603 | kExpectedKeys + arraysize(kExpectedKeys)); | |
528 | kExpectedKeys + arraysize(kExpectedKeys)); | |
604 | 529 | for (size_t i = 0; i < kKeyValuesSize; ++i) { |
605 | 530 | const bool to_be_found = |
606 | 531 | expected.find(kKeyValues[i].key) != expected.end(); |
607 | const std::pair<string, string> entry( | |
608 | kKeyValues[i].key, kKeyValues[i].value); | |
532 | const std::pair<string, string> entry(kKeyValues[i].key, | |
533 | kKeyValues[i].value); | |
609 | 534 | EXPECT_EQ(to_be_found, result.find(entry) != result.end()); |
610 | 535 | } |
611 | 536 | } |
615 | 540 | std::vector<Token *> tokens; |
616 | 541 | ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens); |
617 | 542 | |
618 | // "まみむめもや" -> "value0" | |
619 | tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80" | |
620 | "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84", | |
621 | "value0")); | |
622 | // "まみむめもやゆよ" -> "value1" | |
623 | tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80" | |
624 | "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84" | |
625 | "\xe3\x82\x86\xe3\x82\x88", | |
626 | "value1")); | |
543 | tokens.push_back(CreateToken("まみむめもや", "value0")); | |
544 | tokens.push_back(CreateToken("まみむめもやゆよ", "value1")); | |
627 | 545 | // Build a dictionary with the above two tokens plus those from test data. |
628 | 546 | { |
629 | 547 | std::vector<Token *> source_tokens = tokens; |
636 | 554 | << "Failed to open dictionary source: " << dic_fn_; |
637 | 555 | |
638 | 556 | // All the tokens in |tokens| should be looked up by "まみむめも". |
639 | const char *kMamimumemo = | |
640 | "\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80\xe3\x82\x81\xe3\x82\x82"; | |
557 | const char kMamimumemo[] = "まみむめも"; | |
641 | 558 | CheckMultiTokensExistenceCallback callback(tokens); |
642 | 559 | system_dic->LookupPredictive(kMamimumemo, convreq_, &callback); |
643 | 560 | EXPECT_TRUE(callback.AreAllFound()); |
647 | 564 | std::vector<Token *> tokens; |
648 | 565 | ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens); |
649 | 566 | |
650 | // "がっこう" -> "学校" | |
651 | tokens.push_back(CreateToken( | |
652 | "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86", | |
653 | "\xE5\xAD\xA6\xE6\xA0\xA1")); | |
654 | // "かっこう" -> "格好" | |
655 | tokens.push_back(CreateToken( | |
656 | "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86", | |
657 | "\xE6\xA0\xBC\xE5\xA5\xBD")); | |
567 | tokens.push_back(CreateToken("がっこう", "学校")); | |
568 | tokens.push_back(CreateToken("かっこう", "格好")); | |
658 | 569 | |
659 | 570 | BuildSystemDictionary(tokens, 100); |
660 | 571 | unique_ptr<SystemDictionary> system_dic( |
662 | 573 | ASSERT_TRUE(system_dic.get() != NULL) |
663 | 574 | << "Failed to open dictionary source: " << dic_fn_; |
664 | 575 | |
665 | // "かつこう" | |
666 | const string kKey = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86"; | |
576 | const string kKey = "かつこう"; | |
667 | 577 | |
668 | 578 | // Without Kana modifier insensitive lookup flag, nothing is looked up. |
669 | 579 | CollectTokenCallback callback; |
684 | 594 | std::vector<Token *> tokens; |
685 | 595 | ScopedElementsDeleter<std::vector<Token *>> deleter(&tokens); |
686 | 596 | |
687 | // "あい" -> "ai" | |
688 | tokens.push_back(CreateToken("\xe3\x81\x82\xe3\x81\x84", "ai")); | |
689 | // "あいうえお" -> "aiueo" | |
690 | tokens.push_back(CreateToken( | |
691 | "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a", | |
692 | "aiueo")); | |
597 | tokens.push_back(CreateToken("あい", "ai")); | |
598 | tokens.push_back(CreateToken("あいうえお", "aiueo")); | |
693 | 599 | // Build a dictionary with the above two tokens plus those from test data. |
694 | 600 | { |
695 | 601 | std::vector<Token *> source_tokens = tokens; |
705 | 611 | // expected that "あいうえお" is not looked up because of longer key cut-off |
706 | 612 | // mechanism. However, "あい" is looked up as it's short. |
707 | 613 | CheckMultiTokensExistenceCallback callback(tokens); |
708 | system_dic->LookupPredictive("\xe3\x81\x82", // "あ" | |
709 | convreq_, &callback); | |
614 | system_dic->LookupPredictive("あ", convreq_, &callback); | |
710 | 615 | EXPECT_TRUE(callback.IsFound(tokens[0])); |
711 | 616 | EXPECT_FALSE(callback.IsFound(tokens[1])); |
712 | 617 | } |
714 | 619 | TEST_F(SystemDictionaryTest, LookupExact) { |
715 | 620 | std::vector<Token *> source_tokens; |
716 | 621 | |
717 | // "は" | |
718 | const string k0 = "\xe3\x81\xaf"; | |
719 | // "はひふへほ" | |
720 | const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81" | |
721 | "\xbb"; | |
622 | const string k0 = "は"; | |
623 | const string k1 = "はひふへほ"; | |
722 | 624 | |
723 | 625 | unique_ptr<Token> t0(CreateToken(k0, "aa")); |
724 | 626 | unique_ptr<Token> t1(CreateToken(k1, "bb")); |
749 | 651 | |
750 | 652 | TEST_F(SystemDictionaryTest, LookupReverse) { |
751 | 653 | unique_ptr<Token> t0(new Token); |
752 | // "ど" | |
753 | t0->key = "\xe3\x81\xa9"; | |
754 | // "ド" | |
755 | t0->value = "\xe3\x83\x89"; | |
654 | t0->key = "ど"; | |
655 | t0->value = "ド"; | |
756 | 656 | t0->cost = 1; |
757 | 657 | t0->lid = 2; |
758 | 658 | t0->rid = 3; |
759 | 659 | unique_ptr<Token> t1(new Token); |
760 | // "どらえもん" | |
761 | t1->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
762 | // "ドラえもん" | |
763 | t1->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
660 | t1->key = "どらえもん"; | |
661 | t1->value = "ドラえもん"; | |
764 | 662 | t1->cost = 1; |
765 | 663 | t1->lid = 2; |
766 | 664 | t1->rid = 3; |
767 | 665 | unique_ptr<Token> t2(new Token); |
768 | // "といざらす®" | |
769 | t2->key = "\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x96\xe3\x82\x89\xe3\x81\x99\xc2" | |
770 | "\xae"; | |
771 | // "トイザらス®" | |
772 | t2->value = "\xe3\x83\x88\xe3\x82\xa4\xe3\x82\xb6\xe3\x82\x89\xe3\x82\xb9\xc2" | |
773 | "\xae"; | |
666 | t2->key = "といざらす®"; | |
667 | t2->value = "トイザらス®"; | |
774 | 668 | t2->cost = 1; |
775 | 669 | t2->lid = 2; |
776 | 670 | t2->rid = 3; |
777 | 671 | unique_ptr<Token> t3(new Token); |
778 | // "ああああああ" | |
779 | 672 | // Both t3 and t4 will be encoded into 3 bytes. |
780 | t3->key = "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82" | |
781 | "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82"; | |
673 | t3->key = "ああああああ"; | |
782 | 674 | t3->value = t3->key; |
783 | 675 | t3->cost = 32000; |
784 | 676 | t3->lid = 1; |
788 | 680 | t4->lid = 1; |
789 | 681 | t4->rid = 2; |
790 | 682 | unique_ptr<Token> t5(new Token); |
791 | // "いいいいいい" | |
792 | 683 | // t5 will be encoded into 3 bytes. |
793 | t5->key = "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84" | |
794 | "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84"; | |
684 | t5->key = "いいいいいい"; | |
795 | 685 | t5->value = t5->key; |
796 | 686 | t5->cost = 32000; |
797 | 687 | t5->lid = 1; |
798 | 688 | t5->rid = 1; |
799 | 689 | // spelling correction token should not be retrieved by reverse lookup. |
800 | 690 | unique_ptr<Token> t6(new Token); |
801 | // "どらえもん" | |
802 | t6->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
803 | // "ドラえもん" | |
804 | t6->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
691 | t6->key = "どらえもん"; | |
692 | t6->value = "ドラえもん"; | |
805 | 693 | t6->cost = 1; |
806 | 694 | t6->lid = 2; |
807 | 695 | t6->rid = 3; |
808 | 696 | t6->attributes = Token::SPELLING_CORRECTION; |
809 | 697 | unique_ptr<Token> t7(new Token); |
810 | // "こんさーと" | |
811 | t7->key = "\xe3\x81\x93\xe3\x82\x93\xe3\x81\x95\xe3\x83\xbc\xe3\x81\xa8"; | |
812 | // "コンサート" | |
813 | t7->value = "\xe3\x82\xb3\xe3\x83\xb3\xe3\x82\xb5\xe3\x83\xbc\xe3\x83\x88"; | |
698 | t7->key = "こんさーと"; | |
699 | t7->value = "コンサート"; | |
814 | 700 | t7->cost = 1; |
815 | 701 | t7->lid = 1; |
816 | 702 | t7->rid = 1; |
817 | 703 | // "バージョン" should not return a result with the key "ヴァージョン". |
818 | 704 | unique_ptr<Token> t8(new Token); |
819 | // "ばーじょん" | |
820 | t8->key = "\xE3\x81\xB0\xE3\x83\xBC\xE3\x81\x98\xE3\x82\x87\xE3\x82\x93"; | |
821 | // "バージョン" | |
822 | t8->value = "\xE3\x83\x90\xE3\x83\xBC\xE3\x82\xB8\xE3\x83\xA7\xE3\x83\xB3"; | |
705 | t8->key = "ばーじょん"; | |
706 | t8->value = "バージョン"; | |
823 | 707 | t8->cost = 1; |
824 | 708 | t8->lid = 1; |
825 | 709 | t8->rid = 1; |
885 | 769 | { |
886 | 770 | // test for non exact transliterated index string. |
887 | 771 | // append "が" |
888 | const string key = t7->value + "\xe3\x81\x8c"; | |
772 | const string key = t7->value + "が"; | |
889 | 773 | CollectTokenCallback callback; |
890 | 774 | system_dic->LookupReverse(key, convreq_, &callback); |
891 | 775 | const std::vector<Token> &tokens = callback.tokens(); |
936 | 820 | } |
937 | 821 | |
938 | 822 | TEST_F(SystemDictionaryTest, LookupReverseWithCache) { |
939 | const string kDoraemon = | |
940 | "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
823 | const string kDoraemon = "ドラえもん"; | |
941 | 824 | |
942 | 825 | Token source_token; |
943 | // "どらえもん" | |
944 | source_token.key = | |
945 | "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; | |
946 | // "ドラえもん" | |
826 | source_token.key = "どらえもん"; | |
947 | 827 | source_token.value = kDoraemon; |
948 | 828 | source_token.cost = 1; |
949 | 829 | source_token.lid = 2; |
971 | 851 | TEST_F(SystemDictionaryTest, SpellingCorrectionTokens) { |
972 | 852 | std::vector<Token> tokens(3); |
973 | 853 | |
974 | // "あぼがど" | |
975 | tokens[0].key = "\xe3\x81\x82\xe3\x81\xbc\xe3\x81\x8c\xe3\x81\xa9"; | |
976 | // "アボカド" | |
977 | tokens[0].value = "\xe3\x82\xa2\xe3\x83\x9c\xe3\x82\xab\xe3\x83\x89"; | |
854 | tokens[0].key = "あぼがど"; | |
855 | tokens[0].value = "アボカド"; | |
978 | 856 | tokens[0].cost = 1; |
979 | 857 | tokens[0].lid = 0; |
980 | 858 | tokens[0].rid = 2; |
981 | 859 | tokens[0].attributes = Token::SPELLING_CORRECTION; |
982 | 860 | |
983 | // "しゅみれーしょん" | |
984 | tokens[1].key = | |
985 | "\xe3\x81\x97\xe3\x82\x85\xe3\x81\xbf\xe3\x82\x8c" | |
986 | "\xe3\x83\xbc\xe3\x81\x97\xe3\x82\x87\xe3\x82\x93"; | |
987 | // "シミュレーション" | |
988 | tokens[1].value = | |
989 | "\xe3\x82\xb7\xe3\x83\x9f\xe3\x83\xa5\xe3\x83\xac" | |
990 | "\xe3\x83\xbc\xe3\x82\xb7\xe3\x83\xa7\xe3\x83\xb3"; | |
861 | tokens[1].key = "しゅみれーしょん"; | |
862 | tokens[1].value = "シミュレーション"; | |
991 | 863 | tokens[1].cost = 1; |
992 | 864 | tokens[1].lid = 100; |
993 | 865 | tokens[1].rid = 3; |
994 | 866 | tokens[1].attributes = Token::SPELLING_CORRECTION; |
995 | 867 | |
996 | // "あきはばら" | |
997 | tokens[2].key = | |
998 | "\xe3\x81\x82\xe3\x81\x8d\xe3\x81\xaf\xe3\x81\xb0\xe3\x82\x89"; | |
999 | // "秋葉原" | |
1000 | tokens[2].value = "\xe7\xa7\x8b\xe8\x91\x89\xe5\x8e\x9f"; | |
868 | tokens[2].key = "あきはばら"; | |
869 | tokens[2].value = "秋葉原"; | |
1001 | 870 | tokens[2].cost = 1000; |
1002 | 871 | tokens[2].lid = 1; |
1003 | 872 | tokens[2].rid = 2; |
1022 | 891 | } |
1023 | 892 | |
1024 | 893 | TEST_F(SystemDictionaryTest, EnableNoModifierTargetWithLoudsTrie) { |
1025 | // "かつ" | |
1026 | const string k0 = "\xE3\x81\x8B\xE3\x81\xA4"; | |
1027 | // "かっこ" | |
1028 | const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93"; | |
1029 | // "かつこう" | |
1030 | const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86"; | |
1031 | // "かっこう" | |
1032 | const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; | |
1033 | // "がっこう" | |
1034 | const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; | |
894 | const string k0 = "かつ"; | |
895 | const string k1 = "かっこ"; | |
896 | const string k2 = "かつこう"; | |
897 | const string k3 = "かっこう"; | |
898 | const string k4 = "がっこう"; | |
1035 | 899 | |
1036 | 900 | unique_ptr<Token> tokens[5]; |
1037 | 901 | tokens[0].reset(CreateToken(k0, "aa")); |
1088 | 952 | } |
1089 | 953 | |
1090 | 954 | TEST_F(SystemDictionaryTest, NoModifierForKanaEntries) { |
1091 | // "ていすてぃんぐ", "テイスティング" | |
1092 | unique_ptr<Token> t0(CreateToken( | |
1093 | "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6" | |
1094 | "\xe3\x81\x83\xe3\x82\x93\xe3\x81\x90", | |
1095 | "\xe3\x83\x86\xe3\x82\xa4\xe3\x82\xb9\xe3\x83\x86" | |
1096 | "\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0")); | |
1097 | // "てすとです", "てすとです" | |
1098 | unique_ptr<Token> t1(CreateToken( | |
1099 | "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99", | |
1100 | "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99")); | |
955 | unique_ptr<Token> t0(CreateToken("ていすてぃんぐ", "テイスティング")); | |
956 | unique_ptr<Token> t1(CreateToken("てすとです", "てすとです")); | |
1101 | 957 | |
1102 | 958 | std::vector<Token *> source_tokens; |
1103 | 959 | source_tokens.push_back(t0.get()); |
1112 | 968 | << "Failed to open dictionary source:" << dic_fn_; |
1113 | 969 | |
1114 | 970 | // Lookup |t0| from "ていすていんぐ" |
1115 | const string k = "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6" | |
1116 | "\xe3\x81\x84\xe3\x82\x93\xe3\x81\x90"; | |
971 | const string k = "ていすていんぐ"; | |
1117 | 972 | request_.set_kana_modifier_insensitive_conversion(true); |
1118 | 973 | config_.set_use_kana_modifier_insensitive_conversion(true); |
1119 | 974 | CheckTokenExistenceCallback callback(t0.get()); |
1122 | 977 | } |
1123 | 978 | |
1124 | 979 | TEST_F(SystemDictionaryTest, DoNotReturnNoModifierTargetWithLoudsTrie) { |
1125 | // "かつ" | |
1126 | const string k0 = "\xE3\x81\x8B\xE3\x81\xA4"; | |
1127 | // "かっこ" | |
1128 | const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93"; | |
1129 | // "かつこう" | |
1130 | const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86"; | |
1131 | // "かっこう" | |
1132 | const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; | |
1133 | // "がっこう" | |
1134 | const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; | |
980 | const string k0 = "かつ"; | |
981 | const string k1 = "かっこ"; | |
982 | const string k2 = "かつこう"; | |
983 | const string k3 = "かっこう"; | |
984 | const string k4 = "がっこう"; | |
1135 | 985 | |
1136 | 986 | unique_ptr<Token> t0(CreateToken(k0, "aa")); |
1137 | 987 | unique_ptr<Token> t1(CreateToken(k1, "bb")); |
85 | 85 | } // namespace |
86 | 86 | |
87 | 87 | TEST(UserDictionaryImporter, ImportFromNormalTextTest) { |
88 | // "きょうと\t京都\t名詞\n" | |
89 | // "おおさか\t大阪\t地名\n" | |
90 | // "とうきょう\t東京\t地名\tコメント\n" | |
91 | // "すずき\t鈴木\t人名\n" | |
92 | 88 | const char kInput[] = |
93 | "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t" | |
94 | "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n" | |
95 | "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t" | |
96 | "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n" | |
97 | "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3" | |
98 | "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5" | |
99 | "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n" | |
100 | "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4" | |
101 | "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n"; | |
89 | "きょうと\t京都\t名詞\n" | |
90 | "おおさか\t大阪\t地名\n" | |
91 | "とうきょう\t東京\t地名\tコメント\n" | |
92 | "すずき\t鈴木\t人名\n"; | |
102 | 93 | |
103 | 94 | UserDictionaryImporter::StringTextLineIterator iter(kInput); |
104 | 95 | UserDictionaryStorage::UserDictionary user_dic; |
105 | 96 | |
106 | 97 | EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR, |
107 | 98 | UserDictionaryImporter::ImportFromTextLineIterator( |
108 | UserDictionaryImporter::MOZC, | |
109 | &iter, | |
110 | &user_dic)); | |
99 | UserDictionaryImporter::MOZC, &iter, &user_dic)); | |
111 | 100 | |
112 | 101 | ASSERT_EQ(4, user_dic.entries_size()); |
113 | 102 | |
114 | // EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
115 | // EXPECT_EQ("京都", user_dic.entries(0).value()); | |
116 | // EXPECT_EQ("名詞", user_dic.entries(0).pos()); | |
117 | // EXPECT_EQ("", user_dic.entries(0).comment()); | |
118 | EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8", | |
119 | user_dic.entries(0).key()); | |
120 | EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value()); | |
103 | EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
104 | EXPECT_EQ("京都", user_dic.entries(0).value()); | |
121 | 105 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos()); |
122 | 106 | EXPECT_EQ("", user_dic.entries(0).comment()); |
123 | 107 | |
124 | // EXPECT_EQ("おおさか", user_dic.entries(1).key()); | |
125 | // EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
126 | // EXPECT_EQ("地名", user_dic.entries(1).pos()); | |
127 | // EXPECT_EQ("", user_dic.entries(1).comment()); | |
128 | EXPECT_EQ("\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B", | |
129 | user_dic.entries(1).key()); | |
130 | EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value()); | |
108 | EXPECT_EQ("おおさか", user_dic.entries(1).key()); | |
109 | EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
131 | 110 | EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME, |
132 | 111 | user_dic.entries(1).pos()); |
133 | 112 | EXPECT_EQ("", user_dic.entries(1).comment()); |
134 | 113 | |
135 | ||
136 | // EXPECT_EQ("とうきょう", user_dic.entries(2).key()); | |
137 | // EXPECT_EQ("東京", user_dic.entries(2).value()); | |
138 | // EXPECT_EQ("地名", user_dic.entries(2).pos()); | |
139 | // EXPECT_EQ("コメント", user_dic.entries(2).comment()); | |
140 | EXPECT_EQ("\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86", | |
141 | user_dic.entries(2).key()); | |
142 | EXPECT_EQ("\xE6\x9D\xB1\xE4\xBA\xAC", user_dic.entries(2).value()); | |
114 | EXPECT_EQ("とうきょう", user_dic.entries(2).key()); | |
115 | EXPECT_EQ("東京", user_dic.entries(2).value()); | |
143 | 116 | EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME, |
144 | 117 | user_dic.entries(2).pos()); |
145 | EXPECT_EQ("\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88", | |
146 | user_dic.entries(2).comment()); | |
147 | ||
148 | // EXPECT_EQ("すずき", user_dic.entries(3).key()); | |
149 | // EXPECT_EQ("鈴木", user_dic.entries(3).value()); | |
150 | // EXPECT_EQ("人名", user_dic.entries(3).pos()); | |
151 | // EXPECT_EQ("", user_dic.entries(3).comment()); | |
152 | EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", user_dic.entries(3).key()); | |
153 | EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(3).value()); | |
118 | EXPECT_EQ("コメント", user_dic.entries(2).comment()); | |
119 | ||
120 | EXPECT_EQ("すずき", user_dic.entries(3).key()); | |
121 | EXPECT_EQ("鈴木", user_dic.entries(3).value()); | |
154 | 122 | EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME, |
155 | 123 | user_dic.entries(3).pos()); |
156 | 124 | EXPECT_EQ("", user_dic.entries(3).comment()); |
157 | 125 | } |
158 | 126 | |
159 | 127 | TEST(UserDictionaryImporter, ImportFromKotoeriTextTest) { |
160 | // "\"きょうと\",\"京都\",\"名詞\"\n" | |
161 | // "\"おおさか\",\"大阪\",\"地名\"\n" | |
162 | // "// last line" | |
163 | 128 | const char kInput[] = |
164 | "\"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\"," | |
165 | "\"\xE4\xBA\xAC\xE9\x83\xBD\",\"\xE5\x90\x8D\xE8\xA9\x9E\"\n" | |
166 | "\"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\"," | |
167 | "\"\xE5\xA4\xA7\xE9\x98\xAA\",\"\xE5\x9C\xB0\xE5\x90\x8D\"\n" | |
129 | "\"きょうと\"," | |
130 | "\"京都\",\"名詞\"\n" | |
131 | "\"おおさか\"," | |
132 | "\"大阪\",\"地名\"\n" | |
168 | 133 | "// last line"; |
169 | ||
170 | 134 | { |
171 | 135 | UserDictionaryImporter::StringTextLineIterator iter(kInput); |
172 | 136 | UserDictionaryStorage::UserDictionary user_dic; |
173 | 137 | |
174 | 138 | EXPECT_EQ(UserDictionaryImporter::IMPORT_NOT_SUPPORTED, |
175 | 139 | UserDictionaryImporter::ImportFromTextLineIterator( |
176 | UserDictionaryImporter::MOZC, | |
177 | &iter, | |
178 | &user_dic)); | |
140 | UserDictionaryImporter::MOZC, &iter, &user_dic)); | |
179 | 141 | |
180 | 142 | EXPECT_EQ(0, user_dic.entries_size()); |
181 | 143 | } |
182 | ||
183 | 144 | { |
184 | 145 | UserDictionaryImporter::StringTextLineIterator iter(kInput); |
185 | 146 | UserDictionaryStorage::UserDictionary user_dic; |
186 | 147 | |
187 | 148 | EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR, |
188 | 149 | UserDictionaryImporter::ImportFromTextLineIterator( |
189 | UserDictionaryImporter::KOTOERI, | |
190 | &iter, | |
191 | &user_dic)); | |
150 | UserDictionaryImporter::KOTOERI, &iter, &user_dic)); | |
192 | 151 | |
193 | 152 | ASSERT_EQ(2, user_dic.entries_size()); |
194 | 153 | |
195 | // EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
196 | // EXPECT_EQ("京都", user_dic.entries(0).value()); | |
197 | // EXPECT_EQ("名詞", user_dic.entries(0).pos()); | |
198 | EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8", | |
199 | user_dic.entries(0).key()); | |
200 | EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value()); | |
201 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, | |
202 | user_dic.entries(0).pos()); | |
203 | ||
204 | // EXPECT_EQ("おおさか", user_dic.entries(1).key()); | |
205 | // EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
206 | // EXPECT_EQ("地名", user_dic.entries(1).pos()); | |
207 | EXPECT_EQ("\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B", | |
208 | user_dic.entries(1).key()); | |
209 | EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value()); | |
154 | EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
155 | EXPECT_EQ("京都", user_dic.entries(0).value()); | |
156 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos()); | |
157 | ||
158 | EXPECT_EQ("おおさか", user_dic.entries(1).key()); | |
159 | EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
210 | 160 | EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME, |
211 | 161 | user_dic.entries(1).pos()); |
212 | 162 | } |
213 | 163 | } |
214 | 164 | |
215 | 165 | TEST(UserDictionaryImporter, ImportFromCommentTextTest) { |
216 | // "きょうと\t京都\t名詞\n" | |
217 | // "!おおさか\t大阪\t地名\n" | |
218 | // "\n" | |
219 | // "#とうきょう\t東京\t地名\tコメント\n" | |
220 | // "すずき\t鈴木\t人名\n"; | |
221 | 166 | const char kInput[] = |
222 | "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t" | |
223 | "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n" | |
224 | "!\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t" | |
225 | "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n" | |
167 | "きょうと\t京都\t名詞\n" | |
168 | "!おおさか\t大阪\t地名\n" | |
226 | 169 | "\n" |
227 | "#\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\t" | |
228 | "\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5\x90\x8D\t" | |
229 | "\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n" | |
230 | "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t" | |
231 | "\xE9\x88\xB4\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n"; | |
232 | ||
170 | "#とうきょう\t東京\t地名\tコメント\n" | |
171 | "すずき\t鈴木\t人名\n"; | |
233 | 172 | { |
234 | 173 | const string kMsImeInput(string("!Microsoft IME\n") + kInput); |
235 | 174 | UserDictionaryImporter::StringTextLineIterator iter(kMsImeInput); |
237 | 176 | |
238 | 177 | EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR, |
239 | 178 | UserDictionaryImporter::ImportFromTextLineIterator( |
240 | UserDictionaryImporter::MSIME, | |
241 | &iter, | |
242 | &user_dic)); | |
179 | UserDictionaryImporter::MSIME, &iter, &user_dic)); | |
243 | 180 | |
244 | 181 | ASSERT_EQ(3, user_dic.entries_size()); |
245 | 182 | |
246 | // EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
247 | // EXPECT_EQ("京都", user_dic.entries(0).value()); | |
248 | // EXPECT_EQ("名詞", user_dic.entries(0).pos()); | |
249 | EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8", | |
250 | user_dic.entries(0).key()); | |
251 | EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value()); | |
252 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, | |
253 | user_dic.entries(0).pos()); | |
254 | ||
255 | // EXPECT_EQ("#とうきょう", user_dic.entries(1).key()); | |
256 | // EXPECT_EQ("東京", user_dic.entries(1).value()); | |
257 | // EXPECT_EQ("地名", user_dic.entries(1).pos()); | |
258 | EXPECT_EQ("#\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86", | |
259 | user_dic.entries(1).key()); | |
260 | EXPECT_EQ("\xE6\x9D\xB1\xE4\xBA\xAC", user_dic.entries(1).value()); | |
183 | EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
184 | EXPECT_EQ("京都", user_dic.entries(0).value()); | |
185 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos()); | |
186 | ||
187 | EXPECT_EQ("#とうきょう", user_dic.entries(1).key()); | |
188 | EXPECT_EQ("東京", user_dic.entries(1).value()); | |
261 | 189 | EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME, |
262 | 190 | user_dic.entries(1).pos()); |
263 | 191 | |
264 | // EXPECT_EQ("すずき", user_dic.entries(2).key()); | |
265 | // EXPECT_EQ("鈴木", user_dic.entries(2).value()); | |
266 | // EXPECT_EQ("人名", user_dic.entries(2).pos()); | |
267 | EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", | |
268 | user_dic.entries(2).key()); | |
269 | EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(2).value()); | |
192 | EXPECT_EQ("すずき", user_dic.entries(2).key()); | |
193 | EXPECT_EQ("鈴木", user_dic.entries(2).value()); | |
270 | 194 | EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME, |
271 | 195 | user_dic.entries(2).pos()); |
272 | 196 | } |
273 | ||
274 | 197 | { |
275 | 198 | UserDictionaryImporter::StringTextLineIterator iter(kInput); |
276 | 199 | UserDictionaryStorage::UserDictionary user_dic; |
277 | 200 | |
278 | 201 | EXPECT_EQ(UserDictionaryImporter::IMPORT_NO_ERROR, |
279 | 202 | UserDictionaryImporter::ImportFromTextLineIterator( |
280 | UserDictionaryImporter::MOZC, | |
281 | &iter, | |
282 | &user_dic)); | |
203 | UserDictionaryImporter::MOZC, &iter, &user_dic)); | |
283 | 204 | |
284 | 205 | ASSERT_EQ(3, user_dic.entries_size()); |
285 | 206 | |
286 | // EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
287 | // EXPECT_EQ("京都", user_dic.entries(0).value()); | |
288 | // EXPECT_EQ("名詞", user_dic.entries(0).pos()); | |
289 | EXPECT_EQ("\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8", | |
290 | user_dic.entries(0).key()); | |
291 | EXPECT_EQ("\xE4\xBA\xAC\xE9\x83\xBD", user_dic.entries(0).value()); | |
292 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, | |
293 | user_dic.entries(0).pos()); | |
294 | ||
295 | // EXPECT_EQ("!おおさか", user_dic.entries(1).key()); | |
296 | // EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
297 | // EXPECT_EQ("地名", user_dic.entries(1).pos()); | |
298 | EXPECT_EQ("!\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B", | |
299 | user_dic.entries(1).key()); | |
300 | EXPECT_EQ("\xE5\xA4\xA7\xE9\x98\xAA", user_dic.entries(1).value()); | |
207 | EXPECT_EQ("きょうと", user_dic.entries(0).key()); | |
208 | EXPECT_EQ("京都", user_dic.entries(0).value()); | |
209 | EXPECT_EQ(user_dictionary::UserDictionary::NOUN, user_dic.entries(0).pos()); | |
210 | ||
211 | EXPECT_EQ("!おおさか", user_dic.entries(1).key()); | |
212 | EXPECT_EQ("大阪", user_dic.entries(1).value()); | |
301 | 213 | EXPECT_EQ(user_dictionary::UserDictionary::PLACE_NAME, |
302 | 214 | user_dic.entries(1).pos()); |
303 | 215 | |
304 | // EXPECT_EQ("すずき", user_dic.entries(2).key()); | |
305 | // EXPECT_EQ("鈴木", user_dic.entries(2).value()); | |
306 | // EXPECT_EQ("人名", user_dic.entries(2).pos()); | |
307 | EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", | |
308 | user_dic.entries(2).key()); | |
309 | EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(2).value()); | |
216 | EXPECT_EQ("すずき", user_dic.entries(2).key()); | |
217 | EXPECT_EQ("鈴木", user_dic.entries(2).value()); | |
310 | 218 | EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME, |
311 | 219 | user_dic.entries(2).pos()); |
312 | 220 | } |
313 | 221 | } |
314 | 222 | |
315 | 223 | TEST(UserDictionaryImporter, ImportFromInvalidTextTest) { |
316 | // "a" | |
317 | // "\n" | |
318 | // "東京\t\t地名\tコメント\n" | |
319 | // "すずき\t鈴木\t人名\n"; | |
320 | 224 | const char kInput[] = |
321 | 225 | "a" |
322 | 226 | "\n" |
323 | "\xE6\x9D\xB1\xE4\xBA\xAC\t\t\xE5\x9C\xB0\xE5\x90\x8D\t" | |
324 | "\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n" | |
325 | "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t" | |
326 | "\xE9\x88\xB4\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n"; | |
227 | "東京\t\t地名\tコメント\n" | |
228 | "すずき\t鈴木\t人名\n"; | |
327 | 229 | |
328 | 230 | UserDictionaryImporter::StringTextLineIterator iter(kInput); |
329 | 231 | UserDictionaryStorage::UserDictionary user_dic; |
330 | 232 | |
331 | 233 | EXPECT_EQ(UserDictionaryImporter::IMPORT_INVALID_ENTRIES, |
332 | 234 | UserDictionaryImporter::ImportFromTextLineIterator( |
333 | UserDictionaryImporter::MOZC, | |
334 | &iter, | |
335 | &user_dic)); | |
235 | UserDictionaryImporter::MOZC, &iter, &user_dic)); | |
336 | 236 | |
337 | 237 | ASSERT_EQ(1, user_dic.entries_size()); |
338 | 238 | |
339 | // EXPECT_EQ("すずき", user_dic.entries(0).key()); | |
340 | // EXPECT_EQ("鈴木", user_dic.entries(0).value()); | |
341 | // EXPECT_EQ("人名", user_dic.entries(0).pos()); | |
342 | EXPECT_EQ("\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D", user_dic.entries(0).key()); | |
343 | EXPECT_EQ("\xE9\x88\xB4\xE6\x9C\xA8", user_dic.entries(0).value()); | |
239 | EXPECT_EQ("すずき", user_dic.entries(0).key()); | |
240 | EXPECT_EQ("鈴木", user_dic.entries(0).value()); | |
344 | 241 | EXPECT_EQ(user_dictionary::UserDictionary::PERSONAL_NAME, |
345 | 242 | user_dic.entries(0).pos()); |
346 | 243 | } |
363 | 260 | UserDictionaryImporter::RawEntry entry; |
364 | 261 | entry.key = "aa"; |
365 | 262 | entry.value = "aa"; |
366 | // entry.pos = "名詞"; | |
367 | entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
263 | entry.pos = "名詞"; | |
368 | 264 | entries.push_back(entry); |
369 | 265 | } |
370 | 266 | |
400 | 296 | std::to_string(static_cast<uint32>(j))); |
401 | 297 | entry.key = key; |
402 | 298 | entry.value = value; |
403 | // entry.pos = "名詞"; | |
404 | entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
299 | entry.pos = "名詞"; | |
405 | 300 | entries.push_back(entry); |
406 | 301 | } |
407 | 302 | |
442 | 337 | entry.key = key; |
443 | 338 | entry.value = value; |
444 | 339 | if (j % 2 == 0) { |
445 | // entry.pos = "名詞"; | |
446 | entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
340 | entry.pos = "名詞"; | |
447 | 341 | } |
448 | 342 | entries.push_back(entry); |
449 | 343 | } |
467 | 361 | = user_dic.add_entries(); |
468 | 362 | entry->set_key("aa"); |
469 | 363 | entry->set_value("aa"); |
470 | // entry->set_pos("名詞"); | |
471 | 364 | entry->set_pos(user_dictionary::UserDictionary::NOUN); |
472 | 365 | } |
473 | 366 | |
477 | 370 | UserDictionaryImporter::RawEntry entry; |
478 | 371 | entry.key = "aa"; |
479 | 372 | entry.value = "aa"; |
480 | // entry.pos = "名詞"; | |
481 | entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
373 | entry.pos = "名詞"; | |
482 | 374 | entries.push_back(entry); |
483 | 375 | } |
484 | 376 | |
493 | 385 | UserDictionaryImporter::RawEntry entry; |
494 | 386 | entry.key = "bb"; |
495 | 387 | entry.value = "bb"; |
496 | // entry.pos = "名詞"; | |
497 | entry.pos = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
388 | entry.pos = "名詞"; | |
498 | 389 | entries.push_back(entry); |
499 | 390 | } |
500 | 391 | |
618 | 509 | |
619 | 510 | TEST(UserDictionaryImporter, GuessEncodingTypeTest) { |
620 | 511 | { |
621 | // "これはテストです。" | |
622 | const char str[] = "\xE3\x81\x93\xE3\x82\x8C\xE3\x81\xAF\xE3\x83\x86" | |
623 | "\xE3\x82\xB9\xE3\x83\x88\xE3\x81\xA7\xE3\x81\x99" | |
624 | "\xE3\x80\x82"; | |
512 | const char str[] = "これはテストです。"; | |
625 | 513 | EXPECT_EQ(UserDictionaryImporter::UTF8, |
626 | 514 | UserDictionaryImporter::GuessEncodingType(str)); |
627 | 515 | } |
628 | ||
629 | { | |
630 | // "私の名前は中野ですABC" | |
631 | const char str[] = "\xE7\xA7\x81\xE3\x81\xAE\xE5\x90\x8D\xE5\x89\x8D" | |
632 | "\xE3\x81\xAF\xE4\xB8\xAD\xE9\x87\x8E\xE3\x81\xA7" | |
633 | "\xE3\x81\x99" "ABC"; | |
516 | { | |
517 | const char str[] = "私の名前は中野ですABC"; | |
634 | 518 | EXPECT_EQ(UserDictionaryImporter::UTF8, |
635 | 519 | UserDictionaryImporter::GuessEncodingType(str)); |
636 | 520 | } |
637 | ||
638 | 521 | { |
639 | 522 | const char str[] = "ABCDEFG abcdefg"; |
640 | 523 | EXPECT_EQ(UserDictionaryImporter::UTF8, |
641 | 524 | UserDictionaryImporter::GuessEncodingType(str)); |
642 | 525 | } |
643 | ||
644 | { | |
645 | // "ハロー" | |
646 | const char str[] = "\xE3\x83\x8F\xE3\x83\xAD\xE3\x83\xBC"; | |
526 | { | |
527 | const char str[] = "ハロー"; | |
647 | 528 | EXPECT_EQ(UserDictionaryImporter::UTF8, |
648 | 529 | UserDictionaryImporter::GuessEncodingType(str)); |
649 | 530 | } |
51 | 51 | using ::mozc::user_dictionary::UserDictionaryCommandStatus; |
52 | 52 | using ::mozc::user_dictionary::UserDictionarySessionHandler; |
53 | 53 | |
54 | // "きょうと\t京都\t名詞\n" | |
55 | // "!おおさか\t大阪\t地名\n" | |
56 | // "\n" | |
57 | // "#とうきょう\t東京\t地名\tコメント\n" | |
58 | // "すずき\t鈴木\t人名\n"; | |
59 | 54 | const char kDictionaryData[] = |
60 | "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t" | |
61 | "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n" | |
62 | "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t" | |
63 | "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n" | |
64 | "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3" | |
65 | "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5" | |
66 | "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n" | |
67 | "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4" | |
68 | "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n"; | |
55 | "きょうと\t京都\t名詞\n" | |
56 | "おおさか\t大阪\t地名\n" | |
57 | "とうきょう\t東京\t地名\tコメント\n" | |
58 | "すずき\t鈴木\t人名\n"; | |
69 | 59 | |
70 | 60 | // 0 means invalid dictionary id. |
71 | 61 | // c.f., UserDictionaryUtil::CreateNewDictionaryId() |
73 | 63 | |
74 | 64 | class UserDictionarySessionHandlerTest : public ::testing::Test { |
75 | 65 | protected: |
76 | virtual void SetUp() { | |
66 | void SetUp() override { | |
77 | 67 | original_user_profile_directory_ = SystemUtil::GetUserProfileDirectory(); |
78 | 68 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
79 | 69 | FileUtil::Unlink(GetUserDictionaryFile()); |
85 | 75 | handler_->set_dictionary_path(GetUserDictionaryFile()); |
86 | 76 | } |
87 | 77 | |
88 | virtual void TearDown() { | |
78 | void TearDown() override { | |
89 | 79 | FileUtil::Unlink(GetUserDictionaryFile()); |
90 | 80 | SystemUtil::SetUserProfileDirectory(original_user_profile_directory_); |
91 | 81 | } |
927 | 917 | const uint64 session_id = CreateSession(); |
928 | 918 | |
929 | 919 | string data = kDictionaryData; |
930 | // "☻\tEMOTICON\t名詞\n": Invalid symbol reading. | |
931 | data.append("\xE2\x98\xBB\tEMOTICON\t\xE5\x90\x8D\xE8\xA9\x9E\n"); | |
932 | // "読み\tYOMI\t名詞\n": Invalid Kanji reading. | |
933 | data.append("\xE8\xAA\xAD\xE3\x81\xBF\tYOMI\t\xE5\x90\x8D\xE8\xA9\x9E\n"); | |
920 | data.append("☻\tEMOTICON\t名詞\n"); // Invalid symbol reading. | |
921 | data.append("読み\tYOMI\t名詞\n"); // Invalid Kanji reading. | |
934 | 922 | |
935 | 923 | // Import data to a new dictionary. |
936 | 924 | Clear(); |
43 | 43 | |
44 | 44 | namespace { |
45 | 45 | |
46 | // "きょうと\t京都\t名詞\n" | |
47 | // "!おおさか\t大阪\t地名\n" | |
48 | // "\n" | |
49 | // "#とうきょう\t東京\t地名\tコメント\n" | |
50 | // "すずき\t鈴木\t人名\n"; | |
51 | 46 | const char kDictionaryData[] = |
52 | "\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\t" | |
53 | "\xE4\xBA\xAC\xE9\x83\xBD\t\xE5\x90\x8D\xE8\xA9\x9E\n" | |
54 | "\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\t" | |
55 | "\xE5\xA4\xA7\xE9\x98\xAA\t\xE5\x9C\xB0\xE5\x90\x8D\n" | |
56 | "\xE3\x81\xA8\xE3\x81\x86\xE3\x81\x8D\xE3\x82\x87\xE3" | |
57 | "\x81\x86\t\xE6\x9D\xB1\xE4\xBA\xAC\t\xE5\x9C\xB0\xE5" | |
58 | "\x90\x8D\t\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\n" | |
59 | "\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\t\xE9\x88\xB4" | |
60 | "\xE6\x9C\xA8\t\xE4\xBA\xBA\xE5\x90\x8D\n"; | |
47 | "きょうと\t京都\t名詞\n" | |
48 | "おおさか\t大阪\t地名\n" | |
49 | "とうきょう\t東京\t地名\tコメント\n" | |
50 | "すずき\t鈴木\t人名\n"; | |
61 | 51 | |
62 | 52 | using ::mozc::FileUtil; |
63 | 53 | using ::mozc::SystemUtil; |
68 | 58 | |
69 | 59 | class UserDictionarySessionTest : public ::testing::Test { |
70 | 60 | protected: |
71 | virtual void SetUp() { | |
61 | void SetUp() override { | |
72 | 62 | original_user_profile_directory_ = SystemUtil::GetUserProfileDirectory(); |
73 | 63 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
74 | 64 | FileUtil::Unlink(GetUserDictionaryFile()); |
75 | 65 | } |
76 | 66 | |
77 | virtual void TearDown() { | |
67 | void TearDown() override { | |
78 | 68 | FileUtil::Unlink(GetUserDictionaryFile()); |
79 | 69 | SystemUtil::SetUserProfileDirectory(original_user_profile_directory_); |
80 | 70 | } |
616 | 606 | EXPECT_PROTO_PEQ( |
617 | 607 | "dictionaries: <\n" |
618 | 608 | " entries: <\n" |
619 | " key: \"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\"\n" | |
620 | " value: \"\xE4\xBA\xAC\xE9\x83\xBD\"\n" | |
609 | " key: \"きょうと\"\n" | |
610 | " value: \"京都\"\n" | |
621 | 611 | " pos: NOUN\n" |
622 | 612 | " >\n" |
623 | 613 | " entries: <\n" |
624 | " key: \"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\"\n" | |
625 | " value: \"\xE5\xA4\xA7\xE9\x98\xAA\"\n" | |
614 | " key: \"おおさか\"\n" | |
615 | " value: \"大阪\"\n" | |
626 | 616 | " pos: PLACE_NAME\n" |
627 | 617 | " >\n" |
628 | 618 | " entries: <\n" |
629 | " key: \"\xE3\x81\xA8\xE3\x81\x86\xE3" | |
630 | "\x81\x8D\xE3\x82\x87\xE3\x81\x86\"\n" | |
631 | " value: \"\xE6\x9D\xB1\xE4\xBA\xAC\"\n" | |
619 | " key: \"とうきょう\"\n" | |
620 | " value: \"東京\"\n" | |
632 | 621 | " pos: PLACE_NAME\n" |
633 | " comment: \"\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\"\n" | |
622 | " comment: \"コメント\"\n" | |
634 | 623 | " >\n" |
635 | 624 | " entries: <\n" |
636 | " key: \"\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\"\n" | |
637 | " value: \"\xE9\x88\xB4\xE6\x9C\xA8\"\n" | |
625 | " key: \"すずき\"\n" | |
626 | " value: \"鈴木\"\n" | |
638 | 627 | " pos: PERSONAL_NAME\n" |
639 | 628 | " >\n" |
640 | 629 | ">", |
664 | 653 | "dictionaries: <\n" |
665 | 654 | " name: \"user dictionary\"\n" |
666 | 655 | " entries: <\n" |
667 | " key: \"\xE3\x81\x8D\xE3\x82\x87\xE3\x81\x86\xE3\x81\xA8\"\n" | |
668 | " value: \"\xE4\xBA\xAC\xE9\x83\xBD\"\n" | |
656 | " key: \"きょうと\"\n" | |
657 | " value: \"京都\"\n" | |
669 | 658 | " pos: NOUN\n" |
670 | 659 | " >\n" |
671 | 660 | " entries: <\n" |
672 | " key: \"\xE3\x81\x8A\xE3\x81\x8A\xE3\x81\x95\xE3\x81\x8B\"\n" | |
673 | " value: \"\xE5\xA4\xA7\xE9\x98\xAA\"\n" | |
661 | " key: \"おおさか\"\n" | |
662 | " value: \"大阪\"\n" | |
674 | 663 | " pos: PLACE_NAME\n" |
675 | 664 | " >\n" |
676 | 665 | " entries: <\n" |
677 | " key: \"\xE3\x81\xA8\xE3\x81\x86\xE3" | |
678 | "\x81\x8D\xE3\x82\x87\xE3\x81\x86\"\n" | |
679 | " value: \"\xE6\x9D\xB1\xE4\xBA\xAC\"\n" | |
666 | " key: \"とうきょう\"\n" | |
667 | " value: \"東京\"\n" | |
680 | 668 | " pos: PLACE_NAME\n" |
681 | " comment: \"\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88\"\n" | |
669 | " comment: \"コメント\"\n" | |
682 | 670 | " >\n" |
683 | 671 | " entries: <\n" |
684 | " key: \"\xE3\x81\x99\xE3\x81\x9A\xE3\x81\x8D\"\n" | |
685 | " value: \"\xE9\x88\xB4\xE6\x9C\xA8\"\n" | |
672 | " key: \"すずき\"\n" | |
673 | " value: \"鈴木\"\n" | |
686 | 674 | " pos: PERSONAL_NAME\n" |
687 | 675 | " >\n" |
688 | 676 | ">", |
59 | 59 | // saved correctly. Please make the dictionary size smaller" |
60 | 60 | const size_t kDefaultWarningTotalBytesLimit = 256 << 20; |
61 | 61 | |
62 | // "自動登録単語"; | |
63 | const char kAutoRegisteredDictionaryName[] = | |
64 | "\xE8\x87\xAA\xE5\x8B\x95\xE7\x99\xBB\xE9\x8C\xB2\xE5\x8D\x98\xE8\xAA\x9E"; | |
65 | ||
62 | const char kAutoRegisteredDictionaryName[] = "自動登録単語"; | |
66 | 63 | const char kDefaultSyncDictionaryName[] = "Sync Dictionary"; |
67 | // "同期用辞書" | |
68 | const char *kDictionaryNameConvertedFromSyncableDictionary = | |
69 | "\xE5\x90\x8C\xE6\x9C\x9F\xE7\x94\xA8\xE8\xBE\x9E\xE6\x9B\xB8"; | |
64 | const char *kDictionaryNameConvertedFromSyncableDictionary = "同期用辞書"; | |
70 | 65 | |
71 | 66 | } // namespace |
72 | 67 |
45 | 45 | DECLARE_string(test_tmpdir); |
46 | 46 | |
47 | 47 | namespace mozc { |
48 | namespace { | |
48 | 49 | |
49 | 50 | using user_dictionary::UserDictionary; |
50 | ||
51 | namespace { | |
52 | 51 | |
53 | 52 | string GenRandomString(int size) { |
54 | 53 | string result; |
373 | 372 | |
374 | 373 | ASSERT_TRUE(storage.ConvertSyncDictionariesToNormalDictionaries()); |
375 | 374 | |
376 | // "同期用辞書" | |
377 | const char *kDictionaryNameConvertedFromSyncableDictionary = | |
378 | "\xE5\x90\x8C\xE6\x9C\x9F\xE7\x94\xA8\xE8\xBE\x9E\xE6\x9B\xB8"; | |
375 | const char kDictionaryNameConvertedFromSyncableDictionary[] = "同期用辞書"; | |
379 | 376 | const struct ExpectedData { |
380 | 377 | bool has_normal_entry; |
381 | 378 | string dictionary_name; |
492 | 489 | |
493 | 490 | // Make sure the exported format, especially that the pos is exported in |
494 | 491 | // Japanese. |
495 | // "key value 名詞 comment" separted by a tab character. | |
496 | 492 | #ifdef OS_WIN |
497 | EXPECT_EQ("key\tvalue\t\xE5\x90\x8D\xE8\xA9\x9E\tcomment\r\n", | |
493 | EXPECT_EQ("key\tvalue\t名詞\tcomment\r\n", | |
498 | 494 | string(mapped_data.begin(), mapped_data.size())); |
499 | 495 | #else |
500 | EXPECT_EQ("key\tvalue\t\xE5\x90\x8D\xE8\xA9\x9E\tcomment\n", | |
496 | EXPECT_EQ("key\tvalue\t名詞\tcomment\n", | |
501 | 497 | string(mapped_data.begin(), mapped_data.size())); |
502 | 498 | #endif // OS_WIN |
503 | 499 | } |
57 | 57 | #include "usage_stats/usage_stats.h" |
58 | 58 | #include "usage_stats/usage_stats_testing_util.h" |
59 | 59 | |
60 | using std::unique_ptr; | |
61 | ||
62 | 60 | namespace mozc { |
63 | 61 | namespace dictionary { |
64 | 62 | namespace { |
63 | ||
64 | using std::unique_ptr; | |
65 | 65 | |
66 | 66 | const char kUserDictionary0[] = |
67 | 67 | "start\tstart\tverb\n" |
72 | 72 | "smile\tsmile\tverb\n" |
73 | 73 | "smog\tsmog\tnoun\n" |
74 | 74 | // invalid characters "水雲" in reading |
75 | "\xE6\xB0\xB4\xE9\x9B\xB2\tvalue\tnoun\n" | |
75 | "水雲\tvalue\tnoun\n" | |
76 | 76 | |
77 | 77 | // Empty key |
78 | 78 | "\tvalue\tnoun\n" |
145 | 145 | if (key.empty() || |
146 | 146 | value.empty() || |
147 | 147 | pos.empty() || |
148 | tokens == NULL) { | |
148 | tokens == nullptr) { | |
149 | 149 | return false; |
150 | 150 | } |
151 | 151 | |
172 | 172 | private: |
173 | 173 | DISALLOW_COPY_AND_ASSIGN(UserPOSMock); |
174 | 174 | }; |
175 | // "名詞" | |
176 | const char *UserPOSMock::kNoun = "\xE5\x90\x8D\xE8\xA9\x9E"; | |
177 | // "動詞ワ行五段" | |
178 | const char *UserPOSMock::kVerb = | |
179 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5"; | |
175 | ||
176 | const char *UserPOSMock::kNoun = "名詞"; | |
177 | const char *UserPOSMock::kVerb = "動詞ワ行五段"; | |
180 | 178 | |
181 | 179 | string GenRandomAlphabet(int size) { |
182 | 180 | string result; |
264 | 262 | EntryCollector collector; |
265 | 263 | dic.LookupPredictive(key, convreq_, &collector); |
266 | 264 | |
267 | if (expected == NULL || expected_size == 0) { | |
265 | if (expected == nullptr || expected_size == 0) { | |
268 | 266 | EXPECT_TRUE(collector.entries().empty()); |
269 | 267 | } else { |
270 | 268 | ASSERT_FALSE(collector.entries().empty()); |
280 | 278 | EntryCollector collector; |
281 | 279 | dic.LookupPrefix(StringPiece(key, key_size), convreq_, &collector); |
282 | 280 | |
283 | if (expected == NULL || expected_size == 0) { | |
281 | if (expected == nullptr || expected_size == 0) { | |
284 | 282 | EXPECT_TRUE(collector.entries().empty()); |
285 | 283 | } else { |
286 | 284 | ASSERT_FALSE(collector.entries().empty()); |
296 | 294 | EntryCollector collector; |
297 | 295 | dic.LookupExact(StringPiece(key, key_size), convreq_, &collector); |
298 | 296 | |
299 | if (expected == NULL || expected_size == 0) { | |
297 | if (expected == nullptr || expected_size == 0) { | |
300 | 298 | EXPECT_TRUE(collector.entries().empty()); |
301 | 299 | } else { |
302 | 300 | ASSERT_FALSE(collector.entries().empty()); |
400 | 398 | { "starting", "starting", 220, 220 }, |
401 | 399 | }; |
402 | 400 | TestLookupPredictiveHelper(kExpected0, arraysize(kExpected0), |
403 | "start", *dic.get()); | |
401 | "start", *dic); | |
404 | 402 | |
405 | 403 | // Another normal lookup operation. |
406 | 404 | const Entry kExpected1[] = { |
415 | 413 | { "starting", "starting", 220, 220 }, |
416 | 414 | }; |
417 | 415 | TestLookupPredictiveHelper(kExpected1, arraysize(kExpected1), |
418 | "st", *dic.get()); | |
416 | "st", *dic); | |
419 | 417 | |
420 | 418 | // Invalid input values should be just ignored. |
421 | TestLookupPredictiveHelper(NULL, 0, "", *dic.get()); | |
422 | TestLookupPredictiveHelper(NULL, 0, | |
423 | "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲" | |
424 | *dic.get()); | |
419 | TestLookupPredictiveHelper(nullptr, 0, "", *dic); | |
420 | TestLookupPredictiveHelper(nullptr, 0, "水雲", *dic); | |
425 | 421 | |
426 | 422 | // Make a change to the dictionary file and load it again. |
427 | 423 | { |
437 | 433 | { "ending", "ending", 220, 220 }, |
438 | 434 | }; |
439 | 435 | TestLookupPredictiveHelper(kExpected2, arraysize(kExpected2), |
440 | "end", *dic.get()); | |
436 | "end", *dic); | |
441 | 437 | |
442 | 438 | // Entries in the dictionary before reloading cannot be looked up. |
443 | TestLookupPredictiveHelper(NULL, 0, "start", *dic.get()); | |
444 | TestLookupPredictiveHelper(NULL, 0, "st", *dic.get()); | |
439 | TestLookupPredictiveHelper(nullptr, 0, "start", *dic); | |
440 | TestLookupPredictiveHelper(nullptr, 0, "st", *dic); | |
445 | 441 | } |
446 | 442 | |
447 | 443 | TEST_F(UserDictionaryTest, TestLookupPrefix) { |
462 | 458 | { "started", "started", 210, 210 }, |
463 | 459 | }; |
464 | 460 | TestLookupPrefixHelper(kExpected0, arraysize(kExpected0), |
465 | "started", 7, *dic.get()); | |
461 | "started", 7, *dic); | |
466 | 462 | |
467 | 463 | // Another normal lookup operation. |
468 | 464 | const Entry kExpected1[] = { |
471 | 467 | { "starting", "starting", 100, 100 }, |
472 | 468 | { "starting", "starting", 220, 220 }, |
473 | 469 | }; |
474 | TestLookupPrefixHelper(kExpected1, arraysize(kExpected1), | |
475 | "starting", 8, *dic.get()); | |
470 | TestLookupPrefixHelper(kExpected1, arraysize(kExpected1), "starting", 8, | |
471 | *dic); | |
476 | 472 | |
477 | 473 | // Invalid input values should be just ignored. |
478 | TestLookupPrefixHelper(NULL, 0, "", 0, *dic.get()); | |
479 | TestLookupPrefixHelper( | |
480 | NULL, 0, "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲" | |
481 | strlen("\xE6\xB0\xB4\xE9\x9B\xB2"), *dic.get()); | |
474 | TestLookupPrefixHelper(nullptr, 0, "", 0, *dic); | |
475 | TestLookupPrefixHelper(nullptr, 0, "水雲", strlen("水雲"), *dic); | |
482 | 476 | |
483 | 477 | // Make a change to the dictionary file and load it again. |
484 | 478 | { |
493 | 487 | { "ending", "ending", 220, 220 }, |
494 | 488 | }; |
495 | 489 | TestLookupPrefixHelper(kExpected2, arraysize(kExpected2), |
496 | "ending", 6, *dic.get()); | |
490 | "ending", 6, *dic); | |
497 | 491 | |
498 | 492 | // Lookup for entries which are gone should return an empty result. |
499 | TestLookupPrefixHelper(NULL, 0, "started", 7, *dic.get()); | |
500 | TestLookupPrefixHelper(NULL, 0, "starting", 8, *dic.get()); | |
493 | TestLookupPrefixHelper(nullptr, 0, "started", 7, *dic); | |
494 | TestLookupPrefixHelper(nullptr, 0, "starting", 8, *dic); | |
501 | 495 | } |
502 | 496 | |
503 | 497 | TEST_F(UserDictionaryTest, TestLookupExact) { |
516 | 510 | { "start", "start", 200, 200 }, |
517 | 511 | }; |
518 | 512 | TestLookupExactHelper(kExpected0, arraysize(kExpected0), |
519 | "start", 5, *dic.get()); | |
513 | "start", 5, *dic); | |
520 | 514 | |
521 | 515 | // Another normal lookup operation. |
522 | 516 | const Entry kExpected1[] = { |
524 | 518 | { "starting", "starting", 220, 220 }, |
525 | 519 | }; |
526 | 520 | TestLookupExactHelper(kExpected1, arraysize(kExpected1), |
527 | "starting", 8, *dic.get()); | |
521 | "starting", 8, *dic); | |
528 | 522 | |
529 | 523 | // Invalid input values should be just ignored. |
530 | TestLookupPrefixHelper(NULL, 0, "", 0, *dic.get()); | |
531 | TestLookupPrefixHelper(NULL, 0, "\xE6\xB0\xB4\xE9\x9B\xB2", // "水雲" | |
532 | strlen("\xE6\xB0\xB4\xE9\x9B\xB2"), *dic.get()); | |
524 | TestLookupPrefixHelper(nullptr, 0, "", 0, *dic); | |
525 | TestLookupPrefixHelper(nullptr, 0, "水雲", strlen("水雲"), *dic); | |
533 | 526 | } |
534 | 527 | |
535 | 528 | TEST_F(UserDictionaryTest, TestLookupExactWithSuggestionOnlyWords) { |
585 | 578 | dic->Load(storage); |
586 | 579 | } |
587 | 580 | |
588 | TestLookupPrefixHelper(NULL, 0, "start", 4, *dic); | |
589 | TestLookupPredictiveHelper(NULL, 0, "s", *dic); | |
581 | TestLookupPrefixHelper(nullptr, 0, "start", 4, *dic); | |
582 | TestLookupPredictiveHelper(nullptr, 0, "s", *dic); | |
590 | 583 | |
591 | 584 | config_.set_incognito_mode(false); |
592 | 585 | { |
296 | 296 | // The index of each element should be matched with the actual value of enum. |
297 | 297 | // See also user_dictionary_storage.proto for the definition of the enum. |
298 | 298 | // Note that the '0' is invalid in the definition, so the corresponding |
299 | // element is NULL. | |
299 | // element is nullptr. | |
300 | 300 | const char *kPosTypeStringTable[] = { |
301 | NULL, | |
302 | "\xE5\x90\x8D\xE8\xA9\x9E", // "名詞" | |
303 | "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF", // "短縮よみ" | |
304 | "\xE3\x82\xB5\xE3\x82\xB8\xE3\x82\xA7\xE3\x82\xB9\xE3\x83\x88" | |
305 | "\xE3\x81\xAE\xE3\x81\xBF", // "サジェストのみ" | |
306 | "\xE5\x9B\xBA\xE6\x9C\x89\xE5\x90\x8D\xE8\xA9\x9E", // "固有名詞" | |
307 | "\xE4\xBA\xBA\xE5\x90\x8D", // "人名" | |
308 | "\xE5\xA7\x93", // "姓" | |
309 | "\xE5\x90\x8D", // "名" | |
310 | "\xE7\xB5\x84\xE7\xB9\x94", // "組織" | |
311 | "\xE5\x9C\xB0\xE5\x90\x8D", // "地名" | |
312 | "\xE5\x90\x8D\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "名詞サ変" | |
313 | "\xE5\x90\x8D\xE8\xA9\x9E\xE5\xBD\xA2\xE5\x8B\x95", // "名詞形動" | |
314 | "\xE6\x95\xB0", // "数" | |
315 | "\xE3\x82\xA2\xE3\x83\xAB\xE3\x83\x95\xE3\x82\xA1" | |
316 | "\xE3\x83\x99\xE3\x83\x83\xE3\x83\x88", // "アルファベット" | |
317 | "\xE8\xA8\x98\xE5\x8F\xB7", // "記号" | |
318 | "\xE9\xA1\x94\xE6\x96\x87\xE5\xAD\x97", // "顔文字" | |
319 | ||
320 | "\xE5\x89\xAF\xE8\xA9\x9E", // "副詞" | |
321 | "\xE9\x80\xA3\xE4\xBD\x93\xE8\xA9\x9E", // "連体詞" | |
322 | "\xE6\x8E\xA5\xE7\xB6\x9A\xE8\xA9\x9E", // "接続詞" | |
323 | "\xE6\x84\x9F\xE5\x8B\x95\xE8\xA9\x9E", // "感動詞" | |
324 | "\xE6\x8E\xA5\xE9\xA0\xAD\xE8\xAA\x9E", // "接頭語" | |
325 | "\xE5\x8A\xA9\xE6\x95\xB0\xE8\xA9\x9E", // "助数詞" | |
326 | "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xB8\x80\xE8\x88\xAC", // "接尾一般" | |
327 | "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xBA\xBA\xE5\x90\x8D", // "接尾人名" | |
328 | "\xE6\x8E\xA5\xE5\xB0\xBE\xE5\x9C\xB0\xE5\x90\x8D", // "接尾地名" | |
329 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
330 | "\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ワ行五段" | |
331 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
332 | "\xE3\x82\xAB\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞カ行五段" | |
333 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
334 | "\xE3\x82\xB5\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞サ行五段" | |
335 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
336 | "\xE3\x82\xBF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞タ行五段" | |
337 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
338 | "\xE3\x83\x8A\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ナ行五段" | |
339 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
340 | "\xE3\x83\x9E\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞マ行五段" | |
341 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
342 | "\xE3\x83\xA9\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ラ行五段" | |
343 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
344 | "\xE3\x82\xAC\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ガ行五段" | |
345 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
346 | "\xE3\x83\x90\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞バ行五段" | |
347 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
348 | "\xE3\x83\x8F\xE8\xA1\x8C\xE5\x9B\x9B\xE6\xAE\xB5", // "動詞ハ行四段" | |
349 | "\xE5\x8B\x95\xE8\xA9\x9E\xE4\xB8\x80\xE6\xAE\xB5", // "動詞一段" | |
350 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xAB\xE5\xA4\x89", // "動詞カ変" | |
351 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "動詞サ変" | |
352 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB6\xE5\xA4\x89", // "動詞ザ変" | |
353 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xA9\xE5\xA4\x89", // "動詞ラ変" | |
354 | "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E", // "形容詞" | |
355 | "\xE7\xB5\x82\xE5\x8A\xA9\xE8\xA9\x9E", // "終助詞" | |
356 | "\xE5\x8F\xA5\xE8\xAA\xAD\xE7\x82\xB9", // "句読点" | |
357 | "\xE7\x8B\xAC\xE7\xAB\x8B\xE8\xAA\x9E", // "独立語" | |
358 | "\xE6\x8A\x91\xE5\x88\xB6\xE5\x8D\x98\xE8\xAA\x9E", // "抑制単語" | |
301 | nullptr, | |
302 | "名詞", | |
303 | "短縮よみ", | |
304 | "サジェストのみ", | |
305 | "固有名詞", | |
306 | "人名", | |
307 | "姓", | |
308 | "名", | |
309 | "組織", | |
310 | "地名", | |
311 | "名詞サ変", | |
312 | "名詞形動", | |
313 | "数", | |
314 | "アルファベット", | |
315 | "記号", | |
316 | "顔文字", | |
317 | ||
318 | "副詞", | |
319 | "連体詞", | |
320 | "接続詞", | |
321 | "感動詞", | |
322 | "接頭語", | |
323 | "助数詞", | |
324 | "接尾一般", | |
325 | "接尾人名", | |
326 | "接尾地名", | |
327 | "動詞ワ行五段", | |
328 | "動詞カ行五段", | |
329 | "動詞サ行五段", | |
330 | "動詞タ行五段", | |
331 | "動詞ナ行五段", | |
332 | "動詞マ行五段", | |
333 | "動詞ラ行五段", | |
334 | "動詞ガ行五段", | |
335 | "動詞バ行五段", | |
336 | "動詞ハ行四段", | |
337 | "動詞一段", | |
338 | "動詞カ変", | |
339 | "動詞サ変", | |
340 | "動詞ザ変", | |
341 | "動詞ラ変", | |
342 | "形容詞", | |
343 | "終助詞", | |
344 | "句読点", | |
345 | "独立語", | |
346 | "抑制単語", | |
359 | 347 | }; |
360 | 348 | } // namespace |
361 | 349 |
46 | 46 | |
47 | 47 | TEST(UserDictionaryUtilTest, TestIsValidReading) { |
48 | 48 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("ABYZabyz0189")); |
49 | // "〜「」" | |
50 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading( | |
51 | "\xe3\x80\x9c\xe3\x80\x8c\xe3\x80\x8d")); | |
52 | // "あいうわをんゔ" | |
53 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading( | |
54 | "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x8f\xe3\x82\x92" | |
55 | "\xe3\x82\x93\xe3\x82\x94")); | |
56 | // "アイウワヲンヴ" | |
57 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading( | |
58 | "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xaf\xe3\x83\xb2" | |
59 | "\xe3\x83\xb3\xe3\x83\xb4")); | |
60 | // "水雲" | |
61 | EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xe6\xb0\xb4\xe9\x9b\xb2")); | |
49 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("〜「」")); | |
50 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("あいうわをんゔ")); | |
51 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("アイウワヲンヴ")); | |
52 | EXPECT_FALSE(UserDictionaryUtil::IsValidReading("水雲")); | |
62 | 53 | |
63 | 54 | // COMBINING KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u3099, u309A) |
64 | EXPECT_FALSE(UserDictionaryUtil::IsValidReading("\xE3\x82\x99\xE3\x82\x9A")); | |
55 | EXPECT_FALSE(UserDictionaryUtil::IsValidReading("゙゚")); | |
65 | 56 | |
66 | 57 | // KATAKANA-HIRAGANA VOICED/SEMI-VOICED SOUND MARK (u309B, u309C) |
67 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("\xE3\x82\x9B\xE3\x82\x9C")); | |
58 | EXPECT_TRUE(UserDictionaryUtil::IsValidReading("゛゜")); | |
68 | 59 | } |
69 | 60 | |
70 | 61 | TEST(UserDictionaryUtilTest, TestNormalizeReading) { |
71 | // "あいうゔゎ", "アイウヴヮ" | |
72 | TestNormalizeReading( | |
73 | "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x94\xe3\x82\x8e", | |
74 | "\xe3\x82\xa2\xe3\x82\xa4\xe3\x82\xa6\xe3\x83\xb4\xe3\x83\xae"); | |
75 | // "あいうゃ", "アイウャ" | |
76 | TestNormalizeReading( | |
77 | "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x82\x83", | |
78 | "\xef\xbd\xb1\xef\xbd\xb2\xef\xbd\xb3\xef\xbd\xac"); | |
79 | // "ABab01@&=|" | |
80 | TestNormalizeReading( | |
81 | "ABab01@&=|", | |
82 | "\xef\xbc\xa1\xef\xbc\xa2\xef\xbd\x81\xef\xbd\x82\xef\xbc\x90\xef\xbc\x91" | |
83 | "\xef\xbc\xa0\xef\xbc\x86\xef\xbc\x9d\xef\xbd\x9c"); | |
62 | TestNormalizeReading("あいうゔゎ", "アイウヴヮ"); | |
63 | TestNormalizeReading("あいうゃ", "ｱｲｳｬ"); | |
64 | TestNormalizeReading("ABab01@&=|", "ＡＢａｂ０１＠＆＝｜"); | |
84 | 65 | } |
85 | 66 | |
86 | 67 | namespace { |
150 | 131 | EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 10)); |
151 | 132 | EXPECT_EQ("abc", str); |
152 | 133 | |
153 | str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか" | |
134 | str = "かしゆか"; | |
154 | 135 | EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 3)); |
155 | EXPECT_EQ("\xE3\x81\x8B", str); // "か" | |
156 | ||
157 | str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか" | |
136 | EXPECT_EQ("か", str); | |
137 | ||
138 | str = "かしゆか"; | |
158 | 139 | EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 4)); |
159 | EXPECT_EQ("\xE3\x81\x8B", str); // "か" | |
160 | ||
161 | str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか" | |
140 | EXPECT_EQ("か", str); | |
141 | ||
142 | str = "かしゆか"; | |
162 | 143 | EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 5)); |
163 | EXPECT_EQ("\xE3\x81\x8B", str); // "か" | |
164 | ||
165 | str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか" | |
144 | EXPECT_EQ("か", str); | |
145 | ||
146 | str = "かしゆか"; | |
166 | 147 | EXPECT_TRUE(UserDictionaryUtil::Sanitize(&str, 6)); |
167 | EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97", str); // "かし" | |
168 | ||
169 | str = "\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B"; // "かしゆか" | |
148 | EXPECT_EQ("かし", str); | |
149 | ||
150 | str = "かしゆか"; | |
170 | 151 | EXPECT_FALSE(UserDictionaryUtil::Sanitize(&str, 100)); |
171 | // "かしゆか" | |
172 | EXPECT_EQ("\xE3\x81\x8B\xE3\x81\x97\xE3\x82\x86\xE3\x81\x8B", str); | |
152 | EXPECT_EQ("かしゆか", str); | |
173 | 153 | } |
174 | 154 | |
175 | 155 | TEST(UserDictionaryUtilTest, ValidateEntry) { |
176 | 156 | // Create a valid entry. |
177 | 157 | UserDictionary::Entry base_entry; |
178 | // "よみ" | |
179 | base_entry.set_key("\xE3\x82\x88\xE3\x81\xBF"); | |
180 | ||
181 | // "単語" | |
182 | base_entry.set_value("\xE5\x8D\x98\xE8\xAA\x9E"); | |
183 | ||
184 | // "名詞" | |
158 | base_entry.set_key("よみ"); | |
159 | base_entry.set_value("単語"); | |
185 | 160 | base_entry.set_pos(UserDictionary::NOUN); |
186 | ||
187 | // "コメント" | |
188 | base_entry.set_comment("\xE3\x82\xB3\xE3\x83\xA1\xE3\x83\xB3\xE3\x83\x88"); | |
189 | ||
161 | base_entry.set_comment("コメント"); | |
190 | 162 | |
191 | 163 | UserDictionary::Entry entry; |
192 | 164 | entry.CopyFrom(base_entry); |
108 | 108 | // Set smaller cost for "短縮よみ" in order to make |
109 | 109 | // the rank of the word higher than others. |
110 | 110 | const int16 kIsolatedWordCost = 200; |
111 | const char kIsolatedWordPOS[] = | |
112 | "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF"; | |
111 | const char kIsolatedWordPOS[] = "短縮よみ"; | |
113 | 112 | |
114 | 113 | if (size == 1) { // no conjugation |
115 | 114 | const auto &token_iter = range.first; |
75 | 75 | user_pos_->GetPOSList(&pos_list); |
76 | 76 | |
77 | 77 | std::vector<UserPOS::Token> tokens; |
78 | EXPECT_FALSE(user_pos_->GetTokens("", "test", | |
79 | pos_list[0], | |
80 | &tokens)); | |
81 | EXPECT_FALSE(user_pos_->GetTokens("test", "", | |
82 | pos_list[0], | |
83 | &tokens)); | |
84 | EXPECT_FALSE(user_pos_->GetTokens("test", "test", | |
85 | "", | |
86 | &tokens)); | |
87 | EXPECT_TRUE(user_pos_->GetTokens("test", "test", | |
88 | pos_list[0], | |
89 | &tokens)); | |
78 | EXPECT_FALSE(user_pos_->GetTokens("", "test", pos_list[0], &tokens)); | |
79 | EXPECT_FALSE(user_pos_->GetTokens("test", "", pos_list[0], &tokens)); | |
80 | EXPECT_FALSE(user_pos_->GetTokens("test", "test", "", &tokens)); | |
81 | EXPECT_TRUE(user_pos_->GetTokens("test", "test", pos_list[0], &tokens)); | |
90 | 82 | |
91 | 83 | // http://b/2674666 |
92 | // "あか,赤,形容詞" | |
93 | EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x82\xE3\x81\x8B", | |
94 | "\xE8\xB5\xA4", | |
95 | "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E", | |
96 | &tokens)); | |
84 | EXPECT_TRUE(user_pos_->GetTokens("あか", "赤", "形容詞", &tokens)); | |
97 | 85 | |
98 | 86 | for (size_t i = 0; i < pos_list.size(); ++i) { |
99 | EXPECT_TRUE(user_pos_->GetTokens("test", "test", | |
100 | pos_list[i], | |
101 | &tokens)); | |
87 | EXPECT_TRUE(user_pos_->GetTokens("test", "test", pos_list[i], &tokens)); | |
102 | 88 | } |
103 | 89 | } |
104 | 90 | |
105 | 91 | TEST_F(UserPOSTest, ConjugationTest) { |
106 | 92 | std::vector<UserPOS::Token> tokens1, tokens2; |
107 | // EXPECT_TRUE(user_pos_->GetTokens("わら", "嗤", | |
108 | // "動詞ワ行五段", &tokens1)); | |
109 | // EXPECT_TRUE(user_pos_->GetTokens("わらう", "嗤う", | |
110 | // "動詞ワ行五段", &tokens2)); | |
111 | EXPECT_TRUE(user_pos_->GetTokens("\xE3\x82\x8F\xE3\x82\x89", "\xE5\x97\xA4", | |
112 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF" | |
113 | "\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", | |
114 | &tokens1)); | |
115 | EXPECT_TRUE(user_pos_->GetTokens("\xE3\x82\x8F\xE3\x82\x89\xE3\x81\x86", | |
116 | "\xE5\x97\xA4\xE3\x81\x86", | |
117 | "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xAF" | |
118 | "\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", | |
119 | &tokens2)); | |
93 | EXPECT_TRUE(user_pos_->GetTokens("わら", "嗤", "動詞ワ行五段", &tokens1)); | |
94 | EXPECT_TRUE(user_pos_->GetTokens("わらう", "嗤う", "動詞ワ行五段", &tokens2)); | |
120 | 95 | EXPECT_EQ(tokens1.size(), tokens2.size()); |
121 | 96 | for (size_t i = 0; i < tokens1.size(); ++i) { |
122 | 97 | EXPECT_EQ(tokens1[i].key, tokens2[i].key); |
125 | 100 | EXPECT_EQ(tokens1[i].cost, tokens2[i].cost); |
126 | 101 | } |
127 | 102 | |
128 | // EXPECT_TRUE(user_pos_->GetTokens("おそれ", "惧れ", | |
129 | // "動詞一段", &tokens1)); | |
130 | // EXPECT_TRUE(user_pos_->GetTokens("おそれる", "惧れる", | |
131 | // "動詞一段", &tokens2)); | |
132 | EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x8A\xE3\x81\x9D\xE3\x82\x8C", | |
133 | "\xE6\x83\xA7\xE3\x82\x8C", | |
134 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
135 | "\xE4\xB8\x80\xE6\xAE\xB5", &tokens1)); | |
136 | EXPECT_TRUE(user_pos_->GetTokens("\xE3\x81\x8A\xE3\x81\x9D" | |
137 | "\xE3\x82\x8C\xE3\x82\x8B", | |
138 | "\xE6\x83\xA7\xE3\x82\x8C\xE3\x82\x8B", | |
139 | "\xE5\x8B\x95\xE8\xA9\x9E" | |
140 | "\xE4\xB8\x80\xE6\xAE\xB5", &tokens2)); | |
103 | EXPECT_TRUE(user_pos_->GetTokens("おそれ", "惧れ", "動詞一段", &tokens1)); | |
104 | EXPECT_TRUE(user_pos_->GetTokens("おそれる", "惧れる", "動詞一段", &tokens2)); | |
141 | 105 | EXPECT_EQ(tokens1.size(), tokens2.size()); |
142 | 106 | for (size_t i = 0; i < tokens1.size(); ++i) { |
143 | 107 | EXPECT_EQ(tokens1[i].key, tokens2[i].key); |