Stop embedding POS matcher data as C++ code
This CL introduces the binary format for POS matcher data and moves all
the C++-embedded data to a binary data set file. Also, singletons of
POSMatcher for each platform are removed.
UserPosManager is a typedef of PackedDataManager in the NaCl configuration,
so the actual data manager must be registered before using it. However,
the previous test didn't register it. This CL instead uses
MockDataManager because using PackedDataManager in a unit test doesn't
make sense.
BUG=
TEST=
REF_BUG=26841123
REF_CL=116521585,116524711
REF_TIME=2016-03-07T17:41:05+09:00
REF_TIME_RAW=1457340065 +0900
Noriyuki Takahashi
8 years ago
71 | 71 | // considering this class as POD. |
72 | 72 | CandidateFilterTest() {} |
73 | 73 | |
74 | virtual void SetUp() { | |
74 | void SetUp() override { | |
75 | 75 | candidate_freelist_.reset(new FreeList<Segment::Candidate>(1024)); |
76 | 76 | node_freelist_.reset(new FreeList<Node>(1024)); |
77 | pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
77 | pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
78 | 78 | |
79 | 79 | { |
80 | 80 | mozc::testing::MockDataManager data_manager; |
85 | 85 | } |
86 | 86 | } |
87 | 87 | |
88 | virtual void TearDown() { | |
88 | void TearDown() override { | |
89 | 89 | candidate_freelist_->Free(); |
90 | 90 | node_freelist_->Free(); |
91 | 91 | } |
122 | 122 | } |
123 | 123 | |
124 | 124 | const POSMatcher &pos_matcher() const { |
125 | return *pos_matcher_; | |
125 | return pos_matcher_; | |
126 | 126 | } |
127 | 127 | |
128 | 128 | CandidateFilter *CreateCandidateFilter( |
129 | 129 | bool apply_suggestion_filter_for_exact_match) const { |
130 | 130 | return new CandidateFilter(&suppression_dictionary_, |
131 | pos_matcher_, | |
131 | &pos_matcher_, | |
132 | 132 | suggestion_filter_.get(), |
133 | 133 | apply_suggestion_filter_for_exact_match); |
134 | 134 | } |
135 | 135 | |
136 | 136 | std::unique_ptr<FreeList<Segment::Candidate> > candidate_freelist_; |
137 | 137 | std::unique_ptr<FreeList<Node> > node_freelist_; |
138 | const POSMatcher *pos_matcher_; | |
138 | POSMatcher pos_matcher_; | |
139 | 139 | SuppressionDictionary suppression_dictionary_; |
140 | 140 | std::unique_ptr<SuggestionFilter> suggestion_filter_; |
141 | 141 | scoped_data_manager_initializer_for_testing |
423 | 423 | |
424 | 424 | TEST_F(CandidateFilterTest, Regression3437022) { |
425 | 425 | std::unique_ptr<SuppressionDictionary> dic(new SuppressionDictionary); |
426 | const POSMatcher *pos_matcher = | |
427 | UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
426 | const POSMatcher pos_matcher( | |
427 | UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
428 | 428 | std::unique_ptr<CandidateFilter> filter( |
429 | new CandidateFilter(dic.get(), pos_matcher, | |
429 | new CandidateFilter(dic.get(), &pos_matcher, | |
430 | 430 | suggestion_filter_.get(), true)); |
431 | 431 | |
432 | 432 | vector<const Node *> n; |
198 | 198 | std::unique_ptr<const SuggestionFilter> suggestion_filter; |
199 | 199 | std::unique_ptr<ImmutableConverterInterface> immutable_converter; |
200 | 200 | std::unique_ptr<ConverterImpl> converter; |
201 | dictionary::POSMatcher pos_matcher; | |
201 | 202 | }; |
202 | 203 | |
203 | 204 | // Returns initialized predictor for the given type. |
278 | 279 | int dictionary_size = 0; |
279 | 280 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
280 | 281 | |
282 | converter_and_data->pos_matcher.Set(data_manager.GetPOSMatcherData()); | |
283 | ||
281 | 284 | SystemDictionary *sysdic = |
282 | 285 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
283 | 286 | converter_and_data->user_dictionary.reset(user_dictionary); |
284 | 287 | converter_and_data->suppression_dictionary.reset(suppression_dictionary); |
285 | 288 | converter_and_data->dictionary.reset(new DictionaryImpl( |
286 | 289 | sysdic, // DictionaryImpl takes the ownership |
287 | new ValueDictionary(*data_manager.GetPOSMatcher(), | |
290 | new ValueDictionary(converter_and_data->pos_matcher, | |
288 | 291 | &sysdic->value_trie()), |
289 | 292 | converter_and_data->user_dictionary.get(), |
290 | 293 | converter_and_data->suppression_dictionary.get(), |
291 | data_manager.GetPOSMatcher())); | |
294 | &converter_and_data->pos_matcher)); | |
292 | 295 | converter_and_data->pos_group.reset( |
293 | 296 | new PosGroup(data_manager.GetPosGroupData())); |
294 | 297 | converter_and_data->suggestion_filter.reset( |
306 | 309 | converter_and_data->suppression_dictionary.get(), |
307 | 310 | converter_and_data->connector.get(), |
308 | 311 | converter_and_data->segmenter.get(), |
309 | data_manager.GetPOSMatcher(), | |
312 | &converter_and_data->pos_matcher, | |
310 | 313 | converter_and_data->pos_group.get(), |
311 | 314 | converter_and_data->suggestion_filter.get())); |
312 | 315 | converter_and_data->converter.reset(new ConverterImpl); |
313 | 316 | |
314 | 317 | PredictorInterface *predictor = CreatePredictor( |
315 | predictor_type, data_manager.GetPOSMatcher(), *converter_and_data); | |
318 | predictor_type, &converter_and_data->pos_matcher, *converter_and_data); | |
316 | 319 | converter_and_data->converter->Init( |
317 | data_manager.GetPOSMatcher(), | |
320 | &converter_and_data->pos_matcher, | |
318 | 321 | converter_and_data->suppression_dictionary.get(), |
319 | 322 | predictor, |
320 | 323 | rewriter, |
343 | 346 | ConverterAndData *ret = new ConverterAndData; |
344 | 347 | |
345 | 348 | testing::MockUserPosManager user_pos_manager; |
349 | ret->pos_matcher.Set(user_pos_manager.GetPOSMatcherData()); | |
350 | ||
346 | 351 | SuppressionDictionary *suppression_dictionary = new SuppressionDictionary; |
347 | 352 | dictionary::UserDictionary *user_dictionary = |
348 | 353 | new dictionary::UserDictionary( |
349 | 354 | dictionary::UserPOS::CreateFromDataManager(user_pos_manager), |
350 | user_pos_manager.GetPOSMatcher(), | |
355 | ret->pos_matcher, | |
351 | 356 | suppression_dictionary); |
352 | 357 | InitConverterAndData( |
353 | 358 | user_dictionary, suppression_dictionary, rewriter, predictor_type, ret); |
1365 | 1370 | int dictionary_size = 0; |
1366 | 1371 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
1367 | 1372 | |
1373 | const dictionary::POSMatcher pos_matcher( | |
1374 | data_manager.GetPOSMatcherData()); | |
1375 | ||
1368 | 1376 | SystemDictionary *sysdic = |
1369 | 1377 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
1370 | 1378 | std::unique_ptr<DictionaryInterface> dictionary(new DictionaryImpl( |
1371 | 1379 | sysdic, // DictionaryImpl takes the ownership |
1372 | new ValueDictionary(*data_manager.GetPOSMatcher(), | |
1373 | &sysdic->value_trie()), | |
1380 | new ValueDictionary(pos_matcher, &sysdic->value_trie()), | |
1374 | 1381 | mock_user_dictionary.get(), |
1375 | 1382 | suppression_dictionary.get(), |
1376 | data_manager.GetPOSMatcher())); | |
1383 | &pos_matcher)); | |
1377 | 1384 | std::unique_ptr<const PosGroup> pos_group( |
1378 | 1385 | new PosGroup(data_manager.GetPosGroupData())); |
1379 | 1386 | std::unique_ptr<const DictionaryInterface> suffix_dictionary( |
1390 | 1397 | suppression_dictionary.get(), |
1391 | 1398 | connector.get(), |
1392 | 1399 | segmenter.get(), |
1393 | data_manager.GetPOSMatcher(), | |
1400 | &pos_matcher, | |
1394 | 1401 | pos_group.get(), |
1395 | 1402 | suggestion_filter.get())); |
1396 | 1403 | std::unique_ptr<const SuggestionFilter> suggegstion_filter( |
1397 | 1404 | CreateSuggestionFilter(data_manager)); |
1398 | 1405 | std::unique_ptr<ConverterImpl> converter(new ConverterImpl); |
1399 | 1406 | const DictionaryInterface *kNullDictionary = nullptr; |
1400 | converter->Init(data_manager.GetPOSMatcher(), | |
1407 | converter->Init(&pos_matcher, | |
1401 | 1408 | suppression_dictionary.get(), |
1402 | 1409 | DefaultPredictor::CreateDefaultPredictor( |
1403 | 1410 | new DictionaryPredictor( |
1407 | 1414 | suffix_dictionary.get(), |
1408 | 1415 | connector.get(), |
1409 | 1416 | segmenter.get(), |
1410 | data_manager.GetPOSMatcher(), | |
1417 | &pos_matcher, | |
1411 | 1418 | suggegstion_filter.get()), |
1412 | 1419 | new UserHistoryPredictor(dictionary.get(), |
1413 | data_manager.GetPOSMatcher(), | |
1420 | &pos_matcher, | |
1414 | 1421 | suppression_dictionary.get(), |
1415 | 1422 | false)), |
1416 | 1423 | new RewriterImpl(converter.get(), |
93 | 93 | const DictionaryInterface *suffix_dictionary = NULL) { |
94 | 94 | data_manager_.reset(new testing::MockDataManager); |
95 | 95 | |
96 | const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher(); | |
97 | CHECK(pos_matcher); | |
96 | pos_matcher_.Set(data_manager_->GetPOSMatcherData()); | |
98 | 97 | |
99 | 98 | suppression_dictionary_.reset(new SuppressionDictionary); |
100 | 99 | CHECK(suppression_dictionary_.get()); |
110 | 109 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
111 | 110 | dictionary_.reset(new DictionaryImpl( |
112 | 111 | sysdic, // DictionaryImpl takes the ownership |
113 | new ValueDictionary(*pos_matcher, &sysdic->value_trie()), | |
112 | new ValueDictionary(pos_matcher_, &sysdic->value_trie()), | |
114 | 113 | &user_dictionary_stub_, |
115 | 114 | suppression_dictionary_.get(), |
116 | pos_matcher)); | |
115 | &pos_matcher_)); | |
117 | 116 | } |
118 | 117 | CHECK(dictionary_.get()); |
119 | 118 | |
152 | 151 | suppression_dictionary_.get(), |
153 | 152 | connector_.get(), |
154 | 153 | segmenter_.get(), |
155 | pos_matcher, | |
154 | &pos_matcher_, | |
156 | 155 | pos_group_.get(), |
157 | 156 | suggestion_filter_.get())); |
158 | 157 | CHECK(immutable_converter_.get()); |
173 | 172 | std::unique_ptr<const SuggestionFilter> suggestion_filter_; |
174 | 173 | std::unique_ptr<ImmutableConverterImpl> immutable_converter_; |
175 | 174 | UserDictionaryStub user_dictionary_stub_; |
175 | dictionary::POSMatcher pos_matcher_; | |
176 | 176 | }; |
177 | 177 | |
178 | 178 | } // namespace |
73 | 73 | MockDataAndImmutableConverter() { |
74 | 74 | data_manager_.reset(new testing::MockDataManager); |
75 | 75 | |
76 | const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher(); | |
77 | CHECK(pos_matcher); | |
76 | pos_matcher_.Set(data_manager_->GetPOSMatcherData()); | |
78 | 77 | |
79 | 78 | suppression_dictionary_.reset(new SuppressionDictionary); |
80 | 79 | CHECK(suppression_dictionary_.get()); |
87 | 86 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
88 | 87 | dictionary_.reset(new DictionaryImpl( |
89 | 88 | sysdic, // DictionaryImpl takes the ownership |
90 | new ValueDictionary(*pos_matcher, &sysdic->value_trie()), | |
89 | new ValueDictionary(pos_matcher_, &sysdic->value_trie()), | |
91 | 90 | &user_dictionary_stub_, |
92 | 91 | suppression_dictionary_.get(), |
93 | pos_matcher)); | |
92 | &pos_matcher_)); | |
94 | 93 | CHECK(dictionary_.get()); |
95 | 94 | |
96 | 95 | StringPiece suffix_key_array_data, suffix_value_array_data; |
125 | 124 | suppression_dictionary_.get(), |
126 | 125 | connector_.get(), |
127 | 126 | segmenter_.get(), |
128 | pos_matcher, | |
127 | &pos_matcher_, | |
129 | 128 | pos_group_.get(), |
130 | 129 | suggestion_filter_.get())); |
131 | 130 | CHECK(immutable_converter_.get()); |
139 | 138 | return new NBestGenerator(suppression_dictionary_.get(), |
140 | 139 | segmenter_.get(), |
141 | 140 | connector_.get(), |
142 | data_manager_->GetPOSMatcher(), | |
141 | &pos_matcher_, | |
143 | 142 | lattice, |
144 | 143 | suggestion_filter_.get(), |
145 | 144 | true); |
156 | 155 | std::unique_ptr<const SuggestionFilter> suggestion_filter_; |
157 | 156 | std::unique_ptr<ImmutableConverterImpl> immutable_converter_; |
158 | 157 | UserDictionaryStub user_dictionary_stub_; |
158 | dictionary::POSMatcher pos_matcher_; | |
159 | 159 | }; |
160 | 160 | |
161 | 161 | } // namespace |
31 | 31 | #include "base/embedded_file.h" |
32 | 32 | #include "base/logging.h" |
33 | 33 | #include "base/singleton.h" |
34 | #include "dictionary/pos_matcher.h" | |
35 | 34 | |
36 | 35 | namespace mozc { |
37 | 36 | namespace chromeos { |
61 | 60 | manager_.GetUserPOSData(token_array_data, string_array_data); |
62 | 61 | } |
63 | 62 | |
64 | namespace { | |
65 | // This header file is autogenerated by gen_pos_matcher_code.py and contains | |
66 | // kRuleIdTable[] and kRangeTable[]. | |
67 | #include "data_manager/chromeos/pos_matcher_data.h" | |
68 | ||
69 | class ChromeOsPOSMatcher : public dictionary::POSMatcher { | |
70 | public: | |
71 | ChromeOsPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {} | |
72 | }; | |
73 | } // namespace | |
74 | ||
75 | const dictionary::POSMatcher *ChromeOsUserPosManager::GetPOSMatcher() const { | |
76 | return Singleton<ChromeOsPOSMatcher>::get(); | |
63 | const uint16 *ChromeOsUserPosManager::GetPOSMatcherData() const { | |
64 | return manager_.GetPOSMatcherData(); | |
77 | 65 | } |
78 | 66 | |
79 | 67 | } // namespace chromeos |
47 | 47 | // following embedded data. |
48 | 48 | void GetUserPOSData(StringPiece *token_array_data, |
49 | 49 | StringPiece *string_array_data) const override; |
50 | const dictionary::POSMatcher *GetPOSMatcher() const override; | |
50 | const uint16 *GetPOSMatcherData() const override; | |
51 | 51 | |
52 | 52 | // The following are implemented in ChromeOsDataManager. |
53 | 53 | const uint8 *GetPosGroupData() const override { return nullptr; } |
37 | 37 | namespace mozc { |
38 | 38 | namespace { |
39 | 39 | |
40 | bool InitUserPosManagerDataFromReader(const DataSetReader &reader, | |
41 | StringPiece *user_pos_token_array_data, | |
42 | StringPiece *user_pos_string_array_data) { | |
40 | bool InitUserPosManagerDataFromReader( | |
41 | const DataSetReader &reader, | |
42 | StringPiece *pos_matcher_data, | |
43 | StringPiece *user_pos_token_array_data, | |
44 | StringPiece *user_pos_string_array_data) { | |
45 | if (!reader.Get("pos_matcher", pos_matcher_data)) { | |
46 | LOG(ERROR) << "Cannot find POS matcher rule ID table"; | |
47 | return false; | |
48 | } | |
43 | 49 | if (!reader.Get("user_pos_token", user_pos_token_array_data)) { |
44 | 50 | LOG(ERROR) << "Cannot find a user POS token array"; |
45 | 51 | return false; |
70 | 76 | return false; |
71 | 77 | } |
72 | 78 | if (!InitUserPosManagerDataFromReader(reader, |
79 | &pos_matcher_data_, | |
73 | 80 | &user_pos_token_array_data_, |
74 | 81 | &user_pos_string_array_data_)) { |
75 | 82 | LOG(ERROR) << "User POS manager data is broken"; |
233 | 240 | return false; |
234 | 241 | } |
235 | 242 | if (!InitUserPosManagerDataFromReader(reader, |
243 | &pos_matcher_data_, | |
236 | 244 | &user_pos_token_array_data_, |
237 | 245 | &user_pos_string_array_data_)) { |
238 | 246 | LOG(ERROR) << "User POS manager data is broken"; |
274 | 282 | *string_array_data = user_pos_string_array_data_; |
275 | 283 | } |
276 | 284 | |
277 | const dictionary::POSMatcher *DataManager::GetPOSMatcher() const { | |
278 | LOG(FATAL) << "Not implemented"; | |
279 | return nullptr; | |
285 | const uint16 *DataManager::GetPOSMatcherData() const { | |
286 | return reinterpret_cast<const uint16 *>(pos_matcher_data_.data()); | |
280 | 287 | } |
281 | 288 | |
282 | 289 | const uint8 *DataManager::GetPosGroupData() const { |
119 | 119 | 'dependencies': [ |
120 | 120 | '../data_manager_base.gyp:dataset_writer_main', |
121 | 121 | '../../rewriter/rewriter_base.gyp:gen_rewriter_files#host', |
122 | '<(dataset_tag)_data_manager_base.gyp:gen_separate_pos_matcher_data_for_<(dataset_tag)#host', | |
122 | 123 | '<(dataset_tag)_data_manager_base.gyp:gen_separate_user_pos_data_for_<(dataset_tag)#host', |
123 | 124 | 'gen_separate_connection_data_for_<(dataset_tag)#host', |
124 | 125 | 'gen_separate_dictionary_data_for_<(dataset_tag)#host', |
137 | 138 | 'action_name': 'gen_mozc_dataset_for_<(dataset_tag)', |
138 | 139 | 'variables': { |
139 | 140 | 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)', |
141 | 'pos_matcher': '<(gen_out_dir)/pos_matcher.data', | |
140 | 142 | 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data', |
141 | 143 | 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data', |
142 | 144 | 'dictionary': '<(gen_out_dir)/system.dictionary', |
161 | 163 | 'symbol_string': '<(gen_out_dir)/symbol_string.data', |
162 | 164 | }, |
163 | 165 | 'inputs': [ |
166 | '<(pos_matcher)', | |
164 | 167 | '<(user_pos_token)', |
165 | 168 | '<(user_pos_string)', |
166 | 169 | '<(dictionary)', |
191 | 194 | '<(generator)', |
192 | 195 | '--magic=<(magic_number)', |
193 | 196 | '--output=<(gen_out_dir)/<(out_mozc_data)', |
197 | 'pos_matcher:32:<(pos_matcher)', | |
194 | 198 | 'user_pos_token:32:<(user_pos_token)', |
195 | 199 | 'user_pos_string:32:<(user_pos_string)', |
196 | 200 | 'coll:32:<(gen_out_dir)/collocation_data.data', |
247 | 251 | 'type': 'none', |
248 | 252 | 'toolsets': ['host'], |
249 | 253 | 'dependencies': [ |
250 | '<(dataset_tag)_data_manager_base.gyp:gen_<(dataset_tag)_embedded_data_light', | |
251 | 254 | 'gen_embedded_collocation_data_for_<(dataset_tag)#host', |
252 | 255 | 'gen_embedded_collocation_suppression_data_for_<(dataset_tag)#host', |
253 | 256 | 'gen_embedded_connection_data_for_<(dataset_tag)#host', |
54 | 54 | // partial data set). |
55 | 55 | bool InitUserPosManagerDataFromArray(StringPiece array, StringPiece magic); |
56 | 56 | |
57 | // The following interfaces are implemented. | |
57 | // Implementation of DataManagerInterface. | |
58 | const uint16 *GetPOSMatcherData() const override; | |
58 | 59 | void GetUserPOSData(StringPiece *token_array_data, |
59 | 60 | StringPiece *string_array_data) const override; |
60 | 61 | void GetConnectorData(const char **data, size_t *size) const override; |
88 | 89 | StringPiece *string_array_data) const override; |
89 | 90 | #endif // NO_USAGE_REWRITER |
90 | 91 | |
91 | // The following interfaces are not yet implemented. | |
92 | // TODO(noriyukit): Implements all the interfaces by migrating embedded C++ | |
93 | // structures to a data set file. | |
94 | const dictionary::POSMatcher *GetPOSMatcher() const override; | |
95 | ||
96 | 92 | private: |
93 | StringPiece pos_matcher_data_; | |
97 | 94 | StringPiece user_pos_token_array_data_; |
98 | 95 | StringPiece user_pos_string_array_data_; |
99 | 96 | StringPiece connection_data_; |
37 | 37 | ], |
38 | 38 | 'dependencies': [ |
39 | 39 | '<(mozc_dir)/base/base.gyp:base', |
40 | '<(mozc_dir)/dictionary/dictionary_base.gyp:pos_matcher', | |
41 | 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host', | |
42 | 40 | 'gen_user_pos_manager_data_header_for_<(dataset_tag)#host', |
43 | 41 | '../data_manager_base.gyp:data_manager', |
44 | ], | |
45 | }, | |
46 | { | |
47 | 'target_name': 'gen_<(dataset_tag)_embedded_data_light', | |
48 | 'type': 'none', | |
49 | 'toolsets': ['host'], | |
50 | 'dependencies': [ | |
51 | 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host', | |
52 | 42 | ], |
53 | 43 | }, |
54 | 44 | { |
114 | 104 | 'dependencies': [ |
115 | 105 | '../data_manager_base.gyp:dataset_writer_main', |
116 | 106 | 'gen_separate_user_pos_data_for_<(dataset_tag)#host', |
107 | 'gen_separate_pos_matcher_data_for_<(dataset_tag)#host', | |
117 | 108 | ], |
118 | 109 | 'actions': [ |
119 | 110 | { |
120 | 111 | 'action_name': 'gen_user_pos_manager_data_for_<(dataset_tag)', |
121 | 112 | 'variables': { |
122 | 113 | 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)', |
114 | 'pos_matcher': '<(gen_out_dir)/pos_matcher.data', | |
123 | 115 | 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data', |
124 | 116 | 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data', |
125 | 117 | }, |
126 | 118 | 'inputs': [ |
119 | '<(pos_matcher)', | |
127 | 120 | '<(user_pos_token)', |
128 | 121 | '<(user_pos_string)', |
129 | 122 | ], |
133 | 126 | 'action': [ |
134 | 127 | '<(generator)', |
135 | 128 | '--output=<(gen_out_dir)/user_pos_manager.data', |
129 | 'pos_matcher:32:<(pos_matcher)', | |
136 | 130 | 'user_pos_token:32:<(user_pos_token)', |
137 | 131 | 'user_pos_string:32:<(user_pos_string)', |
138 | 132 | ], |
185 | 179 | ], |
186 | 180 | }, |
187 | 181 | { |
188 | 'target_name': 'gen_embedded_pos_matcher_data_for_<(dataset_tag)', | |
182 | 'target_name': 'gen_separate_pos_matcher_data_for_<(dataset_tag)', | |
189 | 183 | 'type': 'none', |
190 | 184 | 'toolsets': ['host'], |
191 | 185 | 'dependencies': [ |
193 | 187 | ], |
194 | 188 | 'actions': [ |
195 | 189 | { |
196 | 'action_name': 'gen_embedded_pos_matcher_data_for_<(dataset_tag)', | |
190 | 'action_name': 'gen_separate_pos_matcher_data_for_<(dataset_tag)', | |
197 | 191 | 'variables': { |
198 | 192 | 'id_def': '<(platform_data_dir)/id.def', |
199 | 193 | 'special_pos': '<(common_data_dir)/rules/special_pos.def', |
200 | 194 | 'pos_matcher_rule': '<(common_data_dir)/rules/pos_matcher_rule.def', |
201 | 'pos_matcher_data': '<(gen_out_dir)/pos_matcher_data.h', | |
195 | 'pos_matcher_data': '<(gen_out_dir)/pos_matcher.data', | |
202 | 196 | }, |
203 | 197 | 'inputs': [ |
204 | 198 | '<(mozc_dir)/dictionary/gen_pos_matcher_code.py', |
34 | 34 | |
35 | 35 | namespace mozc { |
36 | 36 | |
37 | #ifndef NO_USAGE_REWRITER | |
38 | struct ConjugationSuffix; | |
39 | struct UsageDictItem; | |
40 | #endif // NO_USAGE_REWRITER | |
41 | ||
42 | namespace dictionary { | |
43 | class POSMatcher; | |
44 | } // namespace dictionary | |
45 | ||
46 | 37 | // Builds those objects that depend on a set of embedded data generated from |
47 | 38 | // files in data/dictionary, such as dictionary.txt, id.def, etc. |
48 | 39 | class DataManagerInterface { |
55 | 46 | |
56 | 47 | // Returns the uint16 array of POS matcher data used to build a POSMatcher. |
57 | 48 | // Don't delete the returned pointer, which is owned by the manager. |
58 | virtual const dictionary::POSMatcher *GetPOSMatcher() const = 0; | |
49 | virtual const uint16 *GetPOSMatcherData() const = 0; | |
59 | 50 | |
60 | 51 | // Returns the address of an array of lid group. |
61 | 52 | virtual const uint8 *GetPosGroupData() const = 0; |
142 | 142 | void DataManagerTestBase::SegmenterTest_ParticleTest() { |
143 | 143 | std::unique_ptr<Segmenter> segmenter( |
144 | 144 | Segmenter::CreateFromDataManager(*data_manager_)); |
145 | const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher(); | |
145 | const POSMatcher pos_matcher(data_manager_->GetPOSMatcherData()); | |
146 | 146 | |
147 | 147 | Node lnode, rnode; |
148 | 148 | lnode.Init(); |
150 | 150 | lnode.node_type = Node::NOR_NODE; |
151 | 151 | rnode.node_type = Node::NOR_NODE; |
152 | 152 | // "助詞" |
153 | lnode.rid = pos_matcher->GetAcceptableParticleAtBeginOfSegmentId(); | |
153 | lnode.rid = pos_matcher.GetAcceptableParticleAtBeginOfSegmentId(); | |
154 | 154 | // "名詞,サ変". |
155 | rnode.lid = pos_matcher->GetUnknownId(); | |
155 | rnode.lid = pos_matcher.GetUnknownId(); | |
156 | 156 | EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, false)); |
157 | 157 | |
158 | 158 | lnode.attributes |= Node::STARTS_WITH_PARTICLE; |
31 | 31 | #include "base/embedded_file.h" |
32 | 32 | #include "base/logging.h" |
33 | 33 | #include "base/singleton.h" |
34 | #include "dictionary/pos_group.h" | |
35 | #include "dictionary/pos_matcher.h" | |
36 | 34 | |
37 | 35 | namespace mozc { |
38 | 36 | namespace oss { |
62 | 60 | manager_.GetUserPOSData(token_array_data, string_array_data); |
63 | 61 | } |
64 | 62 | |
65 | namespace { | |
66 | // This header file is autogenerated by gen_pos_matcher_code.py and contains | |
67 | // kRuleIdTable[] and kRangeTable[]. | |
68 | #include "data_manager/oss/pos_matcher_data.h" | |
69 | ||
70 | class OssPOSMatcher : public dictionary::POSMatcher { | |
71 | public: | |
72 | OssPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {} | |
73 | }; | |
74 | } // namespace | |
75 | ||
76 | const dictionary::POSMatcher *OssUserPosManager::GetPOSMatcher() const { | |
77 | return Singleton<OssPOSMatcher>::get(); | |
63 | const uint16 *OssUserPosManager::GetPOSMatcherData() const { | |
64 | return manager_.GetPOSMatcherData(); | |
78 | 65 | } |
79 | 66 | |
80 | 67 | } // namespace oss |
48 | 48 | // Returns the address to an array of UserPOS::POSToken. |
49 | 49 | void GetUserPOSData(StringPiece *token_array_data, |
50 | 50 | StringPiece *string_array_data) const override; |
51 | const dictionary::POSMatcher *GetPOSMatcher() const override; | |
51 | const uint16 *GetPOSMatcherData() const override; | |
52 | 52 | |
53 | 53 | // The following are implemented in OssDataManager. |
54 | 54 | const uint8 *GetPosGroupData() const override { return nullptr; } |
34 | 34 | #include "base/logging.h" |
35 | 35 | #include "base/version.h" |
36 | 36 | #include "data_manager/packed/system_dictionary_data_packer.h" |
37 | #include "dictionary/pos_group.h" | |
38 | #include "dictionary/pos_matcher.h" | |
39 | #include "dictionary/user_pos.h" | |
40 | 37 | |
41 | 38 | DEFINE_string(user_pos_manager_data, "", "Input user pos manager data"); |
42 | 39 | DEFINE_string(output, "", "Output data file name"); |
43 | 40 | |
44 | 41 | namespace mozc { |
45 | namespace { | |
46 | ||
47 | #include "data_manager/@DIR@/pos_matcher_data.h" | |
48 | ||
49 | } // namespace | |
50 | 42 | |
51 | 43 | bool OutputData(const string &file_path) { |
52 | 44 | const char* kMagicNumber = ""; // No magic number. |
54 | 46 | packer.SetMozcData(InputFileStream(FLAGS_user_pos_manager_data.c_str(), |
55 | 47 | ios_base::in | ios_base::binary).Read(), |
56 | 48 | kMagicNumber); |
57 | // The following two arrays contain sentinel elements but the packer doesn't | |
58 | // expect them. So, pass the shinked ranges of the arrays. Note that | |
59 | // sentinel elements are not necessary at runtime. | |
60 | packer.SetPosMatcherData(kRuleIdTable, arraysize(kRuleIdTable) - 1, | |
61 | kRangeTables, arraysize(kRangeTables) - 1); | |
62 | 49 | return packer.Output(file_path, false); |
63 | 50 | } |
64 | 51 |
35 | 35 | #include "base/util.h" |
36 | 36 | #include "base/version.h" |
37 | 37 | #include "data_manager/packed/system_dictionary_data_packer.h" |
38 | #include "dictionary/pos_group.h" | |
39 | #include "dictionary/pos_matcher.h" | |
40 | #include "dictionary/user_pos.h" | |
41 | #include "rewriter/embedded_dictionary.h" | |
42 | 38 | |
43 | 39 | DEFINE_string(mozc_data, "", "Data set file to be packed"); |
44 | 40 | DEFINE_string(mozc_data_magic, "", "Magic number for data set file"); |
48 | 44 | DEFINE_bool(use_gzip, false, "use gzip"); |
49 | 45 | |
50 | 46 | namespace mozc { |
51 | namespace { | |
52 | ||
53 | #include "data_manager/@DIR@/pos_matcher_data.h" | |
54 | ||
55 | } // namespace | |
56 | 47 | |
57 | 48 | bool OutputData(const string &file_path) { |
58 | 49 | string dictionary_version = Version::GetMozcVersion(); |
60 | 51 | dictionary_version = FLAGS_dictionary_version; |
61 | 52 | } |
62 | 53 | packed::SystemDictionaryDataPacker packer(dictionary_version); |
63 | // The following two arrays contain sentinel elements but the packer doesn't | |
64 | // expect them. So pass the shinked ranges of the arrays. Note that sentinel | |
65 | // elements are not required at runtime. | |
66 | packer.SetPosMatcherData(kRuleIdTable, arraysize(kRuleIdTable) - 1, | |
67 | kRangeTables, arraysize(kRangeTables) - 1); | |
68 | 54 | |
69 | 55 | string magic; |
70 | 56 | CHECK(Util::Unescape(FLAGS_mozc_data_magic, &magic)) |
40 | 40 | #include "data_manager/data_manager_interface.h" |
41 | 41 | #include "data_manager/packed/system_dictionary_data.pb.h" |
42 | 42 | #include "data_manager/packed/system_dictionary_format_version.h" |
43 | #include "dictionary/pos_matcher.h" | |
44 | 43 | |
45 | 44 | DEFINE_string(dataset, |
46 | 45 | "", |
47 | 46 | "The dataset tag of the POS data."); |
48 | 47 | |
49 | 48 | using std::unique_ptr; |
50 | ||
51 | using mozc::dictionary::POSMatcher; | |
52 | 49 | |
53 | 50 | namespace mozc { |
54 | 51 | namespace packed { |
56 | 53 | // Default value of the total bytes limit defined in protobuf library is 64MB. |
57 | 54 | // Our big dictionary size is about 50MB. So we don't need to change it. |
58 | 55 | const size_t kDefaultTotalBytesLimit = 64 << 20; |
59 | ||
60 | class PackedPOSMatcher : public POSMatcher { | |
61 | public: | |
62 | PackedPOSMatcher(const uint16 *const rule_id_table, | |
63 | const Range *const *const range_table) | |
64 | : POSMatcher(rule_id_table, range_table) { | |
65 | } | |
66 | }; | |
67 | 56 | |
68 | 57 | unique_ptr<PackedDataManager> g_data_manager; |
69 | 58 | |
79 | 68 | |
80 | 69 | void GetUserPOSData(StringPiece *token_array_data, |
81 | 70 | StringPiece *string_array_data) const; |
82 | const POSMatcher *GetPOSMatcher() const; | |
71 | const uint16 *GetPOSMatcherData() const; | |
83 | 72 | const uint8 *GetPosGroupData() const; |
84 | 73 | void GetConnectorData(const char **data, size_t *size) const; |
85 | 74 | void GetSegmenterData( |
106 | 95 | StringPiece *usage_items_data, |
107 | 96 | StringPiece *string_array_data) const; |
108 | 97 | #endif // NO_USAGE_REWRITER |
109 | const uint16 *GetRuleIdTableForTest() const; | |
110 | const void *GetRangeTablesForTest() const; | |
111 | 98 | void GetCounterSuffixSortedArray(const char **array, size_t *size) const; |
112 | 99 | StringPiece GetMozcData() const; |
113 | 100 | |
114 | 101 | private: |
115 | // Non-const struct of POSMatcher::Range | |
116 | struct Range { | |
117 | uint16 lower; | |
118 | uint16 upper; | |
119 | }; | |
120 | 102 | bool InitializeWithSystemDictionaryData(); |
121 | 103 | |
122 | unique_ptr<uint16[]> rule_id_table_; | |
123 | unique_ptr<POSMatcher::Range *[]> range_tables_; | |
124 | unique_ptr<Range[]> range_table_items_; | |
125 | unique_ptr<POSMatcher> pos_matcher_; | |
126 | 104 | unique_ptr<SystemDictionaryData> system_dictionary_data_; |
127 | 105 | DataManager manager_; |
128 | 106 | }; |
170 | 148 | << " actual:" << system_dictionary_data_->format_version(); |
171 | 149 | return false; |
172 | 150 | } |
173 | ||
174 | // Makes POSMatcher data. | |
175 | rule_id_table_.reset( | |
176 | new uint16[ | |
177 | system_dictionary_data_->pos_matcher_data().rule_id_table_size()]); | |
178 | for (size_t i = 0; | |
179 | i < system_dictionary_data_->pos_matcher_data().rule_id_table_size(); | |
180 | ++i) { | |
181 | rule_id_table_[i] = | |
182 | system_dictionary_data_->pos_matcher_data().rule_id_table(i); | |
183 | } | |
184 | const SystemDictionaryData::PosMatcherData &pos_matcher_data = | |
185 | system_dictionary_data_->pos_matcher_data(); | |
186 | range_tables_.reset( | |
187 | new POSMatcher::Range*[pos_matcher_data.range_tables_size()]); | |
188 | size_t range_count = 0; | |
189 | for (size_t i = 0; i < pos_matcher_data.range_tables_size(); ++i) { | |
190 | range_count += pos_matcher_data.range_tables(i).ranges_size(); | |
191 | } | |
192 | range_table_items_.reset( | |
193 | new Range[range_count + pos_matcher_data.range_tables_size()]); | |
194 | size_t range_index = 0; | |
195 | for (size_t i = 0; i < pos_matcher_data.range_tables_size(); ++i) { | |
196 | const SystemDictionaryData::PosMatcherData::RangeTable &table = | |
197 | pos_matcher_data.range_tables(i); | |
198 | range_tables_[i] = | |
199 | reinterpret_cast<POSMatcher::Range *>(&range_table_items_[range_index]); | |
200 | for (size_t j = 0; j < table.ranges_size(); ++j) { | |
201 | const SystemDictionaryData::PosMatcherData::RangeTable::Range &range = | |
202 | table.ranges(j); | |
203 | range_table_items_[range_index].lower = range.lower(); | |
204 | range_table_items_[range_index].upper = range.upper(); | |
205 | ++range_index; | |
206 | } | |
207 | range_table_items_[range_index].lower = static_cast<uint16>(0xFFFF); | |
208 | range_table_items_[range_index].upper = static_cast<uint16>(0xFFFF); | |
209 | ++range_index; | |
210 | } | |
211 | ||
212 | // Makes POSMatcher. | |
213 | pos_matcher_.reset( | |
214 | new PackedPOSMatcher(rule_id_table_.get(), range_tables_.get())); | |
215 | 151 | |
216 | 152 | // Initialize |manager_| (PackedDataManager for light doesn't have mozc data). |
217 | 153 | if (system_dictionary_data_->has_mozc_data() && |
237 | 173 | manager_.GetUserPOSData(token_array_data, string_array_data); |
238 | 174 | } |
239 | 175 | |
240 | const POSMatcher *PackedDataManager::Impl::GetPOSMatcher() const { | |
241 | return pos_matcher_.get(); | |
176 | const uint16 *PackedDataManager::Impl::GetPOSMatcherData() const { | |
177 | return manager_.GetPOSMatcherData(); | |
242 | 178 | } |
243 | 179 | |
244 | 180 | const uint8 *PackedDataManager::Impl::GetPosGroupData() const { |
316 | 252 | string_array_data); |
317 | 253 | } |
318 | 254 | #endif // NO_USAGE_REWRITER |
319 | ||
320 | const uint16 *PackedDataManager::Impl::GetRuleIdTableForTest() const { | |
321 | return rule_id_table_.get(); | |
322 | } | |
323 | ||
324 | const void *PackedDataManager::Impl::GetRangeTablesForTest() const { | |
325 | return range_tables_.get(); | |
326 | } | |
327 | 255 | |
328 | 256 | void PackedDataManager::Impl::GetCounterSuffixSortedArray( |
329 | 257 | const char **array, size_t *size) const { |
393 | 321 | return g_data_manager.get(); |
394 | 322 | } |
395 | 323 | |
396 | const POSMatcher *PackedDataManager::GetPOSMatcher() const { | |
397 | return manager_impl_->GetPOSMatcher(); | |
324 | const uint16 *PackedDataManager::GetPOSMatcherData() const { | |
325 | return manager_impl_->GetPOSMatcherData(); | |
398 | 326 | } |
399 | 327 | |
400 | 328 | const uint8 *PackedDataManager::GetPosGroupData() const { |
483 | 411 | manager_impl_->GetCounterSuffixSortedArray(array, size); |
484 | 412 | } |
485 | 413 | |
486 | const uint16 *PackedDataManager::GetRuleIdTableForTest() const { | |
487 | return manager_impl_->GetRuleIdTableForTest(); | |
488 | } | |
489 | ||
490 | const void *PackedDataManager::GetRangeTablesForTest() const { | |
491 | return manager_impl_->GetRangeTablesForTest(); | |
492 | } | |
493 | ||
494 | 414 | StringPiece PackedDataManager::GetMozcData() const { |
495 | 415 | return manager_impl_->GetMozcData(); |
496 | 416 | } |
53 | 53 | |
54 | 54 | void GetUserPOSData(StringPiece *token_array_data, |
55 | 55 | StringPiece *string_array_data) const override; |
56 | const dictionary::POSMatcher *GetPOSMatcher() const override; | |
56 | const uint16 *GetPOSMatcherData() const override; | |
57 | 57 | const uint8 *GetPosGroupData() const override; |
58 | 58 | void GetConnectorData(const char **data, size_t *size) const override; |
59 | 59 | void GetSegmenterData(size_t *l_num_elements, size_t *r_num_elements, |
86 | 86 | |
87 | 87 | private: |
88 | 88 | friend class PackedDataTestBase; |
89 | const uint16 *GetRuleIdTableForTest() const; | |
90 | const void *GetRangeTablesForTest() const; | |
91 | 89 | |
92 | 90 | class Impl; |
93 | 91 | std::unique_ptr<Impl> manager_impl_; |
63 | 63 | 'system_dictionary_data_protocol', |
64 | 64 | '../../base/base.gyp:base', |
65 | 65 | '../../dictionary/dictionary_base.gyp:pos_matcher', |
66 | '../<(dataset_dir)/<(dataset_tag)_data_manager_base.gyp:gen_<(dataset_tag)_embedded_data_light', | |
67 | 66 | ], |
68 | 67 | }, |
69 | 68 | { |
38 | 38 | |
39 | 39 | reserved 3; // DEPRECATED: repeated PosToken pos_tokens = 3; |
40 | 40 | |
41 | message PosMatcherData { | |
42 | repeated uint32 rule_id_table = 1; | |
43 | message RangeTable { | |
44 | message Range { | |
45 | optional uint32 lower = 1; | |
46 | optional uint32 upper = 2; | |
47 | }; | |
48 | repeated Range ranges = 2; | |
49 | }; | |
50 | repeated RangeTable range_tables = 2; | |
51 | }; | |
52 | optional PosMatcherData pos_matcher_data = 4; | |
41 | reserved 4; // DEPRECATED: optional PosMatcherData pos_matcher_data = 4; | |
53 | 42 | |
54 | 43 | reserved 5; // DEPRECATED: optional bytes lid_group_data = 5; |
55 | 44 |
39 | 39 | #include "base/version.h" |
40 | 40 | #include "data_manager/packed/system_dictionary_data.pb.h" |
41 | 41 | #include "data_manager/packed/system_dictionary_format_version.h" |
42 | #include "dictionary/pos_group.h" | |
43 | #include "dictionary/pos_matcher.h" | |
44 | #include "dictionary/user_pos.h" | |
45 | ||
46 | using mozc::dictionary::POSMatcher; | |
47 | using mozc::dictionary::UserPOS; | |
48 | 42 | |
49 | 43 | namespace mozc { |
50 | 44 | namespace packed { |
56 | 50 | } |
57 | 51 | |
58 | 52 | SystemDictionaryDataPacker::~SystemDictionaryDataPacker() { |
59 | } | |
60 | ||
61 | void SystemDictionaryDataPacker::SetPosMatcherData( | |
62 | const uint16 *rule_id_table, | |
63 | size_t rule_id_table_count, | |
64 | const POSMatcher::Range *const *range_tables, | |
65 | size_t range_tables_count) { | |
66 | SystemDictionaryData::PosMatcherData *pos_matcher_data = | |
67 | system_dictionary_->mutable_pos_matcher_data(); | |
68 | for (size_t i = 0; i < rule_id_table_count; ++i) { | |
69 | pos_matcher_data->add_rule_id_table(rule_id_table[i]); | |
70 | } | |
71 | for (size_t i = 0; i < range_tables_count; ++i) { | |
72 | SystemDictionaryData::PosMatcherData::RangeTable *range_table = | |
73 | pos_matcher_data->add_range_tables(); | |
74 | for (size_t j = 0; | |
75 | range_tables[i][j].lower != static_cast<uint16>(0xFFFF); | |
76 | ++j) { | |
77 | SystemDictionaryData::PosMatcherData::RangeTable::Range *range | |
78 | = range_table->add_ranges(); | |
79 | range->set_lower(range_tables[i][j].lower); | |
80 | range->set_upper(range_tables[i][j].upper); | |
81 | } | |
82 | } | |
83 | 53 | } |
84 | 54 | |
85 | 55 | void SystemDictionaryDataPacker::SetMozcData(const string &data, |
43 | 43 | public: |
44 | 44 | explicit SystemDictionaryDataPacker(const string &product_version); |
45 | 45 | ~SystemDictionaryDataPacker(); |
46 | void SetPosMatcherData( | |
47 | const uint16 *rule_id_table, | |
48 | size_t rule_id_table_count, | |
49 | const dictionary::POSMatcher::Range *const *range_tables, | |
50 | size_t range_tables_count); | |
51 | 46 | void SetMozcData(const string &data, const string &magic); |
52 | 47 | |
53 | 48 | bool Output(const string &file_path, bool use_gzip); |
32 | 32 | namespace mozc { |
33 | 33 | namespace packed { |
34 | 34 | |
35 | const int kSystemDictionaryFormatVersion = 21; | |
35 | const int kSystemDictionaryFormatVersion = 22; | |
36 | 36 | |
37 | 37 | } // namespace packed |
38 | 38 | } // namespace mozc |
31 | 31 | #include "base/embedded_file.h" |
32 | 32 | #include "base/logging.h" |
33 | 33 | #include "base/singleton.h" |
34 | #include "dictionary/pos_matcher.h" | |
35 | 34 | |
36 | 35 | namespace mozc { |
37 | 36 | namespace testing { |
61 | 60 | manager_.GetUserPOSData(token_array_data, string_array_data); |
62 | 61 | } |
63 | 62 | |
64 | namespace { | |
65 | // This header file is autogenerated by gen_pos_matcher_code.py and contains | |
66 | // kRuleIdTable[] and kRangeTable[]. | |
67 | #include "data_manager/testing/pos_matcher_data.h" | |
68 | ||
69 | class MockPOSMatcher : public dictionary::POSMatcher { | |
70 | public: | |
71 | MockPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {} | |
72 | }; | |
73 | } // namespace | |
74 | ||
75 | const dictionary::POSMatcher *MockUserPosManager::GetPOSMatcher() const { | |
76 | return Singleton<MockPOSMatcher>::get(); | |
63 | const uint16 *MockUserPosManager::GetPOSMatcherData() const { | |
64 | return manager_.GetPOSMatcherData(); | |
77 | 65 | } |
78 | 66 | |
79 | 67 | } // namespace testing |
47 | 47 | // following embedded data. |
48 | 48 | void GetUserPOSData(StringPiece *token_array_data, |
49 | 49 | StringPiece *string_array_data) const override; |
50 | const dictionary::POSMatcher *GetPOSMatcher() const override; | |
50 | const uint16 *GetPOSMatcherData() const override; | |
51 | 51 | |
52 | 52 | // The following are implemented in MockDataManager. |
53 | 53 | const uint8 *GetPosGroupData() const override { return nullptr; } |
57 | 57 | struct DictionaryData { |
58 | 58 | std::unique_ptr<DictionaryInterface> user_dictionary; |
59 | 59 | std::unique_ptr<SuppressionDictionary> suppression_dictionary; |
60 | const POSMatcher *pos_matcher; | |
60 | POSMatcher pos_matcher; | |
61 | 61 | std::unique_ptr<DictionaryInterface> dictionary; |
62 | 62 | }; |
63 | 63 | |
64 | 64 | DictionaryData *CreateDictionaryData() { |
65 | 65 | DictionaryData *ret = new DictionaryData; |
66 | 66 | testing::MockDataManager data_manager; |
67 | ret->pos_matcher = data_manager.GetPOSMatcher(); | |
67 | ret->pos_matcher.Set(data_manager.GetPOSMatcherData()); | |
68 | 68 | const char *dictionary_data = NULL; |
69 | 69 | int dictionary_size = 0; |
70 | 70 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
71 | 71 | SystemDictionary *sys_dict = |
72 | 72 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
73 | 73 | ValueDictionary *val_dict = |
74 | new ValueDictionary(*ret->pos_matcher, &sys_dict->value_trie()); | |
74 | new ValueDictionary(ret->pos_matcher, &sys_dict->value_trie()); | |
75 | 75 | ret->user_dictionary.reset(new UserDictionaryStub); |
76 | 76 | ret->suppression_dictionary.reset(new SuppressionDictionary); |
77 | 77 | ret->dictionary.reset(new DictionaryImpl(sys_dict, |
78 | 78 | val_dict, |
79 | 79 | ret->user_dictionary.get(), |
80 | 80 | ret->suppression_dictionary.get(), |
81 | ret->pos_matcher)); | |
81 | &ret->pos_matcher)); | |
82 | 82 | return ret; |
83 | 83 | } |
84 | 84 | |
289 | 289 | // config. |
290 | 290 | config_.set_use_zip_code_conversion(true); |
291 | 291 | for (size_t i = 0; i < arraysize(kTestPair); ++i) { |
292 | CheckZipCodeExistenceCallback callback(kKey, kValue, data->pos_matcher); | |
292 | CheckZipCodeExistenceCallback callback(kKey, kValue, &data->pos_matcher); | |
293 | 293 | (d->*kTestPair[i].lookup_method)(kTestPair[i].query, convreq_, &callback); |
294 | 294 | EXPECT_TRUE(callback.found()); |
295 | 295 | } |
297 | 297 | // Without the flag, it should be suppressed. |
298 | 298 | config_.set_use_zip_code_conversion(false); |
299 | 299 | for (size_t i = 0; i < arraysize(kTestPair); ++i) { |
300 | CheckZipCodeExistenceCallback callback(kKey, kValue, data->pos_matcher); | |
300 | CheckZipCodeExistenceCallback callback(kKey, kValue, &data->pos_matcher); | |
301 | 301 | (d->*kTestPair[i].lookup_method)(kTestPair[i].query, convreq_, &callback); |
302 | 302 | EXPECT_FALSE(callback.found()); |
303 | 303 | } |
27 | 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 | |
30 | """A tool to generate POS matcher.""" | |
30 | """A tool to generate POS matcher. | |
31 | ||
32 | This script generates POS matcher data and the C++ class that provides functions | |
33 | for POS ID matching. | |
34 | ||
35 | * C++ class: POSMatcher | |
36 | ||
37 | This class has two methods for each POS matching rule: | |
38 | - GetXXXId(): returns the POS ID for XXX. | |
39 | - IsXXX(uint16 id): checks if the given POS ID is XXX or not. | |
40 | Here, XXX is replaced by rule names; see data/rules/pos_matcher_rule.def. | |
41 | ||
42 | POSMatcher is created from the data generated by this script. | |
43 | The binary format is as follows. | |
44 | ||
45 | * Binary format | |
46 | ||
47 | Suppose there are N matching rules. The first 2*N bytes are the uint16 array of | |
48 | GetXXXId() results. Next come N uint16 offsets (in uint16 units from the start | |
49 | of the data; not drawn below) to each rule's POS ID ranges for IsXXX(uint16 id) | |
50 | (IsXXX should return true if id is in one of the ranges). See the figure: | |
51 | ||
52 | +===========================================+============================= | |
53 | | POS ID for rule 0 (2 bytes) | For GetXXXID() methods | |
54 | +-------------------------------------------+ | |
55 | | POS ID for rule 1 (2 bytes) | | |
56 | +-------------------------------------------+ | |
57 | | .... | | |
58 | +-------------------------------------------+ | |
59 | | POS ID for rule N - 1 (2 bytes) | | |
60 | +===========================================+============================= | |
61 | | POS range for rule 0: start 0 (2 bytes) | For IsXXX() for rule 0 | |
62 | + - - - - - - - - - - - - - - - - - - - - - + | |
63 | | POS range for rule 0: end 0 (2 bytes) | | |
64 | +-------------------------------------------+ | |
65 | | POS range for rule 0: start 1 (2 bytes) | | |
66 | + - - - - - - - - - - - - - - - - - - - - - + | |
67 | | POS range for rule 0: end 1 (2 bytes) | | |
68 | |-------------------------------------------+ | |
69 | | .... | | |
70 | |-------------------------------------------+ | |
71 | | POS range for rule 0: start M (2 bytes) | | |
72 | + - - - - - - - - - - - - - - - - - - - - - + | |
73 | | POS range for rule 0: end M (2 bytes) | | |
74 | |-------------------------------------------+ | |
75 | | Sentinel element 0xFFFF (2 bytes) | | |
76 | +===========================================+============================= | |
77 | | POS range for rule 1: start 0 (2 bytes) | For IsXXX() for rule 1 | |
78 | + - - - - - - - - - - - - - - - - - - - - - + | |
79 | | POS range for rule 1: end 0 (2 bytes) | | |
80 | +-------------------------------------------+ | |
81 | | .... | | |
82 | +-------------------------------------------+ | |
83 | | Sentinel element 0xFFFF (2 bytes) | | |
84 | +===========================================+ | |
85 | | .... | | |
86 | | | | |
87 | """ | |
31 | 88 | |
32 | 89 | __author__ = "taku" |
33 | 90 | |
34 | 91 | import optparse |
35 | 92 | import re |
93 | import struct | |
36 | 94 | import sys |
37 | 95 | |
38 | 96 | from dictionary import pos_util |
39 | 97 | |
40 | 98 | |
41 | 99 | def OutputPosMatcherData(pos_matcher, output): |
42 | """Generates the data used by POSMatcher. | |
43 | ||
44 | Two data arrays are generated: | |
45 | 1) const uint16 kRuleIdTable[] | |
46 | This contains POS ID for each rule in pos_matcher_rule.def. The data is | |
47 | used by the method Get<RuleName>() generated by this script. Each array | |
48 | index corresponds to one rule name in its declared order. Namely, if | |
49 | pos_matcher_rule.def contain three rules, say | |
50 | Rule0 Regexp0 | |
51 | Rule1 Regexp1 | |
52 | Rule2 Regexp2 | |
53 | Then kRuleIdTable[0] contains the result of GetRule0(), etc. | |
54 | ||
55 | 2) const POSMatcher::Range kRangeTable[] | |
56 | Each element is a pointer to another array containing ranges of POS IDs | |
57 | whose union is the set of all POS IDs that match the regexp. Each array | |
58 | of ranges ends with the endmark { 0xFFFF, 0xFFFF }. | |
59 | ||
60 | Generated data can be passed to POSMatcher, which is also generated by | |
61 | this script, to get a POSMatcher corresponding to given data set. | |
62 | """ | |
63 | ||
64 | # Generate kRuleIdTable[]. | |
65 | output.write('const uint16 kRuleIdTable[] = {\n') | |
100 | data = [] | |
66 | 101 | for rule_name in pos_matcher.GetRuleNameList(): |
67 | output.write( | |
68 | ' %(id)4d, // %(rule_name)s "%(original_pattern)s"\n' | |
69 | % { 'id': pos_matcher.GetId(rule_name), | |
70 | 'rule_name': rule_name, | |
71 | 'original_pattern': pos_matcher.GetOriginalPattern(rule_name) }) | |
72 | output.write(' static_cast<uint16>(0xFFFF),\n') | |
73 | output.write('};\n') | |
74 | ||
75 | # Generate arrays of ranges each of which will be an element of kRangeTable[]. | |
76 | output.write('namespace {\n') | |
102 | data.append(pos_matcher.GetId(rule_name)) | |
103 | ||
104 | offset = 2 * len(pos_matcher.GetRuleNameList()) | |
77 | 105 | for rule_name in pos_matcher.GetRuleNameList(): |
78 | output.write( | |
79 | '// %(rule_name)s "%(original_pattern)s"\n' | |
80 | 'const ::mozc::dictionary::POSMatcher::Range ' | |
81 | 'kRangeTable_%(rule_name)s[] = {\n' | |
82 | % { 'rule_name': rule_name, | |
83 | 'original_pattern': pos_matcher.GetOriginalPattern(rule_name) }) | |
106 | data.append(offset) | |
107 | offset += 2 * len(pos_matcher.GetRange(rule_name)) + 1 | |
108 | ||
109 | for rule_name in pos_matcher.GetRuleNameList(): | |
84 | 110 | for id_range in pos_matcher.GetRange(rule_name): |
85 | output.write(' { %4d, %4d },\n' % id_range) | |
86 | # End mark for this array of ranges. | |
87 | output.write( | |
88 | ' { static_cast<uint16>(0xFFFF), static_cast<uint16>(0xFFFF) },\n' | |
89 | '};\n') | |
90 | output.write('} // namespace\n') | |
91 | ||
92 | # Generate kRangeTable[]. | |
93 | output.write( | |
94 | 'const ::mozc::dictionary::POSMatcher::Range *const ' | |
95 | 'kRangeTables[%d] = {\n' | |
96 | % (len(pos_matcher.GetRuleNameList()) + 1)) | |
97 | for rule_name in pos_matcher.GetRuleNameList(): | |
98 | output.write(' kRangeTable_%s,\n' % rule_name) | |
99 | output.write(' NULL,\n') | |
100 | output.write('};\n') | |
111 | data.append(id_range[0]) | |
112 | data.append(id_range[1]) | |
113 | data.append(0xFFFF) | |
114 | ||
115 | for u16 in data: | |
116 | output.write(struct.pack('<H', u16)) | |
101 | 117 | |
102 | 118 | |
103 | 119 | def OutputPosMatcherHeader(pos_matcher, output): |
104 | 120 | """Generates the definition of POSMatcher class. |
105 | 121 | |
106 | POSMatcher is independent of the actual input data but just provides logic | |
107 | for POS matching. To use a generated class, it's required to pass two arrays, | |
108 | kRuleIdTable[] and kRangeTables[], to the constructor of POSMatcher. | |
122 | POSMatcher is independent of the actual input data but just provides logic for | |
123 | POS matching. To use a generated class, it's required to pass the data | |
124 | generated by OutputPosMatcherData() above. | |
109 | 125 | """ |
126 | ||
127 | lid_table_size = len(pos_matcher.GetRuleNameList()) | |
110 | 128 | |
111 | 129 | output.write( |
112 | 130 | '#ifndef MOZC_DICTIONARY_POS_MATCHER_H_\n' |
115 | 133 | 'namespace mozc {\n' |
116 | 134 | 'namespace dictionary {\n' |
117 | 135 | 'class POSMatcher {\n' |
118 | ' public:\n' | |
119 | ' struct Range {\n' | |
120 | ' uint16 lower;\n' | |
121 | ' uint16 upper;\n' | |
122 | ' };\n') | |
136 | ' public:\n') | |
123 | 137 | |
124 | 138 | # Helper function to generate Get<RuleName>Id() method from rule name and its |
125 | 139 | # corresponding index. |
126 | 140 | def _GenerateGetMethod(rule_name, index): |
127 | 141 | return (' inline uint16 Get%(rule_name)sId() const {\n' |
128 | ' return rule_id_table_[%(index)d];\n' | |
129 | ' }' % { 'rule_name': rule_name, 'index': index }) | |
142 | ' return data_[%(index)d];\n' | |
143 | ' }' % { | |
144 | 'rule_name': rule_name, | |
145 | 'index': index, | |
146 | }) | |
130 | 147 | |
131 | 148 | # Helper function to generate Is<RuleName>(uint16 id) method from rule name |
132 | 149 | # and its corresponding index. The generated function checks if the given id |
133 | 150 | # belongs to some range in kRangeTable[index] = kRangeTable_RuleName[]. |
134 | 151 | def _GenerateIsMethod(rule_name, index): |
135 | 152 | return (' inline bool Is%(rule_name)s(uint16 id) const {\n' |
136 | ' for (const Range *range = range_table_[%(index)d];\n' | |
137 | ' range->lower != static_cast<uint16>(0xFFFF); ++range) {\n' | |
138 | ' if (id >= range->lower && id <= range->upper) {\n' | |
153 | ' const uint16 offset = data_[%(lid_table_size)d + %(index)d];\n' | |
154 | ' for (const uint16 *ptr = data_ + offset;\n' | |
155 | ' *ptr != static_cast<uint16>(0xFFFF); ptr += 2) {\n' | |
156 | ' if (id >= *ptr && id <= *(ptr + 1)) {\n' | |
139 | 157 | ' return true;\n' |
140 | 158 | ' }\n' |
141 | 159 | ' }\n' |
142 | 160 | ' return false;\n' |
143 | ' }' % { 'rule_name': rule_name, 'index': index }) | |
161 | ' }' % { | |
162 | 'rule_name': rule_name, | |
163 | 'index': index, | |
164 | 'lid_table_size': lid_table_size, | |
165 | }) | |
144 | 166 | |
145 | 167 | # Generate Get<RuleName>Id() and Is<RuleName>(uint16 id) for each rule. |
146 | 168 | for i, rule_name in enumerate(pos_matcher.GetRuleNameList()): |
157 | 179 | # function. |
158 | 180 | output.write( |
159 | 181 | ' public:\n' |
160 | ' POSMatcher(const uint16 *const rule_id_table,\n' | |
161 | ' const Range *const *const range_table)\n' | |
162 | ' : rule_id_table_(rule_id_table),\n' | |
163 | ' range_table_(range_table) {}\n' | |
182 | ' POSMatcher() : data_(nullptr) {}\n' | |
183 | ' explicit POSMatcher(const uint16 *data) : data_(data) {}\n' | |
184 | ' void Set(const uint16 *data) { data_ = data; }\n' | |
164 | 185 | ' private:\n' |
165 | ' const uint16 *const rule_id_table_;\n' | |
166 | ' const Range *const *const range_table_;\n' | |
186 | ' const uint16 *data_;\n' | |
167 | 187 | '};\n' |
168 | 188 | '} // namespace dictionary\n' |
169 | 189 | '} // namespace mozc\n' |
180 | 200 | parser.add_option('--output_pos_matcher_data', |
181 | 201 | dest='output_pos_matcher_data', |
182 | 202 | default='', |
183 | help='Path to the output header file of pos matcher data.') | |
203 | help='Path to the output file of pos matcher data.') | |
184 | 204 | parser.add_option('--output_pos_matcher_h', |
185 | 205 | dest='output_pos_matcher_h', |
186 | 206 | default='', |
205 | 225 | pos_database.Parse(options.id_file, options.special_pos_file) |
206 | 226 | pos_matcher = pos_util.PosMatcher(pos_database) |
207 | 227 | pos_matcher.Parse(options.pos_matcher_rule_file) |
208 | with open(options.output_pos_matcher_data, 'w') as stream: | |
228 | with open(options.output_pos_matcher_data, 'wb') as stream: | |
209 | 229 | OutputPosMatcherData(pos_matcher, stream) |
210 | 230 | |
211 | 231 |
101 | 101 | &system_dictionary_input, |
102 | 102 | &reading_correction_input); |
103 | 103 | |
104 | const mozc::dictionary::POSMatcher *pos_matcher = | |
105 | FLAGS_gen_test_dictionary ? | |
106 | mozc::testing::MockUserPosManager::GetUserPosManager()->GetPOSMatcher() : | |
107 | mozc::UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
108 | CHECK(pos_matcher); | |
104 | using mozc::testing::MockUserPosManager; | |
105 | using mozc::UserPosManager; | |
106 | const mozc::dictionary::POSMatcher pos_matcher( | |
107 | FLAGS_gen_test_dictionary | |
108 | ? MockUserPosManager::GetUserPosManager()->GetPOSMatcherData() | |
109 | : UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
109 | 110 | |
110 | mozc::dictionary::TextDictionaryLoader loader(*pos_matcher); | |
111 | mozc::dictionary::TextDictionaryLoader loader(pos_matcher); | |
111 | 112 | loader.Load(system_dictionary_input, reading_correction_input); |
112 | 113 | |
113 | 114 | mozc::dictionary::SystemDictionaryBuilder builder; |
81 | 81 | class SystemDictionaryTest : public ::testing::Test { |
82 | 82 | protected: |
83 | 83 | SystemDictionaryTest() |
84 | : text_dict_(new TextDictionaryLoader( | |
85 | *UserPosManager::GetUserPosManager()->GetPOSMatcher())), | |
84 | : pos_matcher_(UserPosManager::GetUserPosManager()->GetPOSMatcherData()), | |
85 | text_dict_(new TextDictionaryLoader(pos_matcher_)), | |
86 | 86 | dic_fn_(FileUtil::JoinPath(FLAGS_test_tmpdir, "mozc.dic")) { |
87 | 87 | const string dic_path = mozc::testing::GetSourceFileOrDie({ |
88 | 88 | "data", "dictionary_oss", "dictionary00.txt"}); |
92 | 92 | convreq_.set_config(&config_); |
93 | 93 | } |
94 | 94 | |
95 | virtual void SetUp() { | |
95 | void SetUp() override { | |
96 | 96 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
97 | 97 | |
98 | 98 | // Don't use small cost encoding by default. |
104 | 104 | config::ConfigHandler::GetDefaultConfig(&config_); |
105 | 105 | } |
106 | 106 | |
107 | virtual void TearDown() { | |
107 | void TearDown() override { | |
108 | 108 | FLAGS_min_key_length_to_use_small_cost_encoding = |
109 | 109 | original_flags_min_key_length_to_use_small_cost_encoding_; |
110 | 110 | |
121 | 121 | bool CompareTokensForLookup(const Token &a, const Token &b, |
122 | 122 | bool reverse) const; |
123 | 123 | |
124 | dictionary::POSMatcher pos_matcher_; | |
124 | 125 | unique_ptr<TextDictionaryLoader> text_dict_; |
125 | 126 | |
126 | 127 | ConversionRequest convreq_; |
47 | 47 | |
48 | 48 | class ValueDictionaryTest : public ::testing::Test { |
49 | 49 | protected: |
50 | virtual void SetUp() { | |
51 | pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
50 | void SetUp() override { | |
51 | pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
52 | 52 | louds_trie_builder_.reset(new LoudsTrieBuilder); |
53 | 53 | louds_trie_.reset(new LoudsTrie); |
54 | 54 | } |
55 | 55 | |
56 | virtual void TearDown() { | |
56 | void TearDown() override { | |
57 | 57 | louds_trie_.reset(); |
58 | 58 | louds_trie_builder_.reset(); |
59 | 59 | } |
68 | 68 | louds_trie_builder_->Build(); |
69 | 69 | louds_trie_->Open( |
70 | 70 | reinterpret_cast<const uint8 *>(louds_trie_builder_->image().data())); |
71 | return new ValueDictionary(*pos_matcher_, louds_trie_.get()); | |
71 | return new ValueDictionary(pos_matcher_, louds_trie_.get()); | |
72 | 72 | } |
73 | 73 | |
74 | 74 | void InitToken(const string &value, Token *token) const { |
75 | 75 | token->key = token->value = value; |
76 | 76 | token->cost = 10000; |
77 | token->lid = token->rid = pos_matcher_->GetSuggestOnlyWordId(); | |
77 | token->lid = token->rid = pos_matcher_.GetSuggestOnlyWordId(); | |
78 | 78 | token->attributes = Token::NONE; |
79 | 79 | } |
80 | 80 | |
81 | const POSMatcher *pos_matcher_; | |
81 | POSMatcher pos_matcher_; | |
82 | 82 | ConversionRequest convreq_; |
83 | 83 | std::unique_ptr<LoudsTrieBuilder> louds_trie_builder_; |
84 | 84 | std::unique_ptr<LoudsTrie> louds_trie_; |
64 | 64 | // considering this class as POD. |
65 | 65 | TextDictionaryLoaderTest() {} |
66 | 66 | |
67 | virtual void SetUp() { | |
68 | pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
67 | void SetUp() override { | |
68 | pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
69 | 69 | } |
70 | 70 | |
71 | 71 | TextDictionaryLoader *CreateTextDictionaryLoader() { |
72 | return new TextDictionaryLoader(*pos_matcher_); | |
73 | } | |
74 | ||
75 | const POSMatcher *pos_matcher_; | |
72 | return new TextDictionaryLoader(pos_matcher_); | |
73 | } | |
74 | ||
75 | POSMatcher pos_matcher_; | |
76 | 76 | scoped_data_manager_initializer_for_testing |
77 | 77 | scoped_data_manager_initializer_for_testing_; |
78 | 78 | }; |
182 | 182 | token.lid = 100; |
183 | 183 | token.rid = 200; |
184 | 184 | EXPECT_TRUE(loader->RewriteSpecialToken(&token, "ZIP_CODE")); |
185 | EXPECT_EQ(pos_matcher_->GetZipcodeId(), token.lid); | |
186 | EXPECT_EQ(pos_matcher_->GetZipcodeId(), token.rid); | |
185 | EXPECT_EQ(pos_matcher_.GetZipcodeId(), token.lid); | |
186 | EXPECT_EQ(pos_matcher_.GetZipcodeId(), token.rid); | |
187 | 187 | EXPECT_EQ(Token::NONE, token.attributes); |
188 | 188 | } |
189 | 189 | |
192 | 192 | token.lid = 100; |
193 | 193 | token.rid = 200; |
194 | 194 | EXPECT_TRUE(loader->RewriteSpecialToken(&token, "ENGLISH:RATED")); |
195 | EXPECT_EQ(pos_matcher_->GetIsolatedWordId(), token.lid); | |
196 | EXPECT_EQ(pos_matcher_->GetIsolatedWordId(), token.rid); | |
195 | EXPECT_EQ(pos_matcher_.GetIsolatedWordId(), token.lid); | |
196 | EXPECT_EQ(pos_matcher_.GetIsolatedWordId(), token.rid); | |
197 | 197 | EXPECT_EQ(Token::NONE, token.attributes); |
198 | 198 | } |
199 | 199 |
289 | 289 | }; |
290 | 290 | |
291 | 291 | UserDictionary::UserDictionary(const UserPOSInterface *user_pos, |
292 | const POSMatcher *pos_matcher, | |
292 | POSMatcher pos_matcher, | |
293 | 293 | SuppressionDictionary *suppression_dictionary) |
294 | 294 | : ALLOW_THIS_IN_INITIALIZER_LIST( |
295 | 295 | reloader_(new UserDictionaryReloader(this))), |
299 | 299 | tokens_(new TokensIndex(user_pos_.get(), suppression_dictionary)), |
300 | 300 | mutex_(new ReaderWriterMutex) { |
301 | 301 | DCHECK(user_pos_.get()); |
302 | DCHECK(pos_matcher_); | |
303 | 302 | DCHECK(suppression_dictionary_); |
304 | 303 | Reload(); |
305 | 304 | } |
363 | 362 | } |
364 | 363 | FillTokenFromUserPOSToken(**it, &token); |
365 | 364 | // Override POS IDs for suggest only words. |
366 | if (pos_matcher_->IsSuggestOnlyWord((*it)->id)) { | |
367 | token.lid = token.rid = pos_matcher_->GetUnknownId(); | |
365 | if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) { | |
366 | token.lid = token.rid = pos_matcher_.GetUnknownId(); | |
368 | 367 | } |
369 | 368 | if (callback->OnToken((*it)->key, (*it)->key, token) == |
370 | 369 | Callback::TRAVERSE_DONE) { |
402 | 401 | if ((*it)->key > key) { |
403 | 402 | break; |
404 | 403 | } |
405 | if (pos_matcher_->IsSuggestOnlyWord((*it)->id)) { | |
404 | if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) { | |
406 | 405 | continue; |
407 | 406 | } |
408 | 407 | if (!Util::StartsWith(key, (*it)->key)) { |
456 | 455 | Token token; |
457 | 456 | for (; range.first != range.second; ++range.first) { |
458 | 457 | const UserPOS::Token &user_pos_token = **range.first; |
459 | if (pos_matcher_->IsSuggestOnlyWord(user_pos_token.id)) { | |
458 | if (pos_matcher_.IsSuggestOnlyWord(user_pos_token.id)) { | |
460 | 459 | continue; |
461 | 460 | } |
462 | 461 | FillTokenFromUserPOSToken(user_pos_token, &token); |
50 | 50 | class UserDictionary : public DictionaryInterface { |
51 | 51 | public: |
52 | 52 | UserDictionary(const UserPOSInterface *user_pos, |
53 | const POSMatcher *pos_matcher, | |
53 | POSMatcher pos_matcher, | |
54 | 54 | SuppressionDictionary *suppression_dictionary); |
55 | 55 | virtual ~UserDictionary(); |
56 | 56 | |
114 | 114 | |
115 | 115 | std::unique_ptr<UserDictionaryReloader> reloader_; |
116 | 116 | std::unique_ptr<const UserPOSInterface> user_pos_; |
117 | const POSMatcher *pos_matcher_; | |
117 | const POSMatcher pos_matcher_; | |
118 | 118 | SuppressionDictionary *suppression_dictionary_; |
119 | 119 | TokensIndex *tokens_; |
120 | 120 | mutable std::unique_ptr<ReaderWriterMutex> mutex_; |
219 | 219 | const testing::MockUserPosManager user_pos_manager; |
220 | 220 | return new UserDictionary( |
221 | 221 | new UserPOSMock(), |
222 | user_pos_manager.GetPOSMatcher(), | |
222 | dictionary::POSMatcher(user_pos_manager.GetPOSMatcherData()), | |
223 | 223 | suppression_dictionary_.get()); |
224 | 224 | } |
225 | 225 | |
226 | 226 | // Creates a user dictionary with actual pos data. |
227 | 227 | UserDictionary *CreateDictionary() { |
228 | 228 | const testing::MockUserPosManager user_pos_manager; |
229 | return new UserDictionary(UserPOS::CreateFromDataManager(user_pos_manager), | |
230 | user_pos_manager.GetPOSMatcher(), | |
231 | Singleton<SuppressionDictionary>::get()); | |
229 | return new UserDictionary( | |
230 | UserPOS::CreateFromDataManager(user_pos_manager), | |
231 | dictionary::POSMatcher(user_pos_manager.GetPOSMatcherData()), | |
232 | Singleton<SuppressionDictionary>::get()); | |
232 | 233 | } |
233 | 234 | |
234 | 235 | struct Entry { |
566 | 567 | |
567 | 568 | // "suggestion_only" should not be looked up. |
568 | 569 | const testing::MockUserPosManager user_pos_manager; |
569 | const uint16 kNounId = user_pos_manager.GetPOSMatcher()->GetGeneralNounId(); | |
570 | const dictionary::POSMatcher pos_matcher( | |
571 | user_pos_manager.GetPOSMatcherData()); | |
572 | const uint16 kNounId = pos_matcher.GetGeneralNounId(); | |
570 | 573 | const Entry kExpected1[] = {{"key", "noun", kNounId, kNounId}}; |
571 | 574 | TestLookupExactHelper(kExpected1, arraysize(kExpected1), |
572 | 575 | "key", 3, *user_dic.get()); |
148 | 148 | suppression_dictionary_.reset(new SuppressionDictionary); |
149 | 149 | CHECK(suppression_dictionary_.get()); |
150 | 150 | |
151 | pos_matcher_.Set(data_manager->GetPOSMatcherData()); | |
152 | ||
151 | 153 | user_dictionary_.reset( |
152 | 154 | new UserDictionary(UserPOS::CreateFromDataManager(*data_manager), |
153 | data_manager->GetPOSMatcher(), | |
155 | pos_matcher_, | |
154 | 156 | suppression_dictionary_.get())); |
155 | 157 | CHECK(user_dictionary_.get()); |
156 | 158 | |
162 | 164 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
163 | 165 | dictionary_.reset(new DictionaryImpl( |
164 | 166 | sysdic, // DictionaryImpl takes the ownership |
165 | new ValueDictionary(*data_manager->GetPOSMatcher(), | |
166 | &sysdic->value_trie()), | |
167 | new ValueDictionary(pos_matcher_, &sysdic->value_trie()), | |
167 | 168 | user_dictionary_.get(), |
168 | 169 | suppression_dictionary_.get(), |
169 | data_manager->GetPOSMatcher())); | |
170 | &pos_matcher_)); | |
170 | 171 | CHECK(dictionary_.get()); |
171 | 172 | |
172 | 173 | StringPiece suffix_key_array_data, suffix_value_array_data; |
202 | 203 | suppression_dictionary_.get(), |
203 | 204 | connector_.get(), |
204 | 205 | segmenter_.get(), |
205 | data_manager->GetPOSMatcher(), | |
206 | &pos_matcher_, | |
206 | 207 | pos_group_.get(), |
207 | 208 | suggestion_filter_.get())); |
208 | 209 | CHECK(immutable_converter_.get()); |
227 | 228 | suffix_dictionary_.get(), |
228 | 229 | connector_.get(), |
229 | 230 | segmenter_.get(), |
230 | data_manager->GetPOSMatcher(), | |
231 | &pos_matcher_, | |
231 | 232 | suggestion_filter_.get()); |
232 | 233 | CHECK(dictionary_predictor); |
233 | 234 | |
234 | 235 | PredictorInterface *user_history_predictor = |
235 | 236 | new UserHistoryPredictor(dictionary_.get(), |
236 | data_manager->GetPOSMatcher(), | |
237 | &pos_matcher_, | |
237 | 238 | suppression_dictionary_.get(), |
238 | 239 | enable_content_word_learning); |
239 | 240 | CHECK(user_history_predictor); |
249 | 250 | dictionary_.get()); |
250 | 251 | CHECK(rewriter_); |
251 | 252 | |
252 | converter_impl->Init(data_manager->GetPOSMatcher(), | |
253 | converter_impl->Init(&pos_matcher_, | |
253 | 254 | suppression_dictionary_.get(), |
254 | 255 | predictor_, |
255 | 256 | rewriter_, |
34 | 34 | #include "base/port.h" |
35 | 35 | #include "dictionary/dictionary_interface.h" |
36 | 36 | #include "dictionary/pos_group.h" |
37 | #include "dictionary/pos_matcher.h" | |
37 | 38 | #include "dictionary/user_dictionary.h" |
38 | 39 | #include "engine/engine_interface.h" |
39 | 40 | |
53 | 54 | class Engine : public EngineInterface { |
54 | 55 | public: |
55 | 56 | Engine(); |
56 | virtual ~Engine(); | |
57 | ~Engine() override; | |
57 | 58 | |
58 | 59 | // Initializes the object by given a data manager (providing embedded data |
59 | 60 | // set) and predictor factory function. |
63 | 64 | PredictorInterface *), |
64 | 65 | bool enable_content_word_learning); |
65 | 66 | |
66 | virtual ConverterInterface *GetConverter() const { return converter_.get(); } | |
67 | virtual PredictorInterface *GetPredictor() const { return predictor_; } | |
68 | virtual dictionary::SuppressionDictionary *GetSuppressionDictionary() { | |
67 | ConverterInterface *GetConverter() const override { return converter_.get(); } | |
68 | PredictorInterface *GetPredictor() const override { return predictor_; } | |
69 | dictionary::SuppressionDictionary *GetSuppressionDictionary() override { | |
69 | 70 | return suppression_dictionary_.get(); |
70 | 71 | } |
71 | 72 | |
72 | virtual bool Reload(); | |
73 | bool Reload() override; | |
73 | 74 | |
74 | virtual UserDataManagerInterface *GetUserDataManager() { | |
75 | UserDataManagerInterface *GetUserDataManager() override { | |
75 | 76 | return user_data_manager_.get(); |
76 | 77 | } |
77 | 78 | |
78 | 79 | private: |
80 | dictionary::POSMatcher pos_matcher_; | |
79 | 81 | std::unique_ptr<dictionary::SuppressionDictionary> suppression_dictionary_; |
80 | 82 | std::unique_ptr<const Connector> connector_; |
81 | 83 | std::unique_ptr<const Segmenter> segmenter_; |
0 | 0 | MAJOR=2 |
1 | 1 | MINOR=17 |
2 | BUILD=2519 | |
2 | BUILD=2520 | |
3 | 3 | REVISION=102 |
4 | 4 | # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be |
5 | 5 | # downloaded by NaCl Mozc. |
6 | NACL_DICTIONARY_VERSION=21 | |
6 | NACL_DICTIONARY_VERSION=22 |
215 | 215 | const DictionaryInterface *suffix_dictionary = NULL) { |
216 | 216 | testing::MockDataManager data_manager; |
217 | 217 | |
218 | pos_matcher_ = data_manager.GetPOSMatcher(); | |
218 | pos_matcher_.Set(data_manager.GetPOSMatcherData()); | |
219 | 219 | suppression_dictionary_.reset(new SuppressionDictionary); |
220 | 220 | if (!dictionary) { |
221 | 221 | dictionary_mock_ = new DictionaryMock; |
246 | 246 | suppression_dictionary_.get(), |
247 | 247 | connector_.get(), |
248 | 248 | segmenter_.get(), |
249 | pos_matcher_, | |
249 | &pos_matcher_, | |
250 | 250 | pos_group_.get(), |
251 | 251 | suggestion_filter_.get())); |
252 | 252 | converter_.reset(new ConverterMock()); |
257 | 257 | suffix_dictionary_.get(), |
258 | 258 | connector_.get(), |
259 | 259 | segmenter_.get(), |
260 | data_manager.GetPOSMatcher(), | |
260 | &pos_matcher_, | |
261 | 261 | suggestion_filter_.get())); |
262 | 262 | } |
263 | 263 | |
264 | 264 | const POSMatcher &pos_matcher() const { |
265 | return *pos_matcher_; | |
265 | return pos_matcher_; | |
266 | 266 | } |
267 | 267 | |
268 | 268 | DictionaryMock *mutable_dictionary() { |
282 | 282 | } |
283 | 283 | |
284 | 284 | private: |
285 | const POSMatcher *pos_matcher_; | |
285 | POSMatcher pos_matcher_; | |
286 | 286 | unique_ptr<SuppressionDictionary> suppression_dictionary_; |
287 | 287 | unique_ptr<const Connector> connector_; |
288 | 288 | unique_ptr<const Segmenter> segmenter_; |
1566 | 1566 | Segmenter::CreateFromDataManager(data_manager)); |
1567 | 1567 | unique_ptr<const SuggestionFilter> suggestion_filter( |
1568 | 1568 | CreateSuggestionFilter(data_manager)); |
1569 | const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData()); | |
1569 | 1570 | unique_ptr<TestableDictionaryPredictor> predictor( |
1570 | 1571 | new TestableDictionaryPredictor(converter.get(), |
1571 | 1572 | immutable_converter.get(), |
1573 | 1574 | suffix_dictionary.get(), |
1574 | 1575 | connector.get(), |
1575 | 1576 | segmenter.get(), |
1576 | data_manager.GetPOSMatcher(), | |
1577 | &pos_matcher, | |
1577 | 1578 | suggestion_filter.get())); |
1578 | 1579 | |
1579 | 1580 | // "わたしのなまえはなかのです" |
3159 | 3160 | Segmenter::CreateFromDataManager(data_manager)); |
3160 | 3161 | unique_ptr<const SuggestionFilter> suggestion_filter( |
3161 | 3162 | CreateSuggestionFilter(data_manager)); |
3163 | const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData()); | |
3162 | 3164 | unique_ptr<TestableDictionaryPredictor> predictor( |
3163 | 3165 | new TestableDictionaryPredictor(converter.get(), |
3164 | 3166 | immutable_converter.get(), |
3166 | 3168 | suffix_dictionary.get(), |
3167 | 3169 | connector.get(), |
3168 | 3170 | segmenter.get(), |
3169 | data_manager.GetPOSMatcher(), | |
3171 | &pos_matcher, | |
3170 | 3172 | suggestion_filter.get())); |
3171 | 3173 | Segments segments; |
3172 | 3174 | // "わたしのなまえはなかのです" |
39 | 39 | #include "config/config_handler.h" |
40 | 40 | #include "converter/segments.h" |
41 | 41 | #include "data_manager/scoped_data_manager_initializer_for_testing.h" |
42 | #include "data_manager/user_pos_manager.h" | |
42 | #include "data_manager/testing/mock_data_manager.h" | |
43 | 43 | #include "dictionary/dictionary_mock.h" |
44 | #include "dictionary/pos_matcher.h" | |
44 | 45 | #include "dictionary/suppression_dictionary.h" |
45 | 46 | #include "prediction/predictor_interface.h" |
46 | 47 | #include "prediction/user_history_predictor.h" |
126 | 127 | |
127 | 128 | } // namespace |
128 | 129 | |
129 | class MobilePredictorTest : public testing::Test { | |
130 | class MobilePredictorTest : public ::testing::Test { | |
130 | 131 | protected: |
131 | 132 | virtual void SetUp() { |
132 | 133 | config_.reset(new config::Config); |
192 | 193 | |
193 | 194 | TEST_F(MobilePredictorTest, CallPredictorsForMobilePartialPrediction) { |
194 | 195 | DictionaryMock dictionary_mock; |
196 | testing::MockDataManager data_manager; | |
197 | const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData()); | |
195 | 198 | unique_ptr<MobilePredictor> predictor( |
196 | 199 | new MobilePredictor( |
197 | 200 | new CheckCandSizePredictor(200), |
198 | 201 | new UserHistoryPredictor( |
199 | 202 | &dictionary_mock, |
200 | UserPosManager::GetUserPosManager()->GetPOSMatcher(), | |
203 | &pos_matcher, | |
201 | 204 | Singleton<SuppressionDictionary>::get(), |
202 | 205 | true))); |
203 | 206 | Segments segments; |
231 | 234 | } |
232 | 235 | |
233 | 236 | |
234 | class PredictorTest : public testing::Test { | |
237 | class PredictorTest : public ::testing::Test { | |
235 | 238 | protected: |
236 | 239 | virtual void SetUp() { |
237 | 240 | config_.reset(new config::Config); |
322 | 322 | unique_ptr<DictionaryMock> dictionary; |
323 | 323 | unique_ptr<SuppressionDictionary> suppression_dictionary; |
324 | 324 | unique_ptr<UserHistoryPredictor> predictor; |
325 | dictionary::POSMatcher pos_matcher; | |
325 | 326 | }; |
326 | 327 | |
327 | 328 | DataAndPredictor *CreateDataAndPredictor() const { |
329 | 330 | testing::MockDataManager data_manager; |
330 | 331 | ret->dictionary.reset(new DictionaryMock); |
331 | 332 | ret->suppression_dictionary.reset(new SuppressionDictionary); |
333 | ret->pos_matcher.Set(data_manager.GetPOSMatcherData()); | |
332 | 334 | ret->predictor.reset( |
333 | 335 | new UserHistoryPredictor(ret->dictionary.get(), |
334 | data_manager.GetPOSMatcher(), | |
336 | &ret->pos_matcher, | |
335 | 337 | ret->suppression_dictionary.get(), |
336 | 338 | false)); |
337 | 339 | return ret; |
505 | 505 | // Segment is adverb if; |
506 | 506 | // 1) lid and rid is adverb. |
507 | 507 | // 2) or rid is adverb suffix. |
508 | ((pos_matcher_->IsAdverb(segments->segment(i - 1).candidate(0).lid) && | |
509 | pos_matcher_->IsAdverb(segments->segment(i - 1).candidate(0).rid)) || | |
510 | pos_matcher_->IsAdverbSegmentSuffix( | |
508 | ((pos_matcher_.IsAdverb(segments->segment(i - 1).candidate(0).lid) && | |
509 | pos_matcher_.IsAdverb(segments->segment(i - 1).candidate(0).rid)) || | |
510 | pos_matcher_.IsAdverbSegmentSuffix( | |
511 | 511 | segments->segment(i - 1).candidate(0).rid)) && |
512 | 512 | (cand.content_value != cand.value || |
513 | 513 | cand.value != "\xe3\x83\xbb")) { // "・" workaround |
583 | 583 | |
584 | 584 | CollocationRewriter::CollocationRewriter( |
585 | 585 | const DataManagerInterface *data_manager) |
586 | : pos_matcher_(data_manager->GetPOSMatcher()), | |
587 | first_name_id_(pos_matcher_->GetFirstNameId()), | |
588 | last_name_id_(pos_matcher_->GetLastNameId()) { | |
586 | : pos_matcher_(data_manager->GetPOSMatcherData()), | |
587 | first_name_id_(pos_matcher_.GetFirstNameId()), | |
588 | last_name_id_(pos_matcher_.GetLastNameId()) { | |
589 | 589 | const char *data = NULL; |
590 | 590 | size_t size = 0; |
591 | 591 |
31 | 31 | |
32 | 32 | #include "base/port.h" |
33 | 33 | #include "converter/segments.h" |
34 | #include "dictionary/pos_matcher.h" | |
34 | 35 | #include "rewriter/rewriter_interface.h" |
35 | 36 | |
36 | 37 | namespace mozc { |
37 | 38 | |
38 | 39 | class DataManagerInterface; |
39 | ||
40 | namespace dictionary { class POSMatcher; } | |
41 | 40 | |
42 | 41 | class CollocationRewriter : public RewriterInterface { |
43 | 42 | public: |
57 | 56 | Segment *seg) const; |
58 | 57 | bool RewriteCollocation(Segments *segments) const; |
59 | 58 | |
60 | const dictionary::POSMatcher *pos_matcher_; | |
59 | const dictionary::POSMatcher pos_matcher_; | |
61 | 60 | const uint16 first_name_id_; |
62 | 61 | const uint16 last_name_id_; |
63 | 62 |
73 | 73 | const size_t segments_size; |
74 | 74 | }; |
75 | 75 | |
76 | CollocationRewriterTest() {} | |
77 | virtual ~CollocationRewriterTest() {} | |
78 | ||
79 | virtual void SetUp() { | |
76 | CollocationRewriterTest() = default; | |
77 | ~CollocationRewriterTest() override = default; | |
78 | ||
79 | void SetUp() override { | |
80 | 80 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
81 | 81 | |
82 | 82 | const mozc::testing::MockDataManager data_manager; |
83 | pos_matcher_ = data_manager.GetPOSMatcher(); | |
83 | pos_matcher_.Set(data_manager.GetPOSMatcherData()); | |
84 | 84 | collocation_rewriter_.reset(new CollocationRewriter(&data_manager)); |
85 | 85 | } |
86 | 86 | |
125 | 125 | return result; |
126 | 126 | } |
127 | 127 | |
128 | const POSMatcher *pos_matcher_; | |
128 | POSMatcher pos_matcher_; | |
129 | 129 | |
130 | 130 | private: |
131 | 131 | std::unique_ptr<const CollocationRewriter> collocation_rewriter_; |
142 | 142 | const char *kNekowo = |
143 | 143 | "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを" |
144 | 144 | const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ" |
145 | const uint16 id = pos_matcher_->GetUnknownId(); | |
145 | const uint16 id = pos_matcher_.GetUnknownId(); | |
146 | 146 | const CandidateData kNekowoCands[] = { |
147 | 147 | {kNekowo, kNeko, |
148 | 148 | "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを" |
194 | 194 | "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D\xE3\x82\x92"; // "まぐろを" |
195 | 195 | const char *kMaguro = |
196 | 196 | "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D"; // "まぐろ" |
197 | const uint16 id = pos_matcher_->GetUnknownId(); | |
197 | const uint16 id = pos_matcher_.GetUnknownId(); | |
198 | 198 | const CandidateData kMagurowoCands[] = { |
199 | 199 | {kMagurowo, kMaguro, |
200 | 200 | "\xE3\x83\x9E\xE3\x82\xB0\xE3\x83\xAD\xE3\x82\x92", // "マグロを" |
240 | 240 | const char *kNekowo = |
241 | 241 | "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを" |
242 | 242 | const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ" |
243 | const uint16 id = pos_matcher_->GetUnknownId(); | |
243 | const uint16 id = pos_matcher_.GetUnknownId(); | |
244 | 244 | const CandidateData kNekowoCands[] = { |
245 | 245 | {kNekowo, kNeko, |
246 | 246 | "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを" |
254 | 254 | |
255 | 255 | // "すごく" |
256 | 256 | const char *kSugoku = "\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f"; |
257 | const uint16 adverb_id = pos_matcher_->GetAdverbId(); | |
257 | const uint16 adverb_id = pos_matcher_.GetAdverbId(); | |
258 | 258 | const CandidateData kSugokuCands[] = { |
259 | 259 | {kSugoku, kSugoku, kSugoku, kSugoku, 0, adverb_id, adverb_id}, |
260 | 260 | }; |
297 | 297 | const char *kNekowo = |
298 | 298 | "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを" |
299 | 299 | const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ" |
300 | const uint16 id = pos_matcher_->GetUnknownId(); | |
300 | const uint16 id = pos_matcher_.GetUnknownId(); | |
301 | 301 | const CandidateData kNekowoCands[] = { |
302 | 302 | {kNekowo, kNeko, |
303 | 303 | "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを" |
361 | 361 | const char *kNekowo = |
362 | 362 | "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを" |
363 | 363 | const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ" |
364 | const uint16 id = pos_matcher_->GetUnknownId(); | |
364 | const uint16 id = pos_matcher_.GetUnknownId(); | |
365 | 365 | const CandidateData kNekowoCands[] = { |
366 | 366 | {kNekowo, kNeko, |
367 | 367 | "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを" |
40 | 40 | #include "base/hash.h" |
41 | 41 | #include "base/logging.h" |
42 | 42 | #include "data_manager/user_pos_manager.h" |
43 | #include "dictionary/pos_matcher.h" | |
44 | 43 | #include "dictionary/user_pos.h" |
45 | 44 | |
46 | 45 | namespace mozc { |
81 | 80 | DictionaryGenerator::DictionaryGenerator() |
82 | 81 | : token_pool_(new ObjectPool<Token>(kTokenSize)), |
83 | 82 | token_map_(new map<uint64, Token *>), |
84 | open_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher() | |
85 | ->GetOpenBracketId()), | |
86 | close_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher() | |
87 | ->GetCloseBracketId()) { | |
83 | pos_matcher_(UserPosManager::GetUserPosManager()->GetPOSMatcherData()), | |
84 | open_bracket_id_(pos_matcher_.GetOpenBracketId()), | |
85 | close_bracket_id_(pos_matcher_.GetCloseBracketId()) { | |
88 | 86 | user_pos_.reset(dictionary::UserPOS::CreateFromDataManager( |
89 | 87 | *UserPosManager::GetUserPosManager())); |
90 | 88 | } |
37 | 37 | #include <vector> |
38 | 38 | |
39 | 39 | #include "base/port.h" |
40 | #include "dictionary/pos_matcher.h" | |
40 | 41 | |
41 | 42 | namespace mozc { |
42 | 43 | |
108 | 109 | bool Output(const string &filename) const; |
109 | 110 | |
110 | 111 | private: |
111 | std::unique_ptr<ObjectPool<Token> > token_pool_; | |
112 | std::unique_ptr<map<uint64, Token *> > token_map_; | |
112 | std::unique_ptr<ObjectPool<Token>> token_pool_; | |
113 | std::unique_ptr<map<uint64, Token *>> token_map_; | |
113 | 114 | std::unique_ptr<const UserPOSInterface> user_pos_; |
115 | const dictionary::POSMatcher pos_matcher_; | |
114 | 116 | const uint16 open_bracket_id_; |
115 | 117 | const uint16 close_bracket_id_; |
116 | 118 |
421 | 421 | TEST_F(EmojiRewriterTest, CheckDescription) { |
422 | 422 | Segments segments; |
423 | 423 | VariantsRewriter variants_rewriter( |
424 | UserPosManager::GetUserPosManager()->GetPOSMatcher()); | |
424 | dictionary::POSMatcher( | |
425 | UserPosManager::GetUserPosManager()->GetPOSMatcherData())); | |
425 | 426 | |
426 | 427 | SetSegment("Emoji", "test", &segments); |
427 | 428 | EXPECT_TRUE(rewriter_->Rewrite(convreq_, &segments)); |
37 | 37 | #include "base/util.h" |
38 | 38 | #include "converter/segments.h" |
39 | 39 | #include "data_manager/data_manager_interface.h" |
40 | #include "dictionary/pos_matcher.h" | |
41 | 40 | #include "rewriter/number_compound_util.h" |
42 | 41 | |
43 | 42 | namespace mozc { |
129 | 128 | } // namespace |
130 | 129 | |
131 | 130 | FocusCandidateRewriter::FocusCandidateRewriter( |
132 | const DataManagerInterface *data_manager) { | |
131 | const DataManagerInterface *data_manager) | |
132 | : pos_matcher_(data_manager->GetPOSMatcherData()) { | |
133 | 133 | const char *array = nullptr; |
134 | 134 | size_t size = 0; |
135 | 135 | data_manager->GetCounterSuffixSortedArray(&array, &size); |
138 | 138 | // in debug build. |
139 | 139 | DCHECK(SerializedStringArray::VerifyData(data)); |
140 | 140 | suffix_array_.Set(data); |
141 | ||
142 | pos_matcher_ = data_manager->GetPOSMatcher(); | |
143 | 141 | } |
144 | 142 | |
145 | 143 | FocusCandidateRewriter::~FocusCandidateRewriter() {} |
398 | 396 | // Otherwise, the following wrong rewrite will occur. |
399 | 397 | // Example: "一階へは | 二回 | 行った -> 一階へは | 二階 | 行った" |
400 | 398 | if (cand.content_value.size() != cand.value.size()) { |
401 | if (!pos_matcher_->IsParallelMarker(cand.rid)) { | |
399 | if (!pos_matcher_.IsParallelMarker(cand.rid)) { | |
402 | 400 | return false; |
403 | 401 | } |
404 | 402 | } |
32 | 32 | #include "base/port.h" |
33 | 33 | #include "base/serialized_string_array.h" |
34 | 34 | #include "converter/segments.h" |
35 | #include "dictionary/pos_matcher.h" | |
35 | 36 | #include "rewriter/rewriter_interface.h" |
36 | 37 | |
37 | 38 | namespace mozc { |
38 | 39 | |
39 | 40 | class DataManagerInterface; |
40 | 41 | struct CounterSuffixEntry; |
41 | ||
42 | namespace dictionary { class POSMatcher; } | |
43 | 42 | |
44 | 43 | class FocusCandidateRewriter : public RewriterInterface { |
45 | 44 | public: |
83 | 82 | uint32 *script_type) const; |
84 | 83 | |
85 | 84 | SerializedStringArray suffix_array_; |
86 | const dictionary::POSMatcher *pos_matcher_; | |
85 | const dictionary::POSMatcher pos_matcher_; | |
87 | 86 | |
88 | 87 | DISALLOW_COPY_AND_ASSIGN(FocusCandidateRewriter); |
89 | 88 | }; |
102 | 102 | |
103 | 103 | LanguageAwareRewriter *CreateLanguageAwareRewriter() const { |
104 | 104 | return new LanguageAwareRewriter( |
105 | *UserPosManager::GetUserPosManager()->GetPOSMatcher(), | |
105 | dictionary::POSMatcher( | |
106 | UserPosManager::GetUserPosManager()->GetPOSMatcherData()), | |
106 | 107 | dictionary_mock_.get()); |
107 | 108 | } |
108 | 109 |
148 | 148 | ASSERT_TRUE(suffix_array.Init(data)); |
149 | 149 | |
150 | 150 | const testing::MockDataManager data_manager; |
151 | const POSMatcher* pos_matcher = data_manager.GetPOSMatcher(); | |
151 | const POSMatcher pos_matcher(data_manager.GetPOSMatcherData()); | |
152 | 152 | |
153 | 153 | Segment::Candidate c; |
154 | 154 | |
155 | 155 | c.Init(); |
156 | c.lid = pos_matcher->GetNumberId(); | |
157 | c.rid = pos_matcher->GetNumberId(); | |
158 | EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c)); | |
156 | c.lid = pos_matcher.GetNumberId(); | |
157 | c.rid = pos_matcher.GetNumberId(); | |
158 | EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c)); | |
159 | 159 | |
160 | 160 | c.Init(); |
161 | c.lid = pos_matcher->GetKanjiNumberId(); | |
162 | c.rid = pos_matcher->GetKanjiNumberId(); | |
163 | EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c)); | |
161 | c.lid = pos_matcher.GetKanjiNumberId(); | |
162 | c.rid = pos_matcher.GetKanjiNumberId(); | |
163 | EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c)); | |
164 | 164 | |
165 | 165 | c.Init(); |
166 | c.lid = pos_matcher->GetNumberId(); | |
167 | c.rid = pos_matcher->GetCounterSuffixWordId(); | |
168 | EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c)); | |
166 | c.lid = pos_matcher.GetNumberId(); | |
167 | c.rid = pos_matcher.GetCounterSuffixWordId(); | |
168 | EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c)); | |
169 | 169 | |
170 | 170 | c.Init(); |
171 | c.lid = pos_matcher->GetNumberId(); | |
172 | c.rid = pos_matcher->GetParallelMarkerId(); | |
173 | EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c)); | |
171 | c.lid = pos_matcher.GetNumberId(); | |
172 | c.rid = pos_matcher.GetParallelMarkerId(); | |
173 | EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c)); | |
174 | 174 | |
175 | 175 | c.Init(); |
176 | 176 | c.value = "\xE4\xB8\x80\xE9\x9A\x8E"; //"一階" |
177 | 177 | c.content_value = "\xE4\xB8\x80\xE9\x9A\x8E"; //"一階" |
178 | c.lid = pos_matcher->GetNumberId(); | |
179 | c.rid = pos_matcher->GetNumberId(); | |
180 | EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c)); | |
178 | c.lid = pos_matcher.GetNumberId(); | |
179 | c.rid = pos_matcher.GetNumberId(); | |
180 | EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c)); | |
181 | 181 | |
182 | 182 | c.Init(); |
183 | c.lid = pos_matcher->GetAdverbId(); | |
184 | c.rid = pos_matcher->GetAdverbId(); | |
185 | EXPECT_FALSE(IsNumber(suffix_array, *pos_matcher, c)); | |
183 | c.lid = pos_matcher.GetAdverbId(); | |
184 | c.rid = pos_matcher.GetAdverbId(); | |
185 | EXPECT_FALSE(IsNumber(suffix_array, pos_matcher, c)); | |
186 | 186 | } |
187 | 187 | |
188 | 188 | } // namespace number_compound_util |
421 | 421 | } // namespace |
422 | 422 | |
423 | 423 | NumberRewriter::NumberRewriter(const DataManagerInterface *data_manager) |
424 | : pos_matcher_(data_manager->GetPOSMatcher()) { | |
424 | : pos_matcher_(data_manager->GetPOSMatcherData()) { | |
425 | 425 | const char *array = nullptr; |
426 | 426 | size_t size = 0; |
427 | 427 | data_manager->GetCounterSuffixSortedArray(&array, &size); |
458 | 458 | |
459 | 459 | for (size_t i = 0; i < segments->conversion_segments_size(); ++i) { |
460 | 460 | Segment *seg = segments->mutable_conversion_segment(i); |
461 | modified |= RewriteOneSegment(suffix_array_, *pos_matcher_, | |
461 | modified |= RewriteOneSegment(suffix_array_, pos_matcher_, | |
462 | 462 | exec_radix_conversion, seg); |
463 | 463 | } |
464 | 464 |
50 | 50 | |
51 | 51 | private: |
52 | 52 | SerializedStringArray suffix_array_; |
53 | const dictionary::POSMatcher *pos_matcher_; | |
53 | const dictionary::POSMatcher pos_matcher_; | |
54 | 54 | |
55 | 55 | DISALLOW_COPY_AND_ASSIGN(NumberRewriter); |
56 | 56 | }; |
89 | 89 | return false; |
90 | 90 | } |
91 | 91 | |
92 | Segment *SetupSegments(const POSMatcher* pos_matcher, | |
92 | Segment *SetupSegments(const POSMatcher& pos_matcher, | |
93 | 93 | const string &candidate_value, Segments *segments) { |
94 | 94 | segments->Clear(); |
95 | 95 | Segment *segment = segments->push_back_segment(); |
96 | 96 | Segment::Candidate *candidate = segment->add_candidate(); |
97 | 97 | candidate->Init(); |
98 | candidate->lid = pos_matcher->GetNumberId(); | |
99 | candidate->rid = pos_matcher->GetNumberId(); | |
98 | candidate->lid = pos_matcher.GetNumberId(); | |
99 | candidate->rid = pos_matcher.GetNumberId(); | |
100 | 100 | candidate->value = candidate_value; |
101 | 101 | candidate->content_value = candidate_value; |
102 | 102 | return segment; |
129 | 129 | // considering this class as POD. |
130 | 130 | NumberRewriterTest() {} |
131 | 131 | |
132 | virtual void SetUp() { | |
132 | void SetUp() override { | |
133 | 133 | #ifdef MOZC_USE_PACKED_DICTIONARY |
134 | 134 | // TODO(noriyukit): Currently this test uses mock data manager. Check if we |
135 | 135 | // can remove this registration of packed data manager. |
142 | 142 | #endif // MOZC_USE_PACKED_DICTIONARY |
143 | 143 | |
144 | 144 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
145 | pos_matcher_ = mock_data_manager_.GetPOSMatcher(); | |
146 | } | |
147 | ||
148 | virtual void TearDown() { | |
145 | pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData()); | |
146 | } | |
147 | ||
148 | void TearDown() override { | |
149 | 149 | #ifdef MOZC_USE_PACKED_DICTIONARY |
150 | 150 | // Unregisters mocked PackedDataManager. |
151 | 151 | packed::RegisterPackedDataManager(NULL); |
157 | 157 | } |
158 | 158 | |
159 | 159 | const testing::MockDataManager mock_data_manager_; |
160 | const POSMatcher *pos_matcher_; | |
160 | POSMatcher pos_matcher_; | |
161 | 161 | const ConversionRequest default_request_; |
162 | 162 | }; |
163 | 163 | |
176 | 176 | Segment *seg = segments.push_back_segment(); |
177 | 177 | Segment::Candidate *candidate = seg->add_candidate(); |
178 | 178 | candidate->Init(); |
179 | candidate->lid = pos_matcher_->GetNumberId(); | |
180 | candidate->rid = pos_matcher_->GetNumberId(); | |
179 | candidate->lid = pos_matcher_.GetNumberId(); | |
180 | candidate->rid = pos_matcher_.GetNumberId(); | |
181 | 181 | candidate->value = "012"; |
182 | 182 | candidate->content_value = "012"; |
183 | 183 | |
251 | 251 | Segment *seg = segments.push_back_segment(); |
252 | 252 | Segment::Candidate *candidate = seg->add_candidate(); |
253 | 253 | candidate->Init(); |
254 | candidate->lid = pos_matcher_->GetNumberId(); | |
255 | candidate->rid = pos_matcher_->GetNumberId(); | |
254 | candidate->lid = pos_matcher_.GetNumberId(); | |
255 | candidate->rid = pos_matcher_.GetNumberId(); | |
256 | 256 | candidate->value = "012"; |
257 | 257 | candidate->content_value = "012"; |
258 | 258 | EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments)); |
267 | 267 | Segment *seg = segments.push_back_segment(); |
268 | 268 | Segment::Candidate *candidate = seg->add_candidate(); |
269 | 269 | candidate->Init(); |
270 | candidate->lid = pos_matcher_->GetNumberId(); | |
271 | candidate->rid = pos_matcher_->GetNumberId(); | |
270 | candidate->lid = pos_matcher_.GetNumberId(); | |
271 | candidate->rid = pos_matcher_.GetNumberId(); | |
272 | 272 | candidate->value = "012""\xE3\x81\x8C"; // "012が" |
273 | 273 | candidate->content_value = "012"; |
274 | 274 | |
325 | 325 | Segment *seg = segments.push_back_segment(); |
326 | 326 | Segment::Candidate *candidate = seg->add_candidate(); |
327 | 327 | candidate->Init(); |
328 | candidate->lid = pos_matcher_->GetNumberId(); | |
329 | candidate->rid = pos_matcher_->GetCounterSuffixWordId(); | |
328 | candidate->lid = pos_matcher_.GetNumberId(); | |
329 | candidate->rid = pos_matcher_.GetCounterSuffixWordId(); | |
330 | 330 | candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x80\x8B"; // "十五個" |
331 | 331 | candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x80\x8B"; // ditto |
332 | 332 | |
354 | 354 | Segment *seg = segments.push_back_segment(); |
355 | 355 | Segment::Candidate *candidate = seg->add_candidate(); |
356 | 356 | candidate->Init(); |
357 | candidate->lid = pos_matcher_->GetNumberId(); | |
358 | candidate->rid = pos_matcher_->GetCounterSuffixWordId(); | |
357 | candidate->lid = pos_matcher_.GetNumberId(); | |
358 | candidate->rid = pos_matcher_.GetCounterSuffixWordId(); | |
359 | 359 | candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x9B\x9E"; // "十五回" |
360 | 360 | candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x9B\x9E"; // ditto |
361 | 361 | candidate = seg->add_candidate(); |
362 | 362 | candidate->Init(); |
363 | candidate->lid = pos_matcher_->GetNumberId(); | |
364 | candidate->rid = pos_matcher_->GetCounterSuffixWordId(); | |
363 | candidate->lid = pos_matcher_.GetNumberId(); | |
364 | candidate->rid = pos_matcher_.GetCounterSuffixWordId(); | |
365 | 365 | candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE9\x9A\x8E"; // "十五階" |
366 | 366 | candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE9\x9A\x8E"; // ditto |
367 | 367 | |
457 | 457 | second_candidate->Init(); |
458 | 458 | |
459 | 459 | second_candidate->value = "0"; |
460 | second_candidate->lid = pos_matcher_->GetNumberId(); | |
461 | second_candidate->rid = pos_matcher_->GetNumberId(); | |
460 | second_candidate->lid = pos_matcher_.GetNumberId(); | |
461 | second_candidate->rid = pos_matcher_.GetNumberId(); | |
462 | 462 | second_candidate->content_value = second_candidate->value; |
463 | 463 | |
464 | 464 | EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments)); |
521 | 521 | Segment *seg = segments.push_back_segment(); |
522 | 522 | Segment::Candidate *candidate = seg->add_candidate(); |
523 | 523 | candidate->Init(); |
524 | candidate->lid = pos_matcher_->GetNumberId(); | |
525 | candidate->rid = pos_matcher_->GetNumberId(); | |
524 | candidate->lid = pos_matcher_.GetNumberId(); | |
525 | candidate->rid = pos_matcher_.GetNumberId(); | |
526 | 526 | candidate->value = "0"; |
527 | 527 | candidate->content_value = "0"; |
528 | 528 | |
563 | 563 | Segment *seg = segments.push_back_segment(); |
564 | 564 | Segment::Candidate *candidate = seg->add_candidate(); |
565 | 565 | candidate->Init(); |
566 | candidate->lid = pos_matcher_->GetNumberId(); | |
567 | candidate->rid = pos_matcher_->GetNumberId(); | |
566 | candidate->lid = pos_matcher_.GetNumberId(); | |
567 | candidate->rid = pos_matcher_.GetNumberId(); | |
568 | 568 | candidate->value = "00"; |
569 | 569 | candidate->content_value = "00"; |
570 | 570 | |
603 | 603 | Segment *seg = segments.push_back_segment(); |
604 | 604 | Segment::Candidate *candidate = seg->add_candidate(); |
605 | 605 | candidate->Init(); |
606 | candidate->lid = pos_matcher_->GetNumberId(); | |
607 | candidate->rid = pos_matcher_->GetNumberId(); | |
606 | candidate->lid = pos_matcher_.GetNumberId(); | |
607 | candidate->rid = pos_matcher_.GetNumberId(); | |
608 | 608 | candidate->value = "1000000000000000000"; |
609 | 609 | candidate->content_value = "1000000000000000000"; |
610 | 610 | |
687 | 687 | Segment *seg = segments.push_back_segment(); |
688 | 688 | Segment::Candidate *candidate = seg->add_candidate(); |
689 | 689 | candidate->Init(); |
690 | candidate->lid = pos_matcher_->GetNumberId(); | |
691 | candidate->rid = pos_matcher_->GetNumberId(); | |
690 | candidate->lid = pos_matcher_.GetNumberId(); | |
691 | candidate->rid = pos_matcher_.GetNumberId(); | |
692 | 692 | candidate->value = "18446744073709551616"; // 2^64 |
693 | 693 | candidate->content_value = "18446744073709551616"; |
694 | 694 | |
792 | 792 | Segment *seg = segments.push_back_segment(); |
793 | 793 | Segment::Candidate *candidate = seg->add_candidate(); |
794 | 794 | candidate->Init(); |
795 | candidate->lid = pos_matcher_->GetNumberId(); | |
796 | candidate->rid = pos_matcher_->GetNumberId(); | |
795 | candidate->lid = pos_matcher_.GetNumberId(); | |
796 | candidate->rid = pos_matcher_.GetNumberId(); | |
797 | 797 | |
798 | 798 | // 10^100 as "100000 ... 0" |
799 | 799 | string input = "1"; |
872 | 872 | Segment::Candidate *candidate = segment->add_candidate(); |
873 | 873 | candidate = segment->add_candidate(); |
874 | 874 | candidate->Init(); |
875 | candidate->lid = pos_matcher_->GetNumberId(); | |
876 | candidate->rid = pos_matcher_->GetNumberId(); | |
875 | candidate->lid = pos_matcher_.GetNumberId(); | |
876 | candidate->rid = pos_matcher_.GetNumberId(); | |
877 | 877 | // "さんびゃく" |
878 | 878 | candidate->key = |
879 | 879 | "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f"; |
907 | 907 | "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f"); |
908 | 908 | Segment::Candidate *candidate = segment->add_candidate(); |
909 | 909 | candidate->Init(); |
910 | candidate->lid = pos_matcher_->GetNumberId(); | |
911 | candidate->rid = pos_matcher_->GetNumberId(); | |
910 | candidate->lid = pos_matcher_.GetNumberId(); | |
911 | candidate->rid = pos_matcher_.GetNumberId(); | |
912 | 912 | // "さんびゃく" |
913 | 913 | candidate->key = |
914 | 914 | "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f"; |
919 | 919 | |
920 | 920 | candidate = segment->add_candidate(); |
921 | 921 | candidate->Init(); |
922 | candidate->lid = pos_matcher_->GetNumberId(); | |
923 | candidate->rid = pos_matcher_->GetNumberId(); | |
922 | candidate->lid = pos_matcher_.GetNumberId(); | |
923 | candidate->rid = pos_matcher_.GetNumberId(); | |
924 | 924 | // "さんびゃく" |
925 | 925 | candidate->key = |
926 | 926 | "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f"; |
953 | 953 | segment->set_key("\xe3\x81\x84\xe3\x81\xa1"); |
954 | 954 | Segment::Candidate *candidate = segment->add_candidate(); |
955 | 955 | candidate->Init(); |
956 | candidate->lid = pos_matcher_->GetUnknownId(); // Not number POS | |
957 | candidate->rid = pos_matcher_->GetUnknownId(); | |
956 | candidate->lid = pos_matcher_.GetUnknownId(); // Not number POS | |
957 | candidate->rid = pos_matcher_.GetUnknownId(); | |
958 | 958 | // "いち" |
959 | 959 | candidate->key = "\xe3\x81\x84\xe3\x81\xa1"; |
960 | 960 | // "いち" |
966 | 966 | |
967 | 967 | candidate = segment->add_candidate(); |
968 | 968 | candidate->Init(); |
969 | candidate->lid = pos_matcher_->GetNumberId(); // Number POS | |
970 | candidate->rid = pos_matcher_->GetNumberId(); | |
969 | candidate->lid = pos_matcher_.GetNumberId(); // Number POS | |
970 | candidate->rid = pos_matcher_.GetNumberId(); | |
971 | 971 | // "いち" |
972 | 972 | candidate->key = "\xe3\x81\x84\xe3\x81\xa1"; |
973 | 973 | // "いち" |
991 | 991 | // "壱" |
992 | 992 | EXPECT_TRUE(FindCandidateId(segments.segment(0), "\xe5\xa3\xb1", &daiji_pos)); |
993 | 993 | EXPECT_GT(daiji_pos, 0); |
994 | EXPECT_EQ(pos_matcher_->GetNumberId(), | |
994 | EXPECT_EQ(pos_matcher_.GetNumberId(), | |
995 | 995 | segments.segment(0).candidate(daiji_pos).lid); |
996 | EXPECT_EQ(pos_matcher_->GetNumberId(), | |
996 | EXPECT_EQ(pos_matcher_.GetNumberId(), | |
997 | 997 | segments.segment(0).candidate(daiji_pos).rid); |
998 | 998 | } |
999 | 999 | |
1019 | 1019 | Segment *seg = segments.push_back_segment(); |
1020 | 1020 | Segment::Candidate *candidate = seg->add_candidate(); |
1021 | 1021 | candidate->Init(); |
1022 | candidate->lid = pos_matcher_->GetNumberId(); | |
1023 | candidate->rid = pos_matcher_->GetNumberId(); | |
1022 | candidate->lid = pos_matcher_.GetNumberId(); | |
1023 | candidate->rid = pos_matcher_.GetNumberId(); | |
1024 | 1024 | candidate->value = kSuccess[i][0]; |
1025 | 1025 | candidate->content_value = kSuccess[i][0]; |
1026 | 1026 | EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments)); |
1046 | 1046 | Segment *seg = segments.push_back_segment(); |
1047 | 1047 | Segment::Candidate *candidate = seg->add_candidate(); |
1048 | 1048 | candidate->Init(); |
1049 | candidate->lid = pos_matcher_->GetNumberId(); | |
1050 | candidate->rid = pos_matcher_->GetNumberId(); | |
1049 | candidate->lid = pos_matcher_.GetNumberId(); | |
1050 | candidate->rid = pos_matcher_.GetNumberId(); | |
1051 | 1051 | candidate->value = kFail[i][0]; |
1052 | 1052 | candidate->content_value = kFail[i][0]; |
1053 | 1053 | EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments)); |
1072 | 1072 | Segment *seg = segments.push_back_segment(); |
1073 | 1073 | Segment::Candidate *candidate = seg->add_candidate(); |
1074 | 1074 | candidate->Init(); |
1075 | candidate->lid = pos_matcher_->GetGeneralNounId(); | |
1076 | candidate->rid = pos_matcher_->GetGeneralNounId(); | |
1075 | candidate->lid = pos_matcher_.GetGeneralNounId(); | |
1076 | candidate->rid = pos_matcher_.GetGeneralNounId(); | |
1077 | 1077 | // "はやぶさ" |
1078 | 1078 | candidate->key = "\xE3\x81\xAF\xE3\x82\x84\xE3\x81\xB6\xE3\x81\x95"; |
1079 | 1079 | candidate->content_key = candidate->key; |
1134 | 1134 | cand->content_key = cand->key; |
1135 | 1135 | cand->value = "\xE7\x99\xBE\xE8\x88\x8C\xE9\xB3\xA5"; // "百舌鳥" |
1136 | 1136 | cand->content_value = cand->value; |
1137 | cand->lid = pos_matcher_->GetGeneralNounId(); | |
1138 | cand->rid = pos_matcher_->GetGeneralNounId(); | |
1137 | cand->lid = pos_matcher_.GetGeneralNounId(); | |
1138 | cand->rid = pos_matcher_.GetGeneralNounId(); | |
1139 | 1139 | EXPECT_FALSE(number_rewriter->Rewrite(default_request_, &segments)); |
1140 | 1140 | } |
1141 | 1141 | |
1148 | 1148 | Segment *seg = segments.push_back_segment(); |
1149 | 1149 | Segment::Candidate *candidate = seg->add_candidate(); |
1150 | 1150 | candidate->Init(); |
1151 | candidate->lid = pos_matcher_->GetNumberId(); | |
1152 | candidate->rid = pos_matcher_->GetNumberId(); | |
1151 | candidate->lid = pos_matcher_.GetNumberId(); | |
1152 | candidate->rid = pos_matcher_.GetNumberId(); | |
1153 | 1153 | candidate->key = "090"; |
1154 | 1154 | candidate->value = "090"; |
1155 | 1155 | candidate->content_key = "090"; |
1192 | 1192 | "\xe3\x81\xa8\xe3\x81\xb1\xe3\x81\xa3\xe3\x81\x8f"); |
1193 | 1193 | Segment::Candidate *candidate = seg->add_candidate(); |
1194 | 1194 | candidate->Init(); |
1195 | candidate->lid = pos_matcher_->GetNumberId(); | |
1196 | candidate->rid = pos_matcher_->GetNumberId(); | |
1195 | candidate->lid = pos_matcher_.GetNumberId(); | |
1196 | candidate->rid = pos_matcher_.GetNumberId(); | |
1197 | 1197 | // "ひとり" |
1198 | 1198 | candidate->key = "\xe3\x81\xb2\xe3\x81\xa8\xe3\x82\x8a"; |
1199 | 1199 | // "一人" |
88 | 88 | RewriterImpl::RewriterImpl(const ConverterInterface *parent_converter, |
89 | 89 | const DataManagerInterface *data_manager, |
90 | 90 | const PosGroup *pos_group, |
91 | const DictionaryInterface *dictionary) { | |
91 | const DictionaryInterface *dictionary) | |
92 | : pos_matcher_(data_manager->GetPOSMatcherData()) { | |
92 | 93 | DCHECK(parent_converter); |
93 | 94 | DCHECK(data_manager); |
94 | 95 | DCHECK(pos_group); |
95 | const POSMatcher *pos_matcher = data_manager->GetPOSMatcher(); | |
96 | DCHECK(pos_matcher); | |
97 | 96 | // |dictionary| can be NULL |
98 | 97 | |
99 | 98 | AddRewriter(new UserDictionaryRewriter); |
100 | 99 | AddRewriter(new FocusCandidateRewriter(data_manager)); |
101 | AddRewriter(new LanguageAwareRewriter(*pos_matcher, dictionary)); | |
102 | AddRewriter(new TransliterationRewriter(*pos_matcher)); | |
100 | AddRewriter(new LanguageAwareRewriter(pos_matcher_, dictionary)); | |
101 | AddRewriter(new TransliterationRewriter(pos_matcher_)); | |
103 | 102 | AddRewriter(new EnglishVariantsRewriter); |
104 | 103 | AddRewriter(new NumberRewriter(data_manager)); |
105 | 104 | AddRewriter(new CollocationRewriter(data_manager)); |
106 | AddRewriter(new SingleKanjiRewriter(*pos_matcher)); | |
105 | AddRewriter(new SingleKanjiRewriter(pos_matcher_)); | |
107 | 106 | AddRewriter(new EmojiRewriter( |
108 | 107 | kEmojiDataList, arraysize(kEmojiDataList), |
109 | 108 | kEmojiTokenList, arraysize(kEmojiTokenList), |
112 | 111 | AddRewriter(new CalculatorRewriter(parent_converter)); |
113 | 112 | AddRewriter(new SymbolRewriter(parent_converter, data_manager)); |
114 | 113 | AddRewriter(new UnicodeRewriter(parent_converter)); |
115 | AddRewriter(new VariantsRewriter(pos_matcher)); | |
116 | AddRewriter(new ZipcodeRewriter(pos_matcher)); | |
114 | AddRewriter(new VariantsRewriter(pos_matcher_)); | |
115 | AddRewriter(new ZipcodeRewriter(&pos_matcher_)); | |
117 | 116 | AddRewriter(new DiceRewriter); |
118 | 117 | |
119 | 118 | if (FLAGS_use_history_rewriter) { |
120 | 119 | AddRewriter(new UserBoundaryHistoryRewriter(parent_converter)); |
121 | AddRewriter(new UserSegmentHistoryRewriter(pos_matcher, pos_group)); | |
120 | AddRewriter(new UserSegmentHistoryRewriter(&pos_matcher_, pos_group)); | |
122 | 121 | } |
123 | 122 | |
124 | 123 | AddRewriter(new DateRewriter); |
32 | 32 | #include "base/port.h" |
33 | 33 | #include "dictionary/dictionary_interface.h" |
34 | 34 | #include "dictionary/pos_group.h" |
35 | #include "dictionary/pos_matcher.h" | |
35 | 36 | #include "rewriter/merger_rewriter.h" |
36 | 37 | |
37 | 38 | namespace mozc { |
47 | 48 | const dictionary::DictionaryInterface *dictionary); |
48 | 49 | |
49 | 50 | private: |
51 | const dictionary::POSMatcher pos_matcher_; | |
50 | 52 | DISALLOW_COPY_AND_ASSIGN(RewriterImpl); |
51 | 53 | }; |
52 | 54 |
238 | 238 | } // namespace |
239 | 239 | |
240 | 240 | SingleKanjiRewriter::SingleKanjiRewriter(const POSMatcher &pos_matcher) |
241 | : pos_matcher_(&pos_matcher) {} | |
241 | : pos_matcher_(pos_matcher) {} | |
242 | 242 | |
243 | 243 | SingleKanjiRewriter::~SingleKanjiRewriter() {} |
244 | 244 | |
269 | 269 | continue; |
270 | 270 | } |
271 | 271 | InsertCandidate(is_single_segment, |
272 | pos_matcher_->GetGeneralSymbolId(), | |
272 | pos_matcher_.GetGeneralSymbolId(), | |
273 | 273 | kanji_list, |
274 | 274 | segments->mutable_conversion_segment(i)); |
275 | 275 | |
288 | 288 | const Segment::Candidate &right_candidate = |
289 | 289 | segments->conversion_segment(i + 1).candidate(0); |
290 | 290 | // right segment must be a noun. |
291 | if (!pos_matcher_->IsContentNoun(right_candidate.lid)) { | |
291 | if (!pos_matcher_.IsContentNoun(right_candidate.lid)) { | |
292 | 292 | continue; |
293 | 293 | } |
294 | 294 | } else if (segments_size != 1) { // also apply if segments_size == 1. |
301 | 301 | if (token == NULL) { |
302 | 302 | continue; |
303 | 303 | } |
304 | InsertNounPrefix(*pos_matcher_, | |
304 | InsertNounPrefix(pos_matcher_, | |
305 | 305 | segments->mutable_conversion_segment(i), |
306 | 306 | token->value, token->value_size); |
307 | 307 | // Ignore the next noun content word. |
37 | 37 | class SingleKanjiRewriter : public RewriterInterface { |
38 | 38 | public: |
39 | 39 | explicit SingleKanjiRewriter(const dictionary::POSMatcher &pos_matcher); |
40 | virtual ~SingleKanjiRewriter(); | |
40 | ~SingleKanjiRewriter() override; | |
41 | 41 | |
42 | virtual int capability(const ConversionRequest &request) const; | |
42 | int capability(const ConversionRequest &request) const override; | |
43 | 43 | |
44 | virtual bool Rewrite(const ConversionRequest &request, | |
45 | Segments *segments) const; | |
44 | bool Rewrite(const ConversionRequest &request, | |
45 | Segments *segments) const override; | |
46 | 46 | |
47 | 47 | private: |
48 | const dictionary::POSMatcher *pos_matcher_; | |
48 | const dictionary::POSMatcher pos_matcher_; | |
49 | 49 | }; |
50 | 50 | |
51 | 51 | } // namespace mozc |
51 | 51 | protected: |
52 | 52 | SingleKanjiRewriterTest() { |
53 | 53 | data_manager_.reset(new testing::MockDataManager); |
54 | pos_matcher_ = data_manager_->GetPOSMatcher(); | |
55 | } | |
56 | ||
57 | virtual ~SingleKanjiRewriterTest() {} | |
58 | ||
59 | virtual void SetUp() { | |
54 | pos_matcher_.Set(data_manager_->GetPOSMatcherData()); | |
55 | } | |
56 | ||
57 | ~SingleKanjiRewriterTest() override = default; | |
58 | ||
59 | void SetUp() override { | |
60 | 60 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
61 | 61 | } |
62 | 62 | |
63 | 63 | SingleKanjiRewriter *CreateSingleKanjiRewriter() const { |
64 | return new SingleKanjiRewriter(*pos_matcher_); | |
64 | return new SingleKanjiRewriter(pos_matcher_); | |
65 | 65 | } |
66 | 66 | |
67 | 67 | const POSMatcher &pos_matcher() { |
68 | return *pos_matcher_; | |
68 | return pos_matcher_; | |
69 | 69 | } |
70 | 70 | |
71 | 71 | const ConversionRequest default_request_; |
72 | 72 | |
73 | 73 | private: |
74 | 74 | std::unique_ptr<testing::MockDataManager> data_manager_; |
75 | const POSMatcher *pos_matcher_; | |
75 | POSMatcher pos_matcher_; | |
76 | 76 | }; |
77 | 77 | |
78 | 78 | TEST_F(SingleKanjiRewriterTest, CapabilityTest) { |
105 | 105 | |
106 | 106 | TransliterationRewriter *CreateTransliterationRewriter() const { |
107 | 107 | return new TransliterationRewriter( |
108 | *UserPosManager::GetUserPosManager()->GetPOSMatcher()); | |
108 | dictionary::POSMatcher( | |
109 | UserPosManager::GetUserPosManager()->GetPOSMatcherData())); | |
109 | 110 | } |
110 | 111 | |
111 | 112 | const commands::Request &default_request() const { |
50 | 50 | |
51 | 51 | UsageRewriter::UsageRewriter(const DataManagerInterface *data_manager, |
52 | 52 | const DictionaryInterface *dictionary) |
53 | : pos_matcher_(data_manager->GetPOSMatcher()), | |
53 | : pos_matcher_(data_manager->GetPOSMatcherData()), | |
54 | 54 | dictionary_(dictionary), |
55 | 55 | base_conjugation_suffix_(nullptr) { |
56 | 56 | StringPiece base_conjugation_suffix_data; |
142 | 142 | const Segment::Candidate &candidate) const { |
143 | 143 | // We check Unknwon POS ("名詞,サ変接続") as well, since |
144 | 144 | // target verbs/adjectives may be in web dictionary. |
145 | if (!pos_matcher_->IsContentWordWithConjugation(candidate.lid) && | |
146 | !pos_matcher_->IsUnknown(candidate.lid)) { | |
145 | if (!pos_matcher_.IsContentWordWithConjugation(candidate.lid) && | |
146 | !pos_matcher_.IsUnknown(candidate.lid)) { | |
147 | 147 | return UsageDictItemIterator(); |
148 | 148 | } |
149 | 149 |
112 | 112 | const Segment::Candidate &candidate) const; |
113 | 113 | |
114 | 114 | map<StrPair, UsageDictItemIterator> key_value_usageitem_map_; |
115 | const dictionary::POSMatcher *pos_matcher_; | |
115 | const dictionary::POSMatcher pos_matcher_; | |
116 | 116 | const dictionary::DictionaryInterface *dictionary_; |
117 | 117 | const uint32 *base_conjugation_suffix_; |
118 | 118 | SerializedStringArray string_array_; |
82 | 82 | config::ConfigHandler::GetDefaultConfig(&config_); |
83 | 83 | |
84 | 84 | data_manager_.reset(new testing::MockDataManager); |
85 | ||
85 | pos_matcher_.Set(data_manager_->GetPOSMatcherData()); | |
86 | 86 | suppression_dictionary_.reset(new SuppressionDictionary); |
87 | 87 | user_dictionary_.reset( |
88 | 88 | new UserDictionary(UserPOS::CreateFromDataManager(*data_manager_), |
89 | data_manager_->GetPOSMatcher(), | |
89 | pos_matcher_, | |
90 | 90 | suppression_dictionary_.get())); |
91 | 91 | } |
92 | 92 | |
108 | 108 | std::unique_ptr<SuppressionDictionary> suppression_dictionary_; |
109 | 109 | std::unique_ptr<UserDictionary> user_dictionary_; |
110 | 110 | std::unique_ptr<testing::MockDataManager> data_manager_; |
111 | dictionary::POSMatcher pos_matcher_; | |
111 | 112 | }; |
112 | 113 | |
113 | 114 | TEST_F(UsageRewriterTest, CapabilityTest) { |
113 | 113 | request_.set_config(&config_); |
114 | 114 | } |
115 | 115 | |
116 | virtual void SetUp() { | |
116 | void SetUp() override { | |
117 | 117 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
118 | 118 | |
119 | 119 | ConfigHandler::GetDefaultConfig(&config_); |
130 | 130 | |
131 | 131 | Clock::SetClockForUnitTest(NULL); |
132 | 132 | |
133 | pos_matcher_ = mock_data_manager_.GetPOSMatcher(); | |
133 | pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData()); | |
134 | 134 | pos_group_.reset(new PosGroup(mock_data_manager_.GetPosGroupData())); |
135 | ASSERT_TRUE(pos_matcher_ != NULL); | |
136 | 135 | ASSERT_TRUE(pos_group_.get() != NULL); |
137 | 136 | } |
138 | 137 | |
139 | virtual void TearDown() { | |
138 | void TearDown() override { | |
140 | 139 | Clock::SetClockForUnitTest(NULL); |
141 | 140 | |
142 | 141 | std::unique_ptr<UserSegmentHistoryRewriter> rewriter( |
148 | 147 | } |
149 | 148 | |
150 | 149 | const POSMatcher &pos_matcher() const { |
151 | return *pos_matcher_; | |
150 | return pos_matcher_; | |
152 | 151 | } |
153 | 152 | |
154 | 153 | NumberRewriter *CreateNumberRewriter() const { |
156 | 155 | } |
157 | 156 | |
158 | 157 | UserSegmentHistoryRewriter *CreateUserSegmentHistoryRewriter() const { |
159 | return new UserSegmentHistoryRewriter(pos_matcher_, pos_group_.get()); | |
158 | return new UserSegmentHistoryRewriter(&pos_matcher_, pos_group_.get()); | |
160 | 159 | } |
161 | 160 | |
162 | 161 | void SetNumberForm(Config::CharacterForm form) { |
177 | 176 | |
178 | 177 | private: |
179 | 178 | const testing::MockDataManager mock_data_manager_; |
180 | const POSMatcher *pos_matcher_; | |
179 | POSMatcher pos_matcher_; | |
181 | 180 | std::unique_ptr<const PosGroup> pos_group_; |
182 | 181 | DISALLOW_COPY_AND_ASSIGN(UserSegmentHistoryRewriterTest); |
183 | 182 | }; |
131 | 131 | return true; |
132 | 132 | } |
133 | 133 | |
134 | VariantsRewriter::VariantsRewriter(const POSMatcher *pos_matcher) | |
134 | VariantsRewriter::VariantsRewriter(const POSMatcher pos_matcher) | |
135 | 135 | : pos_matcher_(pos_matcher) {} |
136 | 136 | |
137 | 137 | VariantsRewriter::~VariantsRewriter() {} |
324 | 324 | if (candidate->attributes & Segment::Candidate::NO_EXTRA_DESCRIPTION) { |
325 | 325 | continue; |
326 | 326 | } |
327 | SetDescriptionForTransliteration(*pos_matcher_, candidate); | |
327 | SetDescriptionForTransliteration(pos_matcher_, candidate); | |
328 | 328 | } |
329 | 329 | |
330 | 330 | // Regular Candidate |
343 | 343 | |
344 | 344 | if (original_candidate->attributes & |
345 | 345 | Segment::Candidate::NO_VARIANTS_EXPANSION) { |
346 | SetDescriptionForCandidate(*pos_matcher_, original_candidate); | |
346 | SetDescriptionForCandidate(pos_matcher_, original_candidate); | |
347 | 347 | VLOG(1) << "Canidate has NO_NORMALIZATION node"; |
348 | 348 | continue; |
349 | 349 | } |
355 | 355 | &alternative_content_value, |
356 | 356 | &default_inner_segment_boundary, |
357 | 357 | &alternative_inner_segment_boundary)) { |
358 | SetDescriptionForCandidate(*pos_matcher_, original_candidate); | |
358 | SetDescriptionForCandidate(pos_matcher_, original_candidate); | |
359 | 359 | continue; |
360 | 360 | } |
361 | 361 | |
408 | 408 | new_candidate->lid = original_candidate->lid; |
409 | 409 | new_candidate->rid = original_candidate->rid; |
410 | 410 | new_candidate->description = original_candidate->description; |
411 | SetDescription(*pos_matcher_, default_description_type, new_candidate); | |
411 | SetDescription(pos_matcher_, default_description_type, new_candidate); | |
412 | 412 | |
413 | 413 | original_candidate->value = alternative_value; |
414 | 414 | original_candidate->content_value = alternative_content_value; |
415 | SetDescription(*pos_matcher_, | |
415 | SetDescription(pos_matcher_, | |
416 | 416 | alternative_description_type, original_candidate); |
417 | 417 | ++i; // skip inserted candidate |
418 | 418 | } else if (type == SELECT_VARIANT) { |
421 | 421 | original_candidate->content_value = default_content_value; |
422 | 422 | original_candidate->inner_segment_boundary.swap( |
423 | 423 | default_inner_segment_boundary); |
424 | SetDescription(*pos_matcher_, | |
424 | SetDescription(pos_matcher_, | |
425 | 425 | default_description_type, original_candidate); |
426 | 426 | } |
427 | 427 | modified = true; |
53 | 53 | static const char *kDidYouMean; |
54 | 54 | static const char *kYenKigou; |
55 | 55 | |
56 | explicit VariantsRewriter(const dictionary::POSMatcher *pos_matcher); | |
56 | explicit VariantsRewriter(dictionary::POSMatcher pos_matcher); | |
57 | 57 | virtual ~VariantsRewriter(); |
58 | 58 | virtual int capability(const ConversionRequest &request) const; |
59 | 59 | virtual bool Rewrite(const ConversionRequest &request, |
111 | 111 | vector<uint32> *default_inner_segment_boundary, |
112 | 112 | vector<uint32> *alternative_inner_segment_boundary) const; |
113 | 113 | |
114 | const dictionary::POSMatcher *pos_matcher_; | |
114 | const dictionary::POSMatcher pos_matcher_; | |
115 | 115 | }; |
116 | 116 | |
117 | 117 | } // namespace mozc |
70 | 70 | // considering this class as POD. |
71 | 71 | VariantsRewriterTest() {} |
72 | 72 | |
73 | virtual void SetUp() { | |
73 | void SetUp() override { | |
74 | 74 | Reset(); |
75 | 75 | #ifdef MOZC_USE_PACKED_DICTIONARY |
76 | 76 | // Registers mocked PackedDataManager. |
80 | 80 | kPackedSystemDictionary_size))); |
81 | 81 | packed::RegisterPackedDataManager(data_manager.release()); |
82 | 82 | #endif // MOZC_USE_PACKED_DICTIONARY |
83 | pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
83 | pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
84 | 84 | } |
85 | 85 | |
86 | 86 | virtual void TearDown() { |
114 | 114 | return new VariantsRewriter(pos_matcher_); |
115 | 115 | } |
116 | 116 | |
117 | const POSMatcher *pos_matcher_; | |
117 | POSMatcher pos_matcher_; | |
118 | 118 | }; |
119 | 119 | |
120 | 120 | TEST_F(VariantsRewriterTest, RewriteTest) { |
349 | 349 | candidate.value = "HalfASCII"; |
350 | 350 | candidate.content_value = candidate.value; |
351 | 351 | candidate.content_key = "halfascii"; |
352 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
352 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
353 | 353 | // "[半] アルファベット" |
354 | 354 | EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth, |
355 | 355 | VariantsRewriter::kAlphabet), |
362 | 362 | candidate.value = "Half ASCII"; |
363 | 363 | candidate.content_value = candidate.value; |
364 | 364 | candidate.content_key = "half ascii"; |
365 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
365 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
366 | 366 | // "[半] アルファベット" |
367 | 367 | EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth, |
368 | 368 | VariantsRewriter::kAlphabet), |
374 | 374 | candidate.value = "Half!ASCII!"; |
375 | 375 | candidate.content_value = candidate.value; |
376 | 376 | candidate.content_key = "half!ascii!"; |
377 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
377 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
378 | 378 | // "[半] アルファベット" |
379 | 379 | EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth, |
380 | 380 | VariantsRewriter::kAlphabet), |
389 | 389 | candidate.content_key = |
390 | 390 | "\xe3\x81\x97\xe3\x83\xbc\xe3\x81\xa7\xe3\x81\x83\xe3" |
391 | 391 | "\x83\xbc\xe3\x82\x8d\xe3\x82\x80"; |
392 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
392 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
393 | 393 | // "[半] アルファベット" |
394 | 394 | EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth, |
395 | 395 | VariantsRewriter::kAlphabet), |
406 | 406 | candidate.content_key = |
407 | 407 | "\xe3\x81\x93\xe3\x81\x8e\xe3\x81\xa8\xe3\x81\x88\xe3\x82\x8b\xe3\x81" |
408 | 408 | "\x94\xe3\x81\x99\xe3\x82\x80"; |
409 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
409 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
410 | 410 | // "[全] カタカナ" |
411 | 411 | EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth, |
412 | 412 | VariantsRewriter::kKatakana), |
418 | 418 | candidate.value = "!@#"; |
419 | 419 | candidate.content_value = candidate.value; |
420 | 420 | candidate.content_key = "!@#"; |
421 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
421 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
422 | 422 | // "[半]" |
423 | 423 | EXPECT_EQ(VariantsRewriter::kHalfWidth, candidate.description); |
424 | 424 | } |
430 | 430 | "\x80\x8d"; |
431 | 431 | candidate.content_value = candidate.value; |
432 | 432 | candidate.content_key = "[ABC]"; |
433 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
433 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
434 | 434 | // "[全] アルファベット" |
435 | 435 | EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth, |
436 | 436 | VariantsRewriter::kAlphabet), |
445 | 445 | // "くさなぎつよし" |
446 | 446 | candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA" |
447 | 447 | "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97"; |
448 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
448 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
449 | 449 | // "<機種依存文字>" |
450 | 450 | EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description); |
451 | 451 | } |
456 | 456 | candidate.content_value = candidate.value; |
457 | 457 | // "えん" |
458 | 458 | candidate.content_key = "\xE3\x81\x88\xE3\x82\x93"; |
459 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
459 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
460 | 460 | // "[半] バックスラッシュ" |
461 | 461 | const char *expected = |
462 | 462 | "\x5B\xE5\x8D\x8A\x5D\x20\xE3\x83\x90\xE3\x83\x83" |
471 | 471 | candidate.content_value = candidate.value; |
472 | 472 | // "えん" |
473 | 473 | candidate.content_key = "\xE3\x81\x88\xE3\x82\x93"; |
474 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
474 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
475 | 475 | // "[全] バックスラッシュ" |
476 | 476 | const char *expected = |
477 | 477 | "\x5B\xE5\x85\xA8\x5D\x20\xE3\x83\x90\xE3\x83\x83\xE3\x82\xAF" |
485 | 485 | candidate.content_value = candidate.value; |
486 | 486 | // "えん" |
487 | 487 | candidate.content_key = "\xE3\x81\x88\xE3\x82\x93"; |
488 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
488 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
489 | 489 | // "[半] 円記号 <機種依存文字>" for Desktop, |
490 | 490 | // "[半] 円記号 <機種依存>" for Android |
491 | 491 | string expected =("[" "\xE5\x8D\x8A" "] " |
500 | 500 | candidate.content_value = candidate.value; |
501 | 501 | // "えん" |
502 | 502 | candidate.content_key = "\xE3\x81\x88\xE3\x82\x93"; |
503 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
503 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
504 | 504 | // "[全] 円記号" |
505 | 505 | const char *expected = |
506 | 506 | "[" "\xE5\x85\xA8" "] " "\xE5\x86\x86\xE8\xA8\x98\xE5\x8F\xB7"; |
516 | 516 | candidate.content_key = "\xE3\x81\xAD\xE3\x81\x9A\xE3\x81\xBF"; |
517 | 517 | // "絵文字" |
518 | 518 | candidate.description = "\xE7\xB5\xB5\xE6\x96\x87\xE5\xAD\x97"; |
519 | VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate); | |
519 | VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate); | |
520 | 520 | // "絵文字 <機種依存文字>" for Desktop, "絵文字 <機種依存>" for Andorid |
521 | 521 | string expected("\xE7\xB5\xB5\xE6\x96\x87\xE5\xAD\x97" " "); |
522 | 522 | expected.append(VariantsRewriter::kPlatformDependent); |
531 | 531 | candidate.value = "HalfASCII"; |
532 | 532 | candidate.content_value = candidate.value; |
533 | 533 | candidate.content_key = "halfascii"; |
534 | VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_, | |
534 | VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_, | |
535 | 535 | &candidate); |
536 | 536 | // "[半] アルファベット" |
537 | 537 | EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth, |
544 | 544 | candidate.value = "!@#"; |
545 | 545 | candidate.content_value = candidate.value; |
546 | 546 | candidate.content_key = "!@#"; |
547 | VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_, | |
547 | VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_, | |
548 | 548 | &candidate); |
549 | 549 | // "[半]" |
550 | 550 | EXPECT_EQ(VariantsRewriter::kHalfWidth, candidate.description); |
557 | 557 | "\x80\x8d"; |
558 | 558 | candidate.content_value = candidate.value; |
559 | 559 | candidate.content_key = "[ABC]"; |
560 | VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_, | |
560 | VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_, | |
561 | 561 | &candidate); |
562 | 562 | // "[全] アルファベット" |
563 | 563 | EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth, |
573 | 573 | // "くさなぎつよし" |
574 | 574 | candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA" |
575 | 575 | "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97"; |
576 | VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_, | |
576 | VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_, | |
577 | 577 | &candidate); |
578 | 578 | // "<機種依存文字>" |
579 | 579 | EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description); |
587 | 587 | candidate.value = "HalfASCII"; |
588 | 588 | candidate.content_value = candidate.value; |
589 | 589 | candidate.content_key = "halfascii"; |
590 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
590 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
591 | 591 | EXPECT_EQ("", candidate.description); |
592 | 592 | } |
593 | 593 | // containing symbols |
597 | 597 | candidate.value = "Half ASCII"; |
598 | 598 | candidate.content_value = candidate.value; |
599 | 599 | candidate.content_key = "half ascii"; |
600 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
600 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
601 | 601 | EXPECT_EQ("", candidate.description); |
602 | 602 | } |
603 | 603 | { |
606 | 606 | candidate.value = "Half!ASCII!"; |
607 | 607 | candidate.content_value = candidate.value; |
608 | 608 | candidate.content_key = "half!ascii!"; |
609 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
609 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
610 | 610 | EXPECT_EQ("", candidate.description); |
611 | 611 | } |
612 | 612 | { |
618 | 618 | candidate.content_key = |
619 | 619 | "\xe3\x81\x97\xe3\x83\xbc\xe3\x81\xa7\xe3\x81\x83\xe3" |
620 | 620 | "\x83\xbc\xe3\x82\x8d\xe3\x82\x80"; |
621 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
621 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
622 | 622 | EXPECT_EQ("", candidate.description); |
623 | 623 | } |
624 | 624 | { |
627 | 627 | candidate.value = "!@#"; |
628 | 628 | candidate.content_value = candidate.value; |
629 | 629 | candidate.content_key = "!@#"; |
630 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
630 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
631 | 631 | EXPECT_EQ("", candidate.description); |
632 | 632 | } |
633 | 633 | { |
638 | 638 | "\x80\x8d"; |
639 | 639 | candidate.content_value = candidate.value; |
640 | 640 | candidate.content_key = "[ABC]"; |
641 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
641 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
642 | 642 | EXPECT_EQ("", candidate.description); |
643 | 643 | } |
644 | 644 | { |
650 | 650 | // "くさなぎつよし" |
651 | 651 | candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA" |
652 | 652 | "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97"; |
653 | VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate); | |
653 | VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate); | |
654 | 654 | // "<機種依存文字>" |
655 | 655 | EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description); |
656 | 656 | } |
69 | 69 | candidate->content_value = value; |
70 | 70 | |
71 | 71 | if (type == ZIPCODE) { |
72 | const POSMatcher *pos_matcher = | |
73 | UserPosManager::GetUserPosManager()->GetPOSMatcher(); | |
74 | candidate->lid = pos_matcher->GetZipcodeId(); | |
75 | candidate->rid = pos_matcher->GetZipcodeId(); | |
72 | const POSMatcher pos_matcher( | |
73 | UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
74 | candidate->lid = pos_matcher.GetZipcodeId(); | |
75 | candidate->rid = pos_matcher.GetZipcodeId(); | |
76 | 76 | } |
77 | 77 | } |
78 | 78 | |
97 | 97 | |
98 | 98 | class ZipcodeRewriterTest : public ::testing::Test { |
99 | 99 | protected: |
100 | virtual void SetUp() { | |
100 | void SetUp() override { | |
101 | 101 | #ifdef MOZC_USE_PACKED_DICTIONARY |
102 | 102 | // Registers mocked PackedDataManager. |
103 | 103 | std::unique_ptr<packed::PackedDataManager> |
106 | 106 | kPackedSystemDictionary_size))); |
107 | 107 | packed::RegisterPackedDataManager(data_manager.release()); |
108 | 108 | #endif // MOZC_USE_PACKED_DICTIONARY |
109 | ||
109 | pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData()); | |
110 | 110 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
111 | 111 | } |
112 | 112 | |
113 | virtual void TearDown() { | |
113 | void TearDown() override { | |
114 | 114 | #ifdef MOZC_USE_PACKED_DICTIONARY |
115 | 115 | // Unregisters mocked PackedDataManager. |
116 | 116 | packed::RegisterPackedDataManager(NULL); |
118 | 118 | } |
119 | 119 | |
120 | 120 | ZipcodeRewriter *CreateZipcodeRewriter() const { |
121 | return new ZipcodeRewriter( | |
122 | UserPosManager::GetUserPosManager()->GetPOSMatcher()); | |
121 | return new ZipcodeRewriter(&pos_matcher_); | |
123 | 122 | } |
123 | ||
124 | dictionary::POSMatcher pos_matcher_; | |
124 | 125 | }; |
125 | 126 | |
126 | 127 | TEST_F(ZipcodeRewriterTest, BasicTest) { |