Codebase list mozc / fc4b2a3
Stop embedding POS matcher data as C++ code This CL introduces the binary format for POS matcher data and moves all the C++-embedded data to a binary data set file. Also, singletons of POSMatcher for each platform are removed. UserPosManager is a typedef of PackedDataManager in NaCl configuration, so the actual data manager must be registered before using it. However, the previous test didn't register it. This CL instead uses MockDataManager because using PackedDataManager in unit test doesn't make sense. BUG= TEST= REF_BUG=26841123 REF_CL=116521585,116524711 REF_TIME=2016-03-07T17:41:05+09:00 REF_TIME_RAW=1457340065 +0900 Noriyuki Takahashi 8 years ago
68 changed file(s) with 480 addition(s) and 638 deletion(s). Raw diff Collapse all Expand all
7171 // considering this class as POD.
7272 CandidateFilterTest() {}
7373
74 virtual void SetUp() {
74 void SetUp() override {
7575 candidate_freelist_.reset(new FreeList<Segment::Candidate>(1024));
7676 node_freelist_.reset(new FreeList<Node>(1024));
77 pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher();
77 pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData());
7878
7979 {
8080 mozc::testing::MockDataManager data_manager;
8585 }
8686 }
8787
88 virtual void TearDown() {
88 void TearDown() override {
8989 candidate_freelist_->Free();
9090 node_freelist_->Free();
9191 }
122122 }
123123
124124 const POSMatcher &pos_matcher() const {
125 return *pos_matcher_;
125 return pos_matcher_;
126126 }
127127
128128 CandidateFilter *CreateCandidateFilter(
129129 bool apply_suggestion_filter_for_exact_match) const {
130130 return new CandidateFilter(&suppression_dictionary_,
131 pos_matcher_,
131 &pos_matcher_,
132132 suggestion_filter_.get(),
133133 apply_suggestion_filter_for_exact_match);
134134 }
135135
136136 std::unique_ptr<FreeList<Segment::Candidate> > candidate_freelist_;
137137 std::unique_ptr<FreeList<Node> > node_freelist_;
138 const POSMatcher *pos_matcher_;
138 POSMatcher pos_matcher_;
139139 SuppressionDictionary suppression_dictionary_;
140140 std::unique_ptr<SuggestionFilter> suggestion_filter_;
141141 scoped_data_manager_initializer_for_testing
423423
424424 TEST_F(CandidateFilterTest, Regression3437022) {
425425 std::unique_ptr<SuppressionDictionary> dic(new SuppressionDictionary);
426 const POSMatcher *pos_matcher =
427 UserPosManager::GetUserPosManager()->GetPOSMatcher();
426 const POSMatcher pos_matcher(
427 UserPosManager::GetUserPosManager()->GetPOSMatcherData());
428428 std::unique_ptr<CandidateFilter> filter(
429 new CandidateFilter(dic.get(), pos_matcher,
429 new CandidateFilter(dic.get(), &pos_matcher,
430430 suggestion_filter_.get(), true));
431431
432432 vector<const Node *> n;
198198 std::unique_ptr<const SuggestionFilter> suggestion_filter;
199199 std::unique_ptr<ImmutableConverterInterface> immutable_converter;
200200 std::unique_ptr<ConverterImpl> converter;
201 dictionary::POSMatcher pos_matcher;
201202 };
202203
203204 // Returns initialized predictor for the given type.
278279 int dictionary_size = 0;
279280 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
280281
282 converter_and_data->pos_matcher.Set(data_manager.GetPOSMatcherData());
283
281284 SystemDictionary *sysdic =
282285 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
283286 converter_and_data->user_dictionary.reset(user_dictionary);
284287 converter_and_data->suppression_dictionary.reset(suppression_dictionary);
285288 converter_and_data->dictionary.reset(new DictionaryImpl(
286289 sysdic, // DictionaryImpl takes the ownership
287 new ValueDictionary(*data_manager.GetPOSMatcher(),
290 new ValueDictionary(converter_and_data->pos_matcher,
288291 &sysdic->value_trie()),
289292 converter_and_data->user_dictionary.get(),
290293 converter_and_data->suppression_dictionary.get(),
291 data_manager.GetPOSMatcher()));
294 &converter_and_data->pos_matcher));
292295 converter_and_data->pos_group.reset(
293296 new PosGroup(data_manager.GetPosGroupData()));
294297 converter_and_data->suggestion_filter.reset(
306309 converter_and_data->suppression_dictionary.get(),
307310 converter_and_data->connector.get(),
308311 converter_and_data->segmenter.get(),
309 data_manager.GetPOSMatcher(),
312 &converter_and_data->pos_matcher,
310313 converter_and_data->pos_group.get(),
311314 converter_and_data->suggestion_filter.get()));
312315 converter_and_data->converter.reset(new ConverterImpl);
313316
314317 PredictorInterface *predictor = CreatePredictor(
315 predictor_type, data_manager.GetPOSMatcher(), *converter_and_data);
318 predictor_type, &converter_and_data->pos_matcher, *converter_and_data);
316319 converter_and_data->converter->Init(
317 data_manager.GetPOSMatcher(),
320 &converter_and_data->pos_matcher,
318321 converter_and_data->suppression_dictionary.get(),
319322 predictor,
320323 rewriter,
343346 ConverterAndData *ret = new ConverterAndData;
344347
345348 testing::MockUserPosManager user_pos_manager;
349 ret->pos_matcher.Set(user_pos_manager.GetPOSMatcherData());
350
346351 SuppressionDictionary *suppression_dictionary = new SuppressionDictionary;
347352 dictionary::UserDictionary *user_dictionary =
348353 new dictionary::UserDictionary(
349354 dictionary::UserPOS::CreateFromDataManager(user_pos_manager),
350 user_pos_manager.GetPOSMatcher(),
355 ret->pos_matcher,
351356 suppression_dictionary);
352357 InitConverterAndData(
353358 user_dictionary, suppression_dictionary, rewriter, predictor_type, ret);
13651370 int dictionary_size = 0;
13661371 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
13671372
1373 const dictionary::POSMatcher pos_matcher(
1374 data_manager.GetPOSMatcherData());
1375
13681376 SystemDictionary *sysdic =
13691377 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
13701378 std::unique_ptr<DictionaryInterface> dictionary(new DictionaryImpl(
13711379 sysdic, // DictionaryImpl takes the ownership
1372 new ValueDictionary(*data_manager.GetPOSMatcher(),
1373 &sysdic->value_trie()),
1380 new ValueDictionary(pos_matcher, &sysdic->value_trie()),
13741381 mock_user_dictionary.get(),
13751382 suppression_dictionary.get(),
1376 data_manager.GetPOSMatcher()));
1383 &pos_matcher));
13771384 std::unique_ptr<const PosGroup> pos_group(
13781385 new PosGroup(data_manager.GetPosGroupData()));
13791386 std::unique_ptr<const DictionaryInterface> suffix_dictionary(
13901397 suppression_dictionary.get(),
13911398 connector.get(),
13921399 segmenter.get(),
1393 data_manager.GetPOSMatcher(),
1400 &pos_matcher,
13941401 pos_group.get(),
13951402 suggestion_filter.get()));
13961403 std::unique_ptr<const SuggestionFilter> suggegstion_filter(
13971404 CreateSuggestionFilter(data_manager));
13981405 std::unique_ptr<ConverterImpl> converter(new ConverterImpl);
13991406 const DictionaryInterface *kNullDictionary = nullptr;
1400 converter->Init(data_manager.GetPOSMatcher(),
1407 converter->Init(&pos_matcher,
14011408 suppression_dictionary.get(),
14021409 DefaultPredictor::CreateDefaultPredictor(
14031410 new DictionaryPredictor(
14071414 suffix_dictionary.get(),
14081415 connector.get(),
14091416 segmenter.get(),
1410 data_manager.GetPOSMatcher(),
1417 &pos_matcher,
14111418 suggegstion_filter.get()),
14121419 new UserHistoryPredictor(dictionary.get(),
1413 data_manager.GetPOSMatcher(),
1420 &pos_matcher,
14141421 suppression_dictionary.get(),
14151422 false)),
14161423 new RewriterImpl(converter.get(),
9393 const DictionaryInterface *suffix_dictionary = NULL) {
9494 data_manager_.reset(new testing::MockDataManager);
9595
96 const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher();
97 CHECK(pos_matcher);
96 pos_matcher_.Set(data_manager_->GetPOSMatcherData());
9897
9998 suppression_dictionary_.reset(new SuppressionDictionary);
10099 CHECK(suppression_dictionary_.get());
110109 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
111110 dictionary_.reset(new DictionaryImpl(
112111 sysdic, // DictionaryImpl takes the ownership
113 new ValueDictionary(*pos_matcher, &sysdic->value_trie()),
112 new ValueDictionary(pos_matcher_, &sysdic->value_trie()),
114113 &user_dictionary_stub_,
115114 suppression_dictionary_.get(),
116 pos_matcher));
115 &pos_matcher_));
117116 }
118117 CHECK(dictionary_.get());
119118
152151 suppression_dictionary_.get(),
153152 connector_.get(),
154153 segmenter_.get(),
155 pos_matcher,
154 &pos_matcher_,
156155 pos_group_.get(),
157156 suggestion_filter_.get()));
158157 CHECK(immutable_converter_.get());
173172 std::unique_ptr<const SuggestionFilter> suggestion_filter_;
174173 std::unique_ptr<ImmutableConverterImpl> immutable_converter_;
175174 UserDictionaryStub user_dictionary_stub_;
175 dictionary::POSMatcher pos_matcher_;
176176 };
177177
178178 } // namespace
7373 MockDataAndImmutableConverter() {
7474 data_manager_.reset(new testing::MockDataManager);
7575
76 const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher();
77 CHECK(pos_matcher);
76 pos_matcher_.Set(data_manager_->GetPOSMatcherData());
7877
7978 suppression_dictionary_.reset(new SuppressionDictionary);
8079 CHECK(suppression_dictionary_.get());
8786 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
8887 dictionary_.reset(new DictionaryImpl(
8988 sysdic, // DictionaryImpl takes the ownership
90 new ValueDictionary(*pos_matcher, &sysdic->value_trie()),
89 new ValueDictionary(pos_matcher_, &sysdic->value_trie()),
9190 &user_dictionary_stub_,
9291 suppression_dictionary_.get(),
93 pos_matcher));
92 &pos_matcher_));
9493 CHECK(dictionary_.get());
9594
9695 StringPiece suffix_key_array_data, suffix_value_array_data;
125124 suppression_dictionary_.get(),
126125 connector_.get(),
127126 segmenter_.get(),
128 pos_matcher,
127 &pos_matcher_,
129128 pos_group_.get(),
130129 suggestion_filter_.get()));
131130 CHECK(immutable_converter_.get());
139138 return new NBestGenerator(suppression_dictionary_.get(),
140139 segmenter_.get(),
141140 connector_.get(),
142 data_manager_->GetPOSMatcher(),
141 &pos_matcher_,
143142 lattice,
144143 suggestion_filter_.get(),
145144 true);
156155 std::unique_ptr<const SuggestionFilter> suggestion_filter_;
157156 std::unique_ptr<ImmutableConverterImpl> immutable_converter_;
158157 UserDictionaryStub user_dictionary_stub_;
158 dictionary::POSMatcher pos_matcher_;
159159 };
160160
161161 } // namespace
3131 #include "base/embedded_file.h"
3232 #include "base/logging.h"
3333 #include "base/singleton.h"
34 #include "dictionary/pos_matcher.h"
3534
3635 namespace mozc {
3736 namespace chromeos {
6160 manager_.GetUserPOSData(token_array_data, string_array_data);
6261 }
6362
64 namespace {
65 // This header file is autogenerated by gen_pos_matcher_code.py and contains
66 // kRuleIdTable[] and kRangeTable[].
67 #include "data_manager/chromeos/pos_matcher_data.h"
68
69 class ChromeOsPOSMatcher : public dictionary::POSMatcher {
70 public:
71 ChromeOsPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {}
72 };
73 } // namespace
74
75 const dictionary::POSMatcher *ChromeOsUserPosManager::GetPOSMatcher() const {
76 return Singleton<ChromeOsPOSMatcher>::get();
63 const uint16 *ChromeOsUserPosManager::GetPOSMatcherData() const {
64 return manager_.GetPOSMatcherData();
7765 }
7866
7967 } // namespace chromeos
4747 // following embedded data.
4848 void GetUserPOSData(StringPiece *token_array_data,
4949 StringPiece *string_array_data) const override;
50 const dictionary::POSMatcher *GetPOSMatcher() const override;
50 const uint16 *GetPOSMatcherData() const override;
5151
5252 // The following are implemented in ChromeOsDataManager.
5353 const uint8 *GetPosGroupData() const override { return nullptr; }
3737 namespace mozc {
3838 namespace {
3939
40 bool InitUserPosManagerDataFromReader(const DataSetReader &reader,
41 StringPiece *user_pos_token_array_data,
42 StringPiece *user_pos_string_array_data) {
40 bool InitUserPosManagerDataFromReader(
41 const DataSetReader &reader,
42 StringPiece *pos_matcher_data,
43 StringPiece *user_pos_token_array_data,
44 StringPiece *user_pos_string_array_data) {
45 if (!reader.Get("pos_matcher", pos_matcher_data)) {
46 LOG(ERROR) << "Cannot find POS matcher rule ID table";
47 return false;
48 }
4349 if (!reader.Get("user_pos_token", user_pos_token_array_data)) {
4450 LOG(ERROR) << "Cannot find a user POS token array";
4551 return false;
7076 return false;
7177 }
7278 if (!InitUserPosManagerDataFromReader(reader,
79 &pos_matcher_data_,
7380 &user_pos_token_array_data_,
7481 &user_pos_string_array_data_)) {
7582 LOG(ERROR) << "User POS manager data is broken";
233240 return false;
234241 }
235242 if (!InitUserPosManagerDataFromReader(reader,
243 &pos_matcher_data_,
236244 &user_pos_token_array_data_,
237245 &user_pos_string_array_data_)) {
238246 LOG(ERROR) << "User POS manager data is broken";
274282 *string_array_data = user_pos_string_array_data_;
275283 }
276284
277 const dictionary::POSMatcher *DataManager::GetPOSMatcher() const {
278 LOG(FATAL) << "Not implemented";
279 return nullptr;
285 const uint16 *DataManager::GetPOSMatcherData() const {
286 return reinterpret_cast<const uint16 *>(pos_matcher_data_.data());
280287 }
281288
282289 const uint8 *DataManager::GetPosGroupData() const {
119119 'dependencies': [
120120 '../data_manager_base.gyp:dataset_writer_main',
121121 '../../rewriter/rewriter_base.gyp:gen_rewriter_files#host',
122 '<(dataset_tag)_data_manager_base.gyp:gen_separate_pos_matcher_data_for_<(dataset_tag)#host',
122123 '<(dataset_tag)_data_manager_base.gyp:gen_separate_user_pos_data_for_<(dataset_tag)#host',
123124 'gen_separate_connection_data_for_<(dataset_tag)#host',
124125 'gen_separate_dictionary_data_for_<(dataset_tag)#host',
137138 'action_name': 'gen_mozc_dataset_for_<(dataset_tag)',
138139 'variables': {
139140 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)',
141 'pos_matcher': '<(gen_out_dir)/pos_matcher.data',
140142 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data',
141143 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data',
142144 'dictionary': '<(gen_out_dir)/system.dictionary',
161163 'symbol_string': '<(gen_out_dir)/symbol_string.data',
162164 },
163165 'inputs': [
166 '<(pos_matcher)',
164167 '<(user_pos_token)',
165168 '<(user_pos_string)',
166169 '<(dictionary)',
191194 '<(generator)',
192195 '--magic=<(magic_number)',
193196 '--output=<(gen_out_dir)/<(out_mozc_data)',
197 'pos_matcher:32:<(pos_matcher)',
194198 'user_pos_token:32:<(user_pos_token)',
195199 'user_pos_string:32:<(user_pos_string)',
196200 'coll:32:<(gen_out_dir)/collocation_data.data',
247251 'type': 'none',
248252 'toolsets': ['host'],
249253 'dependencies': [
250 '<(dataset_tag)_data_manager_base.gyp:gen_<(dataset_tag)_embedded_data_light',
251254 'gen_embedded_collocation_data_for_<(dataset_tag)#host',
252255 'gen_embedded_collocation_suppression_data_for_<(dataset_tag)#host',
253256 'gen_embedded_connection_data_for_<(dataset_tag)#host',
5454 // partial data set).
5555 bool InitUserPosManagerDataFromArray(StringPiece array, StringPiece magic);
5656
57 // The following interfaces are implemented.
57 // Implementation of DataManagerInterface.
58 const uint16 *GetPOSMatcherData() const override;
5859 void GetUserPOSData(StringPiece *token_array_data,
5960 StringPiece *string_array_data) const override;
6061 void GetConnectorData(const char **data, size_t *size) const override;
8889 StringPiece *string_array_data) const override;
8990 #endif // NO_USAGE_REWRITER
9091
91 // The following interfaces are not yet implemented.
92 // TODO(noriyukit): Implements all the interfaces by migrating embedded C++
93 // structures to a data set file.
94 const dictionary::POSMatcher *GetPOSMatcher() const override;
95
9692 private:
93 StringPiece pos_matcher_data_;
9794 StringPiece user_pos_token_array_data_;
9895 StringPiece user_pos_string_array_data_;
9996 StringPiece connection_data_;
3737 ],
3838 'dependencies': [
3939 '<(mozc_dir)/base/base.gyp:base',
40 '<(mozc_dir)/dictionary/dictionary_base.gyp:pos_matcher',
41 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host',
4240 'gen_user_pos_manager_data_header_for_<(dataset_tag)#host',
4341 '../data_manager_base.gyp:data_manager',
44 ],
45 },
46 {
47 'target_name': 'gen_<(dataset_tag)_embedded_data_light',
48 'type': 'none',
49 'toolsets': ['host'],
50 'dependencies': [
51 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host',
5242 ],
5343 },
5444 {
114104 'dependencies': [
115105 '../data_manager_base.gyp:dataset_writer_main',
116106 'gen_separate_user_pos_data_for_<(dataset_tag)#host',
107 'gen_separate_pos_matcher_data_for_<(dataset_tag)#host',
117108 ],
118109 'actions': [
119110 {
120111 'action_name': 'gen_user_pos_manager_data_for_<(dataset_tag)',
121112 'variables': {
122113 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)',
114 'pos_matcher': '<(gen_out_dir)/pos_matcher.data',
123115 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data',
124116 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data',
125117 },
126118 'inputs': [
119 '<(pos_matcher)',
127120 '<(user_pos_token)',
128121 '<(user_pos_string)',
129122 ],
133126 'action': [
134127 '<(generator)',
135128 '--output=<(gen_out_dir)/user_pos_manager.data',
129 'pos_matcher:32:<(pos_matcher)',
136130 'user_pos_token:32:<(user_pos_token)',
137131 'user_pos_string:32:<(user_pos_string)',
138132 ],
185179 ],
186180 },
187181 {
188 'target_name': 'gen_embedded_pos_matcher_data_for_<(dataset_tag)',
182 'target_name': 'gen_separate_pos_matcher_data_for_<(dataset_tag)',
189183 'type': 'none',
190184 'toolsets': ['host'],
191185 'dependencies': [
193187 ],
194188 'actions': [
195189 {
196 'action_name': 'gen_embedded_pos_matcher_data_for_<(dataset_tag)',
190 'action_name': 'gen_separate_pos_matcher_data_for_<(dataset_tag)',
197191 'variables': {
198192 'id_def': '<(platform_data_dir)/id.def',
199193 'special_pos': '<(common_data_dir)/rules/special_pos.def',
200194 'pos_matcher_rule': '<(common_data_dir)/rules/pos_matcher_rule.def',
201 'pos_matcher_data': '<(gen_out_dir)/pos_matcher_data.h',
195 'pos_matcher_data': '<(gen_out_dir)/pos_matcher.data',
202196 },
203197 'inputs': [
204198 '<(mozc_dir)/dictionary/gen_pos_matcher_code.py',
3434
3535 namespace mozc {
3636
37 #ifndef NO_USAGE_REWRITER
38 struct ConjugationSuffix;
39 struct UsageDictItem;
40 #endif // NO_USAGE_REWRITER
41
42 namespace dictionary {
43 class POSMatcher;
44 } // namespace dictionary
45
4637 // Builds those objects that depend on a set of embedded data generated from
4738 // files in data/dictionary, such as dictionary.txt, id.def, etc.
4839 class DataManagerInterface {
5546
5647 // Returns the address of the binary POS matcher data (an array of uint16)
5748 // from which a POSMatcher is built. Don't delete the returned pointer.
58 virtual const dictionary::POSMatcher *GetPOSMatcher() const = 0;
49 virtual const uint16 *GetPOSMatcherData() const = 0;
5950
6051 // Returns the address of an array of lid group.
6152 virtual const uint8 *GetPosGroupData() const = 0;
142142 void DataManagerTestBase::SegmenterTest_ParticleTest() {
143143 std::unique_ptr<Segmenter> segmenter(
144144 Segmenter::CreateFromDataManager(*data_manager_));
145 const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher();
145 const POSMatcher pos_matcher(data_manager_->GetPOSMatcherData());
146146
147147 Node lnode, rnode;
148148 lnode.Init();
150150 lnode.node_type = Node::NOR_NODE;
151151 rnode.node_type = Node::NOR_NODE;
152152 // "助詞"
153 lnode.rid = pos_matcher->GetAcceptableParticleAtBeginOfSegmentId();
153 lnode.rid = pos_matcher.GetAcceptableParticleAtBeginOfSegmentId();
154154 // "名詞,サ変".
155 rnode.lid = pos_matcher->GetUnknownId();
155 rnode.lid = pos_matcher.GetUnknownId();
156156 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, false));
157157
158158 lnode.attributes |= Node::STARTS_WITH_PARTICLE;
3131 #include "base/embedded_file.h"
3232 #include "base/logging.h"
3333 #include "base/singleton.h"
34 #include "dictionary/pos_group.h"
35 #include "dictionary/pos_matcher.h"
3634
3735 namespace mozc {
3836 namespace oss {
6260 manager_.GetUserPOSData(token_array_data, string_array_data);
6361 }
6462
65 namespace {
66 // This header file is autogenerated by gen_pos_matcher_code.py and contains
67 // kRuleIdTable[] and kRangeTable[].
68 #include "data_manager/oss/pos_matcher_data.h"
69
70 class OssPOSMatcher : public dictionary::POSMatcher {
71 public:
72 OssPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {}
73 };
74 } // namespace
75
76 const dictionary::POSMatcher *OssUserPosManager::GetPOSMatcher() const {
77 return Singleton<OssPOSMatcher>::get();
63 const uint16 *OssUserPosManager::GetPOSMatcherData() const {
64 return manager_.GetPOSMatcherData();
7865 }
7966
8067 } // namespace oss
4848 // Returns the address to an array of UserPOS::POSToken.
4949 void GetUserPOSData(StringPiece *token_array_data,
5050 StringPiece *string_array_data) const override;
51 const dictionary::POSMatcher *GetPOSMatcher() const override;
51 const uint16 *GetPOSMatcherData() const override;
5252
5353 // The following are implemented in OssDataManager.
5454 const uint8 *GetPosGroupData() const override { return nullptr; }
3434 #include "base/logging.h"
3535 #include "base/version.h"
3636 #include "data_manager/packed/system_dictionary_data_packer.h"
37 #include "dictionary/pos_group.h"
38 #include "dictionary/pos_matcher.h"
39 #include "dictionary/user_pos.h"
4037
4138 DEFINE_string(user_pos_manager_data, "", "Input user pos manager data");
4239 DEFINE_string(output, "", "Output data file name");
4340
4441 namespace mozc {
45 namespace {
46
47 #include "data_manager/@DIR@/pos_matcher_data.h"
48
49 } // namespace
5042
5143 bool OutputData(const string &file_path) {
5244 const char* kMagicNumber = ""; // No magic number.
5446 packer.SetMozcData(InputFileStream(FLAGS_user_pos_manager_data.c_str(),
5547 ios_base::in | ios_base::binary).Read(),
5648 kMagicNumber);
57 // The following two arrays contain sentinel elements but the packer doesn't
58 // expect them. So, pass the shinked ranges of the arrays. Note that
59 // sentinel elements are not necessary at runtime.
60 packer.SetPosMatcherData(kRuleIdTable, arraysize(kRuleIdTable) - 1,
61 kRangeTables, arraysize(kRangeTables) - 1);
6249 return packer.Output(file_path, false);
6350 }
6451
3535 #include "base/util.h"
3636 #include "base/version.h"
3737 #include "data_manager/packed/system_dictionary_data_packer.h"
38 #include "dictionary/pos_group.h"
39 #include "dictionary/pos_matcher.h"
40 #include "dictionary/user_pos.h"
41 #include "rewriter/embedded_dictionary.h"
4238
4339 DEFINE_string(mozc_data, "", "Data set file to be packed");
4440 DEFINE_string(mozc_data_magic, "", "Magic number for data set file");
4844 DEFINE_bool(use_gzip, false, "use gzip");
4945
5046 namespace mozc {
51 namespace {
52
53 #include "data_manager/@DIR@/pos_matcher_data.h"
54
55 } // namespace
5647
5748 bool OutputData(const string &file_path) {
5849 string dictionary_version = Version::GetMozcVersion();
6051 dictionary_version = FLAGS_dictionary_version;
6152 }
6253 packed::SystemDictionaryDataPacker packer(dictionary_version);
63 // The following two arrays contain sentinel elements but the packer doesn't
64 // expect them. So pass the shinked ranges of the arrays. Note that sentinel
65 // elements are not required at runtime.
66 packer.SetPosMatcherData(kRuleIdTable, arraysize(kRuleIdTable) - 1,
67 kRangeTables, arraysize(kRangeTables) - 1);
6854
6955 string magic;
7056 CHECK(Util::Unescape(FLAGS_mozc_data_magic, &magic))
4040 #include "data_manager/data_manager_interface.h"
4141 #include "data_manager/packed/system_dictionary_data.pb.h"
4242 #include "data_manager/packed/system_dictionary_format_version.h"
43 #include "dictionary/pos_matcher.h"
4443
4544 DEFINE_string(dataset,
4645 "",
4746 "The dataset tag of the POS data.");
4847
4948 using std::unique_ptr;
50
51 using mozc::dictionary::POSMatcher;
5249
5350 namespace mozc {
5451 namespace packed {
5653 // Default value of the total bytes limit defined in protobuf library is 64MB.
5754 // Our big dictionary size is about 50MB. So we don't need to change it.
5855 const size_t kDefaultTotalBytesLimit = 64 << 20;
59
60 class PackedPOSMatcher : public POSMatcher {
61 public:
62 PackedPOSMatcher(const uint16 *const rule_id_table,
63 const Range *const *const range_table)
64 : POSMatcher(rule_id_table, range_table) {
65 }
66 };
6756
6857 unique_ptr<PackedDataManager> g_data_manager;
6958
7968
8069 void GetUserPOSData(StringPiece *token_array_data,
8170 StringPiece *string_array_data) const;
82 const POSMatcher *GetPOSMatcher() const;
71 const uint16 *GetPOSMatcherData() const;
8372 const uint8 *GetPosGroupData() const;
8473 void GetConnectorData(const char **data, size_t *size) const;
8574 void GetSegmenterData(
10695 StringPiece *usage_items_data,
10796 StringPiece *string_array_data) const;
10897 #endif // NO_USAGE_REWRITER
109 const uint16 *GetRuleIdTableForTest() const;
110 const void *GetRangeTablesForTest() const;
11198 void GetCounterSuffixSortedArray(const char **array, size_t *size) const;
11299 StringPiece GetMozcData() const;
113100
114101 private:
115 // Non-const struct of POSMatcher::Range
116 struct Range {
117 uint16 lower;
118 uint16 upper;
119 };
120102 bool InitializeWithSystemDictionaryData();
121103
122 unique_ptr<uint16[]> rule_id_table_;
123 unique_ptr<POSMatcher::Range *[]> range_tables_;
124 unique_ptr<Range[]> range_table_items_;
125 unique_ptr<POSMatcher> pos_matcher_;
126104 unique_ptr<SystemDictionaryData> system_dictionary_data_;
127105 DataManager manager_;
128106 };
170148 << " actual:" << system_dictionary_data_->format_version();
171149 return false;
172150 }
173
174 // Makes POSMatcher data.
175 rule_id_table_.reset(
176 new uint16[
177 system_dictionary_data_->pos_matcher_data().rule_id_table_size()]);
178 for (size_t i = 0;
179 i < system_dictionary_data_->pos_matcher_data().rule_id_table_size();
180 ++i) {
181 rule_id_table_[i] =
182 system_dictionary_data_->pos_matcher_data().rule_id_table(i);
183 }
184 const SystemDictionaryData::PosMatcherData &pos_matcher_data =
185 system_dictionary_data_->pos_matcher_data();
186 range_tables_.reset(
187 new POSMatcher::Range*[pos_matcher_data.range_tables_size()]);
188 size_t range_count = 0;
189 for (size_t i = 0; i < pos_matcher_data.range_tables_size(); ++i) {
190 range_count += pos_matcher_data.range_tables(i).ranges_size();
191 }
192 range_table_items_.reset(
193 new Range[range_count + pos_matcher_data.range_tables_size()]);
194 size_t range_index = 0;
195 for (size_t i = 0; i < pos_matcher_data.range_tables_size(); ++i) {
196 const SystemDictionaryData::PosMatcherData::RangeTable &table =
197 pos_matcher_data.range_tables(i);
198 range_tables_[i] =
199 reinterpret_cast<POSMatcher::Range *>(&range_table_items_[range_index]);
200 for (size_t j = 0; j < table.ranges_size(); ++j) {
201 const SystemDictionaryData::PosMatcherData::RangeTable::Range &range =
202 table.ranges(j);
203 range_table_items_[range_index].lower = range.lower();
204 range_table_items_[range_index].upper = range.upper();
205 ++range_index;
206 }
207 range_table_items_[range_index].lower = static_cast<uint16>(0xFFFF);
208 range_table_items_[range_index].upper = static_cast<uint16>(0xFFFF);
209 ++range_index;
210 }
211
212 // Makes POSMatcher.
213 pos_matcher_.reset(
214 new PackedPOSMatcher(rule_id_table_.get(), range_tables_.get()));
215151
216152 // Initialize |manager_| (PackedDataManager for light doesn't have mozc data).
217153 if (system_dictionary_data_->has_mozc_data() &&
237173 manager_.GetUserPOSData(token_array_data, string_array_data);
238174 }
239175
240 const POSMatcher *PackedDataManager::Impl::GetPOSMatcher() const {
241 return pos_matcher_.get();
176 const uint16 *PackedDataManager::Impl::GetPOSMatcherData() const {
177 return manager_.GetPOSMatcherData();
242178 }
243179
244180 const uint8 *PackedDataManager::Impl::GetPosGroupData() const {
316252 string_array_data);
317253 }
318254 #endif // NO_USAGE_REWRITER
319
320 const uint16 *PackedDataManager::Impl::GetRuleIdTableForTest() const {
321 return rule_id_table_.get();
322 }
323
324 const void *PackedDataManager::Impl::GetRangeTablesForTest() const {
325 return range_tables_.get();
326 }
327255
328256 void PackedDataManager::Impl::GetCounterSuffixSortedArray(
329257 const char **array, size_t *size) const {
393321 return g_data_manager.get();
394322 }
395323
396 const POSMatcher *PackedDataManager::GetPOSMatcher() const {
397 return manager_impl_->GetPOSMatcher();
324 const uint16 *PackedDataManager::GetPOSMatcherData() const {
325 return manager_impl_->GetPOSMatcherData();
398326 }
399327
400328 const uint8 *PackedDataManager::GetPosGroupData() const {
483411 manager_impl_->GetCounterSuffixSortedArray(array, size);
484412 }
485413
486 const uint16 *PackedDataManager::GetRuleIdTableForTest() const {
487 return manager_impl_->GetRuleIdTableForTest();
488 }
489
490 const void *PackedDataManager::GetRangeTablesForTest() const {
491 return manager_impl_->GetRangeTablesForTest();
492 }
493
494414 StringPiece PackedDataManager::GetMozcData() const {
495415 return manager_impl_->GetMozcData();
496416 }
5353
5454 void GetUserPOSData(StringPiece *token_array_data,
5555 StringPiece *string_array_data) const override;
56 const dictionary::POSMatcher *GetPOSMatcher() const override;
56 const uint16 *GetPOSMatcherData() const override;
5757 const uint8 *GetPosGroupData() const override;
5858 void GetConnectorData(const char **data, size_t *size) const override;
5959 void GetSegmenterData(size_t *l_num_elements, size_t *r_num_elements,
8686
8787 private:
8888 friend class PackedDataTestBase;
89 const uint16 *GetRuleIdTableForTest() const;
90 const void *GetRangeTablesForTest() const;
9189
9290 class Impl;
9391 std::unique_ptr<Impl> manager_impl_;
6363 'system_dictionary_data_protocol',
6464 '../../base/base.gyp:base',
6565 '../../dictionary/dictionary_base.gyp:pos_matcher',
66 '../<(dataset_dir)/<(dataset_tag)_data_manager_base.gyp:gen_<(dataset_tag)_embedded_data_light',
6766 ],
6867 },
6968 {
3838
3939 reserved 3; // DEPRECATED: repeated PosToken pos_tokens = 3;
4040
41 message PosMatcherData {
42 repeated uint32 rule_id_table = 1;
43 message RangeTable {
44 message Range {
45 optional uint32 lower = 1;
46 optional uint32 upper = 2;
47 };
48 repeated Range ranges = 2;
49 };
50 repeated RangeTable range_tables = 2;
51 };
52 optional PosMatcherData pos_matcher_data = 4;
41 reserved 4; // DEPRECATED: optional PosMatcherData pos_matcher_data = 4;
5342
5443 reserved 5; // DEPRECATED: optional bytes lid_group_data = 5;
5544
3939 #include "base/version.h"
4040 #include "data_manager/packed/system_dictionary_data.pb.h"
4141 #include "data_manager/packed/system_dictionary_format_version.h"
42 #include "dictionary/pos_group.h"
43 #include "dictionary/pos_matcher.h"
44 #include "dictionary/user_pos.h"
45
46 using mozc::dictionary::POSMatcher;
47 using mozc::dictionary::UserPOS;
4842
4943 namespace mozc {
5044 namespace packed {
5650 }
5751
5852 SystemDictionaryDataPacker::~SystemDictionaryDataPacker() {
59 }
60
61 void SystemDictionaryDataPacker::SetPosMatcherData(
62 const uint16 *rule_id_table,
63 size_t rule_id_table_count,
64 const POSMatcher::Range *const *range_tables,
65 size_t range_tables_count) {
66 SystemDictionaryData::PosMatcherData *pos_matcher_data =
67 system_dictionary_->mutable_pos_matcher_data();
68 for (size_t i = 0; i < rule_id_table_count; ++i) {
69 pos_matcher_data->add_rule_id_table(rule_id_table[i]);
70 }
71 for (size_t i = 0; i < range_tables_count; ++i) {
72 SystemDictionaryData::PosMatcherData::RangeTable *range_table =
73 pos_matcher_data->add_range_tables();
74 for (size_t j = 0;
75 range_tables[i][j].lower != static_cast<uint16>(0xFFFF);
76 ++j) {
77 SystemDictionaryData::PosMatcherData::RangeTable::Range *range
78 = range_table->add_ranges();
79 range->set_lower(range_tables[i][j].lower);
80 range->set_upper(range_tables[i][j].upper);
81 }
82 }
8353 }
8454
8555 void SystemDictionaryDataPacker::SetMozcData(const string &data,
4343 public:
4444 explicit SystemDictionaryDataPacker(const string &product_version);
4545 ~SystemDictionaryDataPacker();
46 void SetPosMatcherData(
47 const uint16 *rule_id_table,
48 size_t rule_id_table_count,
49 const dictionary::POSMatcher::Range *const *range_tables,
50 size_t range_tables_count);
5146 void SetMozcData(const string &data, const string &magic);
5247
5348 bool Output(const string &file_path, bool use_gzip);
3232 namespace mozc {
3333 namespace packed {
3434
35 const int kSystemDictionaryFormatVersion = 21;
35 const int kSystemDictionaryFormatVersion = 22;
3636
3737 } // namespace packed
3838 } // namespace mozc
3131 #include "base/embedded_file.h"
3232 #include "base/logging.h"
3333 #include "base/singleton.h"
34 #include "dictionary/pos_matcher.h"
3534
3635 namespace mozc {
3736 namespace testing {
6160 manager_.GetUserPOSData(token_array_data, string_array_data);
6261 }
6362
64 namespace {
65 // This header file is autogenerated by gen_pos_matcher_code.py and contains
66 // kRuleIdTable[] and kRangeTable[].
67 #include "data_manager/testing/pos_matcher_data.h"
68
69 class MockPOSMatcher : public dictionary::POSMatcher {
70 public:
71 MockPOSMatcher() : POSMatcher(kRuleIdTable, kRangeTables) {}
72 };
73 } // namespace
74
75 const dictionary::POSMatcher *MockUserPosManager::GetPOSMatcher() const {
76 return Singleton<MockPOSMatcher>::get();
63 const uint16 *MockUserPosManager::GetPOSMatcherData() const {
64 return manager_.GetPOSMatcherData();
7765 }
7866
7967 } // namespace testing
4747 // following embedded data.
4848 void GetUserPOSData(StringPiece *token_array_data,
4949 StringPiece *string_array_data) const override;
50 const dictionary::POSMatcher *GetPOSMatcher() const override;
50 const uint16 *GetPOSMatcherData() const override;
5151
5252 // The following are implemented in MockDataManager.
5353 const uint8 *GetPosGroupData() const override { return nullptr; }
5757 struct DictionaryData {
5858 std::unique_ptr<DictionaryInterface> user_dictionary;
5959 std::unique_ptr<SuppressionDictionary> suppression_dictionary;
60 const POSMatcher *pos_matcher;
60 POSMatcher pos_matcher;
6161 std::unique_ptr<DictionaryInterface> dictionary;
6262 };
6363
6464 DictionaryData *CreateDictionaryData() {
6565 DictionaryData *ret = new DictionaryData;
6666 testing::MockDataManager data_manager;
67 ret->pos_matcher = data_manager.GetPOSMatcher();
67 ret->pos_matcher.Set(data_manager.GetPOSMatcherData());
6868 const char *dictionary_data = NULL;
6969 int dictionary_size = 0;
7070 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
7171 SystemDictionary *sys_dict =
7272 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
7373 ValueDictionary *val_dict =
74 new ValueDictionary(*ret->pos_matcher, &sys_dict->value_trie());
74 new ValueDictionary(ret->pos_matcher, &sys_dict->value_trie());
7575 ret->user_dictionary.reset(new UserDictionaryStub);
7676 ret->suppression_dictionary.reset(new SuppressionDictionary);
7777 ret->dictionary.reset(new DictionaryImpl(sys_dict,
7878 val_dict,
7979 ret->user_dictionary.get(),
8080 ret->suppression_dictionary.get(),
81 ret->pos_matcher));
81 &ret->pos_matcher));
8282 return ret;
8383 }
8484
289289 // config.
290290 config_.set_use_zip_code_conversion(true);
291291 for (size_t i = 0; i < arraysize(kTestPair); ++i) {
292 CheckZipCodeExistenceCallback callback(kKey, kValue, data->pos_matcher);
292 CheckZipCodeExistenceCallback callback(kKey, kValue, &data->pos_matcher);
293293 (d->*kTestPair[i].lookup_method)(kTestPair[i].query, convreq_, &callback);
294294 EXPECT_TRUE(callback.found());
295295 }
297297 // Without the flag, it should be suppressed.
298298 config_.set_use_zip_code_conversion(false);
299299 for (size_t i = 0; i < arraysize(kTestPair); ++i) {
300 CheckZipCodeExistenceCallback callback(kKey, kValue, data->pos_matcher);
300 CheckZipCodeExistenceCallback callback(kKey, kValue, &data->pos_matcher);
301301 (d->*kTestPair[i].lookup_method)(kTestPair[i].query, convreq_, &callback);
302302 EXPECT_FALSE(callback.found());
303303 }
2727 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2828 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
30 """A tool to generate POS matcher."""
30 """A tool to generate POS matcher.
31
32 This script generates POS matcher data and the C++ class that provides functions
33 for POS ID matching.
34
35 * C++ class: POSMatcher
36
37 This class has two methods for each POS matching rule:
38 - GetXXXId(): returns the POS ID for XXX.
39 - IsXXX(uint16 id): checks if the given POS ID is XXX or not.
40 Here, XXX is replaced by rule names; see data/rules/pos_matcher_rule.def.
41
42 POSMatcher is created from the data generated by this script.
43 The binary format is as follows.
44
45 * Binary format
46
47 Suppose there are N matching rules. The first 2*N bytes are the array of uint16
48 holding the results of GetXXXId(). The next 2*N bytes are uint16 offsets, one
49 per rule and counted in uint16 units, to that rule's ranges of POS IDs (IsXXX
50 returns true if id is in one of them). The figure below omits the offset table:
51
52 +===========================================+=============================
53 | POS ID for rule 0 (2 bytes) | For GetXXXId() methods
54 +-------------------------------------------+
55 | POS ID for rule 1 (2 bytes) |
56 +-------------------------------------------+
57 | .... |
58 +-------------------------------------------+
59 | POS ID for rule N - 1 (2 bytes) |
60 +===========================================+=============================
61 | POS range for rule 0: start 0 (2 bytes) | For IsXXX() for rule 0
62 + - - - - - - - - - - - - - - - - - - - - - +
63 | POS range for rule 0: end 0 (2 bytes) |
64 +-------------------------------------------+
65 | POS range for rule 0: start 1 (2 bytes) |
66 + - - - - - - - - - - - - - - - - - - - - - +
67 | POS range for rule 0: end 1 (2 bytes) |
68 |-------------------------------------------+
69 | .... |
70 |-------------------------------------------+
71 | POS range for rule 0: start M (2 bytes) |
72 + - - - - - - - - - - - - - - - - - - - - - +
73 | POS range for rule 0: end M (2 bytes) |
74 |-------------------------------------------+
75 | Sentinel element 0xFFFF (2 bytes) |
76 +===========================================+=============================
77 | POS range for rule 1: start 0 (2 bytes) | For IsXXX() for rule 1
78 + - - - - - - - - - - - - - - - - - - - - - +
79 | POS range for rule 1: end 0 (2 bytes) |
80 +-------------------------------------------+
81 | .... |
82 +-------------------------------------------+
83 | Sentinel element 0xFFFF (2 bytes) |
84 +===========================================+
85 | .... |
86 | |
87 """
3188
3289 __author__ = "taku"
3390
3491 import optparse
3592 import re
93 import struct
3694 import sys
3795
3896 from dictionary import pos_util
3997
4098
4199 def OutputPosMatcherData(pos_matcher, output):
42 """Generates the data used by POSMatcher.
43
44 Two data arrays are generated:
45 1) const uint16 kRuleIdTable[]
46 This contains POS ID for each rule in pos_matcher_rule.def. The data is
47 used by the method Get<RuleName>() generated by this script. Each array
48 index corresponds to one rule name in its declared order. Namely, if
49 pos_matcher_rule.def contain three rules, say
50 Rule0 Regexp0
51 Rule1 Regexp1
52 Rule2 Regexp2
53 Then kRuleIdTable[0] contains the result of GetRule0(), etc.
54
55 2) const POSMatcher::Range kRangeTable[]
56 Each element is a pointer to another array containing ranges of POS IDs
57 whose union is the set of all POS IDs that match the regexp. Each array
58 of ranges ends with the endmark { 0xFFFF, 0xFFFF }.
59
60 Generated data can be passed to POSMatcher, which is also generated by
61 this script, to get a POSMatcher corresponding to given data set.
62 """
63
64 # Generate kRuleIdTable[].
65 output.write('const uint16 kRuleIdTable[] = {\n')
100 data = []
66101 for rule_name in pos_matcher.GetRuleNameList():
67 output.write(
68 ' %(id)4d, // %(rule_name)s "%(original_pattern)s"\n'
69 % { 'id': pos_matcher.GetId(rule_name),
70 'rule_name': rule_name,
71 'original_pattern': pos_matcher.GetOriginalPattern(rule_name) })
72 output.write(' static_cast<uint16>(0xFFFF),\n')
73 output.write('};\n')
74
75 # Generate arrays of ranges each of which will be an element of kRangeTable[].
76 output.write('namespace {\n')
102 data.append(pos_matcher.GetId(rule_name))
103
104 offset = 2 * len(pos_matcher.GetRuleNameList())
77105 for rule_name in pos_matcher.GetRuleNameList():
78 output.write(
79 '// %(rule_name)s "%(original_pattern)s"\n'
80 'const ::mozc::dictionary::POSMatcher::Range '
81 'kRangeTable_%(rule_name)s[] = {\n'
82 % { 'rule_name': rule_name,
83 'original_pattern': pos_matcher.GetOriginalPattern(rule_name) })
106 data.append(offset)
107 offset += 2 * len(pos_matcher.GetRange(rule_name)) + 1
108
109 for rule_name in pos_matcher.GetRuleNameList():
84110 for id_range in pos_matcher.GetRange(rule_name):
85 output.write(' { %4d, %4d },\n' % id_range)
86 # End mark for this array of ranges.
87 output.write(
88 ' { static_cast<uint16>(0xFFFF), static_cast<uint16>(0xFFFF) },\n'
89 '};\n')
90 output.write('} // namespace\n')
91
92 # Generate kRangeTable[].
93 output.write(
94 'const ::mozc::dictionary::POSMatcher::Range *const '
95 'kRangeTables[%d] = {\n'
96 % (len(pos_matcher.GetRuleNameList()) + 1))
97 for rule_name in pos_matcher.GetRuleNameList():
98 output.write(' kRangeTable_%s,\n' % rule_name)
99 output.write(' NULL,\n')
100 output.write('};\n')
111 data.append(id_range[0])
112 data.append(id_range[1])
113 data.append(0xFFFF)
114
115 for u16 in data:
116 output.write(struct.pack('<H', u16))
101117
102118
103119 def OutputPosMatcherHeader(pos_matcher, output):
104120 """Generates the definition of POSMatcher class.
105121
106 POSMatcher is independent of the actual input data but just provides logic
107 for POS matching. To use a generated class, it's required to pass two arrays,
108 kRuleIdTable[] and kRangeTables[], to the constructor of POSMatcher.
122 POSMatcher is independent of the actual input data but just provides logic for
123 POS matching. To use a generated class, it's required to pass the data
124 generated by OutputPosMatcherData() above.
109125 """
126
127 lid_table_size = len(pos_matcher.GetRuleNameList())
110128
111129 output.write(
112130 '#ifndef MOZC_DICTIONARY_POS_MATCHER_H_\n'
115133 'namespace mozc {\n'
116134 'namespace dictionary {\n'
117135 'class POSMatcher {\n'
118 ' public:\n'
119 ' struct Range {\n'
120 ' uint16 lower;\n'
121 ' uint16 upper;\n'
122 ' };\n')
136 ' public:\n')
123137
124138 # Helper function to generate Get<RuleName>Id() method from rule name and its
125139 # corresponding index.
126140 def _GenerateGetMethod(rule_name, index):
127141 return (' inline uint16 Get%(rule_name)sId() const {\n'
128 ' return rule_id_table_[%(index)d];\n'
129 ' }' % { 'rule_name': rule_name, 'index': index })
142 ' return data_[%(index)d];\n'
143 ' }' % {
144 'rule_name': rule_name,
145 'index': index,
146 })
130147
131148 # Helper function to generate Is<RuleName>(uint16 id) method from rule name
132149 # and its corresponding index. The generated function checks if the given id
133150 # belongs to one of the rule's ranges, located via its offset entry in data_.
134151 def _GenerateIsMethod(rule_name, index):
135152 return (' inline bool Is%(rule_name)s(uint16 id) const {\n'
136 ' for (const Range *range = range_table_[%(index)d];\n'
137 ' range->lower != static_cast<uint16>(0xFFFF); ++range) {\n'
138 ' if (id >= range->lower && id <= range->upper) {\n'
153 ' const uint16 offset = data_[%(lid_table_size)d + %(index)d];\n'
154 ' for (const uint16 *ptr = data_ + offset;\n'
155 ' *ptr != static_cast<uint16>(0xFFFF); ptr += 2) {\n'
156 ' if (id >= *ptr && id <= *(ptr + 1)) {\n'
139157 ' return true;\n'
140158 ' }\n'
141159 ' }\n'
142160 ' return false;\n'
143 ' }' % { 'rule_name': rule_name, 'index': index })
161 ' }' % {
162 'rule_name': rule_name,
163 'index': index,
164 'lid_table_size': lid_table_size,
165 })
144166
145167 # Generate Get<RuleName>Id() and Is<RuleName>(uint16 id) for each rule.
146168 for i, rule_name in enumerate(pos_matcher.GetRuleNameList()):
157179 # function.
158180 output.write(
159181 ' public:\n'
160 ' POSMatcher(const uint16 *const rule_id_table,\n'
161 ' const Range *const *const range_table)\n'
162 ' : rule_id_table_(rule_id_table),\n'
163 ' range_table_(range_table) {}\n'
182 ' POSMatcher() : data_(nullptr) {}\n'
183 ' explicit POSMatcher(const uint16 *data) : data_(data) {}\n'
184 ' void Set(const uint16 *data) { data_ = data; }\n'
164185 ' private:\n'
165 ' const uint16 *const rule_id_table_;\n'
166 ' const Range *const *const range_table_;\n'
186 ' const uint16 *data_;\n'
167187 '};\n'
168188 '} // namespace dictionary\n'
169189 '} // namespace mozc\n'
180200 parser.add_option('--output_pos_matcher_data',
181201 dest='output_pos_matcher_data',
182202 default='',
183 help='Path to the output header file of pos matcher data.')
203 help='Path to the output file of pos matcher data.')
184204 parser.add_option('--output_pos_matcher_h',
185205 dest='output_pos_matcher_h',
186206 default='',
205225 pos_database.Parse(options.id_file, options.special_pos_file)
206226 pos_matcher = pos_util.PosMatcher(pos_database)
207227 pos_matcher.Parse(options.pos_matcher_rule_file)
208 with open(options.output_pos_matcher_data, 'w') as stream:
228 with open(options.output_pos_matcher_data, 'wb') as stream:
209229 OutputPosMatcherData(pos_matcher, stream)
210230
211231
101101 &system_dictionary_input,
102102 &reading_correction_input);
103103
104 const mozc::dictionary::POSMatcher *pos_matcher =
105 FLAGS_gen_test_dictionary ?
106 mozc::testing::MockUserPosManager::GetUserPosManager()->GetPOSMatcher() :
107 mozc::UserPosManager::GetUserPosManager()->GetPOSMatcher();
108 CHECK(pos_matcher);
104 using mozc::testing::MockUserPosManager;
105 using mozc::UserPosManager;
106 const mozc::dictionary::POSMatcher pos_matcher(
107 FLAGS_gen_test_dictionary
108 ? MockUserPosManager::GetUserPosManager()->GetPOSMatcherData()
109 : UserPosManager::GetUserPosManager()->GetPOSMatcherData());
109110
110 mozc::dictionary::TextDictionaryLoader loader(*pos_matcher);
111 mozc::dictionary::TextDictionaryLoader loader(pos_matcher);
111112 loader.Load(system_dictionary_input, reading_correction_input);
112113
113114 mozc::dictionary::SystemDictionaryBuilder builder;
8181 class SystemDictionaryTest : public ::testing::Test {
8282 protected:
8383 SystemDictionaryTest()
84 : text_dict_(new TextDictionaryLoader(
85 *UserPosManager::GetUserPosManager()->GetPOSMatcher())),
84 : pos_matcher_(UserPosManager::GetUserPosManager()->GetPOSMatcherData()),
85 text_dict_(new TextDictionaryLoader(pos_matcher_)),
8686 dic_fn_(FileUtil::JoinPath(FLAGS_test_tmpdir, "mozc.dic")) {
8787 const string dic_path = mozc::testing::GetSourceFileOrDie({
8888 "data", "dictionary_oss", "dictionary00.txt"});
9292 convreq_.set_config(&config_);
9393 }
9494
95 virtual void SetUp() {
95 void SetUp() override {
9696 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
9797
9898 // Don't use small cost encoding by default.
104104 config::ConfigHandler::GetDefaultConfig(&config_);
105105 }
106106
107 virtual void TearDown() {
107 void TearDown() override {
108108 FLAGS_min_key_length_to_use_small_cost_encoding =
109109 original_flags_min_key_length_to_use_small_cost_encoding_;
110110
121121 bool CompareTokensForLookup(const Token &a, const Token &b,
122122 bool reverse) const;
123123
124 dictionary::POSMatcher pos_matcher_;
124125 unique_ptr<TextDictionaryLoader> text_dict_;
125126
126127 ConversionRequest convreq_;
4747
4848 class ValueDictionaryTest : public ::testing::Test {
4949 protected:
50 virtual void SetUp() {
51 pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher();
50 void SetUp() override {
51 pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData());
5252 louds_trie_builder_.reset(new LoudsTrieBuilder);
5353 louds_trie_.reset(new LoudsTrie);
5454 }
5555
56 virtual void TearDown() {
56 void TearDown() override {
5757 louds_trie_.reset();
5858 louds_trie_builder_.reset();
5959 }
6868 louds_trie_builder_->Build();
6969 louds_trie_->Open(
7070 reinterpret_cast<const uint8 *>(louds_trie_builder_->image().data()));
71 return new ValueDictionary(*pos_matcher_, louds_trie_.get());
71 return new ValueDictionary(pos_matcher_, louds_trie_.get());
7272 }
7373
7474 void InitToken(const string &value, Token *token) const {
7575 token->key = token->value = value;
7676 token->cost = 10000;
77 token->lid = token->rid = pos_matcher_->GetSuggestOnlyWordId();
77 token->lid = token->rid = pos_matcher_.GetSuggestOnlyWordId();
7878 token->attributes = Token::NONE;
7979 }
8080
81 const POSMatcher *pos_matcher_;
81 POSMatcher pos_matcher_;
8282 ConversionRequest convreq_;
8383 std::unique_ptr<LoudsTrieBuilder> louds_trie_builder_;
8484 std::unique_ptr<LoudsTrie> louds_trie_;
6464 // considering this class as POD.
6565 TextDictionaryLoaderTest() {}
6666
67 virtual void SetUp() {
68 pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher();
67 void SetUp() override {
68 pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData());
6969 }
7070
7171 TextDictionaryLoader *CreateTextDictionaryLoader() {
72 return new TextDictionaryLoader(*pos_matcher_);
73 }
74
75 const POSMatcher *pos_matcher_;
72 return new TextDictionaryLoader(pos_matcher_);
73 }
74
75 POSMatcher pos_matcher_;
7676 scoped_data_manager_initializer_for_testing
7777 scoped_data_manager_initializer_for_testing_;
7878 };
182182 token.lid = 100;
183183 token.rid = 200;
184184 EXPECT_TRUE(loader->RewriteSpecialToken(&token, "ZIP_CODE"));
185 EXPECT_EQ(pos_matcher_->GetZipcodeId(), token.lid);
186 EXPECT_EQ(pos_matcher_->GetZipcodeId(), token.rid);
185 EXPECT_EQ(pos_matcher_.GetZipcodeId(), token.lid);
186 EXPECT_EQ(pos_matcher_.GetZipcodeId(), token.rid);
187187 EXPECT_EQ(Token::NONE, token.attributes);
188188 }
189189
192192 token.lid = 100;
193193 token.rid = 200;
194194 EXPECT_TRUE(loader->RewriteSpecialToken(&token, "ENGLISH:RATED"));
195 EXPECT_EQ(pos_matcher_->GetIsolatedWordId(), token.lid);
196 EXPECT_EQ(pos_matcher_->GetIsolatedWordId(), token.rid);
195 EXPECT_EQ(pos_matcher_.GetIsolatedWordId(), token.lid);
196 EXPECT_EQ(pos_matcher_.GetIsolatedWordId(), token.rid);
197197 EXPECT_EQ(Token::NONE, token.attributes);
198198 }
199199
289289 };
290290
291291 UserDictionary::UserDictionary(const UserPOSInterface *user_pos,
292 const POSMatcher *pos_matcher,
292 POSMatcher pos_matcher,
293293 SuppressionDictionary *suppression_dictionary)
294294 : ALLOW_THIS_IN_INITIALIZER_LIST(
295295 reloader_(new UserDictionaryReloader(this))),
299299 tokens_(new TokensIndex(user_pos_.get(), suppression_dictionary)),
300300 mutex_(new ReaderWriterMutex) {
301301 DCHECK(user_pos_.get());
302 DCHECK(pos_matcher_);
303302 DCHECK(suppression_dictionary_);
304303 Reload();
305304 }
363362 }
364363 FillTokenFromUserPOSToken(**it, &token);
365364 // Override POS IDs for suggest only words.
366 if (pos_matcher_->IsSuggestOnlyWord((*it)->id)) {
367 token.lid = token.rid = pos_matcher_->GetUnknownId();
365 if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) {
366 token.lid = token.rid = pos_matcher_.GetUnknownId();
368367 }
369368 if (callback->OnToken((*it)->key, (*it)->key, token) ==
370369 Callback::TRAVERSE_DONE) {
402401 if ((*it)->key > key) {
403402 break;
404403 }
405 if (pos_matcher_->IsSuggestOnlyWord((*it)->id)) {
404 if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) {
406405 continue;
407406 }
408407 if (!Util::StartsWith(key, (*it)->key)) {
456455 Token token;
457456 for (; range.first != range.second; ++range.first) {
458457 const UserPOS::Token &user_pos_token = **range.first;
459 if (pos_matcher_->IsSuggestOnlyWord(user_pos_token.id)) {
458 if (pos_matcher_.IsSuggestOnlyWord(user_pos_token.id)) {
460459 continue;
461460 }
462461 FillTokenFromUserPOSToken(user_pos_token, &token);
5050 class UserDictionary : public DictionaryInterface {
5151 public:
5252 UserDictionary(const UserPOSInterface *user_pos,
53 const POSMatcher *pos_matcher,
53 POSMatcher pos_matcher,
5454 SuppressionDictionary *suppression_dictionary);
5555 virtual ~UserDictionary();
5656
114114
115115 std::unique_ptr<UserDictionaryReloader> reloader_;
116116 std::unique_ptr<const UserPOSInterface> user_pos_;
117 const POSMatcher *pos_matcher_;
117 const POSMatcher pos_matcher_;
118118 SuppressionDictionary *suppression_dictionary_;
119119 TokensIndex *tokens_;
120120 mutable std::unique_ptr<ReaderWriterMutex> mutex_;
219219 const testing::MockUserPosManager user_pos_manager;
220220 return new UserDictionary(
221221 new UserPOSMock(),
222 user_pos_manager.GetPOSMatcher(),
222 dictionary::POSMatcher(user_pos_manager.GetPOSMatcherData()),
223223 suppression_dictionary_.get());
224224 }
225225
226226 // Creates a user dictionary with actual pos data.
227227 UserDictionary *CreateDictionary() {
228228 const testing::MockUserPosManager user_pos_manager;
229 return new UserDictionary(UserPOS::CreateFromDataManager(user_pos_manager),
230 user_pos_manager.GetPOSMatcher(),
231 Singleton<SuppressionDictionary>::get());
229 return new UserDictionary(
230 UserPOS::CreateFromDataManager(user_pos_manager),
231 dictionary::POSMatcher(user_pos_manager.GetPOSMatcherData()),
232 Singleton<SuppressionDictionary>::get());
232233 }
233234
234235 struct Entry {
566567
567568 // "suggestion_only" should not be looked up.
568569 const testing::MockUserPosManager user_pos_manager;
569 const uint16 kNounId = user_pos_manager.GetPOSMatcher()->GetGeneralNounId();
570 const dictionary::POSMatcher pos_matcher(
571 user_pos_manager.GetPOSMatcherData());
572 const uint16 kNounId = pos_matcher.GetGeneralNounId();
570573 const Entry kExpected1[] = {{"key", "noun", kNounId, kNounId}};
571574 TestLookupExactHelper(kExpected1, arraysize(kExpected1),
572575 "key", 3, *user_dic.get());
148148 suppression_dictionary_.reset(new SuppressionDictionary);
149149 CHECK(suppression_dictionary_.get());
150150
151 pos_matcher_.Set(data_manager->GetPOSMatcherData());
152
151153 user_dictionary_.reset(
152154 new UserDictionary(UserPOS::CreateFromDataManager(*data_manager),
153 data_manager->GetPOSMatcher(),
155 pos_matcher_,
154156 suppression_dictionary_.get()));
155157 CHECK(user_dictionary_.get());
156158
162164 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
163165 dictionary_.reset(new DictionaryImpl(
164166 sysdic, // DictionaryImpl takes the ownership
165 new ValueDictionary(*data_manager->GetPOSMatcher(),
166 &sysdic->value_trie()),
167 new ValueDictionary(pos_matcher_, &sysdic->value_trie()),
167168 user_dictionary_.get(),
168169 suppression_dictionary_.get(),
169 data_manager->GetPOSMatcher()));
170 &pos_matcher_));
170171 CHECK(dictionary_.get());
171172
172173 StringPiece suffix_key_array_data, suffix_value_array_data;
202203 suppression_dictionary_.get(),
203204 connector_.get(),
204205 segmenter_.get(),
205 data_manager->GetPOSMatcher(),
206 &pos_matcher_,
206207 pos_group_.get(),
207208 suggestion_filter_.get()));
208209 CHECK(immutable_converter_.get());
227228 suffix_dictionary_.get(),
228229 connector_.get(),
229230 segmenter_.get(),
230 data_manager->GetPOSMatcher(),
231 &pos_matcher_,
231232 suggestion_filter_.get());
232233 CHECK(dictionary_predictor);
233234
234235 PredictorInterface *user_history_predictor =
235236 new UserHistoryPredictor(dictionary_.get(),
236 data_manager->GetPOSMatcher(),
237 &pos_matcher_,
237238 suppression_dictionary_.get(),
238239 enable_content_word_learning);
239240 CHECK(user_history_predictor);
249250 dictionary_.get());
250251 CHECK(rewriter_);
251252
252 converter_impl->Init(data_manager->GetPOSMatcher(),
253 converter_impl->Init(&pos_matcher_,
253254 suppression_dictionary_.get(),
254255 predictor_,
255256 rewriter_,
3434 #include "base/port.h"
3535 #include "dictionary/dictionary_interface.h"
3636 #include "dictionary/pos_group.h"
37 #include "dictionary/pos_matcher.h"
3738 #include "dictionary/user_dictionary.h"
3839 #include "engine/engine_interface.h"
3940
5354 class Engine : public EngineInterface {
5455 public:
5556 Engine();
56 virtual ~Engine();
57 ~Engine() override;
5758
5859 // Initializes the object by given a data manager (providing embedded data
5960 // set) and predictor factory function.
6364 PredictorInterface *),
6465 bool enable_content_word_learning);
6566
66 virtual ConverterInterface *GetConverter() const { return converter_.get(); }
67 virtual PredictorInterface *GetPredictor() const { return predictor_; }
68 virtual dictionary::SuppressionDictionary *GetSuppressionDictionary() {
67 ConverterInterface *GetConverter() const override { return converter_.get(); }
68 PredictorInterface *GetPredictor() const override { return predictor_; }
69 dictionary::SuppressionDictionary *GetSuppressionDictionary() override {
6970 return suppression_dictionary_.get();
7071 }
7172
72 virtual bool Reload();
73 bool Reload() override;
7374
74 virtual UserDataManagerInterface *GetUserDataManager() {
75 UserDataManagerInterface *GetUserDataManager() override {
7576 return user_data_manager_.get();
7677 }
7778
7879 private:
80 dictionary::POSMatcher pos_matcher_;
7981 std::unique_ptr<dictionary::SuppressionDictionary> suppression_dictionary_;
8082 std::unique_ptr<const Connector> connector_;
8183 std::unique_ptr<const Segmenter> segmenter_;
00 MAJOR=2
11 MINOR=17
2 BUILD=2519
2 BUILD=2520
33 REVISION=102
44 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
55 # downloaded by NaCl Mozc.
6 NACL_DICTIONARY_VERSION=21
6 NACL_DICTIONARY_VERSION=22
215215 const DictionaryInterface *suffix_dictionary = NULL) {
216216 testing::MockDataManager data_manager;
217217
218 pos_matcher_ = data_manager.GetPOSMatcher();
218 pos_matcher_.Set(data_manager.GetPOSMatcherData());
219219 suppression_dictionary_.reset(new SuppressionDictionary);
220220 if (!dictionary) {
221221 dictionary_mock_ = new DictionaryMock;
246246 suppression_dictionary_.get(),
247247 connector_.get(),
248248 segmenter_.get(),
249 pos_matcher_,
249 &pos_matcher_,
250250 pos_group_.get(),
251251 suggestion_filter_.get()));
252252 converter_.reset(new ConverterMock());
257257 suffix_dictionary_.get(),
258258 connector_.get(),
259259 segmenter_.get(),
260 data_manager.GetPOSMatcher(),
260 &pos_matcher_,
261261 suggestion_filter_.get()));
262262 }
263263
264264 const POSMatcher &pos_matcher() const {
265 return *pos_matcher_;
265 return pos_matcher_;
266266 }
267267
268268 DictionaryMock *mutable_dictionary() {
282282 }
283283
284284 private:
285 const POSMatcher *pos_matcher_;
285 POSMatcher pos_matcher_;
286286 unique_ptr<SuppressionDictionary> suppression_dictionary_;
287287 unique_ptr<const Connector> connector_;
288288 unique_ptr<const Segmenter> segmenter_;
15661566 Segmenter::CreateFromDataManager(data_manager));
15671567 unique_ptr<const SuggestionFilter> suggestion_filter(
15681568 CreateSuggestionFilter(data_manager));
1569 const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
15691570 unique_ptr<TestableDictionaryPredictor> predictor(
15701571 new TestableDictionaryPredictor(converter.get(),
15711572 immutable_converter.get(),
15731574 suffix_dictionary.get(),
15741575 connector.get(),
15751576 segmenter.get(),
1576 data_manager.GetPOSMatcher(),
1577 &pos_matcher,
15771578 suggestion_filter.get()));
15781579
15791580 // "わたしのなまえはなかのです"
31593160 Segmenter::CreateFromDataManager(data_manager));
31603161 unique_ptr<const SuggestionFilter> suggestion_filter(
31613162 CreateSuggestionFilter(data_manager));
3163 const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
31623164 unique_ptr<TestableDictionaryPredictor> predictor(
31633165 new TestableDictionaryPredictor(converter.get(),
31643166 immutable_converter.get(),
31663168 suffix_dictionary.get(),
31673169 connector.get(),
31683170 segmenter.get(),
3169 data_manager.GetPOSMatcher(),
3171 &pos_matcher,
31703172 suggestion_filter.get()));
31713173 Segments segments;
31723174 // "わたしのなまえはなかのです"
3939 #include "config/config_handler.h"
4040 #include "converter/segments.h"
4141 #include "data_manager/scoped_data_manager_initializer_for_testing.h"
42 #include "data_manager/user_pos_manager.h"
42 #include "data_manager/testing/mock_data_manager.h"
4343 #include "dictionary/dictionary_mock.h"
44 #include "dictionary/pos_matcher.h"
4445 #include "dictionary/suppression_dictionary.h"
4546 #include "prediction/predictor_interface.h"
4647 #include "prediction/user_history_predictor.h"
126127
127128 } // namespace
128129
129 class MobilePredictorTest : public testing::Test {
130 class MobilePredictorTest : public ::testing::Test {
130131 protected:
131132 virtual void SetUp() {
132133 config_.reset(new config::Config);
192193
193194 TEST_F(MobilePredictorTest, CallPredictorsForMobilePartialPrediction) {
194195 DictionaryMock dictionary_mock;
196 testing::MockDataManager data_manager;
197 const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
195198 unique_ptr<MobilePredictor> predictor(
196199 new MobilePredictor(
197200 new CheckCandSizePredictor(200),
198201 new UserHistoryPredictor(
199202 &dictionary_mock,
200 UserPosManager::GetUserPosManager()->GetPOSMatcher(),
203 &pos_matcher,
201204 Singleton<SuppressionDictionary>::get(),
202205 true)));
203206 Segments segments;
231234 }
232235
233236
234 class PredictorTest : public testing::Test {
237 class PredictorTest : public ::testing::Test {
235238 protected:
236239 virtual void SetUp() {
237240 config_.reset(new config::Config);
322322 unique_ptr<DictionaryMock> dictionary;
323323 unique_ptr<SuppressionDictionary> suppression_dictionary;
324324 unique_ptr<UserHistoryPredictor> predictor;
325 dictionary::POSMatcher pos_matcher;
325326 };
326327
327328 DataAndPredictor *CreateDataAndPredictor() const {
329330 testing::MockDataManager data_manager;
330331 ret->dictionary.reset(new DictionaryMock);
331332 ret->suppression_dictionary.reset(new SuppressionDictionary);
333 ret->pos_matcher.Set(data_manager.GetPOSMatcherData());
332334 ret->predictor.reset(
333335 new UserHistoryPredictor(ret->dictionary.get(),
334 data_manager.GetPOSMatcher(),
336 &ret->pos_matcher,
335337 ret->suppression_dictionary.get(),
336338 false));
337339 return ret;
505505 // Segment is adverb if;
506506 // 1) lid and rid is adverb.
507507 // 2) or rid is adverb suffix.
508 ((pos_matcher_->IsAdverb(segments->segment(i - 1).candidate(0).lid) &&
509 pos_matcher_->IsAdverb(segments->segment(i - 1).candidate(0).rid)) ||
510 pos_matcher_->IsAdverbSegmentSuffix(
508 ((pos_matcher_.IsAdverb(segments->segment(i - 1).candidate(0).lid) &&
509 pos_matcher_.IsAdverb(segments->segment(i - 1).candidate(0).rid)) ||
510 pos_matcher_.IsAdverbSegmentSuffix(
511511 segments->segment(i - 1).candidate(0).rid)) &&
512512 (cand.content_value != cand.value ||
513513 cand.value != "\xe3\x83\xbb")) { // "・" workaround
583583
584584 CollocationRewriter::CollocationRewriter(
585585 const DataManagerInterface *data_manager)
586 : pos_matcher_(data_manager->GetPOSMatcher()),
587 first_name_id_(pos_matcher_->GetFirstNameId()),
588 last_name_id_(pos_matcher_->GetLastNameId()) {
586 : pos_matcher_(data_manager->GetPOSMatcherData()),
587 first_name_id_(pos_matcher_.GetFirstNameId()),
588 last_name_id_(pos_matcher_.GetLastNameId()) {
589589 const char *data = NULL;
590590 size_t size = 0;
591591
3131
3232 #include "base/port.h"
3333 #include "converter/segments.h"
34 #include "dictionary/pos_matcher.h"
3435 #include "rewriter/rewriter_interface.h"
3536
3637 namespace mozc {
3738
3839 class DataManagerInterface;
39
40 namespace dictionary { class POSMatcher; }
4140
4241 class CollocationRewriter : public RewriterInterface {
4342 public:
5756 Segment *seg) const;
5857 bool RewriteCollocation(Segments *segments) const;
5958
60 const dictionary::POSMatcher *pos_matcher_;
59 const dictionary::POSMatcher pos_matcher_;
6160 const uint16 first_name_id_;
6261 const uint16 last_name_id_;
6362
7373 const size_t segments_size;
7474 };
7575
76 CollocationRewriterTest() {}
77 virtual ~CollocationRewriterTest() {}
78
79 virtual void SetUp() {
76 CollocationRewriterTest() = default;
77 ~CollocationRewriterTest() override = default;
78
79 void SetUp() override {
8080 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
8181
8282 const mozc::testing::MockDataManager data_manager;
83 pos_matcher_ = data_manager.GetPOSMatcher();
83 pos_matcher_.Set(data_manager.GetPOSMatcherData());
8484 collocation_rewriter_.reset(new CollocationRewriter(&data_manager));
8585 }
8686
125125 return result;
126126 }
127127
128 const POSMatcher *pos_matcher_;
128 POSMatcher pos_matcher_;
129129
130130 private:
131131 std::unique_ptr<const CollocationRewriter> collocation_rewriter_;
142142 const char *kNekowo =
143143 "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを"
144144 const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ"
145 const uint16 id = pos_matcher_->GetUnknownId();
145 const uint16 id = pos_matcher_.GetUnknownId();
146146 const CandidateData kNekowoCands[] = {
147147 {kNekowo, kNeko,
148148 "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを"
194194 "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D\xE3\x82\x92"; // "まぐろを"
195195 const char *kMaguro =
196196 "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D"; // "まぐろ"
197 const uint16 id = pos_matcher_->GetUnknownId();
197 const uint16 id = pos_matcher_.GetUnknownId();
198198 const CandidateData kMagurowoCands[] = {
199199 {kMagurowo, kMaguro,
200200 "\xE3\x83\x9E\xE3\x82\xB0\xE3\x83\xAD\xE3\x82\x92", // "マグロを"
240240 const char *kNekowo =
241241 "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを"
242242 const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ"
243 const uint16 id = pos_matcher_->GetUnknownId();
243 const uint16 id = pos_matcher_.GetUnknownId();
244244 const CandidateData kNekowoCands[] = {
245245 {kNekowo, kNeko,
246246 "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを"
254254
255255 // "すごく"
256256 const char *kSugoku = "\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f";
257 const uint16 adverb_id = pos_matcher_->GetAdverbId();
257 const uint16 adverb_id = pos_matcher_.GetAdverbId();
258258 const CandidateData kSugokuCands[] = {
259259 {kSugoku, kSugoku, kSugoku, kSugoku, 0, adverb_id, adverb_id},
260260 };
297297 const char *kNekowo =
298298 "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを"
299299 const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ"
300 const uint16 id = pos_matcher_->GetUnknownId();
300 const uint16 id = pos_matcher_.GetUnknownId();
301301 const CandidateData kNekowoCands[] = {
302302 {kNekowo, kNeko,
303303 "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを"
361361 const char *kNekowo =
362362 "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92"; // "ねこを"
363363 const char *kNeko = "\xE3\x81\xAD\xE3\x81\x93"; // "ねこ"
364 const uint16 id = pos_matcher_->GetUnknownId();
364 const uint16 id = pos_matcher_.GetUnknownId();
365365 const CandidateData kNekowoCands[] = {
366366 {kNekowo, kNeko,
367367 "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92", // "ネコを"
4040 #include "base/hash.h"
4141 #include "base/logging.h"
4242 #include "data_manager/user_pos_manager.h"
43 #include "dictionary/pos_matcher.h"
4443 #include "dictionary/user_pos.h"
4544
4645 namespace mozc {
8180 DictionaryGenerator::DictionaryGenerator()
8281 : token_pool_(new ObjectPool<Token>(kTokenSize)),
8382 token_map_(new map<uint64, Token *>),
84 open_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher()
85 ->GetOpenBracketId()),
86 close_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher()
87 ->GetCloseBracketId()) {
83 pos_matcher_(UserPosManager::GetUserPosManager()->GetPOSMatcherData()),
84 open_bracket_id_(pos_matcher_.GetOpenBracketId()),
85 close_bracket_id_(pos_matcher_.GetCloseBracketId()) {
8886 user_pos_.reset(dictionary::UserPOS::CreateFromDataManager(
8987 *UserPosManager::GetUserPosManager()));
9088 }
3737 #include <vector>
3838
3939 #include "base/port.h"
40 #include "dictionary/pos_matcher.h"
4041
4142 namespace mozc {
4243
108109 bool Output(const string &filename) const;
109110
110111 private:
111 std::unique_ptr<ObjectPool<Token> > token_pool_;
112 std::unique_ptr<map<uint64, Token *> > token_map_;
112 std::unique_ptr<ObjectPool<Token>> token_pool_;
113 std::unique_ptr<map<uint64, Token *>> token_map_;
113114 std::unique_ptr<const UserPOSInterface> user_pos_;
115 const dictionary::POSMatcher pos_matcher_;
114116 const uint16 open_bracket_id_;
115117 const uint16 close_bracket_id_;
116118
421421 TEST_F(EmojiRewriterTest, CheckDescription) {
422422 Segments segments;
423423 VariantsRewriter variants_rewriter(
424 UserPosManager::GetUserPosManager()->GetPOSMatcher());
424 dictionary::POSMatcher(
425 UserPosManager::GetUserPosManager()->GetPOSMatcherData()));
425426
426427 SetSegment("Emoji", "test", &segments);
427428 EXPECT_TRUE(rewriter_->Rewrite(convreq_, &segments));
3737 #include "base/util.h"
3838 #include "converter/segments.h"
3939 #include "data_manager/data_manager_interface.h"
40 #include "dictionary/pos_matcher.h"
4140 #include "rewriter/number_compound_util.h"
4241
4342 namespace mozc {
129128 } // namespace
130129
131130 FocusCandidateRewriter::FocusCandidateRewriter(
132 const DataManagerInterface *data_manager) {
131 const DataManagerInterface *data_manager)
132 : pos_matcher_(data_manager->GetPOSMatcherData()) {
133133 const char *array = nullptr;
134134 size_t size = 0;
135135 data_manager->GetCounterSuffixSortedArray(&array, &size);
138138 // in debug build.
139139 DCHECK(SerializedStringArray::VerifyData(data));
140140 suffix_array_.Set(data);
141
142 pos_matcher_ = data_manager->GetPOSMatcher();
143141 }
144142
145143 FocusCandidateRewriter::~FocusCandidateRewriter() {}
398396 // Otherwise, the following wrong rewrite will occur.
399397 // Example: "一階へは | 二回 | 行った -> 一階へは | 二階 | 行った"
400398 if (cand.content_value.size() != cand.value.size()) {
401 if (!pos_matcher_->IsParallelMarker(cand.rid)) {
399 if (!pos_matcher_.IsParallelMarker(cand.rid)) {
402400 return false;
403401 }
404402 }
3232 #include "base/port.h"
3333 #include "base/serialized_string_array.h"
3434 #include "converter/segments.h"
35 #include "dictionary/pos_matcher.h"
3536 #include "rewriter/rewriter_interface.h"
3637
3738 namespace mozc {
3839
3940 class DataManagerInterface;
4041 struct CounterSuffixEntry;
41
42 namespace dictionary { class POSMatcher; }
4342
4443 class FocusCandidateRewriter : public RewriterInterface {
4544 public:
8382 uint32 *script_type) const;
8483
8584 SerializedStringArray suffix_array_;
86 const dictionary::POSMatcher *pos_matcher_;
85 const dictionary::POSMatcher pos_matcher_;
8786
8887 DISALLOW_COPY_AND_ASSIGN(FocusCandidateRewriter);
8988 };
102102
103103 LanguageAwareRewriter *CreateLanguageAwareRewriter() const {
104104 return new LanguageAwareRewriter(
105 *UserPosManager::GetUserPosManager()->GetPOSMatcher(),
105 dictionary::POSMatcher(
106 UserPosManager::GetUserPosManager()->GetPOSMatcherData()),
106107 dictionary_mock_.get());
107108 }
108109
148148 ASSERT_TRUE(suffix_array.Init(data));
149149
150150 const testing::MockDataManager data_manager;
151 const POSMatcher* pos_matcher = data_manager.GetPOSMatcher();
151 const POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
152152
153153 Segment::Candidate c;
154154
155155 c.Init();
156 c.lid = pos_matcher->GetNumberId();
157 c.rid = pos_matcher->GetNumberId();
158 EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c));
156 c.lid = pos_matcher.GetNumberId();
157 c.rid = pos_matcher.GetNumberId();
158 EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c));
159159
160160 c.Init();
161 c.lid = pos_matcher->GetKanjiNumberId();
162 c.rid = pos_matcher->GetKanjiNumberId();
163 EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c));
161 c.lid = pos_matcher.GetKanjiNumberId();
162 c.rid = pos_matcher.GetKanjiNumberId();
163 EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c));
164164
165165 c.Init();
166 c.lid = pos_matcher->GetNumberId();
167 c.rid = pos_matcher->GetCounterSuffixWordId();
168 EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c));
166 c.lid = pos_matcher.GetNumberId();
167 c.rid = pos_matcher.GetCounterSuffixWordId();
168 EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c));
169169
170170 c.Init();
171 c.lid = pos_matcher->GetNumberId();
172 c.rid = pos_matcher->GetParallelMarkerId();
173 EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c));
171 c.lid = pos_matcher.GetNumberId();
172 c.rid = pos_matcher.GetParallelMarkerId();
173 EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c));
174174
175175 c.Init();
176176 c.value = "\xE4\xB8\x80\xE9\x9A\x8E"; //"一階"
177177 c.content_value = "\xE4\xB8\x80\xE9\x9A\x8E"; //"一階"
178 c.lid = pos_matcher->GetNumberId();
179 c.rid = pos_matcher->GetNumberId();
180 EXPECT_TRUE(IsNumber(suffix_array, *pos_matcher, c));
178 c.lid = pos_matcher.GetNumberId();
179 c.rid = pos_matcher.GetNumberId();
180 EXPECT_TRUE(IsNumber(suffix_array, pos_matcher, c));
181181
182182 c.Init();
183 c.lid = pos_matcher->GetAdverbId();
184 c.rid = pos_matcher->GetAdverbId();
185 EXPECT_FALSE(IsNumber(suffix_array, *pos_matcher, c));
183 c.lid = pos_matcher.GetAdverbId();
184 c.rid = pos_matcher.GetAdverbId();
185 EXPECT_FALSE(IsNumber(suffix_array, pos_matcher, c));
186186 }
187187
188188 } // namespace number_compound_util
421421 } // namespace
422422
423423 NumberRewriter::NumberRewriter(const DataManagerInterface *data_manager)
424 : pos_matcher_(data_manager->GetPOSMatcher()) {
424 : pos_matcher_(data_manager->GetPOSMatcherData()) {
425425 const char *array = nullptr;
426426 size_t size = 0;
427427 data_manager->GetCounterSuffixSortedArray(&array, &size);
458458
459459 for (size_t i = 0; i < segments->conversion_segments_size(); ++i) {
460460 Segment *seg = segments->mutable_conversion_segment(i);
461 modified |= RewriteOneSegment(suffix_array_, *pos_matcher_,
461 modified |= RewriteOneSegment(suffix_array_, pos_matcher_,
462462 exec_radix_conversion, seg);
463463 }
464464
5050
5151 private:
5252 SerializedStringArray suffix_array_;
53 const dictionary::POSMatcher *pos_matcher_;
53 const dictionary::POSMatcher pos_matcher_;
5454
5555 DISALLOW_COPY_AND_ASSIGN(NumberRewriter);
5656 };
8989 return false;
9090 }
9191
92 Segment *SetupSegments(const POSMatcher* pos_matcher,
92 Segment *SetupSegments(const POSMatcher& pos_matcher,
9393 const string &candidate_value, Segments *segments) {
9494 segments->Clear();
9595 Segment *segment = segments->push_back_segment();
9696 Segment::Candidate *candidate = segment->add_candidate();
9797 candidate->Init();
98 candidate->lid = pos_matcher->GetNumberId();
99 candidate->rid = pos_matcher->GetNumberId();
98 candidate->lid = pos_matcher.GetNumberId();
99 candidate->rid = pos_matcher.GetNumberId();
100100 candidate->value = candidate_value;
101101 candidate->content_value = candidate_value;
102102 return segment;
129129 // considering this class as POD.
130130 NumberRewriterTest() {}
131131
132 virtual void SetUp() {
132 void SetUp() override {
133133 #ifdef MOZC_USE_PACKED_DICTIONARY
134134 // TODO(noriyukit): Currently this test uses mock data manager. Check if we
135135 // can remove this registration of packed data manager.
142142 #endif // MOZC_USE_PACKED_DICTIONARY
143143
144144 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
145 pos_matcher_ = mock_data_manager_.GetPOSMatcher();
146 }
147
148 virtual void TearDown() {
145 pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData());
146 }
147
148 void TearDown() override {
149149 #ifdef MOZC_USE_PACKED_DICTIONARY
150150 // Unregisters mocked PackedDataManager.
151151 packed::RegisterPackedDataManager(NULL);
157157 }
158158
159159 const testing::MockDataManager mock_data_manager_;
160 const POSMatcher *pos_matcher_;
160 POSMatcher pos_matcher_;
161161 const ConversionRequest default_request_;
162162 };
163163
176176 Segment *seg = segments.push_back_segment();
177177 Segment::Candidate *candidate = seg->add_candidate();
178178 candidate->Init();
179 candidate->lid = pos_matcher_->GetNumberId();
180 candidate->rid = pos_matcher_->GetNumberId();
179 candidate->lid = pos_matcher_.GetNumberId();
180 candidate->rid = pos_matcher_.GetNumberId();
181181 candidate->value = "012";
182182 candidate->content_value = "012";
183183
251251 Segment *seg = segments.push_back_segment();
252252 Segment::Candidate *candidate = seg->add_candidate();
253253 candidate->Init();
254 candidate->lid = pos_matcher_->GetNumberId();
255 candidate->rid = pos_matcher_->GetNumberId();
254 candidate->lid = pos_matcher_.GetNumberId();
255 candidate->rid = pos_matcher_.GetNumberId();
256256 candidate->value = "012";
257257 candidate->content_value = "012";
258258 EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
267267 Segment *seg = segments.push_back_segment();
268268 Segment::Candidate *candidate = seg->add_candidate();
269269 candidate->Init();
270 candidate->lid = pos_matcher_->GetNumberId();
271 candidate->rid = pos_matcher_->GetNumberId();
270 candidate->lid = pos_matcher_.GetNumberId();
271 candidate->rid = pos_matcher_.GetNumberId();
272272 candidate->value = "012""\xE3\x81\x8C"; // "012が"
273273 candidate->content_value = "012";
274274
325325 Segment *seg = segments.push_back_segment();
326326 Segment::Candidate *candidate = seg->add_candidate();
327327 candidate->Init();
328 candidate->lid = pos_matcher_->GetNumberId();
329 candidate->rid = pos_matcher_->GetCounterSuffixWordId();
328 candidate->lid = pos_matcher_.GetNumberId();
329 candidate->rid = pos_matcher_.GetCounterSuffixWordId();
330330 candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x80\x8B"; // "十五個"
331331 candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x80\x8B"; // ditto
332332
354354 Segment *seg = segments.push_back_segment();
355355 Segment::Candidate *candidate = seg->add_candidate();
356356 candidate->Init();
357 candidate->lid = pos_matcher_->GetNumberId();
358 candidate->rid = pos_matcher_->GetCounterSuffixWordId();
357 candidate->lid = pos_matcher_.GetNumberId();
358 candidate->rid = pos_matcher_.GetCounterSuffixWordId();
359359 candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x9B\x9E"; // "十五回"
360360 candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE5\x9B\x9E"; // ditto
361361 candidate = seg->add_candidate();
362362 candidate->Init();
363 candidate->lid = pos_matcher_->GetNumberId();
364 candidate->rid = pos_matcher_->GetCounterSuffixWordId();
363 candidate->lid = pos_matcher_.GetNumberId();
364 candidate->rid = pos_matcher_.GetCounterSuffixWordId();
365365 candidate->value = "\xE5\x8D\x81\xE4\xBA\x94\xE9\x9A\x8E"; // "十五階"
366366 candidate->content_value = "\xE5\x8D\x81\xE4\xBA\x94\xE9\x9A\x8E"; // ditto
367367
457457 second_candidate->Init();
458458
459459 second_candidate->value = "0";
460 second_candidate->lid = pos_matcher_->GetNumberId();
461 second_candidate->rid = pos_matcher_->GetNumberId();
460 second_candidate->lid = pos_matcher_.GetNumberId();
461 second_candidate->rid = pos_matcher_.GetNumberId();
462462 second_candidate->content_value = second_candidate->value;
463463
464464 EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
521521 Segment *seg = segments.push_back_segment();
522522 Segment::Candidate *candidate = seg->add_candidate();
523523 candidate->Init();
524 candidate->lid = pos_matcher_->GetNumberId();
525 candidate->rid = pos_matcher_->GetNumberId();
524 candidate->lid = pos_matcher_.GetNumberId();
525 candidate->rid = pos_matcher_.GetNumberId();
526526 candidate->value = "0";
527527 candidate->content_value = "0";
528528
563563 Segment *seg = segments.push_back_segment();
564564 Segment::Candidate *candidate = seg->add_candidate();
565565 candidate->Init();
566 candidate->lid = pos_matcher_->GetNumberId();
567 candidate->rid = pos_matcher_->GetNumberId();
566 candidate->lid = pos_matcher_.GetNumberId();
567 candidate->rid = pos_matcher_.GetNumberId();
568568 candidate->value = "00";
569569 candidate->content_value = "00";
570570
603603 Segment *seg = segments.push_back_segment();
604604 Segment::Candidate *candidate = seg->add_candidate();
605605 candidate->Init();
606 candidate->lid = pos_matcher_->GetNumberId();
607 candidate->rid = pos_matcher_->GetNumberId();
606 candidate->lid = pos_matcher_.GetNumberId();
607 candidate->rid = pos_matcher_.GetNumberId();
608608 candidate->value = "1000000000000000000";
609609 candidate->content_value = "1000000000000000000";
610610
687687 Segment *seg = segments.push_back_segment();
688688 Segment::Candidate *candidate = seg->add_candidate();
689689 candidate->Init();
690 candidate->lid = pos_matcher_->GetNumberId();
691 candidate->rid = pos_matcher_->GetNumberId();
690 candidate->lid = pos_matcher_.GetNumberId();
691 candidate->rid = pos_matcher_.GetNumberId();
692692 candidate->value = "18446744073709551616"; // 2^64
693693 candidate->content_value = "18446744073709551616";
694694
792792 Segment *seg = segments.push_back_segment();
793793 Segment::Candidate *candidate = seg->add_candidate();
794794 candidate->Init();
795 candidate->lid = pos_matcher_->GetNumberId();
796 candidate->rid = pos_matcher_->GetNumberId();
795 candidate->lid = pos_matcher_.GetNumberId();
796 candidate->rid = pos_matcher_.GetNumberId();
797797
798798 // 10^100 as "100000 ... 0"
799799 string input = "1";
872872 Segment::Candidate *candidate = segment->add_candidate();
873873 candidate = segment->add_candidate();
874874 candidate->Init();
875 candidate->lid = pos_matcher_->GetNumberId();
876 candidate->rid = pos_matcher_->GetNumberId();
875 candidate->lid = pos_matcher_.GetNumberId();
876 candidate->rid = pos_matcher_.GetNumberId();
877877 // "さんびゃく"
878878 candidate->key =
879879 "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f";
907907 "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f");
908908 Segment::Candidate *candidate = segment->add_candidate();
909909 candidate->Init();
910 candidate->lid = pos_matcher_->GetNumberId();
911 candidate->rid = pos_matcher_->GetNumberId();
910 candidate->lid = pos_matcher_.GetNumberId();
911 candidate->rid = pos_matcher_.GetNumberId();
912912 // "さんびゃく"
913913 candidate->key =
914914 "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f";
919919
920920 candidate = segment->add_candidate();
921921 candidate->Init();
922 candidate->lid = pos_matcher_->GetNumberId();
923 candidate->rid = pos_matcher_->GetNumberId();
922 candidate->lid = pos_matcher_.GetNumberId();
923 candidate->rid = pos_matcher_.GetNumberId();
924924 // "さんびゃく"
925925 candidate->key =
926926 "\xe3\x81\x95\xe3\x82\x93\xe3\x81\xb3\xe3\x82\x83\xe3\x81\x8f";
953953 segment->set_key("\xe3\x81\x84\xe3\x81\xa1");
954954 Segment::Candidate *candidate = segment->add_candidate();
955955 candidate->Init();
956 candidate->lid = pos_matcher_->GetUnknownId(); // Not number POS
957 candidate->rid = pos_matcher_->GetUnknownId();
956 candidate->lid = pos_matcher_.GetUnknownId(); // Not number POS
957 candidate->rid = pos_matcher_.GetUnknownId();
958958 // "いち"
959959 candidate->key = "\xe3\x81\x84\xe3\x81\xa1";
960960 // "いち"
966966
967967 candidate = segment->add_candidate();
968968 candidate->Init();
969 candidate->lid = pos_matcher_->GetNumberId(); // Number POS
970 candidate->rid = pos_matcher_->GetNumberId();
969 candidate->lid = pos_matcher_.GetNumberId(); // Number POS
970 candidate->rid = pos_matcher_.GetNumberId();
971971 // "いち"
972972 candidate->key = "\xe3\x81\x84\xe3\x81\xa1";
973973 // "いち"
991991 // "壱"
992992 EXPECT_TRUE(FindCandidateId(segments.segment(0), "\xe5\xa3\xb1", &daiji_pos));
993993 EXPECT_GT(daiji_pos, 0);
994 EXPECT_EQ(pos_matcher_->GetNumberId(),
994 EXPECT_EQ(pos_matcher_.GetNumberId(),
995995 segments.segment(0).candidate(daiji_pos).lid);
996 EXPECT_EQ(pos_matcher_->GetNumberId(),
996 EXPECT_EQ(pos_matcher_.GetNumberId(),
997997 segments.segment(0).candidate(daiji_pos).rid);
998998 }
999999
10191019 Segment *seg = segments.push_back_segment();
10201020 Segment::Candidate *candidate = seg->add_candidate();
10211021 candidate->Init();
1022 candidate->lid = pos_matcher_->GetNumberId();
1023 candidate->rid = pos_matcher_->GetNumberId();
1022 candidate->lid = pos_matcher_.GetNumberId();
1023 candidate->rid = pos_matcher_.GetNumberId();
10241024 candidate->value = kSuccess[i][0];
10251025 candidate->content_value = kSuccess[i][0];
10261026 EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
10461046 Segment *seg = segments.push_back_segment();
10471047 Segment::Candidate *candidate = seg->add_candidate();
10481048 candidate->Init();
1049 candidate->lid = pos_matcher_->GetNumberId();
1050 candidate->rid = pos_matcher_->GetNumberId();
1049 candidate->lid = pos_matcher_.GetNumberId();
1050 candidate->rid = pos_matcher_.GetNumberId();
10511051 candidate->value = kFail[i][0];
10521052 candidate->content_value = kFail[i][0];
10531053 EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
10721072 Segment *seg = segments.push_back_segment();
10731073 Segment::Candidate *candidate = seg->add_candidate();
10741074 candidate->Init();
1075 candidate->lid = pos_matcher_->GetGeneralNounId();
1076 candidate->rid = pos_matcher_->GetGeneralNounId();
1075 candidate->lid = pos_matcher_.GetGeneralNounId();
1076 candidate->rid = pos_matcher_.GetGeneralNounId();
10771077 // "はやぶさ"
10781078 candidate->key = "\xE3\x81\xAF\xE3\x82\x84\xE3\x81\xB6\xE3\x81\x95";
10791079 candidate->content_key = candidate->key;
11341134 cand->content_key = cand->key;
11351135 cand->value = "\xE7\x99\xBE\xE8\x88\x8C\xE9\xB3\xA5"; // "百舌鳥"
11361136 cand->content_value = cand->value;
1137 cand->lid = pos_matcher_->GetGeneralNounId();
1138 cand->rid = pos_matcher_->GetGeneralNounId();
1137 cand->lid = pos_matcher_.GetGeneralNounId();
1138 cand->rid = pos_matcher_.GetGeneralNounId();
11391139 EXPECT_FALSE(number_rewriter->Rewrite(default_request_, &segments));
11401140 }
11411141
11481148 Segment *seg = segments.push_back_segment();
11491149 Segment::Candidate *candidate = seg->add_candidate();
11501150 candidate->Init();
1151 candidate->lid = pos_matcher_->GetNumberId();
1152 candidate->rid = pos_matcher_->GetNumberId();
1151 candidate->lid = pos_matcher_.GetNumberId();
1152 candidate->rid = pos_matcher_.GetNumberId();
11531153 candidate->key = "090";
11541154 candidate->value = "090";
11551155 candidate->content_key = "090";
11921192 "\xe3\x81\xa8\xe3\x81\xb1\xe3\x81\xa3\xe3\x81\x8f");
11931193 Segment::Candidate *candidate = seg->add_candidate();
11941194 candidate->Init();
1195 candidate->lid = pos_matcher_->GetNumberId();
1196 candidate->rid = pos_matcher_->GetNumberId();
1195 candidate->lid = pos_matcher_.GetNumberId();
1196 candidate->rid = pos_matcher_.GetNumberId();
11971197 // "ひとり"
11981198 candidate->key = "\xe3\x81\xb2\xe3\x81\xa8\xe3\x82\x8a";
11991199 // "一人"
8888 RewriterImpl::RewriterImpl(const ConverterInterface *parent_converter,
8989 const DataManagerInterface *data_manager,
9090 const PosGroup *pos_group,
91 const DictionaryInterface *dictionary) {
91 const DictionaryInterface *dictionary)
92 : pos_matcher_(data_manager->GetPOSMatcherData()) {
9293 DCHECK(parent_converter);
9394 DCHECK(data_manager);
9495 DCHECK(pos_group);
95 const POSMatcher *pos_matcher = data_manager->GetPOSMatcher();
96 DCHECK(pos_matcher);
9796 // |dictionary| can be NULL
9897
9998 AddRewriter(new UserDictionaryRewriter);
10099 AddRewriter(new FocusCandidateRewriter(data_manager));
101 AddRewriter(new LanguageAwareRewriter(*pos_matcher, dictionary));
102 AddRewriter(new TransliterationRewriter(*pos_matcher));
100 AddRewriter(new LanguageAwareRewriter(pos_matcher_, dictionary));
101 AddRewriter(new TransliterationRewriter(pos_matcher_));
103102 AddRewriter(new EnglishVariantsRewriter);
104103 AddRewriter(new NumberRewriter(data_manager));
105104 AddRewriter(new CollocationRewriter(data_manager));
106 AddRewriter(new SingleKanjiRewriter(*pos_matcher));
105 AddRewriter(new SingleKanjiRewriter(pos_matcher_));
107106 AddRewriter(new EmojiRewriter(
108107 kEmojiDataList, arraysize(kEmojiDataList),
109108 kEmojiTokenList, arraysize(kEmojiTokenList),
112111 AddRewriter(new CalculatorRewriter(parent_converter));
113112 AddRewriter(new SymbolRewriter(parent_converter, data_manager));
114113 AddRewriter(new UnicodeRewriter(parent_converter));
115 AddRewriter(new VariantsRewriter(pos_matcher));
116 AddRewriter(new ZipcodeRewriter(pos_matcher));
114 AddRewriter(new VariantsRewriter(pos_matcher_));
115 AddRewriter(new ZipcodeRewriter(&pos_matcher_));
117116 AddRewriter(new DiceRewriter);
118117
119118 if (FLAGS_use_history_rewriter) {
120119 AddRewriter(new UserBoundaryHistoryRewriter(parent_converter));
121 AddRewriter(new UserSegmentHistoryRewriter(pos_matcher, pos_group));
120 AddRewriter(new UserSegmentHistoryRewriter(&pos_matcher_, pos_group));
122121 }
123122
124123 AddRewriter(new DateRewriter);
3232 #include "base/port.h"
3333 #include "dictionary/dictionary_interface.h"
3434 #include "dictionary/pos_group.h"
35 #include "dictionary/pos_matcher.h"
3536 #include "rewriter/merger_rewriter.h"
3637
3738 namespace mozc {
4748 const dictionary::DictionaryInterface *dictionary);
4849
4950 private:
51 const dictionary::POSMatcher pos_matcher_;
5052 DISALLOW_COPY_AND_ASSIGN(RewriterImpl);
5153 };
5254
238238 } // namespace
239239
240240 SingleKanjiRewriter::SingleKanjiRewriter(const POSMatcher &pos_matcher)
241 : pos_matcher_(&pos_matcher) {}
241 : pos_matcher_(pos_matcher) {}
242242
243243 SingleKanjiRewriter::~SingleKanjiRewriter() {}
244244
269269 continue;
270270 }
271271 InsertCandidate(is_single_segment,
272 pos_matcher_->GetGeneralSymbolId(),
272 pos_matcher_.GetGeneralSymbolId(),
273273 kanji_list,
274274 segments->mutable_conversion_segment(i));
275275
288288 const Segment::Candidate &right_candidate =
289289 segments->conversion_segment(i + 1).candidate(0);
290290 // right segment must be a noun.
291 if (!pos_matcher_->IsContentNoun(right_candidate.lid)) {
291 if (!pos_matcher_.IsContentNoun(right_candidate.lid)) {
292292 continue;
293293 }
294294 } else if (segments_size != 1) { // also apply if segments_size == 1.
301301 if (token == NULL) {
302302 continue;
303303 }
304 InsertNounPrefix(*pos_matcher_,
304 InsertNounPrefix(pos_matcher_,
305305 segments->mutable_conversion_segment(i),
306306 token->value, token->value_size);
307307 // Ignore the next noun content word.
3737 class SingleKanjiRewriter : public RewriterInterface {
3838 public:
3939 explicit SingleKanjiRewriter(const dictionary::POSMatcher &pos_matcher);
40 virtual ~SingleKanjiRewriter();
40 ~SingleKanjiRewriter() override;
4141
42 virtual int capability(const ConversionRequest &request) const;
42 int capability(const ConversionRequest &request) const override;
4343
44 virtual bool Rewrite(const ConversionRequest &request,
45 Segments *segments) const;
44 bool Rewrite(const ConversionRequest &request,
45 Segments *segments) const override;
4646
4747 private:
48 const dictionary::POSMatcher *pos_matcher_;
48 const dictionary::POSMatcher pos_matcher_;
4949 };
5050
5151 } // namespace mozc
5151 protected:
5252 SingleKanjiRewriterTest() {
5353 data_manager_.reset(new testing::MockDataManager);
54 pos_matcher_ = data_manager_->GetPOSMatcher();
55 }
56
57 virtual ~SingleKanjiRewriterTest() {}
58
59 virtual void SetUp() {
54 pos_matcher_.Set(data_manager_->GetPOSMatcherData());
55 }
56
57 ~SingleKanjiRewriterTest() override = default;
58
59 void SetUp() override {
6060 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
6161 }
6262
6363 SingleKanjiRewriter *CreateSingleKanjiRewriter() const {
64 return new SingleKanjiRewriter(*pos_matcher_);
64 return new SingleKanjiRewriter(pos_matcher_);
6565 }
6666
6767 const POSMatcher &pos_matcher() {
68 return *pos_matcher_;
68 return pos_matcher_;
6969 }
7070
7171 const ConversionRequest default_request_;
7272
7373 private:
7474 std::unique_ptr<testing::MockDataManager> data_manager_;
75 const POSMatcher *pos_matcher_;
75 POSMatcher pos_matcher_;
7676 };
7777
7878 TEST_F(SingleKanjiRewriterTest, CapabilityTest) {
105105
106106 TransliterationRewriter *CreateTransliterationRewriter() const {
107107 return new TransliterationRewriter(
108 *UserPosManager::GetUserPosManager()->GetPOSMatcher());
108 dictionary::POSMatcher(
109 UserPosManager::GetUserPosManager()->GetPOSMatcherData()));
109110 }
110111
111112 const commands::Request &default_request() const {
5050
5151 UsageRewriter::UsageRewriter(const DataManagerInterface *data_manager,
5252 const DictionaryInterface *dictionary)
53 : pos_matcher_(data_manager->GetPOSMatcher()),
53 : pos_matcher_(data_manager->GetPOSMatcherData()),
5454 dictionary_(dictionary),
5555 base_conjugation_suffix_(nullptr) {
5656 StringPiece base_conjugation_suffix_data;
142142 const Segment::Candidate &candidate) const {
143143 // We check Unknown POS ("名詞,サ変接続") as well, since
144144 // target verbs/adjectives may be in web dictionary.
145 if (!pos_matcher_->IsContentWordWithConjugation(candidate.lid) &&
146 !pos_matcher_->IsUnknown(candidate.lid)) {
145 if (!pos_matcher_.IsContentWordWithConjugation(candidate.lid) &&
146 !pos_matcher_.IsUnknown(candidate.lid)) {
147147 return UsageDictItemIterator();
148148 }
149149
112112 const Segment::Candidate &candidate) const;
113113
114114 map<StrPair, UsageDictItemIterator> key_value_usageitem_map_;
115 const dictionary::POSMatcher *pos_matcher_;
115 const dictionary::POSMatcher pos_matcher_;
116116 const dictionary::DictionaryInterface *dictionary_;
117117 const uint32 *base_conjugation_suffix_;
118118 SerializedStringArray string_array_;
8282 config::ConfigHandler::GetDefaultConfig(&config_);
8383
8484 data_manager_.reset(new testing::MockDataManager);
85
85 pos_matcher_.Set(data_manager_->GetPOSMatcherData());
8686 suppression_dictionary_.reset(new SuppressionDictionary);
8787 user_dictionary_.reset(
8888 new UserDictionary(UserPOS::CreateFromDataManager(*data_manager_),
89 data_manager_->GetPOSMatcher(),
89 pos_matcher_,
9090 suppression_dictionary_.get()));
9191 }
9292
108108 std::unique_ptr<SuppressionDictionary> suppression_dictionary_;
109109 std::unique_ptr<UserDictionary> user_dictionary_;
110110 std::unique_ptr<testing::MockDataManager> data_manager_;
111 dictionary::POSMatcher pos_matcher_;
111112 };
112113
113114 TEST_F(UsageRewriterTest, CapabilityTest) {
113113 request_.set_config(&config_);
114114 }
115115
116 virtual void SetUp() {
116 void SetUp() override {
117117 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
118118
119119 ConfigHandler::GetDefaultConfig(&config_);
130130
131131 Clock::SetClockForUnitTest(NULL);
132132
133 pos_matcher_ = mock_data_manager_.GetPOSMatcher();
133 pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData());
134134 pos_group_.reset(new PosGroup(mock_data_manager_.GetPosGroupData()));
135 ASSERT_TRUE(pos_matcher_ != NULL);
136135 ASSERT_TRUE(pos_group_.get() != NULL);
137136 }
138137
139 virtual void TearDown() {
138 void TearDown() override {
140139 Clock::SetClockForUnitTest(NULL);
141140
142141 std::unique_ptr<UserSegmentHistoryRewriter> rewriter(
148147 }
149148
150149 const POSMatcher &pos_matcher() const {
151 return *pos_matcher_;
150 return pos_matcher_;
152151 }
153152
154153 NumberRewriter *CreateNumberRewriter() const {
156155 }
157156
158157 UserSegmentHistoryRewriter *CreateUserSegmentHistoryRewriter() const {
159 return new UserSegmentHistoryRewriter(pos_matcher_, pos_group_.get());
158 return new UserSegmentHistoryRewriter(&pos_matcher_, pos_group_.get());
160159 }
161160
162161 void SetNumberForm(Config::CharacterForm form) {
177176
178177 private:
179178 const testing::MockDataManager mock_data_manager_;
180 const POSMatcher *pos_matcher_;
179 POSMatcher pos_matcher_;
181180 std::unique_ptr<const PosGroup> pos_group_;
182181 DISALLOW_COPY_AND_ASSIGN(UserSegmentHistoryRewriterTest);
183182 };
131131 return true;
132132 }
133133
134 VariantsRewriter::VariantsRewriter(const POSMatcher *pos_matcher)
134 VariantsRewriter::VariantsRewriter(const POSMatcher pos_matcher)
135135 : pos_matcher_(pos_matcher) {}
136136
137137 VariantsRewriter::~VariantsRewriter() {}
324324 if (candidate->attributes & Segment::Candidate::NO_EXTRA_DESCRIPTION) {
325325 continue;
326326 }
327 SetDescriptionForTransliteration(*pos_matcher_, candidate);
327 SetDescriptionForTransliteration(pos_matcher_, candidate);
328328 }
329329
330330 // Regular Candidate
343343
344344 if (original_candidate->attributes &
345345 Segment::Candidate::NO_VARIANTS_EXPANSION) {
346 SetDescriptionForCandidate(*pos_matcher_, original_candidate);
346 SetDescriptionForCandidate(pos_matcher_, original_candidate);
347347 VLOG(1) << "Canidate has NO_NORMALIZATION node";
348348 continue;
349349 }
355355 &alternative_content_value,
356356 &default_inner_segment_boundary,
357357 &alternative_inner_segment_boundary)) {
358 SetDescriptionForCandidate(*pos_matcher_, original_candidate);
358 SetDescriptionForCandidate(pos_matcher_, original_candidate);
359359 continue;
360360 }
361361
408408 new_candidate->lid = original_candidate->lid;
409409 new_candidate->rid = original_candidate->rid;
410410 new_candidate->description = original_candidate->description;
411 SetDescription(*pos_matcher_, default_description_type, new_candidate);
411 SetDescription(pos_matcher_, default_description_type, new_candidate);
412412
413413 original_candidate->value = alternative_value;
414414 original_candidate->content_value = alternative_content_value;
415 SetDescription(*pos_matcher_,
415 SetDescription(pos_matcher_,
416416 alternative_description_type, original_candidate);
417417 ++i; // skip inserted candidate
418418 } else if (type == SELECT_VARIANT) {
421421 original_candidate->content_value = default_content_value;
422422 original_candidate->inner_segment_boundary.swap(
423423 default_inner_segment_boundary);
424 SetDescription(*pos_matcher_,
424 SetDescription(pos_matcher_,
425425 default_description_type, original_candidate);
426426 }
427427 modified = true;
5353 static const char *kDidYouMean;
5454 static const char *kYenKigou;
5555
56 explicit VariantsRewriter(const dictionary::POSMatcher *pos_matcher);
56 explicit VariantsRewriter(dictionary::POSMatcher pos_matcher);
5757 virtual ~VariantsRewriter();
5858 virtual int capability(const ConversionRequest &request) const;
5959 virtual bool Rewrite(const ConversionRequest &request,
111111 vector<uint32> *default_inner_segment_boundary,
112112 vector<uint32> *alternative_inner_segment_boundary) const;
113113
114 const dictionary::POSMatcher *pos_matcher_;
114 const dictionary::POSMatcher pos_matcher_;
115115 };
116116
117117 } // namespace mozc
7070 // considering this class as POD.
7171 VariantsRewriterTest() {}
7272
73 virtual void SetUp() {
73 void SetUp() override {
7474 Reset();
7575 #ifdef MOZC_USE_PACKED_DICTIONARY
7676 // Registers mocked PackedDataManager.
8080 kPackedSystemDictionary_size)));
8181 packed::RegisterPackedDataManager(data_manager.release());
8282 #endif // MOZC_USE_PACKED_DICTIONARY
83 pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher();
83 pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData());
8484 }
8585
8686 virtual void TearDown() {
114114 return new VariantsRewriter(pos_matcher_);
115115 }
116116
117 const POSMatcher *pos_matcher_;
117 POSMatcher pos_matcher_;
118118 };
119119
120120 TEST_F(VariantsRewriterTest, RewriteTest) {
349349 candidate.value = "HalfASCII";
350350 candidate.content_value = candidate.value;
351351 candidate.content_key = "halfascii";
352 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
352 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
353353 // "[半] アルファベット"
354354 EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth,
355355 VariantsRewriter::kAlphabet),
362362 candidate.value = "Half ASCII";
363363 candidate.content_value = candidate.value;
364364 candidate.content_key = "half ascii";
365 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
365 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
366366 // "[半] アルファベット"
367367 EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth,
368368 VariantsRewriter::kAlphabet),
374374 candidate.value = "Half!ASCII!";
375375 candidate.content_value = candidate.value;
376376 candidate.content_key = "half!ascii!";
377 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
377 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
378378 // "[半] アルファベット"
379379 EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth,
380380 VariantsRewriter::kAlphabet),
389389 candidate.content_key =
390390 "\xe3\x81\x97\xe3\x83\xbc\xe3\x81\xa7\xe3\x81\x83\xe3"
391391 "\x83\xbc\xe3\x82\x8d\xe3\x82\x80";
392 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
392 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
393393 // "[半] アルファベット"
394394 EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth,
395395 VariantsRewriter::kAlphabet),
406406 candidate.content_key =
407407 "\xe3\x81\x93\xe3\x81\x8e\xe3\x81\xa8\xe3\x81\x88\xe3\x82\x8b\xe3\x81"
408408 "\x94\xe3\x81\x99\xe3\x82\x80";
409 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
409 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
410410 // "[全] カタカナ"
411411 EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth,
412412 VariantsRewriter::kKatakana),
418418 candidate.value = "!@#";
419419 candidate.content_value = candidate.value;
420420 candidate.content_key = "!@#";
421 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
421 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
422422 // "[半]"
423423 EXPECT_EQ(VariantsRewriter::kHalfWidth, candidate.description);
424424 }
430430 "\x80\x8d";
431431 candidate.content_value = candidate.value;
432432 candidate.content_key = "[ABC]";
433 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
433 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
434434 // "[全] アルファベット"
435435 EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth,
436436 VariantsRewriter::kAlphabet),
445445 // "くさなぎつよし"
446446 candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA"
447447 "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97";
448 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
448 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
449449 // "<機種依存文字>"
450450 EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description);
451451 }
456456 candidate.content_value = candidate.value;
457457 // "えん"
458458 candidate.content_key = "\xE3\x81\x88\xE3\x82\x93";
459 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
459 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
460460 // "[半] バックスラッシュ"
461461 const char *expected =
462462 "\x5B\xE5\x8D\x8A\x5D\x20\xE3\x83\x90\xE3\x83\x83"
471471 candidate.content_value = candidate.value;
472472 // "えん"
473473 candidate.content_key = "\xE3\x81\x88\xE3\x82\x93";
474 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
474 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
475475 // "[全] バックスラッシュ"
476476 const char *expected =
477477 "\x5B\xE5\x85\xA8\x5D\x20\xE3\x83\x90\xE3\x83\x83\xE3\x82\xAF"
485485 candidate.content_value = candidate.value;
486486 // "えん"
487487 candidate.content_key = "\xE3\x81\x88\xE3\x82\x93";
488 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
488 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
489489 // "[半] 円記号 <機種依存文字>" for Desktop,
490490 // "[半] 円記号 <機種依存>" for Android
491491 string expected =("[" "\xE5\x8D\x8A" "] "
500500 candidate.content_value = candidate.value;
501501 // "えん"
502502 candidate.content_key = "\xE3\x81\x88\xE3\x82\x93";
503 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
503 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
504504 // "[全] 円記号"
505505 const char *expected =
506506 "[" "\xE5\x85\xA8" "] " "\xE5\x86\x86\xE8\xA8\x98\xE5\x8F\xB7";
516516 candidate.content_key = "\xE3\x81\xAD\xE3\x81\x9A\xE3\x81\xBF";
517517 // "絵文字"
518518 candidate.description = "\xE7\xB5\xB5\xE6\x96\x87\xE5\xAD\x97";
519 VariantsRewriter::SetDescriptionForCandidate(*pos_matcher_, &candidate);
519 VariantsRewriter::SetDescriptionForCandidate(pos_matcher_, &candidate);
520520 // "絵文字 <機種依存文字>" for Desktop, "絵文字 <機種依存>" for Android
521521 string expected("\xE7\xB5\xB5\xE6\x96\x87\xE5\xAD\x97" " ");
522522 expected.append(VariantsRewriter::kPlatformDependent);
531531 candidate.value = "HalfASCII";
532532 candidate.content_value = candidate.value;
533533 candidate.content_key = "halfascii";
534 VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_,
534 VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_,
535535 &candidate);
536536 // "[半] アルファベット"
537537 EXPECT_EQ(AppendString(VariantsRewriter::kHalfWidth,
544544 candidate.value = "!@#";
545545 candidate.content_value = candidate.value;
546546 candidate.content_key = "!@#";
547 VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_,
547 VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_,
548548 &candidate);
549549 // "[半]"
550550 EXPECT_EQ(VariantsRewriter::kHalfWidth, candidate.description);
557557 "\x80\x8d";
558558 candidate.content_value = candidate.value;
559559 candidate.content_key = "[ABC]";
560 VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_,
560 VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_,
561561 &candidate);
562562 // "[全] アルファベット"
563563 EXPECT_EQ(AppendString(VariantsRewriter::kFullWidth,
573573 // "くさなぎつよし"
574574 candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA"
575575 "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97";
576 VariantsRewriter::SetDescriptionForTransliteration(*pos_matcher_,
576 VariantsRewriter::SetDescriptionForTransliteration(pos_matcher_,
577577 &candidate);
578578 // "<機種依存文字>"
579579 EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description);
587587 candidate.value = "HalfASCII";
588588 candidate.content_value = candidate.value;
589589 candidate.content_key = "halfascii";
590 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
590 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
591591 EXPECT_EQ("", candidate.description);
592592 }
593593 // containing symbols
597597 candidate.value = "Half ASCII";
598598 candidate.content_value = candidate.value;
599599 candidate.content_key = "half ascii";
600 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
600 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
601601 EXPECT_EQ("", candidate.description);
602602 }
603603 {
606606 candidate.value = "Half!ASCII!";
607607 candidate.content_value = candidate.value;
608608 candidate.content_key = "half!ascii!";
609 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
609 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
610610 EXPECT_EQ("", candidate.description);
611611 }
612612 {
618618 candidate.content_key =
619619 "\xe3\x81\x97\xe3\x83\xbc\xe3\x81\xa7\xe3\x81\x83\xe3"
620620 "\x83\xbc\xe3\x82\x8d\xe3\x82\x80";
621 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
621 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
622622 EXPECT_EQ("", candidate.description);
623623 }
624624 {
627627 candidate.value = "!@#";
628628 candidate.content_value = candidate.value;
629629 candidate.content_key = "!@#";
630 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
630 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
631631 EXPECT_EQ("", candidate.description);
632632 }
633633 {
638638 "\x80\x8d";
639639 candidate.content_value = candidate.value;
640640 candidate.content_key = "[ABC]";
641 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
641 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
642642 EXPECT_EQ("", candidate.description);
643643 }
644644 {
650650 // "くさなぎつよし"
651651 candidate.content_key = "\xE3\x81\x8F\xE3\x81\x95\xE3\x81\xAA"
652652 "\xE3\x81\x8E\xE3\x81\xA4\xE3\x82\x88\xE3\x81\x97";
653 VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, &candidate);
653 VariantsRewriter::SetDescriptionForPrediction(pos_matcher_, &candidate);
654654 // "<機種依存文字>"
655655 EXPECT_EQ(VariantsRewriter::kPlatformDependent, candidate.description);
656656 }
6969 candidate->content_value = value;
7070
7171 if (type == ZIPCODE) {
72 const POSMatcher *pos_matcher =
73 UserPosManager::GetUserPosManager()->GetPOSMatcher();
74 candidate->lid = pos_matcher->GetZipcodeId();
75 candidate->rid = pos_matcher->GetZipcodeId();
72 const POSMatcher pos_matcher(
73 UserPosManager::GetUserPosManager()->GetPOSMatcherData());
74 candidate->lid = pos_matcher.GetZipcodeId();
75 candidate->rid = pos_matcher.GetZipcodeId();
7676 }
7777 }
7878
9797
9898 class ZipcodeRewriterTest : public ::testing::Test {
9999 protected:
100 virtual void SetUp() {
100 void SetUp() override {
101101 #ifdef MOZC_USE_PACKED_DICTIONARY
102102 // Registers mocked PackedDataManager.
103103 std::unique_ptr<packed::PackedDataManager>
106106 kPackedSystemDictionary_size)));
107107 packed::RegisterPackedDataManager(data_manager.release());
108108 #endif // MOZC_USE_PACKED_DICTIONARY
109
109 pos_matcher_.Set(UserPosManager::GetUserPosManager()->GetPOSMatcherData());
110110 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
111111 }
112112
113 virtual void TearDown() {
113 void TearDown() override {
114114 #ifdef MOZC_USE_PACKED_DICTIONARY
115115 // Unregisters mocked PackedDataManager.
116116 packed::RegisterPackedDataManager(NULL);
118118 }
119119
120120 ZipcodeRewriter *CreateZipcodeRewriter() const {
121 return new ZipcodeRewriter(
122 UserPosManager::GetUserPosManager()->GetPOSMatcher());
121 return new ZipcodeRewriter(&pos_matcher_);
123122 }
123
124 dictionary::POSMatcher pos_matcher_;
124125 };
125126
126127 TEST_F(ZipcodeRewriterTest, BasicTest) {
533533
534534 t13n_rewriter_.reset(
535535 new TransliterationRewriter(
536 *UserPosManager::GetUserPosManager()->GetPOSMatcher()));
536 dictionary::POSMatcher(
537 UserPosManager::GetUserPosManager()->GetPOSMatcherData())));
537538 }
538539
539540 virtual void TearDown() {