Stop embedding user POS data as C++ structures
This CL defines a binary format for user POS data and converts the
C++-embedded data into a new data set file.
BUG=
TEST=
REF_BUG=26841123
REF_CL=116321468,116327229
REF_TIME=2016-03-04T14:08:55+09:00
REF_TIME_RAW=1457068135 +0900
Noriyuki Takahashi
8 years ago
198 | 198 | PepperFileUtil::Initialize(instance_, kFileIoFileSystemExpectedSize); |
199 | 199 | LoadDictionary(); |
200 | 200 | #endif // GOOGLE_JAPANESE_INPUT_BUILD |
201 | user_pos_.reset(new dictionary::UserPOS( | |
202 | packed::PackedDataManager::GetUserPosManager()->GetUserPOSData())); | |
201 | user_pos_.reset(dictionary::UserPOS::CreateFromDataManager( | |
202 | *packed::PackedDataManager::GetUserPosManager())); | |
203 | 203 | |
204 | 204 | engine_.reset(mozc::EngineFactory::Create()); |
205 | 205 | handler_.reset(new SessionHandler(engine_.get())); |
59 | 59 | #include "dictionary/system/value_dictionary.h" |
60 | 60 | #include "dictionary/user_dictionary.h" |
61 | 61 | #include "dictionary/user_dictionary_stub.h" |
62 | #include "dictionary/user_pos.h" | |
62 | 63 | #include "engine/engine.h" |
63 | 64 | #include "engine/engine_interface.h" |
64 | 65 | #include "engine/mock_data_engine_factory.h" |
345 | 346 | SuppressionDictionary *suppression_dictionary = new SuppressionDictionary; |
346 | 347 | dictionary::UserDictionary *user_dictionary = |
347 | 348 | new dictionary::UserDictionary( |
348 | new dictionary::UserPOS(user_pos_manager.GetUserPOSData()), | |
349 | dictionary::UserPOS::CreateFromDataManager(user_pos_manager), | |
349 | 350 | user_pos_manager.GetPOSMatcher(), |
350 | 351 | suppression_dictionary); |
351 | 352 | InitConverterAndData( |
54 | 54 | '../dictionary/dictionary.gyp:dictionary_mock', |
55 | 55 | '../dictionary/dictionary.gyp:suffix_dictionary', |
56 | 56 | '../dictionary/dictionary_base.gyp:user_dictionary', |
57 | '../dictionary/dictionary_base.gyp:user_pos', | |
57 | 58 | '../dictionary/system/system_dictionary.gyp:system_dictionary', |
58 | 59 | '../dictionary/system/system_dictionary.gyp:value_dictionary', |
59 | 60 | '../engine/engine.gyp:engine_factory', |
28 | 28 | |
29 | 29 | #include "data_manager/chromeos/chromeos_user_pos_manager.h" |
30 | 30 | |
31 | #include "base/embedded_file.h" | |
31 | 32 | #include "base/logging.h" |
32 | 33 | #include "base/singleton.h" |
33 | #include "dictionary/pos_group.h" | |
34 | 34 | #include "dictionary/pos_matcher.h" |
35 | #include "dictionary/user_pos.h" | |
36 | 35 | |
37 | 36 | namespace mozc { |
38 | 37 | namespace chromeos { |
43 | 42 | |
44 | 43 | namespace { |
45 | 44 | |
46 | // The following header file is automatically generated and contains the | |
47 | // definition of variable, kPOSToken, of type UserPOSImpl::POSToken. | |
48 | #include "data_manager/chromeos/user_pos_data.h" | |
45 | // Embedded file kUserPosManagerData is defined in this header file. | |
46 | #include "data_manager/chromeos/user_pos_manager_data.h" | |
49 | 47 | |
50 | 48 | } // namespace |
51 | 49 | |
52 | const dictionary::UserPOS::POSToken * | |
53 | ChromeOsUserPosManager::GetUserPOSData() const { | |
54 | DCHECK(kPOSToken != NULL); | |
55 | return kPOSToken; | |
50 | ChromeOsUserPosManager::ChromeOsUserPosManager() { | |
51 | const StringPiece data = LoadEmbeddedFile(kUserPosManagerData); | |
52 | const char *kMagicNumber = ""; // Magic number is not present. | |
53 | CHECK(manager_.InitUserPosManagerDataFromArray(data, kMagicNumber)) | |
54 | << "Embedded user_pos_manager_data.h is broken"; | |
55 | } | |
56 | ||
57 | ChromeOsUserPosManager::~ChromeOsUserPosManager() = default; | |
58 | ||
59 | void ChromeOsUserPosManager::GetUserPOSData( | |
60 | StringPiece *token_array_data, StringPiece *string_array_data) const { | |
61 | manager_.GetUserPOSData(token_array_data, string_array_data); | |
56 | 62 | } |
57 | 63 | |
58 | 64 | namespace { |
30 | 30 | #define MOZC_DATA_MANAGER_CHROMEOS_CHROMEOS_USER_POS_MANAGER_H_ |
31 | 31 | |
32 | 32 | #include "base/port.h" |
33 | #include "data_manager/data_manager.h" | |
33 | 34 | #include "data_manager/data_manager_interface.h" |
34 | 35 | |
35 | 36 | namespace mozc { |
37 | 38 | |
38 | 39 | class ChromeOsUserPosManager : public DataManagerInterface { |
39 | 40 | public: |
40 | ChromeOsUserPosManager() {} | |
41 | ~ChromeOsUserPosManager() override {} | |
41 | ChromeOsUserPosManager(); | |
42 | ~ChromeOsUserPosManager() override; | |
42 | 43 | |
43 | 44 | static ChromeOsUserPosManager *GetUserPosManager(); |
44 | 45 | |
45 | 46 | // Partially implement the interface because some binary only requires the |
46 | 47 | // following embedded data. |
47 | const dictionary::UserPOS::POSToken *GetUserPOSData() const override; | |
48 | void GetUserPOSData(StringPiece *token_array_data, | |
49 | StringPiece *string_array_data) const override; | |
48 | 50 | const dictionary::POSMatcher *GetPOSMatcher() const override; |
49 | 51 | |
50 | 52 | // The following are implemented in ChromeOsDataManager. |
80 | 82 | size_t *size) const override {} |
81 | 83 | |
82 | 84 | private: |
85 | DataManager manager_; | |
83 | 86 | DISALLOW_COPY_AND_ASSIGN(ChromeOsUserPosManager); |
84 | 87 | }; |
85 | 88 |
35 | 35 | #include "rewriter/serialized_dictionary.h" |
36 | 36 | |
37 | 37 | namespace mozc { |
38 | namespace { | |
39 | ||
40 | bool InitUserPosManagerDataFromReader(const DataSetReader &reader, | |
41 | StringPiece *user_pos_token_array_data, | |
42 | StringPiece *user_pos_string_array_data) { | |
43 | if (!reader.Get("user_pos_token", user_pos_token_array_data)) { | |
44 | LOG(ERROR) << "Cannot find a user POS token array"; | |
45 | return false; | |
46 | } | |
47 | if (!reader.Get("user_pos_string", user_pos_string_array_data)) { | |
48 | LOG(ERROR) << "Cannot find a user POS string array"; | |
49 | return false; | |
50 | } | |
51 | if (user_pos_token_array_data->size() % 8 != 0 || | |
52 | !SerializedStringArray::VerifyData(*user_pos_string_array_data)) { | |
53 | LOG(ERROR) << "User POS data is broken: token array data size = " | |
54 | << user_pos_token_array_data->size() << ", string array size = " | |
55 | << user_pos_string_array_data->size(); | |
56 | return false; | |
57 | } | |
58 | return true; | |
59 | } | |
60 | ||
61 | } // namespace | |
38 | 62 | |
39 | 63 | DataManager::DataManager() = default; |
40 | 64 | DataManager::~DataManager() = default; |
43 | 67 | DataSetReader reader; |
44 | 68 | if (!reader.Init(array, magic)) { |
45 | 69 | LOG(ERROR) << "Binary data of size " << array.size() << " is broken"; |
70 | return false; | |
71 | } | |
72 | if (!InitUserPosManagerDataFromReader(reader, | |
73 | &user_pos_token_array_data_, | |
74 | &user_pos_string_array_data_)) { | |
75 | LOG(ERROR) << "User POS manager data is broken"; | |
46 | 76 | return false; |
47 | 77 | } |
48 | 78 | if (!reader.Get("conn", &connection_data_)) { |
195 | 225 | return true; |
196 | 226 | } |
197 | 227 | |
228 | bool DataManager::InitUserPosManagerDataFromArray(StringPiece array, | |
229 | StringPiece magic) { | |
230 | DataSetReader reader; | |
231 | if (!reader.Init(array, magic)) { | |
232 | LOG(ERROR) << "Binary data of size " << array.size() << " is broken"; | |
233 | return false; | |
234 | } | |
235 | if (!InitUserPosManagerDataFromReader(reader, | |
236 | &user_pos_token_array_data_, | |
237 | &user_pos_string_array_data_)) { | |
238 | LOG(ERROR) << "User POS manager data is broken"; | |
239 | return false; | |
240 | } | |
241 | return true; | |
242 | } | |
243 | ||
198 | 244 | void DataManager::GetConnectorData(const char **data, size_t *size) const { |
199 | 245 | *data = connection_data_.data(); |
200 | 246 | *size = connection_data_.size(); |
222 | 268 | *size = suggestion_filter_data_.size(); |
223 | 269 | } |
224 | 270 | |
225 | const dictionary::UserPOS::POSToken *DataManager::GetUserPOSData() const { | |
226 | LOG(FATAL) << "Not implemented"; | |
227 | return nullptr; | |
271 | void DataManager::GetUserPOSData(StringPiece *token_array_data, | |
272 | StringPiece *string_array_data) const { | |
273 | *token_array_data = user_pos_token_array_data_; | |
274 | *string_array_data = user_pos_string_array_data_; | |
228 | 275 | } |
229 | 276 | |
230 | 277 | const dictionary::POSMatcher *DataManager::GetPOSMatcher() const { |
119 | 119 | 'dependencies': [ |
120 | 120 | '../data_manager_base.gyp:dataset_writer_main', |
121 | 121 | '../../rewriter/rewriter_base.gyp:gen_rewriter_files#host', |
122 | '<(dataset_tag)_data_manager_base.gyp:gen_separate_user_pos_data_for_<(dataset_tag)#host', | |
122 | 123 | 'gen_separate_connection_data_for_<(dataset_tag)#host', |
123 | 124 | 'gen_separate_dictionary_data_for_<(dataset_tag)#host', |
124 | 125 | 'gen_separate_collocation_data_for_<(dataset_tag)#host', |
136 | 137 | 'action_name': 'gen_mozc_dataset_for_<(dataset_tag)', |
137 | 138 | 'variables': { |
138 | 139 | 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)', |
140 | 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data', | |
141 | 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data', | |
139 | 142 | 'dictionary': '<(gen_out_dir)/system.dictionary', |
140 | 143 | 'connection': '<(gen_out_dir)/connection.data', |
141 | 144 | 'collocation': '<(gen_out_dir)/collocation_data.data', |
158 | 161 | 'symbol_string': '<(gen_out_dir)/symbol_string.data', |
159 | 162 | }, |
160 | 163 | 'inputs': [ |
164 | '<(user_pos_token)', | |
165 | '<(user_pos_string)', | |
161 | 166 | '<(dictionary)', |
162 | 167 | '<(connection)', |
163 | 168 | '<(collocation)', |
186 | 191 | '<(generator)', |
187 | 192 | '--magic=<(magic_number)', |
188 | 193 | '--output=<(gen_out_dir)/<(out_mozc_data)', |
194 | 'user_pos_token:32:<(user_pos_token)', | |
195 | 'user_pos_string:32:<(user_pos_string)', | |
189 | 196 | 'coll:32:<(gen_out_dir)/collocation_data.data', |
190 | 197 | 'cols:32:<(gen_out_dir)/collocation_suppression_data.data', |
191 | 198 | 'conn:32:<(gen_out_dir)/connection.data', |
44 | 44 | DataManager(); |
45 | 45 | ~DataManager() override; |
46 | 46 | |
47 | // Parses |array| and extracts byte blocks of data set. | |
47 | 48 | bool InitFromArray(StringPiece array, StringPiece magic); |
48 | 49 | |
50 | // The same as above InitFromArray() but only parses data set for user pos | |
51 | // manager. For mozc runtime modules, use InitFromArray() because this method | |
52 | // is only for build tools, e.g., rewriter/dictionary_generator.cc (some build | |
53 | // tools depend on user pos data to create outputs, so we need to handle | |
54 | // partial data set). | |
55 | bool InitUserPosManagerDataFromArray(StringPiece array, StringPiece magic); | |
56 | ||
49 | 57 | // The following interfaces are implemented. |
58 | void GetUserPOSData(StringPiece *token_array_data, | |
59 | StringPiece *string_array_data) const override; | |
50 | 60 | void GetConnectorData(const char **data, size_t *size) const override; |
51 | 61 | void GetSystemDictionaryData(const char **data, int *size) const override; |
52 | 62 | void GetCollocationData(const char **array, size_t *size) const override; |
81 | 91 | // The following interfaces are not yet implemented. |
82 | 92 | // TODO(noriyukit): Implement all the interfaces by migrating embedded C++ |
83 | 93 | // structures to a data set file. |
84 | const dictionary::UserPOS::POSToken *GetUserPOSData() const override; | |
85 | 94 | const dictionary::POSMatcher *GetPOSMatcher() const override; |
86 | 95 | |
87 | 96 | private: |
97 | StringPiece user_pos_token_array_data_; | |
98 | StringPiece user_pos_string_array_data_; | |
88 | 99 | StringPiece connection_data_; |
89 | 100 | StringPiece dictionary_data_; |
90 | 101 | StringPiece suggestion_filter_data_; |
38 | 38 | 'dependencies': [ |
39 | 39 | '<(mozc_dir)/base/base.gyp:base', |
40 | 40 | '<(mozc_dir)/dictionary/dictionary_base.gyp:pos_matcher', |
41 | '<(mozc_dir)/dictionary/dictionary_base.gyp:user_pos', | |
42 | 41 | 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host', |
43 | 'gen_embedded_user_pos_data_for_<(dataset_tag)#host', | |
42 | 'gen_user_pos_manager_data_header_for_<(dataset_tag)#host', | |
43 | '../data_manager_base.gyp:data_manager', | |
44 | 44 | ], |
45 | 45 | }, |
46 | 46 | { |
49 | 49 | 'toolsets': ['host'], |
50 | 50 | 'dependencies': [ |
51 | 51 | 'gen_embedded_pos_matcher_data_for_<(dataset_tag)#host', |
52 | 'gen_embedded_user_pos_data_for_<(dataset_tag)#host', | |
53 | 52 | ], |
54 | 53 | }, |
55 | 54 | { |
57 | 56 | 'type': 'none', |
58 | 57 | 'toolsets': ['host'], |
59 | 58 | 'dependencies': [ |
60 | 'gen_embedded_user_pos_data_for_<(dataset_tag)#host', | |
59 | 'gen_separate_user_pos_data_for_<(dataset_tag)#host', | |
61 | 60 | ], |
62 | 61 | 'actions': [ |
63 | 62 | { |
81 | 80 | ], |
82 | 81 | }, |
83 | 82 | { |
84 | 'target_name': 'gen_embedded_user_pos_data_for_<(dataset_tag)', | |
83 | 'target_name': 'gen_user_pos_manager_data_header_for_<(dataset_tag)', | |
84 | 'type': 'none', | |
85 | 'toolsets': ['host'], | |
86 | 'dependencies': [ | |
87 | 'gen_user_pos_manager_data_for_<(dataset_tag)#host', | |
88 | ], | |
89 | 'actions': [ | |
90 | { | |
91 | 'action_name': 'gen_user_pos_manager_data_header_for_<(dataset_tag)', | |
92 | 'variables': { | |
93 | 'user_pos_manager_data': '<(gen_out_dir)/user_pos_manager.data', | |
94 | }, | |
95 | 'inputs': [ | |
96 | '<(user_pos_manager_data)', | |
97 | ], | |
98 | 'outputs': [ | |
99 | '<(gen_out_dir)/user_pos_manager_data.h', | |
100 | ], | |
101 | 'action': [ | |
102 | 'python', '<(mozc_dir)/build_tools/embed_file.py', | |
103 | '--input=<(user_pos_manager_data)', | |
104 | '--name=kUserPosManagerData', | |
105 | '--output=<(gen_out_dir)/user_pos_manager_data.h', | |
106 | ], | |
107 | }, | |
108 | ], | |
109 | }, | |
110 | { | |
111 | 'target_name': 'gen_user_pos_manager_data_for_<(dataset_tag)', | |
112 | 'type': 'none', | |
113 | 'toolsets': ['host'], | |
114 | 'dependencies': [ | |
115 | '../data_manager_base.gyp:dataset_writer_main', | |
116 | 'gen_separate_user_pos_data_for_<(dataset_tag)#host', | |
117 | ], | |
118 | 'actions': [ | |
119 | { | |
120 | 'action_name': 'gen_user_pos_manager_data_for_<(dataset_tag)', | |
121 | 'variables': { | |
122 | 'generator': '<(PRODUCT_DIR)/dataset_writer_main<(EXECUTABLE_SUFFIX)', | |
123 | 'user_pos_token': '<(gen_out_dir)/user_pos_token_array.data', | |
124 | 'user_pos_string': '<(gen_out_dir)/user_pos_string_array.data', | |
125 | }, | |
126 | 'inputs': [ | |
127 | '<(user_pos_token)', | |
128 | '<(user_pos_string)', | |
129 | ], | |
130 | 'outputs': [ | |
131 | '<(gen_out_dir)/user_pos_manager.data', | |
132 | ], | |
133 | 'action': [ | |
134 | '<(generator)', | |
135 | '--output=<(gen_out_dir)/user_pos_manager.data', | |
136 | 'user_pos_token:32:<(user_pos_token)', | |
137 | 'user_pos_string:32:<(user_pos_string)', | |
138 | ], | |
139 | }, | |
140 | ], | |
141 | }, | |
142 | { | |
143 | 'target_name': 'gen_separate_user_pos_data_for_<(dataset_tag)', | |
85 | 144 | 'type': 'none', |
86 | 145 | 'toolsets': ['host'], |
87 | 146 | 'dependencies': [ |
89 | 148 | ], |
90 | 149 | 'actions': [ |
91 | 150 | { |
92 | 'action_name': 'gen_embedded_user_pos_data_for_<(dataset_tag)', | |
151 | 'action_name': 'gen_separate_user_pos_data_for_<(dataset_tag)', | |
93 | 152 | 'variables': { |
94 | 153 | 'id_def': '<(platform_data_dir)/id.def', |
95 | 154 | 'special_pos': '<(common_data_dir)/rules/special_pos.def', |
96 | 155 | 'user_pos': '<(common_data_dir)/rules/user_pos.def', |
97 | 156 | 'cforms': '<(common_data_dir)/rules/cforms.def', |
98 | 'user_pos_data': '<(gen_out_dir)/user_pos_data.h', | |
157 | 'token_array_data': '<(gen_out_dir)/user_pos_token_array.data', | |
158 | 'string_array_data': '<(gen_out_dir)/user_pos_string_array.data', | |
99 | 159 | 'pos_list': '<(gen_out_dir)/pos_list.data', |
100 | 160 | }, |
101 | 161 | 'inputs': [ |
106 | 166 | '<(cforms)', |
107 | 167 | ], |
108 | 168 | 'outputs': [ |
109 | '<(user_pos_data)', | |
169 | '<(token_array_data)', | |
170 | '<(string_array_data)', | |
110 | 171 | '<(pos_list)', |
111 | 172 | ], |
112 | 173 | 'action': [ |
115 | 176 | '--special_pos_file=<(special_pos)', |
116 | 177 | '--user_pos_file=<(user_pos)', |
117 | 178 | '--cforms_file=<(cforms)', |
118 | '--output=<(user_pos_data)', | |
179 | '--output_token_array=<(token_array_data)', | |
180 | '--output_string_array=<(string_array_data)', | |
119 | 181 | '--output_pos_list=<(pos_list)', |
120 | 182 | ], |
121 | 'message': '[<(dataset_tag)] Generating <(user_pos_data).', | |
183 | 'message': '[<(dataset_tag)] Generating user pos data.', | |
122 | 184 | }, |
123 | 185 | ], |
124 | 186 | }, |
31 | 31 | |
32 | 32 | #include "base/port.h" |
33 | 33 | #include "base/string_piece.h" |
34 | #include "dictionary/user_pos.h" | |
35 | 34 | |
36 | 35 | namespace mozc { |
37 | 36 | |
50 | 49 | public: |
51 | 50 | virtual ~DataManagerInterface() {} |
52 | 51 | |
53 | // Returns the address of an array of UserPOS::POSToken. | |
54 | virtual const dictionary::UserPOS::POSToken *GetUserPOSData() const = 0; | |
52 | // Returns data set for UserPOS. | |
53 | virtual void GetUserPOSData(StringPiece *token_array_data, | |
54 | StringPiece *string_array_data) const = 0; | |
55 | 55 | |
56 | 56 | // Returns a reference to POSMatcher class handling POS rules. Don't |
57 | 57 | // delete the returned pointer, which is owned by the manager. |
28 | 28 | |
29 | 29 | #include "data_manager/oss/oss_user_pos_manager.h" |
30 | 30 | |
31 | #include "base/embedded_file.h" | |
31 | 32 | #include "base/logging.h" |
32 | 33 | #include "base/singleton.h" |
33 | 34 | #include "dictionary/pos_group.h" |
34 | 35 | #include "dictionary/pos_matcher.h" |
35 | #include "dictionary/user_pos.h" | |
36 | 36 | |
37 | 37 | namespace mozc { |
38 | 38 | namespace oss { |
43 | 43 | |
44 | 44 | namespace { |
45 | 45 | |
46 | // The following header file is automatically generated and contains the | |
47 | // definition of variable, kPOSToken, of type UserPOSImpl::POSToken. | |
48 | #include "data_manager/oss/user_pos_data.h" | |
46 | // Embedded file kUserPosManagerData is defined in this header file. | |
47 | #include "data_manager/oss/user_pos_manager_data.h" | |
49 | 48 | |
50 | 49 | } // namespace |
51 | 50 | |
52 | const dictionary::UserPOS::POSToken *OssUserPosManager::GetUserPOSData() const { | |
53 | DCHECK(kPOSToken != NULL); | |
54 | return kPOSToken; | |
51 | OssUserPosManager::OssUserPosManager() { | |
52 | const StringPiece data = LoadEmbeddedFile(kUserPosManagerData); | |
53 | const char *kMagicNumber = ""; // Magic number is not present. | |
54 | CHECK(manager_.InitUserPosManagerDataFromArray(data, kMagicNumber)) | |
55 | << "Embedded user_pos_manager_data.h is broken"; | |
56 | } | |
57 | ||
58 | OssUserPosManager::~OssUserPosManager() = default; | |
59 | ||
60 | void OssUserPosManager::GetUserPOSData( | |
61 | StringPiece *token_array_data, StringPiece *string_array_data) const { | |
62 | manager_.GetUserPOSData(token_array_data, string_array_data); | |
55 | 63 | } |
56 | 64 | |
57 | 65 | namespace { |
30 | 30 | #define MOZC_DATA_MANAGER_OSS_OSS_USER_POS_MANAGER_H_ |
31 | 31 | |
32 | 32 | #include "base/port.h" |
33 | #include "data_manager/data_manager.h" | |
33 | 34 | #include "data_manager/data_manager_interface.h" |
34 | 35 | |
35 | 36 | namespace mozc { |
37 | 38 | |
38 | 39 | class OssUserPosManager : public DataManagerInterface { |
39 | 40 | public: |
40 | OssUserPosManager() {} | |
41 | ~OssUserPosManager() override {} | |
41 | OssUserPosManager(); | |
42 | ~OssUserPosManager() override; | |
42 | 43 | |
43 | 44 | static OssUserPosManager *GetUserPosManager(); |
44 | 45 | |
45 | 46 | // Partially implement the interface because some binary only requires the |
46 | 47 | // following embedded data. |
47 | 48 | // Returns the data set for UserPOS as a token array and a string array. |
48 | const dictionary::UserPOS::POSToken *GetUserPOSData() const override; | |
49 | void GetUserPOSData(StringPiece *token_array_data, | |
50 | StringPiece *string_array_data) const override; | |
49 | 51 | const dictionary::POSMatcher *GetPOSMatcher() const override; |
50 | 52 | |
51 | 53 | // The following are implemented in OssDataManager. |
80 | 82 | size_t *size) const override {} |
81 | 83 | |
82 | 84 | private: |
85 | DataManager manager_; | |
83 | 86 | DISALLOW_COPY_AND_ASSIGN(OssUserPosManager); |
84 | 87 | }; |
85 | 88 |
28 | 28 | |
29 | 29 | #include <string> |
30 | 30 | |
31 | #include "base/file_stream.h" | |
31 | 32 | #include "base/flags.h" |
32 | 33 | #include "base/init_mozc.h" |
33 | 34 | #include "base/logging.h" |
37 | 38 | #include "dictionary/pos_matcher.h" |
38 | 39 | #include "dictionary/user_pos.h" |
39 | 40 | |
41 | DEFINE_string(user_pos_manager_data, "", "Input user pos manager data"); | |
40 | 42 | DEFINE_string(output, "", "Output data file name"); |
41 | 43 | |
42 | 44 | namespace mozc { |
43 | 45 | namespace { |
44 | 46 | |
45 | 47 | #include "data_manager/@DIR@/pos_matcher_data.h" |
46 | #include "data_manager/@DIR@/user_pos_data.h" | |
47 | 48 | |
48 | 49 | } // namespace |
49 | 50 | |
50 | 51 | bool OutputData(const string &file_path) { |
52 | const char* kMagicNumber = ""; // No magic number. | |
51 | 53 | packed::SystemDictionaryDataPacker packer(Version::GetMozcVersion()); |
52 | packer.SetPosTokens(kPOSToken, arraysize(kPOSToken)); | |
54 | packer.SetMozcData(InputFileStream(FLAGS_user_pos_manager_data.c_str(), | |
55 | ios_base::in | ios_base::binary).Read(), | |
56 | kMagicNumber); | |
53 | 57 | // The following two arrays contain sentinel elements but the packer doesn't |
54 | 58 | // expect them. So, pass the shrunk ranges of the arrays. Note that |
55 | 59 | // sentinel elements are not necessary at runtime. |
63 | 67 | int main(int argc, char **argv) { |
64 | 68 | mozc::InitMozc(argv[0], &argc, &argv, false); |
65 | 69 | |
66 | if (FLAGS_output.empty()) { | |
67 | LOG(FATAL) << "output flag is needed"; | |
70 | if (FLAGS_user_pos_manager_data.empty() || FLAGS_output.empty()) { | |
71 | LOG(FATAL) << "input and output flags are needed"; | |
68 | 72 | return 1; |
69 | 73 | } |
70 | 74 | if (!mozc::OutputData(FLAGS_output)) { |
51 | 51 | namespace { |
52 | 52 | |
53 | 53 | #include "data_manager/@DIR@/pos_matcher_data.h" |
54 | #include "data_manager/@DIR@/user_pos_data.h" | |
55 | 54 | |
56 | 55 | } // namespace |
57 | 56 | |
61 | 60 | dictionary_version = FLAGS_dictionary_version; |
62 | 61 | } |
63 | 62 | packed::SystemDictionaryDataPacker packer(dictionary_version); |
64 | packer.SetPosTokens(kPOSToken, arraysize(kPOSToken)); | |
65 | 63 | // The following two arrays contain sentinel elements but the packer doesn't |
66 | 64 | // expect them. So pass the shrunk ranges of the arrays. Note that sentinel |
67 | 65 | // elements are not required at runtime. |
49 | 49 | using std::unique_ptr; |
50 | 50 | |
51 | 51 | using mozc::dictionary::POSMatcher; |
52 | using mozc::dictionary::UserPOS; | |
53 | 52 | |
54 | 53 | namespace mozc { |
55 | 54 | namespace packed { |
78 | 77 | bool InitWithZippedData(const string &zipped_system_dictionary_data); |
79 | 78 | string GetDictionaryVersion(); |
80 | 79 | |
81 | const UserPOS::POSToken *GetUserPOSData() const; | |
80 | void GetUserPOSData(StringPiece *token_array_data, | |
81 | StringPiece *string_array_data) const; | |
82 | 82 | const POSMatcher *GetPOSMatcher() const; |
83 | 83 | const uint8 *GetPosGroupData() const; |
84 | 84 | void GetConnectorData(const char **data, size_t *size) const; |
119 | 119 | }; |
120 | 120 | bool InitializeWithSystemDictionaryData(); |
121 | 121 | |
122 | unique_ptr<UserPOS::POSToken[]> pos_token_; | |
123 | unique_ptr<UserPOS::ConjugationType[]> conjugation_array_; | |
124 | 122 | unique_ptr<uint16[]> rule_id_table_; |
125 | 123 | unique_ptr<POSMatcher::Range *[]> range_tables_; |
126 | 124 | unique_ptr<Range[]> range_table_items_; |
171 | 169 | << " expected:" << kSystemDictionaryFormatVersion |
172 | 170 | << " actual:" << system_dictionary_data_->format_version(); |
173 | 171 | return false; |
174 | } | |
175 | // Makes UserPOS data. | |
176 | pos_token_.reset( | |
177 | new UserPOS::POSToken[system_dictionary_data_->pos_tokens_size()]); | |
178 | size_t conjugation_count = 0; | |
179 | for (size_t i = 0; i < system_dictionary_data_->pos_tokens_size(); ++i) { | |
180 | conjugation_count += | |
181 | system_dictionary_data_->pos_tokens(i).conjugation_forms_size(); | |
182 | } | |
183 | conjugation_array_.reset(new UserPOS::ConjugationType[conjugation_count]); | |
184 | size_t conjugation_index = 0; | |
185 | for (size_t i = 0; i < system_dictionary_data_->pos_tokens_size(); ++i) { | |
186 | const SystemDictionaryData::PosToken &pos_token = | |
187 | system_dictionary_data_->pos_tokens(i); | |
188 | if (pos_token.has_pos()) { | |
189 | pos_token_[i].pos = pos_token.pos().data(); | |
190 | } else { | |
191 | pos_token_[i].pos = NULL; | |
192 | } | |
193 | pos_token_[i].conjugation_size = | |
194 | pos_token.conjugation_forms_size(); | |
195 | pos_token_[i].conjugation_form = &conjugation_array_[conjugation_index]; | |
196 | if (pos_token.conjugation_forms_size() == 0) { | |
197 | pos_token_[i].conjugation_form = NULL; | |
198 | } | |
199 | for (size_t j = 0; j < pos_token.conjugation_forms_size(); ++j) { | |
200 | const SystemDictionaryData::PosToken::ConjugationType &conjugation_form = | |
201 | pos_token.conjugation_forms(j); | |
202 | if (conjugation_form.has_key_suffix()) { | |
203 | conjugation_array_[conjugation_index].key_suffix = | |
204 | conjugation_form.key_suffix().data(); | |
205 | } else { | |
206 | conjugation_array_[conjugation_index].key_suffix = NULL; | |
207 | } | |
208 | if (conjugation_form.has_value_suffix()) { | |
209 | conjugation_array_[conjugation_index].value_suffix = | |
210 | conjugation_form.value_suffix().data(); | |
211 | } else { | |
212 | conjugation_array_[conjugation_index].value_suffix = NULL; | |
213 | } | |
214 | conjugation_array_[conjugation_index].id = conjugation_form.id(); | |
215 | ++conjugation_index; | |
216 | } | |
217 | 172 | } |
218 | 173 | |
219 | 174 | // Makes POSMatcher data. |
262 | 217 | if (system_dictionary_data_->has_mozc_data() && |
263 | 218 | !manager_.InitFromArray(system_dictionary_data_->mozc_data(), |
264 | 219 | system_dictionary_data_->mozc_data_magic())) { |
265 | LOG(ERROR) << "Failed to initialize mozc data"; | |
266 | return false; | |
267 | } | |
268 | ||
220 | VLOG(1) << "Data set is incomplete. Assume this is user pos manager data."; | |
221 | // The data set containing only user pos manager data is used in build | |
222 | // tools. | |
223 | // TODO(noriyukit): Fix this hard-to-understand behavior by removing | |
224 | // PackedDataManager. | |
225 | if (!manager_.InitUserPosManagerDataFromArray( | |
226 | system_dictionary_data_->mozc_data(), | |
227 | system_dictionary_data_->mozc_data_magic())) { | |
228 | LOG(ERROR) << "Failed to initialize mozc data"; | |
229 | return false; | |
230 | } | |
231 | } | |
269 | 232 | return true; |
270 | 233 | } |
271 | 234 | |
272 | const UserPOS::POSToken *PackedDataManager::Impl::GetUserPOSData() const { | |
273 | return pos_token_.get(); | |
235 | void PackedDataManager::Impl::GetUserPOSData( | |
236 | StringPiece *token_array_data, StringPiece *string_array_data) const { | |
237 | manager_.GetUserPOSData(token_array_data, string_array_data); | |
274 | 238 | } |
275 | 239 | |
276 | 240 | const POSMatcher *PackedDataManager::Impl::GetPOSMatcher() const { |
401 | 365 | return manager_impl_->GetDictionaryVersion(); |
402 | 366 | } |
403 | 367 | |
404 | const UserPOS::POSToken *PackedDataManager::GetUserPOSData() const { | |
405 | return manager_impl_->GetUserPOSData(); | |
368 | void PackedDataManager::GetUserPOSData( | |
369 | StringPiece *token_array_data, StringPiece *string_array_data) const { | |
370 | manager_impl_->GetUserPOSData(token_array_data, string_array_data); | |
406 | 371 | } |
407 | 372 | |
408 | 373 | PackedDataManager *PackedDataManager::GetUserPosManager() { |
51 | 51 | |
52 | 52 | static PackedDataManager *GetUserPosManager(); |
53 | 53 | |
54 | const dictionary::UserPOS::POSToken *GetUserPOSData() const override; | |
54 | void GetUserPOSData(StringPiece *token_array_data, | |
55 | StringPiece *string_array_data) const override; | |
55 | 56 | const dictionary::POSMatcher *GetPOSMatcher() const override; |
56 | 57 | const uint8 *GetPosGroupData() const override; |
57 | 58 | void GetConnectorData(const char **data, size_t *size) const override; |
81 | 81 | ], |
82 | 82 | 'action': [ |
83 | 83 | '<(PRODUCT_DIR)/gen_packed_data_light_main_<(dataset_tag)<(EXECUTABLE_SUFFIX)', |
84 | '--user_pos_manager_data=<(gen_out_dir)/../<(dataset_dir)/user_pos_manager.data', | |
84 | 85 | '--output=<(gen_out_dir)/packed_data_light_<(dataset_tag)', |
85 | 86 | ], |
86 | 87 | }, |
87 | 88 | ], |
88 | 89 | 'dependencies': [ |
89 | 90 | 'gen_packed_data_light_main_<(dataset_tag)', |
91 | '../<(dataset_dir)/<(dataset_tag)_data_manager_base.gyp:gen_user_pos_manager_data_for_<(dataset_tag)', | |
90 | 92 | ], |
91 | 93 | }, |
92 | 94 | ], |
36 | 36 | optional string product_version = 1 [ default = "0.0.0.0" ]; |
37 | 37 | optional uint32 format_version = 2; |
38 | 38 | |
39 | message PosToken { | |
40 | optional string pos = 1; | |
41 | message ConjugationType { | |
42 | optional string key_suffix = 1; | |
43 | optional string value_suffix = 2; | |
44 | optional uint32 id = 3; | |
45 | }; | |
46 | repeated ConjugationType conjugation_forms = 2; | |
47 | }; | |
48 | repeated PosToken pos_tokens = 3; | |
39 | reserved 3; // DEPRECATED: repeated PosToken pos_tokens = 3; | |
49 | 40 | |
50 | 41 | message PosMatcherData { |
51 | 42 | repeated uint32 rule_id_table = 1; |
56 | 56 | } |
57 | 57 | |
58 | 58 | SystemDictionaryDataPacker::~SystemDictionaryDataPacker() { |
59 | } | |
60 | ||
61 | void SystemDictionaryDataPacker::SetPosTokens( | |
62 | const UserPOS::POSToken *pos_token_data, | |
63 | size_t token_count) { | |
64 | for (size_t i = 0; i < token_count; ++i) { | |
65 | SystemDictionaryData::PosToken *pos_token = | |
66 | system_dictionary_->add_pos_tokens(); | |
67 | if (pos_token_data[i].pos) { | |
68 | pos_token->set_pos(pos_token_data[i].pos); | |
69 | } | |
70 | for (size_t j = 0; j < pos_token_data[i].conjugation_size; ++j) { | |
71 | SystemDictionaryData::PosToken::ConjugationType *conjugation_form | |
72 | = pos_token->add_conjugation_forms(); | |
73 | if (pos_token_data[i].conjugation_form[j].key_suffix) { | |
74 | conjugation_form->set_key_suffix( | |
75 | pos_token_data[i].conjugation_form[j].key_suffix); | |
76 | } | |
77 | if (pos_token_data[i].conjugation_form[j].value_suffix) { | |
78 | conjugation_form->set_value_suffix( | |
79 | pos_token_data[i].conjugation_form[j].value_suffix); | |
80 | } | |
81 | conjugation_form->set_id( | |
82 | pos_token_data[i].conjugation_form[j].id); | |
83 | } | |
84 | } | |
85 | 59 | } |
86 | 60 | |
87 | 61 | void SystemDictionaryDataPacker::SetPosMatcherData( |
33 | 33 | |
34 | 34 | #include "base/port.h" |
35 | 35 | #include "dictionary/pos_matcher.h" |
36 | #include "dictionary/user_pos.h" | |
37 | 36 | |
38 | 37 | namespace mozc { |
39 | 38 | namespace packed { |
44 | 43 | public: |
45 | 44 | explicit SystemDictionaryDataPacker(const string &product_version); |
46 | 45 | ~SystemDictionaryDataPacker(); |
47 | void SetPosTokens( | |
48 | const dictionary::UserPOS::POSToken *pos_token_data, | |
49 | size_t token_count); | |
50 | 46 | void SetPosMatcherData( |
51 | 47 | const uint16 *rule_id_table, |
52 | 48 | size_t rule_id_table_count, |
32 | 32 | namespace mozc { |
33 | 33 | namespace packed { |
34 | 34 | |
35 | const int kSystemDictionaryFormatVersion = 20; | |
35 | const int kSystemDictionaryFormatVersion = 21; | |
36 | 36 | |
37 | 37 | } // namespace packed |
38 | 38 | } // namespace mozc |
28 | 28 | |
29 | 29 | #include "data_manager/testing/mock_user_pos_manager.h" |
30 | 30 | |
31 | #include "base/embedded_file.h" | |
31 | 32 | #include "base/logging.h" |
32 | 33 | #include "base/singleton.h" |
33 | #include "dictionary/pos_group.h" | |
34 | 34 | #include "dictionary/pos_matcher.h" |
35 | #include "dictionary/user_pos.h" | |
36 | 35 | |
37 | 36 | namespace mozc { |
38 | 37 | namespace testing { |
43 | 42 | |
44 | 43 | namespace { |
45 | 44 | |
46 | // The following header file is automatically generated and contains the | |
47 | // definition of variable, kPOSToken, of type UserPOSImpl::POSToken. | |
48 | #include "data_manager/testing/user_pos_data.h" | |
45 | // Embedded file kUserPosManagerData is defined in this header file. | |
46 | #include "data_manager/testing/user_pos_manager_data.h" | |
49 | 47 | |
50 | 48 | } // namespace |
51 | 49 | |
52 | const dictionary::UserPOS::POSToken * | |
53 | MockUserPosManager::GetUserPOSData() const { | |
54 | DCHECK(kPOSToken != NULL); | |
55 | return kPOSToken; | |
50 | MockUserPosManager::MockUserPosManager() { | |
51 | const StringPiece data = LoadEmbeddedFile(kUserPosManagerData); | |
52 | const char *kMagicNumber = ""; // Magic number is not present. | |
53 | CHECK(manager_.InitUserPosManagerDataFromArray(data, kMagicNumber)) | |
54 | << "Embedded user_pos_manager_data.h is broken"; | |
55 | } | |
56 | ||
57 | MockUserPosManager::~MockUserPosManager() = default; | |
58 | ||
59 | void MockUserPosManager::GetUserPOSData( | |
60 | StringPiece *token_array_data, StringPiece *string_array_data) const { | |
61 | manager_.GetUserPOSData(token_array_data, string_array_data); | |
56 | 62 | } |
57 | 63 | |
58 | 64 | namespace { |
30 | 30 | #define MOZC_DATA_MANAGER_TESTING_MOCK_USER_POS_MANAGER_H_ |
31 | 31 | |
32 | 32 | #include "base/port.h" |
33 | #include "data_manager/data_manager.h" | |
33 | 34 | #include "data_manager/data_manager_interface.h" |
34 | 35 | |
35 | 36 | namespace mozc { |
37 | 38 | |
38 | 39 | class MockUserPosManager : public DataManagerInterface { |
39 | 40 | public: |
40 | MockUserPosManager() {} | |
41 | ~MockUserPosManager() override {} | |
41 | MockUserPosManager(); | |
42 | ~MockUserPosManager() override; | |
42 | 43 | |
43 | 44 | static MockUserPosManager *GetUserPosManager(); |
44 | 45 | |
45 | 46 | // Partially implement the interface because some binary only requires the
46 | 47 | // following embedded data.
47 | const dictionary::UserPOS::POSToken *GetUserPOSData() const override; | |
48 | void GetUserPOSData(StringPiece *token_array_data, | |
49 | StringPiece *string_array_data) const override; | |
48 | 50 | const dictionary::POSMatcher *GetPOSMatcher() const override; |
49 | 51 | |
50 | 52 | // The following are implemented in MockDataManager. |
79 | 81 | size_t *size) const override {} |
80 | 82 | |
81 | 83 | private: |
84 | DataManager manager_; | |
82 | 85 | DISALLOW_COPY_AND_ASSIGN(MockUserPosManager); |
83 | 86 | }; |
84 | 87 |
63 | 63 | 'dictionary_base.gyp:pos_matcher', |
64 | 64 | 'dictionary_base.gyp:suppression_dictionary', |
65 | 65 | 'dictionary_base.gyp:user_dictionary', |
66 | 'dictionary_base.gyp:user_pos', | |
66 | 67 | ], |
67 | 68 | 'variables': { |
68 | 69 | 'test_size': 'small', |
27 | 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 | |
30 | """Utility to generate user_pos_data.h.""" | |
30 | """Utility to generate User POS binary data.""" | |
31 | 31 | |
32 | 32 | __author__ = "hidehiko" |
33 | 33 | |
34 | from collections import defaultdict | |
35 | import logging | |
36 | 34 | import optparse |
35 | import struct | |
37 | 36 | |
38 | from build_tools import code_generator_util | |
39 | 37 | from build_tools import serialized_string_array_builder |
40 | 38 | from dictionary import pos_util |
41 | 39 | |
42 | 40 | |
43 | def OutputUserPosDataHeader(user_pos_data, output): | |
44 | """Prints user_pos_data.h to output.""" | |
45 | # Output kConjugation | |
46 | for index, (_, conjugation_list) in enumerate(user_pos_data): | |
47 | output.write( | |
48 | 'static const ::mozc::dictionary::UserPOS::ConjugationType ' | |
49 | 'kConjugation%d[] = {\n' % (index)) | |
50 | for value_suffix, key_suffix, pos_id in conjugation_list: | |
51 | output.write(' { %s, %s, %d },\n' % ( | |
52 | code_generator_util.ToCppStringLiteral(value_suffix), | |
53 | code_generator_util.ToCppStringLiteral(key_suffix), | |
54 | pos_id)) | |
55 | output.write('};\n') | |
def ToString(s):
  """Returns s itself when it is truthy, otherwise the empty string."""
  if s:
    return s
  return ''
56 | 43 | |
57 | # Output PosToken | |
58 | output.write('const ::mozc::dictionary::UserPOS::POSToken kPOSToken[] = {\n') | |
59 | for index, (user_pos, conjunction_list) in enumerate(user_pos_data): | |
60 | output.write(' { %s, %d, kConjugation%d },\n' % ( | |
61 | code_generator_util.ToCppStringLiteral(user_pos), | |
62 | len(conjunction_list), | |
63 | index)) | |
64 | # Also output the sentinal. | |
65 | output.write(' { NULL, 0, NULL },\n' | |
66 | '};\n') | |
44 | ||
def OutputUserPosData(user_pos_data, output_token_array, output_string_array):
  """Writes the user POS token array and its string array to files.

  Args:
    user_pos_data: iterable of (user_pos, conjugation_list) pairs, where each
      conjugation is a (value_suffix, key_suffix, conjugation_id) triple.
    output_token_array: path of the binary token array file to write.
    output_string_array: path of the serialized string array file to write.
  """
  # Collect every distinct string; its rank in sorted order is its index.
  strings = set()
  for user_pos, conjugation_list in user_pos_data:
    strings.add(ToString(user_pos))
    for value_suffix, key_suffix, _ in conjugation_list:
      strings.add(ToString(value_suffix))
      strings.add(ToString(key_suffix))
  sorted_strings = sorted(strings)
  string_index = dict((s, i) for i, s in enumerate(sorted_strings))

  with open(output_token_array, 'wb') as f:
    for user_pos, conjugation_list in sorted(user_pos_data):
      user_pos_index = string_index[ToString(user_pos)]
      for value_suffix, key_suffix, conjugation_id in conjugation_list:
        # One entry is serialized to 8 bytes (four little-endian uint16
        # components).
        f.write(struct.pack('<HHHH',
                            user_pos_index,
                            string_index[ToString(value_suffix)],
                            string_index[ToString(key_suffix)],
                            conjugation_id))

  serialized_string_array_builder.SerializeToFile(
      sorted_strings, output_string_array)
67 | 67 | |
68 | 68 | |
69 | 69 | def ParseOptions(): |
70 | 70 | parser = optparse.OptionParser() |
71 | 71 | # Input: id.def, special_pos.def, user_pos.def, cforms.def |
72 | # Output: user_pos_data.h | |
73 | 72 | parser.add_option('--id_file', dest='id_file', help='Path to id.def.') |
74 | 73 | parser.add_option('--special_pos_file', dest='special_pos_file', |
75 | 74 | help='Path to special_pos.def') |
77 | 76 | help='Path to cforms.def') |
78 | 77 | parser.add_option('--user_pos_file', dest='user_pos_file', |
79 | 78 | help='Path to user_pos,def') |
80 | parser.add_option('--output', dest='output', | |
81 | help='Path to output user_pos_data.h') | |
79 | parser.add_option('--output_token_array', dest='output_token_array', | |
80 | help='Path to output token array binary data') | |
81 | parser.add_option('--output_string_array', dest='output_string_array', | |
82 | help='Path to output string array data') | |
82 | 83 | parser.add_option('--output_pos_list', dest='output_pos_list', |
83 | 84 | help='Path to output POS list binary file') |
84 | 85 | return parser.parse_args()[0] |
93 | 94 | user_pos = pos_util.UserPos(pos_database, inflection_map) |
94 | 95 | user_pos.Parse(options.user_pos_file) |
95 | 96 | |
96 | with open(options.output, 'w') as stream: | |
97 | OutputUserPosDataHeader(user_pos.data, stream) | |
97 | OutputUserPosData(user_pos.data, | |
98 | options.output_token_array, options.output_string_array) | |
98 | 99 | |
99 | 100 | if options.output_pos_list: |
100 | 101 | serialized_string_array_builder.SerializeToFile( |
115 | 115 | } // namespace |
116 | 116 | |
117 | 117 | TextDictionaryLoader::TextDictionaryLoader(const POSMatcher &pos_matcher) |
118 | : pos_matcher_(&pos_matcher) { | |
119 | } | |
118 | : zipcode_id_(pos_matcher.GetZipcodeId()), | |
119 | isolated_word_id_(pos_matcher.GetIsolatedWordId()) {} | |
120 | ||
121 | TextDictionaryLoader::TextDictionaryLoader(uint16 zipcode_id, | |
122 | uint16 isolated_word_id) | |
123 | : zipcode_id_(zipcode_id), isolated_word_id_(isolated_word_id) {} | |
120 | 124 | |
121 | 125 | TextDictionaryLoader::~TextDictionaryLoader() { |
122 | 126 | Clear(); |
133 | 137 | return true; |
134 | 138 | } |
135 | 139 | if (Util::StartsWith(label, "ZIP_CODE")) { |
136 | token->lid = pos_matcher_->GetZipcodeId(); | |
137 | token->rid = pos_matcher_->GetZipcodeId(); | |
140 | token->lid = zipcode_id_; | |
141 | token->rid = zipcode_id_; | |
138 | 142 | return true; |
139 | 143 | } |
140 | 144 | if (Util::StartsWith(label, "ENGLISH")) { |
141 | 145 | // TODO(noriyukit): Might be better to use special POS for english words. |
142 | token->lid = pos_matcher_->GetIsolatedWordId(); | |
143 | token->rid = pos_matcher_->GetIsolatedWordId(); | |
146 | token->lid = isolated_word_id_; | |
147 | token->rid = isolated_word_id_; | |
144 | 148 | return true; |
145 | 149 | } |
146 | 150 | LOG(ERROR) << "Unknown special label: " << label; |
47 | 47 | public: |
48 | 48 | // TODO(noriyukit): Better to pass the pointer of pos_matcher. |
49 | 49 | explicit TextDictionaryLoader(const POSMatcher& pos_matcher); |
50 | TextDictionaryLoader(uint16 zipcode_id, uint16 isolated_word_id); | |
50 | 51 | virtual ~TextDictionaryLoader(); |
51 | 52 | |
52 | 53 | // Loads tokens from system dictionary files and reading correction |
85 | 86 | // Allows derived classes to implement custom filtering rules. |
86 | 87 | virtual Token *ParseTSV(const vector<StringPiece> &columns) const; |
87 | 88 | |
88 | const POSMatcher *pos_matcher_; | |
89 | ||
90 | 89 | private: |
91 | 90 | static void LoadReadingCorrectionTokens( |
92 | 91 | const string &reading_correction_filename, |
104 | 103 | |
105 | 104 | Token *ParseTSVLine(StringPiece line) const; |
106 | 105 | |
106 | const uint16 zipcode_id_; | |
107 | const uint16 isolated_word_id_; | |
107 | 108 | vector<Token *> tokens_; |
108 | 109 | |
109 | 110 | FRIEND_TEST(TextDictionaryLoaderTest, RewriteSpecialTokenTest); |
226 | 226 | // Creates a user dictionary with actual pos data. |
227 | 227 | UserDictionary *CreateDictionary() { |
228 | 228 | const testing::MockUserPosManager user_pos_manager; |
229 | return new UserDictionary(new UserPOS(user_pos_manager.GetUserPOSData()), | |
229 | return new UserDictionary(UserPOS::CreateFromDataManager(user_pos_manager), | |
230 | 230 | user_pos_manager.GetPOSMatcher(), |
231 | 231 | Singleton<SuppressionDictionary>::get()); |
232 | 232 | } |
29 | 29 | #include "dictionary/user_pos.h" |
30 | 30 | |
31 | 31 | #include <algorithm> |
32 | #include <map> | |
32 | #include <set> | |
33 | 33 | |
34 | 34 | #include "base/logging.h" |
35 | 35 | #include "base/util.h" |
37 | 37 | namespace mozc { |
38 | 38 | namespace dictionary { |
39 | 39 | |
40 | UserPOS::UserPOS(const POSToken *pos_token_array) | |
41 | : pos_token_array_(pos_token_array) { | |
42 | DCHECK(pos_token_array_); | |
43 | for (size_t i = 0; pos_token_array_[i].pos != nullptr; ++i) { | |
44 | pos_map_.insert( | |
45 | std::make_pair(string(pos_token_array_[i].pos), &pos_token_array_[i])); | |
46 | } | |
47 | CHECK_GT(pos_map_.size(), 1); | |
40 | UserPOS::UserPOS(StringPiece token_array_data, StringPiece string_array_data) | |
41 | : token_array_data_(token_array_data) { | |
42 | DCHECK_EQ(token_array_data.size() % 8, 0); | |
43 | DCHECK(SerializedStringArray::VerifyData(string_array_data)); | |
44 | string_array_.Set(string_array_data); | |
48 | 45 | } |
46 | ||
47 | UserPOS::~UserPOS() = default; | |
49 | 48 | |
50 | 49 | void UserPOS::GetPOSList(vector<string> *pos_list) const { |
51 | 50 | pos_list->clear(); |
52 | for (size_t i = 0; pos_token_array_[i].pos != nullptr; ++i) { | |
53 | pos_list->push_back(pos_token_array_[i].pos); | |
51 | set<uint16> seen; | |
52 | for (auto iter = begin(); iter != end(); ++iter) { | |
53 | if (!seen.insert(iter.pos_index()).second) { | |
54 | continue; | |
55 | } | |
56 | const StringPiece pos = string_array_[iter.pos_index()]; | |
57 | pos_list->emplace_back(pos.data(), pos.size()); | |
54 | 58 | } |
55 | 59 | } |
56 | 60 | |
57 | 61 | bool UserPOS::IsValidPOS(const string &pos) const { |
58 | map<string, const POSToken*>::const_iterator it = pos_map_.find(pos); | |
59 | return it != pos_map_.end(); | |
62 | const auto iter = | |
63 | std::lower_bound(string_array_.begin(), string_array_.end(), pos); | |
64 | if (iter == string_array_.end()) { | |
65 | return false; | |
66 | } | |
67 | return std::binary_search(begin(), end(), iter.index()); | |
60 | 68 | } |
61 | 69 | |
62 | 70 | bool UserPOS::GetPOSIDs(const string &pos, uint16 *id) const { |
63 | map<string, const POSToken*>::const_iterator it = pos_map_.find(pos); | |
64 | if (it == pos_map_.end()) { | |
71 | const auto str_iter = | |
72 | std::lower_bound(string_array_.begin(), string_array_.end(), pos); | |
73 | if (str_iter == string_array_.end() || *str_iter != pos) { | |
65 | 74 | return false; |
66 | 75 | } |
67 | ||
68 | const ConjugationType *conjugation_form = it->second->conjugation_form; | |
69 | CHECK(conjugation_form); | |
70 | ||
71 | *id = conjugation_form[0].id; | |
72 | ||
76 | const auto token_iter = std::lower_bound(begin(), end(), str_iter.index()); | |
77 | if (token_iter == end() || token_iter.pos_index() != str_iter.index()) { | |
78 | return false; | |
79 | } | |
80 | *id = token_iter.conjugation_id(); | |
73 | 81 | return true; |
74 | 82 | } |
75 | 83 | |
76 | bool UserPOS::GetTokens(const string &key, | |
77 | const string &value, | |
78 | const string &pos, | |
79 | vector<Token> *tokens) const { | |
80 | if (key.empty() || | |
81 | value.empty() || | |
82 | pos.empty() || | |
83 | tokens == nullptr) { | |
84 | bool UserPOS::GetTokens(const string &key, const string &value, | |
85 | const string &pos, vector<Token> *tokens) const { | |
86 | if (key.empty() || value.empty() || pos.empty() || tokens == nullptr) { | |
84 | 87 | return false; |
85 | 88 | } |
86 | 89 | |
87 | 90 | tokens->clear(); |
88 | map<string, const POSToken*>::const_iterator it = pos_map_.find(pos); | |
89 | if (it == pos_map_.end()) { | |
91 | const auto str_iter = | |
92 | std::lower_bound(string_array_.begin(), string_array_.end(), pos); | |
93 | if (str_iter == string_array_.end() || *str_iter != pos) { | |
90 | 94 | return false; |
91 | 95 | } |
92 | ||
93 | const ConjugationType *conjugation_form = it->second->conjugation_form; | |
94 | CHECK(conjugation_form); | |
95 | ||
96 | const size_t size = static_cast<size_t>(it->second->conjugation_size); | |
96 | pair<iterator, iterator> range = | |
97 | std::equal_range(begin(), end(), str_iter.index()); | |
98 | if (range.first == range.second) { | |
99 | return false; | |
100 | } | |
101 | const size_t size = range.second - range.first; | |
97 | 102 | CHECK_GE(size, 1); |
98 | 103 | tokens->resize(size); |
99 | 104 | |
103 | 108 | // Set smaller cost for "短縮よみ" in order to make |
104 | 109 | // the rank of the word higher than others. |
105 | 110 | const int16 kIsolatedWordCost = 200; |
106 | const char kIsolatedWordPOS[] | |
107 | = "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF"; | |
111 | const char kIsolatedWordPOS[] = | |
112 | "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF"; | |
108 | 113 | |
109 | 114 | if (size == 1) { // no conjugation |
115 | const auto &token_iter = range.first; | |
110 | 116 | (*tokens)[0].key = key; |
111 | 117 | (*tokens)[0].value = value; |
112 | (*tokens)[0].id = conjugation_form[0].id; | |
118 | (*tokens)[0].id = token_iter.conjugation_id(); | |
113 | 119 | if (pos == kIsolatedWordPOS) { |
114 | (*tokens)[0].cost= kIsolatedWordCost; | |
120 | (*tokens)[0].cost = kIsolatedWordCost; | |
115 | 121 | } else { |
116 | (*tokens)[0].cost= kDefaultCost; | |
122 | (*tokens)[0].cost = kDefaultCost; | |
117 | 123 | } |
118 | 124 | } else { |
125 | const auto &base_form_token_iter = range.first; | |
119 | 126 | // expand all other forms |
120 | 127 | string key_stem = key; |
121 | 128 | string value_stem = value; |
122 | 129 | // assume that conjugation_form[0] contains the suffix of "base form". |
123 | const string base_key_suffix = conjugation_form[0].key_suffix; | |
124 | const string base_value_suffix = conjugation_form[0].value_suffix; | |
130 | const StringPiece base_key_suffix = | |
131 | string_array_[base_form_token_iter.key_suffix_index()]; | |
132 | const StringPiece base_value_suffix = | |
133 | string_array_[base_form_token_iter.value_suffix_index()]; | |
134 | ||
125 | 135 | if (base_key_suffix.size() < key.size() && |
126 | 136 | base_value_suffix.size() < value.size() && |
127 | 137 | Util::EndsWith(key, base_key_suffix) && |
129 | 139 | key_stem.assign(key, 0, key.size() - base_key_suffix.size()); |
130 | 140 | value_stem.assign(value, 0, value.size() - base_value_suffix.size()); |
131 | 141 | } |
132 | for (size_t i = 0; i < size; ++i) { | |
133 | (*tokens)[i].key = key_stem + conjugation_form[i].key_suffix; | |
134 | (*tokens)[i].value = value_stem + conjugation_form[i].value_suffix; | |
135 | (*tokens)[i].id = conjugation_form[i].id; | |
136 | (*tokens)[i].cost = kDefaultCost; | |
142 | for (size_t i = 0; i < size; ++i, ++range.first) { | |
143 | const auto &token_iter = range.first; | |
144 | const StringPiece key_suffix = | |
145 | string_array_[token_iter.key_suffix_index()]; | |
146 | const StringPiece value_suffix = | |
147 | string_array_[token_iter.value_suffix_index()]; | |
148 | Util::ConcatStrings(key_stem, key_suffix, &(*tokens)[i].key); | |
149 | Util::ConcatStrings(value_stem, value_suffix, &(*tokens)[i].value); | |
150 | (*tokens)[i].id = token_iter.conjugation_id(); | |
151 | (*tokens)[i].cost = kDefaultCost; | |
137 | 152 | } |
153 | DCHECK(range.first == range.second); | |
138 | 154 | } |
139 | 155 | |
140 | 156 | return true; |
141 | 157 | } |
142 | 158 | |
159 | UserPOS *UserPOS::CreateFromDataManager(const DataManagerInterface &manager) { | |
160 | StringPiece token_array_data, string_array_data; | |
161 | manager.GetUserPOSData(&token_array_data, &string_array_data); | |
162 | return new UserPOS(token_array_data, string_array_data); | |
163 | } | |
164 | ||
143 | 165 | } // namespace dictionary |
144 | 166 | } // namespace mozc |
29 | 29 | #ifndef MOZC_DICTIONARY_USER_POS_H_ |
30 | 30 | #define MOZC_DICTIONARY_USER_POS_H_ |
31 | 31 | |
32 | #include <map> | |
32 | #include <iterator> | |
33 | 33 | #include <string> |
34 | #include <utility> | |
34 | 35 | #include <vector> |
35 | 36 | |
36 | 37 | #include "base/port.h" |
38 | #include "base/serialized_string_array.h" | |
39 | #include "base/string_piece.h" | |
40 | #include "data_manager/data_manager_interface.h" | |
37 | 41 | #include "dictionary/user_pos_interface.h" |
38 | 42 | |
39 | 43 | namespace mozc { |
40 | 44 | namespace dictionary { |
41 | 45 | |
46 | // This implementation of UserPOSInterface uses a sorted array of tokens to | |
47 | // efficiently lookup required data. There are two required data, string array | |
48 | // and token array, which are generated by ./gen_user_pos_data.py. | |
49 | // | |
50 | // * Prerequisite | |
51 | // Little endian is assumed. | |
52 | // | |
53 | // * Binary format | |
54 | // | |
55 | // ** String array | |
56 | // All the strings, such as key and value suffixes, are serialized into one | |
57 | // array using SerializedStringArray in such a way that array is sorted in | |
58 | // ascending order. In the token array (see below), every string is stored as | |
59 | // an index to this array. | |
60 | // | |
61 | // ** Token array | |
62 | // | |
63 | // The token array is an array of 8 byte blocks each of which has the following | |
64 | // layout: | |
65 | // | |
66 | // Token layout (8 bytes) | |
67 | // +---------------------------------------+ | |
68 | // | POS index (2 bytes) | | |
69 | // + - - - - - - - - - - - - - - - - - - - + | |
70 | // | Value suffix index (2 bytes) | | |
71 | // + - - - - - - - - - - - - - - - - - - - + | |
72 | // | Key suffix index (2 bytes) | | |
73 | // + - - - - - - - - - - - - - - - - - - - + | |
74 | // | Conjugation ID (2 bytes) | | |
75 | // +---------------------------------------+ | |
76 | // | |
77 | // The array is sorted in ascending order of POS index so that we can use binary | |
78 | // search to lookup necessary information efficiently. Note that there are | |
79 | // tokens having the same POS index. | |
42 | 80 | class UserPOS : public UserPOSInterface { |
43 | 81 | public: |
44 | struct ConjugationType { | |
45 | const char *key_suffix; | |
46 | const char *value_suffix; | |
47 | uint16 id; | |
82 | static const size_t kTokenByteLength = 8; | |
83 | ||
84 | class iterator | |
85 | : public std::iterator<std::random_access_iterator_tag, uint16> { | |
86 | public: | |
87 | iterator() = default; | |
88 | explicit iterator(const char *ptr) : ptr_(ptr) {} | |
89 | iterator(const iterator &x) = default; | |
90 | ||
91 | uint16 pos_index() const { | |
92 | return *reinterpret_cast<const uint16 *>(ptr_); | |
93 | } | |
94 | uint16 value_suffix_index() const { | |
95 | return *reinterpret_cast<const uint16 *>(ptr_ + 2); | |
96 | } | |
97 | uint16 key_suffix_index() const { | |
98 | return *reinterpret_cast<const uint16 *>(ptr_ + 4); | |
99 | } | |
100 | uint16 conjugation_id() const { | |
101 | return *reinterpret_cast<const uint16 *>(ptr_ + 6); | |
102 | } | |
103 | ||
104 | uint16 operator*() const { return pos_index(); } | |
105 | ||
106 | void swap(iterator &x) { | |
107 | using std::swap; | |
108 | swap(ptr_, x.ptr_); | |
109 | } | |
110 | ||
111 | friend void swap(iterator &x, iterator &y) { x.swap(y); } | |
112 | ||
113 | iterator &operator++() { | |
114 | ptr_ += kTokenByteLength; | |
115 | return *this; | |
116 | } | |
117 | ||
118 | iterator operator++(int) { | |
119 | const char *tmp = ptr_; | |
120 | ptr_ += kTokenByteLength; | |
121 | return iterator(tmp); | |
122 | } | |
123 | ||
124 | iterator &operator--() { | |
125 | ptr_ -= kTokenByteLength; | |
126 | return *this; | |
127 | } | |
128 | ||
129 | iterator operator--(int) { | |
130 | const char *tmp = ptr_; | |
131 | ptr_ -= kTokenByteLength; | |
132 | return iterator(tmp); | |
133 | } | |
134 | ||
135 | iterator &operator+=(difference_type n) { | |
136 | ptr_ += n * kTokenByteLength; | |
137 | return *this; | |
138 | } | |
139 | ||
140 | iterator &operator-=(difference_type n) { | |
141 | ptr_ -= n * kTokenByteLength; | |
142 | return *this; | |
143 | } | |
144 | ||
145 | friend iterator operator+(iterator x, difference_type n) { | |
146 | return iterator(x.ptr_ + n * kTokenByteLength); | |
147 | } | |
148 | ||
149 | friend iterator operator+(difference_type n, iterator x) { | |
150 | return iterator(x.ptr_ + n * kTokenByteLength); | |
151 | } | |
152 | ||
153 | friend iterator operator-(iterator x, difference_type n) { | |
154 | return iterator(x.ptr_ - n * kTokenByteLength); | |
155 | } | |
156 | ||
157 | friend difference_type operator-(iterator x, iterator y) { | |
158 | return (x.ptr_ - y.ptr_) / kTokenByteLength; | |
159 | } | |
160 | ||
161 | friend bool operator==(iterator x, iterator y) { return x.ptr_ == y.ptr_; } | |
162 | friend bool operator!=(iterator x, iterator y) { return x.ptr_ != y.ptr_; } | |
163 | friend bool operator<(iterator x, iterator y) { return x.ptr_ < y.ptr_; } | |
164 | friend bool operator<=(iterator x, iterator y) { return x.ptr_ <= y.ptr_; } | |
165 | friend bool operator>(iterator x, iterator y) { return x.ptr_ > y.ptr_; } | |
166 | friend bool operator>=(iterator x, iterator y) { return x.ptr_ >= y.ptr_; } | |
167 | ||
168 | private: | |
169 | const char *ptr_ = nullptr; | |
48 | 170 | }; |
49 | 171 | |
50 | struct POSToken { | |
51 | const char *pos; | |
52 | uint16 conjugation_size; | |
53 | const ConjugationType *conjugation_form; | |
54 | }; | |
55 | ||
56 | // Initializes the user pos from the given POSToken array. The class doesn't | |
57 | // take the ownership of the array. The caller is responsible for deleting it. | |
58 | explicit UserPOS(const POSToken *pos_token_array); | |
59 | virtual ~UserPOS() {} | |
172 | using const_iterator = iterator; | |
173 | ||
174 | static UserPOS *CreateFromDataManager(const DataManagerInterface &manager); | |
175 | ||
176 | // Initializes the user pos from the given binary data. The provided byte | |
177 | // data must outlive this instance. | |
178 | UserPOS(StringPiece token_array_data, StringPiece string_array_data); | |
179 | ~UserPOS() override; | |
60 | 180 | |
61 | 181 | // Implementation of UserPOSInterface. |
62 | virtual void GetPOSList(vector<string> *pos_list) const; | |
63 | virtual bool IsValidPOS(const string &pos) const; | |
64 | virtual bool GetPOSIDs(const string &pos, uint16 *id) const; | |
65 | virtual bool GetTokens(const string &key, const string &value, | |
66 | const string &pos, vector<Token> *tokens) const; | |
182 | void GetPOSList(vector<string> *pos_list) const override; | |
183 | bool IsValidPOS(const string &pos) const override; | |
184 | bool GetPOSIDs(const string &pos, uint16 *id) const override; | |
185 | bool GetTokens(const string &key, const string &value, const string &pos, | |
186 | vector<Token> *tokens) const override; | |
187 | ||
188 | iterator begin() const { return iterator(token_array_data_.data()); } | |
189 | iterator end() const { | |
190 | return iterator(token_array_data_.data() + token_array_data_.size()); | |
191 | } | |
67 | 192 | |
68 | 193 | private: |
69 | const POSToken *pos_token_array_; | |
70 | map<string, const POSToken *> pos_map_; | |
194 | StringPiece token_array_data_; | |
195 | SerializedStringArray string_array_; | |
71 | 196 | |
72 | 197 | DISALLOW_COPY_AND_ASSIGN(UserPOS); |
73 | 198 | }; |
44 | 44 | |
45 | 45 | class UserPOSTest : public ::testing::Test { |
46 | 46 | protected: |
47 | virtual void SetUp() { | |
47 | void SetUp() override { | |
48 | StringPiece token_array_data, string_array_data; | |
48 | 49 | const testing::MockUserPosManager user_pos_manager; |
49 | user_pos_.reset(new UserPOS(user_pos_manager.GetUserPOSData())); | |
50 | user_pos_manager.GetUserPOSData(&token_array_data, &string_array_data); | |
51 | user_pos_.reset(new UserPOS(token_array_data, string_array_data)); | |
50 | 52 | CHECK(user_pos_.get()); |
51 | 53 | } |
52 | 54 |
46 | 46 | #include "dictionary/system/system_dictionary.h" |
47 | 47 | #include "dictionary/system/value_dictionary.h" |
48 | 48 | #include "dictionary/user_dictionary.h" |
49 | #include "dictionary/user_pos.h" | |
49 | 50 | #include "engine/engine_interface.h" |
50 | 51 | #include "engine/user_data_manager_interface.h" |
51 | 52 | #include "prediction/dictionary_predictor.h" |
148 | 149 | CHECK(suppression_dictionary_.get()); |
149 | 150 | |
150 | 151 | user_dictionary_.reset( |
151 | new UserDictionary(new UserPOS(data_manager->GetUserPOSData()), | |
152 | new UserDictionary(UserPOS::CreateFromDataManager(*data_manager), | |
152 | 153 | data_manager->GetPOSMatcher(), |
153 | 154 | suppression_dictionary_.get())); |
154 | 155 | CHECK(user_dictionary_.get()); |
0 | 0 | MAJOR=2 |
1 | 1 | MINOR=17 |
2 | BUILD=2517 | |
2 | BUILD=2518 | |
3 | 3 | REVISION=102 |
4 | 4 | # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be |
5 | 5 | # downloaded by NaCl Mozc. |
6 | NACL_DICTIONARY_VERSION=20 | |
6 | NACL_DICTIONARY_VERSION=21 |
81 | 81 | DictionaryGenerator::DictionaryGenerator() |
82 | 82 | : token_pool_(new ObjectPool<Token>(kTokenSize)), |
83 | 83 | token_map_(new map<uint64, Token *>), |
84 | user_pos_(new dictionary::UserPOS( | |
85 | UserPosManager::GetUserPosManager()->GetUserPOSData())), | |
86 | 84 | open_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher() |
87 | 85 | ->GetOpenBracketId()), |
88 | 86 | close_bracket_id_(UserPosManager::GetUserPosManager()->GetPOSMatcher() |
89 | ->GetCloseBracketId()) {} | |
87 | ->GetCloseBracketId()) { | |
88 | user_pos_.reset(dictionary::UserPOS::CreateFromDataManager( | |
89 | *UserPosManager::GetUserPosManager())); | |
90 | } | |
90 | 91 | |
91 | 92 | DictionaryGenerator::~DictionaryGenerator() {} |
92 | 93 |
72 | 72 | '../converter/converter_base.gyp:converter_mock', |
73 | 73 | '../data_manager/data_manager.gyp:user_pos_manager', |
74 | 74 | '../data_manager/testing/mock_data_manager.gyp:mock_data_manager', |
75 | '../dictionary/dictionary_base.gyp:user_pos', | |
75 | 76 | '../engine/engine.gyp:mock_data_engine_factory', |
76 | 77 | '../protocol/protocol.gyp:commands_proto', |
77 | 78 | '../session/session_base.gyp:request_test_util', |
42 | 42 | #include "dictionary/suppression_dictionary.h" |
43 | 43 | #include "dictionary/user_dictionary.h" |
44 | 44 | #include "dictionary/user_dictionary_storage.h" |
45 | #include "dictionary/user_pos.h" | |
45 | 46 | #include "protocol/commands.pb.h" |
46 | 47 | #include "protocol/config.pb.h" |
47 | 48 | #include "request/conversion_request.h" |
76 | 77 | convreq_.set_config(&config_); |
77 | 78 | } |
78 | 79 | |
79 | virtual void SetUp() { | |
80 | void SetUp() override { | |
80 | 81 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
81 | 82 | config::ConfigHandler::GetDefaultConfig(&config_); |
82 | 83 | |
84 | 85 | |
85 | 86 | suppression_dictionary_.reset(new SuppressionDictionary); |
86 | 87 | user_dictionary_.reset( |
87 | new UserDictionary(new UserPOS(data_manager_->GetUserPOSData()), | |
88 | new UserDictionary(UserPOS::CreateFromDataManager(*data_manager_), | |
88 | 89 | data_manager_->GetPOSMatcher(), |
89 | 90 | suppression_dictionary_.get())); |
90 | 91 | } |
91 | 92 | |
92 | virtual void TearDown() { | |
93 | void TearDown() override { | |
93 | 94 | // just in case, reset the config |
94 | 95 | config::ConfigHandler::GetDefaultConfig(&config_); |
95 | 96 | } |