Remove the duplicate of the value trie.
The value trie, i.e., the trie of surface forms, is allocated in
two locations: in SystemDictionary and in ValueDictionary.
This CL replaces the one in ValueDictionary with a pointer to
SystemDictionary's value trie.
BUG=
TEST=unittest
REF_BUG=21859420
REF_CL=96080377
Noriyuki Takahashi authored 8 years ago
Yohei Yukawa committed 8 years ago
188 | 188 | int dictionary_size = 0; |
189 | 189 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
190 | 190 | |
191 | SystemDictionary *sysdic = | |
192 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); | |
191 | 193 | ret->user_dictionary.reset(new UserDictionaryStub); |
192 | 194 | ret->suppression_dictionary.reset(new SuppressionDictionary); |
193 | 195 | ret->dictionary.reset(new DictionaryImpl( |
194 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(), | |
195 | ValueDictionary::CreateValueDictionaryFromImage( | |
196 | *data_manager.GetPOSMatcher(), dictionary_data, dictionary_size), | |
196 | sysdic, // DictionaryImpl takes the ownership | |
197 | new ValueDictionary(*data_manager.GetPOSMatcher(), | |
198 | &sysdic->value_trie()), | |
197 | 199 | ret->user_dictionary.get(), |
198 | 200 | ret->suppression_dictionary.get(), |
199 | 201 | data_manager.GetPOSMatcher())); |
1226 | 1228 | int dictionary_size = 0; |
1227 | 1229 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
1228 | 1230 | |
1231 | SystemDictionary *sysdic = | |
1232 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); | |
1229 | 1233 | std::unique_ptr<DictionaryInterface> dictionary(new DictionaryImpl( |
1230 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(), | |
1231 | ValueDictionary::CreateValueDictionaryFromImage( | |
1232 | *data_manager.GetPOSMatcher(), dictionary_data, dictionary_size), | |
1234 | sysdic, // DictionaryImpl takes the ownership | |
1235 | new ValueDictionary(*data_manager.GetPOSMatcher(), | |
1236 | &sysdic->value_trie()), | |
1233 | 1237 | mock_user_dictionary.get(), |
1234 | 1238 | suppression_dictionary.get(), |
1235 | 1239 | data_manager.GetPOSMatcher())); |
109 | 109 | int dictionary_size = 0; |
110 | 110 | data_manager_->GetSystemDictionaryData(&dictionary_data, |
111 | 111 | &dictionary_size); |
112 | SystemDictionary *sysdic = | |
113 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); | |
112 | 114 | dictionary_.reset(new DictionaryImpl( |
113 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(), | |
114 | ValueDictionary::CreateValueDictionaryFromImage( | |
115 | *pos_matcher, dictionary_data, dictionary_size), | |
115 | sysdic, // DictionaryImpl takes the ownership | |
116 | new ValueDictionary(*pos_matcher, &sysdic->value_trie()), | |
116 | 117 | &user_dictionary_stub_, |
117 | 118 | suppression_dictionary_.get(), |
118 | 119 | pos_matcher)); |
86 | 86 | int dictionary_size = 0; |
87 | 87 | data_manager_->GetSystemDictionaryData(&dictionary_data, |
88 | 88 | &dictionary_size); |
89 | SystemDictionary *sysdic = | |
90 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); | |
89 | 91 | dictionary_.reset(new DictionaryImpl( |
90 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(), | |
91 | ValueDictionary::CreateValueDictionaryFromImage( | |
92 | *pos_matcher, dictionary_data, dictionary_size), | |
92 | sysdic, // DictionaryImpl takes the ownership | |
93 | new ValueDictionary(*pos_matcher, &sysdic->value_trie()), | |
93 | 94 | &user_dictionary_stub_, |
94 | 95 | suppression_dictionary_.get(), |
95 | 96 | pos_matcher)); |
68 | 68 | const char *dictionary_data = NULL; |
69 | 69 | int dictionary_size = 0; |
70 | 70 | data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
71 | DictionaryInterface *sys_dict = | |
71 | SystemDictionary *sys_dict = | |
72 | 72 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); |
73 | DictionaryInterface *val_dict = | |
74 | ValueDictionary::CreateValueDictionaryFromImage(*ret->pos_matcher, | |
75 | dictionary_data, | |
76 | dictionary_size); | |
73 | ValueDictionary *val_dict = | |
74 | new ValueDictionary(*ret->pos_matcher, &sys_dict->value_trie()); | |
77 | 75 | ret->user_dictionary.reset(new UserDictionaryStub); |
78 | 76 | ret->suppression_dictionary.reset(new SuppressionDictionary); |
79 | 77 | ret->dictionary.reset(new DictionaryImpl(sys_dict, |
76 | 76 | '../../storage/louds/louds.gyp:louds_trie', |
77 | 77 | '../dictionary_base.gyp:pos_matcher', |
78 | 78 | '../file/dictionary_file.gyp:codec_factory', |
79 | '../file/dictionary_file.gyp:dictionary_file', | |
80 | 79 | 'system_dictionary_codec', |
81 | 80 | ], |
82 | 81 | }, |
97 | 97 | |
98 | 98 | virtual ~SystemDictionary(); |
99 | 99 | |
100 | const storage::louds::LoudsTrie &value_trie() const { return value_trie_; } | |
101 | ||
100 | 102 | // Implementation of DictionaryInterface. |
101 | 103 | virtual bool HasKey(StringPiece key) const; |
102 | 104 | virtual bool HasValue(StringPiece value) const; |
88 | 88 | '../../base/base.gyp:base_core', |
89 | 89 | '../../data_manager/data_manager.gyp:user_pos_manager', |
90 | 90 | '../../request/request.gyp:conversion_request', |
91 | '../../storage/louds/louds.gyp:louds_trie_builder', | |
91 | 92 | '../../testing/testing.gyp:gtest_main', |
92 | 93 | '../dictionary.gyp:dictionary_test_util', |
93 | 'system_dictionary.gyp:system_dictionary_builder', | |
94 | 94 | 'system_dictionary.gyp:value_dictionary', |
95 | 95 | ], |
96 | 96 | 'variables': { |
49 | 49 | namespace mozc { |
50 | 50 | namespace dictionary { |
51 | 51 | |
52 | ValueDictionary::ValueDictionary(const POSMatcher& pos_matcher) | |
53 | : dictionary_file_( | |
54 | new DictionaryFile(DictionaryFileCodecFactory::GetCodec())), | |
52 | ValueDictionary::ValueDictionary(const POSMatcher &pos_matcher, | |
53 | const LoudsTrie *value_trie) | |
54 | : value_trie_(value_trie), | |
55 | 55 | codec_(SystemDictionaryCodecFactory::GetCodec()), |
56 | 56 | suggestion_only_word_id_(pos_matcher.GetSuggestOnlyWordId()) { |
57 | 57 | } |
58 | 58 | |
59 | 59 | ValueDictionary::~ValueDictionary() {} |
60 | ||
61 | // static | |
62 | ValueDictionary *ValueDictionary::CreateValueDictionaryFromFile( | |
63 | const POSMatcher& pos_matcher, const string &filename) { | |
64 | std::unique_ptr<ValueDictionary> instance(new ValueDictionary(pos_matcher)); | |
65 | DCHECK(instance.get()); | |
66 | if (!instance->dictionary_file_->OpenFromFile(filename)) { | |
67 | LOG(ERROR) << "Failed to open system dictionary file"; | |
68 | return nullptr; | |
69 | } | |
70 | if (!instance->OpenDictionaryFile()) { | |
71 | LOG(ERROR) << "Failed to create value dictionary"; | |
72 | return nullptr; | |
73 | } | |
74 | return instance.release(); | |
75 | } | |
76 | ||
77 | // static | |
78 | ValueDictionary *ValueDictionary::CreateValueDictionaryFromImage( | |
79 | const POSMatcher& pos_matcher, const char *ptr, int len) { | |
80 | // Make the dictionary not to be paged out. | |
81 | // We don't check the return value because the process doesn't necessarily | |
82 | // has the priviledge to mlock. | |
83 | // Note that we don't munlock the space because it's always better to keep | |
84 | // the singleton system dictionary paged in as long as the process runs. | |
85 | Mmap::MaybeMLock(ptr, len); | |
86 | std::unique_ptr<ValueDictionary> instance(new ValueDictionary(pos_matcher)); | |
87 | DCHECK(instance.get()); | |
88 | if (!instance->dictionary_file_->OpenFromImage(ptr, len)) { | |
89 | LOG(ERROR) << "Failed to open system dictionary file"; | |
90 | return nullptr; | |
91 | } | |
92 | if (!instance->OpenDictionaryFile()) { | |
93 | LOG(ERROR) << "Failed to create value dictionary"; | |
94 | return nullptr; | |
95 | } | |
96 | return instance.release(); | |
97 | } | |
98 | ||
99 | bool ValueDictionary::OpenDictionaryFile() { | |
100 | int image_len = 0; | |
101 | const unsigned char *value_image = | |
102 | reinterpret_cast<const uint8 *>(dictionary_file_->GetSection( | |
103 | codec_->GetSectionNameForValue(), &image_len)); | |
104 | CHECK(value_image) << "can not find value section"; | |
105 | if (!(value_trie_.Open(value_image))) { | |
106 | DLOG(ERROR) << "Cannot open value trie"; | |
107 | return false; | |
108 | } | |
109 | return true; | |
110 | } | |
111 | 60 | |
112 | 61 | // ValueDictionary is supposed to use the same data with SystemDictionary |
113 | 62 | // and SystemDictionary::HasKey should return the same result with |
176 | 125 | codec_->EncodeValue(key, &encoded_key); |
177 | 126 | |
178 | 127 | LoudsTrie::Node node; |
179 | if (!value_trie_.Traverse(encoded_key, &node)) { | |
128 | if (!value_trie_->Traverse(encoded_key, &node)) { | |
180 | 129 | return; |
181 | 130 | } |
182 | 131 | |
192 | 141 | node = queue.front(); |
193 | 142 | queue.pop(); |
194 | 143 | |
195 | if (value_trie_.IsTerminalNode(node)) { | |
196 | switch (HandleTerminalNode(value_trie_, *codec_, | |
144 | if (value_trie_->IsTerminalNode(node)) { | |
145 | switch (HandleTerminalNode(*value_trie_, *codec_, | |
197 | 146 | suggestion_only_word_id_, |
198 | 147 | node, callback, encoded_value_buffer, |
199 | 148 | &value, &token)) { |
206 | 155 | } |
207 | 156 | } |
208 | 157 | |
209 | for (value_trie_.MoveToFirstChild(&node); | |
210 | value_trie_.IsValidNode(node); | |
211 | value_trie_.MoveToNextSibling(&node)) { | |
158 | for (value_trie_->MoveToFirstChild(&node); | |
159 | value_trie_->IsValidNode(node); | |
160 | value_trie_->MoveToNextSibling(&node)) { | |
212 | 161 | queue.push(node); |
213 | 162 | } |
214 | 163 | } while (!queue.empty()); |
231 | 180 | |
232 | 181 | string lookup_key_str; |
233 | 182 | codec_->EncodeValue(key, &lookup_key_str); |
234 | if (value_trie_.ExactSearch(lookup_key_str) == -1) { | |
183 | if (value_trie_->ExactSearch(lookup_key_str) == -1) { | |
235 | 184 | return; |
236 | 185 | } |
237 | 186 | if (callback->OnKey(key) != Callback::TRAVERSE_CONTINUE) { |
33 | 33 | #ifndef MOZC_DICTIONARY_SYSTEM_VALUE_DICTIONARY_H_ |
34 | 34 | #define MOZC_DICTIONARY_SYSTEM_VALUE_DICTIONARY_H_ |
35 | 35 | |
36 | #include <memory> | |
37 | #include <string> | |
38 | ||
39 | 36 | #include "base/port.h" |
40 | 37 | #include "base/string_piece.h" |
41 | 38 | #include "dictionary/dictionary_interface.h" |
44 | 41 | namespace mozc { |
45 | 42 | namespace dictionary { |
46 | 43 | |
47 | class DictionaryFile; | |
48 | 44 | class POSMatcher; |
49 | 45 | class SystemDictionaryCodecInterface; |
50 | 46 | |
51 | 47 | class ValueDictionary : public DictionaryInterface { |
52 | 48 | public: |
49 | // This class doesn't take the ownership of |value_trie|. | |
50 | ValueDictionary(const POSMatcher &pos_matcher, | |
51 | const storage::louds::LoudsTrie *value_trie); | |
53 | 52 | virtual ~ValueDictionary(); |
54 | ||
55 | static ValueDictionary *CreateValueDictionaryFromFile( | |
56 | const POSMatcher& pos_matcher, const string &filename); | |
57 | ||
58 | static ValueDictionary *CreateValueDictionaryFromImage( | |
59 | const POSMatcher& pos_matcher, const char *ptr, int len); | |
60 | 53 | |
61 | 54 | // Implementation of DictionaryInterface |
62 | 55 | virtual bool HasKey(StringPiece key) const; |
75 | 68 | Callback *callback) const; |
76 | 69 | |
77 | 70 | private: |
78 | explicit ValueDictionary(const POSMatcher& pos_matcher); | |
79 | ||
80 | bool OpenDictionaryFile(); | |
81 | ||
82 | storage::louds::LoudsTrie value_trie_; | |
83 | std::unique_ptr<DictionaryFile> dictionary_file_; | |
71 | const storage::louds::LoudsTrie *value_trie_; | |
84 | 72 | const SystemDictionaryCodecInterface *codec_; |
85 | 73 | const uint16 suggestion_only_word_id_; |
86 | 74 |
29 | 29 | #include "dictionary/system/value_dictionary.h" |
30 | 30 | |
31 | 31 | #include <memory> |
32 | #include <vector> | |
33 | 32 | |
34 | #include "base/file_util.h" | |
35 | #include "base/stl_util.h" | |
36 | #include "base/system_util.h" | |
37 | 33 | #include "data_manager/user_pos_manager.h" |
38 | #include "dictionary/dictionary_interface.h" | |
39 | 34 | #include "dictionary/dictionary_test_util.h" |
40 | 35 | #include "dictionary/dictionary_token.h" |
41 | 36 | #include "dictionary/pos_matcher.h" |
42 | #include "dictionary/system/system_dictionary_builder.h" | |
37 | #include "dictionary/system/codec_interface.h" | |
43 | 38 | #include "request/conversion_request.h" |
44 | #include "testing/base/public/googletest.h" | |
39 | #include "storage/louds/louds_trie_builder.h" | |
45 | 40 | #include "testing/base/public/gunit.h" |
41 | ||
42 | using mozc::storage::louds::LoudsTrie; | |
43 | using mozc::storage::louds::LoudsTrieBuilder; | |
46 | 44 | |
47 | 45 | namespace mozc { |
48 | 46 | namespace dictionary { |
49 | 47 | |
50 | class ValueDictionaryTest : public testing::Test { | |
48 | class ValueDictionaryTest : public ::testing::Test { | |
51 | 49 | protected: |
52 | ValueDictionaryTest() : | |
53 | dict_name_(FLAGS_test_tmpdir + "/value_dict_test.dic") {} | |
54 | ||
55 | 50 | virtual void SetUp() { |
56 | STLDeleteElements(&tokens_); | |
57 | SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); | |
58 | FileUtil::Unlink(dict_name_); | |
59 | 51 | pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher(); |
52 | louds_trie_builder_.reset(new LoudsTrieBuilder); | |
53 | louds_trie_.reset(new LoudsTrie); | |
60 | 54 | } |
61 | 55 | |
62 | 56 | virtual void TearDown() { |
63 | STLDeleteElements(&tokens_); | |
64 | FileUtil::Unlink(dict_name_); | |
57 | louds_trie_.reset(nullptr); | |
58 | louds_trie_builder_.reset(nullptr); | |
65 | 59 | } |
66 | 60 | |
67 | void AddToken(const string &key, const string &value) { | |
68 | Token *token = new Token; | |
69 | token->key = key; | |
70 | token->value = value; | |
71 | token->cost = 0; | |
72 | token->lid = 0; | |
73 | token->rid = 0; | |
74 | tokens_.push_back(token); | |
61 | void AddValue(const string &value) { | |
62 | string encoded; | |
63 | SystemDictionaryCodecFactory::GetCodec()->EncodeValue(value, &encoded); | |
64 | louds_trie_builder_->Add(encoded); | |
75 | 65 | } |
76 | 66 | |
77 | void BuildDictionary() { | |
78 | dictionary::SystemDictionaryBuilder builder; | |
79 | builder.BuildFromTokens(tokens_); | |
80 | builder.WriteToFile(dict_name_); | |
67 | ValueDictionary *BuildValueDictionary() { | |
68 | louds_trie_builder_->Build(); | |
69 | louds_trie_->Open( | |
70 | reinterpret_cast<const uint8 *>(louds_trie_builder_->image().data())); | |
71 | return new ValueDictionary(*pos_matcher_, louds_trie_.get()); | |
81 | 72 | } |
82 | 73 | |
83 | 74 | void InitToken(const string &value, Token *token) const { |
87 | 78 | token->attributes = Token::NONE; |
88 | 79 | } |
89 | 80 | |
90 | const string dict_name_; | |
91 | 81 | const POSMatcher *pos_matcher_; |
92 | 82 | ConversionRequest convreq_; |
93 | ||
94 | private: | |
95 | vector<Token *> tokens_; | |
83 | std::unique_ptr<LoudsTrieBuilder> louds_trie_builder_; | |
84 | std::unique_ptr<LoudsTrie> louds_trie_; | |
96 | 85 | }; |
97 | 86 | |
98 | 87 | TEST_F(ValueDictionaryTest, HasValue) { |
99 | // "うぃー" | |
100 | AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we"); | |
101 | // "うぉー" | |
102 | AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war"); | |
103 | // "わーど" | |
104 | AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word"); | |
105 | // "わーるど" | |
106 | AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x82\x8B\xE3\x81\xA9", "world"); | |
107 | BuildDictionary(); | |
108 | std::unique_ptr<ValueDictionary> dictionary( | |
109 | ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_, | |
110 | dict_name_)); | |
88 | AddValue("we"); | |
89 | AddValue("war"); | |
90 | AddValue("word"); | |
91 | AddValue("world"); | |
92 | std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary()); | |
111 | 93 | |
112 | 94 | // ValueDictionary is supposed to use the same data with SystemDictionary |
113 | 95 | // and SystemDictionary::HasValue should return the same result with |
123 | 105 | } |
124 | 106 | |
125 | 107 | TEST_F(ValueDictionaryTest, LookupPredictive) { |
126 | // "ぐーぐる" | |
127 | AddToken("\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B", "google"); | |
128 | // "うぃー" | |
129 | AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we"); | |
130 | // "うぉー" | |
131 | AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war"); | |
132 | // "わーど" | |
133 | AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word"); | |
134 | // "わーるど" | |
135 | AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x82\x8B\xE3\x81\xA9", "world"); | |
136 | BuildDictionary(); | |
137 | std::unique_ptr<ValueDictionary> dictionary( | |
138 | ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_, | |
139 | dict_name_)); | |
108 | AddValue("google"); | |
109 | AddValue("we"); | |
110 | AddValue("war"); | |
111 | AddValue("word"); | |
112 | AddValue("world"); | |
113 | std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary()); | |
140 | 114 | |
141 | 115 | // Reading fields are irrelevant to value dictionary. Prepare actual tokens |
142 | 116 | // that are to be looked up. |
177 | 151 | } |
178 | 152 | |
179 | 153 | TEST_F(ValueDictionaryTest, LookupExact) { |
180 | // "うぃー" | |
181 | AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we"); | |
182 | // "うぉー" | |
183 | AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war"); | |
184 | // "わーど" | |
185 | AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word"); | |
186 | BuildDictionary(); | |
154 | AddValue("we"); | |
155 | AddValue("war"); | |
156 | AddValue("word"); | |
157 | std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary()); | |
187 | 158 | |
188 | std::unique_ptr<ValueDictionary> dictionary( | |
189 | ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_, | |
190 | dict_name_)); | |
191 | 159 | CollectTokenCallback callback; |
192 | 160 | dictionary->LookupExact("war", convreq_, &callback); |
193 | 161 | ASSERT_EQ(1, callback.tokens().size()); |
158 | 158 | int dictionary_size = 0; |
159 | 159 | data_manager->GetSystemDictionaryData(&dictionary_data, &dictionary_size); |
160 | 160 | |
161 | SystemDictionary *sysdic = | |
162 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(); | |
161 | 163 | dictionary_.reset(new DictionaryImpl( |
162 | SystemDictionary::Builder(dictionary_data, dictionary_size).Build(), | |
163 | ValueDictionary::CreateValueDictionaryFromImage( | |
164 | *data_manager->GetPOSMatcher(), dictionary_data, dictionary_size), | |
164 | sysdic, // DictionaryImpl takes the ownership | |
165 | new ValueDictionary(*data_manager->GetPOSMatcher(), | |
166 | &sysdic->value_trie()), | |
165 | 167 | user_dictionary_.get(), |
166 | 168 | suppression_dictionary_.get(), |
167 | 169 | data_manager->GetPOSMatcher())); |