Codebase list mozc / eb52b51
Remove the duplicate of the value trie. The value trie, i.e., the trie of surface forms, is allocated in two locations: in SystemDictionary and in ValueDictionary. This CL replaces the one in ValueDictionary with a pointer to SystemDictionary's value trie. BUG= TEST=unittest REF_BUG=21859420 REF_CL=96080377 Noriyuki Takahashi authored 8 years ago Yohei Yukawa committed 8 years ago
12 changed file(s) with 80 addition(s) and 168 deletion(s). Raw diff Collapse all Expand all
188188 int dictionary_size = 0;
189189 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
190190
191 SystemDictionary *sysdic =
192 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
191193 ret->user_dictionary.reset(new UserDictionaryStub);
192194 ret->suppression_dictionary.reset(new SuppressionDictionary);
193195 ret->dictionary.reset(new DictionaryImpl(
194 SystemDictionary::Builder(dictionary_data, dictionary_size).Build(),
195 ValueDictionary::CreateValueDictionaryFromImage(
196 *data_manager.GetPOSMatcher(), dictionary_data, dictionary_size),
196 sysdic, // DictionaryImpl takes the ownership
197 new ValueDictionary(*data_manager.GetPOSMatcher(),
198 &sysdic->value_trie()),
197199 ret->user_dictionary.get(),
198200 ret->suppression_dictionary.get(),
199201 data_manager.GetPOSMatcher()));
12261228 int dictionary_size = 0;
12271229 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
12281230
1231 SystemDictionary *sysdic =
1232 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
12291233 std::unique_ptr<DictionaryInterface> dictionary(new DictionaryImpl(
1230 SystemDictionary::Builder(dictionary_data, dictionary_size).Build(),
1231 ValueDictionary::CreateValueDictionaryFromImage(
1232 *data_manager.GetPOSMatcher(), dictionary_data, dictionary_size),
1234 sysdic, // DictionaryImpl takes the ownership
1235 new ValueDictionary(*data_manager.GetPOSMatcher(),
1236 &sysdic->value_trie()),
12331237 mock_user_dictionary.get(),
12341238 suppression_dictionary.get(),
12351239 data_manager.GetPOSMatcher()));
109109 int dictionary_size = 0;
110110 data_manager_->GetSystemDictionaryData(&dictionary_data,
111111 &dictionary_size);
112 SystemDictionary *sysdic =
113 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
112114 dictionary_.reset(new DictionaryImpl(
113 SystemDictionary::Builder(dictionary_data, dictionary_size).Build(),
114 ValueDictionary::CreateValueDictionaryFromImage(
115 *pos_matcher, dictionary_data, dictionary_size),
115 sysdic, // DictionaryImpl takes the ownership
116 new ValueDictionary(*pos_matcher, &sysdic->value_trie()),
116117 &user_dictionary_stub_,
117118 suppression_dictionary_.get(),
118119 pos_matcher));
8686 int dictionary_size = 0;
8787 data_manager_->GetSystemDictionaryData(&dictionary_data,
8888 &dictionary_size);
89 SystemDictionary *sysdic =
90 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
8991 dictionary_.reset(new DictionaryImpl(
90 SystemDictionary::Builder(dictionary_data, dictionary_size).Build(),
91 ValueDictionary::CreateValueDictionaryFromImage(
92 *pos_matcher, dictionary_data, dictionary_size),
92 sysdic, // DictionaryImpl takes the ownership
93 new ValueDictionary(*pos_matcher, &sysdic->value_trie()),
9394 &user_dictionary_stub_,
9495 suppression_dictionary_.get(),
9596 pos_matcher));
6868 const char *dictionary_data = NULL;
6969 int dictionary_size = 0;
7070 data_manager.GetSystemDictionaryData(&dictionary_data, &dictionary_size);
71 DictionaryInterface *sys_dict =
71 SystemDictionary *sys_dict =
7272 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
73 DictionaryInterface *val_dict =
74 ValueDictionary::CreateValueDictionaryFromImage(*ret->pos_matcher,
75 dictionary_data,
76 dictionary_size);
73 ValueDictionary *val_dict =
74 new ValueDictionary(*ret->pos_matcher, &sys_dict->value_trie());
7775 ret->user_dictionary.reset(new UserDictionaryStub);
7876 ret->suppression_dictionary.reset(new SuppressionDictionary);
7977 ret->dictionary.reset(new DictionaryImpl(sys_dict,
7676 '../../storage/louds/louds.gyp:louds_trie',
7777 '../dictionary_base.gyp:pos_matcher',
7878 '../file/dictionary_file.gyp:codec_factory',
79 '../file/dictionary_file.gyp:dictionary_file',
8079 'system_dictionary_codec',
8180 ],
8281 },
9797
9898 virtual ~SystemDictionary();
9999
100 const storage::louds::LoudsTrie &value_trie() const { return value_trie_; }
101
100102 // Implementation of DictionaryInterface.
101103 virtual bool HasKey(StringPiece key) const;
102104 virtual bool HasValue(StringPiece value) const;
8888 '../../base/base.gyp:base_core',
8989 '../../data_manager/data_manager.gyp:user_pos_manager',
9090 '../../request/request.gyp:conversion_request',
91 '../../storage/louds/louds.gyp:louds_trie_builder',
9192 '../../testing/testing.gyp:gtest_main',
9293 '../dictionary.gyp:dictionary_test_util',
93 'system_dictionary.gyp:system_dictionary_builder',
9494 'system_dictionary.gyp:value_dictionary',
9595 ],
9696 'variables': {
4949 namespace mozc {
5050 namespace dictionary {
5151
52 ValueDictionary::ValueDictionary(const POSMatcher& pos_matcher)
53 : dictionary_file_(
54 new DictionaryFile(DictionaryFileCodecFactory::GetCodec())),
52 ValueDictionary::ValueDictionary(const POSMatcher &pos_matcher,
53 const LoudsTrie *value_trie)
54 : value_trie_(value_trie),
5555 codec_(SystemDictionaryCodecFactory::GetCodec()),
5656 suggestion_only_word_id_(pos_matcher.GetSuggestOnlyWordId()) {
5757 }
5858
5959 ValueDictionary::~ValueDictionary() {}
60
61 // static
62 ValueDictionary *ValueDictionary::CreateValueDictionaryFromFile(
63 const POSMatcher& pos_matcher, const string &filename) {
64 std::unique_ptr<ValueDictionary> instance(new ValueDictionary(pos_matcher));
65 DCHECK(instance.get());
66 if (!instance->dictionary_file_->OpenFromFile(filename)) {
67 LOG(ERROR) << "Failed to open system dictionary file";
68 return nullptr;
69 }
70 if (!instance->OpenDictionaryFile()) {
71 LOG(ERROR) << "Failed to create value dictionary";
72 return nullptr;
73 }
74 return instance.release();
75 }
76
77 // static
78 ValueDictionary *ValueDictionary::CreateValueDictionaryFromImage(
79 const POSMatcher& pos_matcher, const char *ptr, int len) {
80 // Prevent the dictionary from being paged out.
81 // We don't check the return value because the process doesn't necessarily
82 // have the privilege to mlock.
83 // Note that we don't munlock the space because it's always better to keep
84 // the singleton system dictionary paged in as long as the process runs.
85 Mmap::MaybeMLock(ptr, len);
86 std::unique_ptr<ValueDictionary> instance(new ValueDictionary(pos_matcher));
87 DCHECK(instance.get());
88 if (!instance->dictionary_file_->OpenFromImage(ptr, len)) {
89 LOG(ERROR) << "Failed to open system dictionary file";
90 return nullptr;
91 }
92 if (!instance->OpenDictionaryFile()) {
93 LOG(ERROR) << "Failed to create value dictionary";
94 return nullptr;
95 }
96 return instance.release();
97 }
98
99 bool ValueDictionary::OpenDictionaryFile() {
100 int image_len = 0;
101 const unsigned char *value_image =
102 reinterpret_cast<const uint8 *>(dictionary_file_->GetSection(
103 codec_->GetSectionNameForValue(), &image_len));
104 CHECK(value_image) << "can not find value section";
105 if (!(value_trie_.Open(value_image))) {
106 DLOG(ERROR) << "Cannot open value trie";
107 return false;
108 }
109 return true;
110 }
11160
11261 // ValueDictionary is supposed to use the same data as SystemDictionary
11362 // and SystemDictionary::HasKey should return the same result as
176125 codec_->EncodeValue(key, &encoded_key);
177126
178127 LoudsTrie::Node node;
179 if (!value_trie_.Traverse(encoded_key, &node)) {
128 if (!value_trie_->Traverse(encoded_key, &node)) {
180129 return;
181130 }
182131
192141 node = queue.front();
193142 queue.pop();
194143
195 if (value_trie_.IsTerminalNode(node)) {
196 switch (HandleTerminalNode(value_trie_, *codec_,
144 if (value_trie_->IsTerminalNode(node)) {
145 switch (HandleTerminalNode(*value_trie_, *codec_,
197146 suggestion_only_word_id_,
198147 node, callback, encoded_value_buffer,
199148 &value, &token)) {
206155 }
207156 }
208157
209 for (value_trie_.MoveToFirstChild(&node);
210 value_trie_.IsValidNode(node);
211 value_trie_.MoveToNextSibling(&node)) {
158 for (value_trie_->MoveToFirstChild(&node);
159 value_trie_->IsValidNode(node);
160 value_trie_->MoveToNextSibling(&node)) {
212161 queue.push(node);
213162 }
214163 } while (!queue.empty());
231180
232181 string lookup_key_str;
233182 codec_->EncodeValue(key, &lookup_key_str);
234 if (value_trie_.ExactSearch(lookup_key_str) == -1) {
183 if (value_trie_->ExactSearch(lookup_key_str) == -1) {
235184 return;
236185 }
237186 if (callback->OnKey(key) != Callback::TRAVERSE_CONTINUE) {
3333 #ifndef MOZC_DICTIONARY_SYSTEM_VALUE_DICTIONARY_H_
3434 #define MOZC_DICTIONARY_SYSTEM_VALUE_DICTIONARY_H_
3535
36 #include <memory>
37 #include <string>
38
3936 #include "base/port.h"
4037 #include "base/string_piece.h"
4138 #include "dictionary/dictionary_interface.h"
4441 namespace mozc {
4542 namespace dictionary {
4643
47 class DictionaryFile;
4844 class POSMatcher;
4945 class SystemDictionaryCodecInterface;
5046
5147 class ValueDictionary : public DictionaryInterface {
5248 public:
49 // This class doesn't take the ownership of |value_trie|.
50 ValueDictionary(const POSMatcher &pos_matcher,
51 const storage::louds::LoudsTrie *value_trie);
5352 virtual ~ValueDictionary();
54
55 static ValueDictionary *CreateValueDictionaryFromFile(
56 const POSMatcher& pos_matcher, const string &filename);
57
58 static ValueDictionary *CreateValueDictionaryFromImage(
59 const POSMatcher& pos_matcher, const char *ptr, int len);
6053
6154 // Implementation of DictionaryInterface
6255 virtual bool HasKey(StringPiece key) const;
7568 Callback *callback) const;
7669
7770 private:
78 explicit ValueDictionary(const POSMatcher& pos_matcher);
79
80 bool OpenDictionaryFile();
81
82 storage::louds::LoudsTrie value_trie_;
83 std::unique_ptr<DictionaryFile> dictionary_file_;
71 const storage::louds::LoudsTrie *value_trie_;
8472 const SystemDictionaryCodecInterface *codec_;
8573 const uint16 suggestion_only_word_id_;
8674
2929 #include "dictionary/system/value_dictionary.h"
3030
3131 #include <memory>
32 #include <vector>
3332
34 #include "base/file_util.h"
35 #include "base/stl_util.h"
36 #include "base/system_util.h"
3733 #include "data_manager/user_pos_manager.h"
38 #include "dictionary/dictionary_interface.h"
3934 #include "dictionary/dictionary_test_util.h"
4035 #include "dictionary/dictionary_token.h"
4136 #include "dictionary/pos_matcher.h"
42 #include "dictionary/system/system_dictionary_builder.h"
37 #include "dictionary/system/codec_interface.h"
4338 #include "request/conversion_request.h"
44 #include "testing/base/public/googletest.h"
39 #include "storage/louds/louds_trie_builder.h"
4540 #include "testing/base/public/gunit.h"
41
42 using mozc::storage::louds::LoudsTrie;
43 using mozc::storage::louds::LoudsTrieBuilder;
4644
4745 namespace mozc {
4846 namespace dictionary {
4947
50 class ValueDictionaryTest : public testing::Test {
48 class ValueDictionaryTest : public ::testing::Test {
5149 protected:
52 ValueDictionaryTest() :
53 dict_name_(FLAGS_test_tmpdir + "/value_dict_test.dic") {}
54
5550 virtual void SetUp() {
56 STLDeleteElements(&tokens_);
57 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
58 FileUtil::Unlink(dict_name_);
5951 pos_matcher_ = UserPosManager::GetUserPosManager()->GetPOSMatcher();
52 louds_trie_builder_.reset(new LoudsTrieBuilder);
53 louds_trie_.reset(new LoudsTrie);
6054 }
6155
6256 virtual void TearDown() {
63 STLDeleteElements(&tokens_);
64 FileUtil::Unlink(dict_name_);
57 louds_trie_.reset(nullptr);
58 louds_trie_builder_.reset(nullptr);
6559 }
6660
67 void AddToken(const string &key, const string &value) {
68 Token *token = new Token;
69 token->key = key;
70 token->value = value;
71 token->cost = 0;
72 token->lid = 0;
73 token->rid = 0;
74 tokens_.push_back(token);
61 void AddValue(const string &value) {
62 string encoded;
63 SystemDictionaryCodecFactory::GetCodec()->EncodeValue(value, &encoded);
64 louds_trie_builder_->Add(encoded);
7565 }
7666
77 void BuildDictionary() {
78 dictionary::SystemDictionaryBuilder builder;
79 builder.BuildFromTokens(tokens_);
80 builder.WriteToFile(dict_name_);
67 ValueDictionary *BuildValueDictionary() {
68 louds_trie_builder_->Build();
69 louds_trie_->Open(
70 reinterpret_cast<const uint8 *>(louds_trie_builder_->image().data()));
71 return new ValueDictionary(*pos_matcher_, louds_trie_.get());
8172 }
8273
8374 void InitToken(const string &value, Token *token) const {
8778 token->attributes = Token::NONE;
8879 }
8980
90 const string dict_name_;
9181 const POSMatcher *pos_matcher_;
9282 ConversionRequest convreq_;
93
94 private:
95 vector<Token *> tokens_;
83 std::unique_ptr<LoudsTrieBuilder> louds_trie_builder_;
84 std::unique_ptr<LoudsTrie> louds_trie_;
9685 };
9786
9887 TEST_F(ValueDictionaryTest, HasValue) {
99 // "うぃー"
100 AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we");
101 // "うぉー"
102 AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war");
103 // "わーど"
104 AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word");
105 // "わーるど"
106 AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x82\x8B\xE3\x81\xA9", "world");
107 BuildDictionary();
108 std::unique_ptr<ValueDictionary> dictionary(
109 ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_,
110 dict_name_));
88 AddValue("we");
89 AddValue("war");
90 AddValue("word");
91 AddValue("world");
92 std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary());
11193
11294 // ValueDictionary is supposed to use the same data as SystemDictionary
11395 // and SystemDictionary::HasValue should return the same result as
123105 }
124106
125107 TEST_F(ValueDictionaryTest, LookupPredictive) {
126 // "ぐーぐる"
127 AddToken("\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B", "google");
128 // "うぃー"
129 AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we");
130 // "うぉー"
131 AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war");
132 // "わーど"
133 AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word");
134 // "わーるど"
135 AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x82\x8B\xE3\x81\xA9", "world");
136 BuildDictionary();
137 std::unique_ptr<ValueDictionary> dictionary(
138 ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_,
139 dict_name_));
108 AddValue("google");
109 AddValue("we");
110 AddValue("war");
111 AddValue("word");
112 AddValue("world");
113 std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary());
140114
141115 // Reading fields are irrelevant to value dictionary. Prepare actual tokens
142116 // that are to be looked up.
177151 }
178152
179153 TEST_F(ValueDictionaryTest, LookupExact) {
180 // "うぃー"
181 AddToken("\xE3\x81\x86\xE3\x81\x83\xE3\x83\xBC", "we");
182 // "うぉー"
183 AddToken("\xE3\x81\x86\xE3\x81\x89\xE3\x83\xBC", "war");
184 // "わーど"
185 AddToken("\xE3\x82\x8F\xE3\x83\xBC\xE3\x81\xA9", "word");
186 BuildDictionary();
154 AddValue("we");
155 AddValue("war");
156 AddValue("word");
157 std::unique_ptr<ValueDictionary> dictionary(BuildValueDictionary());
187158
188 std::unique_ptr<ValueDictionary> dictionary(
189 ValueDictionary::CreateValueDictionaryFromFile(*pos_matcher_,
190 dict_name_));
191159 CollectTokenCallback callback;
192160 dictionary->LookupExact("war", convreq_, &callback);
193161 ASSERT_EQ(1, callback.tokens().size());
158158 int dictionary_size = 0;
159159 data_manager->GetSystemDictionaryData(&dictionary_data, &dictionary_size);
160160
161 SystemDictionary *sysdic =
162 SystemDictionary::Builder(dictionary_data, dictionary_size).Build();
161163 dictionary_.reset(new DictionaryImpl(
162 SystemDictionary::Builder(dictionary_data, dictionary_size).Build(),
163 ValueDictionary::CreateValueDictionaryFromImage(
164 *data_manager->GetPOSMatcher(), dictionary_data, dictionary_size),
164 sysdic, // DictionaryImpl takes the ownership
165 new ValueDictionary(*data_manager->GetPOSMatcher(),
166 &sysdic->value_trie()),
165167 user_dictionary_.get(),
166168 suppression_dictionary_.get(),
167169 data_manager->GetPOSMatcher()));
00 MAJOR=2
11 MINOR=17
2 BUILD=2257
2 BUILD=2258
33 REVISION=102
44 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
55 # downloaded by NaCl Mozc.