Codebase list mozc / 402a3cb
Minor improvements to the user dictionary code * Use equal_range instead of lower_bound to reduce the number of string comparisons. * Avoid copying the key string into a temporary UserPOS::Token during lookups by making the comparators accept a StringPiece key directly. REF_BUG=27707461 REF_CL=120670147 REF_TIME=2016-04-25T10:54:23+09:00 REF_TIME_RAW=1461549263 +0900 Noriyuki Takahashi 8 years ago
3 changed file(s) with 70 addition(s) and 71 deletion(s). Raw diff Collapse all Expand all
00 MAJOR=2
11 MINOR=18
2 BUILD=2558
2 BUILD=2559
33 REVISION=102
44 # CAUTION: NACL_DICTIONARY_VERSION is going to be migrated to ENGINE_VERSION.
55 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
4141 #include "base/mutex.h"
4242 #include "base/singleton.h"
4343 #include "base/stl_util.h"
44 #include "base/string_piece.h"
4445 #include "base/thread.h"
4546 #include "base/util.h"
4647 #include "dictionary/dictionary_token.h"
5758 namespace {
5859
5960 struct OrderByKey {
60 bool operator()(const UserPOS::Token *lhs,
61 const UserPOS::Token *rhs) const {
62 return lhs->key < rhs->key;
61 bool operator()(const UserPOS::Token *token, StringPiece key) const {
62 return token->key < key;
63 }
64
65 bool operator()(StringPiece key, const UserPOS::Token *token) const {
66 return key < token->key;
6367 }
6468 };
6569
70 struct OrderByKeyPrefix {
71 bool operator()(const UserPOS::Token *token, StringPiece prefix) const {
72 return StringPiece(token->key, 0, prefix.size()) < prefix;
73 }
74
75 bool operator()(StringPiece prefix, const UserPOS::Token *token) const {
76 return prefix < StringPiece(token->key, 0, prefix.size());
77 }
78 };
79
6680 struct OrderByKeyThenById {
67 bool operator()(const UserPOS::Token *lhs,
68 const UserPOS::Token *rhs) const {
81 bool operator()(const UserPOS::Token *lhs, const UserPOS::Token *rhs) const {
6982 const int comp = lhs->key.compare(rhs->key);
7083 return comp == 0 ? (lhs->id < rhs->id) : (comp < 0);
7184 }
109122
110123 class UserDictionary::TokensIndex : public vector<UserPOS::Token *> {
111124 public:
112 explicit TokensIndex(const UserPOSInterface *user_pos,
113 SuppressionDictionary *suppression_dictionary)
125 TokensIndex(const UserPOSInterface *user_pos,
126 SuppressionDictionary *suppression_dictionary)
114127 : user_pos_(user_pos),
115128 suppression_dictionary_(suppression_dictionary) {}
129
116130 ~TokensIndex() {
117131 Clear();
118132 }
211225 DCHECK(dic_);
212226 }
213227
214 virtual ~UserDictionaryReloader() {
228 ~UserDictionaryReloader() override {
215229 Join();
216230 }
217231
248262 Start("UserDictionaryReloader");
249263 }
250264
251 virtual void Run() {
265 void Run() override {
252266 std::unique_ptr<UserDictionaryStorage> storage(new UserDictionaryStorage(
253267 Singleton<UserDictionaryFileManager>::get()->GetFileName()));
254268
341355 }
342356
343357 // Find the starting point of iteration over dictionary contents.
344 UserPOS::Token key_token;
345 key.CopyToString(&key_token.key);
346 vector<UserPOS::Token *>::const_iterator it = std::lower_bound(
347 tokens_->begin(), tokens_->end(), &key_token, OrderByKey());
348
349358 Token token;
350 for (; it != tokens_->end(); ++it) {
351 if (!Util::StartsWith((*it)->key, key)) {
352 break;
353 }
354 switch (callback->OnKey((*it)->key)) {
359 for (auto range = std::equal_range(tokens_->begin(), tokens_->end(), key,
360 OrderByKeyPrefix());
361 range.first != range.second; ++range.first) {
362 const UserPOS::Token &user_pos_token = **range.first;
363 switch (callback->OnKey(user_pos_token.key)) {
355364 case Callback::TRAVERSE_DONE:
356365 return;
357366 case Callback::TRAVERSE_NEXT_KEY:
360369 default:
361370 break;
362371 }
363 FillTokenFromUserPOSToken(**it, &token);
372 FillTokenFromUserPOSToken(user_pos_token, &token);
364373 // Override POS IDs for suggest only words.
365 if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) {
374 if (pos_matcher_.IsSuggestOnlyWord(user_pos_token.id)) {
366375 token.lid = token.rid = pos_matcher_.GetUnknownId();
367376 }
368 if (callback->OnToken((*it)->key, (*it)->key, token) ==
377 if (callback->OnToken(user_pos_token.key, user_pos_token.key, token) ==
369378 Callback::TRAVERSE_DONE) {
370379 return;
371380 }
391400 }
392401
393402 // Find the starting point for iteration over dictionary contents.
394 UserPOS::Token key_token;
395 key_token.key.assign(key.data(), Util::OneCharLen(key.data()));
396 vector<UserPOS::Token *>::const_iterator it = std::lower_bound(
397 tokens_->begin(), tokens_->end(), &key_token, OrderByKey());
398
403 const StringPiece first_char(key, 0, Util::OneCharLen(key.data()));
399404 Token token;
400 for (; it != tokens_->end(); ++it) {
401 if ((*it)->key > key) {
405 for (auto it = std::lower_bound(tokens_->begin(), tokens_->end(), first_char,
406 OrderByKey());
407 it != tokens_->end(); ++it) {
408 const UserPOS::Token &user_pos_token = **it;
409 if (user_pos_token.key > key) {
402410 break;
403411 }
404 if (pos_matcher_.IsSuggestOnlyWord((*it)->id)) {
412 if (pos_matcher_.IsSuggestOnlyWord(user_pos_token.id)) {
405413 continue;
406414 }
407 if (!Util::StartsWith(key, (*it)->key)) {
415 if (!Util::StartsWith(key, user_pos_token.key)) {
408416 continue;
409417 }
410 switch (callback->OnKey((*it)->key)) {
418 switch (callback->OnKey(user_pos_token.key)) {
411419 case Callback::TRAVERSE_DONE:
412420 return;
413421 case Callback::TRAVERSE_NEXT_KEY:
418426 default:
419427 break;
420428 }
421 FillTokenFromUserPOSToken(**it, &token);
422 switch (callback->OnToken((*it)->key, (*it)->key, token)) {
429 FillTokenFromUserPOSToken(user_pos_token, &token);
430 switch (callback->OnToken(user_pos_token.key, user_pos_token.key, token)) {
423431 case Callback::TRAVERSE_DONE:
424432 return;
425433 case Callback::TRAVERSE_CULL:
440448 conversion_request.config().incognito_mode()) {
441449 return;
442450 }
443 UserPOS::Token key_token;
444 key.CopyToString(&key_token.key);
445 typedef vector<UserPOS::Token *>::const_iterator TokenIterator;
446 pair<TokenIterator, TokenIterator> range = std::equal_range(
447 tokens_->begin(), tokens_->end(), &key_token, OrderByKey());
451 auto range = std::equal_range(tokens_->begin(), tokens_->end(), key,
452 OrderByKey());
448453 if (range.first == range.second) {
449454 return;
450455 }
483488 return false;
484489 }
485490
486 UserPOS::Token key_token;
487 key.CopyToString(&key_token.key);
488 typedef vector<UserPOS::Token *>::const_iterator TokenIterator;
489 pair<TokenIterator, TokenIterator> range = std::equal_range(
490 tokens_->begin(), tokens_->end(), &key_token, OrderByKey());
491
492491 // Set the comment that was found first.
493 for (; range.first != range.second; ++range.first) {
494 const UserPOS::Token *token = *range.first;
495 if (token->value == value && !token->comment.empty()) {
496 comment->assign(token->comment);
492 for (auto range = std::equal_range(tokens_->begin(), tokens_->end(), key,
493 OrderByKey());
494 range.first != range.second; ++range.first) {
495 const UserPOS::Token &token = **range.first;
496 if (token.value == value && !token.comment.empty()) {
497 comment->assign(token.comment);
497498 return true;
498499 }
499500 }
5252 UserDictionary(const UserPOSInterface *user_pos,
5353 POSMatcher pos_matcher,
5454 SuppressionDictionary *suppression_dictionary);
55 virtual ~UserDictionary();
55 ~UserDictionary() override;
5656
57 virtual bool HasKey(StringPiece key) const;
58 virtual bool HasValue(StringPiece value) const;
57 bool HasKey(StringPiece key) const override;
58 bool HasValue(StringPiece value) const override;
59
5960 // Lookup methods don't support kana modifier insensitive lookup, i.e.,
6061 // Callback::OnActualKey() is never called.
61 virtual void LookupPredictive(StringPiece key,
62 const ConversionRequest &conversion_request,
63 Callback *callback) const;
64
65 virtual void LookupPrefix(StringPiece key,
66 const ConversionRequest &conversion_request,
67 Callback *callback) const;
68
69 virtual void LookupExact(StringPiece key,
70 const ConversionRequest &conversion_request,
71 Callback *callback) const;
72
73 virtual void LookupReverse(StringPiece str,
74 const ConversionRequest &conversion_request,
75 Callback *callback) const;
62 void LookupPredictive(StringPiece key,
63 const ConversionRequest &conversion_request,
64 Callback *callback) const override;
65 void LookupPrefix(StringPiece key,
66 const ConversionRequest &conversion_request,
67 Callback *callback) const override;
68 void LookupExact(StringPiece key,
69 const ConversionRequest &conversion_request,
70 Callback *callback) const override;
71 void LookupReverse(StringPiece str,
72 const ConversionRequest &conversion_request,
73 Callback *callback) const override;
7674
7775 // Looks up a user comment from a pair of key and value. When (key, value)
7876 // doesn't exist in this dictionary or user comment is empty, false is
7977 // returned and string is kept as-is.
80 virtual bool LookupComment(StringPiece key, StringPiece value,
81 const ConversionRequest &conversion_request,
82 string *comment) const;
78 bool LookupComment(StringPiece key, StringPiece value,
79 const ConversionRequest &conversion_request,
80 string *comment) const override;
8381
8482 // Loads dictionary from UserDictionaryStorage.
8583 // mainly for unittesting