Codebase list mozc / d3f6591
Remove SegmenterInterface as its Segmenter is the only derived class. Benefits: - Virtual method call is removed (especially IsBoundary is called heavily so its cost is reduced) - Reduce the number of files to be maintained This is just a code cleanup. Hence no behavior change should occur. BUG=none TEST=unittest Noriyuki Takahashi authored 9 years ago Yohei Yukawa committed 9 years ago
19 changed file(s) with 64 addition(s) and 133 deletion(s). Raw diff Collapse all Expand all
5151 'converter_base.gyp:conversion_request',
5252 'converter_base.gyp:immutable_converter',
5353 'converter_base.gyp:immutable_converter_interface',
54 'converter_base.gyp:segmenter',
5455 'converter_base.gyp:segments',
5556 ],
5657 },
8585 '../transliteration/transliteration.gyp:transliteration',
8686 'connector',
8787 'lattice',
88 'segmenter',
8889 ],
8990 },
9091 {
126127 '../session/session_base.gyp:session_protocol',
127128 'connector',
128129 'immutable_converter_interface',
130 'segmenter',
129131 'segments',
130132 ],
131133 },
4747 #include "converter/immutable_converter_interface.h"
4848 #include "converter/node.h"
4949 #include "converter/segmenter.h"
50 #include "converter/segmenter_interface.h"
5150 #include "converter/segments.h"
5251 #include "data_manager/data_manager_interface.h"
5352 #include "data_manager/testing/mock_data_manager.h"
176175 scoped_ptr<SuppressionDictionary> suppression_dictionary;
177176 scoped_ptr<DictionaryInterface> suffix_dictionary;
178177 scoped_ptr<const Connector> connector;
179 scoped_ptr<const SegmenterInterface> segmenter;
178 scoped_ptr<const Segmenter> segmenter;
180179 scoped_ptr<DictionaryInterface> dictionary;
181180 scoped_ptr<const PosGroup> pos_group;
182181 scoped_ptr<const SuggestionFilter> suggestion_filter;
12431242 CreateSuffixDictionaryFromDataManager(data_manager));
12441243 scoped_ptr<const Connector> connector(
12451244 Connector::CreateFromDataManager(data_manager));
1246 scoped_ptr<const SegmenterInterface> segmenter(
1245 scoped_ptr<const Segmenter> segmenter(
12471246 Segmenter::CreateFromDataManager(data_manager));
12481247 scoped_ptr<const SuggestionFilter> suggestion_filter(
12491248 CreateSuggestionFilter(data_manager));
5151 #include "converter/node.h"
5252 #include "converter/node_allocator.h"
5353 #include "converter/node_list_builder.h"
54 #include "converter/segmenter_interface.h"
54 #include "converter/segmenter.h"
5555 #include "converter/segments.h"
5656 #include "dictionary/dictionary_interface.h"
5757 #include "dictionary/pos_group.h"
274274 const DictionaryInterface *suffix_dictionary,
275275 const SuppressionDictionary *suppression_dictionary,
276276 const Connector *connector,
277 const SegmenterInterface *segmenter,
277 const Segmenter *segmenter,
278278 const POSMatcher *pos_matcher,
279279 const PosGroup *pos_group,
280280 const SuggestionFilter *suggestion_filter)
649649 if ((lnode->value.size() + rnode->value.size())
650650 == compound_node->value.size() &&
651651 (lnode->value + rnode->value) == compound_node->value &&
652 segmenter_->IsBoundary(lnode, rnode, false)) { // Constraint 3.
652 segmenter_->IsBoundary(*lnode, *rnode, false)) { // Constraint 3.
653653 const int32 cost = lnode->wcost + GetCost(lnode, rnode);
654654 if (cost < best_cost) { // choose the smallest ones
655655 best_last_name_node = lnode;
17661766 }
17671767
17681768 // Grammatically segmented.
1769 if (segmenter_->IsBoundary(node, node->next, is_single_segment)) {
1769 if (segmenter_->IsBoundary(*node, *node->next, is_single_segment)) {
17701770 return true;
17711771 }
17721772
5050 class NBestGenerator;
5151 class POSMatcher;
5252 class PosGroup;
53 class SegmenterInterface;
53 class Segmenter;
5454 class SuggestionFilter;
5555
5656 class ImmutableConverterImpl : public ImmutableConverterInterface {
6060 const DictionaryInterface *suffix_dictionary,
6161 const dictionary::SuppressionDictionary *suppression_dictionary,
6262 const Connector *connector,
63 const SegmenterInterface *segmenter,
63 const Segmenter *segmenter,
6464 const POSMatcher *pos_matcher,
6565 const PosGroup *pos_group,
6666 const SuggestionFilter *suggestion_filter);
183183 const DictionaryInterface *suffix_dictionary_;
184184 const dictionary::SuppressionDictionary *suppression_dictionary_;
185185 const Connector *connector_;
186 const SegmenterInterface *segmenter_;
186 const Segmenter *segmenter_;
187187 const POSMatcher *pos_matcher_;
188188 const PosGroup *pos_group_;
189189 const SuggestionFilter *suggestion_filter_;
4444 #include "converter/conversion_request.h"
4545 #include "converter/lattice.h"
4646 #include "converter/segmenter.h"
47 #include "converter/segmenter_interface.h"
4847 #include "converter/segments.h"
4948 #include "data_manager/data_manager_interface.h"
5049 #include "data_manager/testing/mock_data_manager.h"
162161 scoped_ptr<const DataManagerInterface> data_manager_;
163162 scoped_ptr<const SuppressionDictionary> suppression_dictionary_;
164163 scoped_ptr<const Connector> connector_;
165 scoped_ptr<const SegmenterInterface> segmenter_;
164 scoped_ptr<const Segmenter> segmenter_;
166165 scoped_ptr<const DictionaryInterface> suffix_dictionary_;
167166 scoped_ptr<const DictionaryInterface> dictionary_;
168167 scoped_ptr<const PosGroup> pos_group_;
3838 #include "converter/connector.h"
3939 #include "converter/lattice.h"
4040 #include "converter/node.h"
41 #include "converter/segmenter_interface.h"
41 #include "converter/segmenter.h"
4242 #include "converter/segments.h"
4343 #include "dictionary/pos_matcher.h"
4444
105105 }
106106
107107 NBestGenerator::NBestGenerator(const SuppressionDictionary *suppression_dic,
108 const SegmenterInterface *segmenter,
108 const Segmenter *segmenter,
109109 const Connector *connector,
110110 const POSMatcher *pos_matcher,
111111 const Lattice *lattice,
238238 const Node *lnode = nodes[i - 1];
239239 const Node *rnode = nodes[i];
240240 const bool kMultipleSegments = false;
241 if (segmenter_->IsBoundary(lnode, rnode, kMultipleSegments)) {
241 if (segmenter_->IsBoundary(*lnode, *rnode, kMultipleSegments)) {
242242 candidate->PushBackInnerSegmentBoundary(
243243 key_len, value_len, content_key_len, content_value_len);
244244 key_len = 0;
497497 // is_boundary is true if there is a grammar-based boundary
498498 // between lnode and rnode
499499 const bool is_boundary = (lnode->node_type == Node::HIS_NODE ||
500 segmenter_->IsBoundary(lnode, rnode, false));
500 segmenter_->IsBoundary(*lnode, *rnode, false));
501501 if (!is_edge && is_boundary) {
502502 // There is a boundary within the segment.
503503 return INVALID;
524524 // between lnode and rnode
525525 const bool is_boundary = (
526526 lnode->node_type == Node::HIS_NODE ||
527 segmenter_->IsBoundary(lnode, rnode, true));
527 segmenter_->IsBoundary(*lnode, *rnode, true));
528528 if (is_edge != is_boundary) {
529529 // on the edge, have a boundary.
530530 // not on the edge, not the case.
546546 // between lnode and rnode
547547 const bool is_boundary = (
548548 lnode->node_type == Node::HIS_NODE ||
549 segmenter_->IsBoundary(lnode, rnode, false));
549 segmenter_->IsBoundary(*lnode, *rnode, false));
550550
551551 if (is_edge != is_boundary) {
552552 // on the edge, have a boundary.
4444 class Connector;
4545 class Lattice;
4646 class POSMatcher;
47 class SegmenterInterface;
47 class Segmenter;
4848 class SuggestionFilter;
4949 struct Node;
5050
8181 // Try to enumerate N-best results between begin_node and end_node.
8282 NBestGenerator(
8383 const dictionary::SuppressionDictionary *suppression_dictionary,
84 const SegmenterInterface *segmenter,
84 const Segmenter *segmenter,
8585 const Connector *connector,
8686 const POSMatcher *pos_matcher,
8787 const Lattice *lattice,
170170
171171 // References to relevant modules.
172172 const dictionary::SuppressionDictionary *suppression_dictionary_;
173 const SegmenterInterface *segmenter_;
173 const Segmenter *segmenter_;
174174 const Connector *connector_;
175175 const POSMatcher *pos_matcher_;
176176 const Lattice *lattice_;
4040 #include "converter/conversion_request.h"
4141 #include "converter/immutable_converter.h"
4242 #include "converter/segmenter.h"
43 #include "converter/segmenter_interface.h"
4443 #include "converter/segments.h"
4544 #include "data_manager/data_manager_interface.h"
4645 #include "data_manager/testing/mock_data_manager.h"
144143 scoped_ptr<const DataManagerInterface> data_manager_;
145144 scoped_ptr<const SuppressionDictionary> suppression_dictionary_;
146145 scoped_ptr<const Connector> connector_;
147 scoped_ptr<const SegmenterInterface> segmenter_;
146 scoped_ptr<const Segmenter> segmenter_;
148147 scoped_ptr<const DictionaryInterface> suffix_dictionary_;
149148 scoped_ptr<const DictionaryInterface> dictionary_;
150149 scoped_ptr<const PosGroup> pos_group_;
4141 const DataManagerInterface &data_manager) {
4242 size_t l_num_elements = 0;
4343 size_t r_num_elements = 0;
44 const uint16 *l_table = NULL;
45 const uint16 *r_table = NULL;
44 const uint16 *l_table = nullptr;
45 const uint16 *r_table = nullptr;
4646 size_t bitarray_num_bytes = 0;
47 const char *bitarray_data = NULL;
48 const BoundaryData *boundary_data = NULL;
47 const char *bitarray_data = nullptr;
48 const BoundaryData *boundary_data = nullptr;
4949 data_manager.GetSegmenterData(&l_num_elements, &r_num_elements,
5050 &l_table, &r_table,
5151 &bitarray_num_bytes, &bitarray_data,
7373
7474 Segmenter::~Segmenter() {}
7575
76 bool Segmenter::IsBoundary(const Node *lnode, const Node *rnode,
76 bool Segmenter::IsBoundary(const Node &lnode, const Node &rnode,
7777 bool is_single_segment) const {
78 DCHECK(lnode);
79 DCHECK(rnode);
80 if (lnode->node_type == Node::BOS_NODE ||
81 rnode->node_type == Node::EOS_NODE) {
78 if (lnode.node_type == Node::BOS_NODE ||
79 rnode.node_type == Node::EOS_NODE) {
8280 return true;
8381 }
8482
85 // return always false in prediction mode.
83 // Always return false in prediction mode.
8684 // This implies that converter always returns single-segment-result
8785 // in prediction mode.
8886 if (is_single_segment) {
9694 // If we segment "に書く" into two segments, "二角" is never shown.
9795 // There exists an implicit assumption that the user expects his/her input
9896 // to become one bunsetsu. So, it would be better to keep "二角" even after "紙".
99 if (lnode->attributes & Node::STARTS_WITH_PARTICLE) {
97 if (lnode.attributes & Node::STARTS_WITH_PARTICLE) {
10098 return false;
10199 }
102100
103 return IsBoundary(lnode->rid, rnode->lid);
101 return IsBoundary(lnode.rid, rnode.lid);
104102 }
105103
106104 bool Segmenter::IsBoundary(uint16 rid, uint16 lid) const {
3030 #define MOZC_CONVERTER_SEGMENTER_H_
3131
3232 #include "base/port.h"
33 #include "converter/segmenter_interface.h"
3433
3534 namespace mozc {
3635
3837 struct Node;
3938 struct BoundaryData;
4039
41 class Segmenter : public SegmenterInterface {
40 class Segmenter {
4241 public:
4342 static Segmenter *CreateFromDataManager(
4443 const DataManagerInterface &data_manager);
4847 const uint16 *l_table, const uint16 *r_table,
4948 size_t bitarray_num_bytes, const char *bitarray_data,
5049 const BoundaryData *boundary_data);
51 virtual ~Segmenter();
50 ~Segmenter();
5251
53 virtual bool IsBoundary(const Node *lnode, const Node *rnode,
54 bool is_single_segment) const;
52 bool IsBoundary(const Node &lnode, const Node &rnode,
53 bool is_single_segment) const;
5554
56 virtual bool IsBoundary(uint16 rid, uint16 lid) const;
55 bool IsBoundary(uint16 rid, uint16 lid) const;
5756
58 virtual int32 GetPrefixPenalty(uint16 lid) const;
57 int32 GetPrefixPenalty(uint16 lid) const;
5958
60 virtual int32 GetSuffixPenalty(uint16 rid) const;
59 int32 GetSuffixPenalty(uint16 rid) const;
6160
6261 private:
6362 const size_t l_num_elements_;
6766 const size_t bitarray_num_bytes_;
6867 const char *bitarray_data_;
6968 const BoundaryData *boundary_data_;
69
70 DISALLOW_COPY_AND_ASSIGN(Segmenter);
7071 };
7172
7273 } // namespace mozc
+0
-69
src/converter/segmenter_interface.h less more
0 // Copyright 2010-2015, Google Inc.
1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #ifndef MOZC_CONVERTER_SEGMENTER_INTERFACE_H_
30 #define MOZC_CONVERTER_SEGMENTER_INTERFACE_H_
31
32 #include "base/port.h"
33
34 namespace mozc {
35
36 struct Node;
37
38 class SegmenterInterface {
39 public:
40 virtual ~SegmenterInterface() {}
41
42 // Returns true if there is a segment boundary between |lnode| and |rnode|.
43 // If |is_single_segment| is true, this function basically returns false
44 // unless |lnode| or |rnode| is BOS/EOS. |is_single_segment| is used for
45 // prediction/suggestion mode.
46 virtual bool IsBoundary(const Node *lnode, const Node *rnode,
47 bool is_single_segment) const = 0;
48
49 virtual bool IsBoundary(uint16 rid, uint16 lid) const = 0;
50
51 // Returns cost penalty of the word prefix. We can add cost penalty if a
52 // node->lid exists at the beginning of user input.
53 virtual int32 GetPrefixPenalty(uint16 lid) const = 0;
54
55 // Returns cost penalty of the word suffix. We can add cost penalty if a
56 // node->rid exists at the end of user input.
57 virtual int32 GetSuffixPenalty(uint16 rid) const = 0;
58
59 protected:
60 SegmenterInterface() {}
61
62 private:
63 DISALLOW_COPY_AND_ASSIGN(SegmenterInterface);
64 };
65
66 } // namespace mozc
67
68 #endif // MOZC_CONVERTER_SEGMENTER_INTERFACE_H_
3939 #include "base/util.h"
4040 #include "converter/connector.h"
4141 #include "converter/node.h"
42 #include "converter/segmenter_base.h"
42 #include "converter/segmenter.h"
4343 #include "data_manager/connection_file_reader.h"
4444 #include "data_manager/data_manager_interface.h"
4545 #include "dictionary/pos_matcher.h"
104104 for (size_t lid = 0; lid < rsize_; ++lid) {
105105 lnode.rid = rid;
106106 lnode.lid = lid;
107 EXPECT_TRUE(segmenter->IsBoundary(&lnode, &rnode, false));
108 EXPECT_TRUE(segmenter->IsBoundary(&lnode, &rnode, true));
107 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, false));
108 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, true));
109109 }
110110 }
111111 }
122122 for (size_t lid = 0; lid < rsize_; ++lid) {
123123 lnode.rid = rid;
124124 lnode.lid = lid;
125 EXPECT_TRUE(segmenter->IsBoundary(&lnode, &rnode, false));
126 EXPECT_TRUE(segmenter->IsBoundary(&lnode, &rnode, true));
125 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, false));
126 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, true));
127127 }
128128 }
129129 }
140140 lnode.rid = rid;
141141 rnode.lid = lid;
142142 EXPECT_EQ(segmenter->IsBoundary(rid, lid),
143 segmenter->IsBoundary(&lnode, &rnode, false));
144 EXPECT_FALSE(segmenter->IsBoundary(&lnode, &rnode, true));
143 segmenter->IsBoundary(lnode, rnode, false));
144 EXPECT_FALSE(segmenter->IsBoundary(lnode, rnode, true));
145145 }
146146 }
147147 }
160160 lnode.rid = pos_matcher->GetAcceptableParticleAtBeginOfSegmentId();
161161 // "名詞,サ変".
162162 rnode.lid = pos_matcher->GetUnknownId();
163 EXPECT_TRUE(segmenter->IsBoundary(&lnode, &rnode, false));
163 EXPECT_TRUE(segmenter->IsBoundary(lnode, rnode, false));
164164
165165 lnode.attributes |= Node::STARTS_WITH_PARTICLE;
166 EXPECT_FALSE(segmenter->IsBoundary(&lnode, &rnode, false));
166 EXPECT_FALSE(segmenter->IsBoundary(lnode, rnode, false));
167167 }
168168
169169 void DataManagerTestBase::ConnectorTest_RandomValueCheck() {
4444 class ImmutableConverterInterface;
4545 class PredictorInterface;
4646 class RewriterInterface;
47 class SegmenterInterface;
47 class Segmenter;
4848 class SuggestionFilter;
4949 class UserDataManagerInterface;
5050
7777 private:
7878 scoped_ptr<dictionary::SuppressionDictionary> suppression_dictionary_;
7979 scoped_ptr<const Connector> connector_;
80 scoped_ptr<const SegmenterInterface> segmenter_;
80 scoped_ptr<const Segmenter> segmenter_;
8181 scoped_ptr<dictionary::UserDictionary> user_dictionary_;
8282 scoped_ptr<DictionaryInterface> suffix_dictionary_;
8383 scoped_ptr<DictionaryInterface> dictionary_;
00 MAJOR=2
11 MINOR=17
2 BUILD=2085
2 BUILD=2086
33 REVISION=102
44 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
55 # downloaded by NaCl Mozc.
5151 #include "converter/converter_interface.h"
5252 #include "converter/immutable_converter_interface.h"
5353 #include "converter/node_list_builder.h"
54 #include "converter/segmenter_interface.h"
54 #include "converter/segmenter.h"
5555 #include "converter/segments.h"
5656 #include "dictionary/dictionary_interface.h"
5757 #include "dictionary/pos_matcher.h"
293293 const DictionaryInterface *dictionary,
294294 const DictionaryInterface *suffix_dictionary,
295295 const Connector *connector,
296 const SegmenterInterface *segmenter,
296 const Segmenter *segmenter,
297297 const POSMatcher *pos_matcher,
298298 const SuggestionFilter *suggestion_filter)
299299 : converter_(converter),
4747 class DictionaryInterface;
4848 class ImmutableConverterInterface;
4949 class POSMatcher;
50 class SegmenterInterface;
50 class Segmenter;
5151 class Segments;
5252 class SuggestionFilter;
5353
6161 const DictionaryInterface *dictionary,
6262 const DictionaryInterface *suffix_dictionary,
6363 const Connector *connector,
64 const SegmenterInterface *segmenter,
64 const Segmenter *segmenter,
6565 const POSMatcher *pos_matcher,
6666 const SuggestionFilter *suggestion_filter);
6767 virtual ~DictionaryPredictor();
423423 const DictionaryInterface *dictionary_;
424424 const DictionaryInterface *suffix_dictionary_;
425425 const Connector *connector_;
426 const SegmenterInterface *segmenter_;
426 const Segmenter *segmenter_;
427427 const SuggestionFilter *suggestion_filter_;
428428 const uint16 counter_suffix_word_id_;
429429 const string predictor_name_;
156156 const DictionaryInterface *dictionary,
157157 const DictionaryInterface *suffix_dictionary,
158158 const Connector *connector,
159 const SegmenterInterface *segmenter,
159 const Segmenter *segmenter,
160160 const POSMatcher *pos_matcher,
161161 const SuggestionFilter *suggestion_filter)
162162 : DictionaryPredictor(converter,
268268 const POSMatcher *pos_matcher_;
269269 scoped_ptr<SuppressionDictionary> suppression_dictionary_;
270270 scoped_ptr<const Connector> connector_;
271 scoped_ptr<const SegmenterInterface> segmenter_;
271 scoped_ptr<const Segmenter> segmenter_;
272272 scoped_ptr<const DictionaryInterface> suffix_dictionary_;
273273 scoped_ptr<const DictionaryInterface> dictionary_;
274274 DictionaryMock *dictionary_mock_;
15601560 CreateSuffixDictionaryFromDataManager(data_manager));
15611561 scoped_ptr<const Connector> connector(
15621562 Connector::CreateFromDataManager(data_manager));
1563 scoped_ptr<const SegmenterInterface> segmenter(
1563 scoped_ptr<const Segmenter> segmenter(
15641564 Segmenter::CreateFromDataManager(data_manager));
15651565 scoped_ptr<const SuggestionFilter> suggestion_filter(
15661566 CreateSuggestionFilter(data_manager));
31323132 CreateSuffixDictionaryFromDataManager(data_manager));
31333133 scoped_ptr<const Connector> connector(
31343134 Connector::CreateFromDataManager(data_manager));
3135 scoped_ptr<const SegmenterInterface> segmenter(
3135 scoped_ptr<const Segmenter> segmenter(
31363136 Segmenter::CreateFromDataManager(data_manager));
31373137 scoped_ptr<const SuggestionFilter> suggestion_filter(
31383138 CreateSuggestionFilter(data_manager));
4848 '../composer/composer.gyp:composer',
4949 '../converter/converter_base.gyp:conversion_request',
5050 '../converter/converter_base.gyp:immutable_converter',
51 '../converter/converter_base.gyp:segmenter',
5152 '../converter/converter_base.gyp:segments',
5253 '../dictionary/dictionary.gyp:dictionary',
5354 '../dictionary/dictionary.gyp:suffix_dictionary',