Rename SegmenterBase to Segmenter as it's not used as a base class.
This is just a code cleanup. Hence no behavior change should occur.
BUG=none
TEST=unittest
Noriyuki Takahashi authored 9 years ago
Yohei Yukawa committed 9 years ago
38 | 38 | }, |
39 | 39 | 'targets': [ |
40 | 40 | { |
41 | 'target_name': 'segmenter_base', | |
41 | 'target_name': 'segmenter', | |
42 | 42 | 'type': 'static_library', |
43 | 43 | 'sources': [ |
44 | 'segmenter_base.cc', | |
44 | 'segmenter.cc', | |
45 | 45 | ], |
46 | 46 | 'dependencies': [ |
47 | 47 | '../base/base.gyp:base', |
46 | 46 | #include "converter/immutable_converter.h" |
47 | 47 | #include "converter/immutable_converter_interface.h" |
48 | 48 | #include "converter/node.h" |
49 | #include "converter/segmenter_base.h" | |
49 | #include "converter/segmenter.h" | |
50 | 50 | #include "converter/segmenter_interface.h" |
51 | 51 | #include "converter/segments.h" |
52 | 52 | #include "data_manager/data_manager_interface.h" |
207 | 207 | ret->suffix_dictionary.reset( |
208 | 208 | CreateSuffixDictionaryFromDataManager(data_manager)); |
209 | 209 | ret->connector.reset(Connector::CreateFromDataManager(data_manager)); |
210 | ret->segmenter.reset(SegmenterBase::CreateFromDataManager(data_manager)); | |
210 | ret->segmenter.reset(Segmenter::CreateFromDataManager(data_manager)); | |
211 | 211 | ret->immutable_converter.reset( |
212 | 212 | new ImmutableConverterImpl(ret->dictionary.get(), |
213 | 213 | ret->suffix_dictionary.get(), |
1244 | 1244 | scoped_ptr<const Connector> connector( |
1245 | 1245 | Connector::CreateFromDataManager(data_manager)); |
1246 | 1246 | scoped_ptr<const SegmenterInterface> segmenter( |
1247 | SegmenterBase::CreateFromDataManager(data_manager)); | |
1247 | Segmenter::CreateFromDataManager(data_manager)); | |
1248 | 1248 | scoped_ptr<const SuggestionFilter> suggestion_filter( |
1249 | 1249 | CreateSuggestionFilter(data_manager)); |
1250 | 1250 | scoped_ptr<ImmutableConverterInterface> immutable_converter( |
66 | 66 | 'converter.gyp:converter', |
67 | 67 | 'converter_base.gyp:connector', |
68 | 68 | 'converter_base.gyp:converter_mock', |
69 | 'converter_base.gyp:segmenter_base', | |
69 | 'converter_base.gyp:segmenter', | |
70 | 70 | 'converter_base.gyp:segments', |
71 | 71 | ], |
72 | 72 | 'variables': { |
43 | 43 | #include "converter/connector.h" |
44 | 44 | #include "converter/conversion_request.h" |
45 | 45 | #include "converter/lattice.h" |
46 | #include "converter/segmenter_base.h" | |
46 | #include "converter/segmenter.h" | |
47 | 47 | #include "converter/segmenter_interface.h" |
48 | 48 | #include "converter/segments.h" |
49 | 49 | #include "data_manager/data_manager_interface.h" |
129 | 129 | connector_.reset(Connector::CreateFromDataManager(*data_manager_)); |
130 | 130 | CHECK(connector_.get()); |
131 | 131 | |
132 | segmenter_.reset(SegmenterBase::CreateFromDataManager(*data_manager_)); | |
132 | segmenter_.reset(Segmenter::CreateFromDataManager(*data_manager_)); | |
133 | 133 | CHECK(segmenter_.get()); |
134 | 134 | |
135 | 135 | pos_group_.reset(new PosGroup(data_manager_->GetPosGroupData())); |
39 | 39 | #include "converter/connector.h" |
40 | 40 | #include "converter/conversion_request.h" |
41 | 41 | #include "converter/immutable_converter.h" |
42 | #include "converter/segmenter_base.h" | |
42 | #include "converter/segmenter.h" | |
43 | 43 | #include "converter/segmenter_interface.h" |
44 | 44 | #include "converter/segments.h" |
45 | 45 | #include "data_manager/data_manager_interface.h" |
102 | 102 | connector_.reset(Connector::CreateFromDataManager(*data_manager_)); |
103 | 103 | CHECK(connector_.get()); |
104 | 104 | |
105 | segmenter_.reset(SegmenterBase::CreateFromDataManager(*data_manager_)); | |
105 | segmenter_.reset(Segmenter::CreateFromDataManager(*data_manager_)); | |
106 | 106 | CHECK(segmenter_.get()); |
107 | 107 | |
108 | 108 | pos_group_.reset(new PosGroup(data_manager_->GetPosGroupData())); |
0 | // Copyright 2010-2015, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #include "converter/segmenter.h" | |
30 | ||
31 | #include "base/bitarray.h" | |
32 | #include "base/logging.h" | |
33 | #include "base/port.h" | |
34 | #include "converter/boundary_struct.h" | |
35 | #include "converter/node.h" | |
36 | #include "data_manager/data_manager_interface.h" | |
37 | ||
38 | namespace mozc { | |
39 | ||
// Factory: builds a Segmenter from the segmenter data exposed by
// |data_manager|.
//
// GetSegmenterData() fills the seven out-parameters below (element counts,
// the two uint16 tables, the bit array and its byte size, and the boundary
// data); they are forwarded unchanged to the constructor.
//
// The result is allocated with new and returned as a raw pointer, so the
// caller is responsible for deleting it.
40 | Segmenter *Segmenter::CreateFromDataManager( | |
41 | const DataManagerInterface &data_manager) { | |
// Start from zero / NULL so every out-parameter has a defined value before
// the call.
42 | size_t l_num_elements = 0; | |
43 | size_t r_num_elements = 0; | |
44 | const uint16 *l_table = NULL; | |
45 | const uint16 *r_table = NULL; | |
46 | size_t bitarray_num_bytes = 0; | |
47 | const char *bitarray_data = NULL; | |
48 | const BoundaryData *boundary_data = NULL; | |
49 | data_manager.GetSegmenterData(&l_num_elements, &r_num_elements, | |
50 | &l_table, &r_table, | |
51 | &bitarray_num_bytes, &bitarray_data, | |
52 | &boundary_data); | |
// The constructor validates the pointers (DCHECK) and the bit array size
// (CHECK_LE).
53 | return new Segmenter(l_num_elements, r_num_elements, | |
54 | l_table, r_table, | |
55 | bitarray_num_bytes, bitarray_data, | |
56 | boundary_data); | |
57 | } | |
58 | ||
// Stores the given sizes and pointers as they are; nothing is copied here.
//
// DCHECKs that the four pointers are non-null, and CHECKs that the bit array
// holds at least l_num_elements * r_num_elements bits
// (bitarray_num_bytes * 8).
59 | Segmenter::Segmenter( | |
60 | size_t l_num_elements, size_t r_num_elements, const uint16 *l_table, | |
61 | const uint16 *r_table, size_t bitarray_num_bytes, | |
62 | const char *bitarray_data, const BoundaryData *boundary_data) | |
63 | : l_num_elements_(l_num_elements), r_num_elements_(r_num_elements), | |
64 | l_table_(l_table), r_table_(r_table), | |
65 | bitarray_num_bytes_(bitarray_num_bytes), | |
66 | bitarray_data_(bitarray_data), boundary_data_(boundary_data) { | |
67 | DCHECK(l_table_); | |
68 | DCHECK(r_table_); | |
69 | DCHECK(bitarray_data_); | |
70 | DCHECK(boundary_data_); | |
// One bit per (l, r) element pair must fit in the supplied bit array.
71 | CHECK_LE(l_num_elements_ * r_num_elements_, bitarray_num_bytes_ * 8); | |
72 | } | |
73 | ||
// Empty destructor body: none of the stored pointers is released here.
74 | Segmenter::~Segmenter() {} | |
75 | ||
// Decides whether a segment boundary lies between |lnode| and |rnode|.
//
// Rules, applied in this order:
//   1. lnode is BOS or rnode is EOS          -> true
//   2. |is_single_segment| is set            -> false
//   3. lnode has STARTS_WITH_PARTICLE set    -> false
//   4. otherwise the table lookup IsBoundary(lnode->rid, rnode->lid).
//
// Both node pointers must be non-null (DCHECKed).
76 | bool Segmenter::IsBoundary(const Node *lnode, const Node *rnode, | |
77 | bool is_single_segment) const { | |
78 | DCHECK(lnode); | |
79 | DCHECK(rnode); | |
// This check comes first, so it also applies when |is_single_segment| is set.
80 | if (lnode->node_type == Node::BOS_NODE || | |
81 | rnode->node_type == Node::EOS_NODE) { | |
82 | return true; | |
83 | } | |
84 | ||
85 | // Always return false in prediction mode. | |
86 | // This implies that the converter always returns a single-segment result | |
87 | // in prediction mode. | |
88 | if (is_single_segment) { | |
89 | return false; | |
90 | } | |
91 | ||
92 | // Concatenate a particle and a content word into one segment | |
93 | // if lnode is located at the beginning of the user input. | |
94 | // This hack is for handling ambiguous bunsetsu segmentation. | |
95 | // e.g. "かみ|にかく" => "紙|に書く" or "紙二角". | |
96 | // If we segment "に書く" into two segments, "二角" is never shown. | |
97 | // There exists an implicit assumption that the user expects his/her input | |
98 | // to become one bunsetsu. So, it would be better to keep "二角" even after "紙". | |
99 | if (lnode->attributes & Node::STARTS_WITH_PARTICLE) { | |
100 | return false; | |
101 | } | |
102 | ||
// Fall back to the POS-id based lookup: right id of the left node, left id
// of the right node.
103 | return IsBoundary(lnode->rid, rnode->lid); | |
104 | } | |
105 | ||
// Looks up the boundary bit for the (rid, lid) pair.
//
// The bit index is l_table_[rid] + l_num_elements_ * r_table_[lid], and the
// result is the value BitArray::GetValue() reports for that index in
// bitarray_data_. |rid| and |lid| are used directly as table indices; no
// range check is performed here.
106 | bool Segmenter::IsBoundary(uint16 rid, uint16 lid) const { | |
107 | const uint32 bitarray_index = l_table_[rid] + l_num_elements_ * r_table_[lid]; | |
// NOTE(review): bitarray_data_ is already declared as const char * in the
// class, so this reinterpret_cast does not change the type.
108 | return BitArray::GetValue(reinterpret_cast<const char*>(bitarray_data_), | |
109 | bitarray_index); | |
110 | } | |
111 | ||
// Returns the prefix_penalty field of boundary_data_[lid]. |lid| is used
// directly as the array index; no range check is done here.
112 | int32 Segmenter::GetPrefixPenalty(uint16 lid) const { | |
113 | return boundary_data_[lid].prefix_penalty; | |
114 | } | |
115 | ||
// Returns the suffix_penalty field of boundary_data_[rid]. |rid| is used
// directly as the array index; no range check is done here.
116 | int32 Segmenter::GetSuffixPenalty(uint16 rid) const { | |
117 | return boundary_data_[rid].suffix_penalty; | |
118 | } | |
119 | ||
120 | } // namespace mozc |
0 | // Copyright 2010-2015, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #ifndef MOZC_CONVERTER_SEGMENTER_H_ | |
30 | #define MOZC_CONVERTER_SEGMENTER_H_ | |
31 | ||
32 | #include "base/port.h" | |
33 | #include "converter/segmenter_interface.h" | |
34 | ||
35 | namespace mozc { | |
36 | ||
37 | class DataManagerInterface; | |
38 | struct Node; | |
39 | struct BoundaryData; | |
40 | ||
// Concrete SegmenterInterface implementation that answers boundary and
// penalty queries from lookup tables supplied by the caller. The tables are
// referenced through raw pointers and are not owned by this class.
41 | class Segmenter : public SegmenterInterface { | |
42 | public: | |
// Creates a Segmenter from the data provided by |data_manager|. Returns a
// raw pointer to a newly allocated object.
43 | static Segmenter *CreateFromDataManager( | |
44 | const DataManagerInterface &data_manager); | |
45 | ||
46 | // This class does not take ownership of the pointer parameters. | |
47 | Segmenter(size_t l_num_elements, size_t r_num_elements, | |
48 | const uint16 *l_table, const uint16 *r_table, | |
49 | size_t bitarray_num_bytes, const char *bitarray_data, | |
50 | const BoundaryData *boundary_data); | |
51 | virtual ~Segmenter(); | |
52 | ||
// Node-based boundary query.
// NOTE(review): presumably overrides a SegmenterInterface method; the
// interface is not visible here - confirm.
53 | virtual bool IsBoundary(const Node *lnode, const Node *rnode, | |
54 | bool is_single_segment) const; | |
55 | ||
// Id-based boundary query: |rid| comes from the left node, |lid| from the
// right node.
56 | virtual bool IsBoundary(uint16 rid, uint16 lid) const; | |
57 | ||
// Prefix penalty stored in the boundary data for |lid|.
58 | virtual int32 GetPrefixPenalty(uint16 lid) const; | |
59 | ||
// Suffix penalty stored in the boundary data for |rid|.
60 | virtual int32 GetSuffixPenalty(uint16 rid) const; | |
61 | ||
62 | private: | |
// Element counts passed to the constructor; their product must fit in the
// bit array (one bit per pair).
63 | const size_t l_num_elements_; | |
64 | const size_t r_num_elements_; | |
// Tables indexed by rid (l_table_) and lid (r_table_) when computing a bit
// index.
65 | const uint16 *l_table_; | |
66 | const uint16 *r_table_; | |
// Size in bytes of, and pointer to, the boundary bit array.
67 | const size_t bitarray_num_bytes_; | |
68 | const char *bitarray_data_; | |
// Array of per-id prefix/suffix penalties.
69 | const BoundaryData *boundary_data_; | |
70 | }; | |
71 | ||
72 | } // namespace mozc | |
73 | ||
74 | #endif // MOZC_CONVERTER_SEGMENTER_H_ |
0 | // Copyright 2010-2015, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #include "converter/segmenter_base.h" | |
30 | ||
31 | #include "base/bitarray.h" | |
32 | #include "base/logging.h" | |
33 | #include "base/port.h" | |
34 | #include "converter/boundary_struct.h" | |
35 | #include "converter/node.h" | |
36 | #include "data_manager/data_manager_interface.h" | |
37 | ||
38 | namespace mozc { | |
39 | ||
// Factory: builds a SegmenterBase from the segmenter data exposed by
// |data_manager|. GetSegmenterData() fills the out-parameters below, which
// are then passed unchanged to the constructor. The object is allocated with
// new and returned as a raw pointer, so the caller must delete it.
40 | SegmenterBase *SegmenterBase::CreateFromDataManager( | |
41 | const DataManagerInterface &data_manager) { | |
42 | size_t l_num_elements = 0; | |
43 | size_t r_num_elements = 0; | |
44 | const uint16 *l_table = NULL; | |
45 | const uint16 *r_table = NULL; | |
46 | size_t bitarray_num_bytes = 0; | |
47 | const char *bitarray_data = NULL; | |
48 | const BoundaryData *boundary_data = NULL; | |
49 | data_manager.GetSegmenterData(&l_num_elements, &r_num_elements, | |
50 | &l_table, &r_table, | |
51 | &bitarray_num_bytes, &bitarray_data, | |
52 | &boundary_data); | |
53 | return new SegmenterBase(l_num_elements, r_num_elements, | |
54 | l_table, r_table, | |
55 | bitarray_num_bytes, bitarray_data, | |
56 | boundary_data); | |
57 | } | |
58 | ||
// Keeps the given sizes and pointers without copying the pointed-to data.
// The four pointers are DCHECKed to be non-null, and the bit array must
// provide at least l_num_elements * r_num_elements bits (CHECK_LE).
59 | SegmenterBase::SegmenterBase( | |
60 | size_t l_num_elements, size_t r_num_elements, const uint16 *l_table, | |
61 | const uint16 *r_table, size_t bitarray_num_bytes, | |
62 | const char *bitarray_data, const BoundaryData *boundary_data) | |
63 | : l_num_elements_(l_num_elements), r_num_elements_(r_num_elements), | |
64 | l_table_(l_table), r_table_(r_table), | |
65 | bitarray_num_bytes_(bitarray_num_bytes), | |
66 | bitarray_data_(bitarray_data), boundary_data_(boundary_data) { | |
67 | DCHECK(l_table_); | |
68 | DCHECK(r_table_); | |
69 | DCHECK(bitarray_data_); | |
70 | DCHECK(boundary_data_); | |
// bitarray_num_bytes_ * 8 is the number of bits available.
71 | CHECK_LE(l_num_elements_ * r_num_elements_, bitarray_num_bytes_ * 8); | |
72 | } | |
73 | ||
// Empty destructor body: the stored pointers are not freed here.
74 | SegmenterBase::~SegmenterBase() {} | |
75 | ||
// Decides whether a segment boundary lies between |lnode| and |rnode|.
// Checked in order: BOS/EOS nodes (true), |is_single_segment| (false),
// STARTS_WITH_PARTICLE on lnode (false), then the id-based table lookup.
// Both node pointers must be non-null (DCHECKed).
76 | bool SegmenterBase::IsBoundary(const Node *lnode, const Node *rnode, | |
77 | bool is_single_segment) const { | |
78 | DCHECK(lnode); | |
79 | DCHECK(rnode); | |
80 | if (lnode->node_type == Node::BOS_NODE || | |
81 | rnode->node_type == Node::EOS_NODE) { | |
82 | return true; | |
83 | } | |
84 | ||
85 | // Always return false in prediction mode. | |
86 | // This implies that the converter always returns a single-segment result | |
87 | // in prediction mode. | |
88 | if (is_single_segment) { | |
89 | return false; | |
90 | } | |
91 | ||
92 | // Concatenate a particle and a content word into one segment | |
93 | // if lnode is located at the beginning of the user input. | |
94 | // This hack is for handling ambiguous bunsetsu segmentation. | |
95 | // e.g. "かみ|にかく" => "紙|に書く" or "紙二角". | |
96 | // If we segment "に書く" into two segments, "二角" is never shown. | |
97 | // There exists an implicit assumption that the user expects his/her input | |
98 | // to become one bunsetsu. So, it would be better to keep "二角" even after "紙". | |
99 | if (lnode->attributes & Node::STARTS_WITH_PARTICLE) { | |
100 | return false; | |
101 | } | |
102 | ||
103 | return IsBoundary(lnode->rid, rnode->lid); | |
104 | } | |
105 | ||
// Returns the bit that BitArray::GetValue() reports at index
// l_table_[rid] + l_num_elements_ * r_table_[lid] of bitarray_data_.
// |rid| and |lid| index the tables directly, without a range check.
106 | bool SegmenterBase::IsBoundary(uint16 rid, uint16 lid) const { | |
107 | const uint32 bitarray_index = l_table_[rid] + l_num_elements_ * r_table_[lid]; | |
108 | return BitArray::GetValue(reinterpret_cast<const char*>(bitarray_data_), | |
109 | bitarray_index); | |
110 | } | |
111 | ||
// Returns boundary_data_[lid].prefix_penalty; |lid| is not range-checked.
112 | int32 SegmenterBase::GetPrefixPenalty(uint16 lid) const { | |
113 | return boundary_data_[lid].prefix_penalty; | |
114 | } | |
115 | ||
// Returns boundary_data_[rid].suffix_penalty; |rid| is not range-checked.
116 | int32 SegmenterBase::GetSuffixPenalty(uint16 rid) const { | |
117 | return boundary_data_[rid].suffix_penalty; | |
118 | } | |
119 | } // namespace mozc |
0 | // Copyright 2010-2015, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | // Segmenter implementation base class | |
30 | ||
31 | #ifndef MOZC_CONVERTER_SEGMENTER_BASE_H_ | |
32 | #define MOZC_CONVERTER_SEGMENTER_BASE_H_ | |
33 | ||
34 | #include "base/port.h" | |
35 | #include "converter/segmenter_interface.h" | |
36 | ||
37 | namespace mozc { | |
38 | ||
39 | class DataManagerInterface; | |
40 | struct Node; | |
41 | struct BoundaryData; | |
42 | ||
// SegmenterInterface implementation that reads boundary and penalty
// information from caller-supplied tables held through raw, non-owned
// pointers.
43 | class SegmenterBase : public SegmenterInterface { | |
44 | public: | |
// Creates an instance from the data provided by |data_manager| and returns
// it as a raw pointer.
45 | static SegmenterBase *CreateFromDataManager( | |
46 | const DataManagerInterface &data_manager); | |
47 | ||
48 | // This class does not take ownership of the pointer parameters. | |
49 | SegmenterBase(size_t l_num_elements, size_t r_num_elements, | |
50 | const uint16 *l_table, const uint16 *r_table, | |
51 | size_t bitarray_num_bytes, const char *bitarray_data, | |
52 | const BoundaryData *boundary_data); | |
53 | virtual ~SegmenterBase(); | |
54 | ||
// Node-based boundary query.
55 | virtual bool IsBoundary(const Node *lnode, const Node *rnode, | |
56 | bool is_single_segment) const; | |
57 | ||
// Id-based boundary query.
58 | virtual bool IsBoundary(uint16 rid, uint16 lid) const; | |
59 | ||
// Prefix penalty stored in the boundary data for |lid|.
60 | virtual int32 GetPrefixPenalty(uint16 lid) const; | |
61 | ||
// Suffix penalty stored in the boundary data for |rid|.
62 | virtual int32 GetSuffixPenalty(uint16 rid) const; | |
63 | ||
64 | private: | |
// Element counts; their product is checked against the bit array size in
// the constructor.
65 | const size_t l_num_elements_; | |
66 | const size_t r_num_elements_; | |
// Lookup tables indexed by rid (l_table_) and lid (r_table_).
67 | const uint16 *l_table_; | |
68 | const uint16 *r_table_; | |
// Byte size of, and pointer to, the boundary bit array.
69 | const size_t bitarray_num_bytes_; | |
70 | const char *bitarray_data_; | |
// Array of per-id prefix/suffix penalties.
71 | const BoundaryData *boundary_data_; | |
72 | }; | |
73 | ||
74 | } // namespace mozc | |
75 | #endif // MOZC_CONVERTER_SEGMENTER_BASE_H_ |
41 | 41 | 'dependencies': [ |
42 | 42 | '../base/base.gyp:base', |
43 | 43 | '../converter/converter_base.gyp:connector', |
44 | '../converter/converter_base.gyp:segmenter_base', | |
44 | '../converter/converter_base.gyp:segmenter', | |
45 | 45 | '../dictionary/dictionary_base.gyp:pos_matcher', |
46 | 46 | '../prediction/prediction_base.gyp:suggestion_filter', |
47 | 47 | '../testing/testing.gyp:testing', |
40 | 40 | #include "converter/connector.h" |
41 | 41 | #include "converter/node.h" |
42 | 42 | #include "converter/segmenter_base.h" |
43 | #include "converter/segmenter_interface.h" | |
44 | 43 | #include "data_manager/connection_file_reader.h" |
45 | 44 | #include "data_manager/data_manager_interface.h" |
46 | 45 | #include "dictionary/pos_matcher.h" |
83 | 82 | void DataManagerTestBase::SegmenterTest_SameAsInternal() { |
84 | 83 | // This test verifies that a segmenter created by MockDataManager provides |
85 | 84 | // the expected boundary rule. |
86 | scoped_ptr<SegmenterInterface> segmenter( | |
87 | SegmenterBase::CreateFromDataManager(*data_manager_)); | |
85 | scoped_ptr<Segmenter> segmenter( | |
86 | Segmenter::CreateFromDataManager(*data_manager_)); | |
88 | 87 | for (size_t rid = 0; rid < lsize_; ++rid) { |
89 | 88 | for (size_t lid = 0; lid < rsize_; ++lid) { |
90 | 89 | EXPECT_EQ(is_boundary_(rid, lid), |
94 | 93 | } |
95 | 94 | |
96 | 95 | void DataManagerTestBase::SegmenterTest_LNodeTest() { |
97 | scoped_ptr<SegmenterInterface> segmenter( | |
98 | SegmenterBase::CreateFromDataManager(*data_manager_)); | |
96 | scoped_ptr<Segmenter> segmenter( | |
97 | Segmenter::CreateFromDataManager(*data_manager_)); | |
99 | 98 | |
100 | 99 | // lnode is BOS |
101 | 100 | Node lnode, rnode; |
112 | 111 | } |
113 | 112 | |
114 | 113 | void DataManagerTestBase::SegmenterTest_RNodeTest() { |
115 | scoped_ptr<SegmenterInterface> segmenter( | |
116 | SegmenterBase::CreateFromDataManager(*data_manager_)); | |
114 | scoped_ptr<Segmenter> segmenter( | |
115 | Segmenter::CreateFromDataManager(*data_manager_)); | |
117 | 116 | |
118 | 117 | // rnode is EOS |
119 | 118 | Node lnode, rnode; |
130 | 129 | } |
131 | 130 | |
132 | 131 | void DataManagerTestBase::SegmenterTest_NodeTest() { |
133 | scoped_ptr<SegmenterInterface> segmenter( | |
134 | SegmenterBase::CreateFromDataManager(*data_manager_)); | |
132 | scoped_ptr<Segmenter> segmenter( | |
133 | Segmenter::CreateFromDataManager(*data_manager_)); | |
135 | 134 | |
136 | 135 | Node lnode, rnode; |
137 | 136 | lnode.node_type = Node::NOR_NODE; |
148 | 147 | } |
149 | 148 | |
150 | 149 | void DataManagerTestBase::SegmenterTest_ParticleTest() { |
151 | scoped_ptr<SegmenterInterface> segmenter( | |
152 | SegmenterBase::CreateFromDataManager(*data_manager_)); | |
150 | scoped_ptr<Segmenter> segmenter( | |
151 | Segmenter::CreateFromDataManager(*data_manager_)); | |
153 | 152 | const POSMatcher *pos_matcher = data_manager_->GetPOSMatcher(); |
154 | 153 | |
155 | 154 | Node lnode, rnode; |
35 | 35 | #include "converter/converter_interface.h" |
36 | 36 | #include "converter/immutable_converter.h" |
37 | 37 | #include "converter/immutable_converter_interface.h" |
38 | #include "converter/segmenter_base.h" | |
38 | #include "converter/segmenter.h" | |
39 | 39 | #include "data_manager/data_manager_interface.h" |
40 | 40 | #include "dictionary/dictionary_impl.h" |
41 | 41 | #include "dictionary/dictionary_interface.h" |
175 | 175 | connector_.reset(Connector::CreateFromDataManager(*data_manager)); |
176 | 176 | CHECK(connector_.get()); |
177 | 177 | |
178 | segmenter_.reset(SegmenterBase::CreateFromDataManager(*data_manager)); | |
178 | segmenter_.reset(Segmenter::CreateFromDataManager(*data_manager)); | |
179 | 179 | CHECK(segmenter_.get()); |
180 | 180 | |
181 | 181 | pos_group_.reset(new PosGroup(data_manager->GetPosGroupData())); |
43 | 43 | '../base/base.gyp:base', |
44 | 44 | '../converter/converter.gyp:converter', |
45 | 45 | '../converter/converter_base.gyp:connector', |
46 | '../converter/converter_base.gyp:segmenter_base', | |
46 | '../converter/converter_base.gyp:segmenter', | |
47 | 47 | '../dictionary/dictionary.gyp:dictionary_impl', |
48 | 48 | '../dictionary/dictionary.gyp:suffix_dictionary', |
49 | 49 | '../dictionary/dictionary_base.gyp:dictionary_protocol', |
0 | 0 | MAJOR=2 |
1 | 1 | MINOR=17 |
2 | BUILD=2084 | |
2 | BUILD=2085 | |
3 | 3 | REVISION=102 |
4 | 4 | # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be |
5 | 5 | # downloaded by NaCl Mozc. |
52 | 52 | #include "converter/immutable_converter.h" |
53 | 53 | #include "converter/immutable_converter_interface.h" |
54 | 54 | #include "converter/node_allocator.h" |
55 | #include "converter/segmenter_base.h" | |
56 | #include "converter/segmenter_interface.h" | |
55 | #include "converter/segmenter.h" | |
57 | 56 | #include "converter/segments.h" |
58 | 57 | #include "data_manager/data_manager_interface.h" |
59 | 58 | #include "data_manager/testing/mock_data_manager.h" |
223 | 222 | connector_.reset(Connector::CreateFromDataManager(data_manager)); |
224 | 223 | CHECK(connector_.get()); |
225 | 224 | |
226 | segmenter_.reset(SegmenterBase::CreateFromDataManager(data_manager)); | |
225 | segmenter_.reset(Segmenter::CreateFromDataManager(data_manager)); | |
227 | 226 | CHECK(segmenter_.get()); |
228 | 227 | |
229 | 228 | pos_group_.reset(new PosGroup(data_manager.GetPosGroupData())); |
1562 | 1561 | scoped_ptr<const Connector> connector( |
1563 | 1562 | Connector::CreateFromDataManager(data_manager)); |
1564 | 1563 | scoped_ptr<const SegmenterInterface> segmenter( |
1565 | SegmenterBase::CreateFromDataManager(data_manager)); | |
1564 | Segmenter::CreateFromDataManager(data_manager)); | |
1566 | 1565 | scoped_ptr<const SuggestionFilter> suggestion_filter( |
1567 | 1566 | CreateSuggestionFilter(data_manager)); |
1568 | 1567 | scoped_ptr<TestableDictionaryPredictor> predictor( |
3134 | 3133 | scoped_ptr<const Connector> connector( |
3135 | 3134 | Connector::CreateFromDataManager(data_manager)); |
3136 | 3135 | scoped_ptr<const SegmenterInterface> segmenter( |
3137 | SegmenterBase::CreateFromDataManager(data_manager)); | |
3136 | Segmenter::CreateFromDataManager(data_manager)); | |
3138 | 3137 | scoped_ptr<const SuggestionFilter> suggestion_filter( |
3139 | 3138 | CreateSuggestionFilter(data_manager)); |
3140 | 3139 | scoped_ptr<TestableDictionaryPredictor> predictor( |
46 | 46 | '../converter/converter_base.gyp:connector', |
47 | 47 | '../converter/converter_base.gyp:converter_mock', |
48 | 48 | '../converter/converter_base.gyp:immutable_converter', |
49 | '../converter/converter_base.gyp:segmenter_base', | |
49 | '../converter/converter_base.gyp:segmenter', | |
50 | 50 | '../converter/converter_base.gyp:segments', |
51 | 51 | '../data_manager/data_manager.gyp:user_pos_manager', |
52 | 52 | '../data_manager/testing/mock_data_manager.gyp:mock_data_manager', |