Codebase list mozc / 54acfd1
Stop embedding segmenter data as C++ code. This CL moves the segmenter data into a data set file for all platforms. BUG= TEST= REF_BUG=26841123 REF_CL=114514812 REF_TIME=2016-02-12T17:13:08+09:00 REF_TIME_RAW=1455264788 +0900 Noriyuki Takahashi 8 years ago
25 changed file(s) with 228 addition(s) and 208 deletion(s). Raw diff Collapse all Expand all
153153 ],
154154 'dependencies' : [
155155 '../base/base.gyp:base',
156 '../protocol/protocol.gyp:segmenter_data_proto',
156157 ]
157158 },
158159 {
3838 #include <vector>
3939
4040 #include "base/bitarray.h"
41 #include "base/codegen_bytearray_stream.h"
4241 #include "base/file_stream.h"
4342 #include "base/logging.h"
4443 #include "base/port.h"
44 #include "base/system_util.h"
45 #include "protocol/segmenter_data.pb.h"
4546
4647 namespace mozc {
4748
8990 return compressed_table_[id];
9091 }
9192
92 size_t compressed_size() const {
93 return compressed_size_;
94 }
95
96 void Output(const string &name, ostream *os) {
97 // Disable following compression for simplifying the implementation.
98 // As CompressedTable have <3000 entries, this affects at most
99 // (16-8)(bit) * 3000 (entries) * 2 (L and R) = 6KB
100 //
101 // TODO(toshiyuki): Enable this compression again if possible or needed
102 //
103 // if (compressed_size_ < 256) {
104 // // trivial compression -- use uint8 if possible
105 // *os << "const uint8 " << name << "[] = {" << std::endl;
106 // } else {
107 // *os << "const uint16 " << name << "[] = {" << std::endl;
108 // }
109
110 *os << "const uint16 " << name << "[] = {" << std::endl;
111 for (size_t i = 0; i < compressed_table_.size(); ++i) {
112 *os << compressed_table_[i];
113 if (i < compressed_table_.size() - 1) {
114 *os << ",";
115 }
116 *os << std::endl;
117 }
118 *os << "};" << std::endl;
93 size_t compressed_size() const { return compressed_size_; }
94
95 void Output(ostream *os) {
96 const char* data = reinterpret_cast<const char*>(compressed_table_.data());
97 const size_t bytelen = compressed_table_.size() * sizeof(uint16);
98 os->write(data, bytelen);
11999 }
120100
121101 private:
127107 };
128108 } // namespace
129109
130 void SegmenterBitarrayGenerator::GenerateBitarray(int lsize, int rsize,
131 IsBoundaryFunc func,
132 const string &output_file) {
110 void SegmenterBitarrayGenerator::GenerateBitarray(
111 int lsize, int rsize, IsBoundaryFunc func, const string &output_size_info,
112 const string &output_ltable, const string &output_rtable,
113 const string &output_bitarray) {
133114 // Load the original matrix into an array
134 vector<uint8> array((lsize + 1) * (rsize + 1));
115 vector<uint8> array((lsize + 1) * (rsize + 1));
135116
136117 for (size_t rid = 0; rid <= lsize; ++rid) {
137118 for (size_t lid = 0; lid <= rsize; ++lid) {
198179 // verify the table
199180 for (size_t rid = 0; rid <= lsize; ++rid) {
200181 for (size_t lid = 0; lid <= rsize; ++lid) {
201 const int index= rid + lsize * lid;
182 const int index = rid + lsize * lid;
202183 const uint32 cindex = ltable.id(rid) + kCompressedLSize * rtable.id(lid);
203184 CHECK_EQ(barray.get(cindex), (array[index] != 0));
204185 }
207188 CHECK(barray.array());
208189 CHECK_GT(barray.size(), 0);
209190
210 mozc::OutputFileStream ofs(output_file.c_str());
211 CHECK(ofs);
212
213 ofs << "const size_t kCompressedLSize = " << kCompressedLSize << ";"
214 << std::endl;
215 ofs << "const size_t kCompressedRSize = " << kCompressedRSize << ";"
216 << std::endl;
217 ltable.Output("kCompressedLIDTable", &ofs);
218 rtable.Output("kCompressedRIDTable", &ofs);
219
220 mozc::CodeGenByteArrayOutputStream codegen_stream(
221 &ofs, mozc::codegenstream::NOT_OWN_STREAM);
222 codegen_stream.OpenVarDef("SegmenterBitArrayData");
223 codegen_stream.write(barray.array(), barray.array_size());
224 codegen_stream.CloseVarDef();
191 CHECK(SystemUtil::IsLittleEndian())
192 << "Architecture must be little endian";
193 {
194 mozc::converter::SegmenterDataSizeInfo pb;
195 pb.set_compressed_lsize(kCompressedLSize);
196 pb.set_compressed_rsize(kCompressedRSize);
197 mozc::OutputFileStream ofs(output_size_info.c_str(),
198 ios_base::out | ios_base::binary);
199 CHECK(ofs);
200 CHECK(pb.SerializeToOstream(&ofs));
201 ofs.close();
202 }
203 {
204 mozc::OutputFileStream ofs(output_ltable.c_str(),
205 ios_base::out | ios_base::binary);
206 CHECK(ofs);
207 ltable.Output(&ofs);
208 ofs.close();
209 }
210 {
211 mozc::OutputFileStream ofs(output_rtable.c_str(),
212 ios_base::out | ios_base::binary);
213 CHECK(ofs);
214 rtable.Output(&ofs);
215 ofs.close();
216 }
217 {
218 mozc::OutputFileStream ofs(output_bitarray.c_str(),
219 ios_base::out | ios_base::binary);
220 CHECK(ofs);
221 ofs.write(barray.array(), barray.array_size());
222 ofs.close();
223 }
225224 }
226225
227226 } // namespace mozc
3939 public:
4040 typedef bool (*IsBoundaryFunc)(uint16 rid, uint16 lid);
4141 static void GenerateBitarray(int lsize, int rsize, IsBoundaryFunc func,
42 const string &output_file);
42 const string &output_size_info,
43 const string &output_ltable,
44 const string &output_rtable,
45 const string &output_bitarray);
4346
4447 private:
4548 DISALLOW_COPY_AND_ASSIGN(SegmenterBitarrayGenerator);
9090 manager_.GetSystemDictionaryData(data, size);
9191 }
9292
93 namespace {
94 // Automatically generated headers containing data set for segmenter.
95 #include "data_manager/chromeos/segmenter_data.h"
96 } // namespace
97
9893 void ChromeOsDataManager::GetSegmenterData(
9994 size_t *l_num_elements, size_t *r_num_elements,
10095 const uint16 **l_table, const uint16 **r_table,
10196 size_t *bitarray_num_bytes, const char **bitarray_data,
10297 const uint16 **boundary_data) const {
103 *l_num_elements = kCompressedLSize;
104 *r_num_elements = kCompressedRSize;
105 *l_table = kCompressedLIDTable;
106 *r_table = kCompressedRIDTable;
107 *bitarray_num_bytes = kSegmenterBitArrayData_size;
108 *bitarray_data = kSegmenterBitArrayData_data;
109 *boundary_data = manager_.GetBoundaryData();
98 manager_.GetSegmenterData(l_num_elements, r_num_elements,
99 l_table, r_table, bitarray_num_bytes,
100 bitarray_data, boundary_data);
110101 }
111102
112103 namespace {
3636 namespace chromeos {
3737
3838 namespace {
39 #include "data_manager/chromeos/chromeos_segmenter_inl.h"
39 #include "data_manager/chromeos/segmenter_inl.h"
4040 } // namespace
4141
4242 class ChromeOsDataManagerTest : public DataManagerTestBase {
3131 #include "converter/gen_segmenter_bitarray.h"
3232
3333 namespace {
34 #include "data_manager/chromeos/chromeos_segmenter_inl.h"
34 #include "data_manager/chromeos/segmenter_inl.h"
3535 }
3636
37 DEFINE_string(output, "", "header filename for chromeos segmenter");
37 DEFINE_string(output_size_info, "", "Serialized SegmenterDataSizeInfo");
38 DEFINE_string(output_ltable, "", "LTable array");
39 DEFINE_string(output_rtable, "", "RTable array");
40 DEFINE_string(output_bitarray, "", "Segmenter bitarray");
3841
3942 int main(int argc, char **argv) {
4043 mozc::InitMozc(argv[0], &argc, &argv, true);
4144 mozc::SegmenterBitarrayGenerator::GenerateBitarray(
42 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output);
45 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output_size_info,
46 FLAGS_output_ltable, FLAGS_output_rtable, FLAGS_output_bitarray);
4347 return 0;
4448 }
3030
3131 #include "base/logging.h"
3232 #include "data_manager/dataset_reader.h"
33 #include "protocol/segmenter_data.pb.h"
3334
3435 namespace mozc {
3536
6869 }
6970 if (!reader.Get("bdry", &boundary_data_)) {
7071 LOG(ERROR) << "Cannot find a boundary data";
72 return false;
73 }
74 {
75 StringPiece memblock;
76 if (!reader.Get("segmenter_sizeinfo", &memblock)) {
77 LOG(ERROR) << "Cannot find a segmenter size info";
78 return false;
79 }
80 converter::SegmenterDataSizeInfo sizeinfo;
81 if (!sizeinfo.ParseFromArray(memblock.data(), memblock.size())) {
82 LOG(ERROR) << "Failed to parse SegmenterDataSizeInfo";
83 return false;
84 }
85 segmenter_compressed_lsize_ = sizeinfo.compressed_lsize();
86 segmenter_compressed_rsize_ = sizeinfo.compressed_rsize();
87 }
88 if (!reader.Get("segmenter_ltable", &segmenter_ltable_)) {
89 LOG(ERROR) << "Cannot find a segmenter ltable";
90 return false;
91 }
92 if (!reader.Get("segmenter_rtable", &segmenter_rtable_)) {
93 LOG(ERROR) << "Cannot find a segmenter rtable";
94 return false;
95 }
96 if (!reader.Get("segmenter_bitarray", &segmenter_bitarray_)) {
97 LOG(ERROR) << "Cannot find a segmenter bit-array";
7198 return false;
7299 }
73100 return true;
118145 size_t *l_num_elements, size_t *r_num_elements, const uint16 **l_table,
119146 const uint16 **r_table, size_t *bitarray_num_bytes,
120147 const char **bitarray_data, const uint16 **boundary_data) const {
121 LOG(FATAL) << "Not implemented";
148 *l_num_elements = segmenter_compressed_lsize_;
149 *r_num_elements = segmenter_compressed_rsize_;
150 *l_table = reinterpret_cast<const uint16 *>(segmenter_ltable_.data());
151 *r_table = reinterpret_cast<const uint16 *>(segmenter_rtable_.data());
152 *bitarray_num_bytes = segmenter_bitarray_.size();
153 *bitarray_data = segmenter_bitarray_.data();
154 *boundary_data = reinterpret_cast<const uint16 *>(boundary_data_.data());
122155 }
123156
124157 void DataManager::GetSuffixDictionaryData(const dictionary::SuffixToken **data,
5959 'target_name': '<(dataset_tag)_data_manager',
6060 'type': 'static_library',
6161 'sources': [
62 '<(gen_out_dir)/embedded_connection_data.h',
63 '<(gen_out_dir)/embedded_dictionary_data.h',
64 '<(gen_out_dir)/segmenter_data.h',
6562 '<(gen_out_dir)/suffix_data.h',
6663 '<(gen_out_dir)/symbol_rewriter_data.h',
6764 '<(mozc_dir)/dictionary/pos_group.h',
143140 'suggestion_filter': '<(gen_out_dir)/suggestion_filter_data.data',
144141 'pos_group': '<(gen_out_dir)/pos_group.data',
145142 'boundary': '<(gen_out_dir)/boundary.data',
143 'segmenter_sizeinfo': '<(gen_out_dir)/segmenter_sizeinfo.data',
144 'segmenter_ltable': '<(gen_out_dir)/segmenter_ltable.data',
145 'segmenter_rtable': '<(gen_out_dir)/segmenter_rtable.data',
146 'segmenter_bitarray': '<(gen_out_dir)/segmenter_bitarray.data',
146147 },
147148 'inputs': [
148149 '<(dictionary)',
152153 '<(suggestion_filter)',
153154 '<(pos_group)',
154155 '<(boundary)',
156 '<(segmenter_sizeinfo)',
157 '<(segmenter_ltable)',
158 '<(segmenter_rtable)',
159 '<(segmenter_bitarray)',
155160 ],
156161 'outputs': [
157162 '<(gen_out_dir)/<(out_mozc_data)',
165170 'conn:32:<(gen_out_dir)/connection.data',
166171 'dict:32:<(gen_out_dir)/system.dictionary',
167172 'sugg:32:<(gen_out_dir)/suggestion_filter_data.data',
168 'posg:8:<(gen_out_dir)/pos_group.data',
169 'bdry:16:<(gen_out_dir)/boundary.data',
173 'posg:32:<(gen_out_dir)/pos_group.data',
174 'bdry:32:<(gen_out_dir)/boundary.data',
175 'segmenter_sizeinfo:32:<(gen_out_dir)/segmenter_sizeinfo.data',
176 'segmenter_ltable:32:<(gen_out_dir)/segmenter_ltable.data',
177 'segmenter_rtable:32:<(gen_out_dir)/segmenter_rtable.data',
178 'segmenter_bitarray:32:<(gen_out_dir)/segmenter_bitarray.data'
170179 ],
171180 },
172181 ],
183192 'gen_embedded_counter_suffix_data_for_<(dataset_tag)#host',
184193 'gen_embedded_dictionary_data_for_<(dataset_tag)#host',
185194 'gen_embedded_reading_correction_data_for_<(dataset_tag)#host',
186 'gen_embedded_segmenter_data_for_<(dataset_tag)#host',
187195 'gen_embedded_suffix_data_for_<(dataset_tag)#host',
188196 'gen_embedded_suggestion_filter_data_for_<(dataset_tag)#host',
189197 'gen_embedded_symbol_rewriter_data_for_<(dataset_tag)#host',
501509 '<@(input_files)',
502510 ],
503511 'outputs': [
504 '<(gen_out_dir)/<(dataset_tag)_segmenter_inl.h',
512 '<(gen_out_dir)/segmenter_inl.h',
505513 ],
506514 'action': [
507515 'python', '<(mozc_dir)/build_tools/redirect.py',
508 '<(gen_out_dir)/<(dataset_tag)_segmenter_inl.h',
516 '<(gen_out_dir)/segmenter_inl.h',
509517 '<(mozc_dir)/converter/gen_segmenter_code.py',
510518 '<@(input_files)',
511519 ],
512520 'message': ('[<(dataset_tag)] Generating ' +
513 '<(gen_out_dir)/<(dataset_tag)_segmenter_inl.h.'),
521 '<(gen_out_dir)/segmenter_inl.h.'),
514522 },
515523 ],
516524 },
530538 ],
531539 },
532540 {
533 'target_name': 'gen_embedded_segmenter_data_for_<(dataset_tag)',
541 'target_name': 'gen_separate_segmenter_data_for_<(dataset_tag)',
534542 'type': 'none',
535543 'toolsets': ['host'],
536544 'dependencies': [
538546 ],
539547 'actions': [
540548 {
541 'action_name': 'gen_embedded_segmenter_data_for_<(dataset_tag)',
549 'action_name': 'gen_separate_segmenter_data_for_<(dataset_tag)',
542550 'variables': {
543551 'generator': '<(PRODUCT_DIR)/gen_<(dataset_tag)_sbm<(EXECUTABLE_SUFFIX)'
544552 },
546554 '<(generator)',
547555 ],
548556 'outputs': [
549 '<(gen_out_dir)/segmenter_data.h',
550 ],
551 'action': [
552 '<(generator)',
553 '--output=<(gen_out_dir)/segmenter_data.h',
554 ],
555 'message': ('[<(dataset_tag)] Generating ' +
556 '<(gen_out_dir)/segmenter_data.h.'),
557 '<(gen_out_dir)/segmenter_sizeinfo.data',
558 '<(gen_out_dir)/segmenter_ltable.data',
559 '<(gen_out_dir)/segmenter_rtable.data',
560 '<(gen_out_dir)/segmenter_bitarray.data',
561 ],
562 'action': [
563 '<(generator)',
564 '--output_size_info=<(gen_out_dir)/segmenter_sizeinfo.data',
565 '--output_ltable=<(gen_out_dir)/segmenter_ltable.data',
566 '--output_rtable=<(gen_out_dir)/segmenter_rtable.data',
567 '--output_bitarray=<(gen_out_dir)/segmenter_bitarray.data',
568 ],
569 'message': ('[<(dataset_tag)] Generating segmenter data files'),
557570 },
558571 ],
559572 },
8181 void GetCounterSuffixSortedArray(const CounterSuffixEntry **array,
8282 size_t *size) const override;
8383
84 // TODO(noriyukit): This function gives boundary data, which is a partial
85 // result of GetSegmenterData() above. Implement the full GetSegmenterData()
86 // and remove this function.
87 const uint16 *GetBoundaryData() const {
88 return reinterpret_cast<const uint16*>(boundary_data_.data());
89 }
90
9184 private:
9285 StringPiece connection_data_;
9386 StringPiece dictionary_data_;
9689 StringPiece collocation_suppression_data_;
9790 StringPiece pos_group_data_;
9891 StringPiece boundary_data_;
92 size_t segmenter_compressed_lsize_;
93 size_t segmenter_compressed_rsize_;
94 StringPiece segmenter_ltable_;
95 StringPiece segmenter_rtable_;
96 StringPiece segmenter_bitarray_;
9997
10098 DISALLOW_COPY_AND_ASSIGN(DataManager);
10199 };
4141 ],
4242 'dependencies': [
4343 '../base/base.gyp:base',
44 '../protocol/protocol.gyp:segmenter_data_proto',
4445 'dataset_reader',
4546 ],
4647 },
3131 #include "converter/gen_segmenter_bitarray.h"
3232
3333 namespace {
34 #include "data_manager/oss/oss_segmenter_inl.h"
34 #include "data_manager/oss/segmenter_inl.h"
3535 }
3636
37 DEFINE_string(output, "", "header filename for google segmenter");
37 DEFINE_string(output_size_info, "", "Serialized SegmenterDataSizeInfo");
38 DEFINE_string(output_ltable, "", "LTable array");
39 DEFINE_string(output_rtable, "", "RTable array");
40 DEFINE_string(output_bitarray, "", "Segmenter bitarray");
3841
3942 int main(int argc, char **argv) {
4043 mozc::InitMozc(argv[0], &argc, &argv, true);
4144 mozc::SegmenterBitarrayGenerator::GenerateBitarray(
42 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output);
45 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output_size_info,
46 FLAGS_output_ltable, FLAGS_output_rtable, FLAGS_output_bitarray);
4347 return 0;
4448 }
102102 manager_.GetSystemDictionaryData(data, size);
103103 }
104104
105 namespace {
106 // Automatically generated headers containing data set for segmenter.
107 #include "data_manager/oss/segmenter_data.h"
108 } // namespace
109
110105 void OssDataManager::GetSegmenterData(
111106 size_t *l_num_elements, size_t *r_num_elements,
112107 const uint16 **l_table, const uint16 **r_table,
113108 size_t *bitarray_num_bytes, const char **bitarray_data,
114109 const uint16 **boundary_data) const {
115 *l_num_elements = kCompressedLSize;
116 *r_num_elements = kCompressedRSize;
117 *l_table = kCompressedLIDTable;
118 *r_table = kCompressedRIDTable;
119 *bitarray_num_bytes = kSegmenterBitArrayData_size;
120 *bitarray_data = kSegmenterBitArrayData_data;
121 *boundary_data = manager_.GetBoundaryData();
110 manager_.GetSegmenterData(l_num_elements, r_num_elements,
111 l_table, r_table, bitarray_num_bytes,
112 bitarray_data, boundary_data);
122113 }
123114
124115 namespace {
3636 namespace oss {
3737
3838 namespace {
39 #include "data_manager/oss/oss_segmenter_inl.h"
39 #include "data_manager/oss/segmenter_inl.h"
4040 } // namespace
4141
4242 class OssDataManagerTest : public DataManagerTestBase {
5858
5959 #include "data_manager/@DIR@/pos_matcher_data.h"
6060 #include "data_manager/@DIR@/reading_correction_data.h"
61 #include "data_manager/@DIR@/segmenter_data.h"
6261 #include "data_manager/@DIR@/suffix_data.h"
6362 #include "data_manager/@DIR@/symbol_rewriter_data.h"
6463 #include "data_manager/@DIR@/user_pos_data.h"
8483 packer.SetSuffixTokens(kSuffixTokens, arraysize(kSuffixTokens));
8584 packer.SetReadingCorretions(kReadingCorrections,
8685 arraysize(kReadingCorrections));
87 packer.SetSegmenterData(kCompressedLSize,
88 kCompressedRSize,
89 kCompressedLIDTable,
90 arraysize(kCompressedLIDTable),
91 kCompressedRIDTable,
92 arraysize(kCompressedRIDTable),
93 kSegmenterBitArrayData_data,
94 kSegmenterBitArrayData_size);
9586 packer.SetSymbolRewriterData(kSymbolData_token_data, kSymbolData_token_size);
9687 #ifndef NO_USAGE_REWRITER
9788 packer.SetUsageRewriterData(kConjugationNum,
134134 unique_ptr<Range[]> range_table_items_;
135135 unique_ptr<SuffixToken[]> suffix_tokens_;
136136 unique_ptr<ReadingCorrectionItem[]> reading_corrections_;
137 size_t compressed_l_size_;
138 size_t compressed_r_size_;
139 unique_ptr<uint16[]> compressed_lid_table_;
140 unique_ptr<uint16[]> compressed_rid_table_;
141137 unique_ptr<EmbeddedDictionary::Value[]> symbol_data_values_;
142138 size_t symbol_data_token_size_;
143139 unique_ptr<EmbeddedDictionary::Token[]> symbol_data_tokens_;
154150 };
155151
156152 PackedDataManager::Impl::Impl()
157 : compressed_l_size_(0),
158 compressed_r_size_(0),
159 symbol_data_token_size_(0) {
153 : symbol_data_token_size_(0) {
160154 }
161155
162156 PackedDataManager::Impl::~Impl() {
331325 } else {
332326 reading_corrections_[i].correction = NULL;
333327 }
334 }
335
336 // Makes segment data.
337 const SystemDictionaryData::SegmenterData &segmenter_data =
338 system_dictionary_data_->segmenter_data();
339 compressed_l_size_ = segmenter_data.compressed_l_size();
340 compressed_r_size_ = segmenter_data.compressed_r_size();
341 compressed_lid_table_.reset(
342 new uint16[segmenter_data.compressed_lid_table_size()]);
343 for (size_t i = 0; i < segmenter_data.compressed_lid_table_size(); ++i) {
344 compressed_lid_table_[i] = segmenter_data.compressed_lid_table(i);
345 }
346 compressed_rid_table_.reset(
347 new uint16[segmenter_data.compressed_rid_table_size()]);
348 for (size_t i = 0; i < segmenter_data.compressed_rid_table_size(); ++i) {
349 compressed_rid_table_[i] = segmenter_data.compressed_rid_table(i);
350328 }
351329
352330 // Makes symbol dictionary data.
513491 const uint16 **l_table, const uint16 **r_table,
514492 size_t *bitarray_num_bytes, const char **bitarray_data,
515493 const uint16 **boundary_data) const {
516 *l_num_elements = compressed_l_size_;
517 *r_num_elements = compressed_r_size_;
518 *l_table = compressed_lid_table_.get();
519 *r_table = compressed_rid_table_.get();
520 *bitarray_num_bytes =
521 system_dictionary_data_->segmenter_data().bit_array_data().size();
522 *bitarray_data =
523 system_dictionary_data_->segmenter_data().bit_array_data().data();
524 *boundary_data = manager_.GetBoundaryData();
494 manager_.GetSegmenterData(l_num_elements, r_num_elements, l_table, r_table,
495 bitarray_num_bytes, bitarray_data, boundary_data);
525496 }
526497
527498 void PackedDataManager::Impl::GetSystemDictionaryData(
8080 };
8181 repeated ReadingCorrectionItem reading_corrections = 8;
8282
83 message SegmenterData {
84 optional uint32 compressed_l_size = 1;
85 optional uint32 compressed_r_size = 2;
86 repeated uint32 compressed_lid_table = 3;
87 repeated uint32 compressed_rid_table = 4;
88 optional bytes bit_array_data = 5;
89 }
90 optional SegmenterData segmenter_data = 9;
83 reserved 9; // DEPRECATED: optional SegmenterData segmenter_data = 9;
9184
9285 message EmbeddedDictionary {
9386 message Value {
152152 }
153153 }
154154
155 void SystemDictionaryDataPacker::SetSegmenterData(
156 size_t compressed_l_size,
157 size_t compressed_r_size,
158 const uint16 *compressed_lid_table,
159 size_t compressed_lid_table_size,
160 const uint16 *compressed_rid_table,
161 size_t compressed_rid_table_size,
162 const char *segmenter_bit_array_data,
163 size_t segmenter_bit_array_data_size) {
164 SystemDictionaryData::SegmenterData *segmenter =
165 system_dictionary_->mutable_segmenter_data();
166 segmenter->set_compressed_l_size(compressed_l_size);
167 segmenter->set_compressed_r_size(compressed_r_size);
168 for (size_t i = 0; i < compressed_lid_table_size; ++i) {
169 segmenter->add_compressed_lid_table(compressed_lid_table[i]);
170 }
171 for (size_t i = 0; i < compressed_rid_table_size; ++i) {
172 segmenter->add_compressed_rid_table(compressed_rid_table[i]);
173 }
174 segmenter->set_bit_array_data(segmenter_bit_array_data,
175 segmenter_bit_array_data_size);
176 }
177
178155 void SystemDictionaryDataPacker::SetSymbolRewriterData(
179156 const mozc::EmbeddedDictionary::Token *token_data,
180157 size_t token_size) {
6868 void SetReadingCorretions(
6969 const ReadingCorrectionItem *reading_corrections,
7070 size_t reading_corrections_count);
71 void SetSegmenterData(
72 size_t compressed_l_size,
73 size_t compressed_r_size,
74 const uint16 *compressed_lid_table,
75 size_t compressed_lid_table_size,
76 const uint16 *compressed_rid_table,
77 size_t compressed_rid_table_size,
78 const char *segmenter_bit_array_data,
79 size_t segmenter_bit_array_data_size);
8071 void SetSuggestionFilterData(
8172 const void *suggestion_filter_data,
8273 size_t suggestion_filter_data_size);
3232 namespace mozc {
3333 namespace packed {
3434
35 const int kSystemDictionaryFormatVersion = 12;
35 const int kSystemDictionaryFormatVersion = 13;
3636
3737 } // namespace packed
3838 } // namespace mozc
3131 #include "converter/gen_segmenter_bitarray.h"
3232
3333 namespace {
34 #include "data_manager/testing/mock_segmenter_inl.h"
34 #include "data_manager/testing/segmenter_inl.h"
3535 }
3636
37 DEFINE_string(output, "", "header filename for mock segmenter");
37 DEFINE_string(output_size_info, "", "Serialized SegmenterDataSizeInfo");
38 DEFINE_string(output_ltable, "", "LTable array");
39 DEFINE_string(output_rtable, "", "RTable array");
40 DEFINE_string(output_bitarray, "", "Segmenter bitarray");
3841
3942 int main(int argc, char **argv) {
4043 mozc::InitMozc(argv[0], &argc, &argv, true);
4144 mozc::SegmenterBitarrayGenerator::GenerateBitarray(
42 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output);
45 kLSize, kRSize, &IsBoundaryInternal, FLAGS_output_size_info,
46 FLAGS_output_ltable, FLAGS_output_rtable, FLAGS_output_bitarray);
4347 return 0;
4448 }
7878 manager_.GetSystemDictionaryData(data, size);
7979 }
8080
81 namespace {
82 // Automatically generated headers containing data set for segmenter.
83 #include "data_manager/testing/segmenter_data.h"
84 } // namespace
85
8681 void MockDataManager::GetSegmenterData(
8782 size_t *l_num_elements, size_t *r_num_elements,
8883 const uint16 **l_table, const uint16 **r_table,
8984 size_t *bitarray_num_bytes, const char **bitarray_data,
9085 const uint16 **boundary_data) const {
91 *l_num_elements = kCompressedLSize;
92 *r_num_elements = kCompressedRSize;
93 *l_table = kCompressedLIDTable;
94 *r_table = kCompressedRIDTable;
95 *bitarray_num_bytes = kSegmenterBitArrayData_size;
96 *bitarray_data = kSegmenterBitArrayData_data;
97 *boundary_data = manager_.GetBoundaryData();
86 manager_.GetSegmenterData(l_num_elements, r_num_elements,
87 l_table, r_table, bitarray_num_bytes,
88 bitarray_data, boundary_data);
9889 }
9990
10091 namespace {
3636 namespace testing {
3737
3838 namespace {
39 #include "data_manager/testing/mock_segmenter_inl.h"
39 #include "data_manager/testing/segmenter_inl.h"
4040 } // namespace
4141
4242 class MockDataManagerTest : public DataManagerTestBase {
00 MAJOR=2
11 MINOR=17
2 BUILD=2472
2 BUILD=2473
33 REVISION=102
44 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
55 # downloaded by NaCl Mozc.
6 NACL_DICTIONARY_VERSION=12
6 NACL_DICTIONARY_VERSION=13
208208 'genproto_user_dictionary_storage_proto#host',
209209 ],
210210 },
211 {
212 'target_name': 'genproto_segmenter_data_proto',
213 'type': 'none',
214 'toolsets': ['host'],
215 'sources': [
216 'segmenter_data.proto',
217 ],
218 'includes': [
219 '../protobuf/genproto.gypi',
220 ],
221 },
222 {
223 'target_name': 'segmenter_data_proto',
224 'type': 'static_library',
225 'toolsets': ['target', 'host'],
226 'hard_dependency': 1,
227 'sources': [
228 '<(proto_out_dir)/<(relative_dir)/segmenter_data.pb.cc',
229 ],
230 'dependencies': [
231 '../protobuf/protobuf.gyp:protobuf',
232 'genproto_segmenter_data_proto#host',
233 ],
234 'export_dependent_settings': [
235 'genproto_segmenter_data_proto#host',
236 ],
237 },
211238 ],
212239 }
0 // Copyright 2010-2016, Google Inc.
1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 syntax = "proto2";
30
31 package mozc.converter;
32
33 message SegmenterDataSizeInfo {
34 optional uint64 compressed_lsize = 1;
35 optional uint64 compressed_rsize = 2;
36 }