Codebase list mozc / a54a6bc
Introduce SerializedStringArray class This class is used to serialize arrays of strings to byte sequence in such a way that no deserialization is required at runtime, which is different from protobuf's repeated string field. BUG= TEST= REF_BUG=26841123 REF_CL=114716869 REF_TIME=2016-02-16T12:12:17+09:00 REF_TIME_RAW=1455592337 +0900 Noriyuki Takahashi 8 years ago
7 changed file(s) with 650 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
443443 'debug.cc',
444444 ],
445445 },
446 {
447 'target_name': 'serialized_string_array',
448 'type': 'static_library',
449 'toolsets': ['host', 'target'],
450 'sources': [
451 'serialized_string_array.cc',
452 ],
453 },
446454 ],
447455 'conditions': [
448456 ['target_platform=="Android"', {
440440 'install_embedded_file_h',
441441 ],
442442 },
443 {
444 'target_name': 'serialized_string_array_test',
445 'type': 'executable',
446 'sources': [
447 'serialized_string_array_test.cc',
448 ],
449 'dependencies': [
450 '../testing/testing.gyp:gtest_main',
451 'base.gyp:base',
452 'base.gyp:serialized_string_array',
453 ],
454 },
443455 # Test cases meta target: this target is referred from gyp/tests.gyp
444456 {
445457 'target_name': 'base_all_test',
459471 'obfuscator_support_test',
460472 'scheduler_stub_test',
461473 'scheduler_test',
474 'serialized_string_array_test',
462475 'system_util_test',
463476 'trie_test',
464477 'update_util_test',
0 // Copyright 2010-2016, Google Inc.
1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #include "base/serialized_string_array.h"
30
31 #include <memory>
32
33 #include "base/logging.h"
34 #include "base/port.h"
35 #include "base/system_util.h"
36
37 namespace mozc {
38 namespace {
39
40 const uint32 kEmptyArrayData = 0x00000000;
41
42 } // namespace
43
44 SerializedStringArray::SerializedStringArray() {
45 DCHECK(SystemUtil::IsLittleEndian()) << "Little endian is assumed";
46 clear();
47 }
48
49 SerializedStringArray::~SerializedStringArray() = default;
50
51 bool SerializedStringArray::Init(StringPiece data_aligned_at_4byte_boundary) {
52 if (VerifyData(data_aligned_at_4byte_boundary)) {
53 data_ = data_aligned_at_4byte_boundary;
54 return true;
55 }
56 clear();
57 return false;
58 }
59
60 void SerializedStringArray::Set(StringPiece data_aligned_at_4byte_boundary) {
61 DCHECK(VerifyData(data_aligned_at_4byte_boundary));
62 data_ = data_aligned_at_4byte_boundary;
63 }
64
65 void SerializedStringArray::clear() {
66 data_.set(reinterpret_cast<const char *>(&kEmptyArrayData), 4);
67 }
68
69 bool SerializedStringArray::VerifyData(StringPiece data) {
70 if (data.size() < 4) {
71 LOG(ERROR) << "Array size is missing";
72 return false;
73 }
74 const uint32 *u32_array = reinterpret_cast<const uint32 *>(data.data());
75 const uint32 size = u32_array[0];
76
77 const size_t min_required_data_size = 4 + (4 + 4) * size;
78 if (data.size() < min_required_data_size) {
79 LOG(ERROR) << "Lack of data. At least " << min_required_data_size
80 << " bytes are required";
81 return false;
82 }
83
84 uint32 prev_str_end = min_required_data_size;
85 for (uint32 i = 0; i < size; ++i) {
86 const uint32 offset = u32_array[2 * i + 1];
87 const uint32 len = u32_array[2 * i + 2];
88 if (offset < prev_str_end) {
89 LOG(ERROR) << "Invalid offset for string " << i << ": len = " << len
90 << ", offset = " << offset;
91 return false;
92 }
93 if (len >= data.size() || offset > data.size() - len) {
94 LOG(ERROR) << "Invalid length for string " << i << ": len = " << len
95 << ", offset = " << offset << ", " << data.size();
96 return false;
97 }
98 if (data[offset + len] != '\0') {
99 LOG(ERROR) << "string[" << i << "] is not null-terminated";
100 return false;
101 }
102 prev_str_end = offset + len + 1;
103 }
104
105 return true;
106 }
107
108 StringPiece SerializedStringArray::SerializeToBuffer(
109 const vector<StringPiece> &strs, std::unique_ptr<uint32[]> *buffer) {
110 const size_t header_byte_size = 4 * (1 + 2 * strs.size());
111
112 // Calculate the offsets of each string.
113 std::unique_ptr<uint32[]> offsets(new uint32[strs.size()]);
114 size_t current_offset = header_byte_size; // The offset for first string.
115 for (size_t i = 0; i < strs.size(); ++i) {
116 offsets[i] = static_cast<uint32>(current_offset);
117 // The next string is written after terminating '\0', so increment one byte
118 // in addition to the string byte length.
119 current_offset += strs[i].size() + 1;
120 }
121
122 // At this point, |current_offset| is the byte length of the whole binary
123 // image. Allocate a necessary buffer as uint32 array.
124 buffer->reset(new uint32[(current_offset + 3) / 4]);
125
126 (*buffer)[0] = static_cast<uint32>(strs.size());
127 for (size_t i = 0; i < strs.size(); ++i) {
128 // Fill offset and length.
129 (*buffer)[2 * i + 1] = offsets[i];
130 (*buffer)[2 * i + 2] = static_cast<uint32>(strs[i].size());
131
132 // Copy string buffer at the calculated offset. Guarantee that the buffer
133 // is null-terminated.
134 char *dest = reinterpret_cast<char *>(buffer->get()) + offsets[i];
135 memcpy(dest, strs[i].data(), strs[i].size());
136 dest[strs[i].size()] = '\0';
137 }
138
139 return StringPiece(reinterpret_cast<const char *>(buffer->get()),
140 current_offset);
141 }
142
143 } // namespace mozc
0 // Copyright 2010-2016, Google Inc.
1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #ifndef MOZC_BASE_SERIALIZED_STRING_ARRAY_H_
30 #define MOZC_BASE_SERIALIZED_STRING_ARRAY_H_
31
32 #include <cstddef>
33 #include <iterator>
34 #include <memory>
35 #include <utility>
36 #include <vector>
37
38 #include "base/logging.h"
39 #include "base/port.h"
40 #include "base/string_piece.h"
41
42 namespace mozc {
43
44 // Immutable array of strings serialized in binary image. This class is used to
45 // serialize arrays of strings to byte sequence, and access the serialized
46 // array, in such a way that no deserialization is required at runtime, which is
47 // different from protobuf's repeated string field.
48 //
49 // * Prerequisite
50 // Little endian is assumed.
51 //
52 // * Serialized data creation
53 // To create a binary image, use SerializedStringArray::SerializeToBuffer() or
54 // build_tools/serialized_string_array_builder.py.
55 //
56 // * Array access
57 // At runtime, we can access array contents just by loading a binary image,
58 // e.g., from a file, onto memory where the first address must be aligned at
59 // 4-byte boundary. For array access, a similar interface to
60 // vector<StringPiece> is available; e.g., operator[], size(), and iterator.
61 //
62 // * Binary format
63 // The former block of size 4 + 8 * N bytes is an array of uint32 (in little
64 // endian order) storing the array size and offset and length of each string;
65 // see the diagram below. These data can be used to extract strings from the
66 // latter block.
67 //
68 // +=====================================================================+
69 // | Number of elements N in array (4 byte) |
70 // +---------------------------------------------------------------------+
71 // | Byte offset of string[0] (4 byte) |
72 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
73 // | Byte length of string[0] (4 byte, excluding terminating '\0') |
74 // +---------------------------------------------------------------------+
75 // | Byte offset of string[1] (4 byte) |
76 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
77 // | Byte length of string[1] (4 byte, excluding terminating '\0') |
78 // +---------------------------------------------------------------------+
79 // | . |
80 // | . |
81 // | . |
82 // +---------------------------------------------------------------------+
83 // | Byte offset of string[N - 1] (4 byte) |
84 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
85 // | Byte length of string[N - 1] (4 byte, excluding terminating '\0') |
86 // +=====================================================================+
87 // | string[0] (Variable length) |
88 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
89 // | '\0' (1 byte) |
90 // +---------------------------------------------------------------------+
91 // | string[1] (Variable length) |
92 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
93 // | '\0' (1 byte) |
94 // +---------------------------------------------------------------------+
95 // | . |
96 // | . |
97 // | . |
98 // +---------------------------------------------------------------------+
99 // | string[N - 1] (Variable length) |
100 // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
101 // | '\0' (1 byte) |
102 // +=====================================================================+
103 class SerializedStringArray {
104 public:
105 class iterator {
106 public:
107 using value_type = StringPiece;
108 using difference_type = ptrdiff_t;
109 using pointer = const StringPiece *;
110 using reference = const StringPiece &;
111 using iterator_category = random_access_iterator_tag;
112
113 iterator() : array_(nullptr), index_(0) {}
114 iterator(const SerializedStringArray *array, size_t index)
115 : array_(array), index_(index) {}
116 iterator(const iterator &x) = default;
117
118 StringPiece operator*() { return (*array_)[index_]; }
119 StringPiece operator*() const { return (*array_)[index_]; }
120 StringPiece operator[](difference_type n) const {
121 return (*array_)[index_ + n];
122 }
123
124 void swap(iterator &x) {
125 using std::swap;
126 swap(array_, x.array_);
127 swap(index_, x.index_);
128 }
129
130 friend void swap(iterator &x, iterator &y) { x.swap(y); }
131
132 iterator &operator++() {
133 ++index_;
134 return *this;
135 }
136
137 iterator operator++(int) {
138 const size_t tmp = index_;
139 ++index_;
140 return iterator(array_, tmp);
141 }
142
143 iterator &operator--() {
144 --index_;
145 return *this;
146 }
147
148 iterator operator--(int) {
149 const size_t tmp = index_;
150 --index_;
151 return iterator(array_, tmp);
152 }
153
154 iterator &operator+=(difference_type n) {
155 index_ += n;
156 return *this;
157 }
158
159 iterator &operator-=(difference_type n) {
160 index_ -= n;
161 return *this;
162 }
163
164 friend iterator operator+(iterator x, difference_type n) {
165 return iterator(x.array_, x.index_ + n);
166 }
167
168 friend iterator operator+(difference_type n, iterator x) {
169 return iterator(x.array_, x.index_ + n);
170 }
171
172 friend iterator operator-(iterator x, difference_type n) {
173 return iterator(x.array_, x.index_ - n);
174 }
175
176 friend difference_type operator-(iterator x, iterator y) {
177 return x.index_ - y.index_;
178 }
179
180 // The following comparison operators make sense only for iterators obtained
181 // from the same array.
182 friend bool operator==(iterator x, iterator y) {
183 DCHECK_EQ(x.array_, y.array_);
184 return x.index_ == y.index_;
185 }
186
187 friend bool operator!=(iterator x, iterator y) {
188 DCHECK_EQ(x.array_, y.array_);
189 return x.index_ != y.index_;
190 }
191
192 friend bool operator<(iterator x, iterator y) {
193 DCHECK_EQ(x.array_, y.array_);
194 return x.index_ < y.index_;
195 }
196
197 friend bool operator<=(iterator x, iterator y) {
198 DCHECK_EQ(x.array_, y.array_);
199 return x.index_ <= y.index_;
200 }
201
202 friend bool operator>(iterator x, iterator y) {
203 DCHECK_EQ(x.array_, y.array_);
204 return x.index_ > y.index_;
205 }
206
207 friend bool operator>=(iterator x, iterator y) {
208 DCHECK_EQ(x.array_, y.array_);
209 return x.index_ >= y.index_;
210 }
211
212 private:
213 const SerializedStringArray *array_;
214 size_t index_;
215 };
216
217 using const_iterator = iterator;
218
219 SerializedStringArray(); // Default is an empty array.
220 ~SerializedStringArray();
221
222 // Initializes the array from given memory block. The block must be aligned
223 // at 4 byte boundary. Returns false when the data is invalid.
224 bool Init(StringPiece data_aligned_at_4byte_boundary);
225
226 // Initializes the array from given memory block without verifying data.
227 void Set(StringPiece data_aligned_at_4byte_boundary);
228
229 uint32 size() const {
230 // The first 4 bytes of data stores the number of elements in this array in
231 // little endian order.
232 return *reinterpret_cast<const uint32 *>(data_.data());
233 }
234
235 StringPiece operator[](size_t i) const {
236 const uint32 *ptr = reinterpret_cast<const uint32 *>(data_.data()) + 1;
237 const uint32 offset = ptr[2 * i];
238 const uint32 len = ptr[2 * i + 1];
239 return data_.substr(offset, len);
240 }
241
242 bool empty() const { return size() == 0; }
243 StringPiece data() const { return data_; }
244 void clear();
245
246 iterator begin() { return iterator(this, 0); }
247 iterator end() { return iterator(this, size()); }
248 const_iterator begin() const { return const_iterator(this, 0); }
249 const_iterator end() const { return const_iterator(this, size()); }
250
251 // Checks if the data is a valid array image.
252 static bool VerifyData(StringPiece data);
253
254 // Creates a byte image of |strs| in |buffer| and returns the memory block in
255 // |buffer| pointing to the image. Note that uint32 array is used for buffer
256 // to align data at 4 byte boundary.
257 static StringPiece SerializeToBuffer(const vector<StringPiece> &strs,
258 std::unique_ptr<uint32[]> *buffer);
259
260 private:
261 StringPiece data_;
262
263 DISALLOW_COPY_AND_ASSIGN(SerializedStringArray);
264 };
265
266 } // namespace mozc
267
268 #endif // MOZC_BASE_SERIALIZED_STRING_ARRAY_H_
0 // Copyright 2010-2016, Google Inc.
1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #include "base/serialized_string_array.h"
30
31 #include <algorithm>
32 #include <cstring>
33 #include <memory>
34 #include <string>
35
36 #include "base/port.h"
37 #include "base/string_piece.h"
38 #include "testing/base/public/gunit.h"
39
40 namespace mozc {
41 namespace {
42
43 class SerializedStringArrayTest : public ::testing::Test {
44 protected:
45 StringPiece AlignString(const char *s, size_t len) {
46 return AlignString(string(s, len));
47 }
48
49 StringPiece AlignString(const string &s) {
50 buf_.reset(new uint32[(s.size() + 3) / 4]);
51 return StringPiece(
52 static_cast<const char *>(memcpy(buf_.get(), s.data(), s.size())),
53 s.size());
54 }
55
56 private:
57 std::unique_ptr<uint32[]> buf_;
58 };
59
60 TEST_F(SerializedStringArrayTest, DefaultConstructor) {
61 SerializedStringArray a;
62 EXPECT_TRUE(a.empty());
63 EXPECT_EQ(0, a.size());
64 }
65
66 TEST_F(SerializedStringArrayTest, EmptyArray) {
67 const StringPiece data = AlignString("\x00\x00\x00\x00", 4);
68 ASSERT_TRUE(SerializedStringArray::VerifyData(data));
69
70 SerializedStringArray a;
71 ASSERT_TRUE(a.Init(data));
72 EXPECT_TRUE(a.empty());
73 EXPECT_EQ(0, a.size());
74 }
75
76 const char kTestData[] =
77 "\x03\x00\x00\x00" // Array size = 3
78 "\x1c\x00\x00\x00\x05\x00\x00\x00" // (28, 5)
79 "\x22\x00\x00\x00\x04\x00\x00\x00" // (34, 4)
80 "\x27\x00\x00\x00\x06\x00\x00\x00" // (39, 6)
81 "Hello\0" // offset = 28, len = 5
82 "Mozc\0" // offset = 34, len = 4
83 "google\0"; // offset = 39, len = 6
84
85 TEST_F(SerializedStringArrayTest, SerializeToBuffer) {
86 std::unique_ptr<uint32[]> buf;
87 const StringPiece actual = SerializedStringArray::SerializeToBuffer(
88 {"Hello", "Mozc", "google"}, &buf);
89 const StringPiece expected(kTestData, arraysize(kTestData) - 1);
90 EXPECT_EQ(expected, actual);
91 }
92
93 TEST_F(SerializedStringArrayTest, Basic) {
94 const StringPiece data =
95 AlignString(string(kTestData, arraysize(kTestData) - 1));
96
97 ASSERT_TRUE(SerializedStringArray::VerifyData(data));
98
99 SerializedStringArray a;
100 ASSERT_TRUE(a.Init(data));
101 ASSERT_EQ(3, a.size());
102 EXPECT_EQ("Hello", a[0]);
103 EXPECT_EQ("Mozc", a[1]);
104 EXPECT_EQ("google", a[2]);
105
106 SerializedStringArray b;
107 b.Set(a.data());
108 ASSERT_EQ(3, b.size());
109 EXPECT_EQ("Hello", b[0]);
110 EXPECT_EQ("Mozc", b[1]);
111 EXPECT_EQ("google", b[2]);
112
113 a.clear();
114 EXPECT_TRUE(a.empty());
115 EXPECT_EQ(0, a.size());
116 }
117
118 TEST_F(SerializedStringArrayTest, Iterator) {
119 const StringPiece data =
120 AlignString(string(kTestData, arraysize(kTestData) - 1));
121
122 ASSERT_TRUE(SerializedStringArray::VerifyData(data));
123
124 SerializedStringArray a;
125 ASSERT_TRUE(a.Init(data));
126 {
127 auto iter = a.begin();
128 ASSERT_NE(a.end(), iter);
129 EXPECT_EQ("Hello", *iter);
130 ++iter;
131 ASSERT_NE(a.end(), iter);
132 EXPECT_EQ("Mozc", *iter);
133 ++iter;
134 ASSERT_NE(a.end(), iter);
135 EXPECT_EQ("google", *iter);
136 ++iter;
137 EXPECT_EQ(a.end(), iter);
138 }
139 EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "Hello"));
140 EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "Mozc"));
141 EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "google"));
142 EXPECT_FALSE(std::binary_search(a.begin(), a.end(), "Japan"));
143 }
144
145 } // namespace
146 } // namespace mozc
0 # -*- coding: utf-8 -*-
1 # Copyright 2010-2016, Google Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Generate a binary image of SerializedStringArray."""
31
32 import struct
33
34
def SerializeToFile(strings, filename):
  """Builds a binary image of strings and writes it to a file.

  For file format, see base/serialized_string_array.h.

  Offsets and lengths in the image are counted in bytes, so text strings are
  encoded to UTF-8 before they are measured and written.  Byte strings are
  written as they are.

  Args:
    strings: A list of strings to be serialized.  Each element is either a
      byte string or a text string.
    filename: Output binary file.
  """
  # Normalize every element to bytes first.  Measuring the encoded form keeps
  # the length table consistent with what is written to the file, and makes
  # the function work on both Python 2 and Python 3.
  encoded = [s if isinstance(s, bytes) else s.encode('utf-8') for s in strings]
  array_size = len(encoded)

  # Precompute offsets and lengths.
  offsets = []
  lengths = []
  offset = 4 + 8 * array_size  # The start offset of strings chunk
  for s in encoded:
    offsets.append(offset)
    lengths.append(len(s))
    offset += len(s) + 1  # Include one byte for the trailing '\0'

  with open(filename, 'wb') as f:
    # 4-byte array_size.
    f.write(struct.pack('<I', array_size))

    # Offset and length array of (4 + 4) * array_size bytes.
    for str_offset, str_length in zip(offsets, lengths):
      f.write(struct.pack('<II', str_offset, str_length))

    # Strings chunk.
    for s in encoded:
      f.write(s)
      f.write(b'\0')
00 MAJOR=2
11 MINOR=17
2 BUILD=2496
2 BUILD=2497
33 REVISION=102
44 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
55 # downloaded by NaCl Mozc.