Introduce SerializedStringArray class
This class is used to serialize arrays of strings into a byte sequence in
such a way that no deserialization is required at runtime, unlike
protobuf's repeated string field.
BUG=
TEST=
REF_BUG=26841123
REF_CL=114716869
REF_TIME=2016-02-16T12:12:17+09:00
REF_TIME_RAW=1455592337 +0900
Noriyuki Takahashi
8 years ago
443 | 443 | 'debug.cc', |
444 | 444 | ], |
445 | 445 | }, |
446 | { | |
447 | 'target_name': 'serialized_string_array', | |
448 | 'type': 'static_library', | |
449 | 'toolsets': ['host', 'target'], | |
450 | 'sources': [ | |
451 | 'serialized_string_array.cc', | |
452 | ], | |
453 | }, | |
446 | 454 | ], |
447 | 455 | 'conditions': [ |
448 | 456 | ['target_platform=="Android"', { |
440 | 440 | 'install_embedded_file_h', |
441 | 441 | ], |
442 | 442 | }, |
443 | { | |
444 | 'target_name': 'serialized_string_array_test', | |
445 | 'type': 'executable', | |
446 | 'sources': [ | |
447 | 'serialized_string_array_test.cc', | |
448 | ], | |
449 | 'dependencies': [ | |
450 | '../testing/testing.gyp:gtest_main', | |
451 | 'base.gyp:base', | |
452 | 'base.gyp:serialized_string_array', | |
453 | ], | |
454 | }, | |
443 | 455 | # Test cases meta target: this target is referred from gyp/tests.gyp |
444 | 456 | { |
445 | 457 | 'target_name': 'base_all_test', |
459 | 471 | 'obfuscator_support_test', |
460 | 472 | 'scheduler_stub_test', |
461 | 473 | 'scheduler_test', |
474 | 'serialized_string_array_test', | |
462 | 475 | 'system_util_test', |
463 | 476 | 'trie_test', |
464 | 477 | 'update_util_test', |
0 | // Copyright 2010-2016, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #include "base/serialized_string_array.h" | |
30 | ||
31 | #include <memory> | |
32 | ||
33 | #include "base/logging.h" | |
34 | #include "base/port.h" | |
35 | #include "base/system_util.h" | |
36 | ||
37 | namespace mozc { | |
38 | namespace { | |
39 | ||
40 | const uint32 kEmptyArrayData = 0x00000000; | |
41 | ||
42 | } // namespace | |
43 | ||
44 | SerializedStringArray::SerializedStringArray() { | |
45 | DCHECK(SystemUtil::IsLittleEndian()) << "Little endian is assumed"; | |
46 | clear(); | |
47 | } | |
48 | ||
49 | SerializedStringArray::~SerializedStringArray() = default; | |
50 | ||
51 | bool SerializedStringArray::Init(StringPiece data_aligned_at_4byte_boundary) { | |
52 | if (VerifyData(data_aligned_at_4byte_boundary)) { | |
53 | data_ = data_aligned_at_4byte_boundary; | |
54 | return true; | |
55 | } | |
56 | clear(); | |
57 | return false; | |
58 | } | |
59 | ||
60 | void SerializedStringArray::Set(StringPiece data_aligned_at_4byte_boundary) { | |
61 | DCHECK(VerifyData(data_aligned_at_4byte_boundary)); | |
62 | data_ = data_aligned_at_4byte_boundary; | |
63 | } | |
64 | ||
65 | void SerializedStringArray::clear() { | |
66 | data_.set(reinterpret_cast<const char *>(&kEmptyArrayData), 4); | |
67 | } | |
68 | ||
69 | bool SerializedStringArray::VerifyData(StringPiece data) { | |
70 | if (data.size() < 4) { | |
71 | LOG(ERROR) << "Array size is missing"; | |
72 | return false; | |
73 | } | |
74 | const uint32 *u32_array = reinterpret_cast<const uint32 *>(data.data()); | |
75 | const uint32 size = u32_array[0]; | |
76 | ||
77 | const size_t min_required_data_size = 4 + (4 + 4) * size; | |
78 | if (data.size() < min_required_data_size) { | |
79 | LOG(ERROR) << "Lack of data. At least " << min_required_data_size | |
80 | << " bytes are required"; | |
81 | return false; | |
82 | } | |
83 | ||
84 | uint32 prev_str_end = min_required_data_size; | |
85 | for (uint32 i = 0; i < size; ++i) { | |
86 | const uint32 offset = u32_array[2 * i + 1]; | |
87 | const uint32 len = u32_array[2 * i + 2]; | |
88 | if (offset < prev_str_end) { | |
89 | LOG(ERROR) << "Invalid offset for string " << i << ": len = " << len | |
90 | << ", offset = " << offset; | |
91 | return false; | |
92 | } | |
93 | if (len >= data.size() || offset > data.size() - len) { | |
94 | LOG(ERROR) << "Invalid length for string " << i << ": len = " << len | |
95 | << ", offset = " << offset << ", " << data.size(); | |
96 | return false; | |
97 | } | |
98 | if (data[offset + len] != '\0') { | |
99 | LOG(ERROR) << "string[" << i << "] is not null-terminated"; | |
100 | return false; | |
101 | } | |
102 | prev_str_end = offset + len + 1; | |
103 | } | |
104 | ||
105 | return true; | |
106 | } | |
107 | ||
108 | StringPiece SerializedStringArray::SerializeToBuffer( | |
109 | const vector<StringPiece> &strs, std::unique_ptr<uint32[]> *buffer) { | |
110 | const size_t header_byte_size = 4 * (1 + 2 * strs.size()); | |
111 | ||
112 | // Calculate the offsets of each string. | |
113 | std::unique_ptr<uint32[]> offsets(new uint32[strs.size()]); | |
114 | size_t current_offset = header_byte_size; // The offset for first string. | |
115 | for (size_t i = 0; i < strs.size(); ++i) { | |
116 | offsets[i] = static_cast<uint32>(current_offset); | |
117 | // The next string is written after terminating '\0', so increment one byte | |
118 | // in addition to the string byte length. | |
119 | current_offset += strs[i].size() + 1; | |
120 | } | |
121 | ||
122 | // At this point, |current_offset| is the byte length of the whole binary | |
123 | // image. Allocate a necessary buffer as uint32 array. | |
124 | buffer->reset(new uint32[(current_offset + 3) / 4]); | |
125 | ||
126 | (*buffer)[0] = static_cast<uint32>(strs.size()); | |
127 | for (size_t i = 0; i < strs.size(); ++i) { | |
128 | // Fill offset and length. | |
129 | (*buffer)[2 * i + 1] = offsets[i]; | |
130 | (*buffer)[2 * i + 2] = static_cast<uint32>(strs[i].size()); | |
131 | ||
132 | // Copy string buffer at the calculated offset. Guarantee that the buffer | |
133 | // is null-terminated. | |
134 | char *dest = reinterpret_cast<char *>(buffer->get()) + offsets[i]; | |
135 | memcpy(dest, strs[i].data(), strs[i].size()); | |
136 | dest[strs[i].size()] = '\0'; | |
137 | } | |
138 | ||
139 | return StringPiece(reinterpret_cast<const char *>(buffer->get()), | |
140 | current_offset); | |
141 | } | |
142 | ||
143 | } // namespace mozc |
0 | // Copyright 2010-2016, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #ifndef MOZC_BASE_SERIALIZED_STRING_ARRAY_H_ | |
30 | #define MOZC_BASE_SERIALIZED_STRING_ARRAY_H_ | |
31 | ||
32 | #include <cstddef> | |
33 | #include <iterator> | |
34 | #include <memory> | |
35 | #include <utility> | |
36 | #include <vector> | |
37 | ||
38 | #include "base/logging.h" | |
39 | #include "base/port.h" | |
40 | #include "base/string_piece.h" | |
41 | ||
42 | namespace mozc { | |
43 | ||
44 | // Immutable array of strings serialized in binary image. This class is used to | |
45 | // serialize arrays of strings to byte sequence, and access the serialized | |
46 | // array, in such a way that no deserialization is required at runtime, which is | |
47 | // different from protobuf's repeated string field. | |
48 | // | |
49 | // * Prerequisite | |
50 | // Little endian is assumed. | |
51 | // | |
52 | // * Serialized data creation | |
53 | // To create a binary image, use SerializedStringArray::SerializeToBuffer() or | |
54 | // build_tools/serialized_string_array_builder.py. | |
55 | // | |
56 | // * Array access | |
57 | // At runtime, we can access array contents just by loading a binary image, | |
58 | // e.g., from a file, onto memory where the first address must be aligned at | |
59 | // 4-byte boundary. For array access, a similar interface to | |
60 | // vector<StringPiece> is available; e.g., operator[], size(), and iterator. | |
61 | // | |
62 | // * Binary format | |
63 | // The former block of size 4 + 8 * N bytes is an array of uint32 (in little | |
64 | // endian order) storing the array size and offset and length of each string; | |
65 | // see the diagram below. These data can be used to extract strings from the | |
66 | // latter block. | |
67 | // | |
68 | // +=====================================================================+ | |
69 | // | Number of elements N in array (4 byte) | | |
70 | // +---------------------------------------------------------------------+ | |
71 | // | Byte offset of string[0] (4 byte) | | |
72 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
73 | // | Byte length of string[0] (4 byte, excluding terminating '\0') | | |
74 | // +---------------------------------------------------------------------+ | |
75 | // | Byte offset of string[1] (4 byte) | | |
76 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
77 | // | Byte length of string[1] (4 byte, excluding terminating '\0') | | |
78 | // +---------------------------------------------------------------------+ | |
79 | // | . | | |
80 | // | . | | |
81 | // | . | | |
82 | // +---------------------------------------------------------------------+ | |
83 | // | Byte offset of string[N - 1] (4 byte) | | |
84 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
85 | // | Byte length of string[N - 1] (4 byte, excluding terminating '\0') | | |
86 | // +=====================================================================+ | |
87 | // | string[0] (Variable length) | | |
88 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
89 | // | '\0' (1 byte) | | |
90 | // +---------------------------------------------------------------------+ | |
91 | // | string[1] (Variable length) | | |
92 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
93 | // | '\0' (1 byte) | | |
94 | // +---------------------------------------------------------------------+ | |
95 | // | . | | |
96 | // | . | | |
97 | // | . | | |
98 | // +---------------------------------------------------------------------+ | |
99 | // | string[N - 1] (Variable length) | | |
100 | // + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + | |
101 | // | '\0' (1 byte) | | |
102 | // +=====================================================================+ | |
103 | class SerializedStringArray { | |
104 | public: | |
105 | class iterator { | |
106 | public: | |
107 | using value_type = StringPiece; | |
108 | using difference_type = ptrdiff_t; | |
109 | using pointer = const StringPiece *; | |
110 | using reference = const StringPiece &; | |
111 | using iterator_category = random_access_iterator_tag; | |
112 | ||
113 | iterator() : array_(nullptr), index_(0) {} | |
114 | iterator(const SerializedStringArray *array, size_t index) | |
115 | : array_(array), index_(index) {} | |
116 | iterator(const iterator &x) = default; | |
117 | ||
118 | StringPiece operator*() { return (*array_)[index_]; } | |
119 | StringPiece operator*() const { return (*array_)[index_]; } | |
120 | StringPiece operator[](difference_type n) const { | |
121 | return (*array_)[index_ + n]; | |
122 | } | |
123 | ||
124 | void swap(iterator &x) { | |
125 | using std::swap; | |
126 | swap(array_, x.array_); | |
127 | swap(index_, x.index_); | |
128 | } | |
129 | ||
130 | friend void swap(iterator &x, iterator &y) { x.swap(y); } | |
131 | ||
132 | iterator &operator++() { | |
133 | ++index_; | |
134 | return *this; | |
135 | } | |
136 | ||
137 | iterator operator++(int) { | |
138 | const size_t tmp = index_; | |
139 | ++index_; | |
140 | return iterator(array_, tmp); | |
141 | } | |
142 | ||
143 | iterator &operator--() { | |
144 | --index_; | |
145 | return *this; | |
146 | } | |
147 | ||
148 | iterator operator--(int) { | |
149 | const size_t tmp = index_; | |
150 | --index_; | |
151 | return iterator(array_, tmp); | |
152 | } | |
153 | ||
154 | iterator &operator+=(difference_type n) { | |
155 | index_ += n; | |
156 | return *this; | |
157 | } | |
158 | ||
159 | iterator &operator-=(difference_type n) { | |
160 | index_ -= n; | |
161 | return *this; | |
162 | } | |
163 | ||
164 | friend iterator operator+(iterator x, difference_type n) { | |
165 | return iterator(x.array_, x.index_ + n); | |
166 | } | |
167 | ||
168 | friend iterator operator+(difference_type n, iterator x) { | |
169 | return iterator(x.array_, x.index_ + n); | |
170 | } | |
171 | ||
172 | friend iterator operator-(iterator x, difference_type n) { | |
173 | return iterator(x.array_, x.index_ - n); | |
174 | } | |
175 | ||
176 | friend difference_type operator-(iterator x, iterator y) { | |
177 | return x.index_ - y.index_; | |
178 | } | |
179 | ||
180 | // The following comparison operators make sense only for iterators obtained | |
181 | // from the same array. | |
182 | friend bool operator==(iterator x, iterator y) { | |
183 | DCHECK_EQ(x.array_, y.array_); | |
184 | return x.index_ == y.index_; | |
185 | } | |
186 | ||
187 | friend bool operator!=(iterator x, iterator y) { | |
188 | DCHECK_EQ(x.array_, y.array_); | |
189 | return x.index_ != y.index_; | |
190 | } | |
191 | ||
192 | friend bool operator<(iterator x, iterator y) { | |
193 | DCHECK_EQ(x.array_, y.array_); | |
194 | return x.index_ < y.index_; | |
195 | } | |
196 | ||
197 | friend bool operator<=(iterator x, iterator y) { | |
198 | DCHECK_EQ(x.array_, y.array_); | |
199 | return x.index_ <= y.index_; | |
200 | } | |
201 | ||
202 | friend bool operator>(iterator x, iterator y) { | |
203 | DCHECK_EQ(x.array_, y.array_); | |
204 | return x.index_ > y.index_; | |
205 | } | |
206 | ||
207 | friend bool operator>=(iterator x, iterator y) { | |
208 | DCHECK_EQ(x.array_, y.array_); | |
209 | return x.index_ >= y.index_; | |
210 | } | |
211 | ||
212 | private: | |
213 | const SerializedStringArray *array_; | |
214 | size_t index_; | |
215 | }; | |
216 | ||
217 | using const_iterator = iterator; | |
218 | ||
219 | SerializedStringArray(); // Default is an empty array. | |
220 | ~SerializedStringArray(); | |
221 | ||
222 | // Initializes the array from given memory block. The block must be aligned | |
223 | // at 4 byte boundary. Returns false when the data is invalid. | |
224 | bool Init(StringPiece data_aligned_at_4byte_boundary); | |
225 | ||
226 | // Initializes the array from given memory block without verifying data. | |
227 | void Set(StringPiece data_aligned_at_4byte_boundary); | |
228 | ||
229 | uint32 size() const { | |
230 | // The first 4 bytes of data stores the number of elements in this array in | |
231 | // little endian order. | |
232 | return *reinterpret_cast<const uint32 *>(data_.data()); | |
233 | } | |
234 | ||
235 | StringPiece operator[](size_t i) const { | |
236 | const uint32 *ptr = reinterpret_cast<const uint32 *>(data_.data()) + 1; | |
237 | const uint32 offset = ptr[2 * i]; | |
238 | const uint32 len = ptr[2 * i + 1]; | |
239 | return data_.substr(offset, len); | |
240 | } | |
241 | ||
242 | bool empty() const { return size() == 0; } | |
243 | StringPiece data() const { return data_; } | |
244 | void clear(); | |
245 | ||
246 | iterator begin() { return iterator(this, 0); } | |
247 | iterator end() { return iterator(this, size()); } | |
248 | const_iterator begin() const { return const_iterator(this, 0); } | |
249 | const_iterator end() const { return const_iterator(this, size()); } | |
250 | ||
251 | // Checks if the data is a valid array image. | |
252 | static bool VerifyData(StringPiece data); | |
253 | ||
254 | // Creates a byte image of |strs| in |buffer| and returns the memory block in | |
255 | // |buffer| pointing to the image. Note that uint32 array is used for buffer | |
256 | // to align data at 4 byte boundary. | |
257 | static StringPiece SerializeToBuffer(const vector<StringPiece> &strs, | |
258 | std::unique_ptr<uint32[]> *buffer); | |
259 | ||
260 | private: | |
261 | StringPiece data_; | |
262 | ||
263 | DISALLOW_COPY_AND_ASSIGN(SerializedStringArray); | |
264 | }; | |
265 | ||
266 | } // namespace mozc | |
267 | ||
268 | #endif // MOZC_BASE_SERIALIZED_STRING_ARRAY_H_ |
0 | // Copyright 2010-2016, Google Inc. | |
1 | // All rights reserved. | |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are | |
5 | // met: | |
6 | // | |
7 | // * Redistributions of source code must retain the above copyright | |
8 | // notice, this list of conditions and the following disclaimer. | |
9 | // * Redistributions in binary form must reproduce the above | |
10 | // copyright notice, this list of conditions and the following disclaimer | |
11 | // in the documentation and/or other materials provided with the | |
12 | // distribution. | |
13 | // * Neither the name of Google Inc. nor the names of its | |
14 | // contributors may be used to endorse or promote products derived from | |
15 | // this software without specific prior written permission. | |
16 | // | |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ||
29 | #include "base/serialized_string_array.h" | |
30 | ||
31 | #include <algorithm> | |
32 | #include <cstring> | |
33 | #include <memory> | |
34 | #include <string> | |
35 | ||
36 | #include "base/port.h" | |
37 | #include "base/string_piece.h" | |
38 | #include "testing/base/public/gunit.h" | |
39 | ||
40 | namespace mozc { | |
41 | namespace { | |
42 | ||
43 | class SerializedStringArrayTest : public ::testing::Test { | |
44 | protected: | |
45 | StringPiece AlignString(const char *s, size_t len) { | |
46 | return AlignString(string(s, len)); | |
47 | } | |
48 | ||
49 | StringPiece AlignString(const string &s) { | |
50 | buf_.reset(new uint32[(s.size() + 3) / 4]); | |
51 | return StringPiece( | |
52 | static_cast<const char *>(memcpy(buf_.get(), s.data(), s.size())), | |
53 | s.size()); | |
54 | } | |
55 | ||
56 | private: | |
57 | std::unique_ptr<uint32[]> buf_; | |
58 | }; | |
59 | ||
60 | TEST_F(SerializedStringArrayTest, DefaultConstructor) { | |
61 | SerializedStringArray a; | |
62 | EXPECT_TRUE(a.empty()); | |
63 | EXPECT_EQ(0, a.size()); | |
64 | } | |
65 | ||
66 | TEST_F(SerializedStringArrayTest, EmptyArray) { | |
67 | const StringPiece data = AlignString("\x00\x00\x00\x00", 4); | |
68 | ASSERT_TRUE(SerializedStringArray::VerifyData(data)); | |
69 | ||
70 | SerializedStringArray a; | |
71 | ASSERT_TRUE(a.Init(data)); | |
72 | EXPECT_TRUE(a.empty()); | |
73 | EXPECT_EQ(0, a.size()); | |
74 | } | |
75 | ||
// Expected binary image of {"Hello", "Mozc", "google"}.  The header takes
// 4 + 8 * 3 = 28 bytes, so the first string starts at offset 28.
const char kTestData[] =
    // Header: element count followed by (offset, length) pairs.
    "\x03\x00\x00\x00"  // Array size = 3
    "\x1c\x00\x00\x00"  // string[0]: offset = 28
    "\x05\x00\x00\x00"  // string[0]: len = 5
    "\x22\x00\x00\x00"  // string[1]: offset = 34
    "\x04\x00\x00\x00"  // string[1]: len = 4
    "\x27\x00\x00\x00"  // string[2]: offset = 39
    "\x06\x00\x00\x00"  // string[2]: len = 6
    // Strings, each followed by an explicit terminating '\0'.
    "Hello\0"
    "Mozc\0"
    "google\0";
84 | ||
85 | TEST_F(SerializedStringArrayTest, SerializeToBuffer) { | |
86 | std::unique_ptr<uint32[]> buf; | |
87 | const StringPiece actual = SerializedStringArray::SerializeToBuffer( | |
88 | {"Hello", "Mozc", "google"}, &buf); | |
89 | const StringPiece expected(kTestData, arraysize(kTestData) - 1); | |
90 | EXPECT_EQ(expected, actual); | |
91 | } | |
92 | ||
93 | TEST_F(SerializedStringArrayTest, Basic) { | |
94 | const StringPiece data = | |
95 | AlignString(string(kTestData, arraysize(kTestData) - 1)); | |
96 | ||
97 | ASSERT_TRUE(SerializedStringArray::VerifyData(data)); | |
98 | ||
99 | SerializedStringArray a; | |
100 | ASSERT_TRUE(a.Init(data)); | |
101 | ASSERT_EQ(3, a.size()); | |
102 | EXPECT_EQ("Hello", a[0]); | |
103 | EXPECT_EQ("Mozc", a[1]); | |
104 | EXPECT_EQ("google", a[2]); | |
105 | ||
106 | SerializedStringArray b; | |
107 | b.Set(a.data()); | |
108 | ASSERT_EQ(3, b.size()); | |
109 | EXPECT_EQ("Hello", b[0]); | |
110 | EXPECT_EQ("Mozc", b[1]); | |
111 | EXPECT_EQ("google", b[2]); | |
112 | ||
113 | a.clear(); | |
114 | EXPECT_TRUE(a.empty()); | |
115 | EXPECT_EQ(0, a.size()); | |
116 | } | |
117 | ||
118 | TEST_F(SerializedStringArrayTest, Iterator) { | |
119 | const StringPiece data = | |
120 | AlignString(string(kTestData, arraysize(kTestData) - 1)); | |
121 | ||
122 | ASSERT_TRUE(SerializedStringArray::VerifyData(data)); | |
123 | ||
124 | SerializedStringArray a; | |
125 | ASSERT_TRUE(a.Init(data)); | |
126 | { | |
127 | auto iter = a.begin(); | |
128 | ASSERT_NE(a.end(), iter); | |
129 | EXPECT_EQ("Hello", *iter); | |
130 | ++iter; | |
131 | ASSERT_NE(a.end(), iter); | |
132 | EXPECT_EQ("Mozc", *iter); | |
133 | ++iter; | |
134 | ASSERT_NE(a.end(), iter); | |
135 | EXPECT_EQ("google", *iter); | |
136 | ++iter; | |
137 | EXPECT_EQ(a.end(), iter); | |
138 | } | |
139 | EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "Hello")); | |
140 | EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "Mozc")); | |
141 | EXPECT_TRUE(std::binary_search(a.begin(), a.end(), "google")); | |
142 | EXPECT_FALSE(std::binary_search(a.begin(), a.end(), "Japan")); | |
143 | } | |
144 | ||
145 | } // namespace | |
146 | } // namespace mozc |
0 | # -*- coding: utf-8 -*- | |
1 | # Copyright 2010-2016, Google Inc. | |
2 | # All rights reserved. | |
3 | # | |
4 | # Redistribution and use in source and binary forms, with or without | |
5 | # modification, are permitted provided that the following conditions are | |
6 | # met: | |
7 | # | |
8 | # * Redistributions of source code must retain the above copyright | |
9 | # notice, this list of conditions and the following disclaimer. | |
10 | # * Redistributions in binary form must reproduce the above | |
11 | # copyright notice, this list of conditions and the following disclaimer | |
12 | # in the documentation and/or other materials provided with the | |
13 | # distribution. | |
14 | # * Neither the name of Google Inc. nor the names of its | |
15 | # contributors may be used to endorse or promote products derived from | |
16 | # this software without specific prior written permission. | |
17 | # | |
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | ||
30 | """Generate a binary image of SerializedStringArray.""" | |
31 | ||
32 | import struct | |
33 | ||
34 | ||
def SerializeToFile(strings, filename):
  """Builds a binary image of strings.

  For file format, see base/serialized_string_array.h.

  The image is: a 4-byte little endian element count, then one pair of 4-byte
  little endian (byte offset, byte length) per string, then the strings
  themselves, each followed by a single '\\0' byte.

  Args:
    strings: A list of strings to be serialized.  Byte strings are written
      as-is; text (unicode) strings are encoded as UTF-8 first so that the
      recorded offsets and lengths are byte counts.
    filename: Output binary file.
  """
  # Normalize to bytes up front: lengths and offsets in the image are in
  # bytes, which differs from the character count for non-ASCII text.
  encoded = [s if isinstance(s, bytes) else s.encode('utf-8') for s in strings]
  array_size = len(encoded)

  # Precompute offsets and lengths.
  offsets = []
  lengths = []
  offset = 4 + 8 * array_size  # The start offset of strings chunk
  for s in encoded:
    offsets.append(offset)
    lengths.append(len(s))
    offset += len(s) + 1  # Include one byte for the trailing '\0'

  with open(filename, 'wb') as f:
    # 4-byte array_size.
    f.write(struct.pack('<I', array_size))

    # Offset and length array of (4 + 4) * array_size bytes.
    for str_offset, str_length in zip(offsets, lengths):
      f.write(struct.pack('<II', str_offset, str_length))

    # Strings chunk.
    for s in encoded:
      f.write(s)
      f.write(b'\0')