Add lot o javadocs and some assertions
Thomas E. Enebo
14 years ago
30 | 30 | ***** END LICENSE BLOCK *****/ |
31 | 31 | package org.jruby.util; |
32 | 32 | |
33 | import java.io.EOFException; | |
34 | 33 | import java.io.IOException; |
35 | 34 | import java.io.InputStream; |
36 | 35 | import java.io.Serializable; |
37 | import java.io.UnsupportedEncodingException; | |
38 | 36 | import java.nio.ByteBuffer; |
39 | 37 | import java.nio.CharBuffer; |
40 | 38 | import java.nio.charset.Charset; |
46 | 44 | import org.jcodings.specific.ASCIIEncoding; |
47 | 45 | |
48 | 46 | /** |
49 | * | |
50 | * @author headius | |
47 | * ByteList is simply a collection of bytes in the same way a Java String is a collection | |
48 | * of characters. However, its API resembles StringBuffer/StringBuilder more than String | |
49 | * because it is a mutable object. | |
51 | 50 | */ |
52 | 51 | @SuppressWarnings("deprecation") |
53 | 52 | public final class ByteList implements Comparable, CharSequence, Serializable { |
58 | 57 | |
59 | 58 | @Deprecated |
60 | 59 | public byte[] bytes; |
60 | ||
61 | 61 | @Deprecated |
62 | 62 | public int begin; |
63 | ||
63 | 64 | @Deprecated |
64 | 65 | public int realSize; |
66 | ||
65 | 67 | @Deprecated |
66 | 68 | public Encoding encoding = ASCIIEncoding.INSTANCE; |
69 | ||
67 | 70 | int hash; |
71 | ||
68 | 72 | String stringValue; |
69 | 73 | |
70 | 74 | private static final int DEFAULT_SIZE = 4; |
71 | 75 | |
72 | /** Creates a new instance of ByteList */ | |
76 | /** | |
77 | * Creates a new instance of ByteList | |
78 | */ | |
73 | 79 | public ByteList() { |
74 | 80 | this(DEFAULT_SIZE); |
75 | 81 | } |
76 | 82 | |
83 | /** | |
84 | * Creates a new instance of ByteList with a pre-allocated size. If you know the size ahead | |
85 | * of time this saves additional array allocations to grow the bytelist to the proper size. | |
86 | * | |
87 | * @param size to preallocate the bytelist to | |
88 | */ | |
77 | 89 | public ByteList(int size) { |
78 | 90 | bytes = new byte[size]; |
79 | 91 | realSize = 0; |
80 | 92 | } |
81 | 93 | |
94 | /** | |
95 | * Create a new instance of ByteList with the bytes supplied using the specified encoding. | |
96 | * | |
97 | * Important: bytes is used as the initial backing store for the bytelist. Over time as the | |
98 | * bytelist is mutated this backing store may be replaced with a new one to hold the additional | |
99 | * bytes. If you pass in bytes and then modify the contents of the original bytes, then those | |
100 | * changes will get reflected. | |
101 | * | |
102 | * @param bytes to use | |
103 | * @param encoding | |
104 | */ | |
105 | // TODO: Deprecate and replace with a static method which implies the caveats of this constructor. | |
82 | 106 | public ByteList(byte[] bytes, Encoding encoding) { |
83 | 107 | this.bytes = bytes; |
84 | 108 | this.realSize = bytes.length; |
85 | 109 | this.encoding = encoding; |
86 | 110 | } |
87 | 111 | |
112 | /** | |
113 | * Create a new instance of ByteList with the contents of wrap. This constructor will make | |
114 | * a copy of bytes passed | |
115 | * | |
116 | * @param wrap the initial bytes for this ByteList | |
117 | */ | |
88 | 118 | public ByteList(byte[] wrap) { |
89 | this (wrap, true); | |
90 | } | |
91 | ||
119 | this(wrap, true); | |
120 | } | |
121 | ||
122 | /** | |
123 | * Create a new instance of ByteList with the contents of wrap. If copy is true then it will | |
124 | * array copy the contents. Otherwise it will use the byte array passed in as its initial | |
125 | * backing store. | |
126 | * | |
127 | * @param wrap the initial bytes for this ByteList | |
128 | * @param copy whether to arraycopy wrap for the backing store or not | |
129 | */ | |
92 | 130 | public ByteList(byte[] wrap, boolean copy) { |
93 | 131 | assert wrap != null; |
94 | 132 | if (copy) { |
99 | 137 | realSize = wrap.length; |
100 | 138 | } |
101 | 139 | |
140 | /** | |
141 | * Create a new instance of ByteList with the same contents as the passed in ByteList wrap. | |
142 | * Note that this does array copy the data for the new object's initial backing store. | |
143 | * | |
144 | * @param wrap is contents for new ByteList | |
145 | */ | |
102 | 146 | public ByteList(ByteList wrap) { |
103 | 147 | this(wrap.bytes, wrap.begin, wrap.realSize); |
104 | 148 | } |
105 | 149 | |
150 | /** | |
151 | * Create a new instance of ByteList with the same contents as the passed in ByteList wrap. | |
152 | * The copy parameter gives you control over whether you want this new ByteList to share | |
153 | * the same byte array for its backing store. | |
154 | * | |
155 | * ****IMPORTANT NOTES***** | |
156 | * copy is currently ignored and always assumed false. This constructor should just go away | |
157 | * so it has been marked as deprecated. | |
158 | * | |
159 | * @param wrap | |
160 | * @param copy | |
161 | * | |
162 | * Deprecated to coincide with JRuby 1.5 (not used by anything we can find luckily) | |
163 | */ | |
164 | @Deprecated | |
106 | 165 | public ByteList(ByteList wrap, boolean copy) { |
107 | 166 | this(wrap.bytes, wrap.begin, wrap.realSize, false); |
108 | 167 | } |
109 | 168 | |
169 | /** | |
170 | * Create a new instance of ByteList using wrap as a backing store where index is the first | |
171 | * index in the byte array where the data starts and len indicates how long the data portion | |
172 | * of the bytelist is. wrap will be array copied in this constructor. | |
173 | * | |
174 | * @param wrap the bytes to use | |
175 | * @param index where in the bytes the data starts | |
176 | * @param len how long the data is in the wrap array | |
177 | */ | |
110 | 178 | public ByteList(byte[] wrap, int index, int len) { |
111 | this(wrap,index,len,true); | |
112 | } | |
113 | ||
179 | this(wrap, index, len, true); | |
180 | } | |
181 | ||
182 | /** | |
183 | * Create a new instance of ByteList using wrap as a backing store where index is the first | |
184 | * index in the byte array where the data starts and len indicates how long the data portion | |
185 | * of the bytelist is. wrap will be array copied if copy is true OR if index != 0. | |
186 | * | |
187 | * @param wrap the bytes to use | |
188 | * @param index where in the bytes the data starts | |
189 | * @param len how long the data is in the wrap array | |
190 | * @param copy if true array copy wrap. otherwise use as backing store | |
191 | */ | |
192 | // FIXME: Fix the index != 0 not honoring copy and separate out into a different caller. JRuby.next would be the right time for this. | |
114 | 193 | public ByteList(byte[] wrap, int index, int len, boolean copy) { |
115 | assert wrap != null; | |
194 | assert wrap != null : "'wrap' must not be null"; | |
195 | assert index >= 0 && index <= wrap.length : "'index' is not without bounds of 'wrap' array"; | |
196 | assert wrap.length >= index + len : "'index' + 'len' is longer than the 'wrap' array"; | |
197 | ||
116 | 198 | if (copy || index != 0) { |
117 | 199 | bytes = new byte[len]; |
118 | 200 | System.arraycopy(wrap, index, bytes, 0, len); |
122 | 204 | realSize = len; |
123 | 205 | } |
124 | 206 | |
207 | /** | |
208 | * Create a new instance of ByteList using wrap as a backing store where index is the first | |
209 | * index in the byte array where the data starts and len indicates how long the data portion | |
210 | * of the bytelist is. wrap's byte array will be array copied for initial backing store. | |
211 | * | |
212 | * @param wrap the bytes to use | |
213 | * @param index where in the bytes the data starts | |
214 | * @param len how long the data is in the wrap array | |
215 | */ | |
125 | 216 | public ByteList(ByteList wrap, int index, int len) { |
126 | 217 | this(wrap.bytes, wrap.begin + index, len); |
127 | 218 | } |
128 | 219 | |
129 | private ByteList(boolean flag) { | |
130 | } | |
131 | ||
220 | /** | |
221 | * Delete len bytes from start index. This does no bullet-proofing so it is your | |
222 | * responsibility to ensure you do not run off the backing store array. | |
223 | * | |
224 | * @param start index to delete from | |
225 | * @param len number of bytes to delete | |
226 | */ | |
132 | 227 | public void delete(int start, int len) { |
133 | realSize-=len; | |
134 | System.arraycopy(bytes,start+len,bytes,start,realSize); | |
135 | } | |
136 | ||
228 | assert start >= begin && start < realSize : "'start' is at invalid index"; | |
229 | assert len >= 0 : "'len' must be positive"; | |
230 | assert start + len <= begin + realSize : "too many bytes requested"; | |
231 | ||
232 | realSize -= len; | |
233 | ||
234 | System.arraycopy(bytes, start + len, bytes, start, realSize); | |
235 | } | |
236 | ||
237 | /** | |
238 | * Append the byte b up to len times onto the end of the current ByteList. | |
239 | * | |
240 | * @param b is byte to be appended | |
241 | * @param len is number of times to repeat the append | |
242 | */ | |
243 | // FIXME: Inefficient impl since we know the len up front. | |
244 | public void fill(int b, int len) { | |
245 | for ( ; --len >= 0; ) { | |
246 | append(b); | |
247 | } | |
248 | } | |
249 | ||
250 | /** | |
251 | * @see Object#clone() | |
252 | */ | |
253 | @Override | |
254 | public Object clone() { | |
255 | return dup(); | |
256 | } | |
257 | ||
258 | /** | |
259 | * Creates a duplicate of this ByteList. The bytes are copied into a new backing store and the | |
260 | * encoding, cached stringValue and its resulting hash value are carried over to the duplicate. | |
261 | */ | |
262 | public ByteList dup() { | |
263 | ByteList dup = dup(realSize); | |
264 | dup.hash = hash; | |
265 | dup.stringValue = stringValue; | |
266 | return dup; | |
267 | } | |
268 | ||
269 | /** | |
270 | * Create a new ByteList but do not array copy the byte backing store. | |
271 | * | |
272 | * @return a new ByteList with same backing store | |
273 | */ | |
274 | public ByteList shallowDup() { | |
275 | ByteList dup = new ByteList(bytes, false); | |
276 | dup.realSize = realSize; | |
277 | dup.begin = begin; | |
278 | dup.encoding = encoding; | |
279 | dup.hash = hash; | |
280 | dup.stringValue = stringValue; | |
281 | return dup; | |
282 | } | |
283 | ||
284 | /** | |
285 | * @param length is the value of how big the buffer is going to be, not the actual length to copy | |
286 | * | |
287 | * It is used by RubyString.modify(int) to prevent COW pathological situations | |
288 | * (namely to COW with having <code>length - realSize</code> bytes ahead) | |
289 | */ | |
290 | public ByteList dup(int length) { | |
291 | ByteList dup = new ByteList(length); | |
292 | ||
293 | dup.append(this.bytes, this.begin, this.realSize); | |
294 | dup.encoding = encoding; | |
295 | ||
296 | return dup; | |
297 | } | |
298 | ||
299 | /** | |
300 | * Ensure that the bytelist is at least length bytes long. Otherwise grow the backing store | |
301 | * so that it is length bytes long | |
302 | * | |
303 | * @param length to use to make sure ByteList is long enough | |
304 | */ | |
305 | public void ensure(int length) { | |
306 | if (length >= bytes.length) { | |
307 | byte[] tmp = new byte[length + (length >>> 1)]; | |
308 | System.arraycopy(bytes, begin, tmp, 0, realSize); | |
309 | bytes = tmp; | |
310 | } | |
311 | } | |
312 | ||
313 | /** | |
314 | * Make a shared copy of this ByteList. This is used for COW'ing ByteLists, you typically | |
315 | * want a piece of the same backing store to be shared across ByteLists, while those | |
316 | * ByteLists will be pointing at different indexes and lengths of the same backing store. | |
317 | * | |
318 | * Note: that this does not update hash or stringValue. | |
319 | * | |
320 | * @param index new begin value for shared ByteList | |
321 | * @param len new length/realSize for shared ByteList | |
322 | * @return | |
323 | */ | |
324 | public ByteList makeShared(int index, int len) { | |
325 | ByteList shared = new ByteList(bytes, encoding); | |
326 | ||
327 | shared.realSize = len; | |
328 | shared.begin = begin + index; | |
329 | ||
330 | return shared; | |
331 | } | |
332 | ||
333 | /** | |
334 | * Change ByteList to have a new begin that is +index positions past begin with a new length. | |
335 | * | |
336 | * @param index new value to add to begin | |
337 | * @param len the new realSize/length value | |
338 | */ | |
339 | public void view(int index, int len) { | |
340 | realSize = len; | |
341 | begin = begin + index; | |
342 | } | |
343 | ||
344 | /** | |
345 | * Array copy the byte backing store so that you can guarantee that no other objects are | |
346 | * referencing this object's backing store. | |
347 | */ | |
348 | public void unshare() { | |
349 | unshare(realSize); | |
350 | } | |
351 | ||
352 | /** | |
353 | * Array copy the byte backing store so that you can guarantee that no other objects are | |
354 | * referencing this object's backing store. This version of unshare allows a length to be | |
355 | * specified which will copy length bytes from the old backing store. | |
356 | * | |
357 | * @param length is the value of how big the buffer is going to be, not the actual length to copy | |
358 | * | |
359 | * It is used by RubyString.modify(int) to prevent COW pathological situations | |
360 | * (namely to COW with having <code>length - realSize</code> bytes ahead) | |
361 | */ | |
362 | public void unshare(int length) { | |
363 | byte[] tmp = new byte[length]; | |
364 | System.arraycopy(bytes, begin, tmp, 0, Math.min(realSize, length)); | |
365 | bytes = tmp; | |
366 | begin = 0; | |
367 | } | |
368 | ||
369 | /** | |
370 | * Invalidate the hash and stringValue which may have been cached in this ByteList. | |
371 | */ | |
372 | public void invalidate() { | |
373 | hash = 0; | |
374 | stringValue = null; | |
375 | } | |
376 | ||
377 | /** | |
378 | * Prepend a byte onto the front of this ByteList. | |
379 | * | |
380 | * @param b is the byte to be prepended | |
381 | */ | |
382 | public void prepend(byte b) { | |
383 | grow(1); | |
384 | System.arraycopy(bytes, 0, bytes, 1, realSize); | |
385 | bytes[0] = b; | |
386 | realSize++; | |
387 | } | |
388 | ||
389 | /** | |
390 | * Append a single byte to the ByteList | |
391 | * | |
392 | * @param b the byte to be added | |
393 | * @return this instance | |
394 | */ | |
137 | 395 | public ByteList append(byte b) { |
138 | 396 | grow(1); |
139 | 397 | bytes[realSize++] = b; |
140 | 398 | return this; |
141 | 399 | } |
142 | 400 | |
401 | /** | |
402 | * Append a single int to the ByteList | |
403 | * | |
404 | * @param b the int to be added | |
405 | * @return this instance | |
406 | */ | |
143 | 407 | public ByteList append(int b) { |
144 | 408 | append((byte)b); |
145 | 409 | return this; |
146 | 410 | } |
147 | 411 | |
412 | /** | |
413 | * Append up to length bytes from InputStream to the ByteList. If no bytes are read from the | |
414 | * stream then throw an IOException. | |
415 | * | |
416 | * @param input the stream to read bytes from | |
417 | * @param length how many byte to try and read | |
418 | * @return this instance | |
419 | * @throws IOException when no bytes are read | |
420 | */ | |
148 | 421 | public ByteList append(InputStream input, int length) throws IOException { |
149 | 422 | grow(length); |
150 | 423 | int read = 0; |
162 | 435 | return this; |
163 | 436 | } |
164 | 437 | |
438 | /** | |
439 | * Append contents of the supplied nio ByteBuffer up to len length onto the end of this | |
440 | * ByteList. | |
441 | * | |
442 | * @param buffer to be appended | |
443 | * @param len is number of bytes you are hoping to get from the ByteBuffer | |
444 | */ | |
165 | 445 | public void append(ByteBuffer buffer, int len) { |
166 | 446 | grow(len); |
167 | 447 | buffer.get(bytes, realSize, len); |
168 | 448 | realSize += len; |
169 | 449 | } |
170 | 450 | |
171 | public void fill(int b, int len) { | |
172 | for ( ; --len >= 0; ) { | |
173 | append(b); | |
174 | } | |
175 | } | |
176 | ||
177 | public Object clone() { | |
178 | return dup(); | |
179 | } | |
180 | ||
181 | public ByteList dup() { | |
182 | ByteList dup = dup(realSize); | |
183 | dup.hash = hash; | |
184 | dup.stringValue = stringValue; | |
185 | return dup; | |
186 | } | |
187 | ||
188 | public ByteList shallowDup() { | |
189 | ByteList dup = new ByteList(bytes, false); | |
190 | dup.realSize = realSize; | |
191 | dup.begin = begin; | |
192 | dup.encoding = encoding; | |
193 | dup.hash = hash; | |
194 | dup.stringValue = stringValue; | |
195 | return dup; | |
196 | } | |
197 | ||
198 | /** | |
199 | * @param length is the value of how big the buffer is going to be, not the actual length to copy | |
200 | * | |
201 | * It is used by RubyString.modify(int) to prevent COW pathological situations | |
202 | * (namely to COW with having <code>length - realSize</code> bytes ahead) | |
203 | */ | |
204 | public ByteList dup(int length) { | |
205 | ByteList dup = new ByteList(false); | |
206 | dup.bytes = new byte[length]; | |
207 | // use the smaller of the two sizes for the new real size, to allow truncating | |
208 | int newRealSize = Math.min(length, realSize); | |
209 | System.arraycopy(bytes, begin, dup.bytes, 0, newRealSize); | |
210 | dup.realSize = newRealSize; | |
211 | dup.begin = 0; | |
212 | dup.encoding = encoding; | |
213 | return dup; | |
214 | } | |
215 | ||
216 | public void ensure(int length) { | |
217 | if (length >= bytes.length) { | |
218 | byte[]tmp = new byte[length + (length >>> 1)]; | |
219 | System.arraycopy(bytes, begin, tmp, 0, realSize); | |
220 | bytes = tmp; | |
221 | } | |
222 | } | |
223 | ||
224 | public ByteList makeShared(int index, int len) { | |
225 | ByteList shared = new ByteList(false); | |
226 | shared.bytes = bytes; | |
227 | shared.realSize = len; | |
228 | shared.begin = begin + index; | |
229 | shared.encoding = encoding; | |
230 | return shared; | |
231 | } | |
232 | ||
233 | public void view(int index, int len) { | |
234 | realSize = len; | |
235 | begin = begin + index; | |
236 | } | |
237 | ||
238 | public void unshare() { | |
239 | unshare(realSize); | |
240 | } | |
241 | ||
242 | /** | |
243 | * @param length is the value of how big the buffer is going to be, not the actual length to copy | |
244 | * | |
245 | * It is used by RubyString.modify(int) to prevent COW pathological situations | |
246 | * (namely to COW with having <code>length - realSize</code> bytes ahead) | |
247 | */ | |
248 | public void unshare(int length) { | |
249 | byte[] tmp = new byte[length]; | |
250 | System.arraycopy(bytes, begin, tmp, 0, Math.min(realSize, length)); | |
251 | bytes = tmp; | |
252 | begin = 0; | |
253 | } | |
254 | ||
255 | public void invalidate() { | |
256 | hash = 0; | |
257 | stringValue = null; | |
258 | } | |
259 | ||
260 | public void prepend(byte b) { | |
261 | grow(1); | |
262 | System.arraycopy(bytes, 0, bytes, 1, realSize); | |
263 | bytes[0] = b; | |
264 | realSize++; | |
265 | } | |
266 | ||
451 | /** | |
452 | * Append moreBytes onto the end of the current ByteList. | |
453 | * | |
454 | * @param moreBytes to be added. | |
455 | */ | |
267 | 456 | public void append(byte[] moreBytes) { |
457 | assert moreBytes != null : "moreBytes is null"; | |
458 | ||
268 | 459 | grow(moreBytes.length); |
269 | 460 | System.arraycopy(moreBytes, 0, bytes, realSize, moreBytes.length); |
270 | 461 | realSize += moreBytes.length; |
271 | 462 | } |
272 | 463 | |
464 | /** | |
465 | * Append moreBytes onto the end of the current ByteList. | |
466 | * | |
467 | * @param moreBytes to be added. | |
468 | */ | |
273 | 469 | public void append(ByteList moreBytes) { |
274 | 470 | append(moreBytes.bytes, moreBytes.begin, moreBytes.realSize); |
275 | 471 | } |
276 | 472 | |
473 | /** | |
474 | * Append moreBytes onto the end of the current ByteList with +index as the new begin for | |
475 | * len bytes from the moreBytes ByteList. | |
476 | * | |
477 | * @param moreBytes to be added. | |
478 | * @param index new index past current begin value | |
479 | * @param len is the number of bytes to append from source ByteList | |
480 | */ | |
277 | 481 | public void append(ByteList moreBytes, int index, int len) { |
278 | 482 | append(moreBytes.bytes, moreBytes.begin + index, len); |
279 | 483 | } |
280 | 484 | |
485 | /** | |
486 | * Append moreBytes onto the end of the current ByteList with start as the new begin for | |
487 | * len bytes from the moreBytes byte array. | |
488 | * | |
489 | * @param moreBytes to be added. | |
490 | * @param start is the new begin value | |
491 | * @param len is the number of bytes to append from source byte array | |
492 | */ | |
281 | 493 | public void append(byte[] moreBytes, int start, int len) { |
494 | assert moreBytes != null : "moreBytes is null"; | |
495 | assert start >= 0 && (start == 0 || start < moreBytes.length) : "Invalid start"; | |
496 | assert len >= 0 && moreBytes.length - start >= len : "Bad length"; | |
497 | ||
282 | 498 | grow(len); |
283 | 499 | System.arraycopy(moreBytes, start, bytes, realSize, len); |
284 | 500 | realSize += len; |
285 | 501 | } |
286 | 502 | |
503 | /** | |
504 | * Resize the ByteList's backing store to be length in size. Note that this forces the backing | |
505 | * store to array copy regardless of ByteLists current size or contents. It essentially will | |
506 | * end any COWing. | |
507 | * | |
508 | * @param length the new length for the backing store. | |
509 | */ | |
287 | 510 | public void realloc(int length) { |
511 | assert length >= 0 : "Invalid length"; | |
512 | assert length >= realSize : "length is too small"; | |
513 | ||
288 | 514 | byte tmp[] = new byte[length]; |
289 | 515 | System.arraycopy(bytes, 0, tmp, 0, realSize); |
290 | 516 | bytes = tmp; |
291 | 517 | } |
292 | 518 | |
519 | /** | |
520 | * Return the current length of the ByteList. | |
521 | * | |
522 | * @return the number of bytes in this ByteList. | |
523 | */ | |
293 | 524 | public int length() { |
294 | 525 | return realSize; |
295 | 526 | } |
296 | 527 | |
528 | // ENEBO: Wow...what happens if newLength < realSize...nasty shrinkage? | |
529 | /** | |
530 | * grow the bytelist to be newLength in size. | |
531 | * | |
532 | * @param newLength | |
533 | */ | |
534 | public void length(int newLength) { | |
535 | // assert newLength >= realSize : "newLength is too small"; | |
536 | ||
537 | grow(newLength - realSize); | |
538 | realSize = newLength; | |
539 | } | |
540 | ||
541 | /** | |
542 | * Number of characters in this ByteList based on its current encoding. | |
543 | * | |
544 | * @return number of characters | |
545 | */ | |
297 | 546 | public int lengthEnc() { |
298 | 547 | return encoding.strLength(bytes, begin, begin + realSize); |
299 | 548 | } |
300 | 549 | |
301 | public void length(int newLength) { | |
302 | grow(newLength - realSize); | |
303 | realSize = newLength; | |
304 | } | |
305 | ||
550 | /** | |
551 | * Get the byte at index from the ByteList. | |
552 | * | |
553 | * @param index to retrieve byte from | |
554 | * @return the byte retrieved | |
555 | */ | |
306 | 556 | public int get(int index) { |
557 | assert index >= 0 : "index must be positive"; | |
558 | ||
307 | 559 | return bytes[begin + index]; |
308 | 560 | } |
309 | 561 | |
562 | /** | |
563 | * Get the index code point in this ByteList. | |
564 | * | |
565 | * @param index is the element you want | |
566 | * @return the element you requested | |
567 | */ | |
310 | 568 | public int getEnc(int index) { |
311 | 569 | return encoding.strCodeAt(bytes, begin, begin + realSize, index); |
312 | 570 | } |
313 | 571 | |
572 | /** | |
573 | * Set the byte at index to be new value. | |
574 | * | |
575 | * @param index to set byte | |
576 | * @param b is the new value. | |
577 | */ | |
314 | 578 | public void set(int index, int b) { |
579 | assert index >= 0 : "index must be positive"; | |
580 | assert begin + index < begin + realSize : "index is too large"; | |
581 | ||
315 | 582 | bytes[begin + index] = (byte)b; |
316 | 583 | } |
317 | 584 | |
585 | /** | |
586 | * Replace the byte array backing store with newBytes. This method is only referred to in | |
587 | * deprecated method RubyString.view(CharSequence). | |
588 | * | |
589 | * We deprecated this method because it ignore begin and if we | |
590 | * | |
591 | * @param newBytes | |
592 | */ | |
593 | @Deprecated | |
318 | 594 | public void replace(byte[] newBytes) { |
319 | 595 | assert newBytes != null; |
320 | 596 | this.bytes = newBytes; |
349 | 625 | realSize = newSize; |
350 | 626 | } |
351 | 627 | |
628 | /** | |
629 | * Note: This is as unsafe as unsafeReplace | |
630 | * | |
631 | * @param beg | |
632 | * @param len | |
633 | * @param nbytes | |
634 | */ | |
352 | 635 | public void replace(int beg, int len, ByteList nbytes) { |
353 | 636 | replace(beg, len, nbytes.bytes, nbytes.begin, nbytes.realSize); |
354 | 637 | } |
355 | 638 | |
639 | /** | |
640 | * Note: This is as unsafe as unsafeReplace | |
641 | * @param beg | |
642 | * @param len | |
643 | * @param buf | |
644 | */ | |
356 | 645 | public void replace(int beg, int len, byte[] buf) { |
357 | 646 | replace(beg, len, buf, 0, buf.length); |
358 | 647 | } |
359 | 648 | |
649 | /** | |
650 | * Note: This is as unsafe as unsafeReplace | |
651 | * @param beg | |
652 | * @param len | |
653 | * @param nbytes | |
654 | * @param index | |
655 | * @param count | |
656 | */ | |
360 | 657 | public void replace(int beg, int len, byte[] nbytes, int index, int count) { |
361 | 658 | unsafeReplace(beg, len, nbytes, index, count); |
362 | 659 | } |
368 | 665 | realSize++; |
369 | 666 | } |
370 | 667 | |
668 | /** | |
669 | * Get the index of first occurrence of c in ByteList from the beginning of the ByteList. | |
670 | * | |
671 | * @param c byte to be looking for | |
672 | * @return the index of the byte or -1 if not found | |
673 | */ | |
371 | 674 | public int indexOf(int c) { |
372 | 675 | return indexOf(c, 0); |
373 | 676 | } |
374 | 677 | |
678 | /** | |
679 | * Get the index of first occurrence of c in ByteList from the pos offset of the ByteList. | |
680 | * | |
681 | * @param c byte to be looking for | |
682 | * @param pos offset from beginning of ByteList to look for byte | |
683 | * @return the index of the byte or -1 if not found | |
684 | */ | |
375 | 685 | public int indexOf(final int c, int pos) { |
376 | 686 | // not sure if this is checked elsewhere, |
377 | 687 | // didn't see it in RubyString. RubyString does |
386 | 696 | return pos < size ? pos - begin : -1; |
387 | 697 | } |
388 | 698 | |
699 | /** | |
700 | * Get the index of first occurrence of ByteList find in this ByteList. | |
701 | * | |
702 | * @param find the ByteList to find | |
703 | * @return the index of the byte or -1 if not found | |
704 | */ | |
389 | 705 | public int indexOf(ByteList find) { |
390 | 706 | return indexOf(find, 0); |
391 | 707 | } |
392 | 708 | |
709 | /** | |
710 | * Get the index of first occurrence of ByteList find in this ByteList starting at index i. | |
711 | * | |
712 | * @param find the ByteList to find | |
713 | * @param i the index to start from | |
714 | * @return the index of the byte or -1 if not found | |
715 | */ | |
393 | 716 | public int indexOf(ByteList find, int i) { |
394 | 717 | return indexOf(bytes, begin, realSize, find.bytes, find.begin, find.realSize, i); |
395 | 718 | } |
396 | 719 | |
720 | /** | |
721 | * Get the index of first occurrence of target in source using the offset and count parameters. | |
722 | * fromIndex can be used to start beyond zero on source. | |
723 | * | |
724 | * @return the index of the byte or -1 if not found | |
725 | */ | |
397 | 726 | static int indexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target, int targetOffset, int targetCount, int fromIndex) { |
398 | 727 | if (fromIndex >= sourceCount) return (targetCount == 0 ? sourceCount : -1); |
399 | 728 | if (fromIndex < 0) fromIndex = 0; |
416 | 745 | return -1; |
417 | 746 | } |
418 | 747 | |
748 | /** | |
749 | * Get the index of last occurrence of c in ByteList from the end of the ByteList. | |
750 | * | |
751 | * @param c byte to be looking for | |
752 | * @return the index of the byte or -1 if not found | |
753 | */ | |
419 | 754 | public int lastIndexOf(int c) { |
420 | 755 | return lastIndexOf(c, realSize - 1); |
421 | 756 | } |
422 | 757 | |
758 | /** | |
759 | * Get the index of last occurrence of c in ByteList from the pos offset of the ByteList. | |
760 | * | |
761 | * @param c byte to be looking for | |
762 | * @param pos offset from end of ByteList to look for byte | |
763 | * @return the index of the byte or -1 if not found | |
764 | */ | |
423 | 765 | public int lastIndexOf(final int c, int pos) { |
424 | 766 | // not sure if this is checked elsewhere, |
425 | 767 | // didn't see it in RubyString. RubyString does |
426 | 768 | // cast to char, so c will be >= 0. |
427 | if (c > 255) | |
428 | return -1; | |
769 | if (c > 255) return -1; | |
770 | ||
429 | 771 | final byte b = (byte)(c&0xFF); |
430 | 772 | final int size = begin + realSize; |
431 | 773 | pos += begin; |
439 | 781 | return pos - begin; |
440 | 782 | } |
441 | 783 | |
784 | /** | |
785 | * Get the index of last occurrence of find in ByteList from the end of the ByteList. | |
786 | * | |
787 | * @param find ByteList to be looking for | |
788 | * @return the index of the byte or -1 if not found | |
789 | */ | |
442 | 790 | public int lastIndexOf(ByteList find) { |
443 | 791 | return lastIndexOf(find, realSize); |
444 | 792 | } |
445 | 793 | |
794 | /** | |
795 | * Get the index of last occurrence of find in ByteList from the end of the ByteList. | |
796 | * | |
797 | * @param find ByteList to be looking for | |
798 | * @param pos index from end of list to search from | |
799 | * @return the index of the byte or -1 if not found | |
800 | */ | |
446 | 801 | public int lastIndexOf(ByteList find, int pos) { |
447 | 802 | return lastIndexOf(bytes, begin, realSize, find.bytes, find.begin, find.realSize, pos); |
448 | 803 | } |
449 | 804 | |
805 | /** | |
806 | * Get the index of last occurrence of target in source using the offset and count parameters. | |
807 | * fromIndex can be used to start beyond zero on source. | |
808 | * | |
809 | * @return the index of the byte or -1 if not found | |
810 | */ | |
450 | 811 | static int lastIndexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target, int targetOffset, int targetCount, int fromIndex) { |
451 | 812 | int rightIndex = sourceCount - targetCount; |
452 | 813 | if (fromIndex < 0) return -1; |
489 | 850 | return true; |
490 | 851 | } |
491 | 852 | |
853 | /** | |
854 | * Does this ByteList start with the supplied ByteList? | |
855 | * | |
856 | * @param other is the bytelist to compare with | |
857 | * @return true if this ByteList starts with other | |
858 | */ | |
492 | 859 | public boolean startsWith(ByteList other) { |
493 | 860 | return startsWith(other, 0); |
494 | 861 | } |
495 | 862 | |
863 | /** | |
864 | * Does this ByteList end with the supplied ByteList? | |
865 | * | |
866 | * @param other is the bytelist to compare with | |
867 | * @return true if this ByteList ends with other | |
868 | */ | |
496 | 869 | public boolean endsWith(ByteList other) { |
497 | 870 | return startsWith(other, realSize - other.realSize); |
498 | 871 | } |
499 | 872 | |
873 | /** | |
874 | * Does this ByteList equal the other ByteList? | |
875 | * | |
876 | * @param other is the bytelist to compare with | |
877 | * @return true if this ByteList is the same | |
878 | */ | |
879 | @Override | |
500 | 880 | public boolean equals(Object other) { |
501 | 881 | if (other instanceof ByteList) return equal((ByteList)other); |
502 | 882 | return false; |
503 | 883 | } |
504 | 884 | |
885 | /** | |
886 | * Does this ByteList equal the other ByteList? | |
887 | * | |
888 | * @param other is the bytelist to compare with | |
889 | * @return true if this ByteList is the same | |
890 | */ | |
505 | 891 | public boolean equal(ByteList other) { |
506 | 892 | if (other == this) return true; |
507 | 893 | if (hash != 0 && other.hash != 0 && hash != other.hash) return false; |
526 | 912 | return false; |
527 | 913 | } |
528 | 914 | |
529 | // an alternative to the new version of equals, should | |
530 | // detect inequality faster (in many cases), but is slow | |
531 | // in the case of equal values (all bytes visited), due to | |
532 | // using n+=2, n-=2 vs. ++n, --n while iterating over the array. | |
915 | /** | |
916 | * an alternative to the new version of equals, should | |
917 | * detect inequality faster (in many cases), but is slow | |
918 | * in the case of equal values (all bytes visited), due to | |
919 | * using n+=2, n-=2 vs. ++n, --n while iterating over the array. | |
920 | */ | |
533 | 921 | public boolean sample_equals(Object other) { |
534 | 922 | if (other == this) return true; |
535 | 923 | if (other instanceof ByteList) { |
560 | 948 | return cmp((ByteList)other); |
561 | 949 | } |
562 | 950 | |
951 | /** | |
952 | * This comparison matches MRI comparison of Strings (rb_str_cmp). | |
953 | */ | |
563 | 954 | public int cmp(final ByteList other) { |
564 | 955 | if (other == this) return 0; |
565 | 956 | final int size = realSize; |
578 | 969 | return size == other.realSize ? 0 : size == len ? -1 : 1; |
579 | 970 | } |
580 | 971 | |
972 | /** | |
973 | * Do a case insensitive comparison with other ByteList with return values similar to compareTo. | |
974 | * | |
975 | * @param other the ByteList to compare | |
976 | * @return -1, 0, or 1 | |
977 | */ | |
581 | 978 | public int caseInsensitiveCmp(final ByteList other) { |
582 | 979 | if (other == this) return 0; |
583 | 980 | |
609 | 1006 | return bytes; |
610 | 1007 | } |
611 | 1008 | |
1009 | /** | |
1010 | * Get a copy of the bytes referenced by this ByteList. It will make an optimal copy and not | |
1011 | * carry along unused bytes from COW sharing. | |
1012 | * | |
1013 | * @return a copy of the bytes. | |
1014 | */ | |
612 | 1015 | public byte[] bytes() { |
613 | 1016 | byte[] newBytes = new byte[realSize]; |
614 | 1017 | System.arraycopy(bytes, begin, newBytes, 0, realSize); |
615 | 1018 | return newBytes; |
616 | 1019 | } |
617 | 1020 | |
1021 | /** | |
1022 | * First index of the backing array that contains data for the ByteList. Note that we have | |
1023 | * copy-on-write (COW) semantics which means sharing the same backing store will yield different | |
1024 | * begin and size values while using the same byte[]. | |
1025 | * | |
1026 | * @return the index | |
1027 | */ | |
618 | 1028 | public int begin() { |
619 | 1029 | return begin; |
620 | 1030 | } |
621 | 1031 | |
1032 | /** | |
1033 | * Grow the ByteList by increaseRequested bytes. A value <0 will be a no-op. | |
1034 | * | |
1035 | * @param increaseRequested number of bytes to grow | |
1036 | */ | |
622 | 1037 | private void grow(int increaseRequested) { |
623 | 1038 | if (increaseRequested < 0) return; |
624 | 1039 | |
630 | 1045 | } |
631 | 1046 | } |
632 | 1047 | |
1048 | /** | |
1049 | * @see Object#hashCode() | |
1050 | */ | |
1051 | @Override | |
633 | 1052 | public int hashCode() { |
634 | 1053 | if (hash != 0) return hash; |
635 | 1054 | |
649 | 1068 | * |
650 | 1069 | * @return an ISO-8859-1 representation of the byte list |
651 | 1070 | */ |
1071 | @Override | |
652 | 1072 | public String toString() { |
653 | 1073 | if (stringValue == null) { |
654 | 1074 | stringValue = decode(bytes, begin, realSize, "ISO-8859-1"); |
656 | 1076 | return stringValue; |
657 | 1077 | } |
658 | 1078 | |
1079 | /** | |
1080 | * Create a bytelist with ISO_8859_1 encoding from the provided CharSequence. | |
1081 | * | |
1082 | * @param s the source for new ByteList | |
1083 | * @return the new ByteList | |
1084 | */ | |
659 | 1085 | public static ByteList create(CharSequence s) { |
660 | 1086 | return new ByteList(plain(s),false); |
661 | 1087 | } |
662 | 1088 | |
1089 | /** | |
1090 | * Create a byte[] from a CharSequence assuming a raw/ISO-8859-1 encoding | |
1091 | * | |
1092 | * @param s the CharSequence to convert | |
1093 | * @return a byte[] | |
1094 | */ | |
663 | 1095 | public static byte[] plain(CharSequence s) { |
664 | if (s instanceof String) { | |
665 | return encode(s, "ISO-8859-1"); | |
666 | } | |
1096 | if (s instanceof String) return encode(s, "ISO-8859-1"); | |
1097 | ||
1098 | // Not a String...get it the slow way | |
667 | 1099 | byte[] bytes = new byte[s.length()]; |
668 | 1100 | for (int i = 0; i < bytes.length; i++) { |
669 | 1101 | bytes[i] = (byte) s.charAt(i); |
671 | 1103 | return bytes; |
672 | 1104 | } |
673 | 1105 | |
1106 | /** | |
1107 | * Create a byte[] from a char[] assuming a raw/ISO-8859-1 encoding | |
1108 | * | |
1109 | * @param s the char[] to convert | |
1110 | * @return a byte[] | |
1111 | */ | |
674 | 1112 | public static byte[] plain(char[] s) { |
675 | 1113 | byte[] bytes = new byte[s.length]; |
676 | 1114 | for (int i = 0; i < s.length; i++) { |
679 | 1117 | return bytes; |
680 | 1118 | } |
681 | 1119 | |
1120 | /** | |
1121 | * Create a char[] from a byte[] assuming a raw/ISO-8859-1 encoding | |
1122 | * | |
1123 | * @param b the source byte[] | |
1124 | * @param start index to start converting to chars | |
1125 | * @param length how many bytes to convert to chars | |
1126 | * @return a char[] | |
1127 | */ | |
682 | 1128 | public static char[] plain(byte[] b, int start, int length) { |
1129 | assert b != null : "byte array cannot be null"; | |
1130 | assert start >= 0 && start + length <= b.length : "Invalid start or start+length too long"; | |
1131 | ||
683 | 1132 | char[] chars = new char[length]; |
684 | 1133 | for (int i = 0; i < length; i++) { |
685 | 1134 | chars[i] = (char) (b[start + i] & 0xFF); |
687 | 1136 | return chars; |
688 | 1137 | } |
689 | 1138 | |
1139 | /** | |
1140 | * Create a char[] from a byte[] assuming a raw/ISO-8859-1 encoding | |
1141 | * | |
1142 | * @param b the source byte[] | |
1143 | * @return a char[] | |
1144 | */ | |
690 | 1145 | public static char[] plain(byte[] b) { |
1146 | assert b != null : "byte array cannot be null"; | |
1147 | ||
691 | 1148 | char[] chars = new char[b.length]; |
692 | 1149 | for (int i = 0; i < b.length; i++) { |
693 | 1150 | chars[i] = (char) (b[i] & 0xFF); |
700 | 1157 | private static final ConcurrentMap<String,Charset> charsetsByAlias = |
701 | 1158 | new ConcurrentHashMap<String,Charset>(); |
702 | 1159 | |
1160 | /** | |
1161 | * Decode byte data into a String with the supplied charsetName. | |
1162 | * | |
1163 | * @param data to be decoded | |
1164 | * @param offset where to start decoding from in data | |
1165 | * @param length how many bytes to decode from data | |
1166 | * @param charsetName used to make the resulting String | |
1167 | * @return the new String | |
1168 | */ | |
703 | 1169 | public static String decode(byte[] data, int offset, int length, String charsetName) { |
704 | Charset cs = lookup(charsetName); | |
705 | return cs.decode(ByteBuffer.wrap(data, offset, length)).toString(); | |
706 | } | |
707 | ||
1170 | return lookup(charsetName).decode(ByteBuffer.wrap(data, offset, length)).toString(); | |
1171 | } | |
1172 | ||
1173 | /** | |
1174 | * Decode byte data into a String with the supplied charsetName. | |
1175 | * | |
1176 | * @param data to be decoded | |
1177 | * @param charsetName used to make the resulting String | |
1178 | * @return the new String | |
1179 | */ | |
708 | 1180 | public static String decode(byte[] data, String charsetName) { |
709 | Charset cs = lookup(charsetName); | |
710 | return cs.decode(ByteBuffer.wrap(data)).toString(); | |
711 | } | |
712 | ||
1181 | return lookup(charsetName).decode(ByteBuffer.wrap(data)).toString(); | |
1182 | } | |
1183 | ||
1184 | /** | |
1185 | * Encode CharSequence into a set of bytes based on the charsetName. | |
1186 | * | |
1187 | * @param data to be encoded | |
1188 | * @param charsetName used to extract the resulting bytes | |
1189 | * @return the new byte[] | |
1190 | */ | |
713 | 1191 | public static byte[] encode(CharSequence data, String charsetName) { |
714 | Charset cs = lookup(charsetName); | |
715 | return cs.encode(CharBuffer.wrap(data)).array(); | |
1192 | return lookup(charsetName).encode(CharBuffer.wrap(data)).array(); | |
716 | 1193 | } |
717 | 1194 | |
718 | 1195 | private static Charset lookup(String alias) { |
724 | 1201 | return cs; |
725 | 1202 | } |
726 | 1203 | |
1204 | /** | |
1205 | * Pretend byte array is raw and each byte is also the character value | |
1206 | * | |
1207 | * @param ix is the index you want | |
1208 | * @return the byte at index ix as an unsigned (0-255) char value | |
1209 | */ | |
727 | 1210 | public char charAt(int ix) { |
728 | 1211 | return (char)(this.bytes[begin + ix] & 0xFF); |
729 | 1212 | } |
730 | 1213 | |
1214 | /** | |
1215 | * Create subSequence of this array between start and end offsets | |
1216 | * | |
1217 | * @param start index for beginning of subsequence | |
1218 | * @param end index for end of subsequence | |
1219 | * @return a new ByteList/CharSequence | |
1220 | */ | |
731 | 1221 | public CharSequence subSequence(int start, int end) { |
732 | 1222 | return new ByteList(this, start, end - start); |
733 | 1223 | } |
734 | 1224 | |
1225 | /** | |
1226 | * Are these two byte arrays similar (semantics similar to compareTo). This is slightly | |
1227 | * special in that it will only compare the same number of bytes based on the lesser of the two | |
1228 | * lengths. | |
1229 | * | |
1230 | * @return -1, 0, 1 | |
1231 | */ | |
735 | 1232 | public static int memcmp(final byte[] first, final int firstStart, final int firstLen, final byte[] second, final int secondStart, final int secondLen) { |
736 | 1233 | if (first == second) return 0; |
737 | 1234 | final int len = Math.min(firstLen,secondLen); |
744 | 1241 | |
745 | 1242 | } |
746 | 1243 | |
1244 | /** | |
1245 | * Are these two byte arrays similar (semantics similar to compareTo). | |
1246 | * | |
1247 | * @return -1, 0, 1 | |
1248 | */ | |
747 | 1249 | public static int memcmp(final byte[] first, final int firstStart, final byte[] second, final int secondStart, final int len) { |
748 | 1250 | if (first == second) return 0; |
749 | 1251 | int offset = -1; |