New upstream version 0.8.10
Emmanuel Bourg
3 years ago
0 | version 0.8.10 (August 13th 2014) | |
1 | - getFirstSetBit | |
2 | - Capitalized some constants | |
3 | - Implement composition using chunk iterators | |
4 | ||
0 | 5 | version 0.8.9 (August 11th 2014) |
1 | 6 | - Fixed bug in clearIntIterator with bitmap of zeros |
2 | 7 |
95 | 95 | <dependency> |
96 | 96 | <groupId>com.googlecode.javaewah</groupId> |
97 | 97 | <artifactId>JavaEWAH</artifactId> |
98 | <version>0.8.9</version> | |
98 | <version>0.8.10</version> | |
99 | 99 | </dependency> |
100 | 100 | </dependencies> |
101 | 101 |
2 | 2 | |
3 | 3 | <groupId>com.googlecode.javaewah</groupId> |
4 | 4 | <artifactId>JavaEWAH</artifactId> |
5 | <version>0.8.9</version> | |
5 | <version>0.8.10</version> | |
6 | 6 | |
7 | 7 | <packaging>bundle</packaging> |
8 | 8 | <properties> |
19 | 19 | * @param a the word |
20 | 20 | */ |
21 | 21 | public BufferedRunningLengthWord(final long a) { |
22 | this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runningLengthBits)); | |
22 | this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.RUNNING_LENGTH_BITS)); | |
23 | 23 | this.runningBit = (a & 1) != 0; |
24 | this.runningLength = (int) ((a >>> 1) & RunningLengthWord.largestRunningLengthCount); | |
24 | this.runningLength = (int) ((a >>> 1) & RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); | |
25 | 25 | } |
26 | 26 | |
27 | 27 | /** |
82 | 82 | * @param a the word |
83 | 83 | */ |
84 | 84 | public void reset(final long a) { |
85 | this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runningLengthBits)); | |
85 | this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.RUNNING_LENGTH_BITS)); | |
86 | 86 | this.runningBit = (a & 1) != 0; |
87 | this.runningLength = (int) ((a >>> 1) & RunningLengthWord.largestRunningLengthCount); | |
87 | this.runningLength = (int) ((a >>> 1) & RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); | |
88 | 88 | this.literalWordOffset = 0; |
89 | 89 | } |
90 | 90 |
0 | package com.googlecode.javaewah; | |
1 | ||
2 | /* | |
3 | * Copyright 2009-2014, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves | |
4 | * Licensed under the Apache License, Version 2.0. | |
5 | */ | |
6 | ||
7 | /** | |
8 | * The ChunkIterator interface is used to iterate over chunks of ones or zeros. | |
9 | * | |
10 | * @author Gregory Ssi-Yan-Kai | |
11 | */ | |
12 | public interface ChunkIterator { | |
13 | ||
14 | /** | |
15 | * Is there more? | |
16 | * | |
17 | * @return true, if there is more, false otherwise | |
18 | */ | |
19 | boolean hasNext(); | |
20 | ||
21 | /** | |
22 | * Return the next bit | |
23 | * | |
24 | * @return the bit | |
25 | */ | |
26 | boolean nextBit(); | |
27 | ||
28 | /** | |
29 | * Return the length of the next chunk of identical bits | |
30 | * | |
31 | * @return the length | |
32 | */ | |
33 | int nextLength(); | |
34 | ||
35 | /** | |
36 | * Move the iterator to the next different bit | |
37 | */ | |
38 | void move(); | |
39 | ||
40 | /** | |
41 | * Move the iterator forward by the given number of bits | |
42 | * | |
43 | * @param bits the number of bits to skip | |
44 | */ | |
45 | void move(int bits); | |
46 | ||
47 | } |
0 | package com.googlecode.javaewah; | |
1 | ||
2 | /* | |
3 | * Copyright 2009-2014, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves | |
4 | * Licensed under the Apache License, Version 2.0. | |
5 | */ | |
6 | ||
7 | import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; | |
8 | ||
9 | /** | |
10 | * The ChunkIteratorImpl is the 64 bit implementation of the ChunkIterator | |
11 | * interface, which efficiently returns the chunks of ones and zeros represented by an | |
12 | * EWAHIterator. | |
13 | * | |
14 | * @author Gregory Ssi-Yan-Kai | |
15 | */ | |
16 | final class ChunkIteratorImpl implements ChunkIterator { | |
17 | ||
18 | private final EWAHIterator ewahIter; | |
19 | private final int sizeInBits; | |
20 | private final long[] ewahBuffer; | |
21 | private int position; | |
22 | private boolean runningBit; | |
23 | private int runningLength; | |
24 | private long word; | |
25 | private long wordMask; | |
26 | private int wordPosition; | |
27 | private int wordLength; | |
28 | private boolean hasNext; | |
29 | private Boolean nextBit; | |
30 | private int nextLength; | |
31 | ||
32 | ChunkIteratorImpl(EWAHIterator ewahIter, int sizeInBits) { | |
33 | this.ewahIter = ewahIter; | |
34 | this.sizeInBits = sizeInBits; | |
35 | this.ewahBuffer = ewahIter.buffer(); | |
36 | this.hasNext = moveToNextRLW(); | |
37 | } | |
38 | ||
39 | @Override | |
40 | public boolean hasNext() { | |
41 | return this.hasNext; | |
42 | } | |
43 | ||
44 | @Override | |
45 | public boolean nextBit() { | |
46 | return this.nextBit; | |
47 | } | |
48 | ||
49 | @Override | |
50 | public int nextLength() { | |
51 | return this.nextLength; | |
52 | } | |
53 | ||
54 | @Override | |
55 | public void move() { | |
56 | move(this.nextLength); | |
57 | } | |
58 | ||
59 | @Override | |
60 | public void move(int bits) { | |
61 | this.nextLength -= bits; | |
62 | if(this.nextLength <= 0) { | |
63 | do { | |
64 | this.nextBit = null; | |
65 | updateNext(); | |
66 | this.hasNext = moveToNextRLW(); | |
67 | } while(this.nextLength < 0); | |
68 | } | |
69 | } | |
70 | ||
71 | private boolean moveToNextRLW() { | |
72 | while (!runningHasNext() && !literalHasNext()) { | |
73 | if (!hasNextRLW()) { | |
74 | return this.nextBit!=null; | |
75 | } | |
76 | setRLW(nextRLW()); | |
77 | updateNext(); | |
78 | } | |
79 | return true; | |
80 | } | |
81 | ||
82 | private void setRLW(RunningLengthWord rlw) { | |
83 | this.runningLength = Math.min(this.sizeInBits, | |
84 | this.position + WORD_IN_BITS * (int) rlw.getRunningLength()); | |
85 | this.runningBit = rlw.getRunningBit(); | |
86 | this.wordPosition = this.ewahIter.literalWords(); | |
87 | this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); | |
88 | } | |
89 | ||
90 | private boolean runningHasNext() { | |
91 | return this.position < this.runningLength; | |
92 | } | |
93 | ||
94 | private boolean literalHasNext() { | |
95 | while (this.word == 0 && this.wordPosition < this.wordLength) { | |
96 | this.word = this.ewahBuffer[this.wordPosition++]; | |
97 | if (this.wordPosition == this.wordLength && !hasNextRLW()) { | |
98 | final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; | |
99 | if (usedBitsInLast > 0) { | |
100 | this.word &= ((~0l) >>> (WORD_IN_BITS - usedBitsInLast)); | |
101 | } | |
102 | } | |
103 | this.wordMask = 1l; | |
104 | } | |
105 | return this.word != 0 || (!hasNextRLW() && this.position < this.sizeInBits); | |
106 | } | |
107 | ||
108 | private boolean hasNextRLW() { | |
109 | return this.ewahIter.hasNext(); | |
110 | } | |
111 | ||
112 | private RunningLengthWord nextRLW() { | |
113 | return this.ewahIter.next(); | |
114 | } | |
115 | ||
116 | private void updateNext() { | |
117 | if(runningHasNext()) { | |
118 | if(this.nextBit == null || this.nextBit == this.runningBit) { | |
119 | this.nextBit = this.runningBit; | |
120 | int offset = runningOffset(); | |
121 | this.nextLength += offset; | |
122 | movePosition(offset); | |
123 | updateNext(); | |
124 | } | |
125 | } else if (literalHasNext()) { | |
126 | boolean b = currentWordBit(); | |
127 | if(this.nextBit == null || this.nextBit == b) { | |
128 | this.nextBit = b; | |
129 | this.nextLength++; | |
130 | movePosition(1); | |
131 | shiftWordMask(); | |
132 | updateNext(); | |
133 | } | |
134 | } else { | |
135 | moveToNextRLW(); | |
136 | } | |
137 | } | |
138 | ||
139 | private int runningOffset() { | |
140 | return this.runningLength - this.position; | |
141 | } | |
142 | ||
143 | private void movePosition(int offset) { | |
144 | this.position += offset; | |
145 | } | |
146 | ||
147 | private boolean currentWordBit() { | |
148 | return (this.word & this.wordMask) != 0; | |
149 | } | |
150 | ||
151 | private void shiftWordMask() { | |
152 | this.word &= ~this.wordMask; | |
153 | this.wordMask = this.wordMask << 1; | |
154 | } | |
155 | ||
156 | } |
206 | 206 | this.rlw.setRunningBit(v); |
207 | 207 | } |
208 | 208 | if (noLiteralWords && this.rlw.getRunningBit() == v |
209 | && (runningLength < RunningLengthWord.largestRunningLengthCount)) { | |
209 | && (runningLength < RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT)) { | |
210 | 210 | this.rlw.setRunningLength(runningLength + 1); |
211 | 211 | return; |
212 | 212 | } |
223 | 223 | */ |
224 | 224 | private void addLiteralWord(final long newData) { |
225 | 225 | final int numberSoFar = this.rlw.getNumberOfLiteralWords(); |
226 | if (numberSoFar >= RunningLengthWord.largestLiteralCount) { | |
226 | if (numberSoFar >= RunningLengthWord.LARGEST_LITERAL_COUNT) { | |
227 | 227 | push_back(0); |
228 | 228 | this.rlw.position = this.actualSizeInWords - 1; |
229 | 229 | this.rlw.setNumberOfLiteralWords(1); |
248 | 248 | int leftOverNumber = number; |
249 | 249 | while (leftOverNumber > 0) { |
250 | 250 | final int numberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); |
251 | final int whatWeCanAdd = leftOverNumber < RunningLengthWord.largestLiteralCount | |
251 | final int whatWeCanAdd = leftOverNumber < RunningLengthWord.LARGEST_LITERAL_COUNT | |
252 | 252 | - numberOfLiteralWords ? leftOverNumber |
253 | : RunningLengthWord.largestLiteralCount - numberOfLiteralWords; | |
253 | : RunningLengthWord.LARGEST_LITERAL_COUNT - numberOfLiteralWords; | |
254 | 254 | this.rlw.setNumberOfLiteralWords(numberOfLiteralWords+ whatWeCanAdd); |
255 | 255 | leftOverNumber -= whatWeCanAdd; |
256 | 256 | push_back(data, start, whatWeCanAdd); |
285 | 285 | this.rlw.setRunningBit(true); |
286 | 286 | } |
287 | 287 | final long runLen = this.rlw.getRunningLength(); |
288 | final long whatWeCanAdd = number < RunningLengthWord.largestRunningLengthCount | |
289 | - runLen ? number : RunningLengthWord.largestRunningLengthCount - runLen; | |
288 | final long whatWeCanAdd = number < RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT | |
289 | - runLen ? number : RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT - runLen; | |
290 | 290 | this.rlw.setRunningLength(runLen + whatWeCanAdd); |
291 | 291 | number -= whatWeCanAdd; |
292 | while(number >= RunningLengthWord.largestRunningLengthCount) { | |
292 | while(number >= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT) { | |
293 | 293 | push_back(0); |
294 | 294 | this.rlw.position = this.actualSizeInWords - 1; |
295 | 295 | if (v) |
296 | 296 | this.rlw.setRunningBit(true); |
297 | this.rlw.setRunningLength(RunningLengthWord.largestRunningLengthCount); | |
298 | number -= RunningLengthWord.largestRunningLengthCount; | |
297 | this.rlw.setRunningLength(RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); | |
298 | number -= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT; | |
299 | 299 | } |
300 | 300 | if (number > 0) { |
301 | 301 | push_back(0); |
321 | 321 | int leftOverNumber = number; |
322 | 322 | while (leftOverNumber > 0) { |
323 | 323 | final int numberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); |
324 | final int whatWeCanAdd = leftOverNumber < RunningLengthWord.largestLiteralCount | |
324 | final int whatWeCanAdd = leftOverNumber < RunningLengthWord.LARGEST_LITERAL_COUNT | |
325 | 325 | - numberOfLiteralWords ? leftOverNumber |
326 | : RunningLengthWord.largestLiteralCount | |
326 | : RunningLengthWord.LARGEST_LITERAL_COUNT | |
327 | 327 | - numberOfLiteralWords; |
328 | 328 | this.rlw.setNumberOfLiteralWords(numberOfLiteralWords + whatWeCanAdd); |
329 | 329 | leftOverNumber -= whatWeCanAdd; |
641 | 641 | } |
642 | 642 | |
643 | 643 | final long runLen = this.rlw.getRunningLength(); |
644 | final long whatWeCanAdd = number < RunningLengthWord.largestRunningLengthCount | |
644 | final long whatWeCanAdd = number < RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT | |
645 | 645 | - runLen ? number |
646 | : RunningLengthWord.largestRunningLengthCount - runLen; | |
646 | : RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT - runLen; | |
647 | 647 | this.rlw.setRunningLength(runLen + whatWeCanAdd); |
648 | 648 | number -= whatWeCanAdd; |
649 | 649 | |
650 | while (number >= RunningLengthWord.largestRunningLengthCount) { | |
650 | while (number >= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT) { | |
651 | 651 | push_back(0); |
652 | 652 | this.rlw.position = this.actualSizeInWords - 1; |
653 | 653 | if (v) |
654 | 654 | this.rlw.setRunningBit(true); |
655 | this.rlw.setRunningLength(RunningLengthWord.largestRunningLengthCount); | |
656 | number -= RunningLengthWord.largestRunningLengthCount; | |
655 | this.rlw.setRunningLength(RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); | |
656 | number -= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT; | |
657 | 657 | } |
658 | 658 | if (number > 0) { |
659 | 659 | push_back(0); |
833 | 833 | */ |
834 | 834 | public IntIterator clearIntIterator() { |
835 | 835 | return new ClearIntIterator(this.getEWAHIterator(), this.sizeInBits); |
836 | } | |
837 | ||
838 | /** | |
839 | * Iterator over the chunks of bits. | |
840 | * | |
841 | * The current bitmap is not modified. | |
842 | * | |
843 | * @return the chunk iterator | |
844 | */ | |
845 | public ChunkIterator chunkIterator() { | |
846 | return new ChunkIteratorImpl(this.getEWAHIterator(), sizeInBits); | |
836 | 847 | } |
837 | 848 | |
838 | 849 | /** |
1175 | 1186 | } |
1176 | 1187 | return false; |
1177 | 1188 | } |
1178 | ||
1189 | ||
1190 | /** | |
1191 | * getFirstSetBit is a light-weight method that returns the | |
1192 | * location of the first set bit (=1) or -1 if there is none. | |
1193 | * | |
1194 | * @return location of the first set bit or -1 | |
1195 | */ | |
1196 | public int getFirstSetBit() { | |
1197 | int nword = 0; | |
1198 | for(int pos = 0; pos < this.actualSizeInWords;++pos) { | |
1199 | long rl = (this.buffer[pos] >>> 1) & RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT; | |
1200 | boolean rb = (this.buffer[pos] & 1) != 0; | |
1201 | if((rl > 0) && rb ) { | |
1202 | return nword * WORD_IN_BITS; | |
1203 | } | |
1204 | nword += rl; | |
1205 | long lw = (this.buffer[pos] >>> (1 + RunningLengthWord.RUNNING_LENGTH_BITS)); | |
1206 | if(lw > 0) { | |
1207 | long word = this.buffer[pos + 1]; | |
1208 | long T = word & -word; | |
1209 | return nword * WORD_IN_BITS + Long.bitCount(T - 1); | |
1210 | } | |
1211 | } | |
1212 | return -1; | |
1213 | } | |
1214 | ||
1215 | ||
1179 | 1216 | /** |
1180 | 1217 | * Set the bit at position i to true, the bits must be set in (strictly) |
1181 | 1218 | * increasing order. For example, set(15) and then set(7) will fail. You |
1618 | 1655 | public void composeToContainer(final EWAHCompressedBitmap a, |
1619 | 1656 | final EWAHCompressedBitmap container) { |
1620 | 1657 | container.clear(); |
1621 | final IntIterator i = a.intIterator(); | |
1622 | final IntIterator j = intIterator(); | |
1658 | final ChunkIterator iterator = chunkIterator(); | |
1659 | final ChunkIterator aIterator = a.chunkIterator(); | |
1623 | 1660 | int index = 0; |
1624 | while(i.hasNext() && j.hasNext()) { | |
1625 | int iPosition = i.next(); | |
1626 | while(j.hasNext()) { | |
1627 | int jPosition = j.next(); | |
1628 | if(iPosition == index++) { | |
1629 | //todo: consecutive ones could be optimized | |
1630 | container.set(jPosition); | |
1631 | break; | |
1632 | } | |
1661 | while(iterator.hasNext() && aIterator.hasNext()) { | |
1662 | if (!iterator.nextBit()) { | |
1663 | int length = iterator.nextLength(); | |
1664 | index += length; | |
1665 | container.setSizeInBits(index, false); | |
1666 | iterator.move(length); | |
1667 | } else { | |
1668 | int length = Math.min(iterator.nextLength(), aIterator.nextLength()); | |
1669 | index += length; | |
1670 | container.setSizeInBits(index, aIterator.nextBit()); | |
1671 | iterator.move(length); | |
1672 | aIterator.move(length); | |
1633 | 1673 | } |
1634 | 1674 | } |
1635 | 1675 | container.setSizeInBits(sizeInBits, false); |
30 | 30 | * @return the number of literal words |
31 | 31 | */ |
32 | 32 | public int getNumberOfLiteralWords() { |
33 | return (int) (this.parent.buffer[this.position] >>> (1 + runningLengthBits)); | |
33 | return (int) (this.parent.buffer[this.position] >>> (1 + RUNNING_LENGTH_BITS)); | |
34 | 34 | } |
35 | 35 | |
36 | 36 | /** |
48 | 48 | * @return the running length |
49 | 49 | */ |
50 | 50 | public long getRunningLength() { |
51 | return (this.parent.buffer[this.position] >>> 1) & largestRunningLengthCount; | |
51 | return (this.parent.buffer[this.position] >>> 1) & LARGEST_RUNNING_LENGTH_COUNT; | |
52 | 52 | } |
53 | 53 | |
54 | 54 | /** |
57 | 57 | * @param number the new number of literal words |
58 | 58 | */ |
59 | 59 | public void setNumberOfLiteralWords(final long number) { |
60 | this.parent.buffer[this.position] |= notRunningLengthPlusRunningBit; | |
61 | this.parent.buffer[this.position] &= (number << (runningLengthBits + 1)) | |
62 | | runningLengthPlusRunningBit; | |
60 | this.parent.buffer[this.position] |= NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT; | |
61 | this.parent.buffer[this.position] &= (number << (RUNNING_LENGTH_BITS + 1)) | |
62 | | RUNNING_LENGTH_PLUS_RUNNING_BIT; | |
63 | 63 | } |
64 | 64 | |
65 | 65 | /** |
80 | 80 | * @param number the new running length |
81 | 81 | */ |
82 | 82 | public void setRunningLength(final long number) { |
83 | this.parent.buffer[this.position] |= shiftedLargestRunningLengthCount; | |
83 | this.parent.buffer[this.position] |= SHIFTED_LARGEST_RUNNING_LENGTH_COUNT; | |
84 | 84 | this.parent.buffer[this.position] &= (number << 1) |
85 | | notShiftedLargestRunningLengthCount; | |
85 | | NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT; | |
86 | 86 | } |
87 | 87 | |
88 | 88 | /** |
128 | 128 | * number of bits dedicated to marking of the running length of clean |
129 | 129 | * words |
130 | 130 | */ |
131 | public static final int runningLengthBits = 32; | |
131 | public static final int RUNNING_LENGTH_BITS = 32; | |
132 | 132 | |
133 | private static final int literalBits = 64 - 1 - runningLengthBits; | |
133 | private static final int LITERAL_BITS = 64 - 1 - RUNNING_LENGTH_BITS; | |
134 | 134 | |
135 | 135 | /** |
136 | 136 | * largest number of literal words in a run. |
137 | 137 | */ |
138 | public static final int largestLiteralCount = (1 << literalBits) - 1; | |
138 | public static final int LARGEST_LITERAL_COUNT = (1 << LITERAL_BITS) - 1; | |
139 | 139 | |
140 | 140 | /** |
141 | 141 | * largest number of clean words in a run |
142 | 142 | */ |
143 | public static final long largestRunningLengthCount = (1l << runningLengthBits) - 1; | |
143 | public static final long LARGEST_RUNNING_LENGTH_COUNT = (1l << RUNNING_LENGTH_BITS) - 1; | |
144 | 144 | |
145 | private static final long runningLengthPlusRunningBit = (1l << (runningLengthBits + 1)) - 1; | |
145 | private static final long RUNNING_LENGTH_PLUS_RUNNING_BIT = (1l << (RUNNING_LENGTH_BITS + 1)) - 1; | |
146 | 146 | |
147 | private static final long shiftedLargestRunningLengthCount = largestRunningLengthCount << 1; | |
147 | private static final long SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = LARGEST_RUNNING_LENGTH_COUNT << 1; | |
148 | 148 | |
149 | private static final long notRunningLengthPlusRunningBit = ~runningLengthPlusRunningBit; | |
149 | private static final long NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT = ~RUNNING_LENGTH_PLUS_RUNNING_BIT; | |
150 | 150 | |
151 | private static final long notShiftedLargestRunningLengthCount = ~shiftedLargestRunningLengthCount; | |
151 | private static final long NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = ~SHIFTED_LARGEST_RUNNING_LENGTH_COUNT; | |
152 | 152 | |
153 | 153 | }⏎ |
0 | package com.googlecode.javaewah32; | |
1 | ||
2 | /* | |
3 | * Copyright 2009-2014, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves | |
4 | * Licensed under the Apache License, Version 2.0. | |
5 | */ | |
6 | ||
7 | import com.googlecode.javaewah.ChunkIterator; | |
8 | ||
9 | import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; | |
10 | ||
11 | /** | |
12 | * The ChunkIteratorImpl32 is the 32 bit implementation of the ChunkIterator | |
13 | * interface, which efficiently returns the chunks of ones and zeros represented by an | |
14 | * EWAHIterator. | |
15 | * | |
16 | * @author Gregory Ssi-Yan-Kai | |
17 | */ | |
18 | final class ChunkIteratorImpl32 implements ChunkIterator { | |
19 | ||
20 | private final EWAHIterator32 ewahIter; | |
21 | private final int sizeInBits; | |
22 | private final int[] ewahBuffer; | |
23 | private int position; | |
24 | private boolean runningBit; | |
25 | private int runningLength; | |
26 | private int word; | |
27 | private int wordMask; | |
28 | private int wordPosition; | |
29 | private int wordLength; | |
30 | private boolean hasNext; | |
31 | private Boolean nextBit; | |
32 | private int nextLength; | |
33 | ||
34 | ChunkIteratorImpl32(EWAHIterator32 ewahIter, int sizeInBits) { | |
35 | this.ewahIter = ewahIter; | |
36 | this.sizeInBits = sizeInBits; | |
37 | this.ewahBuffer = ewahIter.buffer(); | |
38 | this.hasNext = moveToNextRLW(); | |
39 | } | |
40 | ||
41 | @Override | |
42 | public boolean hasNext() { | |
43 | return this.hasNext; | |
44 | } | |
45 | ||
46 | @Override | |
47 | public boolean nextBit() { | |
48 | return this.nextBit; | |
49 | } | |
50 | ||
51 | @Override | |
52 | public int nextLength() { | |
53 | return this.nextLength; | |
54 | } | |
55 | ||
56 | @Override | |
57 | public void move() { | |
58 | move(this.nextLength); | |
59 | } | |
60 | ||
61 | @Override | |
62 | public void move(int bits) { | |
63 | this.nextLength -= bits; | |
64 | if(this.nextLength <= 0) { | |
65 | do { | |
66 | this.nextBit = null; | |
67 | updateNext(); | |
68 | this.hasNext = moveToNextRLW(); | |
69 | } while(this.nextLength < 0); | |
70 | } | |
71 | } | |
72 | ||
73 | private boolean moveToNextRLW() { | |
74 | while (!runningHasNext() && !literalHasNext()) { | |
75 | if (!hasNextRLW()) { | |
76 | return this.nextBit!=null; | |
77 | } | |
78 | setRLW(nextRLW()); | |
79 | updateNext(); | |
80 | } | |
81 | return true; | |
82 | } | |
83 | ||
84 | private void setRLW(RunningLengthWord32 rlw) { | |
85 | this.runningLength = Math.min(this.sizeInBits, | |
86 | this.position + WORD_IN_BITS * rlw.getRunningLength()); | |
87 | this.runningBit = rlw.getRunningBit(); | |
88 | this.wordPosition = this.ewahIter.literalWords(); | |
89 | this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); | |
90 | } | |
91 | ||
92 | private boolean runningHasNext() { | |
93 | return this.position < this.runningLength; | |
94 | } | |
95 | ||
96 | private boolean literalHasNext() { | |
97 | while (this.word == 0 && this.wordPosition < this.wordLength) { | |
98 | this.word = this.ewahBuffer[this.wordPosition++]; | |
99 | if (this.wordPosition == this.wordLength && !hasNextRLW()) { | |
100 | final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; | |
101 | if (usedBitsInLast > 0) { | |
102 | this.word &= ((~0) >>> (WORD_IN_BITS - usedBitsInLast)); | |
103 | } | |
104 | } | |
105 | this.wordMask = 1; | |
106 | } | |
107 | return this.word != 0 || (!hasNextRLW() && this.position < this.sizeInBits); | |
108 | } | |
109 | ||
110 | private boolean hasNextRLW() { | |
111 | return this.ewahIter.hasNext(); | |
112 | } | |
113 | ||
114 | private RunningLengthWord32 nextRLW() { | |
115 | return this.ewahIter.next(); | |
116 | } | |
117 | ||
118 | private void updateNext() { | |
119 | if(runningHasNext()) { | |
120 | if(this.nextBit == null || this.nextBit == this.runningBit) { | |
121 | this.nextBit = this.runningBit; | |
122 | int offset = runningOffset(); | |
123 | this.nextLength += offset; | |
124 | movePosition(offset); | |
125 | updateNext(); | |
126 | } | |
127 | } else if (literalHasNext()) { | |
128 | boolean b = currentWordBit(); | |
129 | if(this.nextBit == null || this.nextBit == b) { | |
130 | this.nextBit = b; | |
131 | this.nextLength++; | |
132 | movePosition(1); | |
133 | shiftWordMask(); | |
134 | updateNext(); | |
135 | } | |
136 | } else { | |
137 | moveToNextRLW(); | |
138 | } | |
139 | } | |
140 | ||
141 | private int runningOffset() { | |
142 | return this.runningLength - this.position; | |
143 | } | |
144 | ||
145 | private void movePosition(int offset) { | |
146 | this.position += offset; | |
147 | } | |
148 | ||
149 | private boolean currentWordBit() { | |
150 | return (this.word & this.wordMask) != 0; | |
151 | } | |
152 | ||
153 | private void shiftWordMask() { | |
154 | this.word &= ~this.wordMask; | |
155 | this.wordMask = this.wordMask << 1; | |
156 | } | |
157 | ||
158 | } |
1 | 1 | |
2 | 2 | import com.googlecode.javaewah.IntIterator; |
3 | 3 | |
4 | import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS;; | |
4 | import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; | |
5 | 5 | |
6 | 6 | /* |
7 | 7 | * Copyright 2009-2014, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves |
4 | 4 | * Licensed under the Apache License, Version 2.0. |
5 | 5 | */ |
6 | 6 | |
7 | import com.googlecode.javaewah.ChunkIterator; | |
7 | 8 | import com.googlecode.javaewah.IntIterator; |
8 | 9 | import com.googlecode.javaewah.LogicalElement; |
9 | 10 | import com.googlecode.javaewah32.symmetric.RunningBitmapMerge32; |
852 | 853 | } |
853 | 854 | |
854 | 855 | /** |
856 | * Iterator over the chunks of bits. | |
857 | * | |
858 | * The current bitmap is not modified. | |
859 | * | |
860 | * @return the chunk iterator | |
861 | */ | |
862 | public ChunkIterator chunkIterator() { | |
863 | return new ChunkIteratorImpl32(this.getEWAHIterator(), sizeInBits); | |
864 | } | |
865 | ||
866 | /** | |
855 | 867 | * Iterates over the positions of the true values. This is similar to |
856 | 868 | * intIterator(), but it uses Java generics. |
857 | 869 | * |
1214 | 1226 | } |
1215 | 1227 | |
1216 | 1228 | /** |
1229 | * getFirstSetBit is a light-weight method that returns the | |
1230 | * location of the first set bit (=1) or -1 if there is none. | |
1231 | * | |
1232 | * @return location of the first set bit or -1 | |
1233 | */ | |
1234 | public int getFirstSetBit() { | |
1235 | int nword = 0; | |
1236 | for(int pos = 0; pos < this.actualSizeInWords;++pos) { | |
1237 | int rl = (this.buffer[pos] >>> 1) & RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT; | |
1238 | boolean rb = (this.buffer[pos] & 1) != 0; | |
1239 | if((rl > 0) && rb ) { | |
1240 | return nword * WORD_IN_BITS; | |
1241 | } | |
1242 | nword += rl; | |
1243 | int lw = (this.buffer[pos] >>> (1 + RunningLengthWord32.RUNNING_LENGTH_BITS)); | |
1244 | if(lw > 0) { | |
1245 | int word = this.buffer[pos + 1]; | |
1246 | int T = word & -word; | |
1247 | return nword * WORD_IN_BITS + Integer.bitCount(T - 1); | |
1248 | } | |
1249 | } | |
1250 | return -1; | |
1251 | } | |
1252 | /** | |
1217 | 1253 | * Set the bit at position i to true, the bits must be set in (strictly) |
1218 | 1254 | * increasing order. For example, set(15) and then set(7) will fail. You |
1219 | 1255 | * must do set(7) and then set(15). |
1685 | 1721 | public void composeToContainer(final EWAHCompressedBitmap32 a, |
1686 | 1722 | final EWAHCompressedBitmap32 container) { |
1687 | 1723 | container.clear(); |
1688 | final IntIterator i = a.intIterator(); | |
1689 | final IntIterator j = intIterator(); | |
1724 | final ChunkIterator iterator = chunkIterator(); | |
1725 | final ChunkIterator aIterator = a.chunkIterator(); | |
1690 | 1726 | int index = 0; |
1691 | while(i.hasNext() && j.hasNext()) { | |
1692 | int iPosition = i.next(); | |
1693 | while(j.hasNext()) { | |
1694 | int jPosition = j.next(); | |
1695 | if(iPosition == index++) { | |
1696 | //consecutive ones could be optimized | |
1697 | container.set(jPosition); | |
1698 | break; | |
1699 | } | |
1727 | while(iterator.hasNext() && aIterator.hasNext()) { | |
1728 | if(!iterator.nextBit()) { | |
1729 | int length = iterator.nextLength(); | |
1730 | index += length; | |
1731 | container.setSizeInBits(index, false); | |
1732 | iterator.move(length); | |
1733 | } else { | |
1734 | int length = Math.min(iterator.nextLength(), aIterator.nextLength()); | |
1735 | index += length; | |
1736 | container.setSizeInBits(index, aIterator.nextBit()); | |
1737 | iterator.move(length); | |
1738 | aIterator.move(length); | |
1700 | 1739 | } |
1701 | 1740 | } |
1702 | 1741 | container.setSizeInBits(sizeInBits, false); |
6 | 6 | |
7 | 7 | import com.googlecode.javaewah.IntIterator; |
8 | 8 | |
9 | import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS;; | |
9 | import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; | |
10 | 10 | |
11 | 11 | /** |
12 | 12 | * The IntIteratorImpl32 is the 32 bit implementation of the IntIterator |
18 | 18 | */ |
19 | 19 | @SuppressWarnings("javadoc") |
20 | 20 | public class EWAHCompressedBitmapTest { |
21 | ||
22 | @Test | |
23 | public void chunkIterator() { | |
24 | EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(0, 1, 2, 3, 4, 7, 8, 9, 10); | |
25 | ||
26 | ChunkIterator iterator = bitmap.chunkIterator(); | |
27 | Assert.assertTrue(iterator.hasNext()); | |
28 | Assert.assertTrue(iterator.nextBit()); | |
29 | Assert.assertEquals(5, iterator.nextLength()); | |
30 | iterator.move(2); | |
31 | Assert.assertTrue(iterator.hasNext()); | |
32 | Assert.assertTrue(iterator.nextBit()); | |
33 | Assert.assertEquals(3, iterator.nextLength()); | |
34 | iterator.move(); | |
35 | Assert.assertTrue(iterator.hasNext()); | |
36 | Assert.assertFalse(iterator.nextBit()); | |
37 | Assert.assertEquals(2, iterator.nextLength()); | |
38 | iterator.move(5); | |
39 | Assert.assertTrue(iterator.hasNext()); | |
40 | Assert.assertTrue(iterator.nextBit()); | |
41 | Assert.assertEquals(1, iterator.nextLength()); | |
42 | iterator.move(); | |
43 | Assert.assertFalse(iterator.hasNext()); | |
44 | } | |
45 | ||
46 | @Test | |
47 | public void chunkIteratorOverBitmapOfZeros() { | |
48 | EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); | |
49 | bitmap.setSizeInBits(WORD_IN_BITS, false); | |
50 | ||
51 | ChunkIterator iterator = bitmap.chunkIterator(); | |
52 | Assert.assertTrue(iterator.hasNext()); | |
53 | Assert.assertFalse(iterator.nextBit()); | |
54 | Assert.assertEquals(WORD_IN_BITS, iterator.nextLength()); | |
55 | iterator.move(); | |
56 | Assert.assertFalse(iterator.hasNext()); | |
57 | } | |
58 | ||
59 | @Test | |
60 | public void chunkIteratorOverBitmapOfZerosAndOnes() { | |
61 | EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); | |
62 | bitmap.setSizeInBits(WORD_IN_BITS + 10, false); | |
63 | bitmap.setSizeInBits(2 * WORD_IN_BITS, true); | |
64 | ||
65 | ChunkIterator iterator = bitmap.chunkIterator(); | |
66 | Assert.assertTrue(iterator.hasNext()); | |
67 | Assert.assertFalse(iterator.nextBit()); | |
68 | Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); | |
69 | iterator.move(); | |
70 | Assert.assertTrue(iterator.hasNext()); | |
71 | Assert.assertTrue(iterator.nextBit()); | |
72 | Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); | |
73 | iterator.move(); | |
74 | Assert.assertFalse(iterator.hasNext()); | |
75 | } | |
76 | ||
77 | @Test | |
78 | public void chunkIteratorOverBitmapOfOnesAndZeros() { | |
79 | EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); | |
80 | bitmap.setSizeInBits(WORD_IN_BITS - 10, true); | |
81 | bitmap.setSizeInBits(2 * WORD_IN_BITS, false); | |
82 | ||
83 | ChunkIterator iterator = bitmap.chunkIterator(); | |
84 | Assert.assertTrue(iterator.hasNext()); | |
85 | Assert.assertTrue(iterator.nextBit()); | |
86 | Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); | |
87 | iterator.move(); | |
88 | Assert.assertTrue(iterator.hasNext()); | |
89 | Assert.assertFalse(iterator.nextBit()); | |
90 | Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); | |
91 | iterator.move(); | |
92 | Assert.assertFalse(iterator.hasNext()); | |
93 | } | |
21 | 94 | |
22 | 95 | @Test |
23 | 96 | public void simpleCompose() { |
4 | 4 | * Licensed under the Apache License, Version 2.0. |
5 | 5 | */ |
6 | 6 | |
7 | import com.googlecode.javaewah.ChunkIterator; | |
7 | 8 | import com.googlecode.javaewah.FastAggregation; |
8 | 9 | import com.googlecode.javaewah.IntIterator; |
9 | 10 | import org.junit.Assert; |
19 | 20 | */ |
20 | 21 | @SuppressWarnings("javadoc") |
21 | 22 | public class EWAHCompressedBitmap32Test { |
23 | ||
24 | @Test | |
25 | public void chunkIterator() { | |
26 | EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(0, 1, 2, 3, 4, 7, 8, 9, 10); | |
27 | ||
28 | ChunkIterator iterator = bitmap.chunkIterator(); | |
29 | Assert.assertTrue(iterator.hasNext()); | |
30 | Assert.assertTrue(iterator.nextBit()); | |
31 | Assert.assertEquals(5, iterator.nextLength()); | |
32 | iterator.move(2); | |
33 | Assert.assertTrue(iterator.hasNext()); | |
34 | Assert.assertTrue(iterator.nextBit()); | |
35 | Assert.assertEquals(3, iterator.nextLength()); | |
36 | iterator.move(); | |
37 | Assert.assertTrue(iterator.hasNext()); | |
38 | Assert.assertFalse(iterator.nextBit()); | |
39 | Assert.assertEquals(2, iterator.nextLength()); | |
40 | iterator.move(5); | |
41 | Assert.assertTrue(iterator.hasNext()); | |
42 | Assert.assertTrue(iterator.nextBit()); | |
43 | Assert.assertEquals(1, iterator.nextLength()); | |
44 | iterator.move(); | |
45 | Assert.assertFalse(iterator.hasNext()); | |
46 | } | |
47 | ||
48 | @Test | |
49 | public void chunkIteratorOverBitmapOfZeros() { | |
50 | EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); | |
51 | bitmap.setSizeInBits(WORD_IN_BITS, false); | |
52 | ||
53 | ChunkIterator iterator = bitmap.chunkIterator(); | |
54 | Assert.assertTrue(iterator.hasNext()); | |
55 | Assert.assertFalse(iterator.nextBit()); | |
56 | Assert.assertEquals(WORD_IN_BITS, iterator.nextLength()); | |
57 | iterator.move(); | |
58 | Assert.assertFalse(iterator.hasNext()); | |
59 | } | |
60 | ||
61 | @Test | |
62 | public void chunkIteratorOverBitmapOfZerosAndOnes() { | |
63 | EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); | |
64 | bitmap.setSizeInBits(WORD_IN_BITS + 10, false); | |
65 | bitmap.setSizeInBits(2 * WORD_IN_BITS, true); | |
66 | ||
67 | ChunkIterator iterator = bitmap.chunkIterator(); | |
68 | Assert.assertTrue(iterator.hasNext()); | |
69 | Assert.assertFalse(iterator.nextBit()); | |
70 | Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); | |
71 | iterator.move(); | |
72 | Assert.assertTrue(iterator.hasNext()); | |
73 | Assert.assertTrue(iterator.nextBit()); | |
74 | Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); | |
75 | iterator.move(); | |
76 | Assert.assertFalse(iterator.hasNext()); | |
77 | } | |
78 | ||
79 | @Test | |
80 | public void chunkIteratorOverBitmapOfOnesAndZeros() { | |
81 | EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); | |
82 | bitmap.setSizeInBits(WORD_IN_BITS - 10, true); | |
83 | bitmap.setSizeInBits(2 * WORD_IN_BITS, false); | |
84 | ||
85 | ChunkIterator iterator = bitmap.chunkIterator(); | |
86 | Assert.assertTrue(iterator.hasNext()); | |
87 | Assert.assertTrue(iterator.nextBit()); | |
88 | Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); | |
89 | iterator.move(); | |
90 | Assert.assertTrue(iterator.hasNext()); | |
91 | Assert.assertFalse(iterator.nextBit()); | |
92 | Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); | |
93 | iterator.move(); | |
94 | Assert.assertFalse(iterator.hasNext()); | |
95 | } | |
22 | 96 | |
23 | 97 | @Test |
24 | 98 | public void simpleCompose() { |