Codebase list golang-github-beorn7-perks / 2c62da0
Imported Upstream version 0.0~git20150223.0.b965b61 Dmitry Smirnov 8 years ago
11 changed file(s) with 3402 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 # Perks for Go (golang.org)
1
2 Perks contains the Go package quantile that computes approximate quantiles over
3 an unbounded data stream within low memory and CPU bounds.
4
5 For more information and examples, see:
6 http://godoc.org/github.com/bmizerany/perks
7
8 A very special thank you and shout out to Graham Cormode (Rutgers University),
9 Flip Korn (AT&T Labs–Research), S. Muthukrishnan (Rutgers University), and
10 Divesh Srivastava (AT&T Labs–Research) for their research and publication of
11 [Effective Computation of Biased Quantiles over Data Streams](http://www.cs.rutgers.edu/~muthu/bquant.pdf)
12
13 Thank you, also:
14 * Armon Dadgar (@armon)
15 * Andrew Gerrand (@nf)
16 * Brad Fitzpatrick (@bradfitz)
17 * Keith Rarick (@kr)
18
19 FAQ:
20
21 Q: Why not move the quantile package into the project root?
22 A: I want to add more packages to perks later.
23
24 Copyright (C) 2013 Blake Mizerany
25
26 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
27
28 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0 package histogram
1
2 import (
3 "math/rand"
4 "testing"
5 )
6
7 func BenchmarkInsert10Bins(b *testing.B) {
8 b.StopTimer()
9 h := New(10)
10 b.StartTimer()
11 for i := 0; i < b.N; i++ {
12 f := rand.ExpFloat64()
13 h.Insert(f)
14 }
15 }
16
17 func BenchmarkInsert100Bins(b *testing.B) {
18 b.StopTimer()
19 h := New(100)
20 b.StartTimer()
21 for i := 0; i < b.N; i++ {
22 f := rand.ExpFloat64()
23 h.Insert(f)
24 }
25 }
0 // Package histogram provides a Go implementation of BigML's histogram package
1 // for Clojure/Java. It is currently experimental.
2 package histogram
3
4 import (
5 "container/heap"
6 "math"
7 "sort"
8 )
9
// Bin is a single histogram bucket.
type Bin struct {
	Count int     // number of points accumulated in this bin
	Sum   float64 // sum of the accumulated values
}

// Update folds x into b by adding x's Count and Sum to b's.
func (b *Bin) Update(x *Bin) {
	b.Count, b.Sum = b.Count+x.Count, b.Sum+x.Sum
}

// Mean returns Sum divided by Count. Count is not checked for zero.
func (b *Bin) Mean() float64 {
	n := float64(b.Count)
	return b.Sum / n
}
23
24 type Bins []*Bin
25
26 func (bs Bins) Len() int { return len(bs) }
27 func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
28 func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] }
29
30 func (bs *Bins) Push(x interface{}) {
31 *bs = append(*bs, x.(*Bin))
32 }
33
34 func (bs *Bins) Pop() interface{} {
35 return bs.remove(len(*bs) - 1)
36 }
37
38 func (bs *Bins) remove(n int) *Bin {
39 if n < 0 || len(*bs) < n {
40 return nil
41 }
42 x := (*bs)[n]
43 *bs = append((*bs)[:n], (*bs)[n+1:]...)
44 return x
45 }
46
47 type Histogram struct {
48 res *reservoir
49 }
50
51 func New(maxBins int) *Histogram {
52 return &Histogram{res: newReservoir(maxBins)}
53 }
54
55 func (h *Histogram) Insert(f float64) {
56 h.res.insert(&Bin{1, f})
57 h.res.compress()
58 }
59
60 func (h *Histogram) Bins() Bins {
61 return h.res.bins
62 }
63
64 type reservoir struct {
65 n int
66 maxBins int
67 bins Bins
68 }
69
70 func newReservoir(maxBins int) *reservoir {
71 return &reservoir{maxBins: maxBins}
72 }
73
74 func (r *reservoir) insert(bin *Bin) {
75 r.n += bin.Count
76 i := sort.Search(len(r.bins), func(i int) bool {
77 return r.bins[i].Mean() >= bin.Mean()
78 })
79 if i < 0 || i == r.bins.Len() {
80 // TODO(blake): Maybe use an .insert(i, bin) instead of
81 // performing the extra work of a heap.Push.
82 heap.Push(&r.bins, bin)
83 return
84 }
85 r.bins[i].Update(bin)
86 }
87
88 func (r *reservoir) compress() {
89 for r.bins.Len() > r.maxBins {
90 minGapIndex := -1
91 minGap := math.MaxFloat64
92 for i := 0; i < r.bins.Len()-1; i++ {
93 gap := gapWeight(r.bins[i], r.bins[i+1])
94 if minGap > gap {
95 minGap = gap
96 minGapIndex = i
97 }
98 }
99 prev := r.bins[minGapIndex]
100 next := r.bins.remove(minGapIndex + 1)
101 prev.Update(next)
102 }
103 }
104
105 func gapWeight(prev, next *Bin) float64 {
106 return next.Mean() - prev.Mean()
107 }
0 package histogram
1
2 import (
3 "math/rand"
4 "testing"
5 )
6
7 func TestHistogram(t *testing.T) {
8 const numPoints = 1e6
9 const maxBins = 3
10
11 h := New(maxBins)
12 for i := 0; i < numPoints; i++ {
13 f := rand.ExpFloat64()
14 h.Insert(f)
15 }
16
17 bins := h.Bins()
18 if g := len(bins); g > maxBins {
19 t.Fatalf("got %d bins, wanted <= %d", g, maxBins)
20 }
21
22 for _, b := range bins {
23 t.Logf("%+v", b)
24 }
25
26 if g := count(h.Bins()); g != numPoints {
27 t.Fatalf("binned %d points, wanted %d", g, numPoints)
28 }
29 }
30
31 func count(bins Bins) int {
32 binCounts := 0
33 for _, b := range bins {
34 binCounts += b.Count
35 }
36 return binCounts
37 }
0 package quantile
1
2 import (
3 "testing"
4 )
5
6 func BenchmarkInsertTargeted(b *testing.B) {
7 b.ReportAllocs()
8
9 s := NewTargeted(Targets)
10 b.ResetTimer()
11 for i := float64(0); i < float64(b.N); i++ {
12 s.Insert(i)
13 }
14 }
15
16 func BenchmarkInsertTargetedSmallEpsilon(b *testing.B) {
17 s := NewTargeted(TargetsSmallEpsilon)
18 b.ResetTimer()
19 for i := float64(0); i < float64(b.N); i++ {
20 s.Insert(i)
21 }
22 }
23
24 func BenchmarkInsertBiased(b *testing.B) {
25 s := NewLowBiased(0.01)
26 b.ResetTimer()
27 for i := float64(0); i < float64(b.N); i++ {
28 s.Insert(i)
29 }
30 }
31
32 func BenchmarkInsertBiasedSmallEpsilon(b *testing.B) {
33 s := NewLowBiased(0.0001)
34 b.ResetTimer()
35 for i := float64(0); i < float64(b.N); i++ {
36 s.Insert(i)
37 }
38 }
39
40 func BenchmarkQuery(b *testing.B) {
41 s := NewTargeted(Targets)
42 for i := float64(0); i < 1e6; i++ {
43 s.Insert(i)
44 }
45 b.ResetTimer()
46 n := float64(b.N)
47 for i := float64(0); i < n; i++ {
48 s.Query(i / n)
49 }
50 }
51
52 func BenchmarkQuerySmallEpsilon(b *testing.B) {
53 s := NewTargeted(TargetsSmallEpsilon)
54 for i := float64(0); i < 1e6; i++ {
55 s.Insert(i)
56 }
57 b.ResetTimer()
58 n := float64(b.N)
59 for i := float64(0); i < n; i++ {
60 s.Query(i / n)
61 }
62 }
0 // +build go1.1
1
2 package quantile_test
3
4 import (
5 "bufio"
6 "fmt"
7 "log"
8 "os"
9 "strconv"
10 "time"
11
12 "github.com/beorn7/perks/quantile"
13 )
14
15 func Example_simple() {
16 ch := make(chan float64)
17 go sendFloats(ch)
18
19 // Compute the 50th, 90th, and 99th percentile.
20 q := quantile.NewTargeted(map[float64]float64{
21 0.50: 0.005,
22 0.90: 0.001,
23 0.99: 0.0001,
24 })
25 for v := range ch {
26 q.Insert(v)
27 }
28
29 fmt.Println("perc50:", q.Query(0.50))
30 fmt.Println("perc90:", q.Query(0.90))
31 fmt.Println("perc99:", q.Query(0.99))
32 fmt.Println("count:", q.Count())
33 // Output:
34 // perc50: 5
35 // perc90: 16
36 // perc99: 223
37 // count: 2388
38 }
39
40 func Example_mergeMultipleStreams() {
41 // Scenario:
42 // We have multiple database shards. On each shard, there is a process
43 // collecting query response times from the database logs and inserting
44 // them into a Stream (created via NewTargeted(0.90)), much like the
45 // Simple example. These processes expose a network interface for us to
46 // ask them to serialize and send us the results of their
47 // Stream.Samples so we may Merge and Query them.
48 //
49 // NOTES:
50 // * These sample sets are small, allowing us to get them
51 // across the network much faster than sending the entire list of data
52 // points.
53 //
54 // * For this to work correctly, we must supply the same quantiles
55 // a priori the process collecting the samples supplied to NewTargeted,
56 // even if we do not plan to query them all here.
57 ch := make(chan quantile.Samples)
58 getDBQuerySamples(ch)
59 q := quantile.NewTargeted(map[float64]float64{0.90: 0.001})
60 for samples := range ch {
61 q.Merge(samples)
62 }
63 fmt.Println("perc90:", q.Query(0.90))
64 }
65
66 func Example_window() {
67 // Scenario: We want the 90th, 95th, and 99th percentiles for each
68 // minute.
69
70 ch := make(chan float64)
71 go sendStreamValues(ch)
72
73 tick := time.NewTicker(1 * time.Minute)
74 q := quantile.NewTargeted(map[float64]float64{
75 0.90: 0.001,
76 0.95: 0.0005,
77 0.99: 0.0001,
78 })
79 for {
80 select {
81 case t := <-tick.C:
82 flushToDB(t, q.Samples())
83 q.Reset()
84 case v := <-ch:
85 q.Insert(v)
86 }
87 }
88 }
89
90 func sendStreamValues(ch chan float64) {
91 // Use your imagination
92 }
93
94 func flushToDB(t time.Time, samples quantile.Samples) {
95 // Use your imagination
96 }
97
98 // This is a stub for the above example. In reality this would hit the remote
99 // servers via http or something like it.
100 func getDBQuerySamples(ch chan quantile.Samples) {}
101
// sendFloats reads "exampledata.txt" from the current directory, parses each
// line as a float64 and sends it on ch, then closes ch. Any open, parse or
// scan error terminates the program via log.Fatal.
func sendFloats(ch chan<- float64) {
	f, err := os.Open("exampledata.txt")
	if err != nil {
		log.Fatal(err)
	}
	// Release the file handle once all lines have been sent.
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		v, err := strconv.ParseFloat(sc.Text(), 64)
		if err != nil {
			log.Fatal(err)
		}
		ch <- v
	}
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
	close(ch)
}
0 8
1 5
2 26
3 12
4 5
5 235
6 13
7 6
8 28
9 30
10 3
11 3
12 3
13 3
14 5
15 2
16 33
17 7
18 2
19 4
20 7
21 12
22 14
23 5
24 8
25 3
26 10
27 4
28 5
29 3
30 6
31 6
32 209
33 20
34 3
35 10
36 14
37 3
38 4
39 6
40 8
41 5
42 11
43 7
44 3
45 2
46 3
47 3
48 212
49 5
50 222
51 4
52 10
53 10
54 5
55 6
56 3
57 8
58 3
59 10
60 254
61 220
62 2
63 3
64 5
65 24
66 5
67 4
68 222
69 7
70 3
71 3
72 223
73 8
74 15
75 12
76 14
77 14
78 3
79 2
80 2
81 3
82 13
83 3
84 11
85 4
86 4
87 6
88 5
89 7
90 13
91 5
92 3
93 5
94 2
95 5
96 3
97 5
98 2
99 7
100 15
101 17
102 14
103 3
104 6
105 6
106 3
107 17
108 5
109 4
110 7
111 6
112 4
113 4
114 8
115 6
116 8
117 3
118 9
119 3
120 6
121 3
122 4
123 5
124 3
125 3
126 660
127 4
128 6
129 10
130 3
131 6
132 3
133 2
134 5
135 13
136 2
137 4
138 4
139 10
140 4
141 8
142 4
143 3
144 7
145 9
146 9
147 3
148 10
149 37
150 3
151 13
152 4
153 12
154 3
155 6
156 10
157 8
158 5
159 21
160 2
161 3
162 8
163 3
164 2
165 3
166 3
167 4
168 12
169 2
170 4
171 8
172 8
173 4
174 3
175 2
176 20
177 1
178 6
179 32
180 2
181 11
182 6
183 18
184 3
185 8
186 11
187 3
188 212
189 3
190 4
191 2
192 6
193 7
194 12
195 11
196 3
197 2
198 16
199 10
200 6
201 4
202 6
203 3
204 2
205 7
206 3
207 2
208 2
209 2
210 2
211 5
212 6
213 4
214 3
215 10
216 3
217 4
218 6
219 5
220 3
221 4
222 4
223 5
224 6
225 4
226 3
227 4
228 4
229 5
230 7
231 5
232 5
233 3
234 2
235 7
236 2
237 4
238 12
239 4
240 5
241 6
242 2
243 4
244 4
245 8
246 4
247 15
248 13
249 7
250 16
251 5
252 3
253 23
254 5
255 5
256 7
257 3
258 2
259 9
260 8
261 7
262 5
263 8
264 11
265 4
266 10
267 76
268 4
269 47
270 4
271 3
272 2
273 7
274 4
275 2
276 3
277 37
278 10
279 4
280 2
281 20
282 5
283 4
284 4
285 10
286 10
287 4
288 3
289 7
290 23
291 240
292 7
293 13
294 5
295 5
296 3
297 3
298 2
299 5
300 4
301 2
302 8
303 7
304 19
305 2
306 23
307 8
308 7
309 2
310 5
311 3
312 8
313 3
314 8
315 13
316 5
317 5
318 5
319 2
320 3
321 23
322 4
323 9
324 8
325 4
326 3
327 3
328 5
329 220
330 2
331 3
332 4
333 6
334 14
335 3
336 53
337 6
338 2
339 5
340 18
341 6
342 3
343 219
344 6
345 5
346 2
347 5
348 3
349 6
350 5
351 15
352 4
353 3
354 17
355 3
356 2
357 4
358 7
359 2
360 3
361 3
362 4
363 4
364 3
365 2
366 664
367 6
368 3
369 23
370 5
371 5
372 16
373 5
374 8
375 2
376 4
377 2
378 24
379 12
380 3
381 2
382 3
383 5
384 8
385 3
386 5
387 4
388 3
389 14
390 3
391 5
392 8
393 2
394 3
395 7
396 9
397 4
398 2
399 3
400 6
401 8
402 4
403 3
404 4
405 6
406 5
407 3
408 3
409 6
410 3
411 19
412 4
413 4
414 6
415 3
416 6
417 3
418 5
419 22
420 5
421 4
422 4
423 3
424 8
425 11
426 4
427 9
428 7
429 6
430 13
431 4
432 4
433 4
434 6
435 17
436 9
437 3
438 3
439 3
440 4
441 3
442 221
443 5
444 11
445 3
446 4
447 2
448 12
449 6
450 3
451 5
452 7
453 5
454 7
455 4
456 9
457 7
458 14
459 37
460 19
461 217
462 16
463 3
464 5
465 2
466 2
467 7
468 19
469 7
470 6
471 7
472 4
473 24
474 5
475 11
476 4
477 7
478 7
479 9
480 13
481 3
482 4
483 3
484 6
485 28
486 4
487 4
488 5
489 5
490 2
491 5
492 6
493 4
494 4
495 6
496 10
497 5
498 4
499 3
500 2
501 3
502 3
503 6
504 5
505 5
506 4
507 3
508 2
509 3
510 7
511 4
512 6
513 18
514 16
515 8
516 16
517 4
518 5
519 8
520 6
521 9
522 13
523 1545
524 6
525 215
526 6
527 5
528 6
529 3
530 45
531 31
532 5
533 2
534 2
535 4
536 3
537 3
538 2
539 5
540 4
541 3
542 5
543 7
544 7
545 4
546 5
547 8
548 5
549 4
550 749
551 2
552 31
553 9
554 11
555 2
556 11
557 5
558 4
559 4
560 7
561 9
562 11
563 4
564 5
565 4
566 7
567 3
568 4
569 6
570 2
571 15
572 3
573 4
574 3
575 4
576 3
577 5
578 2
579 13
580 5
581 5
582 3
583 3
584 23
585 4
586 4
587 5
588 7
589 4
590 13
591 2
592 4
593 3
594 4
595 2
596 6
597 2
598 7
599 3
600 5
601 5
602 3
603 29
604 5
605 4
606 4
607 3
608 10
609 2
610 3
611 79
612 16
613 6
614 6
615 7
616 7
617 3
618 5
619 5
620 7
621 4
622 3
623 7
624 9
625 5
626 6
627 5
628 9
629 6
630 3
631 6
632 4
633 17
634 2
635 10
636 9
637 3
638 6
639 2
640 3
641 21
642 22
643 5
644 11
645 4
646 2
647 17
648 2
649 224
650 2
651 14
652 3
653 4
654 4
655 2
656 4
657 4
658 4
659 4
660 5
661 3
662 4
663 4
664 10
665 2
666 6
667 3
668 3
669 5
670 7
671 2
672 7
673 5
674 6
675 3
676 218
677 2
678 2
679 5
680 2
681 6
682 3
683 5
684 222
685 14
686 6
687 33
688 3
689 2
690 5
691 3
692 3
693 3
694 9
695 5
696 3
697 3
698 2
699 7
700 4
701 3
702 4
703 3
704 5
705 6
706 5
707 26
708 4
709 13
710 9
711 7
712 3
713 221
714 3
715 3
716 4
717 4
718 4
719 4
720 2
721 18
722 5
723 3
724 7
725 9
726 6
727 8
728 3
729 10
730 3
731 11
732 9
733 5
734 4
735 17
736 5
737 5
738 6
739 6
740 3
741 2
742 4
743 12
744 17
745 6
746 7
747 218
748 4
749 2
750 4
751 10
752 3
753 5
754 15
755 3
756 9
757 4
758 3
759 3
760 6
761 29
762 3
763 3
764 4
765 5
766 5
767 3
768 8
769 5
770 6
771 6
772 7
773 5
774 3
775 5
776 3
777 29
778 2
779 31
780 5
781 15
782 24
783 16
784 5
785 207
786 4
787 3
788 3
789 2
790 15
791 4
792 4
793 13
794 5
795 5
796 4
797 6
798 10
799 2
800 7
801 8
802 4
803 6
804 20
805 5
806 3
807 4
808 3
809 12
810 12
811 5
812 17
813 7
814 3
815 3
816 3
817 6
818 10
819 3
820 5
821 25
822 80
823 4
824 9
825 3
826 2
827 11
828 3
829 3
830 2
831 3
832 8
833 7
834 5
835 5
836 19
837 5
838 3
839 3
840 12
841 11
842 2
843 6
844 5
845 5
846 5
847 3
848 3
849 3
850 4
851 209
852 14
853 3
854 2
855 5
856 19
857 4
858 4
859 3
860 4
861 14
862 5
863 6
864 4
865 13
866 9
867 7
868 4
869 7
870 10
871 2
872 9
873 5
874 7
875 2
876 8
877 4
878 6
879 5
880 5
881 222
882 8
883 7
884 12
885 5
886 216
887 3
888 4
889 4
890 6
891 3
892 14
893 8
894 7
895 13
896 4
897 3
898 3
899 3
900 3
901 17
902 5
903 4
904 3
905 33
906 6
907 6
908 33
909 7
910 5
911 3
912 8
913 7
914 5
915 2
916 9
917 4
918 2
919 233
920 24
921 7
922 4
923 8
924 10
925 3
926 4
927 15
928 2
929 16
930 3
931 3
932 13
933 12
934 7
935 5
936 4
937 207
938 4
939 2
940 4
941 27
942 15
943 2
944 5
945 2
946 25
947 6
948 5
949 5
950 6
951 13
952 6
953 18
954 6
955 4
956 12
957 225
958 10
959 7
960 5
961 2
962 2
963 11
964 4
965 14
966 21
967 8
968 10
969 3
970 5
971 4
972 232
973 2
974 5
975 5
976 3
977 7
978 17
979 11
980 6
981 6
982 23
983 4
984 6
985 3
986 5
987 4
988 2
989 17
990 3
991 6
992 5
993 8
994 3
995 2
996 2
997 14
998 9
999 4
1000 4
1001 2
1002 5
1003 5
1004 3
1005 7
1006 6
1007 12
1008 6
1009 10
1010 3
1011 6
1012 2
1013 2
1014 19
1015 5
1016 4
1017 4
1018 9
1019 2
1020 4
1021 13
1022 3
1023 5
1024 6
1025 3
1026 6
1027 5
1028 4
1029 9
1030 6
1031 3
1032 5
1033 7
1034 3
1035 6
1036 6
1037 4
1038 3
1039 10
1040 6
1041 3
1042 221
1043 3
1044 5
1045 3
1046 6
1047 4
1048 8
1049 5
1050 3
1051 6
1052 4
1053 4
1054 2
1055 54
1056 5
1057 6
1058 11
1059 3
1060 3
1061 4
1062 4
1063 4
1064 3
1065 7
1066 3
1067 11
1068 11
1069 7
1070 10
1071 6
1072 13
1073 223
1074 213
1075 15
1076 231
1077 7
1078 3
1079 7
1080 228
1081 2
1082 3
1083 4
1084 4
1085 5
1086 6
1087 7
1088 4
1089 13
1090 3
1091 4
1092 5
1093 3
1094 6
1095 4
1096 6
1097 7
1098 2
1099 4
1100 3
1101 4
1102 3
1103 3
1104 6
1105 3
1106 7
1107 3
1108 5
1109 18
1110 5
1111 6
1112 8
1113 10
1114 3
1115 3
1116 3
1117 2
1118 4
1119 2
1120 4
1121 4
1122 5
1123 6
1124 6
1125 4
1126 10
1127 13
1128 3
1129 12
1130 5
1131 12
1132 16
1133 8
1134 4
1135 19
1136 11
1137 2
1138 4
1139 5
1140 6
1141 8
1142 5
1143 6
1144 4
1145 18
1146 10
1147 4
1148 2
1149 216
1150 6
1151 6
1152 6
1153 2
1154 4
1155 12
1156 8
1157 3
1158 11
1159 5
1160 6
1161 14
1162 5
1163 3
1164 13
1165 4
1166 5
1167 4
1168 5
1169 3
1170 28
1171 6
1172 3
1173 7
1174 219
1175 3
1176 9
1177 7
1178 3
1179 10
1180 6
1181 3
1182 4
1183 19
1184 5
1185 7
1186 11
1187 6
1188 15
1189 19
1190 4
1191 13
1192 11
1193 3
1194 7
1195 5
1196 10
1197 2
1198 8
1199 11
1200 2
1201 6
1202 4
1203 6
1204 24
1205 6
1206 3
1207 3
1208 3
1209 3
1210 6
1211 18
1212 4
1213 11
1214 4
1215 2
1216 5
1217 10
1218 8
1219 3
1220 9
1221 5
1222 3
1223 4
1224 5
1225 6
1226 2
1227 5
1228 7
1229 4
1230 4
1231 14
1232 6
1233 4
1234 4
1235 5
1236 5
1237 7
1238 2
1239 4
1240 3
1241 7
1242 3
1243 3
1244 6
1245 4
1246 5
1247 4
1248 4
1249 4
1250 3
1251 3
1252 3
1253 3
1254 8
1255 14
1256 2
1257 3
1258 5
1259 3
1260 2
1261 4
1262 5
1263 3
1264 7
1265 3
1266 3
1267 18
1268 3
1269 4
1270 4
1271 5
1272 7
1273 3
1274 3
1275 3
1276 13
1277 5
1278 4
1279 8
1280 211
1281 5
1282 5
1283 3
1284 5
1285 2
1286 5
1287 4
1288 2
1289 655
1290 6
1291 3
1292 5
1293 11
1294 2
1295 5
1296 3
1297 12
1298 9
1299 15
1300 11
1301 5
1302 12
1303 217
1304 2
1305 6
1306 17
1307 3
1308 3
1309 207
1310 5
1311 5
1312 4
1313 5
1314 9
1315 3
1316 2
1317 8
1318 5
1319 4
1320 3
1321 2
1322 5
1323 12
1324 4
1325 14
1326 5
1327 4
1328 2
1329 13
1330 5
1331 8
1332 4
1333 225
1334 4
1335 3
1336 4
1337 5
1338 4
1339 3
1340 3
1341 6
1342 23
1343 9
1344 2
1345 6
1346 7
1347 233
1348 4
1349 4
1350 6
1351 18
1352 3
1353 4
1354 6
1355 3
1356 4
1357 4
1358 2
1359 3
1360 7
1361 4
1362 13
1363 227
1364 4
1365 3
1366 5
1367 4
1368 2
1369 12
1370 9
1371 17
1372 3
1373 7
1374 14
1375 6
1376 4
1377 5
1378 21
1379 4
1380 8
1381 9
1382 2
1383 9
1384 25
1385 16
1386 3
1387 6
1388 4
1389 7
1390 8
1391 5
1392 2
1393 3
1394 5
1395 4
1396 3
1397 3
1398 5
1399 3
1400 3
1401 3
1402 2
1403 3
1404 19
1405 2
1406 4
1407 3
1408 4
1409 2
1410 3
1411 4
1412 4
1413 2
1414 4
1415 3
1416 3
1417 3
1418 2
1419 6
1420 3
1421 17
1422 5
1423 6
1424 4
1425 3
1426 13
1427 5
1428 3
1429 3
1430 3
1431 4
1432 9
1433 4
1434 2
1435 14
1436 12
1437 4
1438 5
1439 24
1440 4
1441 3
1442 37
1443 12
1444 11
1445 21
1446 3
1447 4
1448 3
1449 13
1450 4
1451 2
1452 3
1453 15
1454 4
1455 11
1456 4
1457 4
1458 3
1459 8
1460 3
1461 4
1462 4
1463 12
1464 8
1465 5
1466 3
1467 3
1468 4
1469 2
1470 220
1471 3
1472 5
1473 223
1474 3
1475 3
1476 3
1477 10
1478 3
1479 15
1480 4
1481 241
1482 9
1483 7
1484 3
1485 6
1486 6
1487 23
1488 4
1489 13
1490 7
1491 3
1492 4
1493 7
1494 4
1495 9
1496 3
1497 3
1498 4
1499 10
1500 5
1501 5
1502 1
1503 5
1504 24
1505 2
1506 4
1507 5
1508 5
1509 6
1510 14
1511 3
1512 8
1513 2
1514 3
1515 5
1516 13
1517 13
1518 3
1519 5
1520 2
1521 3
1522 15
1523 3
1524 4
1525 2
1526 10
1527 4
1528 4
1529 4
1530 5
1531 5
1532 3
1533 5
1534 3
1535 4
1536 7
1537 4
1538 27
1539 3
1540 6
1541 4
1542 15
1543 3
1544 5
1545 6
1546 6
1547 5
1548 4
1549 8
1550 3
1551 9
1552 2
1553 6
1554 3
1555 4
1556 3
1557 7
1558 4
1559 18
1560 3
1561 11
1562 3
1563 3
1564 8
1565 9
1566 7
1567 24
1568 3
1569 219
1570 7
1571 10
1572 4
1573 5
1574 9
1575 12
1576 2
1577 5
1578 4
1579 4
1580 4
1581 3
1582 3
1583 19
1584 5
1585 8
1586 16
1587 8
1588 6
1589 22
1590 3
1591 23
1592 3
1593 242
1594 9
1595 4
1596 3
1597 3
1598 5
1599 7
1600 3
1601 3
1602 5
1603 8
1604 3
1605 7
1606 5
1607 14
1608 8
1609 10
1610 3
1611 4
1612 3
1613 7
1614 4
1615 6
1616 7
1617 4
1618 10
1619 4
1620 3
1621 11
1622 3
1623 7
1624 10
1625 3
1626 13
1627 6
1628 8
1629 12
1630 10
1631 5
1632 7
1633 9
1634 3
1635 4
1636 7
1637 7
1638 10
1639 8
1640 30
1641 9
1642 19
1643 4
1644 3
1645 19
1646 15
1647 4
1648 13
1649 3
1650 215
1651 223
1652 4
1653 7
1654 4
1655 8
1656 17
1657 16
1658 3
1659 7
1660 6
1661 5
1662 5
1663 4
1664 12
1665 3
1666 7
1667 4
1668 4
1669 13
1670 4
1671 5
1672 2
1673 5
1674 6
1675 5
1676 6
1677 6
1678 7
1679 10
1680 18
1681 23
1682 9
1683 3
1684 3
1685 6
1686 5
1687 2
1688 4
1689 2
1690 7
1691 3
1692 3
1693 2
1694 5
1695 5
1696 14
1697 10
1698 224
1699 6
1700 3
1701 4
1702 3
1703 7
1704 5
1705 9
1706 3
1707 6
1708 4
1709 2
1710 5
1711 11
1712 4
1713 3
1714 3
1715 2
1716 8
1717 4
1718 7
1719 4
1720 10
1721 7
1722 3
1723 3
1724 18
1725 18
1726 17
1727 3
1728 3
1729 3
1730 4
1731 5
1732 3
1733 3
1734 4
1735 12
1736 7
1737 3
1738 11
1739 13
1740 5
1741 4
1742 7
1743 13
1744 5
1745 4
1746 11
1747 3
1748 12
1749 3
1750 6
1751 4
1752 4
1753 21
1754 4
1755 6
1756 9
1757 5
1758 3
1759 10
1760 8
1761 4
1762 6
1763 4
1764 4
1765 6
1766 5
1767 4
1768 8
1769 6
1770 4
1771 6
1772 4
1773 4
1774 5
1775 9
1776 6
1777 3
1778 4
1779 2
1780 9
1781 3
1782 18
1783 2
1784 4
1785 3
1786 13
1787 3
1788 6
1789 6
1790 8
1791 7
1792 9
1793 3
1794 2
1795 16
1796 3
1797 4
1798 6
1799 3
1800 2
1801 33
1802 22
1803 14
1804 4
1805 9
1806 12
1807 4
1808 5
1809 6
1810 3
1811 23
1812 9
1813 4
1814 3
1815 5
1816 5
1817 3
1818 4
1819 5
1820 3
1821 5
1822 3
1823 10
1824 4
1825 5
1826 5
1827 8
1828 4
1829 4
1830 6
1831 8
1832 5
1833 4
1834 3
1835 4
1836 6
1837 3
1838 3
1839 3
1840 5
1841 9
1842 12
1843 6
1844 5
1845 9
1846 3
1847 5
1848 3
1849 2
1850 2
1851 2
1852 18
1853 3
1854 2
1855 21
1856 2
1857 5
1858 4
1859 6
1860 4
1861 5
1862 10
1863 3
1864 9
1865 3
1866 2
1867 10
1868 7
1869 3
1870 6
1871 6
1872 4
1873 4
1874 8
1875 12
1876 7
1877 3
1878 7
1879 3
1880 3
1881 9
1882 3
1883 4
1884 5
1885 4
1886 4
1887 5
1888 5
1889 10
1890 15
1891 4
1892 4
1893 14
1894 6
1895 227
1896 3
1897 14
1898 5
1899 216
1900 22
1901 5
1902 4
1903 2
1904 2
1905 6
1906 3
1907 4
1908 2
1909 9
1910 9
1911 4
1912 3
1913 28
1914 13
1915 11
1916 4
1917 5
1918 3
1919 3
1920 2
1921 3
1922 3
1923 5
1924 3
1925 4
1926 3
1927 5
1928 23
1929 26
1930 3
1931 4
1932 5
1933 6
1934 4
1935 6
1936 3
1937 5
1938 5
1939 3
1940 4
1941 3
1942 2
1943 2
1944 2
1945 7
1946 14
1947 3
1948 6
1949 7
1950 17
1951 2
1952 2
1953 15
1954 14
1955 16
1956 4
1957 6
1958 7
1959 13
1960 6
1961 4
1962 5
1963 6
1964 16
1965 3
1966 3
1967 28
1968 3
1969 6
1970 15
1971 3
1972 9
1973 2
1974 4
1975 6
1976 3
1977 3
1978 22
1979 4
1980 12
1981 6
1982 7
1983 2
1984 5
1985 4
1986 10
1987 3
1988 16
1989 6
1990 9
1991 2
1992 5
1993 12
1994 7
1995 5
1996 5
1997 5
1998 5
1999 2
2000 11
2001 9
2002 17
2003 4
2004 3
2005 11
2006 7
2007 3
2008 5
2009 15
2010 4
2011 3
2012 4
2013 211
2014 8
2015 7
2016 5
2017 4
2018 7
2019 6
2020 7
2021 6
2022 3
2023 6
2024 5
2025 6
2026 5
2027 3
2028 4
2029 4
2030 26
2031 4
2032 6
2033 10
2034 4
2035 4
2036 3
2037 2
2038 3
2039 3
2040 4
2041 5
2042 9
2043 3
2044 9
2045 4
2046 4
2047 5
2048 5
2049 8
2050 2
2051 4
2052 2
2053 3
2054 8
2055 4
2056 11
2057 19
2058 5
2059 8
2060 6
2061 3
2062 5
2063 6
2064 12
2065 3
2066 2
2067 4
2068 16
2069 12
2070 3
2071 4
2072 4
2073 8
2074 6
2075 5
2076 6
2077 6
2078 219
2079 8
2080 222
2081 6
2082 16
2083 3
2084 13
2085 19
2086 5
2087 4
2088 3
2089 11
2090 6
2091 10
2092 4
2093 7
2094 7
2095 12
2096 5
2097 3
2098 3
2099 5
2100 6
2101 10
2102 3
2103 8
2104 2
2105 5
2106 4
2107 7
2108 2
2109 4
2110 4
2111 2
2112 12
2113 9
2114 6
2115 4
2116 2
2117 40
2118 2
2119 4
2120 10
2121 4
2122 223
2123 4
2124 2
2125 20
2126 6
2127 7
2128 24
2129 5
2130 4
2131 5
2132 2
2133 20
2134 16
2135 6
2136 5
2137 13
2138 2
2139 3
2140 3
2141 19
2142 3
2143 2
2144 4
2145 5
2146 6
2147 7
2148 11
2149 12
2150 5
2151 6
2152 7
2153 7
2154 3
2155 5
2156 3
2157 5
2158 3
2159 14
2160 3
2161 4
2162 4
2163 2
2164 11
2165 1
2166 7
2167 3
2168 9
2169 6
2170 11
2171 12
2172 5
2173 8
2174 6
2175 221
2176 4
2177 2
2178 12
2179 4
2180 3
2181 15
2182 4
2183 5
2184 226
2185 7
2186 218
2187 7
2188 5
2189 4
2190 5
2191 18
2192 4
2193 5
2194 9
2195 4
2196 4
2197 2
2198 9
2199 18
2200 18
2201 9
2202 5
2203 6
2204 6
2205 3
2206 3
2207 7
2208 3
2209 5
2210 4
2211 4
2212 4
2213 12
2214 3
2215 6
2216 31
2217 5
2218 4
2219 7
2220 3
2221 6
2222 5
2223 6
2224 5
2225 11
2226 2
2227 2
2228 11
2229 11
2230 6
2231 7
2232 5
2233 8
2234 7
2235 10
2236 5
2237 23
2238 7
2239 4
2240 3
2241 5
2242 34
2243 2
2244 5
2245 23
2246 7
2247 3
2248 6
2249 8
2250 4
2251 4
2252 4
2253 2
2254 5
2255 3
2256 8
2257 5
2258 4
2259 8
2260 25
2261 2
2262 3
2263 17
2264 8
2265 3
2266 4
2267 8
2268 7
2269 3
2270 15
2271 6
2272 5
2273 7
2274 21
2275 9
2276 5
2277 6
2278 6
2279 5
2280 3
2281 2
2282 3
2283 10
2284 3
2285 6
2286 3
2287 14
2288 7
2289 4
2290 4
2291 8
2292 7
2293 8
2294 2
2295 6
2296 12
2297 4
2298 213
2299 6
2300 5
2301 21
2302 8
2303 2
2304 5
2305 23
2306 3
2307 11
2308 2
2309 3
2310 6
2311 25
2312 2
2313 3
2314 6
2315 7
2316 6
2317 6
2318 4
2319 4
2320 6
2321 3
2322 17
2323 9
2324 7
2325 6
2326 4
2327 3
2328 10
2329 7
2330 2
2331 3
2332 3
2333 3
2334 11
2335 8
2336 3
2337 7
2338 6
2339 4
2340 14
2341 36
2342 3
2343 4
2344 3
2345 3
2346 22
2347 13
2348 21
2349 4
2350 2
2351 7
2352 4
2353 4
2354 17
2355 15
2356 3
2357 7
2358 11
2359 2
2360 4
2361 7
2362 6
2363 209
2364 6
2365 3
2366 2
2367 2
2368 24
2369 4
2370 9
2371 4
2372 3
2373 3
2374 3
2375 29
2376 2
2377 2
2378 4
2379 3
2380 3
2381 5
2382 4
2383 6
2384 3
2385 3
2386 2
2387 4
0 // Package quantile computes approximate quantiles over an unbounded data
1 // stream within low memory and CPU bounds.
2 //
3 // A small amount of accuracy is traded to achieve the above properties.
4 //
5 // Multiple streams can be merged before calling Query to generate a single set
6 // of results. This is meaningful when the streams represent the same type of
7 // data. See Merge and Samples.
8 //
9 // For more detailed information about the algorithm used, see:
10 //
11 // Effective Computation of Biased Quantiles over Data Streams
12 //
13 // http://www.cs.rutgers.edu/~muthu/bquant.pdf
14 package quantile
15
16 import (
17 "math"
18 "sort"
19 )
20
// Sample is one observed value together with the bookkeeping fields used
// when the stream is compressed. The JSON tags are provided for convenience
// and encode every field as a string.
type Sample struct {
	Value float64 `json:",string"`
	Width float64 `json:",string"`
	Delta float64 `json:",string"`
}

// Samples is a slice of Sample values. It implements sort.Interface,
// ordering by Value.
type Samples []Sample

// Len returns the number of samples.
func (ss Samples) Len() int {
	return len(ss)
}

// Less reports whether sample i has a smaller Value than sample j.
func (ss Samples) Less(i, j int) bool {
	return ss[j].Value > ss[i].Value
}

// Swap exchanges samples i and j.
func (ss Samples) Swap(i, j int) {
	tmp := ss[i]
	ss[i] = ss[j]
	ss[j] = tmp
}
35
36 type invariant func(s *stream, r float64) float64
37
38 // NewLowBiased returns an initialized Stream for low-biased quantiles
39 // (e.g. 0.01, 0.1, 0.5) where the needed quantiles are not known a priori, but
40 // error guarantees can still be given even for the lower ranks of the data
41 // distribution.
42 //
43 // The provided epsilon is a relative error, i.e. the true quantile of a value
44 // returned by a query is guaranteed to be within (1±Epsilon)*Quantile.
45 //
46 // See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
47 // properties.
48 func NewLowBiased(epsilon float64) *Stream {
49 Ć’ := func(s *stream, r float64) float64 {
50 return 2 * epsilon * r
51 }
52 return newStream(Ć’)
53 }
54
55 // NewHighBiased returns an initialized Stream for high-biased quantiles
56 // (e.g. 0.01, 0.1, 0.5) where the needed quantiles are not known a priori, but
57 // error guarantees can still be given even for the higher ranks of the data
58 // distribution.
59 //
60 // The provided epsilon is a relative error, i.e. the true quantile of a value
61 // returned by a query is guaranteed to be within 1-(1±Epsilon)*(1-Quantile).
62 //
63 // See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
64 // properties.
65 func NewHighBiased(epsilon float64) *Stream {
66 Ć’ := func(s *stream, r float64) float64 {
67 return 2 * epsilon * (s.n - r)
68 }
69 return newStream(Ć’)
70 }
71
72 // NewTargeted returns an initialized Stream concerned with a particular set of
73 // quantile values that are supplied a priori. Knowing these a priori reduces
74 // space and computation time. The targets map maps the desired quantiles to
75 // their absolute errors, i.e. the true quantile of a value returned by a query
76 // is guaranteed to be within (Quantile±Epsilon).
77 //
78 // See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error properties.
79 func NewTargeted(targets map[float64]float64) *Stream {
80 Ć’ := func(s *stream, r float64) float64 {
81 var m = math.MaxFloat64
82 var f float64
83 for quantile, epsilon := range targets {
84 if quantile*s.n <= r {
85 f = (2 * epsilon * r) / quantile
86 } else {
87 f = (2 * epsilon * (s.n - r)) / (1 - quantile)
88 }
89 if f < m {
90 m = f
91 }
92 }
93 return m
94 }
95 return newStream(Ć’)
96 }
97
98 // Stream computes quantiles for a stream of float64s. It is not thread-safe by
99 // design. Take care when using across multiple goroutines.
100 type Stream struct {
101 *stream
102 b Samples
103 sorted bool
104 }
105
106 func newStream(Ć’ invariant) *Stream {
107 x := &stream{Ć’: Ć’}
108 return &Stream{x, make(Samples, 0, 500), true}
109 }
110
111 // Insert inserts v into the stream.
112 func (s *Stream) Insert(v float64) {
113 s.insert(Sample{Value: v, Width: 1})
114 }
115
116 func (s *Stream) insert(sample Sample) {
117 s.b = append(s.b, sample)
118 s.sorted = false
119 if len(s.b) == cap(s.b) {
120 s.flush()
121 }
122 }
123
124 // Query returns the computed qth percentiles value. If s was created with
125 // NewTargeted, and q is not in the set of quantiles provided a priori, Query
126 // will return an unspecified result.
127 func (s *Stream) Query(q float64) float64 {
128 if !s.flushed() {
129 // Fast path when there hasn't been enough data for a flush;
130 // this also yields better accuracy for small sets of data.
131 l := len(s.b)
132 if l == 0 {
133 return 0
134 }
135 i := int(float64(l) * q)
136 if i > 0 {
137 i -= 1
138 }
139 s.maybeSort()
140 return s.b[i].Value
141 }
142 s.flush()
143 return s.stream.query(q)
144 }
145
146 // Merge merges samples into the underlying streams samples. This is handy when
147 // merging multiple streams from separate threads, database shards, etc.
148 //
149 // ATTENTION: This method is broken and does not yield correct results. The
150 // underlying algorithm is not capable of merging streams correctly.
151 func (s *Stream) Merge(samples Samples) {
152 sort.Sort(samples)
153 s.stream.merge(samples)
154 }
155
156 // Reset reinitializes and clears the list reusing the samples buffer memory.
157 func (s *Stream) Reset() {
158 s.stream.reset()
159 s.b = s.b[:0]
160 }
161
162 // Samples returns stream samples held by s.
163 func (s *Stream) Samples() Samples {
164 if !s.flushed() {
165 return s.b
166 }
167 s.flush()
168 return s.stream.samples()
169 }
170
171 // Count returns the total number of samples observed in the stream
172 // since initialization.
173 func (s *Stream) Count() int {
174 return len(s.b) + s.stream.count()
175 }
176
177 func (s *Stream) flush() {
178 s.maybeSort()
179 s.stream.merge(s.b)
180 s.b = s.b[:0]
181 }
182
183 func (s *Stream) maybeSort() {
184 if !s.sorted {
185 s.sorted = true
186 sort.Sort(s.b)
187 }
188 }
189
190 func (s *Stream) flushed() bool {
191 return len(s.stream.l) > 0
192 }
193
194 type stream struct {
195 n float64
196 l []Sample
197 Ć’ invariant
198 }
199
200 func (s *stream) reset() {
201 s.l = s.l[:0]
202 s.n = 0
203 }
204
205 func (s *stream) insert(v float64) {
206 s.merge(Samples{{v, 1, 0}})
207 }
208
209 func (s *stream) merge(samples Samples) {
210 // TODO(beorn7): This tries to merge not only individual samples, but
211 // whole summaries. The paper doesn't mention merging summaries at
212 // all. Unittests show that the merging is inaccurate. Find out how to
213 // do merges properly.
214 var r float64
215 i := 0
216 for _, sample := range samples {
217 for ; i < len(s.l); i++ {
218 c := s.l[i]
219 if c.Value > sample.Value {
220 // Insert at position i.
221 s.l = append(s.l, Sample{})
222 copy(s.l[i+1:], s.l[i:])
223 s.l[i] = Sample{
224 sample.Value,
225 sample.Width,
226 math.Max(sample.Delta, math.Floor(s.Ć’(s, r))-1),
227 // TODO(beorn7): How to calculate delta correctly?
228 }
229 i++
230 goto inserted
231 }
232 r += c.Width
233 }
234 s.l = append(s.l, Sample{sample.Value, sample.Width, 0})
235 i++
236 inserted:
237 s.n += sample.Width
238 r += sample.Width
239 }
240 s.compress()
241 }
242
243 func (s *stream) count() int {
244 return int(s.n)
245 }
246
247 func (s *stream) query(q float64) float64 {
248 t := math.Ceil(q * s.n)
249 t += math.Ceil(s.Ć’(s, t) / 2)
250 p := s.l[0]
251 var r float64
252 for _, c := range s.l[1:] {
253 r += p.Width
254 if r+c.Width+c.Delta > t {
255 return p.Value
256 }
257 p = c
258 }
259 return p.Value
260 }
261
262 func (s *stream) compress() {
263 if len(s.l) < 2 {
264 return
265 }
266 x := s.l[len(s.l)-1]
267 xi := len(s.l) - 1
268 r := s.n - 1 - x.Width
269
270 for i := len(s.l) - 2; i >= 0; i-- {
271 c := s.l[i]
272 if c.Width+x.Width+x.Delta <= s.Ć’(s, r) {
273 x.Width += c.Width
274 s.l[xi] = x
275 // Remove element at i.
276 copy(s.l[i:], s.l[i+1:])
277 s.l = s.l[:len(s.l)-1]
278 xi -= 1
279 } else {
280 x = c
281 xi = i
282 }
283 r -= c.Width
284 }
285 }
286
287 func (s *stream) samples() Samples {
288 samples := make(Samples, len(s.l))
289 copy(samples, s.l)
290 return samples
291 }
0 package quantile
1
2 import (
3 "math"
4 "math/rand"
5 "sort"
6 "testing"
7 )
8
var (
	// Targets maps each quantile under test to the epsilon used for it.
	Targets = map[float64]float64{
		0.01: 0.001,
		0.10: 0.01,
		0.50: 0.05,
		0.90: 0.01,
		0.99: 0.001,
	}
	// TargetsSmallEpsilon lists the same quantiles as Targets with every
	// epsilon reduced by a factor of ten.
	TargetsSmallEpsilon = map[float64]float64{
		0.01: 0.0001,
		0.10: 0.001,
		0.50: 0.005,
		0.90: 0.001,
		0.99: 0.0001,
	}
	// LowQuantiles are the quantiles checked for low-biased streams.
	LowQuantiles = []float64{0.01, 0.1, 0.5}
	// HighQuantiles are the quantiles checked for high-biased streams.
	HighQuantiles = []float64{0.99, 0.9, 0.5}
)
27
// RelativeEpsilon is the epsilon passed to NewLowBiased and NewHighBiased in
// the tests and used to compute the accepted rank window for their results.
const RelativeEpsilon = 0.01
29
30 func verifyPercsWithAbsoluteEpsilon(t *testing.T, a []float64, s *Stream) {
31 sort.Float64s(a)
32 for quantile, epsilon := range Targets {
33 n := float64(len(a))
34 k := int(quantile * n)
35 lower := int((quantile - epsilon) * n)
36 if lower < 1 {
37 lower = 1
38 }
39 upper := int(math.Ceil((quantile + epsilon) * n))
40 if upper > len(a) {
41 upper = len(a)
42 }
43 w, min, max := a[k-1], a[lower-1], a[upper-1]
44 if g := s.Query(quantile); g < min || g > max {
45 t.Errorf("q=%f: want %v [%f,%f], got %v", quantile, w, min, max, g)
46 }
47 }
48 }
49
50 func verifyLowPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
51 sort.Float64s(a)
52 for _, qu := range LowQuantiles {
53 n := float64(len(a))
54 k := int(qu * n)
55
56 lowerRank := int((1 - RelativeEpsilon) * qu * n)
57 upperRank := int(math.Ceil((1 + RelativeEpsilon) * qu * n))
58 w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
59 if g := s.Query(qu); g < min || g > max {
60 t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
61 }
62 }
63 }
64
65 func verifyHighPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
66 sort.Float64s(a)
67 for _, qu := range HighQuantiles {
68 n := float64(len(a))
69 k := int(qu * n)
70
71 lowerRank := int((1 - (1+RelativeEpsilon)*(1-qu)) * n)
72 upperRank := int(math.Ceil((1 - (1-RelativeEpsilon)*(1-qu)) * n))
73 w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
74 if g := s.Query(qu); g < min || g > max {
75 t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
76 }
77 }
78 }
79
80 func populateStream(s *Stream) []float64 {
81 a := make([]float64, 0, 1e5+100)
82 for i := 0; i < cap(a); i++ {
83 v := rand.NormFloat64()
84 // Add 5% asymmetric outliers.
85 if i%20 == 0 {
86 v = v*v + 1
87 }
88 s.Insert(v)
89 a = append(a, v)
90 }
91 return a
92 }
93
94 func TestTargetedQuery(t *testing.T) {
95 rand.Seed(42)
96 s := NewTargeted(Targets)
97 a := populateStream(s)
98 verifyPercsWithAbsoluteEpsilon(t, a, s)
99 }
100
101 func TestLowBiasedQuery(t *testing.T) {
102 rand.Seed(42)
103 s := NewLowBiased(RelativeEpsilon)
104 a := populateStream(s)
105 verifyLowPercsWithRelativeEpsilon(t, a, s)
106 }
107
108 func TestHighBiasedQuery(t *testing.T) {
109 rand.Seed(42)
110 s := NewHighBiased(RelativeEpsilon)
111 a := populateStream(s)
112 verifyHighPercsWithRelativeEpsilon(t, a, s)
113 }
114
115 // BrokenTestTargetedMerge is broken, see Merge doc comment.
116 func BrokenTestTargetedMerge(t *testing.T) {
117 rand.Seed(42)
118 s1 := NewTargeted(Targets)
119 s2 := NewTargeted(Targets)
120 a := populateStream(s1)
121 a = append(a, populateStream(s2)...)
122 s1.Merge(s2.Samples())
123 verifyPercsWithAbsoluteEpsilon(t, a, s1)
124 }
125
126 // BrokenTestLowBiasedMerge is broken, see Merge doc comment.
127 func BrokenTestLowBiasedMerge(t *testing.T) {
128 rand.Seed(42)
129 s1 := NewLowBiased(RelativeEpsilon)
130 s2 := NewLowBiased(RelativeEpsilon)
131 a := populateStream(s1)
132 a = append(a, populateStream(s2)...)
133 s1.Merge(s2.Samples())
134 verifyLowPercsWithRelativeEpsilon(t, a, s2)
135 }
136
137 // BrokenTestHighBiasedMerge is broken, see Merge doc comment.
138 func BrokenTestHighBiasedMerge(t *testing.T) {
139 rand.Seed(42)
140 s1 := NewHighBiased(RelativeEpsilon)
141 s2 := NewHighBiased(RelativeEpsilon)
142 a := populateStream(s1)
143 a = append(a, populateStream(s2)...)
144 s1.Merge(s2.Samples())
145 verifyHighPercsWithRelativeEpsilon(t, a, s2)
146 }
147
148 func TestUncompressed(t *testing.T) {
149 q := NewTargeted(Targets)
150 for i := 100; i > 0; i-- {
151 q.Insert(float64(i))
152 }
153 if g := q.Count(); g != 100 {
154 t.Errorf("want count 100, got %d", g)
155 }
156 // Before compression, Query should have 100% accuracy.
157 for quantile := range Targets {
158 w := quantile * 100
159 if g := q.Query(quantile); g != w {
160 t.Errorf("want %f, got %f", w, g)
161 }
162 }
163 }
164
165 func TestUncompressedSamples(t *testing.T) {
166 q := NewTargeted(map[float64]float64{0.99: 0.001})
167 for i := 1; i <= 100; i++ {
168 q.Insert(float64(i))
169 }
170 if g := q.Samples().Len(); g != 100 {
171 t.Errorf("want count 100, got %d", g)
172 }
173 }
174
175 func TestUncompressedOne(t *testing.T) {
176 q := NewTargeted(map[float64]float64{0.99: 0.01})
177 q.Insert(3.14)
178 if g := q.Query(0.90); g != 3.14 {
179 t.Error("want PI, got", g)
180 }
181 }
182
183 func TestDefaults(t *testing.T) {
184 if g := NewTargeted(map[float64]float64{0.99: 0.001}).Query(0.99); g != 0 {
185 t.Errorf("want 0, got %f", g)
186 }
187 }
0 package topk
1
2 import (
3 "sort"
4 )
5
6 // http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf
7
// Element pairs a value seen in the stream with the count recorded for it.
type Element struct {
	Value string // the tracked value
	Count int    // the count recorded for Value
}
12
13 type Samples []*Element
14
15 func (sm Samples) Len() int {
16 return len(sm)
17 }
18
19 func (sm Samples) Less(i, j int) bool {
20 return sm[i].Count < sm[j].Count
21 }
22
23 func (sm Samples) Swap(i, j int) {
24 sm[i], sm[j] = sm[j], sm[i]
25 }
26
27 type Stream struct {
28 k int
29 mon map[string]*Element
30
31 // the minimum Element
32 min *Element
33 }
34
35 func New(k int) *Stream {
36 s := new(Stream)
37 s.k = k
38 s.mon = make(map[string]*Element)
39 s.min = &Element{}
40
41 // Track k+1 so that less frequenet items contended for that spot,
42 // resulting in k being more accurate.
43 return s
44 }
45
46 func (s *Stream) Insert(x string) {
47 s.insert(&Element{x, 1})
48 }
49
50 func (s *Stream) Merge(sm Samples) {
51 for _, e := range sm {
52 s.insert(e)
53 }
54 }
55
56 func (s *Stream) insert(in *Element) {
57 e := s.mon[in.Value]
58 if e != nil {
59 e.Count++
60 } else {
61 if len(s.mon) < s.k+1 {
62 e = &Element{in.Value, in.Count}
63 s.mon[in.Value] = e
64 } else {
65 e = s.min
66 delete(s.mon, e.Value)
67 e.Value = in.Value
68 e.Count += in.Count
69 s.min = e
70 }
71 }
72 if e.Count < s.min.Count {
73 s.min = e
74 }
75 }
76
77 func (s *Stream) Query() Samples {
78 var sm Samples
79 for _, e := range s.mon {
80 sm = append(sm, e)
81 }
82 sort.Sort(sort.Reverse(sm))
83
84 if len(sm) < s.k {
85 return sm
86 }
87
88 return sm[:s.k]
89 }
0 package topk
1
2 import (
3 "fmt"
4 "math/rand"
5 "sort"
6 "testing"
7 )
8
9 func TestTopK(t *testing.T) {
10 stream := New(10)
11 ss := []*Stream{New(10), New(10), New(10)}
12 m := make(map[string]int)
13 for _, s := range ss {
14 for i := 0; i < 1e6; i++ {
15 v := fmt.Sprintf("%x", int8(rand.ExpFloat64()))
16 s.Insert(v)
17 m[v]++
18 }
19 stream.Merge(s.Query())
20 }
21
22 var sm Samples
23 for x, s := range m {
24 sm = append(sm, &Element{x, s})
25 }
26 sort.Sort(sort.Reverse(sm))
27
28 g := stream.Query()
29 if len(g) != 10 {
30 t.Fatalf("got %d, want 10", len(g))
31 }
32 for i, e := range g {
33 if sm[i].Value != e.Value {
34 t.Errorf("at %d: want %q, got %q", i, sm[i].Value, e.Value)
35 }
36 }
37 }
38
39 func TestQuery(t *testing.T) {
40 queryTests := []struct {
41 value string
42 expected int
43 }{
44 {"a", 1},
45 {"b", 2},
46 {"c", 2},
47 }
48
49 stream := New(2)
50 for _, tt := range queryTests {
51 stream.Insert(tt.value)
52 if n := len(stream.Query()); n != tt.expected {
53 t.Errorf("want %d, got %d", tt.expected, n)
54 }
55 }
56 }