Codebase list
histogram Blake Mizerany 10 years ago
2 changed file(s) with 151 addition(s) and 0 deletion(s).
 0 // Package histogram provides a Go implementation of BigML's histogram package 1 // for Clojure/Java. It is currently experiemental. 2 package histogram 3 4 import ( 5 "container/heap" 6 "math" 7 "sort" 8 ) 9 10 type Bin struct { 11 Count int 12 Sum float64 13 } 14 15 func (b *Bin) Update(x *Bin) { 16 b.Count += x.Count 17 b.Sum += x.Sum 18 } 19 20 func (b *Bin) Mean() float64 { 21 return b.Sum / float64(b.Count) 22 } 23 24 type Bins []*Bin 25 26 func (bs Bins) Len() int { return len(bs) } 27 func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() } 28 func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] } 29 30 func (bs *Bins) Push(x interface{}) { 31 *bs = append(*bs, x.(*Bin)) 32 } 33 34 func (bs *Bins) Pop() interface{} { 35 return bs.remove(len(*bs) - 1) 36 } 37 38 func (bs *Bins) remove(n int) *Bin { 39 old := *bs 40 x := old[n] 41 h := old[0:n] 42 if n < len(old)-1 { 43 t := old[n+1:] 44 out := make([]*Bin, len(old)-1) 45 copy(out, h) 46 copy(out[len(h):], t) 47 *bs = out 48 } else { 49 *bs = h 50 } 51 return x 52 } 53 54 type Histogram struct { 55 res *reservoir 56 } 57 58 func New(maxBins int) *Histogram { 59 return &Histogram{res: newReservoir(maxBins)} 60 } 61 62 func (h *Histogram) Insert(f float64) { 63 h.res.insert(&Bin{1, f}) 64 h.res.compress() 65 } 66 67 func (h *Histogram) Bins() Bins { 68 return h.res.bins 69 } 70 71 type reservoir struct { 72 n int 73 maxBins int 74 bins Bins 75 } 76 77 func newReservoir(maxBins int) *reservoir { 78 return &reservoir{maxBins: maxBins} 79 } 80 81 func (r *reservoir) insert(bin *Bin) { 82 r.n += bin.Count 83 i := sort.Search(len(r.bins), func(i int) bool { 84 return r.bins[i].Mean() >= bin.Mean() 85 }) 86 if i < 0 || i == r.bins.Len() { 87 heap.Push(&r.bins, bin) 88 return 89 } 90 r.bins[i].Update(bin) 91 } 92 93 func (r *reservoir) compress() { 94 for r.bins.Len() > r.maxBins { 95 minGapIndex := -1 96 minGap := math.MaxFloat64 97 for i := 0; i < r.bins.Len()-1; i++ { 98 gap := gapWeight(r.bins[i], r.bins[i+1]) 99 if minGap > gap { 100 minGap = gap 101 minGapIndex = i 102 } 103 } 104 prev := r.bins[minGapIndex] 105 next := r.bins.remove(minGapIndex + 1) 106 prev.Update(next) 107 } 108 } 109 110 func gapWeight(prev, next *Bin) float64 { 111 return next.Mean() - prev.Mean() 112 }
 0 package histogram 1 2 import ( 3 "math/rand" 4 "testing" 5 ) 6 7 func TestHistogram(t *testing.T) { 8 const numPoints = 1e6 9 const maxBins = 3 10 11 h := New(maxBins) 12 for i := 0; i < numPoints; i++ { 13 f := rand.ExpFloat64() 14 h.Insert(f) 15 } 16 17 bins := h.Bins() 18 if g := len(bins); g > maxBins { 19 t.Fatalf("got %d bins, wanted <= %d", g, maxBins) 20 } 21 22 for _, b := range bins { 23 t.Logf("%+v", b) 24 } 25 26 if g := count(h.Bins()); g != numPoints { 27 t.Fatalf("binned %d points, wanted %d", g, numPoints) 28 } 29 } 30 31 func count(bins Bins) int { 32 binCounts := 0 33 for _, b := range bins { 34 binCounts += b.Count 35 } 36 return binCounts 37 }