Codebase list golang-github-influxdata-tdigest / d332540
Import upstream version 0.0.1+git20210216.1.fc98d27 Debian Janitor 2 years ago
11 changed file(s) with 545 addition(s) and 190 deletion(s). Raw diff Collapse all Expand all
11
22 This is an implementation of Ted Dunning's [t-digest](https://github.com/tdunning/t-digest/) in Go.
33
4 The implementaion is based off [Derrick Burns' C++ implementation](https://github.com/derrickburns/tdigest).
4 The implementation is based off [Derrick Burns' C++ implementation](https://github.com/derrickburns/tdigest).
55
66 ## Example
77
3434 log.Println("CDF(5) = ", td.CDF(5))
3535 }
3636 ```
37
38 ## TODO
39
40 Only the methods for a single TDigest have been implemented.
41 The methods to merge two or more existing t-digests into a single t-digest have yet to be implemented.
4242 // CentroidList is sorted by the Mean of the centroid, ascending.
4343 type CentroidList []Centroid
4444
// Clear empties the list in place. The slice is truncated to length
// zero, so the allocated backing array is retained for reuse.
func (l *CentroidList) Clear() {
	*l = (*l)[:0]
}
4849
4950 func (l CentroidList) Len() int { return len(l) }
0 module github.com/influxdata/tdigest
1
2 require (
3 github.com/google/go-cmp v0.2.0
4 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de
5 gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca
6 gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6 // indirect
7 )
8
9 go 1.13
0 github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
1 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
2 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de h1:xSjD6HQTqT0H/k60N5yYBtnN1OEkVy7WIo/DYyxKRO0=
3 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
4 golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
5 gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca h1:PupagGYwj8+I4ubCxcmcBRk3VlUWtTg5huQpZR9flmE=
6 gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
7 gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6 h1:4WsZyVtkthqrHTbDCJfiTs8IWNYE4uvsSDgaV6xpp+o=
8 gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
44 "sort"
55 )
66
7 // TDigest is a data structure for accurate on-line accumulation of
8 // rank-based statistics such as quantiles and trimmed means.
79 type TDigest struct {
810 Compression float64
911
1820 max float64
1921 }
2022
// New initializes a new distribution with a default compression of 1000.
func New() *TDigest {
	return NewWithCompression(1000)
}
27
28 // NewWithCompression initializes a new distribution with custom compression.
2429 func NewWithCompression(c float64) *TDigest {
2530 t := &TDigest{
2631 Compression: c,
2732 }
2833 t.maxProcessed = processedSize(0, t.Compression)
2934 t.maxUnprocessed = unprocessedSize(0, t.Compression)
30 t.processed = make([]Centroid, 0, t.maxProcessed)
31 t.unprocessed = make([]Centroid, 0, t.maxUnprocessed+1)
35 t.processed = make(CentroidList, 0, t.maxProcessed)
36 t.unprocessed = make(CentroidList, 0, t.maxUnprocessed+1)
37 t.Reset()
38 return t
39 }
40
// ByteSizeForCompression returns the approximate number of bytes of
// memory needed for a TDigest with compression value comp.
//
// Derivation: a centroid is two float64s (16 bytes). The processed and
// unprocessed lists can each grow to roughly comp centroids, and the
// cumulative slice holds up to comp float64s (8 bytes each), giving
// 16*c + 16*c + 8*c = 40*c bytes. Any fractional part of comp is
// truncated before multiplying.
func ByteSizeForCompression(comp float64) int {
	return int(comp) * 40
}
64
65 // Reset resets the distribution to its initial state.
66 func (t *TDigest) Reset() {
67 t.processed = t.processed[:0]
68 t.unprocessed = t.unprocessed[:0]
69 t.cumulative = t.cumulative[:0]
70 t.processedWeight = 0
71 t.unprocessedWeight = 0
3272 t.min = math.MaxFloat64
3373 t.max = -math.MaxFloat64
34 return t
35 }
36
74 }
75
// Add adds a value x with a weight w to the distribution.
// Input validation is delegated to AddCentroid, which ignores NaN
// means and weights that are NaN, +Inf, or <= 0.
func (t *TDigest) Add(x, w float64) {
	t.AddCentroid(Centroid{Mean: x, Weight: w})
}
80
81 // AddCentroidList can quickly add multiple centroids.
82 func (t *TDigest) AddCentroidList(c CentroidList) {
83 // It's possible to optimize this by bulk-copying the slice, but this
84 // yields just a 1-2% speedup (most time is in process()), so not worth
85 // the complexity.
86 for i := range c {
87 t.AddCentroid(c[i])
88 }
89 }
90
91 // AddCentroid adds a single centroid.
92 // Weights which are not a number or are <= 0 are ignored, as are NaN means.
93 func (t *TDigest) AddCentroid(c Centroid) {
94 if math.IsNaN(c.Mean) || c.Weight <= 0 || math.IsNaN(c.Weight) || math.IsInf(c.Weight, 1) {
3995 return
4096 }
41 t.AddCentroid(Centroid{Mean: x, Weight: w})
42 }
43
44 func (t *TDigest) AddCentroidList(c CentroidList) {
45 l := c.Len()
46 for i := 0; i < l; i++ {
47 diff := l - i
48 room := t.maxUnprocessed - t.unprocessed.Len()
49 mid := i + diff
50 if room < diff {
51 mid = i + room
52 }
53 for i < mid {
54 t.AddCentroid(c[i])
55 i++
56 }
57 }
58 }
59
60 func (t *TDigest) AddCentroid(c Centroid) {
97
6198 t.unprocessed = append(t.unprocessed, c)
6299 t.unprocessedWeight += c.Weight
63100
65102 t.unprocessed.Len() > t.maxUnprocessed {
66103 t.process()
67104 }
105 }
106
// Merge merges the supplied digest into this digest. Functionally
// equivalent to calling t.AddCentroidList(t2.Centroids(nil)), but
// avoids making an extra copy of the CentroidList.
//
// Note: t2.process() is invoked, so t2's unprocessed centroids are
// flushed into its processed list as a side effect.
func (t *TDigest) Merge(t2 *TDigest) {
	t2.process()
	t.AddCentroidList(t2.processed)
}
69114
70115 func (t *TDigest) process() {
97142 }
98143 t.min = math.Min(t.min, t.processed[0].Mean)
99144 t.max = math.Max(t.max, t.processed[t.processed.Len()-1].Mean)
100 t.updateCumulative()
101145 t.unprocessed.Clear()
102146 }
103147 }
104148
149 // Centroids returns a copy of processed centroids.
150 // Useful when aggregating multiple t-digests.
151 //
152 // Centroids are appended to the passed CentroidList; if you're re-using a
153 // buffer, be sure to pass cl[:0].
154 func (t *TDigest) Centroids(cl CentroidList) CentroidList {
155 t.process()
156 return append(cl, t.processed...)
157 }
158
159 func (t *TDigest) Count() float64 {
160 t.process()
161
162 // t.process always updates t.processedWeight to the total count of all
163 // centroids, so we don't need to re-count here.
164 return t.processedWeight
165 }
166
105167 func (t *TDigest) updateCumulative() {
106 t.cumulative = make([]float64, t.processed.Len()+1)
168 // Weight can only increase, so the final cumulative value will always be
169 // either equal to, or less than, the total weight. If they are the same,
170 // then nothing has changed since the last update.
171 if len(t.cumulative) > 0 && t.cumulative[len(t.cumulative)-1] == t.processedWeight {
172 return
173 }
174
175 if n := t.processed.Len() + 1; n <= cap(t.cumulative) {
176 t.cumulative = t.cumulative[:n]
177 } else {
178 t.cumulative = make([]float64, n)
179 }
180
107181 prev := 0.0
108182 for i, centroid := range t.processed {
109183 cur := centroid.Weight
113187 t.cumulative[t.processed.Len()] = prev
114188 }
115189
190 // Quantile returns the (approximate) quantile of
191 // the distribution. Accepted values for q are between 0.0 and 1.0.
192 // Returns NaN if Count is zero or bad inputs.
116193 func (t *TDigest) Quantile(q float64) float64 {
117194 t.process()
195 t.updateCumulative()
118196 if q < 0 || q > 1 || t.processed.Len() == 0 {
119197 return math.NaN()
120198 }
141219 return weightedAverage(t.processed[t.processed.Len()-1].Mean, z1, t.max, z2)
142220 }
143221
222 // CDF returns the cumulative distribution function for a given value x.
144223 func (t *TDigest) CDF(x float64) float64 {
145224 t.process()
225 t.updateCumulative()
146226 switch t.processed.Len() {
147227 case 0:
148228 return 0.0
00 package tdigest_test
11
22 import (
3 "math/rand"
3 "fmt"
4 "math"
5 "reflect"
46 "testing"
57
6 "github.com/gonum/stat/distuv"
78 "github.com/influxdata/tdigest"
9 "golang.org/x/exp/rand"
10 "gonum.org/v1/gonum/stat/distuv"
811 )
912
1013 const (
2427
2528 func init() {
2629 dist := distuv.Normal{
27 Mu: Mu,
28 Sigma: Sigma,
29 Source: rand.New(rand.NewSource(seed)),
30 Mu: Mu,
31 Sigma: Sigma,
32 Src: rand.New(rand.NewSource(seed)),
3033 }
3134 uniform := rand.New(rand.NewSource(seed))
3235
4245
4346 UniformData[i] = uniform.Float64() * 100
4447 UniformDigest.Add(UniformData[i], 1)
48 }
49 }
50
// compareQuantiles compares the quantile results of two digests and
// returns an error if the fractional difference exceeds maxErr.
// Always fails if the total counts differ.
//
// NOTE(review): the difference is relative to td1's quantile; if that
// value is zero or NaN the comparison silently passes — confirm all
// callers use strictly positive distributions.
func compareQuantiles(td1, td2 *tdigest.TDigest, maxErr float64) error {
	if td1.Count() != td2.Count() {
		return fmt.Errorf("counts are not equal, %d vs %d", int64(td1.Count()), int64(td2.Count()))
	}
	for q := 0.05; q < 1; q += 0.05 {
		if math.Abs(td1.Quantile(q)-td2.Quantile(q))/td1.Quantile(q) > maxErr {
			return fmt.Errorf("quantile %g differs, %g vs %g", q, td1.Quantile(q), td2.Quantile(q))
		}
	}
	return nil
}
65
66 // All Add methods should yield equivalent results.
67 func TestTdigest_AddFuncs(t *testing.T) {
68 centroids := NormalDigest.Centroids(nil)
69
70 addDigest := tdigest.NewWithCompression(100)
71 addCentroidDigest := tdigest.NewWithCompression(100)
72 addCentroidListDigest := tdigest.NewWithCompression(100)
73
74 for _, c := range centroids {
75 addDigest.Add(c.Mean, c.Weight)
76 addCentroidDigest.AddCentroid(c)
77 }
78 addCentroidListDigest.AddCentroidList(centroids)
79
80 if err := compareQuantiles(addDigest, addCentroidDigest, 0.01); err != nil {
81 t.Errorf("AddCentroid() differs from from Add(): %s", err.Error())
82 }
83 if err := compareQuantiles(addDigest, addCentroidListDigest, 0.01); err != nil {
84 t.Errorf("AddCentroidList() differs from from Add(): %s", err.Error())
85 }
86 }
87
// TestTdigest_Count verifies Count() against table-driven cases and
// against the shared package-level digests.
func TestTdigest_Count(t *testing.T) {
	tests := []struct {
		name   string
		data   []float64
		digest *tdigest.TDigest
		want   float64
	}{
		{
			name: "empty",
			data: []float64{},
			want: 0,
		},
		{
			name: "not empty",
			data: []float64{5, 4},
			want: 2,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			td := tt.digest
			if td == nil {
				// Build a digest from the raw data, one unit of weight
				// per value, so want equals len(data).
				td = tdigest.NewWithCompression(1000)
				for _, x := range tt.data {
					td.Add(x, 1)
				}
			}
			got := td.Count()
			if got != tt.want {
				t.Errorf("unexpected count, got %g want %g", got, tt.want)
			}
		})
	}

	// The shared digests should report exactly one unit of weight per
	// input sample.
	got := NormalDigest.Count()
	want := float64(len(NormalData))
	if got != want {
		t.Errorf("unexpected count for NormalDigest, got %g want %g", got, want)
	}

	got = UniformDigest.Count()
	want = float64(len(UniformData))
	if got != want {
		t.Errorf("unexpected count for UniformDigest, got %g want %g", got, want)
	}
}
47135
81169 name: "normal 50",
82170 quantile: 0.5,
83171 digest: NormalDigest,
84 want: 9.997821231634168,
172 want: 10.000673533707138,
85173 },
86174 {
87175 name: "normal 90",
88176 quantile: 0.9,
89177 digest: NormalDigest,
90 want: 13.843815760607427,
178 want: 13.842132136909889,
91179 },
92180 {
93181 name: "uniform 50",
94182 quantile: 0.5,
95183 digest: UniformDigest,
96 want: 50.02682856274754,
184 want: 49.992502345843555,
97185 },
98186 {
99187 name: "uniform 90",
100188 quantile: 0.9,
101189 digest: UniformDigest,
102 want: 90.02117754660424,
190 want: 89.98281777095822,
103191 },
104192 {
105193 name: "uniform 99",
106194 quantile: 0.99,
107195 digest: UniformDigest,
108 want: 99.00246731511771,
196 want: 98.98503400959562,
109197 },
110198 {
111199 name: "uniform 99.9",
112200 quantile: 0.999,
113201 digest: UniformDigest,
114 want: 99.90178495422307,
202 want: 99.90103781043621,
115203 },
116204 }
117205 for _, tt := range tests {
161249 name: "normal mean",
162250 cdf: 10,
163251 data: NormalData,
164 want: 0.500298235578106,
252 want: 0.4999156505250766,
165253 },
166254 {
167255 name: "normal high",
179267 name: "uniform 50",
180268 cdf: 50,
181269 data: UniformData,
182 want: 0.49972989818712815,
270 want: 0.5000756133965755,
183271 },
184272 {
185273 name: "uniform min",
197285 name: "uniform 10",
198286 cdf: 10,
199287 data: UniformData,
200 want: 0.099715527526992,
288 want: 0.09987932577650871,
201289 },
202290 {
203291 name: "uniform 90",
204292 cdf: 90,
205293 data: UniformData,
206 want: 0.8997838903965611,
294 want: 0.9001667885256108,
207295 },
208296 }
209297 for _, tt := range tests {
223311 }
224312 }
225313
314 func TestTdigest_Reset(t *testing.T) {
315 td := tdigest.New()
316 for _, x := range NormalData {
317 td.Add(x, 1)
318 }
319 q1 := td.Quantile(0.9)
320
321 td.Reset()
322 for _, x := range NormalData {
323 td.Add(x, 1)
324 }
325 if q2 := td.Quantile(0.9); q2 != q1 {
326 t.Errorf("unexpected quantile, got %g want %g", q2, q1)
327 }
328 }
329
330 func TestTdigest_OddInputs(t *testing.T) {
331 td := tdigest.New()
332 td.Add(math.NaN(), 1)
333 td.Add(1, math.NaN())
334 td.Add(1, 0)
335 td.Add(1, -1000)
336 if td.Count() != 0 {
337 t.Error("invalid value was alloed to be added")
338 }
339
340 // Infinite values are allowed.
341 td.Add(1, 1)
342 td.Add(2, 1)
343 td.Add(math.Inf(1), 1)
344 if q := td.Quantile(0.5); q != 2 {
345 t.Errorf("expected median value 2, got %f", q)
346 }
347 if q := td.Quantile(0.9); !math.IsInf(q, 1) {
348 t.Errorf("expected median value 2, got %f", q)
349 }
350 }
351
352 func TestTdigest_Merge(t *testing.T) {
353 // Repeat merges enough times to ensure we call compress()
354 numRepeats := 20
355 addDigest := tdigest.New()
356 for i := 0; i < numRepeats; i++ {
357 for _, c := range NormalDigest.Centroids(nil) {
358 addDigest.AddCentroid(c)
359 }
360 for _, c := range UniformDigest.Centroids(nil) {
361 addDigest.AddCentroid(c)
362 }
363 }
364
365 mergeDigest := tdigest.New()
366 for i := 0; i < numRepeats; i++ {
367 mergeDigest.Merge(NormalDigest)
368 mergeDigest.Merge(UniformDigest)
369 }
370
371 if err := compareQuantiles(addDigest, mergeDigest, 0.001); err != nil {
372 t.Errorf("AddCentroid() differs from from Merge(): %s", err.Error())
373 }
374
375 // Empty merge does nothing and has no effect on underlying centroids.
376 c1 := addDigest.Centroids(nil)
377 addDigest.Merge(tdigest.New())
378 c2 := addDigest.Centroids(nil)
379 if !reflect.DeepEqual(c1, c2) {
380 t.Error("Merging an empty digest altered data")
381 }
382 }
383
226384 var quantiles = []float64{0.1, 0.5, 0.9, 0.99, 0.999}
227385
228386 func BenchmarkTDigest_Add(b *testing.B) {
233391 }
234392 }
235393 }
394
// BenchmarkTDigest_AddCentroid measures building a digest from
// pre-constructed centroids, one AddCentroid call at a time.
func BenchmarkTDigest_AddCentroid(b *testing.B) {
	centroids := make(tdigest.CentroidList, len(NormalData))
	for i := range centroids {
		centroids[i].Mean = NormalData[i]
		centroids[i].Weight = 1
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// A fresh digest is created inside the timed loop, so each
		// iteration starts from an empty digest.
		td := tdigest.NewWithCompression(1000)
		for i := range centroids {
			td.AddCentroid(centroids[i])
		}
	}
}
410
// BenchmarkTDigest_AddCentroidList measures building a digest from
// pre-constructed centroids via a single AddCentroidList call, for
// comparison with BenchmarkTDigest_AddCentroid.
func BenchmarkTDigest_AddCentroidList(b *testing.B) {
	centroids := make(tdigest.CentroidList, len(NormalData))
	for i := range centroids {
		centroids[i].Mean = NormalData[i]
		centroids[i].Weight = 1
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// A fresh digest is created inside the timed loop, so each
		// iteration starts from an empty digest.
		td := tdigest.NewWithCompression(1000)
		td.AddCentroidList(centroids)
	}
}
424
// BenchmarkTDigest_Merge compares merging one digest into another via
// repeated AddCentroid calls against the dedicated Merge method.
func BenchmarkTDigest_Merge(b *testing.B) {
	b.Run("AddCentroid", func(b *testing.B) {
		var cl tdigest.CentroidList
		td := tdigest.New()
		for n := 0; n < b.N; n++ {
			// Passing cl[:0] reuses cl's backing array each iteration.
			cl = NormalDigest.Centroids(cl[:0])
			for i := range cl {
				td.AddCentroid(cl[i])
			}
		}
	})
	b.Run("Merge", func(b *testing.B) {
		td := tdigest.New()
		for n := 0; n < b.N; n++ {
			td.Merge(NormalDigest)
		}
	})
}
443
236444 func BenchmarkTDigest_Quantile(b *testing.B) {
237445 td := tdigest.NewWithCompression(1000)
238446 for _, x := range NormalData {
246454 }
247455 }
248456 }
457
// TestTdigest_Centroids checks that a small digest compacts its inputs
// into the expected centroid list.
func TestTdigest_Centroids(t *testing.T) {
	tests := []struct {
		name   string
		data   []float64
		digest *tdigest.TDigest
		want   tdigest.CentroidList
	}{
		{
			name: "increasing",
			data: []float64{1, 2, 3, 4, 5},
			want: tdigest.CentroidList{
				tdigest.Centroid{
					Mean:   1.0,
					Weight: 1.0,
				},

				tdigest.Centroid{
					Mean:   2.5,
					Weight: 2.0,
				},

				tdigest.Centroid{
					Mean:   4.0,
					Weight: 1.0,
				},

				tdigest.Centroid{
					Mean:   5.0,
					Weight: 1.0,
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var got tdigest.CentroidList
			td := tt.digest
			if td == nil {
				// A very low compression (3) forces neighboring values
				// to merge into shared centroids.
				td = tdigest.NewWithCompression(3)
				for _, x := range tt.data {
					td.Add(x, 1)
				}
			}
			got = td.Centroids(got[:0])
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("unexpected list got %g want %g", got, tt.want)
			}
		})
	}
}
0 package main
1
2 import (
3 "os"
4 "strconv"
5
6 "golang.org/x/exp/rand"
7 "gonum.org/v1/gonum/stat/distuv"
8 )
9
10 const (
11 N = 1e6
12 Mu = 10
13 Sigma = 3
14
15 seed = 42
16 )
17
// main generates the shared test fixture files: N uniform samples in
// [0, 100), N normally-distributed samples (Mu, Sigma), and a small
// fixed data set — each written one value per line. Both generators
// are seeded with the same fixed seed, so output is deterministic.
func main() {
	// Generate uniform and normal data
	uniform := rand.New(rand.NewSource(seed))
	dist := distuv.Normal{
		Mu:    Mu,
		Sigma: Sigma,
		Src:   rand.New(rand.NewSource(seed)),
	}

	uniformData := make([]float64, N)
	normalData := make([]float64, N)
	for i := range normalData {
		normalData[i] = dist.Rand()
		uniformData[i] = uniform.Float64() * 100
	}

	smallData := []float64{1, 2, 3, 4, 5, 5, 4, 3, 2, 1}

	writeData("uniform.dat", uniformData)
	writeData("normal.dat", normalData)
	writeData("small.dat", smallData)
}
40
// writeData writes data to the named file, one float64 per line in
// shortest-round-trip decimal form. It panics on any I/O error.
func writeData(name string, data []float64) {
	f, err := os.Create(name)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// A single scratch buffer is reused for every line to avoid
	// per-value allocations.
	line := make([]byte, 0, 64)
	for _, v := range data {
		line = strconv.AppendFloat(line[:0], v, 'f', -1, 64)
		if _, err := f.Write(line); err != nil {
			panic(err)
		}
		if _, err := f.Write([]byte{'\n'}); err != nil {
			panic(err)
		}
	}
}
+0
-62
test/gen.go less more
0 package main
1
2 import (
3 "math/rand"
4 "os"
5 "strconv"
6
7 "github.com/gonum/stat/distuv"
8 )
9
10 const (
11 N = 1e6
12 Mu = 10
13 Sigma = 3
14
15 seed = 42
16 )
17
18 func main() {
19 // Generate uniform and normal data
20 uniform := rand.New(rand.NewSource(seed))
21 dist := distuv.Normal{
22 Mu: Mu,
23 Sigma: Sigma,
24 Source: rand.New(rand.NewSource(seed)),
25 }
26
27 uniformData := make([]float64, N)
28 normalData := make([]float64, N)
29 for i := range normalData {
30 normalData[i] = dist.Rand()
31 uniformData[i] = uniform.Float64() * 100
32 }
33
34 smallData := []float64{1, 2, 3, 4, 5, 5, 4, 3, 2, 1}
35
36 writeData("uniform.dat", uniformData)
37 writeData("normal.dat", normalData)
38 writeData("small.dat", smallData)
39 }
40
41 func writeData(name string, data []float64) {
42 f, err := os.Create(name)
43 if err != nil {
44 panic(err)
45 }
46 defer f.Close()
47
48 buf := make([]byte, 0, 64)
49 for _, x := range data {
50 buf = strconv.AppendFloat(buf, x, 'f', -1, 64)
51 _, err := f.Write(buf)
52 if err != nil {
53 panic(err)
54 }
55 _, err = f.Write([]byte{'\n'})
56 if err != nil {
57 panic(err)
58 }
59 buf = buf[0:0]
60 }
61 }
44 DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
55 cd "$DIR"
66
7 go run gen.go
7 go run gen/main.go
88 go run main.go
9 g++ -o cpp.test main.cpp
9 g++ -std=c++11 -o cpp.test main.cpp
1010 ./cpp.test 2>/dev/null
1111 rm cpp.test
1212
13 go run validate.go
13 go run validate/main.go
0 package main
1
2 import (
3 "bufio"
4 "log"
5 "math"
6 "os"
7 "strconv"
8 "strings"
9 )
10
11 var dataFiles = []string{
12 "small.dat",
13 "uniform.dat",
14 "normal.dat",
15 }
16
17 const (
18 cppQExt = ".cpp.quantiles"
19 goQExt = ".go.quantiles"
20
21 cppCDFExt = ".cpp.cdfs"
22 goCDFExt = ".go.cdfs"
23
24 epsilon = 1e-6
25 )
26
// main cross-validates the Go and C++ t-digest outputs: for every data
// file it loads the quantile and CDF result files produced by each
// implementation and exits fatally if the result counts differ or any
// pair of values differs by more than epsilon.
func main() {
	for _, f := range dataFiles {
		// Validate Quantiles
		cppQuantiles := loadResults(f + cppQExt)
		goQuantiles := loadResults(f + goQExt)
		if len(cppQuantiles) != len(goQuantiles) {
			log.Fatal("differing number of quantiles results")
		}

		for i := range cppQuantiles {
			if math.Abs(cppQuantiles[i]-goQuantiles[i]) > epsilon {
				log.Fatalf("differing quantile result go: %f cpp: %f", goQuantiles[i], cppQuantiles[i])
			}
		}

		// Validate CDFs
		cppCDFs := loadResults(f + cppCDFExt)
		goCDFs := loadResults(f + goCDFExt)
		if len(cppCDFs) != len(goCDFs) {
			log.Fatal("differing number of CDFs results")
		}

		for i := range cppCDFs {
			if math.Abs(cppCDFs[i]-goCDFs[i]) > epsilon {
				log.Fatalf("differing CDF result go: %f cpp: %f", goCDFs[i], cppCDFs[i])
			}
		}
	}
}
56
// loadResults reads the named file and returns the first
// space-separated field of each line parsed as a float64.
// It panics on any open, read, or parse error.
func loadResults(name string) []float64 {
	f, err := os.Open(name)
	if err != nil {
		panic(err)
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	var data []float64
	for s.Scan() {
		parts := strings.SplitN(s.Text(), " ", 2)
		x, err := strconv.ParseFloat(parts[0], 64)
		if err != nil {
			panic(err)
		}
		data = append(data, x)
	}
	// Scanner.Scan returns false on read errors as well as EOF; surface
	// them instead of silently returning truncated data.
	if err := s.Err(); err != nil {
		panic(err)
	}
	return data
}
+0
-75
test/validate.go less more
0 package main
1
2 import (
3 "bufio"
4 "log"
5 "math"
6 "os"
7 "strconv"
8 "strings"
9 )
10
11 var dataFiles = []string{
12 "small.dat",
13 "uniform.dat",
14 "normal.dat",
15 }
16
17 const (
18 cppQExt = ".cpp.quantiles"
19 goQExt = ".go.quantiles"
20
21 cppCDFExt = ".cpp.cdfs"
22 goCDFExt = ".go.cdfs"
23
24 epsilon = 1e-6
25 )
26
27 func main() {
28 for _, f := range dataFiles {
29 // Validate Quantiles
30 cppQuantiles := loadResults(f + cppQExt)
31 goQuantiles := loadResults(f + goQExt)
32 if len(cppQuantiles) != len(goQuantiles) {
33 log.Fatal("differing number of quantiles results")
34 }
35
36 for i := range cppQuantiles {
37 if math.Abs(cppQuantiles[i]-goQuantiles[i]) > epsilon {
38 log.Fatalf("differing quantile result go: %f cpp: %f", goQuantiles[i], cppQuantiles[i])
39 }
40 }
41
42 // Validate CDFs
43 cppCDFs := loadResults(f + cppCDFExt)
44 goCDFs := loadResults(f + goCDFExt)
45 if len(cppCDFs) != len(goCDFs) {
46 log.Fatal("differing number of CDFs results")
47 }
48
49 for i := range cppCDFs {
50 if math.Abs(cppCDFs[i]-goCDFs[i]) > epsilon {
51 log.Fatalf("differing CDF result go: %f cpp: %f", goCDFs[i], cppCDFs[i])
52 }
53 }
54 }
55 }
56
57 func loadResults(name string) []float64 {
58 f, err := os.Open(name)
59 if err != nil {
60 panic(err)
61 }
62 defer f.Close()
63 s := bufio.NewScanner(f)
64 var data []float64
65 for s.Scan() {
66 parts := strings.SplitN(s.Text(), " ", 2)
67 x, err := strconv.ParseFloat(parts[0], 64)
68 if err != nil {
69 panic(err)
70 }
71 data = append(data, x)
72 }
73 return data
74 }