Commit d332540a28db3caee9ab43bb9fcd14ef714a6718 - golang-github-influxdata-tdigest

Import upstream version 0.0.1+git20210216.1.fc98d27 Debian Janitor 2 years ago

11 changed file(s) with 545 addition(s) and 190 deletion(s). Raw diff Collapse all Expand all

-6

README.md less more

1	1
2	2	This is an implementation of Ted Dunning's [t-digest](https://github.com/tdunning/t-digest/) in Go.
3	3
4		The implementaion is based off [Derrick Burns' C++ implementation](https://github.com/derrickburns/tdigest).
	4	The implementation is based off [Derrick Burns' C++ implementation](https://github.com/derrickburns/tdigest).
5	5
6	6	## Example
7	7

34	34	log.Println("CDF(5) = ", td.CDF(5))
35	35	}
36	36	```
37
38		## TODO
39
40		Only the methods for a single TDigest have been implemented.
41		The methods to merge two or more existing t-digests into a single t-digest have yet to be implemented.

-1

centroid.go less more

42	42	// CentroidList is sorted by the Mean of the centroid, ascending.
43	43	type CentroidList []Centroid
44	44
	45	// Clear clears the list.
45	46	func (l *CentroidList) Clear() {
46		*l = (*l)[0:0]
	47	*l = (*l)[:0]
47	48	}
48	49
49	50	func (l CentroidList) Len() int { return len(l) }

+10

-0

go.mod less more

	0	module github.com/influxdata/tdigest
	1
	2	require (
	3	github.com/google/go-cmp v0.2.0
	4	golang.org/x/exp v0.0.0-20180321215751-8460e604b9de
	5	gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca
	6	gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6 // indirect
	7	)
	8
	9	go 1.13

-0

go.sum less more

	0	github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
	1	github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
	2	golang.org/x/exp v0.0.0-20180321215751-8460e604b9de h1:xSjD6HQTqT0H/k60N5yYBtnN1OEkVy7WIo/DYyxKRO0=
	3	golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
	4	golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
	5	gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca h1:PupagGYwj8+I4ubCxcmcBRk3VlUWtTg5huQpZR9flmE=
	6	gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
	7	gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6 h1:4WsZyVtkthqrHTbDCJfiTs8IWNYE4uvsSDgaV6xpp+o=
	8	gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=

+108

-28

tdigest.go less more

4	4	"sort"
5	5	)
6	6
	7	// TDigest is a data structure for accurate on-line accumulation of
	8	// rank-based statistics such as quantiles and trimmed means.
7	9	type TDigest struct {
8	10	Compression float64
9	11

18	20	max float64
19	21	}
20	22
	23	// New initializes a new distribution with a default compression.
21	24	func New() *TDigest {
22	25	return NewWithCompression(1000)
23	26	}
	27
	28	// NewWithCompression initializes a new distribution with custom compression.
24	29	func NewWithCompression(c float64) *TDigest {
25	30	t := &TDigest{
26	31	Compression: c,
27	32	}
28	33	t.maxProcessed = processedSize(0, t.Compression)
29	34	t.maxUnprocessed = unprocessedSize(0, t.Compression)
30		t.processed = make([]Centroid, 0, t.maxProcessed)
31		t.unprocessed = make([]Centroid, 0, t.maxUnprocessed+1)
	35	t.processed = make(CentroidList, 0, t.maxProcessed)
	36	t.unprocessed = make(CentroidList, 0, t.maxUnprocessed+1)
	37	t.Reset()
	38	return t
	39	}
	40
	41	// Calculate number of bytes needed for a tdigest of size c,
	42	// where c is the compression value
	43	func ByteSizeForCompression(comp float64) int {
	44	c := int(comp)
	45	// // A centroid is 2 float64s, so we need 16 bytes for each centroid
	46	// float_size := 8
	47	// centroid_size := 2 * float_size
	48
	49	// // Unprocessed and processed can grow up to length c
	50	// unprocessed_size := centroid_size * c
	51	// processed_size := unprocessed_size
	52
	53	// // the cumulative field can also be of length c, but each item is a single float64
	54	// cumulative_size := float_size * c // <- this could also be unprocessed_size / 2
	55
	56	// return unprocessed_size + processed_size + cumulative_size
	57
	58	// // or, more succinctly:
	59	// return float_size * c * 5
	60
	61	// or even more succinctly
	62	return c * 40
	63	}
	64
	65	// Reset resets the distribution to its initial state.
	66	func (t *TDigest) Reset() {
	67	t.processed = t.processed[:0]
	68	t.unprocessed = t.unprocessed[:0]
	69	t.cumulative = t.cumulative[:0]
	70	t.processedWeight = 0
	71	t.unprocessedWeight = 0
32	72	t.min = math.MaxFloat64
33	73	t.max = -math.MaxFloat64
34		return t
35		}
36
	74	}
	75
	76	// Add adds a value x with a weight w to the distribution.
37	77	func (t *TDigest) Add(x, w float64) {
38		if math.IsNaN(x) {
	78	t.AddCentroid(Centroid{Mean: x, Weight: w})
	79	}
	80
	81	// AddCentroidList can quickly add multiple centroids.
	82	func (t *TDigest) AddCentroidList(c CentroidList) {
	83	// It's possible to optimize this by bulk-copying the slice, but this
	84	// yields just a 1-2% speedup (most time is in process()), so not worth
	85	// the complexity.
	86	for i := range c {
	87	t.AddCentroid(c[i])
	88	}
	89	}
	90
	91	// AddCentroid adds a single centroid.
	92	// Weights which are not a number or are <= 0 are ignored, as are NaN means.
	93	func (t *TDigest) AddCentroid(c Centroid) {
	94	if math.IsNaN(c.Mean) || c.Weight <= 0 || math.IsNaN(c.Weight) || math.IsInf(c.Weight, 1) {
39	95	return
40	96	}
41		t.AddCentroid(Centroid{Mean: x, Weight: w})
42		}
43
44		func (t *TDigest) AddCentroidList(c CentroidList) {
45		l := c.Len()
46		for i := 0; i < l; i++ {
47		diff := l - i
48		room := t.maxUnprocessed - t.unprocessed.Len()
49		mid := i + diff
50		if room < diff {
51		mid = i + room
52		}
53		for i < mid {
54		t.AddCentroid(c[i])
55		i++
56		}
57		}
58		}
59
60		func (t *TDigest) AddCentroid(c Centroid) {
	97
61	98	t.unprocessed = append(t.unprocessed, c)
62	99	t.unprocessedWeight += c.Weight
63	100

65	102	t.unprocessed.Len() > t.maxUnprocessed {
66	103	t.process()
67	104	}
	105	}
	106
	107	// Merges the supplied digest into this digest. Functionally equivalent to
	108	// calling t.AddCentroidList(t2.Centroids(nil)), but avoids making an extra
	109	// copy of the CentroidList.
	110	func (t *TDigest) Merge(t2 *TDigest) {
	111	t2.process()
	112	t.AddCentroidList(t2.processed)
68	113	}
69	114
70	115	func (t *TDigest) process() {

97	142	}
98	143	t.min = math.Min(t.min, t.processed[0].Mean)
99	144	t.max = math.Max(t.max, t.processed[t.processed.Len()-1].Mean)
100		t.updateCumulative()
101	145	t.unprocessed.Clear()
102	146	}
103	147	}
104	148
	149	// Centroids returns a copy of processed centroids.
	150	// Useful when aggregating multiple t-digests.
	151	//
	152	// Centroids are appended to the passed CentroidList; if you're re-using a
	153	// buffer, be sure to pass cl[:0].
	154	func (t *TDigest) Centroids(cl CentroidList) CentroidList {
	155	t.process()
	156	return append(cl, t.processed...)
	157	}
	158
	159	func (t *TDigest) Count() float64 {
	160	t.process()
	161
	162	// t.process always updates t.processedWeight to the total count of all
	163	// centroids, so we don't need to re-count here.
	164	return t.processedWeight
	165	}
	166
105	167	func (t *TDigest) updateCumulative() {
106		t.cumulative = make([]float64, t.processed.Len()+1)
	168	// Weight can only increase, so the final cumulative value will always be
	169	// either equal to, or less than, the total weight. If they are the same,
	170	// then nothing has changed since the last update.
	171	if len(t.cumulative) > 0 && t.cumulative[len(t.cumulative)-1] == t.processedWeight {
	172	return
	173	}
	174
	175	if n := t.processed.Len() + 1; n <= cap(t.cumulative) {
	176	t.cumulative = t.cumulative[:n]
	177	} else {
	178	t.cumulative = make([]float64, n)
	179	}
	180
107	181	prev := 0.0
108	182	for i, centroid := range t.processed {
109	183	cur := centroid.Weight

113	187	t.cumulative[t.processed.Len()] = prev
114	188	}
115	189
	190	// Quantile returns the (approximate) quantile of
	191	// the distribution. Accepted values for q are between 0.0 and 1.0.
	192	// Returns NaN if Count is zero or bad inputs.
116	193	func (t *TDigest) Quantile(q float64) float64 {
117	194	t.process()
	195	t.updateCumulative()
118	196	if q < 0 || q > 1 || t.processed.Len() == 0 {
119	197	return math.NaN()
120	198	}

141	219	return weightedAverage(t.processed[t.processed.Len()-1].Mean, z1, t.max, z2)
142	220	}
143	221
	222	// CDF returns the cumulative distribution function for a given value x.
144	223	func (t *TDigest) CDF(x float64) float64 {
145	224	t.process()
	225	t.updateCumulative()
146	226	switch t.processed.Len() {
147	227	case 0:
148	228	return 0.0

+275

-15

tdigest_test.go less more

0	0	package tdigest_test
1	1
2	2	import (
3		"math/rand"
	3	"fmt"
	4	"math"
	5	"reflect"
4	6	"testing"
5	7
6		"github.com/gonum/stat/distuv"
7	8	"github.com/influxdata/tdigest"
	9	"golang.org/x/exp/rand"
	10	"gonum.org/v1/gonum/stat/distuv"
8	11	)
9	12
10	13	const (

24	27
25	28	func init() {
26	29	dist := distuv.Normal{
27		Mu: Mu,
28		Sigma: Sigma,
29		Source: rand.New(rand.NewSource(seed)),
	30	Mu: Mu,
	31	Sigma: Sigma,
	32	Src: rand.New(rand.NewSource(seed)),
30	33	}
31	34	uniform := rand.New(rand.NewSource(seed))
32	35

42	45
43	46	UniformData[i] = uniform.Float64() * 100
44	47	UniformDigest.Add(UniformData[i], 1)
	48	}
	49	}
	50
	51	// Compares the quantile results of two digests, and fails if the
	52	// fractional err exceeds maxErr.
	53	// Always fails if the total count differs.
	54	func compareQuantiles(td1, td2 *tdigest.TDigest, maxErr float64) error {
	55	if td1.Count() != td2.Count() {
	56	return fmt.Errorf("counts are not equal, %d vs %d", int64(td1.Count()), int64(td2.Count()))
	57	}
	58	for q := 0.05; q < 1; q += 0.05 {
	59	if math.Abs(td1.Quantile(q)-td2.Quantile(q))/td1.Quantile(q) > maxErr {
	60	return fmt.Errorf("quantile %g differs, %g vs %g", q, td1.Quantile(q), td2.Quantile(q))
	61	}
	62	}
	63	return nil
	64	}
	65
	66	// All Add methods should yield equivalent results.
	67	func TestTdigest_AddFuncs(t *testing.T) {
	68	centroids := NormalDigest.Centroids(nil)
	69
	70	addDigest := tdigest.NewWithCompression(100)
	71	addCentroidDigest := tdigest.NewWithCompression(100)
	72	addCentroidListDigest := tdigest.NewWithCompression(100)
	73
	74	for _, c := range centroids {
	75	addDigest.Add(c.Mean, c.Weight)
	76	addCentroidDigest.AddCentroid(c)
	77	}
	78	addCentroidListDigest.AddCentroidList(centroids)
	79
	80	if err := compareQuantiles(addDigest, addCentroidDigest, 0.01); err != nil {
	81	t.Errorf("AddCentroid() differs from from Add(): %s", err.Error())
	82	}
	83	if err := compareQuantiles(addDigest, addCentroidListDigest, 0.01); err != nil {
	84	t.Errorf("AddCentroidList() differs from from Add(): %s", err.Error())
	85	}
	86	}
	87
	88	func TestTdigest_Count(t *testing.T) {
	89	tests := []struct {
	90	name string
	91	data []float64
	92	digest *tdigest.TDigest
	93	want float64
	94	}{
	95	{
	96	name: "empty",
	97	data: []float64{},
	98	want: 0,
	99	},
	100	{
	101	name: "not empty",
	102	data: []float64{5, 4},
	103	want: 2,
	104	},
	105	}
	106
	107	for _, tt := range tests {
	108	t.Run(tt.name, func(t *testing.T) {
	109	td := tt.digest
	110	if td == nil {
	111	td = tdigest.NewWithCompression(1000)
	112	for _, x := range tt.data {
	113	td.Add(x, 1)
	114	}
	115	}
	116	got := td.Count()
	117	if got != tt.want {
	118	t.Errorf("unexpected count, got %g want %g", got, tt.want)
	119	}
	120	})
	121	}
	122
	123	got := NormalDigest.Count()
	124	want := float64(len(NormalData))
	125	if got != want {
	126	t.Errorf("unexpected count for NormalDigest, got %g want %g", got, want)
	127	}
	128
	129	got = UniformDigest.Count()
	130	want = float64(len(UniformData))
	131	if got != want {
	132	t.Errorf("unexpected count for UniformDigest, got %g want %g", got, want)
45	133	}
46	134	}
47	135

81	169	name: "normal 50",
82	170	quantile: 0.5,
83	171	digest: NormalDigest,
84		want: 9.997821231634168,
	172	want: 10.000673533707138,
85	173	},
86	174	{
87	175	name: "normal 90",
88	176	quantile: 0.9,
89	177	digest: NormalDigest,
90		want: 13.843815760607427,
	178	want: 13.842132136909889,
91	179	},
92	180	{
93	181	name: "uniform 50",
94	182	quantile: 0.5,
95	183	digest: UniformDigest,
96		want: 50.02682856274754,
	184	want: 49.992502345843555,
97	185	},
98	186	{
99	187	name: "uniform 90",
100	188	quantile: 0.9,
101	189	digest: UniformDigest,
102		want: 90.02117754660424,
	190	want: 89.98281777095822,
103	191	},
104	192	{
105	193	name: "uniform 99",
106	194	quantile: 0.99,
107	195	digest: UniformDigest,
108		want: 99.00246731511771,
	196	want: 98.98503400959562,
109	197	},
110	198	{
111	199	name: "uniform 99.9",
112	200	quantile: 0.999,
113	201	digest: UniformDigest,
114		want: 99.90178495422307,
	202	want: 99.90103781043621,
115	203	},
116	204	}
117	205	for _, tt := range tests {

161	249	name: "normal mean",
162	250	cdf: 10,
163	251	data: NormalData,
164		want: 0.500298235578106,
	252	want: 0.4999156505250766,
165	253	},
166	254	{
167	255	name: "normal high",

179	267	name: "uniform 50",
180	268	cdf: 50,
181	269	data: UniformData,
182		want: 0.49972989818712815,
	270	want: 0.5000756133965755,
183	271	},
184	272	{
185	273	name: "uniform min",

197	285	name: "uniform 10",
198	286	cdf: 10,
199	287	data: UniformData,
200		want: 0.099715527526992,
	288	want: 0.09987932577650871,
201	289	},
202	290	{
203	291	name: "uniform 90",
204	292	cdf: 90,
205	293	data: UniformData,
206		want: 0.8997838903965611,
	294	want: 0.9001667885256108,
207	295	},
208	296	}
209	297	for _, tt := range tests {

223	311	}
224	312	}
225	313
	314	func TestTdigest_Reset(t *testing.T) {
	315	td := tdigest.New()
	316	for _, x := range NormalData {
	317	td.Add(x, 1)
	318	}
	319	q1 := td.Quantile(0.9)
	320
	321	td.Reset()
	322	for _, x := range NormalData {
	323	td.Add(x, 1)
	324	}
	325	if q2 := td.Quantile(0.9); q2 != q1 {
	326	t.Errorf("unexpected quantile, got %g want %g", q2, q1)
	327	}
	328	}
	329
	330	func TestTdigest_OddInputs(t *testing.T) {
	331	td := tdigest.New()
	332	td.Add(math.NaN(), 1)
	333	td.Add(1, math.NaN())
	334	td.Add(1, 0)
	335	td.Add(1, -1000)
	336	if td.Count() != 0 {
	337	t.Error("invalid value was alloed to be added")
	338	}
	339
	340	// Infinite values are allowed.
	341	td.Add(1, 1)
	342	td.Add(2, 1)
	343	td.Add(math.Inf(1), 1)
	344	if q := td.Quantile(0.5); q != 2 {
	345	t.Errorf("expected median value 2, got %f", q)
	346	}
	347	if q := td.Quantile(0.9); !math.IsInf(q, 1) {
	348	t.Errorf("expected median value 2, got %f", q)
	349	}
	350	}
	351
	352	func TestTdigest_Merge(t *testing.T) {
	353	// Repeat merges enough times to ensure we call compress()
	354	numRepeats := 20
	355	addDigest := tdigest.New()
	356	for i := 0; i < numRepeats; i++ {
	357	for _, c := range NormalDigest.Centroids(nil) {
	358	addDigest.AddCentroid(c)
	359	}
	360	for _, c := range UniformDigest.Centroids(nil) {
	361	addDigest.AddCentroid(c)
	362	}
	363	}
	364
	365	mergeDigest := tdigest.New()
	366	for i := 0; i < numRepeats; i++ {
	367	mergeDigest.Merge(NormalDigest)
	368	mergeDigest.Merge(UniformDigest)
	369	}
	370
	371	if err := compareQuantiles(addDigest, mergeDigest, 0.001); err != nil {
	372	t.Errorf("AddCentroid() differs from from Merge(): %s", err.Error())
	373	}
	374
	375	// Empty merge does nothing and has no effect on underlying centroids.
	376	c1 := addDigest.Centroids(nil)
	377	addDigest.Merge(tdigest.New())
	378	c2 := addDigest.Centroids(nil)
	379	if !reflect.DeepEqual(c1, c2) {
	380	t.Error("Merging an empty digest altered data")
	381	}
	382	}
	383
226	384	var quantiles = []float64{0.1, 0.5, 0.9, 0.99, 0.999}
227	385
228	386	func BenchmarkTDigest_Add(b *testing.B) {

233	391	}
234	392	}
235	393	}
	394
	395	func BenchmarkTDigest_AddCentroid(b *testing.B) {
	396	centroids := make(tdigest.CentroidList, len(NormalData))
	397	for i := range centroids {
	398	centroids[i].Mean = NormalData[i]
	399	centroids[i].Weight = 1
	400	}
	401
	402	b.ResetTimer()
	403	for n := 0; n < b.N; n++ {
	404	td := tdigest.NewWithCompression(1000)
	405	for i := range centroids {
	406	td.AddCentroid(centroids[i])
	407	}
	408	}
	409	}
	410
	411	func BenchmarkTDigest_AddCentroidList(b *testing.B) {
	412	centroids := make(tdigest.CentroidList, len(NormalData))
	413	for i := range centroids {
	414	centroids[i].Mean = NormalData[i]
	415	centroids[i].Weight = 1
	416	}
	417
	418	b.ResetTimer()
	419	for n := 0; n < b.N; n++ {
	420	td := tdigest.NewWithCompression(1000)
	421	td.AddCentroidList(centroids)
	422	}
	423	}
	424
	425	func BenchmarkTDigest_Merge(b *testing.B) {
	426	b.Run("AddCentroid", func(b *testing.B) {
	427	var cl tdigest.CentroidList
	428	td := tdigest.New()
	429	for n := 0; n < b.N; n++ {
	430	cl = NormalDigest.Centroids(cl[:0])
	431	for i := range cl {
	432	td.AddCentroid(cl[i])
	433	}
	434	}
	435	})
	436	b.Run("Merge", func(b *testing.B) {
	437	td := tdigest.New()
	438	for n := 0; n < b.N; n++ {
	439	td.Merge(NormalDigest)
	440	}
	441	})
	442	}
	443
236	444	func BenchmarkTDigest_Quantile(b *testing.B) {
237	445	td := tdigest.NewWithCompression(1000)
238	446	for _, x := range NormalData {

246	454	}
247	455	}
248	456	}
	457
	458	func TestTdigest_Centroids(t *testing.T) {
	459	tests := []struct {
	460	name string
	461	data []float64
	462	digest *tdigest.TDigest
	463	want tdigest.CentroidList
	464	}{
	465	{
	466	name: "increasing",
	467	data: []float64{1, 2, 3, 4, 5},
	468	want: tdigest.CentroidList{
	469	tdigest.Centroid{
	470	Mean: 1.0,
	471	Weight: 1.0,
	472	},
	473
	474	tdigest.Centroid{
	475	Mean: 2.5,
	476	Weight: 2.0,
	477	},
	478
	479	tdigest.Centroid{
	480	Mean: 4.0,
	481	Weight: 1.0,
	482	},
	483
	484	tdigest.Centroid{
	485	Mean: 5.0,
	486	Weight: 1.0,
	487	},
	488	},
	489	},
	490	}
	491
	492	for _, tt := range tests {
	493	t.Run(tt.name, func(t *testing.T) {
	494	var got tdigest.CentroidList
	495	td := tt.digest
	496	if td == nil {
	497	td = tdigest.NewWithCompression(3)
	498	for _, x := range tt.data {
	499	td.Add(x, 1)
	500	}
	501	}
	502	got = td.Centroids(got[:0])
	503	if !reflect.DeepEqual(got, tt.want) {
	504	t.Errorf("unexpected list got %g want %g", got, tt.want)
	505	}
	506	})
	507	}
	508	}

+62

-0

test/gen/main.go less more

	0	package main
	1
	2	import (
	3	"os"
	4	"strconv"
	5
	6	"golang.org/x/exp/rand"
	7	"gonum.org/v1/gonum/stat/distuv"
	8	)
	9
	10	const (
	11	N = 1e6
	12	Mu = 10
	13	Sigma = 3
	14
	15	seed = 42
	16	)
	17
	18	func main() {
	19	// Generate uniform and normal data
	20	uniform := rand.New(rand.NewSource(seed))
	21	dist := distuv.Normal{
	22	Mu: Mu,
	23	Sigma: Sigma,
	24	Src: rand.New(rand.NewSource(seed)),
	25	}
	26
	27	uniformData := make([]float64, N)
	28	normalData := make([]float64, N)
	29	for i := range normalData {
	30	normalData[i] = dist.Rand()
	31	uniformData[i] = uniform.Float64() * 100
	32	}
	33
	34	smallData := []float64{1, 2, 3, 4, 5, 5, 4, 3, 2, 1}
	35
	36	writeData("uniform.dat", uniformData)
	37	writeData("normal.dat", normalData)
	38	writeData("small.dat", smallData)
	39	}
	40
	41	func writeData(name string, data []float64) {
	42	f, err := os.Create(name)
	43	if err != nil {
	44	panic(err)
	45	}
	46	defer f.Close()
	47
	48	buf := make([]byte, 0, 64)
	49	for _, x := range data {
	50	buf = strconv.AppendFloat(buf, x, 'f', -1, 64)
	51	_, err := f.Write(buf)
	52	if err != nil {
	53	panic(err)
	54	}
	55	_, err = f.Write([]byte{'\n'})
	56	if err != nil {
	57	panic(err)
	58	}
	59	buf = buf[0:0]
	60	}
	61	}

-62

~~test/gen.go~~ less more

0		package main
1
2		import (
3		"math/rand"
4		"os"
5		"strconv"
6
7		"github.com/gonum/stat/distuv"
8		)
9
10		const (
11		N = 1e6
12		Mu = 10
13		Sigma = 3
14
15		seed = 42
16		)
17
18		func main() {
19		// Generate uniform and normal data
20		uniform := rand.New(rand.NewSource(seed))
21		dist := distuv.Normal{
22		Mu: Mu,
23		Sigma: Sigma,
24		Source: rand.New(rand.NewSource(seed)),
25		}
26
27		uniformData := make([]float64, N)
28		normalData := make([]float64, N)
29		for i := range normalData {
30		normalData[i] = dist.Rand()
31		uniformData[i] = uniform.Float64() * 100
32		}
33
34		smallData := []float64{1, 2, 3, 4, 5, 5, 4, 3, 2, 1}
35
36		writeData("uniform.dat", uniformData)
37		writeData("normal.dat", normalData)
38		writeData("small.dat", smallData)
39		}
40
41		func writeData(name string, data []float64) {
42		f, err := os.Create(name)
43		if err != nil {
44		panic(err)
45		}
46		defer f.Close()
47
48		buf := make([]byte, 0, 64)
49		for _, x := range data {
50		buf = strconv.AppendFloat(buf, x, 'f', -1, 64)
51		_, err := f.Write(buf)
52		if err != nil {
53		panic(err)
54		}
55		_, err = f.Write([]byte{'\n'})
56		if err != nil {
57		panic(err)
58		}
59		buf = buf[0:0]
60		}
61		}

-3

test/test.sh less more

4	4	DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
5	5	cd "$DIR"
6	6
7		go run gen.go
	7	go run gen/main.go
8	8	go run main.go
9		g++ -o cpp.test main.cpp
	9	g++ -std=c++11 -o cpp.test main.cpp
10	10	./cpp.test 2>/dev/null
11	11	rm cpp.test
12	12
13		go run validate.go
	13	go run validate/main.go

+75

-0

test/validate/main.go less more

	0	package main
	1
	2	import (
	3	"bufio"
	4	"log"
	5	"math"
	6	"os"
	7	"strconv"
	8	"strings"
	9	)
	10
	11	var dataFiles = []string{
	12	"small.dat",
	13	"uniform.dat",
	14	"normal.dat",
	15	}
	16
	17	const (
	18	cppQExt = ".cpp.quantiles"
	19	goQExt = ".go.quantiles"
	20
	21	cppCDFExt = ".cpp.cdfs"
	22	goCDFExt = ".go.cdfs"
	23
	24	epsilon = 1e-6
	25	)
	26
	27	func main() {
	28	for _, f := range dataFiles {
	29	// Validate Quantiles
	30	cppQuantiles := loadResults(f + cppQExt)
	31	goQuantiles := loadResults(f + goQExt)
	32	if len(cppQuantiles) != len(goQuantiles) {
	33	log.Fatal("differing number of quantiles results")
	34	}
	35
	36	for i := range cppQuantiles {
	37	if math.Abs(cppQuantiles[i]-goQuantiles[i]) > epsilon {
	38	log.Fatalf("differing quantile result go: %f cpp: %f", goQuantiles[i], cppQuantiles[i])
	39	}
	40	}
	41
	42	// Validate CDFs
	43	cppCDFs := loadResults(f + cppCDFExt)
	44	goCDFs := loadResults(f + goCDFExt)
	45	if len(cppCDFs) != len(goCDFs) {
	46	log.Fatal("differing number of CDFs results")
	47	}
	48
	49	for i := range cppCDFs {
	50	if math.Abs(cppCDFs[i]-goCDFs[i]) > epsilon {
	51	log.Fatalf("differing CDF result go: %f cpp: %f", goCDFs[i], cppCDFs[i])
	52	}
	53	}
	54	}
	55	}
	56
	57	func loadResults(name string) []float64 {
	58	f, err := os.Open(name)
	59	if err != nil {
	60	panic(err)
	61	}
	62	defer f.Close()
	63	s := bufio.NewScanner(f)
	64	var data []float64
	65	for s.Scan() {
	66	parts := strings.SplitN(s.Text(), " ", 2)
	67	x, err := strconv.ParseFloat(parts[0], 64)
	68	if err != nil {
	69	panic(err)
	70	}
	71	data = append(data, x)
	72	}
	73	return data
	74	}

-75

~~test/validate.go~~ less more

0		package main
1
2		import (
3		"bufio"
4		"log"
5		"math"
6		"os"
7		"strconv"
8		"strings"
9		)
10
11		var dataFiles = []string{
12		"small.dat",
13		"uniform.dat",
14		"normal.dat",
15		}
16
17		const (
18		cppQExt = ".cpp.quantiles"
19		goQExt = ".go.quantiles"
20
21		cppCDFExt = ".cpp.cdfs"
22		goCDFExt = ".go.cdfs"
23
24		epsilon = 1e-6
25		)
26
27		func main() {
28		for _, f := range dataFiles {
29		// Validate Quantiles
30		cppQuantiles := loadResults(f + cppQExt)
31		goQuantiles := loadResults(f + goQExt)
32		if len(cppQuantiles) != len(goQuantiles) {
33		log.Fatal("differing number of quantiles results")
34		}
35
36		for i := range cppQuantiles {
37		if math.Abs(cppQuantiles[i]-goQuantiles[i]) > epsilon {
38		log.Fatalf("differing quantile result go: %f cpp: %f", goQuantiles[i], cppQuantiles[i])
39		}
40		}
41
42		// Validate CDFs
43		cppCDFs := loadResults(f + cppCDFExt)
44		goCDFs := loadResults(f + goCDFExt)
45		if len(cppCDFs) != len(goCDFs) {
46		log.Fatal("differing number of CDFs results")
47		}
48
49		for i := range cppCDFs {
50		if math.Abs(cppCDFs[i]-goCDFs[i]) > epsilon {
51		log.Fatalf("differing CDF result go: %f cpp: %f", goCDFs[i], cppCDFs[i])
52		}
53		}
54		}
55		}
56
57		func loadResults(name string) []float64 {
58		f, err := os.Open(name)
59		if err != nil {
60		panic(err)
61		}
62		defer f.Close()
63		s := bufio.NewScanner(f)
64		var data []float64
65		for s.Scan() {
66		parts := strings.SplitN(s.Text(), " ", 2)
67		x, err := strconv.ParseFloat(parts[0], 64)
68		if err != nil {
69		panic(err)
70		}
71		data = append(data, x)
72		}
73		return data
74		}