Added sampling logic to the Kafka collector, plus some code cleanup
Bas van Beek
8 years ago
0 | 0 | package zipkin |
1 | 1 | |
2 | 2 | import ( |
3 | "math/rand" | |
4 | ||
3 | 5 | "github.com/apache/thrift/lib/go/thrift" |
4 | 6 | "gopkg.in/Shopify/sarama.v1" |
5 | 7 | |
6 | 8 | "github.com/go-kit/kit/log" |
7 | 9 | ) |
8 | 10 | |
9 | // KafkaTopic sets the Kafka topic our Collector will publish on. The | |
10 | // default topic for zipkin-receiver-kafka is "zipkin", see: | |
11 | // defaultKafkaTopic sets the standard Kafka topic our Collector will publish | |
12 | // on. The default topic for zipkin-receiver-kafka is "zipkin", see: | |
11 | 13 | // https://github.com/openzipkin/zipkin/tree/master/zipkin-receiver-kafka |
12 | var KafkaTopic = "zipkin" | |
14 | const defaultKafkaTopic = "zipkin" | |
13 | 15 | |
14 | // KafkaCollector implements Collector by forwarding spans to a Kafka | |
15 | // service. | |
16 | // KafkaCollector implements Collector by publishing spans to a Kafka | |
17 | // broker. | |
16 | 18 | type KafkaCollector struct { |
17 | producer sarama.AsyncProducer | |
18 | logger log.Logger | |
19 | producer sarama.AsyncProducer | |
20 | logger log.Logger | |
21 | topic string | |
22 | shouldSample Sampler | |
19 | 23 | } |
20 | 24 | |
21 | 25 | // KafkaOption sets a parameter for the KafkaCollector |
22 | type KafkaOption func(s *KafkaCollector) | |
26 | type KafkaOption func(c *KafkaCollector) | |
23 | 27 | |
24 | 28 | // KafkaLogger sets the logger used to report errors in the collection |
25 | 29 | // process. By default, a no-op logger is used, i.e. no errors are logged |
26 | 30 | // anywhere. It's important to set this option. |
27 | 31 | func KafkaLogger(logger log.Logger) KafkaOption { |
28 | return func(k *KafkaCollector) { k.logger = logger } | |
32 | return func(c *KafkaCollector) { c.logger = logger } | |
29 | 33 | } |
30 | 34 | |
31 | 35 | // KafkaProducer sets the producer used to produce to Kafka. |
33 | 37 | return func(c *KafkaCollector) { c.producer = p } |
34 | 38 | } |
35 | 39 | |
40 | // KafkaTopic sets the kafka topic to attach the collector producer on. | |
41 | func KafkaTopic(t string) KafkaOption { | |
42 | return func(c *KafkaCollector) { c.topic = t } | |
43 | } | |
44 | ||
45 | // KafkaSampleRate sets the sample rate used to determine if a trace will be | |
46 | // sent to the collector. By default, the sample rate is 1.0, i.e. all traces | |
47 | // are sent. | |
48 | func KafkaSampleRate(sr Sampler) KafkaOption { | |
49 | return func(c *KafkaCollector) { c.shouldSample = sr } | |
50 | } | |
51 | ||
36 | 52 | // NewKafkaCollector returns a new Kafka-backed Collector. addrs should be a |
37 | 53 | // slice of TCP endpoints of the form "host:port". |
38 | 54 | func NewKafkaCollector(addrs []string, options ...KafkaOption) (Collector, error) { |
39 | c := &KafkaCollector{} | |
55 | c := &KafkaCollector{ | |
56 | logger: log.NewNopLogger(), | |
57 | topic: defaultKafkaTopic, | |
58 | shouldSample: SampleRate(1.0, rand.Int63()), | |
59 | } | |
60 | ||
40 | 61 | for _, option := range options { |
41 | 62 | option(c) |
42 | 63 | } |
64 | ||
43 | 65 | if c.producer == nil { |
44 | 66 | p, err := sarama.NewAsyncProducer(addrs, nil) |
45 | 67 | if err != nil { |
47 | 69 | } |
48 | 70 | c.producer = p |
49 | 71 | } |
50 | if c.logger == nil { | |
51 | c.logger = log.NewNopLogger() | |
52 | } | |
53 | 72 | |
54 | 73 | go c.logErrors() |
74 | ||
55 | 75 | return c, nil |
56 | 76 | } |
57 | 77 | |
63 | 83 | |
64 | 84 | // Collect implements Collector. |
65 | 85 | func (c *KafkaCollector) Collect(s *Span) error { |
66 | c.producer.Input() <- &sarama.ProducerMessage{ | |
67 | Topic: KafkaTopic, | |
68 | Key: nil, | |
69 | Value: sarama.ByteEncoder(byteSerialize(s)), | |
86 | if c.shouldSample(s.traceID) { | |
87 | c.producer.Input() <- &sarama.ProducerMessage{ | |
88 | Topic: c.topic, | |
89 | Key: nil, | |
90 | Value: sarama.ByteEncoder(kafkaSerialize(s)), | |
91 | } | |
70 | 92 | } |
71 | 93 | return nil |
72 | 94 | } |
76 | 98 | return c.producer.Close() |
77 | 99 | } |
78 | 100 | |
79 | func byteSerialize(s *Span) []byte { | |
101 | func kafkaSerialize(s *Span) []byte { | |
80 | 102 | t := thrift.NewTMemoryBuffer() |
81 | 103 | p := thrift.NewTBinaryProtocolTransport(t) |
82 | 104 | if err := s.Encode().Write(p); err != nil { |
0 | package zipkin | |
1 | ||
2 | import "math" | |
3 | ||
// Sampler functions return if a Zipkin span should be sampled, based on its
// traceID.
type Sampler func(id int64) bool

// SampleRate returns a sampler function using a particular sample rate and a
// sample salt to identify if a Zipkin span, based on its traceID, should be
// collected. rate is clamped to [0, 1]: non-positive rates never sample and
// rates >= 1 always sample. The salt decorrelates sampling decisions between
// independent services using the same rate.
func SampleRate(rate float64, salt int64) Sampler {
	if rate <= 0 {
		return func(_ int64) bool {
			return false
		}
	}
	if rate >= 1.0 {
		return func(_ int64) bool {
			return true
		}
	}
	// Round (via +0.5 on a positive value) instead of truncating: plain
	// int64(rate*10000) maps e.g. 0.29 to threshold 2899 because
	// 0.29*10000 == 2899.999… in float64, sampling slightly less than
	// the requested rate.
	threshold := int64(rate*10000 + 0.5)
	return func(id int64) bool {
		return int64(math.Abs(float64(id^salt)))%10000 < threshold
	}
}
0 | package zipkin | |
1 | ||
2 | import "testing" | |
3 | ||
4 | func TestSampleRate(t *testing.T) { | |
5 | type triple struct { | |
6 | id, salt int64 | |
7 | rate float64 | |
8 | } | |
9 | for input, want := range map[triple]bool{ | |
10 | triple{123, 456, 1.0}: true, | |
11 | triple{123, 456, 999}: true, | |
12 | triple{123, 456, 0.0}: false, | |
13 | triple{123, 456, -42}: false, | |
14 | triple{1229998, 0, 0.01}: false, | |
15 | triple{1229999, 0, 0.01}: false, | |
16 | triple{1230000, 0, 0.01}: true, | |
17 | triple{1230001, 0, 0.01}: true, | |
18 | triple{1230098, 0, 0.01}: true, | |
19 | triple{1230099, 0, 0.01}: true, | |
20 | triple{1230100, 0, 0.01}: false, | |
21 | triple{1230101, 0, 0.01}: false, | |
22 | triple{1, 9999999, 0.01}: false, | |
23 | triple{999, 0, 0.99}: true, | |
24 | triple{9999, 0, 0.99}: false, | |
25 | } { | |
26 | sampler := SampleRate(input.rate, input.salt) | |
27 | if have := sampler(input.id); want != have { | |
28 | t.Errorf("%#+v: want %v, have %v", input, want, have) | |
29 | } | |
30 | } | |
31 | } |
2 | 2 | import ( |
3 | 3 | "encoding/base64" |
4 | 4 | "fmt" |
5 | "math" | |
6 | 5 | "math/rand" |
7 | 6 | "net" |
8 | 7 | "time" |
12 | 11 | "github.com/go-kit/kit/log" |
13 | 12 | "github.com/go-kit/kit/tracing/zipkin/_thrift/gen-go/scribe" |
14 | 13 | ) |
14 | ||
15 | const defaultScribeCategory = "zipkin" | |
16 | ||
17 | // defaultBatchInterval in seconds | |
18 | const defaultBatchInterval = 1 | |
15 | 19 | |
16 | 20 | // ScribeCollector implements Collector by forwarding spans to a Scribe |
17 | 21 | // service, in batches. |
24 | 28 | nextSend time.Time |
25 | 29 | batchInterval time.Duration |
26 | 30 | batchSize int |
27 | sampleRate float64 | |
28 | sampleSalt int64 | |
31 | shouldSample Sampler | |
29 | 32 | logger log.Logger |
33 | category string | |
34 | quit chan struct{} | |
30 | 35 | } |
31 | 36 | |
32 | 37 | // NewScribeCollector returns a new Scribe-backed Collector. addr should be a |
41 | 46 | if err != nil { |
42 | 47 | return nil, err |
43 | 48 | } |
44 | defaultBatchInterval := time.Second | |
45 | 49 | c := &ScribeCollector{ |
46 | 50 | client: client, |
47 | 51 | factory: factory, |
48 | 52 | spanc: make(chan *Span), |
49 | 53 | sendc: make(chan struct{}), |
50 | 54 | batch: []*scribe.LogEntry{}, |
51 | nextSend: time.Now().Add(defaultBatchInterval), | |
52 | batchInterval: defaultBatchInterval, | |
55 | batchInterval: defaultBatchInterval * time.Second, | |
53 | 56 | batchSize: 100, |
54 | sampleRate: 1.0, | |
55 | sampleSalt: rand.Int63(), | |
57 | shouldSample: SampleRate(1.0, rand.Int63()), | |
56 | 58 | logger: log.NewNopLogger(), |
59 | category: defaultScribeCategory, | |
60 | quit: make(chan struct{}), | |
57 | 61 | } |
58 | 62 | for _, option := range options { |
59 | 63 | option(c) |
60 | 64 | } |
65 | c.nextSend = time.Now().Add(c.batchInterval) | |
61 | 66 | go c.loop() |
62 | 67 | return c, nil |
63 | 68 | } |
68 | 73 | return nil // accepted |
69 | 74 | } |
70 | 75 | |
76 | // Close implements Collector. | |
71 | 77 | func (c *ScribeCollector) Close() error { |
72 | // TODO Close the underlying transport here? | |
78 | close(c.quit) | |
73 | 79 | return nil |
74 | 80 | } |
75 | 81 | |
79 | 85 | for { |
80 | 86 | select { |
81 | 87 | case span := <-c.spanc: |
82 | if !shouldSample(span.traceID, c.sampleSalt, c.sampleRate) { | |
88 | if !c.shouldSample(span.traceID) { | |
83 | 89 | continue |
84 | 90 | } |
85 | 91 | c.batch = append(c.batch, &scribe.LogEntry{ |
86 | Category: "zipkin", // TODO parameterize? | |
87 | Message: serialize(span), | |
92 | Category: c.category, | |
93 | Message: scribeSerialize(span), | |
88 | 94 | }) |
89 | 95 | if len(c.batch) >= c.batchSize { |
90 | 96 | go c.sendNow() |
101 | 107 | c.logger.Log("err", err.Error()) |
102 | 108 | } |
103 | 109 | c.batch = c.batch[:0] |
110 | case <-c.quit: | |
111 | return | |
104 | 112 | } |
105 | 113 | } |
106 | 114 | } |
144 | 152 | // ScribeSampleRate sets the sample rate used to determine if a trace will be |
145 | 153 | // sent to the collector. By default, the sample rate is 1.0, i.e. all traces |
146 | 154 | // are sent. |
147 | func ScribeSampleRate(f float64) ScribeOption { | |
148 | return func(s *ScribeCollector) { s.sampleRate = f } | |
155 | func ScribeSampleRate(sr Sampler) ScribeOption { | |
156 | return func(s *ScribeCollector) { s.shouldSample = sr } | |
149 | 157 | } |
150 | 158 | |
151 | 159 | // ScribeLogger sets the logger used to report errors in the collection |
153 | 161 | // anywhere. It's important to set this option in a production service. |
154 | 162 | func ScribeLogger(logger log.Logger) ScribeOption { |
155 | 163 | return func(s *ScribeCollector) { s.logger = logger } |
164 | } | |
165 | ||
166 | // ScribeCategory sets the Scribe category used to transmit the spans. | |
167 | func ScribeCategory(category string) ScribeOption { | |
168 | return func(s *ScribeCollector) { s.category = category } | |
156 | 169 | } |
157 | 170 | |
158 | 171 | func scribeClientFactory(addr string, timeout time.Duration) func() (scribe.Scribe, error) { |
173 | 186 | } |
174 | 187 | } |
175 | 188 | |
176 | func serialize(s *Span) string { | |
189 | func scribeSerialize(s *Span) string { | |
177 | 190 | t := thrift.NewTMemoryBuffer() |
178 | 191 | p := thrift.NewTBinaryProtocolTransport(t) |
179 | 192 | if err := s.Encode().Write(p); err != nil { |
181 | 194 | } |
182 | 195 | return base64.StdEncoding.EncodeToString(t.Buffer.Bytes()) |
183 | 196 | } |
184 | ||
// shouldSample reports whether a span with the given trace id should be
// collected, for a sample rate in [0, 1] and a salt that decorrelates
// sampling decisions between services. Non-positive rates never sample;
// rates >= 1 always sample.
func shouldSample(id int64, salt int64, rate float64) bool {
	if rate <= 0 {
		return false
	}
	if rate >= 1.0 {
		return true
	}
	// Round (via +0.5 on a positive value) rather than truncate: plain
	// int64(rate*10000) turns e.g. 0.29 into threshold 2899 because
	// 0.29*10000 == 2899.999… in float64.
	return int64(math.Abs(float64(id^salt)))%10000 < int64(rate*10000+0.5)
}
0 | package zipkin | |
1 | ||
2 | import "testing" | |
3 | ||
4 | func TestShouldSample(t *testing.T) { | |
5 | type triple struct { | |
6 | id, salt int64 | |
7 | rate float64 | |
8 | } | |
9 | for input, want := range map[triple]bool{ | |
10 | triple{123, 456, 1.0}: true, | |
11 | triple{123, 456, 999}: true, | |
12 | triple{123, 456, 0.0}: false, | |
13 | triple{123, 456, -42}: false, | |
14 | triple{1229998, 0, 0.01}: false, | |
15 | triple{1229999, 0, 0.01}: false, | |
16 | triple{1230000, 0, 0.01}: true, | |
17 | triple{1230001, 0, 0.01}: true, | |
18 | triple{1230098, 0, 0.01}: true, | |
19 | triple{1230099, 0, 0.01}: true, | |
20 | triple{1230100, 0, 0.01}: false, | |
21 | triple{1230101, 0, 0.01}: false, | |
22 | triple{1, 9999999, 0.01}: false, | |
23 | triple{999, 0, 0.99}: true, | |
24 | triple{9999, 0, 0.99}: false, | |
25 | } { | |
26 | if have := shouldSample(input.id, input.salt, input.rate); want != have { | |
27 | t.Errorf("%#+v: want %v, have %v", input, want, have) | |
28 | } | |
29 | } | |
30 | } |
8 | 8 | "time" |
9 | 9 | |
10 | 10 | "github.com/go-kit/kit/tracing/zipkin/_thrift/gen-go/zipkincore" |
11 | ) | |
12 | ||
13 | var ( | |
14 | // SpanContextKey represents the Span in the request context. | |
15 | SpanContextKey = "Zipkin-Span" | |
16 | 11 | ) |
17 | 12 | |
18 | 13 | // A Span is a named collection of annotations. It represents meaningful |
24 | 24 | // • https://gist.github.com/yoavaa/3478d3a0df666f21a98c |
25 | 25 | |
26 | 26 | const ( |
27 | // SpanContextKey holds the key used to store Zipkin spans in the context. | |
28 | SpanContextKey = "Zipkin-Span" | |
29 | ||
27 | 30 | // https://github.com/racker/tryfer#headers |
28 | 31 | traceIDHTTPHeader = "X-B3-TraceId" |
29 | 32 | spanIDHTTPHeader = "X-B3-SpanId" |