Merge pull request #79 from go-kit/zipkin-enhancements
Zipkin enhancements
Peter Bourgon
8 years ago
95 | 95 | if zipkinCollector, err = zipkin.NewScribeCollector( |
96 | 96 | *zipkinCollectorAddr, |
97 | 97 | *zipkinCollectorTimeout, |
98 | *zipkinCollectorBatchSize, | |
99 | *zipkinCollectorBatchInterval, | |
98 | zipkin.ScribeBatchSize(*zipkinCollectorBatchSize), | |
99 | zipkin.ScribeBatchInterval(*zipkinCollectorBatchInterval), | |
100 | zipkin.ScribeLogger(logger), | |
100 | 101 | ); err != nil { |
101 | 102 | logger.Log("err", err) |
102 | 103 | os.Exit(1) |
104 | 105 | } |
105 | 106 | zipkinMethodName := "add" |
106 | 107 | zipkinSpanFunc := zipkin.MakeNewSpanFunc(zipkinHostPort, *zipkinServiceName, zipkinMethodName) |
107 | zipkin.Log.Swap(logger) // log diagnostic/error details | |
108 | 108 | |
109 | 109 | // Our business and operational domain |
110 | 110 | var a Add = pureAdd |
141 | 141 | go func() { |
142 | 142 | ctx, cancel := context.WithCancel(root) |
143 | 143 | defer cancel() |
144 | before := []httptransport.BeforeFunc{zipkin.ToContext(zipkinSpanFunc)} | |
144 | before := []httptransport.BeforeFunc{zipkin.ToContext(zipkinSpanFunc, logger)} | |
145 | 145 | after := []httptransport.AfterFunc{} |
146 | 146 | handler := makeHTTPBinding(ctx, e, before, after) |
147 | 147 | logger.Log("addr", *httpAddr, "transport", "HTTP/JSON") |
0 | package log | |
1 | ||
2 | type nopLogger struct{} | |
3 | ||
4 | func (nopLogger) Log(...interface{}) error { return nil } | |
5 | ||
6 | func NewNopLogger() Logger { return nopLogger{} } |
0 | package log_test | |
1 | ||
2 | import ( | |
3 | "testing" | |
4 | ||
5 | "github.com/go-kit/kit/log" | |
6 | ) | |
7 | ||
8 | func TestNopLogger(t *testing.T) { | |
9 | logger := log.NewNopLogger() | |
10 | if err := logger.Log("abc", 123); err != nil { | |
11 | t.Error(err) | |
12 | } | |
13 | if err := log.NewContext(logger).With("def", "ghi").Log(); err != nil { | |
14 | t.Error(err) | |
15 | } | |
16 | } |
3 | 3 | "encoding/base64" |
4 | 4 | "errors" |
5 | 5 | "fmt" |
6 | "math" | |
7 | "math/rand" | |
6 | 8 | "net" |
7 | 9 | "strings" |
8 | 10 | "time" |
9 | 11 | |
10 | 12 | "github.com/apache/thrift/lib/go/thrift" |
11 | 13 | |
14 | "github.com/go-kit/kit/log" | |
12 | 15 | "github.com/go-kit/kit/tracing/zipkin/_thrift/gen-go/scribe" |
13 | 16 | ) |
14 | 17 | |
25 | 28 | factory func() (scribe.Scribe, error) |
26 | 29 | spanc chan *Span |
27 | 30 | sendc chan struct{} |
28 | quitc chan chan struct{} | |
29 | 31 | batch []*scribe.LogEntry |
30 | 32 | nextSend time.Time |
31 | 33 | batchInterval time.Duration |
32 | 34 | batchSize int |
33 | } | |
34 | ||
35 | // NewScribeCollector returns a new Scribe-backed Collector, ready for use. | |
36 | func NewScribeCollector(addr string, timeout time.Duration, batchSize int, batchInterval time.Duration) (Collector, error) { | |
35 | sampleRate float64 | |
36 | sampleSalt int64 | |
37 | logger log.Logger | |
38 | } | |
39 | ||
40 | // NewScribeCollector returns a new Scribe-backed Collector. addr should be a | |
41 | // TCP endpoint of the form "host:port". timeout is passed to the Thrift dial | |
42 | // function NewTSocketFromAddrTimeout. batchSize and batchInterval control the | |
43 | // maximum size and interval of a batch of spans; as soon as either limit is | |
44 | // reached, the batch is sent. The logger is used to log errors, such as batch | |
45 | // send failures; users should provide an appropriate context, if desired. | |
46 | func NewScribeCollector(addr string, timeout time.Duration, options ...ScribeOption) (Collector, error) { | |
37 | 47 | factory := scribeClientFactory(addr, timeout) |
38 | 48 | client, err := factory() |
39 | 49 | if err != nil { |
40 | 50 | return nil, err |
41 | 51 | } |
52 | defaultBatchInterval := time.Second | |
42 | 53 | c := &ScribeCollector{ |
43 | 54 | client: client, |
44 | 55 | factory: factory, |
45 | 56 | spanc: make(chan *Span), |
46 | 57 | sendc: make(chan struct{}), |
47 | 58 | batch: []*scribe.LogEntry{}, |
48 | nextSend: time.Now().Add(batchInterval), | |
49 | batchInterval: batchInterval, | |
50 | batchSize: batchSize, | |
59 | nextSend: time.Now().Add(defaultBatchInterval), | |
60 | batchInterval: defaultBatchInterval, | |
61 | batchSize: 100, | |
62 | sampleRate: 1.0, | |
63 | sampleSalt: rand.Int63(), | |
64 | logger: log.NewNopLogger(), | |
65 | } | |
66 | for _, option := range options { | |
67 | option(c) | |
51 | 68 | } |
52 | 69 | go c.loop() |
53 | 70 | return c, nil |
65 | 82 | for { |
66 | 83 | select { |
67 | 84 | case span := <-c.spanc: |
85 | if !shouldSample(span.traceID, c.sampleSalt, c.sampleRate) { | |
86 | continue | |
87 | } | |
68 | 88 | c.batch = append(c.batch, &scribe.LogEntry{ |
69 | 89 | Category: "zipkin", // TODO parameterize? |
70 | 90 | Message: serialize(span), |
81 | 101 | case <-c.sendc: |
82 | 102 | c.nextSend = time.Now().Add(c.batchInterval) |
83 | 103 | if err := c.send(c.batch); err != nil { |
84 | Log.Log("err", err.Error()) | |
85 | continue | |
104 | c.logger.Log("err", err.Error()) | |
86 | 105 | } |
87 | 106 | c.batch = c.batch[:0] |
88 | 107 | } |
110 | 129 | return nil |
111 | 130 | } |
112 | 131 | |
132 | // ScribeOption sets a parameter for the StdlibAdapter. | |
133 | type ScribeOption func(s *ScribeCollector) | |
134 | ||
135 | // ScribeBatchSize sets the maximum batch size, after which a collect will be | |
136 | // triggered. The default batch size is 100 traces. | |
137 | func ScribeBatchSize(n int) ScribeOption { | |
138 | return func(s *ScribeCollector) { s.batchSize = n } | |
139 | } | |
140 | ||
141 | // ScribeBatchInterval sets the maximum duration we will buffer traces before | |
142 | // emitting them to the collector. The default batch interval is 1 second. | |
143 | func ScribeBatchInterval(d time.Duration) ScribeOption { | |
144 | return func(s *ScribeCollector) { s.batchInterval = d } | |
145 | } | |
146 | ||
147 | // ScribeSampleRate sets the sample rate used to determine if a trace will be | |
148 | // sent to the collector. By default, the sample rate is 1.0, i.e. all traces | |
149 | // are sent. | |
150 | func ScribeSampleRate(f float64) ScribeOption { | |
151 | return func(s *ScribeCollector) { s.sampleRate = f } | |
152 | } | |
153 | ||
154 | // ScribeLogger sets the logger used to report errors in the collection | |
155 | // process. By default, a no-op logger is used, i.e. no errors are logged | |
156 | // anywhere. It's important to set this option in a production service. | |
157 | func ScribeLogger(logger log.Logger) ScribeOption { | |
158 | return func(s *ScribeCollector) { s.logger = logger } | |
159 | } | |
160 | ||
113 | 161 | func scribeClientFactory(addr string, timeout time.Duration) func() (scribe.Scribe, error) { |
114 | 162 | return func() (scribe.Scribe, error) { |
115 | 163 | a, err := net.ResolveTCPAddr("tcp", addr) |
135 | 183 | panic(err) |
136 | 184 | } |
137 | 185 | return base64.StdEncoding.EncodeToString(t.Buffer.Bytes()) |
186 | } | |
187 | ||
188 | func shouldSample(id int64, salt int64, rate float64) bool { | |
189 | if rate <= 0 { | |
190 | return false | |
191 | } | |
192 | if rate >= 1.0 { | |
193 | return true | |
194 | } | |
195 | return int64(math.Abs(float64(id^salt)))%10000 < int64(rate*10000) | |
138 | 196 | } |
139 | 197 | |
140 | 198 | // NopCollector implements Collector but performs no work. |
0 | package zipkin | |
1 | ||
2 | import "testing" | |
3 | ||
4 | func TestShouldSample(t *testing.T) { | |
5 | type triple struct { | |
6 | id, salt int64 | |
7 | rate float64 | |
8 | } | |
9 | for input, want := range map[triple]bool{ | |
10 | triple{123, 456, 1.0}: true, | |
11 | triple{123, 456, 999}: true, | |
12 | triple{123, 456, 0.0}: false, | |
13 | triple{123, 456, -42}: false, | |
14 | triple{1229998, 0, 0.01}: false, | |
15 | triple{1229999, 0, 0.01}: false, | |
16 | triple{1230000, 0, 0.01}: true, | |
17 | triple{1230001, 0, 0.01}: true, | |
18 | triple{1230098, 0, 0.01}: true, | |
19 | triple{1230099, 0, 0.01}: true, | |
20 | triple{1230100, 0, 0.01}: false, | |
21 | triple{1230101, 0, 0.01}: false, | |
22 | triple{1, 9999999, 0.01}: false, | |
23 | triple{999, 0, 0.99}: true, | |
24 | triple{9999, 0, 0.99}: false, | |
25 | } { | |
26 | if have := shouldSample(input.id, input.salt, input.rate); want != have { | |
27 | t.Errorf("%#+v: want %v, have %v", input, want, have) | |
28 | } | |
29 | } | |
30 | } |
20 | 20 | |
21 | 21 | timeout := time.Second |
22 | 22 | batchInterval := time.Millisecond |
23 | c, err := zipkin.NewScribeCollector(server.addr(), timeout, 0, batchInterval) | |
23 | c, err := zipkin.NewScribeCollector(server.addr(), timeout, zipkin.ScribeBatchSize(0), zipkin.ScribeBatchInterval(batchInterval)) | |
24 | 24 | if err != nil { |
25 | 25 | t.Fatal(err) |
26 | 26 | } |
29 | 29 | //binaryAnnotations []BinaryAnnotation // TODO |
30 | 30 | } |
31 | 31 | |
32 | // NewSpan returns a new Span object ready for use. | |
32 | // NewSpan returns a new Span, which can be annotated and collected by a | |
33 | // collector. Spans are passed through the request context to each middleware | |
34 | // under the SpanContextKey. | |
33 | 35 | func NewSpan(hostport, serviceName, methodName string, traceID, spanID, parentSpanID int64) *Span { |
34 | 36 | return &Span{ |
35 | 37 | host: makeEndpoint(hostport, serviceName), |
40 | 42 | } |
41 | 43 | } |
42 | 44 | |
43 | // makeEndpoint will return a nil Endpoint if the input parameters are | |
44 | // malformed. | |
45 | // makeEndpoint takes the hostport and service name that represent this Zipkin | |
46 | // service, and returns an endpoint that's embedded into the Zipkin core Span | |
47 | // type. It will return a nil endpoint if the input parameters are malformed. | |
45 | 48 | func makeEndpoint(hostport, serviceName string) *zipkincore.Endpoint { |
46 | 49 | host, port, err := net.SplitHostPort(hostport) |
47 | 50 | if err != nil { |
48 | Log.Log("hostport", hostport, "err", err) | |
49 | 51 | return nil |
50 | 52 | } |
51 | 53 | addrs, err := net.LookupIP(host) |
52 | 54 | if err != nil { |
53 | Log.Log("host", host, "err", err) | |
54 | 55 | return nil |
55 | 56 | } |
56 | 57 | if len(addrs) <= 0 { |
57 | Log.Log("host", host, "err", "no IPs") | |
58 | 58 | return nil |
59 | 59 | } |
60 | 60 | portInt, err := strconv.ParseInt(port, 10, 16) |
61 | 61 | if err != nil { |
62 | Log.Log("port", port, "err", err) | |
63 | 62 | return nil |
64 | 63 | } |
65 | 64 | endpoint := zipkincore.NewEndpoint() |
20 | 20 | // • http://www.slideshare.net/johanoskarsson/zipkin-runtime-open-house |
21 | 21 | // • https://groups.google.com/forum/#!topic/zipkin-user/KilwtSA0g1k |
22 | 22 | // • https://gist.github.com/yoavaa/3478d3a0df666f21a98c |
23 | ||
24 | // Log is used to report diagnostic information. To enable it, swap in your | |
25 | // application's logger. | |
26 | var Log log.SwapLogger | |
27 | 23 | |
28 | 24 | const ( |
29 | 25 | // https://github.com/racker/tryfer#headers |
94 | 90 | // ToContext returns a function that satisfies transport/http.BeforeFunc. It |
95 | 91 | // takes a Zipkin span from the incoming HTTP request, and saves it in the |
96 | 92 | // request context. It's designed to be wired into a server's HTTP transport |
97 | // Before stack. | |
98 | func ToContext(newSpan NewSpanFunc) func(ctx context.Context, r *http.Request) context.Context { | |
93 | // Before stack. The logger is used to report errors. | |
94 | func ToContext(newSpan NewSpanFunc, logger log.Logger) func(ctx context.Context, r *http.Request) context.Context { | |
99 | 95 | return func(ctx context.Context, r *http.Request) context.Context { |
100 | return context.WithValue(ctx, SpanContextKey, fromHTTP(newSpan, r)) | |
96 | return context.WithValue(ctx, SpanContextKey, fromHTTP(newSpan, r, logger)) | |
101 | 97 | } |
102 | 98 | } |
103 | 99 | |
125 | 121 | } |
126 | 122 | } |
127 | 123 | |
128 | func fromHTTP(newSpan NewSpanFunc, r *http.Request) *Span { | |
124 | func fromHTTP(newSpan NewSpanFunc, r *http.Request, logger log.Logger) *Span { | |
129 | 125 | traceIDStr := r.Header.Get(traceIDHTTPHeader) |
130 | 126 | if traceIDStr == "" { |
131 | Log.Log("debug", "make new span") | |
132 | 127 | return newSpan(newID(), newID(), 0) // normal; just make a new one |
133 | 128 | } |
134 | 129 | traceID, err := strconv.ParseInt(traceIDStr, 16, 64) |
135 | 130 | if err != nil { |
136 | Log.Log(traceIDHTTPHeader, traceIDStr, "err", err) | |
131 | logger.Log(traceIDHTTPHeader, traceIDStr, "err", err) | |
137 | 132 | return newSpan(newID(), newID(), 0) |
138 | 133 | } |
139 | 134 | spanIDStr := r.Header.Get(spanIDHTTPHeader) |
140 | 135 | if spanIDStr == "" { |
141 | Log.Log("msg", "trace ID without span ID") // abnormal | |
142 | spanIDStr = strconv.FormatInt(newID(), 64) // deal with it | |
136 | logger.Log("msg", "trace ID without span ID") // abnormal | |
137 | spanIDStr = strconv.FormatInt(newID(), 64) // deal with it | |
143 | 138 | } |
144 | 139 | spanID, err := strconv.ParseInt(spanIDStr, 16, 64) |
145 | 140 | if err != nil { |
146 | Log.Log(spanIDHTTPHeader, spanIDStr, "err", err) // abnormal | |
147 | spanID = newID() // deal with it | |
141 | logger.Log(spanIDHTTPHeader, spanIDStr, "err", err) // abnormal | |
142 | spanID = newID() // deal with it | |
148 | 143 | } |
149 | 144 | parentSpanIDStr := r.Header.Get(parentSpanIDHTTPHeader) |
150 | 145 | if parentSpanIDStr == "" { |
152 | 147 | } |
153 | 148 | parentSpanID, err := strconv.ParseInt(parentSpanIDStr, 16, 64) |
154 | 149 | if err != nil { |
155 | Log.Log(parentSpanIDHTTPHeader, parentSpanIDStr, "err", err) // abnormal | |
156 | parentSpanID = 0 // the only way to deal with it | |
150 | logger.Log(parentSpanIDHTTPHeader, parentSpanIDStr, "err", err) // abnormal | |
151 | parentSpanID = 0 // the only way to deal with it | |
157 | 152 | } |
158 | 153 | return newSpan(traceID, spanID, parentSpanID) |
159 | 154 | } |
1 | 1 | |
2 | 2 | import ( |
3 | 3 | "fmt" |
4 | "io/ioutil" | |
4 | 5 | "net/http" |
5 | 6 | "reflect" |
6 | 7 | "runtime" |
11 | 12 | "golang.org/x/net/context" |
12 | 13 | |
13 | 14 | "github.com/go-kit/kit/endpoint" |
15 | "github.com/go-kit/kit/log" | |
14 | 16 | "github.com/go-kit/kit/tracing/zipkin" |
15 | 17 | ) |
16 | 18 | |
30 | 32 | r.Header.Set("X-B3-ParentSpanId", strconv.FormatInt(parentSpanID, 16)) |
31 | 33 | |
32 | 34 | newSpan := zipkin.MakeNewSpanFunc(hostport, serviceName, methodName) |
33 | toContext := zipkin.ToContext(newSpan) | |
35 | toContext := zipkin.ToContext(newSpan, log.NewLogfmtLogger(ioutil.Discard)) | |
34 | 36 | |
35 | 37 | ctx := toContext(context.Background(), r) |
36 | 38 | val := ctx.Value(zipkin.SpanContextKey) |