0 | 0 |
// Package graphite implements a Graphite backend for package metrics. Metrics
|
1 | |
// will be emitted to a Graphite server in the plaintext protocol
|
2 | |
// (http://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol)
|
3 | |
// which looks like:
|
|
1 |
// will be emitted to a Graphite server in the plaintext protocol which looks
|
|
2 |
// like:
|
|
3 |
//
|
4 | 4 |
// "<metric path> <metric value> <metric timestamp>"
|
5 | 5 |
//
|
|
6 |
// See http://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol.
|
6 | 7 |
// The current implementation ignores fields.
|
7 | 8 |
package graphite
|
8 | 9 |
|
|
22 | 23 |
"github.com/go-kit/kit/metrics"
|
23 | 24 |
)
|
24 | 25 |
|
25 | |
// Emitter will keep track of all metrics and, once started,
|
26 | |
// will emit the metrics via the Flush method to the given address.
|
|
26 |
// Emitter will keep track of all metrics and, once started, will emit the
|
|
27 |
// metrics via the Flush method to the given address.
|
27 | 28 |
type Emitter struct {
|
28 | |
prefix string
|
29 | |
|
30 | |
network, addr string
|
31 | |
conn net.Conn
|
32 | |
dialer Dialer
|
33 | |
start sync.Once
|
34 | |
stop chan bool
|
35 | |
|
36 | 29 |
mtx sync.Mutex
|
|
30 |
prefix string
|
|
31 |
mgr *manager
|
37 | 32 |
counters []*counter
|
38 | 33 |
histograms []*windowedHistogram
|
39 | 34 |
gauges []*gauge
|
40 | |
|
41 | |
logger log.Logger
|
42 | |
}
|
43 | |
|
44 | |
// NewEmitter will return an Emitter that will prefix all
|
45 | |
// metrics names with the given prefix. Once started, it will attempt to create
|
46 | |
// a connection with the given network and address via `net.Dial` and periodically post
|
47 | |
// metrics to the connection in the Graphite plaintext protocol.
|
48 | |
func NewEmitter(network, addr string, metricsPrefix string, logger log.Logger) *Emitter {
|
49 | |
return NewEmitterDial(network, addr, net.Dial, metricsPrefix, logger)
|
50 | |
}
|
51 | |
|
52 | |
// NewEmitterDial will return an Emitter that will prefix all
|
53 | |
// metrics names with the given prefix. Once started, it will attempt to create
|
54 | |
// a connection with the given network and address via the given Dialer and periodically post
|
55 | |
// metrics to the connection in the Graphite plaintext protocol.
|
56 | |
func NewEmitterDial(network, addr string, dialer Dialer, metricsPrefix string, logger log.Logger) *Emitter {
|
57 | |
return &Emitter{
|
58 | |
network: network,
|
59 | |
addr: addr,
|
60 | |
dialer: net.Dial,
|
61 | |
stop: make(chan bool),
|
62 | |
prefix: metricsPrefix,
|
63 | |
logger: logger,
|
64 | |
}
|
|
35 |
logger log.Logger
|
|
36 |
quitc chan chan struct{}
|
|
37 |
}
|
|
38 |
|
|
39 |
// NewEmitter will return an Emitter that will prefix all metrics names with the
|
|
40 |
// given prefix. Once started, it will attempt to create a connection with the
|
|
41 |
// given network and address via `net.Dial` and periodically post metrics to the
|
|
42 |
// connection in the Graphite plaintext protocol.
|
|
43 |
func NewEmitter(network, address string, metricsPrefix string, flushInterval time.Duration, logger log.Logger) *Emitter {
|
|
44 |
return NewEmitterDial(net.Dial, network, address, metricsPrefix, flushInterval, logger)
|
|
45 |
}
|
|
46 |
|
|
47 |
// NewEmitterDial is the same as NewEmitter, but allows you to specify your own
|
|
48 |
// Dialer function. This is primarily useful for tests.
|
|
49 |
func NewEmitterDial(dialer Dialer, network, address string, metricsPrefix string, flushInterval time.Duration, logger log.Logger) *Emitter {
|
|
50 |
e := &Emitter{
|
|
51 |
prefix: metricsPrefix,
|
|
52 |
mgr: newManager(dialer, network, address, time.After, logger),
|
|
53 |
logger: logger,
|
|
54 |
quitc: make(chan chan struct{}),
|
|
55 |
}
|
|
56 |
go e.loop(flushInterval)
|
|
57 |
return e
|
65 | 58 |
}
|
66 | 59 |
|
67 | 60 |
// NewCounter returns a Counter whose value will be periodically emitted in
|
68 | 61 |
// a Graphite-compatible format once the Emitter is started. Fields are ignored.
|
69 | 62 |
func (e *Emitter) NewCounter(name string) metrics.Counter {
|
70 | |
// only one flush at a time
|
|
63 |
e.mtx.Lock()
|
|
64 |
defer e.mtx.Unlock()
|
71 | 65 |
c := &counter{name, 0}
|
72 | |
e.mtx.Lock()
|
73 | 66 |
e.counters = append(e.counters, c)
|
74 | |
e.mtx.Unlock()
|
75 | 67 |
return c
|
76 | 68 |
}
|
77 | 69 |
|
|
79 | 71 |
// windowed HDR histogram which drops data older than five minutes.
|
80 | 72 |
//
|
81 | 73 |
// The histogram exposes metrics for each passed quantile as gauges. Quantiles
|
82 | |
// should be integers in the range 1..99. The gauge names are assigned by
|
83 | |
// using the passed name as a prefix and appending "_pNN" e.g. "_p50".
|
84 | |
//
|
85 | |
// The values of this histogram will be periodically emitted in a Graphite-compatible
|
86 | |
// format once the Emitter is started. Fields are ignored.
|
|
74 |
// should be integers in the range 1..99. The gauge names are assigned by using
|
|
75 |
// the passed name as a prefix and appending "_pNN" e.g. "_p50".
|
|
76 |
//
|
|
77 |
// The values of this histogram will be periodically emitted in a
|
|
78 |
// Graphite-compatible format once the Emitter is started. Fields are ignored.
|
87 | 79 |
func (e *Emitter) NewHistogram(name string, minValue, maxValue int64, sigfigs int, quantiles ...int) (metrics.Histogram, error) {
|
88 | 80 |
gauges := map[int]metrics.Gauge{}
|
89 | 81 |
for _, quantile := range quantiles {
|
|
95 | 87 |
h := newWindowedHistogram(name, minValue, maxValue, sigfigs, gauges, e.logger)
|
96 | 88 |
|
97 | 89 |
e.mtx.Lock()
|
|
90 |
defer e.mtx.Unlock()
|
98 | 91 |
e.histograms = append(e.histograms, h)
|
99 | |
e.mtx.Unlock()
|
100 | 92 |
return h, nil
|
101 | 93 |
}
|
102 | 94 |
|
103 | |
// NewTimeHistogram returns a TimeHistogram wrapper around the windowed
|
104 | |
// HDR histogram provided by this package.
|
|
95 |
// NewTimeHistogram returns a TimeHistogram wrapper around the windowed HDR
|
|
96 |
// histogram provided by this package.
|
105 | 97 |
func (e *Emitter) NewTimeHistogram(name string, unit time.Duration, minValue, maxValue int64, sigfigs int, quantiles ...int) (metrics.TimeHistogram, error) {
|
106 | 98 |
h, err := e.NewHistogram(name, minValue, maxValue, sigfigs, quantiles...)
|
107 | 99 |
if err != nil {
|
|
110 | 102 |
return metrics.NewTimeHistogram(unit, h), nil
|
111 | 103 |
}
|
112 | 104 |
|
113 | |
// NewGauge returns a Gauge whose value will be periodically emitted in
|
114 | |
// a Graphite-compatible format once the Emitter is started. Fields are ignored.
|
|
105 |
// NewGauge returns a Gauge whose value will be periodically emitted in a
|
|
106 |
// Graphite-compatible format once the Emitter is started. Fields are ignored.
|
115 | 107 |
func (e *Emitter) NewGauge(name string) metrics.Gauge {
|
116 | 108 |
e.mtx.Lock()
|
117 | 109 |
defer e.mtx.Unlock()
|
|
124 | 116 |
return g
|
125 | 117 |
}
|
126 | 118 |
|
127 | |
func (e *Emitter) dial() error {
|
128 | |
var err error
|
129 | |
e.conn, err = e.dialer(e.network, e.addr)
|
130 | |
return err
|
131 | |
}
|
132 | |
|
133 | |
type Dialer func(network, addr string) (net.Conn, error)
|
134 | |
|
135 | |
// Start will kick off a background goroutine to
|
136 | |
// call Flush once every interval.
|
137 | |
func (e *Emitter) Start(interval time.Duration) error {
|
138 | |
var err error
|
139 | |
e.start.Do(func() {
|
140 | |
err = e.dial()
|
141 | |
if err != nil {
|
|
119 |
func (e *Emitter) loop(d time.Duration) {
|
|
120 |
ticker := time.NewTicker(d)
|
|
121 |
defer ticker.Stop()
|
|
122 |
|
|
123 |
for {
|
|
124 |
select {
|
|
125 |
case <-ticker.C:
|
|
126 |
e.Flush()
|
|
127 |
|
|
128 |
case q := <-e.quitc:
|
|
129 |
e.Flush()
|
|
130 |
close(q)
|
142 | 131 |
return
|
143 | 132 |
}
|
144 | |
go func() {
|
145 | |
t := time.Tick(interval)
|
146 | |
for {
|
147 | |
select {
|
148 | |
case <-t:
|
149 | |
e.Flush()
|
150 | |
case <-e.stop:
|
151 | |
return
|
152 | |
}
|
153 | |
}
|
154 | |
}()
|
155 | |
})
|
156 | |
return err
|
157 | |
}
|
158 | |
|
159 | |
// Stop will flush the current metrics and close the
|
160 | |
// current Graphite connection, if it exists.
|
161 | |
func (e *Emitter) Stop() error {
|
162 | |
if e.conn == nil {
|
163 | |
return nil
|
164 | |
}
|
165 | |
// stop the ticking flush loop
|
166 | |
e.stop <- true
|
167 | |
// get one last flush in
|
168 | |
e.Flush()
|
169 | |
// close the connection
|
170 | |
err := e.conn.Close()
|
171 | |
// nil the conn to avoid problems
|
172 | |
// if Stop() is called more than once.
|
173 | |
e.conn = nil
|
174 | |
return err
|
175 | |
}
|
176 | |
|
177 | |
var (
|
178 | |
RetryMax = 10
|
179 | |
RetryWait = 2 * time.Millisecond
|
180 | |
RetryMultiplier = 2
|
181 | |
)
|
182 | |
|
183 | |
// Flush will write the current metrics to the Emitter's
|
184 | |
// connection in the Graphite plaintext protocol.
|
185 | |
func (e *Emitter) Flush() error {
|
186 | |
// only one flush at a time
|
187 | |
e.mtx.Lock()
|
|
133 |
}
|
|
134 |
}
|
|
135 |
|
|
136 |
// Stop will flush the current metrics and close the active connection. Calling
|
|
137 |
// Stop more than once is a programmer error.
|
|
138 |
func (e *Emitter) Stop() {
|
|
139 |
q := make(chan struct{})
|
|
140 |
e.quitc <- q
|
|
141 |
<-q
|
|
142 |
}
|
|
143 |
|
|
144 |
// Flush will write the current metrics to the Emitter's connection in the
|
|
145 |
// Graphite plaintext protocol.
|
|
146 |
func (e *Emitter) Flush() {
|
|
147 |
e.mtx.Lock() // one flush at a time
|
188 | 148 |
defer e.mtx.Unlock()
|
189 | 149 |
|
190 | |
// set the system up to perform a retry loop
|
191 | |
var err error
|
192 | |
wait := RetryWait
|
193 | |
for attempts := 1; ; attempts++ {
|
194 | |
err = e.flush(e.conn)
|
195 | |
// no error? return immediately.
|
196 | |
if err == nil {
|
197 | |
return nil
|
198 | |
}
|
199 | |
// we're at our last attempt? give up.
|
200 | |
if attempts >= RetryMax {
|
201 | |
break
|
202 | |
}
|
203 | |
// log, wait, and try again
|
204 | |
e.logger.Log(
|
205 | |
"err", err,
|
206 | |
"msg", fmt.Sprintf("unable to flush metrics on attempt %d, waiting %s", attempts, wait),
|
207 | |
)
|
208 | |
time.Sleep(wait)
|
209 | |
wait = wait * time.Duration(RetryMultiplier)
|
210 | |
}
|
211 | |
// log if we were unable to emit metrics
|
|
150 |
conn := e.mgr.take()
|
|
151 |
if conn == nil {
|
|
152 |
e.logger.Log("during", "flush", "err", "connection unavailable")
|
|
153 |
return
|
|
154 |
}
|
|
155 |
|
|
156 |
err := e.flush(conn)
|
212 | 157 |
if err != nil {
|
213 | |
e.logger.Log(
|
214 | |
"err", err,
|
215 | |
"msg", fmt.Sprintf("unable to flush metrics after %d attempts. giving up.", RetryMax),
|
216 | |
)
|
217 | |
}
|
218 | |
return err
|
|
158 |
e.logger.Log("during", "flush", "err", err)
|
|
159 |
}
|
|
160 |
e.mgr.put(err)
|
219 | 161 |
}
|
220 | 162 |
|
221 | 163 |
func (e *Emitter) flush(conn io.Writer) error {
|
222 | |
|
223 | |
// buffer the writer and make sure to flush it
|
224 | 164 |
w := bufio.NewWriter(conn)
|
225 | 165 |
|
226 | |
// emit counter stats
|
227 | 166 |
for _, c := range e.counters {
|
228 | 167 |
fmt.Fprintf(w, "%s.%s.count %d %d\n", e.prefix, c.Name(), c.count, time.Now().Unix())
|
229 | 168 |
}
|
230 | 169 |
|
231 | |
// emit histogram specific stats
|
232 | 170 |
for _, h := range e.histograms {
|
233 | 171 |
hist := h.hist.Merge()
|
234 | 172 |
now := time.Now().Unix()
|
|
239 | 177 |
fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", e.prefix, h.Name(), hist.StdDev(), now)
|
240 | 178 |
}
|
241 | 179 |
|
242 | |
// emit gauge stats (which can include some histogram quantiles)
|
243 | 180 |
for _, g := range e.gauges {
|
244 | 181 |
fmt.Fprintf(w, "%s.%s %.2f %d\n", e.prefix, g.Name(), g.Get(), time.Now().Unix())
|
245 | 182 |
}
|
246 | 183 |
|
247 | |
// check for error
|
248 | 184 |
return w.Flush()
|
249 | 185 |
}
|
250 | 186 |
|
|
295 | 231 |
logger log.Logger
|
296 | 232 |
}
|
297 | 233 |
|
298 | |
// newWindowedHistogram is taken from http://github.com/codahale/metrics. It returns a
|
299 | |
// windowed HDR histogram which drops data older than five minutes.
|
300 | |
//
|
301 | |
// The histogram exposes metrics for each passed quantile as gauges. Users are expected
|
302 | |
// to provide their own set of Gauges for quantiles to make this Histogram work across multiple
|
303 | |
// metrics providers.
|
|
234 |
// newWindowedHistogram is taken from http://github.com/codahale/metrics. It
|
|
235 |
// returns a windowed HDR histogram which drops data older than five minutes.
|
|
236 |
//
|
|
237 |
// The histogram exposes metrics for each passed quantile as gauges. Users are
|
|
238 |
// expected to provide their own set of Gauges for quantiles to make this
|
|
239 |
// Histogram work across multiple metrics providers.
|
304 | 240 |
func newWindowedHistogram(name string, minValue, maxValue int64, sigfigs int, quantiles map[int]metrics.Gauge, logger log.Logger) *windowedHistogram {
|
305 | 241 |
h := &windowedHistogram{
|
306 | 242 |
hist: hdrhistogram.NewWindowed(5, minValue, maxValue, sigfigs),
|
|
312 | 248 |
return h
|
313 | 249 |
}
|
314 | 250 |
|
315 | |
func (h *windowedHistogram) Name() string { return h.name }
|
|
251 |
func (h *windowedHistogram) Name() string { return h.name }
|
|
252 |
|
316 | 253 |
func (h *windowedHistogram) With(metrics.Field) metrics.Histogram { return h }
|
317 | 254 |
|
318 | 255 |
func (h *windowedHistogram) Observe(value int64) {
|