Imported Upstream version 0.9.13
Jonas Genannt
9 years ago
0 | 0 | Metadata-Version: 1.0 |
1 | 1 | Name: carbon |
2 | Version: 0.9.12 | |
2 | Version: 0.9.13 | |
3 | 3 | Summary: Backend data caching and persistence daemon for Graphite |
4 | 4 | Home-page: http://graphite-project.github.com |
5 | 5 | Author: Chris Davis |
108 | 108 | datapoint = (float(timestamp), float(value)) |
109 | 109 | assert datapoint[1] == datapoint[1] # filter out NaNs |
110 | 110 | client_manager.sendDatapoint(metric, datapoint) |
111 | except: | |
111 | except (ValueError, AssertionError): | |
112 | 112 | log.err(None, 'Dropping invalid line: %s' % line) |
113 | 113 | |
114 | 114 | def connectionLost(self, reason): |
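For reference, the plaintext line format parsed above is 'metric value timestamp'. A standalone sketch of the same validation (illustrative only, not part of the diff) also shows why AssertionError has to be caught for the NaN filter to take effect:

    def parse_line(line):
        # "metric value timestamp", e.g. "foo.bar.baz 42 1396000000"
        metric, value, timestamp = line.strip().split()
        datapoint = (float(timestamp), float(value))
        assert datapoint[1] == datapoint[1]  # NaN != NaN, so NaN values fail here
        return metric, datapoint

    parse_line('foo.bar.baz 42 1396000000')  # accepted
    try:
        parse_line('foo.bar.baz nan 1396000000')
    except (ValueError, AssertionError):
        pass  # dropped, as in the handler above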
31 | 31 | # |
32 | 32 | # <env>.applications.<app>.all.<app_metric> (60) = sum <env>.applications.<app>.*.<<app_metric>> |
33 | 33 | # |
34 | # It is also possible to use regular expressions. Building on the example | |
35 | # above, when using: | |
36 | # | |
37 | # <env>.applications.<app>.<domain>.requests (60) = sum <env>.applications.<app>.<domain>\d{2}.requests | |
38 | # | |
39 | # you will end up with 'prod.applications.apache.www.requests' instead of | |
40 | # 'prod.applications.apache.all.requests'. | |
41 | # | |
34 | 42 | # Note that any time this file is modified, it will be re-read automatically. |
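To see why the regex rule works: carbon's rule parser expands each <field> token into a named regex group matching one dot-delimited component, so the trailing \d{2} peels the numeric suffix off the matched domain. A rough standalone sketch (the exact expansion shown is an assumption, simplified here):

    import re

    # Hypothetical expansion of the input pattern: <field> -> (?P<field>[^.]+)
    pattern = re.compile(
        r'(?P<env>[^.]+)\.applications\.(?P<app>[^.]+)\.'
        r'(?P<domain>[^.]+)\d{2}\.requests$')

    m = pattern.match('prod.applications.apache.www01.requests')
    assert m.group('domain') == 'www'
    # Filling '<env>.applications.<app>.<domain>.requests' with these groups
    # yields 'prod.applications.apache.www.requests'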
29 | 29 | # |
30 | 30 | #LOCAL_DATA_DIR = /opt/graphite/storage/whisper/ |
31 | 31 | |
32 | # Enable daily log rotation. If disabled, a kill -HUP can be used after a manual rotate | |
32 | # Enable daily log rotation. If disabled, carbon will automatically re-open | |
33 | # the log file if it is rotated out of place (e.g. by the logrotate daemon) | |
33 | 34 | ENABLE_LOGROTATION = True |
34 | 35 | |
35 | 36 | # Specify the user to drop privileges to |
55 | 56 | # If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a |
56 | 57 | # stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is |
57 | 58 | # relatively low and carbon has cached a lot of updates; it enables the carbon |
58 | 59 | # daemon to shut down more quickly.
59 | 60 | # MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000 |
60 | 61 | |
61 | 62 | # Softly limits the number of whisper files that get created each minute. |
265 | 266 | MAX_DATAPOINTS_PER_MESSAGE = 500 |
266 | 267 | MAX_QUEUE_SIZE = 10000 |
267 | 268 | |
269 | # This is the fraction to which a send queue must drain before it will | |
270 | # accept new messages again. For a larger site with a very large queue, it | |
271 | # makes sense to tune this to leave room for incoming stats. For example, | |
272 | # with an average flow of 100k stats/minute and a MAX_QUEUE_SIZE of | |
273 | # 3,000,000, letting stats flow again once the queue has drained to 95% | |
274 | # full leaves 150,000 free slots, enough for the next minute's worth of | |
275 | # stats even before the relay incrementally clears more of the queue. | |
276 | QUEUE_LOW_WATERMARK_PCT = 0.8 | |
277 | ||
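The arithmetic in the comment above, worked through with illustrative numbers:

    MAX_QUEUE_SIZE = 3000000
    QUEUE_LOW_WATERMARK_PCT = 0.95  # resume receiving once below 95% full

    low_watermark = int(MAX_QUEUE_SIZE * QUEUE_LOW_WATERMARK_PCT)  # 2,850,000
    headroom = MAX_QUEUE_SIZE - low_watermark  # 150,000 free slots
    assert headroom > 100000  # room for an average minute of 100k stats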
268 | 278 | # Set this to False to drop datapoints when any send queue (sending datapoints |
269 | 279 | # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the |
270 | 280 | # default) then sockets over which metrics are received will temporarily stop accepting |
271 | # data until the send queues fall below 80% MAX_QUEUE_SIZE. | |
281 | # data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. | |
272 | 282 | USE_FLOW_CONTROL = True |
273 | 283 | |
274 | 284 | # Set this to True to enable whitelisting and blacklisting of metrics in |
295 | 305 | |
296 | 306 | # If set true, metrics received will be forwarded to DESTINATIONS in addition to
297 | 307 | # the output of the aggregation rules. If set false, the carbon-aggregator will
298 | # only ever send the output of aggregation. | |
299 | FORWARD_ALL = True | |
308 | # only ever send the output of aggregation. The default is false (no forwarding). | |
309 | FORWARD_ALL = False | |
310 | ||
311 | # Filenames of the configuration files to use for this instance of aggregator. | |
312 | # Filenames are relative to CONF_DIR. | |
313 | # | |
314 | # AGGREGATION_RULES = aggregation-rules.conf | |
315 | # REWRITE_RULES = rewrite-rules.conf | |
300 | 316 | |
301 | 317 | # This is a list of carbon daemons we will send any relayed or |
302 | 318 | # generated metrics to. The default provided would send to a single |
304 | 320 | # use multiple carbon-cache instances then it would look like this: |
305 | 321 | # |
306 | 322 | # DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b |
307 | # | |
323 | # | |
308 | 324 | # The format is comma-delimited IP:PORT:INSTANCE where the :INSTANCE part is |
309 | 325 | # optional and refers to the "None" instance if omitted. |
310 | 326 | # |
0 | #!/bin/bash | |
1 | # chkconfig: - 25 75 | |
2 | # description: carbon-aggregator | |
3 | # processname: carbon-aggregator | |
4 | ||
5 | export PYTHONPATH="/opt/graphite/lib:$PYTHONPATH" # GRAPHITE_DIR is not defined yet here, so use the literal path | |
6 | ||
7 | # Source function library. | |
8 | if [ -e /etc/rc.d/init.d/functions ]; then | |
9 | . /etc/rc.d/init.d/functions; | |
10 | fi; | |
11 | ||
12 | CARBON_DAEMON="aggregator" | |
13 | GRAPHITE_DIR="/opt/graphite" | |
14 | INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` | |
15 | ||
16 | function die { | |
17 | echo $1 | |
18 | exit 1 | |
19 | } | |
20 | ||
21 | start(){ | |
22 | cd $GRAPHITE_DIR; | |
23 | ||
24 | for INSTANCE in ${INSTANCES}; do | |
25 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
26 | INSTANCE="a"; | |
27 | fi; | |
28 | echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
29 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; | |
30 | ||
31 | if [ $? -eq 0 ]; then | |
32 | echo_success | |
33 | else | |
34 | echo_failure | |
35 | fi; | |
36 | echo "" | |
37 | done; | |
38 | } | |
39 | ||
40 | stop(){ | |
41 | cd $GRAPHITE_DIR | |
42 | ||
43 | for INSTANCE in ${INSTANCES}; do | |
44 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
45 | INSTANCE="a"; | |
46 | fi; | |
47 | echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
48 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop | |
49 | ||
50 | if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then | |
51 | echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; | |
52 | sleep 20; | |
53 | /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; | |
54 | fi; | |
55 | ||
56 | if [ $? -eq 0 ]; then | |
57 | echo_success | |
58 | else | |
59 | echo_failure | |
60 | fi; | |
61 | echo "" | |
62 | done; | |
63 | } | |
64 | ||
65 | status(){ | |
66 | cd $GRAPHITE_DIR; | |
67 | ||
68 | for INSTANCE in ${INSTANCES}; do | |
69 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
70 | INSTANCE="a"; | |
71 | fi; | |
72 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; | |
73 | ||
74 | if [ $? -eq 0 ]; then | |
75 | echo_success | |
76 | else | |
77 | echo_failure | |
78 | fi; | |
79 | echo "" | |
80 | done; | |
81 | } | |
82 | ||
83 | case "$1" in | |
84 | start) | |
85 | start | |
86 | ;; | |
87 | stop) | |
88 | stop | |
89 | ;; | |
90 | status) | |
91 | status | |
92 | ;; | |
93 | restart|reload) | |
94 | stop | |
95 | start | |
96 | ;; | |
97 | *) | |
98 | echo $"Usage: $0 {start|stop|restart|status}" | |
99 | exit 1 | |
100 | esac | |
101 |
0 | #!/bin/bash | |
1 | # chkconfig: - 25 75 | |
2 | # description: carbon-cache | |
3 | # processname: carbon-cache | |
4 | ||
5 | export PYTHONPATH="/opt/graphite/lib:$PYTHONPATH" # GRAPHITE_DIR is not defined yet here, so use the literal path | |
6 | ||
7 | # Source function library. | |
8 | if [ -e /etc/rc.d/init.d/functions ]; then | |
9 | . /etc/rc.d/init.d/functions; | |
10 | fi; | |
11 | ||
12 | CARBON_DAEMON="cache" | |
13 | GRAPHITE_DIR="/opt/graphite" | |
14 | INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` | |
15 | ||
16 | function die { | |
17 | echo $1 | |
18 | exit 1 | |
19 | } | |
20 | ||
21 | start(){ | |
22 | cd $GRAPHITE_DIR; | |
23 | ||
24 | for INSTANCE in ${INSTANCES}; do | |
25 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
26 | INSTANCE="a"; | |
27 | fi; | |
28 | echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
29 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; | |
30 | ||
31 | if [ $? -eq 0 ]; then | |
32 | echo_success | |
33 | else | |
34 | echo_failure | |
35 | fi; | |
36 | echo "" | |
37 | done; | |
38 | } | |
39 | ||
40 | stop(){ | |
41 | cd $GRAPHITE_DIR | |
42 | ||
43 | for INSTANCE in ${INSTANCES}; do | |
44 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
45 | INSTANCE="a"; | |
46 | fi; | |
47 | echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
48 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop | |
49 | ||
50 | if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then | |
51 | echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; | |
52 | sleep 20; | |
53 | /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; | |
54 | fi; | |
55 | ||
56 | if [ $? -eq 0 ]; then | |
57 | echo_success | |
58 | else | |
59 | echo_failure | |
60 | fi; | |
61 | echo "" | |
62 | done; | |
63 | } | |
64 | ||
65 | status(){ | |
66 | cd $GRAPHITE_DIR; | |
67 | ||
68 | for INSTANCE in ${INSTANCES}; do | |
69 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
70 | INSTANCE="a"; | |
71 | fi; | |
72 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; | |
73 | ||
74 | if [ $? -eq 0 ]; then | |
75 | echo_success | |
76 | else | |
77 | echo_failure | |
78 | fi; | |
79 | echo "" | |
80 | done; | |
81 | } | |
82 | ||
83 | case "$1" in | |
84 | start) | |
85 | start | |
86 | ;; | |
87 | stop) | |
88 | stop | |
89 | ;; | |
90 | status) | |
91 | status | |
92 | ;; | |
93 | restart|reload) | |
94 | stop | |
95 | start | |
96 | ;; | |
97 | *) | |
98 | echo $"Usage: $0 {start|stop|restart|status}" | |
99 | exit 1 | |
100 | esac | |
101 |
0 | #!/bin/bash | |
1 | # chkconfig: - 25 75 | |
2 | # description: carbon-relay | |
3 | # processname: carbon-relay | |
4 | ||
5 | export PYTHONPATH="/opt/graphite/lib:$PYTHONPATH" # GRAPHITE_DIR is not defined yet here, so use the literal path | |
6 | ||
7 | # Source function library. | |
8 | if [ -e /etc/rc.d/init.d/functions ]; then | |
9 | . /etc/rc.d/init.d/functions; | |
10 | fi; | |
11 | ||
12 | CARBON_DAEMON="relay" | |
13 | GRAPHITE_DIR="/opt/graphite" | |
14 | INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` | |
15 | ||
16 | function die { | |
17 | echo $1 | |
18 | exit 1 | |
19 | } | |
20 | ||
21 | start(){ | |
22 | cd $GRAPHITE_DIR; | |
23 | ||
24 | for INSTANCE in ${INSTANCES}; do | |
25 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
26 | INSTANCE="a"; | |
27 | fi; | |
28 | echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
29 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; | |
30 | ||
31 | if [ $? -eq 0 ]; then | |
32 | echo_success | |
33 | else | |
34 | echo_failure | |
35 | fi; | |
36 | echo "" | |
37 | done; | |
38 | } | |
39 | ||
40 | stop(){ | |
41 | cd $GRAPHITE_DIR | |
42 | ||
43 | for INSTANCE in ${INSTANCES}; do | |
44 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
45 | INSTANCE="a"; | |
46 | fi; | |
47 | echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." | |
48 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop | |
49 | ||
50 | if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then | |
51 | echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; | |
52 | sleep 20; | |
53 | /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; | |
54 | fi; | |
55 | ||
56 | if [ $? -eq 0 ]; then | |
57 | echo_success | |
58 | else | |
59 | echo_failure | |
60 | fi; | |
61 | echo "" | |
62 | done; | |
63 | } | |
64 | ||
65 | status(){ | |
66 | cd $GRAPHITE_DIR; | |
67 | ||
68 | for INSTANCE in ${INSTANCES}; do | |
69 | if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then | |
70 | INSTANCE="a"; | |
71 | fi; | |
72 | bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; | |
73 | ||
74 | if [ $? -eq 0 ]; then | |
75 | echo_success | |
76 | else | |
77 | echo_failure | |
78 | fi; | |
79 | echo "" | |
80 | done; | |
81 | } | |
82 | ||
83 | case "$1" in | |
84 | start) | |
85 | start | |
86 | ;; | |
87 | stop) | |
88 | stop | |
89 | ;; | |
90 | status) | |
91 | status | |
92 | ;; | |
93 | restart|reload) | |
94 | stop | |
95 | start | |
96 | ;; | |
97 | *) | |
98 | echo $"Usage: $0 {start|stop|restart|status}" | |
99 | exit 1 | |
100 | esac | |
101 |
28 | 28 | # Only read if the rules file has been modified |
29 | 29 | try: |
30 | 30 | mtime = getmtime(self.rules_file) |
31 | except: | |
31 | except OSError: | |
32 | 32 | log.err("Failed to get mtime of %s" % self.rules_file) |
33 | 33 | return |
34 | 34 | if mtime <= self.rules_last_read: |
58 | 58 | frequency = int( frequency.lstrip('(').rstrip(')') ) |
59 | 59 | return AggregationRule(input_pattern, output_pattern, method, frequency) |
60 | 60 | |
61 | except: | |
61 | except ValueError: | |
62 | 62 | log.err("Failed to parse line: %s" % line) |
63 | 63 | raise |
64 | 64 | |
89 | 89 | extracted_fields = match.groupdict() |
90 | 90 | try: |
91 | 91 | result = self.output_template % extracted_fields |
92 | except: | |
92 | except (TypeError, KeyError): | |
93 | 93 | log.err("Failed to interpolate template %s with fields %s" % (self.output_template, extracted_fields)) |
94 | 94 | |
95 | 95 | self.cache[metric_path] = result |
44 | 44 | |
45 | 45 | try: |
46 | 46 | import carbon |
47 | except: | |
47 | except ImportError: | |
48 | 48 | # this is being run directly, carbon is not installed |
49 | 49 | LIB_DIR = os.path.dirname(os.path.dirname(__file__)) |
50 | 50 | sys.path.insert(0, LIB_DIR) |
115 | 115 | else: |
116 | 116 | timestamp = time.time() |
117 | 117 | |
118 | except: | |
118 | except ValueError: | |
119 | 119 | parser.print_usage() |
120 | 120 | raise SystemExit(1) |
121 | 121 |
16 | 16 | from carbon.conf import settings |
17 | 17 | try: |
18 | 18 | from collections import defaultdict |
19 | except: | |
19 | except ImportError: | |
20 | 20 | from util import defaultdict |
21 | 21 | |
22 | 22 | |
23 | 23 | class _MetricCache(defaultdict): |
24 | 24 | def __init__(self, defaultfactory=deque, method="sorted"): |
25 | self.size = 0 | |
25 | 26 | self.method = method |
26 | 27 | if self.method == "sorted": |
27 | 28 | self.queue = self.gen_queue() |
38 | 39 | while queue: |
39 | 40 | yield queue.pop()[0] |
40 | 41 | |
41 | @property | |
42 | def size(self): | |
43 | return reduce(lambda x, y: x + len(y), self.values(), 0) | |
44 | ||
45 | 42 | def store(self, metric, datapoint): |
43 | self.size += 1 | |
46 | 44 | self[metric].append(datapoint) |
47 | 45 | if self.isFull(): |
48 | 46 | log.msg("MetricCache is full: self.size=%d" % self.size) |
58 | 56 | raise KeyError(metric) |
59 | 57 | elif not metric and self.method == "max": |
60 | 58 | metric = max(self.items(), key=lambda x: len(x[1]))[0] |
59 | datapoints = (metric, super(_MetricCache, self).pop(metric)) | |
61 | 60 | elif not metric and self.method == "naive": |
62 | return self.popitem() | |
61 | datapoints = self.popitem() | |
63 | 62 | elif not metric and self.method == "sorted": |
64 | 63 | metric = self.queue.next() |
65 | datapoints = (metric, super(_MetricCache, self).pop(metric)) | |
64 | # Save only last value for each timestamp | |
65 | popped = super(_MetricCache, self).pop(metric) | |
66 | ordered = sorted(dict(popped).items(), key=lambda x: x[0]) | |
67 | datapoints = (metric, deque(ordered)) | |
68 | self.size -= len(datapoints[1]) | |
66 | 69 | return datapoints |
67 | 70 | |
68 | 71 | @property |
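A minimal illustration of the de-duplication step in pop() (standalone, not carbon code). Note that duplicates dropped here were each counted by store(), while pop() subtracts only the deduplicated length, so self.size can drift high when clients resend the same timestamp:

    from collections import deque

    popped = deque([(1000.0, 1.0), (1000.0, 2.0), (1060.0, 3.0)])
    ordered = sorted(dict(popped).items(), key=lambda x: x[0])
    # dict() keeps only the last value seen for each timestamp:
    assert ordered == [(1000.0, 2.0), (1060.0, 3.0)]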
6 | 6 | from carbon.util import pickle |
7 | 7 | from carbon import log, state, instrumentation |
8 | 8 | |
9 | ||
10 | SEND_QUEUE_LOW_WATERMARK = settings.MAX_QUEUE_SIZE * 0.8 | |
9 | try: | |
10 | import signal | |
11 | except ImportError: | |
12 | log.debug("Couldn't import signal module") | |
13 | ||
14 | ||
15 | SEND_QUEUE_LOW_WATERMARK = settings.MAX_QUEUE_SIZE * settings.QUEUE_LOW_WATERMARK_PCT | |
11 | 16 | |
12 | 17 | |
13 | 18 | class CarbonClientProtocol(Int32StringReceiver): |
101 | 106 | self.attemptedRelays = 'destinations.%s.attemptedRelays' % self.destinationName |
102 | 107 | self.fullQueueDrops = 'destinations.%s.fullQueueDrops' % self.destinationName |
103 | 108 | self.queuedUntilConnected = 'destinations.%s.queuedUntilConnected' % self.destinationName |
109 | self.relayMaxQueueLength = 'destinations.%s.relayMaxQueueLength' % self.destinationName | |
104 | 110 | |
105 | 111 | def queueFullCallback(self, result): |
106 | 112 | state.events.cacheFull() |
152 | 158 | |
153 | 159 | def sendDatapoint(self, metric, datapoint): |
154 | 160 | instrumentation.increment(self.attemptedRelays) |
161 | instrumentation.max(self.relayMaxQueueLength, self.queueSize) | |
155 | 162 | queueSize = self.queueSize |
156 | 163 | if queueSize >= settings.MAX_QUEUE_SIZE: |
157 | 164 | if not self.queueFull.called: |
204 | 211 | self.client_factories = {} # { destination : CarbonClientFactory() } |
205 | 212 | |
206 | 213 | def startService(self): |
214 | if 'signal' in globals().keys(): | |
215 | log.debug("Installing SIG_IGN for SIGHUP") | |
216 | signal.signal(signal.SIGHUP, signal.SIG_IGN) | |
207 | 217 | Service.startService(self) |
208 | 218 | for factory in self.client_factories.values(): |
209 | 219 | if not factory.started: |
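The SIG_IGN installed above makes the daemon immune to SIGHUP, so a stray hangup (e.g. from a log rotation job) no longer terminates carbon. The idiom in isolation (standalone sketch):

    import os
    import signal

    signal.signal(signal.SIGHUP, signal.SIG_IGN)  # ignore hangup signals
    os.kill(os.getpid(), signal.SIGHUP)  # default action would terminate us
    print 'still running'  # reached only because SIGHUP is ignored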
52 | 52 | MAX_AGGREGATION_INTERVALS=5, |
53 | 53 | FORWARD_ALL=False, |
54 | 54 | MAX_QUEUE_SIZE=1000, |
55 | QUEUE_LOW_WATERMARK_PCT=0.8, | |
55 | 56 | ENABLE_AMQP=False, |
56 | 57 | AMQP_VERBOSE=False, |
57 | 58 | BIND_PATTERNS=['#'], |
72 | 73 | WRITE_BACK_FREQUENCY=None, |
73 | 74 | ENABLE_LOGROTATION=True, |
74 | 75 | LOG_LISTENER_CONNECTIONS=True, |
76 | AGGREGATION_RULES='aggregation-rules.conf', | |
77 | REWRITE_RULES='rewrite-rules.conf', | |
78 | RELAY_RULES='relay-rules.conf', | |
75 | 79 | ) |
76 | 80 | |
77 | 81 | |
148 | 152 | # Attempt to figure out numeric types automatically |
149 | 153 | try: |
150 | 154 | value = int(value) |
151 | except: | |
155 | except ValueError: | |
152 | 156 | try: |
153 | 157 | value = float(value) |
154 | except: | |
158 | except ValueError: | |
155 | 159 | pass |
156 | 160 | |
157 | 161 | self[key] = value |
249 | 253 | logdir = settings.LOG_DIR |
250 | 254 | if not isdir(logdir): |
251 | 255 | os.makedirs(logdir) |
256 | if settings.USER: | |
257 | # We have not yet switched to the specified user, | |
258 | # but that user must be able to create files in this | |
259 | # directory. | |
260 | os.chown(logdir, self.parent["uid"], self.parent["gid"]) | |
252 | 261 | log.logToDir(logdir) |
253 | 262 | |
254 | 263 | if self["whitelist"] is None: |
286 | 295 | try: |
287 | 296 | pid = int(pf.read().strip()) |
288 | 297 | pf.close() |
289 | except: | |
298 | except (IOError, ValueError): | |
290 | 299 | print "Could not read pidfile %s" % pidfile |
291 | 300 | raise SystemExit(1) |
292 | 301 | print "Sending kill signal to pid %d" % pid |
308 | 317 | try: |
309 | 318 | pid = int(pf.read().strip()) |
310 | 319 | pf.close() |
311 | except: | |
320 | except (IOError, ValueError): | |
312 | 321 | print "Failed to read pid from %s" % pidfile |
313 | 322 | raise SystemExit(1) |
314 | 323 | |
326 | 335 | try: |
327 | 336 | pid = int(pf.read().strip()) |
328 | 337 | pf.close() |
329 | except: | |
338 | except (IOError, ValueError): | |
330 | 339 | print "Could not read pidfile %s" % pidfile |
331 | 340 | raise SystemExit(1) |
332 | 341 | if _process_alive(pid): |
337 | 346 | print "Removing stale pidfile %s" % pidfile |
338 | 347 | try: |
339 | 348 | os.unlink(pidfile) |
340 | except: | |
349 | except OSError: | |
341 | 350 | print "Could not remove pidfile %s" % pidfile |
342 | 351 | |
343 | 352 | print "Starting %s (instance %s)" % (program, instance) |
358 | 367 | def postOptions(self): |
359 | 368 | CarbonCacheOptions.postOptions(self) |
360 | 369 | if self["rules"] is None: |
361 | self["rules"] = join(settings["CONF_DIR"], "aggregation-rules.conf") | |
370 | self["rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES']) | |
362 | 371 | settings["aggregation-rules"] = self["rules"] |
363 | 372 | |
364 | 373 | if self["rewrite-rules"] is None: |
365 | 374 | self["rewrite-rules"] = join(settings["CONF_DIR"], |
366 | "rewrite-rules.conf") | |
375 | settings['REWRITE_RULES']) | |
367 | 376 | settings["rewrite-rules"] = self["rewrite-rules"] |
368 | 377 | |
369 | 378 | |
377 | 386 | def postOptions(self): |
378 | 387 | CarbonCacheOptions.postOptions(self) |
379 | 388 | if self["rules"] is None: |
380 | self["rules"] = join(settings["CONF_DIR"], "relay-rules.conf") | |
389 | self["rules"] = join(settings["CONF_DIR"], settings['RELAY_RULES']) | |
381 | 390 | settings["relay-rules"] = self["rules"] |
382 | 391 | |
383 | 392 | if self["aggregation-rules"] is None: |
384 | self["aggregation-rules"] = join(settings["CONF_DIR"], "aggregation-rules.conf") | |
393 | self["aggregation-rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES']) | |
385 | 394 | settings["aggregation-rules"] = self["aggregation-rules"] |
386 | 395 | |
387 | 396 | if settings["RELAY_METHOD"] not in ("rules", "consistent-hashing", "aggregated-consistent-hashing"): |
397 | 406 | parser.add_option( |
398 | 407 | "--debug", action="store_true", |
399 | 408 | help="Run in the foreground, log to stdout") |
409 | parser.add_option( | |
410 | "--nodaemon", action="store_true", | |
411 | help="Run in the foreground") | |
400 | 412 | parser.add_option( |
401 | 413 | "--profile", |
402 | 414 | help="Record performance profile data to the given file") |
17 | 17 | for handler in self.handlers: |
18 | 18 | try: |
19 | 19 | handler(*args, **kwargs) |
20 | except: | |
20 | except Exception: | |
21 | 21 | log.err(None, "Exception in %s event handler: args=%s kwargs=%s" % (self.name, args, kwargs)) |
22 | 22 | |
23 | 23 |
28 | 28 | except KeyError: |
29 | 29 | stats[stat] = increase |
30 | 30 | |
31 | def max(stat, newval): | |
32 | try: | |
33 | if stats[stat] < newval: | |
34 | stats[stat] = newval | |
35 | except KeyError: | |
36 | stats[stat] = newval | |
31 | 37 | |
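The new max() helper tracks a running peak rather than a sum, e.g. (hypothetical metric name):

    stats = {}

    def max(stat, newval):  # same definition as above
        try:
            if stats[stat] < newval:
                stats[stat] = newval
        except KeyError:
            stats[stat] = newval

    max('destinations.a.relayMaxQueueLength', 10)
    max('destinations.a.relayMaxQueueLength', 7)
    assert stats['destinations.a.relayMaxQueueLength'] == 10  # the peak, not a sum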
32 | 38 | def append(stat, value): |
33 | 39 | try: |
75 | 81 | creates = myStats.get('creates', 0) |
76 | 82 | errors = myStats.get('errors', 0) |
77 | 83 | cacheQueries = myStats.get('cacheQueries', 0) |
84 | cacheBulkQueries = myStats.get('cacheBulkQueries', 0) | |
78 | 85 | cacheOverflow = myStats.get('cache.overflow', 0) |
86 | cacheBulkQuerySizes = myStats.get('cacheBulkQuerySize', []) | |
79 | 87 | |
80 | 88 | # Calculate cache-data-structure-derived metrics prior to storing anything |
81 | 89 | # in the cache itself -- which would otherwise affect said metrics. |
92 | 100 | pointsPerUpdate = float(committedPoints) / len(updateTimes) |
93 | 101 | record('pointsPerUpdate', pointsPerUpdate) |
94 | 102 | |
103 | if cacheBulkQuerySizes: | |
104 | avgBulkSize = sum(cacheBulkQuerySizes) / len(cacheBulkQuerySizes) | |
105 | record('cache.bulk_queries_average_size', avgBulkSize) | |
106 | ||
95 | 107 | record('updateOperations', len(updateTimes)) |
96 | 108 | record('committedPoints', committedPoints) |
97 | 109 | record('creates', creates) |
98 | 110 | record('errors', errors) |
99 | 111 | record('cache.queries', cacheQueries) |
112 | record('cache.bulk_queries', cacheBulkQueries) | |
100 | 113 | record('cache.overflow', cacheOverflow) |
101 | 114 | |
102 | 115 | # aggregator metrics |
117 | 130 | |
118 | 131 | # common metrics |
119 | 132 | record('metricsReceived', myStats.get('metricsReceived', 0)) |
133 | record('blacklistMatches', myStats.get('blacklistMatches', 0)) | |
134 | record('whitelistRejects', myStats.get('whitelistRejects', 0)) | |
120 | 135 | record('cpuUsage', getCpuUsage()) |
121 | 136 | try: # This only works on Linux |
122 | 137 | record('memUsage', getMemUsage()) |
123 | except: | |
138 | except Exception: | |
124 | 139 | pass |
125 | 140 | |
126 | 141 |
11 | 11 | try: |
12 | 12 | value = whisper.info(wsp_path)['aggregationMethod'] |
13 | 13 | return dict(value=value) |
14 | except: | |
14 | except Exception: | |
15 | 15 | log.err() |
16 | 16 | return dict(error=traceback.format_exc()) |
17 | 17 | |
24 | 24 | try: |
25 | 25 | old_value = whisper.setAggregationMethod(wsp_path, value) |
26 | 26 | return dict(old_value=old_value, new_value=value) |
27 | except: | |
27 | except Exception: | |
28 | 28 | log.err() |
29 | 29 | return dict(error=traceback.format_exc()) |
70 | 70 | try: |
71 | 71 | metric, value, timestamp = line.strip().split() |
72 | 72 | datapoint = (float(timestamp), float(value)) |
73 | except: | |
73 | except ValueError: | |
74 | 74 | log.listener('invalid line received from client %s, ignoring' % self.peerName) |
75 | 75 | return |
76 | 76 | |
85 | 85 | datapoint = (float(timestamp), float(value)) |
86 | 86 | |
87 | 87 | self.metricReceived(metric, datapoint) |
88 | except: | |
88 | except ValueError: | |
89 | 89 | log.listener('invalid line received from %s, ignoring' % host) |
90 | 90 | |
91 | 91 | |
99 | 99 | def stringReceived(self, data): |
100 | 100 | try: |
101 | 101 | datapoints = self.unpickler.loads(data) |
102 | except: | |
102 | except pickle.UnpicklingError: | |
103 | 103 | log.listener('invalid pickle received from %s, ignoring' % self.peerName) |
104 | 104 | return |
105 | 105 | |
110 | 110 | log.listener('Error decoding pickle: %s' % e) |
111 | 111 | try: |
112 | 112 | datapoint = (float(value), float(timestamp)) # force proper types |
113 | except: | |
113 | except ValueError: | |
114 | 114 | continue |
115 | 115 | |
116 | 116 | self.metricReceived(metric, datapoint) |
117 | 117 | |
118 | 118 | |
119 | 119 | class CacheManagementHandler(Int32StringReceiver): |
120 | MAX_LENGTH = 1024 ** 3 # 1 GB | |
121 | ||
120 | 122 | def connectionMade(self): |
121 | 123 | peer = self.transport.getPeer() |
122 | 124 | self.peerAddr = "%s:%d" % (peer.host, peer.port) |
135 | 137 | metric = request['metric'] |
136 | 138 | datapoints = MetricCache.get(metric, []) |
137 | 139 | result = dict(datapoints=datapoints) |
138 | if settings.LOG_CACHE_HITS is True: | |
140 | if settings.LOG_CACHE_HITS: | |
139 | 141 | log.query('[%s] cache query for \"%s\" returned %d values' % (self.peerAddr, metric, len(datapoints))) |
140 | 142 | instrumentation.increment('cacheQueries') |
143 | ||
144 | elif request['type'] == 'cache-query-bulk': | |
145 | datapointsByMetric = {} | |
146 | metrics = request['metrics'] | |
147 | for metric in metrics: | |
148 | datapointsByMetric[metric] = MetricCache.get(metric, []) | |
149 | ||
150 | result = dict(datapointsByMetric=datapointsByMetric) | |
151 | ||
152 | if settings.LOG_CACHE_HITS: | |
153 | log.query('[%s] cache query bulk for %d metrics returned %d values' % | |
154 | (self.peerAddr, len(metrics), sum([len(datapoints) for datapoints in datapointsByMetric.values()]))) | |
155 | instrumentation.increment('cacheBulkQueries') | |
156 | instrumentation.append('cacheBulkQuerySize', len(metrics)) | |
141 | 157 | |
142 | 158 | elif request['type'] == 'get-metadata': |
143 | 159 | result = management.getMetadata(request['metric'], request['key']) |
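For context, a client exercises the new cache-query-bulk request by sending a length-prefixed pickle to the cache query port (7002 by default). A rough client sketch under those assumptions (not part of the diff):

    import pickle
    import socket
    import struct

    request = {'type': 'cache-query-bulk',
               'metrics': ['foo.bar.baz', 'foo.bar.quux']}  # hypothetical names
    payload = pickle.dumps(request, protocol=2)

    sock = socket.create_connection(('127.0.0.1', 7002))
    sock.sendall(struct.pack('!L', len(payload)) + payload)  # 4-byte length prefix

    size = struct.unpack('!L', sock.recv(4))[0]
    data = ''
    while len(data) < size:
        data += sock.recv(size - len(data))
    response = pickle.loads(data)  # {'datapointsByMetric': {metric: [...]}}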
26 | 26 | |
27 | 27 | try: |
28 | 28 | mtime = os.path.getmtime(self.list_file) |
29 | except: | |
29 | except OSError: | |
30 | 30 | log.err("Failed to get mtime of %s" % self.list_file) |
31 | 31 | return |
32 | 32 | |
41 | 41 | continue |
42 | 42 | try: |
43 | 43 | new_regex_list.append(re.compile(pattern)) |
44 | except: | |
44 | except re.error: | |
45 | 45 | log.err("Failed to parse '%s' in '%s'. Ignoring line" % (pattern, self.list_file)) |
46 | 46 | |
47 | 47 | self.regex_list = new_regex_list |
28 | 28 | # Only read if the rules file has been modified |
29 | 29 | try: |
30 | 30 | mtime = getmtime(self.rules_file) |
31 | except: | |
31 | except OSError: | |
32 | 32 | log.err("Failed to get mtime of %s" % self.rules_file) |
33 | 33 | return |
34 | 34 | if mtime <= self.rules_last_read: |
19 | 19 | from twisted.python.components import Componentized |
20 | 20 | from twisted.python.log import ILogObserver |
21 | 21 | # Attaching modules to the global state module simplifies import order hassles |
22 | from carbon import util, state, events, instrumentation | |
22 | from carbon import state, util, events | |
23 | 23 | from carbon.log import carbonLogObserver |
24 | 24 | from carbon.exceptions import CarbonConfigException |
25 | state.events = events | |
26 | state.instrumentation = instrumentation | |
25 | ||
26 | state.events = events | |
27 | 27 | |
28 | 28 | |
29 | 29 | class CarbonRootService(MultiService): |
37 | 37 | |
38 | 38 | def createBaseService(config): |
39 | 39 | from carbon.conf import settings |
40 | from carbon import instrumentation | |
41 | ||
42 | global state | |
43 | state.instrumentation = instrumentation | |
44 | ||
40 | 45 | from carbon.protocols import (MetricLineReceiver, MetricPickleReceiver, |
41 | 46 | MetricDatagramReceiver) |
42 | 47 | |
146 | 151 | root_service = createBaseService(config) |
147 | 152 | |
148 | 153 | # Configure application components |
149 | router = ConsistentHashingRouter() | |
154 | router = ConsistentHashingRouter(settings.REPLICATION_FACTOR) | |
150 | 155 | client_manager = CarbonClientManager(router) |
151 | 156 | client_manager.setServiceParent(root_service) |
152 | 157 |
170 | 170 | assert 0 <= xFilesFactor <= 1 |
171 | 171 | if aggregationMethod is not None: |
172 | 172 | assert aggregationMethod in whisper.aggregationMethods |
173 | except: | |
173 | except (ValueError, AssertionError): | |
174 | 174 | log.msg("Invalid schemas found in %s." % section) |
175 | 175 | continue |
176 | 176 |
10 | 10 | try: |
11 | 11 | import cPickle as pickle |
12 | 12 | USING_CPICKLE = True |
13 | except: | |
13 | except ImportError: | |
14 | 14 | import pickle |
15 | 15 | USING_CPICKLE = False |
16 | 16 | |
17 | 17 | from time import sleep, time |
18 | 18 | from twisted.python.util import initgroups |
19 | 19 | from twisted.scripts.twistd import runApp |
20 | from twisted.scripts._twistd_unix import daemonize | |
21 | ||
22 | ||
23 | daemonize = daemonize # Backwards compatibility | |
24 | 20 | |
25 | 21 | |
26 | 22 | def dropprivs(user): |
62 | 58 | try: |
63 | 59 | from twisted.internet import epollreactor |
64 | 60 | twistd_options.append("--reactor=epoll") |
65 | except: | |
61 | except ImportError: | |
66 | 62 | pass |
67 | 63 | |
68 | if options.debug: | |
64 | if options.debug or options.nodaemon: | |
69 | 65 | twistd_options.extend(["--nodaemon"]) |
70 | 66 | if options.profile: |
71 | 67 | twistd_options.append("--profile") |
82 | 78 | |
83 | 79 | for option_name, option_value in vars(options).items(): |
84 | 80 | if (option_value is not None and |
85 | option_name not in ("debug", "profile", "pidfile", "umask")): | |
81 | option_name not in ("debug", "profile", "pidfile", "umask", "nodaemon")): | |
86 | 82 | twistd_options.extend(["--%s" % option_name.replace("_", "-"), |
87 | 83 | option_value]) |
88 | 84 | |
198 | 194 | return True |
199 | 195 | return False |
200 | 196 | |
197 | def setCapacityAndFillRate(self, new_capacity, new_fill_rate): | |
198 | delta = float(new_capacity) - self.capacity | |
199 | self.capacity = float(new_capacity) | |
200 | self.fill_rate = float(new_fill_rate) | |
201 | self._tokens = delta + self._tokens | |
202 | ||
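Growing a bucket via setCapacityAndFillRate() credits the capacity delta immediately, which is what lets shutdownModifyUpdateSpeed() (later in this diff) speed up a draining shutdown without waiting for refill. A sketch with hypothetical numbers, using the TokenBucket class defined in this module:

    bucket = TokenBucket(capacity=1000, fill_rate=25)
    bucket.setCapacityAndFillRate(1000000, 1000000)
    # the capacity delta (999,000 tokens) is credited on the spot, so a large
    # drain() no longer blocks at the old 25 tokens/sec refill rate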
201 | 203 | @property |
202 | 204 | def tokens(self): |
203 | 205 | '''The tokens property will return the current number of tokens in the |
27 | 27 | from twisted.internet import reactor |
28 | 28 | from twisted.internet.task import LoopingCall |
29 | 29 | from twisted.application.service import Service |
30 | ||
31 | try: | |
32 | import signal | |
33 | except ImportError: | |
34 | log.debug("Couldn't import signal module") | |
30 | 35 | |
31 | 36 | |
32 | 37 | SCHEMAS = loadStorageSchemas() |
106 | 111 | dbDir = dirname(dbFilePath) |
107 | 112 | try: |
108 | 113 | if not exists(dbDir): |
109 | os.makedirs(dbDir, 0755) | |
114 | os.makedirs(dbDir) | |
110 | 115 | except OSError, e: |
111 | 116 | log.err("%s" % e) |
112 | 117 | log.creates("creating database file %s (archive=%s xff=%s agg=%s)" % |
113 | 118 | (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod)) |
114 | whisper.create( | |
115 | dbFilePath, | |
116 | archiveConfig, | |
117 | xFilesFactor, | |
118 | aggregationMethod, | |
119 | settings.WHISPER_SPARSE_CREATE, | |
120 | settings.WHISPER_FALLOCATE_CREATE) | |
121 | instrumentation.increment('creates') | |
119 | try: | |
120 | whisper.create( | |
121 | dbFilePath, | |
122 | archiveConfig, | |
123 | xFilesFactor, | |
124 | aggregationMethod, | |
125 | settings.WHISPER_SPARSE_CREATE, | |
126 | settings.WHISPER_FALLOCATE_CREATE) | |
127 | instrumentation.increment('creates') | |
128 | except Exception: | |
129 | log.err("Error creating %s" % (dbFilePath)) | |
130 | continue | |
122 | 131 | # If we've got a rate limit configured lets makes sure we enforce it |
123 | 132 | if UPDATE_BUCKET: |
124 | 133 | UPDATE_BUCKET.drain(1, blocking=True) |
126 | 135 | t1 = time.time() |
127 | 136 | whisper.update_many(dbFilePath, datapoints) |
128 | 137 | updateTime = time.time() - t1 |
129 | except: | |
138 | except Exception: | |
130 | 139 | log.msg("Error writing to %s" % (dbFilePath)) |
131 | 140 | log.err() |
132 | 141 | instrumentation.increment('errors') |
146 | 155 | while reactor.running: |
147 | 156 | try: |
148 | 157 | writeCachedDataPoints() |
149 | except: | |
158 | except Exception: | |
150 | 159 | log.err() |
151 | 160 | time.sleep(1) # The writer thread only sleeps when the cache is empty or an error occurs |
152 | 161 | |
155 | 164 | global SCHEMAS |
156 | 165 | try: |
157 | 166 | SCHEMAS = loadStorageSchemas() |
158 | except: | |
167 | except Exception: | |
159 | 168 | log.msg("Failed to reload storage SCHEMAS") |
160 | 169 | log.err() |
161 | 170 | |
164 | 173 | global AGGREGATION_SCHEMAS |
165 | 174 | try: |
166 | 175 | AGGREGATION_SCHEMAS = loadAggregationSchemas() |
167 | except: | |
176 | except Exception: | |
168 | 177 | log.msg("Failed to reload aggregation SCHEMAS") |
169 | 178 | log.err() |
170 | 179 | |
171 | 180 | |
172 | 181 | def shutdownModifyUpdateSpeed(): |
173 | 182 | try: |
174 | settings.MAX_UPDATES_PER_SECOND = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN | |
183 | shut = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN | |
184 | if UPDATE_BUCKET: | |
185 | UPDATE_BUCKET.setCapacityAndFillRate(shut, shut) | |
186 | if CREATE_BUCKET: | |
187 | CREATE_BUCKET.setCapacityAndFillRate(shut, shut) | |
175 | 188 | log.msg("Carbon shutting down. Changed the update rate to: " + str(settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN)) |
176 | 189 | except KeyError: |
177 | 190 | log.msg("Carbon shutting down. Update rate not changed") |
184 | 197 | self.aggregation_reload_task = LoopingCall(reloadAggregationSchemas) |
185 | 198 | |
186 | 199 | def startService(self): |
200 | if 'signal' in globals().keys(): | |
201 | log.debug("Installing SIG_IGN for SIGHUP") | |
202 | signal.signal(signal.SIGHUP, signal.SIG_IGN) | |
187 | 203 | self.storage_reload_task.start(60, False) |
188 | 204 | self.aggregation_reload_task.start(60, False) |
189 | 205 | reactor.addSystemEventTrigger('before', 'shutdown', shutdownModifyUpdateSpeed) |