2 * collectd - src/statsd.c
3 * Copyright (C) 2013 Florian octo Forster
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Florian octo Forster <octo at collectd.org>
30 #include "utils/avltree/avltree.h"
31 #include "utils/common/common.h"
32 #include "utils/latency/latency.h"
36 #include <sys/types.h>
38 /* AIX doesn't have MSG_DONTWAIT */
/* Alias the flag to MSG_NONBLOCK so the recv() call in statsd_network_read()
 * can keep passing MSG_DONTWAIT.
 * NOTE(review): the preprocessor conditionals guarding this define and
 * closing the #ifndef blocks below are not part of this listing. */
40 #define MSG_DONTWAIT MSG_NONBLOCK
/* Node passed to getaddrinfo() when conf_node is unset. */
43 #ifndef STATSD_DEFAULT_NODE
44 #define STATSD_DEFAULT_NODE NULL
/* Service (port) passed to getaddrinfo() when conf_service is unset. */
47 #ifndef STATSD_DEFAULT_SERVICE
48 #define STATSD_DEFAULT_SERVICE "8125"
/* The kinds of statsd metrics this plugin tells apart. */
enum metric_type_e {
  STATSD_COUNTER,
  STATSD_TIMER,
  STATSD_GAUGE,
  STATSD_SET
};
typedef enum metric_type_e metric_type_t;
/* Per-metric state stored as the value in metrics_tree.
 * NOTE(review): several member lines and the closing brace are absent from
 * this listing; other functions in this file access ->type, ->value,
 * ->counter and ->set, which are presumably declared on those lines. */
54 struct statsd_metric_s {
/* Latency statistics for timers; set to NULL on creation and allocated by
 * statsd_handle_timer() on the first sample. */
58 latency_counter_t *latency;
/* Incremented on every update; statsd_read() resets it to zero. */
60 unsigned long updates_num;
62 typedef struct statsd_metric_s statsd_metric_t;
64 static c_avl_tree_t *metrics_tree;
65 static pthread_mutex_t metrics_lock = PTHREAD_MUTEX_INITIALIZER;
67 static pthread_t network_thread;
68 static bool network_thread_running;
69 static bool network_thread_shutdown;
71 static char *conf_node;
72 static char *conf_service;
74 static bool conf_delete_counters;
75 static bool conf_delete_timers;
76 static bool conf_delete_gauges;
77 static bool conf_delete_sets;
79 static double *conf_timer_percentile;
80 static size_t conf_timer_percentile_num;
82 static bool conf_counter_sum;
83 static bool conf_timer_lower;
84 static bool conf_timer_upper;
85 static bool conf_timer_sum;
86 static bool conf_timer_count;
88 /* Must hold metrics_lock when calling this function. */
/* Looks up the metric stored in metrics_tree under a prefixed copy of `name`.
 * The visible allocation path shows that a new, zero-initialised metric is
 * created and inserted under a heap copy of the key.
 * NOTE(review): the second parameter line, the code that fills key[0] and
 * key[1], the check of the c_avl_get() result, the cleanup on the error paths
 * and every return statement are absent from this listing. */
89 static statsd_metric_t *statsd_metric_lookup_unsafe(char const *name, /* {{{ */
/* Two extra bytes in front of the name are reserved for the prefix. */
91 char key[DATA_MAX_NAME_LEN + 2];
93 statsd_metric_t *metric;
/* Copy the name behind the two prefix bytes. */
114 sstrncpy(&key[2], name, sizeof(key) - 2);
116 status = c_avl_get(metrics_tree, key, (void *)&metric);
/* The tree stores the key pointer, so it needs a heap copy of the key. */
120 key_copy = strdup(key);
121 if (key_copy == NULL) {
122 ERROR("statsd plugin: strdup failed.");
/* calloc() zeroes the new metric, including value and updates_num. */
126 metric = calloc(1, sizeof(*metric));
127 if (metric == NULL) {
128 ERROR("statsd plugin: calloc failed.");
134 metric->latency = NULL;
137 status = c_avl_insert(metrics_tree, key_copy, metric);
139 ERROR("statsd plugin: c_avl_insert failed.");
146 } /* }}} statsd_metric_lookup_unsafe */
148 static int statsd_metric_set(char const *name, double value, /* {{{ */
149 metric_type_t type) {
150 statsd_metric_t *metric;
152 pthread_mutex_lock(&metrics_lock);
154 metric = statsd_metric_lookup_unsafe(name, type);
155 if (metric == NULL) {
156 pthread_mutex_unlock(&metrics_lock);
160 metric->value = value;
161 metric->updates_num++;
163 pthread_mutex_unlock(&metrics_lock);
166 } /* }}} int statsd_metric_set */
168 static int statsd_metric_add(char const *name, double delta, /* {{{ */
169 metric_type_t type) {
170 statsd_metric_t *metric;
172 pthread_mutex_lock(&metrics_lock);
174 metric = statsd_metric_lookup_unsafe(name, type);
175 if (metric == NULL) {
176 pthread_mutex_unlock(&metrics_lock);
180 metric->value += delta;
181 metric->updates_num++;
183 pthread_mutex_unlock(&metrics_lock);
186 } /* }}} int statsd_metric_add */
/* Releases the resources attached to `metric`: the latency counter and the
 * set tree, if present.
 * NOTE(review): the local declarations, the release of the picked set keys
 * and of the metric itself are absent from this listing; confirm against
 * the full file. */
188 static void statsd_metric_free(statsd_metric_t *metric) /* {{{ */
193 if (metric->latency != NULL) {
194 latency_counter_destroy(metric->latency);
195 metric->latency = NULL;
198 if (metric->set != NULL) {
/* Drain the set one entry at a time before destroying the tree. */
202 while (c_avl_pick(metric->set, &key, &value) == 0) {
/* Set members are inserted with a NULL value (see statsd_handle_set). */
204 assert(value == NULL);
207 c_avl_destroy(metric->set);
212 } /* }}} void statsd_metric_free */
214 static int statsd_parse_value(char const *str, value_t *ret_value) /* {{{ */
218 ret_value->gauge = (gauge_t)strtod(str, &endptr);
219 if ((str == endptr) || ((endptr != NULL) && (*endptr != 0)))
223 } /* }}} int statsd_parse_value */
225 static int statsd_handle_counter(char const *name, /* {{{ */
226 char const *value_str, char const *extra) {
231 if ((extra != NULL) && (extra[0] != '@'))
236 status = statsd_parse_value(extra + 1, &scale);
240 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
245 status = statsd_parse_value(value_str, &value);
249 /* Changes to the counter are added to (statsd_metric_t*)->value. ->counter is
250 * only updated in statsd_metric_submit_unsafe(). */
251 return statsd_metric_add(name, (double)(value.gauge / scale.gauge),
253 } /* }}} int statsd_handle_counter */
255 static int statsd_handle_gauge(char const *name, /* {{{ */
256 char const *value_str) {
261 status = statsd_parse_value(value_str, &value);
265 if ((value_str[0] == '+') || (value_str[0] == '-'))
266 return statsd_metric_add(name, (double)value.gauge, STATSD_GAUGE);
268 return statsd_metric_set(name, (double)value.gauge, STATSD_GAUGE);
269 } /* }}} int statsd_handle_gauge */
271 static int statsd_handle_timer(char const *name, /* {{{ */
272 char const *value_str, char const *extra) {
273 statsd_metric_t *metric;
279 if ((extra != NULL) && (extra[0] != '@'))
284 status = statsd_parse_value(extra + 1, &scale);
288 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
293 status = statsd_parse_value(value_str, &value_ms);
297 value = MS_TO_CDTIME_T(value_ms.gauge / scale.gauge);
299 pthread_mutex_lock(&metrics_lock);
301 metric = statsd_metric_lookup_unsafe(name, STATSD_TIMER);
302 if (metric == NULL) {
303 pthread_mutex_unlock(&metrics_lock);
307 if (metric->latency == NULL)
308 metric->latency = latency_counter_create();
309 if (metric->latency == NULL) {
310 pthread_mutex_unlock(&metrics_lock);
314 latency_counter_add(metric->latency, value);
315 metric->updates_num++;
317 pthread_mutex_unlock(&metrics_lock);
319 } /* }}} int statsd_handle_timer */
/* Adds `set_key_orig` as a member of the set metric `name`. The member tree
 * is created on first use and compares keys with strcmp. metrics_lock is
 * held while the metric is touched and released before errors are logged.
 * NOTE(review): the local declarations, the return statements and whatever
 * is done with set_key on the failure / already-present branches are absent
 * from this listing. */
321 static int statsd_handle_set(char const *name, /* {{{ */
322 char const *set_key_orig) {
323 statsd_metric_t *metric = NULL;
327 pthread_mutex_lock(&metrics_lock);
329 metric = statsd_metric_lookup_unsafe(name, STATSD_SET);
330 if (metric == NULL) {
331 pthread_mutex_unlock(&metrics_lock);
335 /* Make sure metric->set exists. */
336 if (metric->set == NULL)
337 metric->set = c_avl_create((int (*)(const void *, const void *))strcmp);
339 if (metric->set == NULL) {
340 pthread_mutex_unlock(&metrics_lock);
341 ERROR("statsd plugin: c_avl_create failed.");
/* The tree stores the key pointer, so insert a private heap copy. */
345 set_key = strdup(set_key_orig);
346 if (set_key == NULL) {
347 pthread_mutex_unlock(&metrics_lock);
348 ERROR("statsd plugin: strdup failed.");
/* Members carry no payload; only the key matters. */
352 status = c_avl_insert(metric->set, set_key, /* value = */ NULL);
354 pthread_mutex_unlock(&metrics_lock);
355 ERROR("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
359 } else if (status > 0) /* key already exists */
364 metric->updates_num++;
366 pthread_mutex_unlock(&metrics_lock);
368 } /* }}} int statsd_handle_set */
/* Parses one statsd line held in `buffer` and dispatches it by type token:
 * "c" -> counter, "ms" -> timer, "g" -> gauge, "s" -> set.
 * The type field is located at the first '|' in the name, the value at the
 * last ':' in the name, and the optional extra field at the first '|' inside
 * the type.
 * NOTE(review): the local declarations, the statements that separate the
 * fields, and the failure returns are absent from this listing. */
370 static int statsd_parse_line(char *buffer) /* {{{ */
377 type = strchr(name, '|');
/* strrchr: the value starts after the LAST ':' in the name part. */
383 value = strrchr(name, ':');
389 extra = strchr(type, '|');
/* Counters and timers are the only types that receive the extra field. */
395 if (strcmp("c", type) == 0)
396 return statsd_handle_counter(name, value, extra);
397 else if (strcmp("ms", type) == 0)
398 return statsd_handle_timer(name, value, extra);
400 /* extra is only valid for counters and timers */
404 if (strcmp("g", type) == 0)
405 return statsd_handle_gauge(name, value);
406 else if (strcmp("s", type) == 0)
407 return statsd_handle_set(name, value);
410 } /* }}} int statsd_parse_line */
/* Walks `buffer` line by line (lines are delimited by '\n') and hands each
 * line to statsd_parse_line(). Lines that fail to parse are logged.
 * NOTE(review): the local declarations and the statements that advance
 * `buffer` to the next line are absent from this listing. */
412 static void statsd_parse_buffer(char *buffer) /* {{{ */
414 while (buffer != NULL) {
419 next = strchr(buffer, '\n');
/* Keep a copy for the error message; it is taken before statsd_parse_line()
 * receives the non-const buffer. */
430 sstrncpy(orig, buffer, sizeof(orig));
432 status = statsd_parse_line(buffer);
434 ERROR("statsd plugin: Unable to parse line: \"%s\"", orig);
438 } /* }}} void statsd_parse_buffer */
/* Reads one message from `fd` without blocking, NUL-terminates it and passes
 * it to statsd_parse_buffer().
 * NOTE(review): the declarations of buffer, buffer_size and status and the
 * handling around the errno test are absent from this listing. */
440 static void statsd_network_read(int fd) /* {{{ */
446 status = recv(fd, buffer, sizeof(buffer), /* flags = */ MSG_DONTWAIT);
/* EAGAIN / EWOULDBLOCK are tested separately from the failure that is
 * logged below. */
449 if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
452 ERROR("statsd plugin: recv(2) failed: %s", STRERRNO);
/* Reserve the last byte for the terminator: a message that fills the whole
 * buffer loses its final byte. */
456 buffer_size = (size_t)status;
457 if (buffer_size >= sizeof(buffer))
458 buffer_size = sizeof(buffer) - 1;
459 buffer[buffer_size] = 0;
461 statsd_parse_buffer(buffer);
462 } /* }}} void statsd_network_read */
/* Opens one bound UDP socket per address that getaddrinfo() returns for the
 * configured (or default) node and service.
 *
 * On success stores a heap-allocated pollfd array in *ret_fds and its length
 * in *ret_fds_num and returns 0; the caller owns the array and the
 * descriptors in it. Returns the getaddrinfo() status if resolution fails,
 * or ENOENT if no address could be bound. Addresses that fail individually
 * are logged and skipped.
 *
 * `node` is NULL when neither a "Host" option nor a non-NULL default is
 * set, so every log message substitutes "::" for it instead of passing NULL
 * to "%s". The getnameinfo() result is checked so the log never prints
 * unspecified buffer contents. */
static int statsd_network_init(struct pollfd **ret_fds, /* {{{ */
                               size_t *ret_fds_num) {
  struct pollfd *fds = NULL;
  size_t fds_num = 0;

  struct addrinfo *ai_list;
  int status;

  char const *node = (conf_node != NULL) ? conf_node : STATSD_DEFAULT_NODE;
  char const *service =
      (conf_service != NULL) ? conf_service : STATSD_DEFAULT_SERVICE;
  char const *node_str = (node != NULL) ? node : "::";

  struct addrinfo ai_hints = {.ai_family = AF_UNSPEC,
                              .ai_flags = AI_PASSIVE | AI_ADDRCONFIG,
                              .ai_socktype = SOCK_DGRAM};

  status = getaddrinfo(node, service, &ai_hints, &ai_list);
  if (status != 0) {
    ERROR("statsd plugin: getaddrinfo (\"%s\", \"%s\") failed: %s", node_str,
          service, gai_strerror(status));
    return status;
  }

  for (struct addrinfo *ai_ptr = ai_list; ai_ptr != NULL;
       ai_ptr = ai_ptr->ai_next) {
    int fd;
    struct pollfd *tmp;

    char str_node[NI_MAXHOST];
    char str_service[NI_MAXSERV];

    fd = socket(ai_ptr->ai_family, ai_ptr->ai_socktype, ai_ptr->ai_protocol);
    if (fd < 0) {
      ERROR("statsd plugin: socket(2) failed: %s", STRERRNO);
      continue;
    }

    /* allow multiple sockets to use the same PORT number */
    int yes = 1;
    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) {
      ERROR("statsd plugin: setsockopt (reuseaddr): %s", STRERRNO);
      close(fd);
      continue;
    }

    /* The numeric address is only used for log messages; fall back to a
     * placeholder rather than logging unspecified buffer contents. */
    if (getnameinfo(ai_ptr->ai_addr, ai_ptr->ai_addrlen, str_node,
                    sizeof(str_node), str_service, sizeof(str_service),
                    NI_DGRAM | NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
      sstrncpy(str_node, "?", sizeof(str_node));
      sstrncpy(str_service, "?", sizeof(str_service));
    }
    DEBUG("statsd plugin: Trying to bind to [%s]:%s ...", str_node,
          str_service);

    status = bind(fd, ai_ptr->ai_addr, ai_ptr->ai_addrlen);
    if (status != 0) {
      ERROR("statsd plugin: bind(2) to [%s]:%s failed: %s", str_node,
            str_service, STRERRNO);
      close(fd);
      continue;
    }

    /* Grow through a temporary so the existing array survives a failure. */
    tmp = realloc(fds, sizeof(*fds) * (fds_num + 1));
    if (tmp == NULL) {
      ERROR("statsd plugin: realloc failed.");
      close(fd);
      continue;
    }
    fds = tmp;
    tmp = fds + fds_num;
    fds_num++;

    memset(tmp, 0, sizeof(*tmp));
    tmp->fd = fd;
    tmp->events = POLLIN | POLLPRI;
    INFO("statsd plugin: Listening on [%s]:%s.", str_node, str_service);
  }

  freeaddrinfo(ai_list);

  if (fds_num == 0) {
    ERROR("statsd plugin: Unable to create listening socket for [%s]:%s.",
          node_str, service);
    return ENOENT;
  }

  *ret_fds = fds;
  *ret_fds_num = fds_num;
  return 0;
} /* }}} int statsd_network_init */
/* Thread entry point of the listener: opens the listening sockets, then
 * polls them until network_thread_shutdown is set, reading from every
 * descriptor that reports POLLIN or POLLPRI.
 * NOTE(review): the declarations of fds_num/status, the status checks, the
 * loop-exit statements and the body of the final clean-up loop are absent
 * from this listing. */
552 static void *statsd_network_thread(void *args) /* {{{ */
554 struct pollfd *fds = NULL;
558 status = statsd_network_init(&fds, &fds_num);
560 ERROR("statsd plugin: Unable to open listening sockets.");
561 pthread_exit((void *)0);
564 while (!network_thread_shutdown) {
/* Blocks without a timeout; statsd_shutdown() sends this thread a signal. */
565 status = poll(fds, (nfds_t)fds_num, /* timeout = */ -1);
/* EINTR / EAGAIN are tested separately from the failure that is logged. */
568 if ((errno == EINTR) || (errno == EAGAIN))
571 ERROR("statsd plugin: poll(2) failed: %s", STRERRNO);
575 for (size_t i = 0; i < fds_num; i++) {
576 if ((fds[i].revents & (POLLIN | POLLPRI)) == 0)
579 statsd_network_read(fds[i].fd);
582 } /* while (!network_thread_shutdown) */
585 for (size_t i = 0; i < fds_num; i++)
590 } /* }}} void *statsd_network_thread */
/* Handles one "TimerPercentile" option: reads a double from `ci`, checks it
 * against the open interval (0, 100) and appends it to
 * conf_timer_percentile.
 * NOTE(review): the declarations of tmp/status, the status and NULL checks
 * and all return statements are absent from this listing. */
592 static int statsd_config_timer_percentile(oconfig_item_t *ci) /* {{{ */
594 double percent = NAN;
598 status = cf_util_get_double(ci, &percent);
/* Both 0 and 100 themselves are rejected. */
602 if ((percent <= 0.0) || (percent >= 100)) {
603 ERROR("statsd plugin: The value for \"%s\" must be between 0 and 100, "
/* Grow via a temporary so the existing array is kept if realloc fails. */
609 tmp = realloc(conf_timer_percentile, sizeof(*conf_timer_percentile) *
610 (conf_timer_percentile_num + 1));
612 ERROR("statsd plugin: realloc failed.");
615 conf_timer_percentile = tmp;
616 conf_timer_percentile[conf_timer_percentile_num] = percent;
617 conf_timer_percentile_num++;
620 } /* }}} int statsd_config_timer_percentile */
622 static int statsd_config(oconfig_item_t *ci) /* {{{ */
624 for (int i = 0; i < ci->children_num; i++) {
625 oconfig_item_t *child = ci->children + i;
627 if (strcasecmp("Host", child->key) == 0)
628 cf_util_get_string(child, &conf_node);
629 else if (strcasecmp("Port", child->key) == 0)
630 cf_util_get_service(child, &conf_service);
631 else if (strcasecmp("DeleteCounters", child->key) == 0)
632 cf_util_get_boolean(child, &conf_delete_counters);
633 else if (strcasecmp("DeleteTimers", child->key) == 0)
634 cf_util_get_boolean(child, &conf_delete_timers);
635 else if (strcasecmp("DeleteGauges", child->key) == 0)
636 cf_util_get_boolean(child, &conf_delete_gauges);
637 else if (strcasecmp("DeleteSets", child->key) == 0)
638 cf_util_get_boolean(child, &conf_delete_sets);
639 else if (strcasecmp("CounterSum", child->key) == 0)
640 cf_util_get_boolean(child, &conf_counter_sum);
641 else if (strcasecmp("TimerLower", child->key) == 0)
642 cf_util_get_boolean(child, &conf_timer_lower);
643 else if (strcasecmp("TimerUpper", child->key) == 0)
644 cf_util_get_boolean(child, &conf_timer_upper);
645 else if (strcasecmp("TimerSum", child->key) == 0)
646 cf_util_get_boolean(child, &conf_timer_sum);
647 else if (strcasecmp("TimerCount", child->key) == 0)
648 cf_util_get_boolean(child, &conf_timer_count);
649 else if (strcasecmp("TimerPercentile", child->key) == 0)
650 statsd_config_timer_percentile(child);
652 ERROR("statsd plugin: The \"%s\" config option is not valid.",
657 } /* }}} int statsd_config */
659 static int statsd_init(void) /* {{{ */
661 pthread_mutex_lock(&metrics_lock);
662 if (metrics_tree == NULL)
663 metrics_tree = c_avl_create((int (*)(const void *, const void *))strcmp);
665 if (!network_thread_running) {
668 status = pthread_create(&network_thread,
669 /* attr = */ NULL, statsd_network_thread,
672 pthread_mutex_unlock(&metrics_lock);
673 ERROR("statsd plugin: pthread_create failed: %s", STRERRNO);
677 network_thread_running = true;
679 pthread_mutex_unlock(&metrics_lock);
682 } /* }}} int statsd_init */
684 /* Must hold metrics_lock when calling this function. */
/* Empties the member tree of a set metric by picking entries until none are
 * left. Only applies to non-NULL metrics of type STATSD_SET; a set without a
 * tree is handled by a separate branch.
 * NOTE(review): the local declarations, the return statements and the
 * release of the picked key/value are absent from this listing. */
685 static int statsd_metric_clear_set_unsafe(statsd_metric_t *metric) /* {{{ */
690 if ((metric == NULL) || (metric->type != STATSD_SET))
693 if (metric->set == NULL)
696 while (c_avl_pick(metric->set, &key, &value) == 0) {
702 } /* }}} int statsd_metric_clear_set_unsafe */
704 /* Must hold metrics_lock when calling this function. */
/* Dispatches the current state of `metric` under the plugin name "statsd",
 * using `name` as the type instance.
 *   - gauge:   one "gauge" value taken from metric->value.
 *   - timer:   "latency" values with the type-instance suffixes -average,
 *              and optionally -lower, -upper, -sum and -percentile-<p>, plus
 *              an optional "gauge" <name>-count; the latency counter is reset
 *              afterwards.
 *   - set:     one "objects" value holding the number of set members.
 *   - counter: one "derive" value from metric->counter, optionally preceded
 *              by a "count" value holding the rounded change.
 * NOTE(review): several assignment fragments (the left-hand sides of the
 * conditional expressions, the timestamp assignment announced by the comment
 * in the timer branch, closing braces and an `else`) are absent from this
 * listing. */
705 static int statsd_metric_submit_unsafe(char const *name,
706 statsd_metric_t *metric) /* {{{ */
708 value_list_t vl = VALUE_LIST_INIT;
/* Single value slot, NaN until a branch below fills it in. */
710 vl.values = &(value_t){.gauge = NAN};
712 sstrncpy(vl.plugin, "statsd", sizeof(vl.plugin));
/* Pick the collectd type that corresponds to the statsd metric type. */
714 if (metric->type == STATSD_GAUGE)
715 sstrncpy(vl.type, "gauge", sizeof(vl.type));
716 else if (metric->type == STATSD_TIMER)
717 sstrncpy(vl.type, "latency", sizeof(vl.type));
718 else if (metric->type == STATSD_SET)
719 sstrncpy(vl.type, "objects", sizeof(vl.type));
720 else /* if (metric->type == STATSD_COUNTER) */
721 sstrncpy(vl.type, "derive", sizeof(vl.type));
723 sstrncpy(vl.type_instance, name, sizeof(vl.type_instance));
725 if (metric->type == STATSD_GAUGE)
726 vl.values[0].gauge = (gauge_t)metric->value;
727 else if (metric->type == STATSD_TIMER) {
/* The latency counter is only consulted for the statistics below when the
 * timer was updated since the last read. */
728 bool have_events = (metric->updates_num > 0);
730 /* Make sure all timer metrics share the *same* timestamp. */
733 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-average", name);
736 ? CDTIME_T_TO_DOUBLE(latency_counter_get_average(metric->latency))
738 plugin_dispatch_values(&vl);
740 if (conf_timer_lower) {
741 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-lower", name);
744 ? CDTIME_T_TO_DOUBLE(latency_counter_get_min(metric->latency))
746 plugin_dispatch_values(&vl);
749 if (conf_timer_upper) {
750 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-upper", name);
753 ? CDTIME_T_TO_DOUBLE(latency_counter_get_max(metric->latency))
755 plugin_dispatch_values(&vl);
758 if (conf_timer_sum) {
759 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-sum", name);
762 ? CDTIME_T_TO_DOUBLE(latency_counter_get_sum(metric->latency))
764 plugin_dispatch_values(&vl);
/* One value per configured percentile; "%.0f" rounds the percentile to a
 * whole number in the type instance. */
767 for (size_t i = 0; i < conf_timer_percentile_num; i++) {
768 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-percentile-%.0f",
769 name, conf_timer_percentile[i]);
771 have_events ? CDTIME_T_TO_DOUBLE(latency_counter_get_percentile(
772 metric->latency, conf_timer_percentile[i]))
774 plugin_dispatch_values(&vl);
777 /* Keep this at the end, since vl.type is set to "gauge" here. The
778 * vl.type's above are implicitly set to "latency". */
779 if (conf_timer_count) {
780 sstrncpy(vl.type, "gauge", sizeof(vl.type));
781 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-count", name);
782 vl.values[0].gauge = latency_counter_get_num(metric->latency);
783 plugin_dispatch_values(&vl);
786 latency_counter_reset(metric->latency);
788 } else if (metric->type == STATSD_SET) {
789 if (metric->set == NULL)
790 vl.values[0].gauge = 0.0;
792 vl.values[0].gauge = (gauge_t)c_avl_size(metric->set);
793 } else { /* STATSD_COUNTER */
/* Only the integral part of the accumulated change is moved into the
 * counter; the fraction stays in metric->value. */
794 gauge_t delta = nearbyint(metric->value);
796 /* Etsy's statsd writes counters as two metrics: a rate and the change since
797 * the last write. Since collectd does not reset its DERIVE metrics to zero,
798 * this makes little sense, but we're dispatching a "count" metric here
799 * anyway - if requested by the user - for compatibility reasons. */
800 if (conf_counter_sum) {
801 sstrncpy(vl.type, "count", sizeof(vl.type));
802 vl.values[0].gauge = delta;
803 plugin_dispatch_values(&vl);
805 /* restore vl.type */
806 sstrncpy(vl.type, "derive", sizeof(vl.type));
809 /* Rather than resetting value to zero, subtract delta so we correctly keep
810 * track of residuals. */
811 metric->value -= delta;
812 metric->counter += (derive_t)delta;
814 vl.values[0].derive = metric->counter;
817 return plugin_dispatch_values(&vl);
818 } /* }}} int statsd_metric_submit_unsafe */
820 static int statsd_read(void) /* {{{ */
822 c_avl_iterator_t *iter;
824 statsd_metric_t *metric;
826 char **to_be_deleted = NULL;
827 size_t to_be_deleted_num = 0;
829 pthread_mutex_lock(&metrics_lock);
831 if (metrics_tree == NULL) {
832 pthread_mutex_unlock(&metrics_lock);
836 iter = c_avl_get_iterator(metrics_tree);
837 while (c_avl_iterator_next(iter, (void *)&name, (void *)&metric) == 0) {
838 if ((metric->updates_num == 0) &&
839 ((conf_delete_counters && (metric->type == STATSD_COUNTER)) ||
840 (conf_delete_timers && (metric->type == STATSD_TIMER)) ||
841 (conf_delete_gauges && (metric->type == STATSD_GAUGE)) ||
842 (conf_delete_sets && (metric->type == STATSD_SET)))) {
843 DEBUG("statsd plugin: Deleting metric \"%s\".", name);
844 strarray_add(&to_be_deleted, &to_be_deleted_num, name);
848 /* Names have a prefix, e.g. "c:", which determines the (statsd) type.
849 * Remove this here. */
850 statsd_metric_submit_unsafe(name + 2, metric);
852 /* Reset the metric. */
853 metric->updates_num = 0;
854 if (metric->type == STATSD_SET)
855 statsd_metric_clear_set_unsafe(metric);
857 c_avl_iterator_destroy(iter);
859 for (size_t i = 0; i < to_be_deleted_num; i++) {
862 status = c_avl_remove(metrics_tree, to_be_deleted[i], (void *)&name,
865 ERROR("stats plugin: c_avl_remove (\"%s\") failed with status %i.",
866 to_be_deleted[i], status);
871 statsd_metric_free(metric);
874 pthread_mutex_unlock(&metrics_lock);
876 strarray_free(to_be_deleted, to_be_deleted_num);
879 } /* }}} int statsd_read */
/* Shutdown callback: stops the listener thread, then empties and destroys
 * the metrics tree under metrics_lock.
 * NOTE(review): the local declarations, the release of the picked keys, any
 * resetting of globals and the return statement are absent from this
 * listing. */
881 static int statsd_shutdown(void) /* {{{ */
886 if (network_thread_running) {
/* Set the flag first, then signal the thread so its blocking poll() is
 * interrupted and the loop condition is re-evaluated. */
887 network_thread_shutdown = true;
888 pthread_kill(network_thread, SIGTERM);
889 pthread_join(network_thread, /* retval = */ NULL);
891 network_thread_running = false;
893 pthread_mutex_lock(&metrics_lock);
/* Drain the tree one entry at a time, freeing each metric. */
895 while (c_avl_pick(metrics_tree, &key, &value) == 0) {
897 statsd_metric_free(value);
899 c_avl_destroy(metrics_tree);
905 pthread_mutex_unlock(&metrics_lock);
908 } /* }}} int statsd_shutdown */
/* Plugin entry point: registers the config, init, read and shutdown
 * callbacks of the "statsd" plugin. */
910 void module_register(void) {
911 plugin_register_complex_config("statsd", statsd_config);
912 plugin_register_init("statsd", statsd_init);
913 plugin_register_read("statsd", statsd_read);
914 plugin_register_shutdown("statsd", statsd_shutdown);