From 423cb2489520aaa15f0d77f2699aa0da74903c2f Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Sat, 22 Jun 2013 13:21:27 +0200 Subject: [PATCH] statsd plugin: Implement the "TimerPercentile" configuration option. --- src/Makefile.am | 3 +- src/collectd.conf.in | 1 + src/collectd.conf.pod | 9 ++ src/statsd.c | 288 +++++++++++++++++++++++++++++++------------------- src/utils_latency.c | 173 ++++++++++++++++++++++++++++++ src/utils_latency.h | 45 ++++++++ 6 files changed, 407 insertions(+), 112 deletions(-) create mode 100644 src/utils_latency.c create mode 100644 src/utils_latency.h diff --git a/src/Makefile.am b/src/Makefile.am index b443ae32..9f16fd7c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1071,7 +1071,8 @@ endif if BUILD_PLUGIN_STATSD pkglib_LTLIBRARIES += statsd.la -statsd_la_SOURCES = statsd.c +statsd_la_SOURCES = statsd.c \ + utils_latency.h utils_latency.c statsd_la_LDFLAGS = -module -avoid-version statsd_la_LIBADD = -lpthread collectd_LDADD += "-dlopen" statsd.la diff --git a/src/collectd.conf.in b/src/collectd.conf.in index 98a75967..c5ab3ce8 100644 --- a/src/collectd.conf.in +++ b/src/collectd.conf.in @@ -935,6 +935,7 @@ # DeleteTimers false # DeleteGauges false # DeleteSets false +# TimerPercentile 90.0 # # diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index 39b3792d..4a34fe5d 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -5141,6 +5141,15 @@ rate of counters and size of sets will be zero, timers report C and gauges are unchanged. If set to B, the such metrics are not dispatched and removed from the internal cache. +=item B I + +Calculate and dispatch the configured percentile, i.e. compute the latency, so +that I of all reported timers are smaller than or equal to the +computed latency. This is useful for cutting off the long tail latency, as it's +often done in I (SLAs). + +If not specified, no percentile is calculated / dispatched. + =back =head2 Plugin C diff --git a/src/statsd.c b/src/statsd.c index 9443fed4..2def1914 100644 --- a/src/statsd.c +++ b/src/statsd.c @@ -25,6 +25,7 @@ #include "configfile.h" #include "utils_avltree.h" #include "utils_complain.h" +#include "utils_latency.h" #include @@ -54,6 +55,7 @@ struct statsd_metric_s { metric_type_t type; int64_t value; + latency_counter_t *latency; c_avl_tree_t *set; unsigned long updates_num; }; @@ -74,85 +76,108 @@ static _Bool conf_delete_timers = 0; static _Bool conf_delete_gauges = 0; static _Bool conf_delete_sets = 0; +static double *conf_timer_percentile = NULL; +static size_t conf_timer_percentile_num = 0; + /* Must hold metrics_lock when calling this function. */ -static int statsd_metric_set_unsafe (char const *name, int64_t value, /* {{{ */ +static statsd_metric_t *statsd_metric_lookup_unsafe (char const *name, metric_type_t type) { + char const *prefix; + char key[DATA_MAX_NAME_LEN + 2]; + char *key_copy; statsd_metric_t *metric; - char *key; int status; - status = c_avl_get (metrics_tree, name, (void *) &metric); - if (status == 0) + switch (type) { - metric->value = value; - metric->updates_num++; + case STATSD_COUNTER: prefix = "c"; break; + case STATSD_TIMER: prefix = "t"; break; + case STATSD_GAUGE: prefix = "g"; break; + case STATSD_SET: prefix = "s"; break; + default: return (NULL); + } - return (0); + ssnprintf (key, sizeof (key), "%s:%s", prefix, name); + + status = c_avl_get (metrics_tree, key, (void *) &metric); + if (status == 0) + return (metric); + + key_copy = strdup (key); + if (key_copy == NULL) + { + ERROR ("statsd plugin: strdup failed."); + return (NULL); } - DEBUG ("stats plugin: Adding new metric \"%s\".", name); - key = strdup (name); - metric = calloc (1, sizeof (*metric)); - if ((key == NULL) || (metric == NULL)) + metric = malloc (sizeof (*metric)); + if (metric == NULL) { - sfree (key); - sfree (metric); - return (-1); + ERROR ("statsd plugin: malloc failed."); + sfree (key_copy); + return (NULL); } + memset (metric, 0, sizeof (*metric)); metric->type = type; - metric->value = value; - metric->updates_num = 1; + metric->latency = NULL; + metric->set = NULL; - status = c_avl_insert (metrics_tree, key, metric); + status = c_avl_insert (metrics_tree, key_copy, metric); if (status != 0) { - sfree (key); + ERROR ("statsd plugin: c_avl_insert failed."); + sfree (key_copy); sfree (metric); - - return (-1); + return (NULL); } - return (0); -} /* }}} int statsd_metric_set_unsafe */ + return (metric); +} /* }}} statsd_metric_lookup_unsafe */ static int statsd_metric_set (char const *name, int64_t value, /* {{{ */ metric_type_t type) { - int status; + statsd_metric_t *metric; pthread_mutex_lock (&metrics_lock); - status = statsd_metric_set_unsafe (name, value, type); + + metric = statsd_metric_lookup_unsafe (name, type); + if (metric == NULL) + { + pthread_mutex_unlock (&metrics_lock); + return (-1); + } + + metric->value = value; + metric->updates_num++; + pthread_mutex_unlock (&metrics_lock); - return (status); + return (0); } /* }}} int statsd_metric_set */ static int statsd_metric_add (char const *name, int64_t delta, /* {{{ */ metric_type_t type) { statsd_metric_t *metric; - int status; pthread_mutex_lock (&metrics_lock); - status = c_avl_get (metrics_tree, name, (void *) &metric); - if (status == 0) + metric = statsd_metric_lookup_unsafe (name, type); + if (metric == NULL) { - metric->value += delta; - metric->updates_num++; - pthread_mutex_unlock (&metrics_lock); - return (0); + return (-1); } - else /* no such value yet */ - { - status = statsd_metric_set_unsafe (name, delta, type); - pthread_mutex_unlock (&metrics_lock); - return (status); - } + metric->value += delta; + metric->updates_num++; + + pthread_mutex_unlock (&metrics_lock); + + return (0); } /* }}} int statsd_metric_add */ static int statsd_handle_counter (char const *name, /* {{{ */ @@ -216,69 +241,57 @@ static int statsd_handle_gauge (char const *name, /* {{{ */ static int statsd_handle_timer (char const *name, /* {{{ */ char const *value_str) { - char key[DATA_MAX_NAME_LEN + 2]; - value_t value; + statsd_metric_t *metric; + value_t value_ms; + cdtime_t value; int status; - value.derive = 0; - status = parse_value (value_str, &value, DS_TYPE_DERIVE); + value_ms.derive = 0; + status = parse_value (value_str, &value_ms, DS_TYPE_DERIVE); if (status != 0) return (status); - ssnprintf (key, sizeof (key), "t:%s", name); + value = MS_TO_CDTIME_T (value_ms.derive); + + pthread_mutex_lock (&metrics_lock); + + metric = statsd_metric_lookup_unsafe (name, STATSD_TIMER); + if (metric == NULL) + { + pthread_mutex_unlock (&metrics_lock); + return (-1); + } + + if (metric->latency == NULL) + metric->latency = latency_counter_create (); + if (metric->latency == NULL) + { + pthread_mutex_unlock (&metrics_lock); + return (-1); + } + + latency_counter_add (metric->latency, value); + metric->updates_num++; - return (statsd_metric_add (key, (int64_t) value.derive, STATSD_TIMER)); + pthread_mutex_unlock (&metrics_lock); + return (0); } /* }}} int statsd_handle_timer */ -static int statsd_handle_set (char const *key_orig, /* {{{ */ - char const *name_orig) +static int statsd_handle_set (char const *name, /* {{{ */ + char const *set_key_orig) { - char key[DATA_MAX_NAME_LEN + 2]; - char *name; statsd_metric_t *metric = NULL; + char *set_key; int status; - ssnprintf (key, sizeof (key), "s:%s", key_orig); - pthread_mutex_lock (&metrics_lock); - status = c_avl_get (metrics_tree, key, (void *) &metric); - if (status != 0) /* Create a new metric */ + metric = statsd_metric_lookup_unsafe (name, STATSD_SET); + if (metric == NULL) { - char *key_copy; - - DEBUG ("stats plugin: Adding new metric \"%s\".", key); - key_copy = strdup (key); - if (key_copy == NULL) - { - pthread_mutex_unlock (&metrics_lock); - ERROR ("statsd plugin: strdup failed."); - return (-1); - } - - metric = calloc (1, sizeof (*metric)); - if (metric == NULL) - { - pthread_mutex_unlock (&metrics_lock); - ERROR ("statsd plugin: calloc failed."); - sfree (key_copy); - return (-1); - } - metric->type = STATSD_SET; - metric->set = NULL; - - status = c_avl_insert (metrics_tree, key_copy, metric); - if (status != 0) - { - pthread_mutex_unlock (&metrics_lock); - ERROR ("statsd plugin: c_avl_insert (\"%s\") failed with status %i.", - key_copy, status); - sfree (key_copy); - sfree (metric); - return (-1); - } + pthread_mutex_unlock (&metrics_lock); + return (-1); } - assert (metric != NULL); /* Make sure metric->set exists. */ if (metric->set == NULL) @@ -291,27 +304,27 @@ static int statsd_handle_set (char const *key_orig, /* {{{ */ return (-1); } - name = strdup (name_orig); - if (name == NULL) + set_key = strdup (set_key_orig); + if (set_key == NULL) { pthread_mutex_unlock (&metrics_lock); ERROR ("statsd plugin: strdup failed."); return (-1); } - status = c_avl_insert (metric->set, name, /* value = */ NULL); + status = c_avl_insert (metric->set, set_key, /* value = */ NULL); if (status < 0) { pthread_mutex_unlock (&metrics_lock); if (status < 0) ERROR ("statsd plugin: c_avl_insert (\"%s\") failed with status %i.", - name, status); - sfree (name); + set_key, status); + sfree (set_key); return (-1); } else if (status > 0) /* key already exists */ { - sfree (name); + sfree (set_key); } metric->updates_num++; @@ -550,6 +563,37 @@ static void *statsd_network_thread (void *args) /* {{{ */ return ((void *) 0); } /* }}} void *statsd_network_thread */ +static int statsd_config_timer_percentile (oconfig_item_t *ci) /* {{{ */ +{ + double percent = NAN; + double *tmp; + int status; + + status = cf_util_get_double (ci, &percent); + if (status != 0) + return (status); + + if ((percent <= 0.0) || (percent >= 100)) + { + ERROR ("statsd plugin: The value for \"%s\" must be between 0 and 100, " + "exclusively.", ci->key); + return (ERANGE); + } + + tmp = realloc (conf_timer_percentile, + sizeof (*conf_timer_percentile) * (conf_timer_percentile_num + 1)); + if (tmp == NULL) + { + ERROR ("statsd plugin: realloc failed."); + return (ENOMEM); + } + conf_timer_percentile = tmp; + conf_timer_percentile[conf_timer_percentile_num] = percent; + conf_timer_percentile_num++; + + return (0); +} /* }}} int statsd_config_timer_percentile */ + static int statsd_config (oconfig_item_t *ci) /* {{{ */ { int i; @@ -570,6 +614,8 @@ static int statsd_config (oconfig_item_t *ci) /* {{{ */ cf_util_get_boolean (child, &conf_delete_gauges); else if (strcasecmp ("DeleteSets", child->key) == 0) cf_util_get_boolean (child, &conf_delete_sets); + else if (strcasecmp ("TimerPercentile", child->key) == 0) + statsd_config_timer_percentile (child); else ERROR ("statsd plugin: The \"%s\" config option is not valid.", child->key); @@ -636,26 +682,6 @@ static int statsd_metric_submit_unsafe (char const *name, /* {{{ */ value_t values[1]; value_list_t vl = VALUE_LIST_INIT; - if (metric->type == STATSD_GAUGE) - values[0].gauge = (gauge_t) metric->value; - else if (metric->type == STATSD_TIMER) - { - if (metric->updates_num == 0) - values[0].gauge = NAN; - else - values[0].gauge = - ((gauge_t) metric->value) / ((gauge_t) metric->updates_num); - } - else if (metric->type == STATSD_SET) - { - if (metric->set == NULL) - values[0].gauge = 0.0; - else - values[0].gauge = (gauge_t) c_avl_size (metric->set); - } - else - values[0].derive = (derive_t) metric->value; - vl.values = values; vl.values_len = 1; sstrncpy (vl.host, hostname_g, sizeof (vl.host)); @@ -672,6 +698,46 @@ static int statsd_metric_submit_unsafe (char const *name, /* {{{ */ sstrncpy (vl.type_instance, name, sizeof (vl.type_instance)); + if (metric->type == STATSD_GAUGE) + values[0].gauge = (gauge_t) metric->value; + else if (metric->type == STATSD_TIMER) + { + size_t i; + + if (metric->updates_num == 0) + return (0); + + vl.time = cdtime (); + + ssnprintf (vl.type_instance, sizeof (vl.type_instance), + "%s-average", name); + values[0].gauge = CDTIME_T_TO_DOUBLE ( + latency_counter_get_average (metric->latency)); + plugin_dispatch_values (&vl); + + for (i = 0; i < conf_timer_percentile_num; i++) + { + ssnprintf (vl.type_instance, sizeof (vl.type_instance), + "%s-percentile-%.0f", name, conf_timer_percentile[i]); + values[0].gauge = CDTIME_T_TO_DOUBLE ( + latency_counter_get_percentile ( + metric->latency, conf_timer_percentile[i])); + plugin_dispatch_values (&vl); + } + + latency_counter_reset (metrics->latency); + return (0); + } + else if (metric->type == STATSD_SET) + { + if (metric->set == NULL) + values[0].gauge = 0.0; + else + values[0].gauge = (gauge_t) c_avl_size (metric->set); + } + else + values[0].derive = (derive_t) metric->value; + return (plugin_dispatch_values (&vl)); } /* }}} int statsd_metric_submit_unsafe */ diff --git a/src/utils_latency.c b/src/utils_latency.c new file mode 100644 index 00000000..01c8b5c2 --- /dev/null +++ b/src/utils_latency.c @@ -0,0 +1,173 @@ +/** + * collectd - src/utils_latency.c + * Copyright (C) 2013 Florian Forster + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Florian Forster + **/ + +#include "collectd.h" +#include "utils_latency.h" +#include "common.h" + +#ifndef LATENCY_HISTOGRAM_SIZE +# define LATENCY_HISTOGRAM_SIZE 1000 +#endif + +struct latency_counter_s +{ + cdtime_t start_time; + + cdtime_t sum; + size_t num; + + cdtime_t min; + cdtime_t max; + + int histogram[LATENCY_HISTOGRAM_SIZE]; +}; + +latency_counter_t *latency_counter_create () /* {{{ */ +{ + latency_counter_t *lc; + + lc = malloc (sizeof (*lc)); + if (lc == NULL) + return (NULL); + + latency_counter_reset (lc); + return (lc); +} /* }}} latency_counter_t *latency_counter_create */ + +void latency_counter_destroy (latency_counter_t *lc) /* {{{ */ +{ + sfree (lc); +} /* }}} void latency_counter_destroy */ + +void latency_counter_add (latency_counter_t *lc, cdtime_t latency) /* {{{ */ +{ + size_t latency_ms; + + if ((lc == NULL) || (latency == 0)) + return; + + lc->sum += latency; + lc->num++; + + if ((lc->min == 0) && (lc->max == 0)) + lc->min = lc->max = latency; + if (lc->min > latency) + lc->min = latency; + if (lc->max < latency) + lc->max = latency; + + /* A latency of _exactly_ 1.0 ms should be stored in the buffer 0, so + * subtract one from the cdtime_t value so that exactly 1.0 ms get sorted + * accordingly. */ + latency_ms = (size_t) CDTIME_T_TO_MS (latency - 1); + if (latency_ms < STATIC_ARRAY_SIZE (lc->histogram)) + lc->histogram[latency_ms]++; +} /* }}} void latency_counter_add */ + +void latency_counter_reset (latency_counter_t *lc) /* {{{ */ +{ + if (lc == NULL) + return; + + memset (lc, 0, sizeof (*lc)); + lc->start_time = cdtime (); +} /* }}} void latency_counter_reset */ + +cdtime_t latency_counter_get_min (latency_counter_t *lc) /* {{{ */ +{ + if (lc == NULL) + return (0); + return (lc->min); +} /* }}} cdtime_t latency_counter_get_min */ + +cdtime_t latency_counter_get_max (latency_counter_t *lc) /* {{{ */ +{ + if (lc == NULL) + return (0); + return (lc->max); +} /* }}} cdtime_t latency_counter_get_max */ + +cdtime_t latency_counter_get_average (latency_counter_t *lc) /* {{{ */ +{ + double average; + + if (lc == NULL) + return (0); + + average = CDTIME_T_TO_DOUBLE (lc->sum) / ((double) lc->num); + return (DOUBLE_TO_CDTIME_T (average)); +} /* }}} cdtime_t latency_counter_get_average */ + +cdtime_t latency_counter_get_percentile (latency_counter_t *lc, + double percent) +{ + double percent_upper; + double percent_lower; + double ms_upper; + double ms_lower; + double ms_interpolated; + int sum; + size_t i; + + if ((lc == NULL) || !((percent > 0.0) && (percent < 100.0))) + return (0); + + /* Find index i so that at least "percent" events are within i+1 ms. */ + percent_upper = 0.0; + percent_lower = 0.0; + sum = 0; + for (i = 0; i < LATENCY_HISTOGRAM_SIZE; i++) + { + percent_lower = percent_upper; + sum += lc->histogram[i]; + if (sum == 0) + percent_upper = 0.0; + else + percent_upper = 100.0 * ((double) sum) / ((double) lc->num); + + if (percent_upper >= percent) + break; + } + + if (i >= LATENCY_HISTOGRAM_SIZE) + return (0); + + assert (percent_upper >= percent); + assert (percent_lower < percent); + + ms_upper = (double) (i + 1); + ms_lower = (double) i; + if (i == 0) + return (MS_TO_CDTIME_T (ms_upper)); + + ms_interpolated = (((percent_upper - percent) * ms_lower) + + ((percent - percent_lower) * ms_upper)) + / (percent_upper - percent_lower); + + return (MS_TO_CDTIME_T (ms_interpolated)); +} /* }}} cdtime_t latency_counter_get_percentile */ + +/* vim: set sw=2 sts=2 et fdm=marker : */ diff --git a/src/utils_latency.h b/src/utils_latency.h new file mode 100644 index 00000000..3da23082 --- /dev/null +++ b/src/utils_latency.h @@ -0,0 +1,45 @@ +/** + * collectd - src/utils_latency.h + * Copyright (C) 2013 Florian Forster + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Florian Forster + **/ + +#include "collectd.h" +#include "utils_time.h" + +struct latency_counter_s; +typedef struct latency_counter_s latency_counter_t; + +latency_counter_t *latency_counter_create (); +void latency_counter_destroy (latency_counter_t *lc); + +void latency_counter_add (latency_counter_t *lc, cdtime_t latency); +void latency_counter_reset (latency_counter_t *lc); + +cdtime_t latency_counter_get_min (latency_counter_t *lc); +cdtime_t latency_counter_get_max (latency_counter_t *lc); +cdtime_t latency_counter_get_average (latency_counter_t *lc); +cdtime_t latency_counter_get_percentile (latency_counter_t *lc, + double percent); + +/* vim: set sw=2 sts=2 et : */ -- 2.11.0