From: Pierre-Yves Ritschard Date: Fri, 6 Mar 2015 07:30:38 +0000 (+0100) Subject: Merge pull request #838 from yogeswaran/histogram X-Git-Tag: collectd-5.5.0~73 X-Git-Url: https://git.octo.it/?p=collectd.git;a=commitdiff_plain;h=a7eecf6018a684dcf8323d4a41a7e704a5d57f02;hp=-c Merge pull request #838 from yogeswaran/histogram statsd histogram to support more than 1 second --- a7eecf6018a684dcf8323d4a41a7e704a5d57f02 diff --combined src/utils_latency.c index 91ddd5fd,cfe90e27..b0ef3595 --- a/src/utils_latency.c +++ b/src/utils_latency.c @@@ -1,6 -1,6 +1,6 @@@ /** * collectd - src/utils_latency.c - * Copyright (C) 2013 Florian Forster + * Copyright (C) 2013 Florian Forster * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@@ -25,13 -25,18 +25,18 @@@ **/ #include "collectd.h" + #include "plugin.h" #include "utils_latency.h" #include "common.h" - #ifndef LATENCY_HISTOGRAM_SIZE - # define LATENCY_HISTOGRAM_SIZE 1000 + #include + + #ifndef HISTOGRAM_NUM_BINS + # define HISTOGRAM_NUM_BINS 1000 #endif + static const int HISTOGRAM_DEFAULT_BIN_WIDTH = 1; + struct latency_counter_s { cdtime_t start_time; @@@ -42,9 -47,69 +47,69 @@@ cdtime_t min; cdtime_t max; - int histogram[LATENCY_HISTOGRAM_SIZE]; + int bin_width; + int histogram[HISTOGRAM_NUM_BINS]; }; + /* + * Histogram represents the distribution of data, it has a list of "bins". + * Each bin represents an interval and has a count (frequency) of + * number of values fall within its interval. + * + * Histogram's range is determined by the number of bins and the bin width, + * There are 1000 bins and all bins have the same width of default 1 millisecond. + * When a value above this range is added, Histogram's range is increased by + * increasing the bin width (note that number of bins remains always at 1000). + * This operation of increasing bin width is little expensive as each bin need + * to be visited to update it's count. To reduce frequent change of bin width, + * new bin width will be the next nearest power of 2. Example: 2, 4, 8, 16, 32, + * 64, 128, 256, 512, 1024, 2048, 5086, ... + * + * So, if the required bin width is 300, then new bin width will be 512 as it is + * the next nearest power of 2. + * + */ + void change_bin_width (latency_counter_t *lc, size_t val) /* {{{ */ + { + int i=0; + /* This function is called because the new value is above histogram's range. + * First find the required bin width: + * requiredBinWidth = (value + 1) / numBins + * then get the next nearest power of 2 + * newBinWidth = 2^(ceil(log2(requiredBinWidth))) + */ + double required_bin_width = (double)(val + 1) / HISTOGRAM_NUM_BINS; + double required_bin_width_logbase2 = log(required_bin_width) / log(2.0); + int new_bin_width = (int)(pow(2.0, ceil( required_bin_width_logbase2))); + int old_bin_width = lc->bin_width; + lc->bin_width = new_bin_width; + + /* + * bin width has been increased, now iterate through all bins and move the + * old bin's count to new bin. + */ + if (lc->num > 0) // if the histogram has data then iterate else skip + { + double width_change_ratio = old_bin_width / new_bin_width; + for (i=0; ihistogram[new_bin] += lc->histogram[i]; + lc->histogram[i] = 0; + } + DEBUG("utils_latency: change_bin_width: fixed all bins"); + } + + DEBUG("utils_latency: change_bin_width: val-[%ld], oldBinWidth-[%d], " + "newBinWidth-[%d], required_bin_width-[%f], " + "required_bin_width_logbase2-[%f]", + val, old_bin_width, new_bin_width, required_bin_width, + required_bin_width_logbase2); + + } /* }}} void change_bin_width */ + latency_counter_t *latency_counter_create () /* {{{ */ { latency_counter_t *lc; @@@ -54,6 -119,7 +119,7 @@@ return (NULL); latency_counter_reset (lc); + lc->bin_width = HISTOGRAM_DEFAULT_BIN_WIDTH; return (lc); } /* }}} latency_counter_t *latency_counter_create */ @@@ -83,8 -149,19 +149,19 @@@ void latency_counter_add (latency_count * subtract one from the cdtime_t value so that exactly 1.0 ms get sorted * accordingly. */ latency_ms = (size_t) CDTIME_T_TO_MS (latency - 1); - if (latency_ms < STATIC_ARRAY_SIZE (lc->histogram)) - lc->histogram[latency_ms]++; + + int bin = (int)(latency_ms / lc->bin_width); + if (bin >= HISTOGRAM_NUM_BINS) + { + change_bin_width(lc, latency_ms); + bin = (int)(latency_ms / lc->bin_width); + if (bin >= HISTOGRAM_NUM_BINS) + { + ERROR("utils_latency: latency_counter_add: Invalid bin %d", bin); + return; + } + } + lc->histogram[bin]++; } /* }}} void latency_counter_add */ void latency_counter_reset (latency_counter_t *lc) /* {{{ */ @@@ -92,7 -169,11 +169,11 @@@ if (lc == NULL) return; + int bin_width = lc->bin_width; memset (lc, 0, sizeof (*lc)); + + /* preserve bin width */ + lc->bin_width = bin_width; lc->start_time = cdtime (); } /* }}} void latency_counter_reset */ @@@ -128,7 -209,7 +209,7 @@@ cdtime_t latency_counter_get_average (l { double average; - if (lc == NULL) + if ((lc == NULL) || (lc->num == 0)) return (0); average = CDTIME_T_TO_DOUBLE (lc->sum) / ((double) lc->num); @@@ -146,14 -227,14 +227,14 @@@ cdtime_t latency_counter_get_percentil int sum; size_t i; - if ((lc == NULL) || !((percent > 0.0) && (percent < 100.0))) + if ((lc == NULL) || (lc->num == 0) || !((percent > 0.0) && (percent < 100.0))) return (0); /* Find index i so that at least "percent" events are within i+1 ms. */ percent_upper = 0.0; percent_lower = 0.0; sum = 0; - for (i = 0; i < LATENCY_HISTOGRAM_SIZE; i++) + for (i = 0; i < HISTOGRAM_NUM_BINS; i++) { percent_lower = percent_upper; sum += lc->histogram[i]; @@@ -166,14 -247,14 +247,14 @@@ break; } - if (i >= LATENCY_HISTOGRAM_SIZE) + if (i >= HISTOGRAM_NUM_BINS) return (0); assert (percent_upper >= percent); assert (percent_lower < percent); - ms_upper = (double) (i + 1); - ms_lower = (double) i; + ms_upper = (double) ( (i + 1) * lc->bin_width ); + ms_lower = (double) ( i * lc->bin_width ); if (i == 0) return (MS_TO_CDTIME_T (ms_upper));