2 * collectd - src/utils_latency.c
3 * Copyright (C) 2013 Florian Forster
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Florian Forster <ff at octo.it>
31 #include "utils_latency.h"
/* Largest value of a signed 64-bit integer. latency_counter_add() compares
 * incoming latencies against this limit.
 * NOTE(review): presumably a fallback for platforms whose <limits.h> lacks
 * LLONG_MAX -- confirm that the definition is guarded by an #ifndef. */
37 #define LLONG_MAX 9223372036854775807LL
/* Initial width of one histogram bin; latency_counter_create() stores it in
 * bin_width. Pre-defining the macro at compile time overrides the value. */
40 #ifndef HISTOGRAM_DEFAULT_BIN_WIDTH
41 /* 1048576 = 2^20 ^= 1/1024 s */
42 #define HISTOGRAM_DEFAULT_BIN_WIDTH 1048576
/* State of one latency counter: a fixed-size histogram of observed latencies.
 * Besides the histogram, the functions in this file access the members
 * start_time, sum, num, min, max and bin_width. */
45 struct latency_counter_s {
/* Number of recorded latencies per bin. latency_counter_add() increments bin
 * (latency - 1) / bin_width, so each bin has an exclusive lower and an
 * inclusive upper bound. */
55 int histogram[HISTOGRAM_NUM_BINS];
59 * Histogram represents the distribution of data, it has a list of "bins".
60 * Each bin represents an interval and has a count (frequency) of the
61 * number of values that fall within its interval.
63 * Histogram's range is determined by the number of bins and the bin width,
64 * There are 1000 bins and all bins have the same width of default 1 millisecond.
65 * When a value above this range is added, Histogram's range is increased by
66 * increasing the bin width (note that number of bins remains always at 1000).
67 * This operation of increasing bin width is a little expensive as each bin needs
68 * to be visited to update its count. To reduce frequent change of bin width,
69 * new bin width will be the next nearest power of 2. Example: 2, 4, 8, 16, 32,
70 * 64, 128, 256, 512, 1024, 2048, 4096, ...
72 * So, if the required bin width is 300, then new bin width will be 512 as it is
73 * the next nearest power of 2.
/* Widen the histogram bins so that `latency` fits into the histogram.
 *
 * lc:      counter whose bin_width is replaced.
 * latency: value that did not fit into the current histogram range.
 *
 * The new bin width is 2 raised to ceil(log2((latency + 1) /
 * HISTOGRAM_NUM_BINS)), computed in double precision and rounded to the
 * nearest integer by adding .5 before the cast. If the counter already holds
 * data (num > 0), the count of every old bin i is added to bin
 * (size_t)(i * old_bin_width / new_bin_width) and the old bin is cleared.
 * The change is reported via DEBUG(). */
75 static void change_bin_width(latency_counter_t *lc, cdtime_t latency) /* {{{ */
77 /* This function is called because the new value is above histogram's range.
78 * First find the required bin width:
79 * requiredBinWidth = (value + 1) / numBins
80 * then get the next nearest power of 2
81 * newBinWidth = 2^(ceil(log2(requiredBinWidth)))
83 double required_bin_width =
84 ((double)(latency + 1)) / ((double)HISTOGRAM_NUM_BINS);
85 double required_bin_width_logbase2 = log(required_bin_width) / log(2.0);
86 cdtime_t new_bin_width =
87 (cdtime_t)(pow(2.0, ceil(required_bin_width_logbase2)) + .5);
88 cdtime_t old_bin_width = lc->bin_width;
90 lc->bin_width = new_bin_width;
92 /* bin_width has been increased, now iterate through all bins and move the
93 * old bin's count to new bin. */
94 if (lc->num > 0) // if the histogram has data then iterate else skip
96 double width_change_ratio =
97 ((double)old_bin_width) / ((double)new_bin_width);
99 for (size_t i = 0; i < HISTOGRAM_NUM_BINS; i++) {
/* The cast truncates toward zero. When called from latency_counter_add()
 * the width has grown, so the ratio is below 1 and new_bin <= i. */
100 size_t new_bin = (size_t)(((double)i) * width_change_ratio);
/* NOTE(review): for a bin with new_bin == i the two statements below would
 * wipe its count; confirm such bins are skipped before reaching them. */
105 lc->histogram[new_bin] += lc->histogram[i];
106 lc->histogram[i] = 0;
110 DEBUG("utils_latency: change_bin_width: latency = %.3f; "
111 "old_bin_width = %.3f; new_bin_width = %.3f;",
112 CDTIME_T_TO_DOUBLE(latency), CDTIME_T_TO_DOUBLE(old_bin_width),
113 CDTIME_T_TO_DOUBLE(new_bin_width));
114 } /* }}} void change_bin_width */
/* Allocate a zero-initialized latency counter with calloc(), set its bin
 * width to HISTOGRAM_DEFAULT_BIN_WIDTH and run latency_counter_reset() on it.
 * NOTE(review): presumably returns the new counter, or NULL when calloc()
 * fails, and the result is to be released with latency_counter_destroy() --
 * confirm. */
116 latency_counter_t *latency_counter_create(void) /* {{{ */
118 latency_counter_t *lc;
120 lc = calloc(1, sizeof(*lc));
/* bin_width must be set before the reset: latency_counter_reset() divides by
 * it and carries it over into the cleared structure. */
124 lc->bin_width = HISTOGRAM_DEFAULT_BIN_WIDTH;
125 latency_counter_reset(lc);
127 } /* }}} latency_counter_t *latency_counter_create */
/* NOTE(review): judging by the name, this releases a counter created by
 * latency_counter_create(); presumably it only frees lc -- confirm, including
 * whether lc == NULL is accepted. */
129 void latency_counter_destroy(latency_counter_t *lc) /* {{{ */
132 } /* }}} void latency_counter_destroy */
/* Record one latency sample in the counter.
 *
 * lc:      counter to update.
 * latency: sample to record, as cdtime_t.
 *
 * The guard at the top singles out lc == NULL, latency == 0 and latencies
 * above LLONG_MAX (NOTE(review): presumably such samples are dropped
 * silently -- confirm). min and max are both set to the sample while they
 * are still 0, and are otherwise compared against it. The sample is counted
 * in bin (latency - 1) / bin_width. If that index lies beyond the last bin,
 * change_bin_width() is called and the index is recomputed; an index that is
 * still out of range is reported via ERROR(). */
134 void latency_counter_add(latency_counter_t *lc, cdtime_t latency) /* {{{ */
138 if ((lc == NULL) || (latency == 0) || (latency > ((cdtime_t)LLONG_MAX)))
144 if ((lc->min == 0) && (lc->max == 0))
145 lc->min = lc->max = latency;
146 if (lc->min > latency)
148 if (lc->max < latency)
151 /* A latency of _exactly_ 1.0 ms is stored in the buffer 0, so
152 * subtract one from the cdtime_t value so that exactly 1.0 ms get sorted
154 bin = (latency - 1) / lc->bin_width;
155 if (bin >= HISTOGRAM_NUM_BINS) {
156 change_bin_width(lc, latency);
157 bin = (latency - 1) / lc->bin_width;
158 if (bin >= HISTOGRAM_NUM_BINS) {
159 ERROR("utils_latency: latency_counter_add: Invalid bin: %" PRIu64, bin);
163 lc->histogram[bin]++;
164 } /* }}} void latency_counter_add */
/* Clear all statistics of the counter while keeping its bin width, and
 * restart the measurement interval.
 *
 * The whole structure is zeroed with memset(); afterwards bin_width is
 * restored and start_time is set to cdtime(). Before clearing, the bin width
 * to be restored is halved if the counter holds data (num > 0), the current
 * width is at least twice HISTOGRAM_DEFAULT_BIN_WIDTH, and the bin of the
 * largest recorded latency is below HISTOGRAM_NUM_BINS / REDUCE_THRESHOLD.
 *
 * NOTE(review): (lc->max - 1) wraps around when max is 0, assuming cdtime_t
 * is unsigned. max_bin is used in the halving condition (which also requires
 * num > 0) and in the DEBUG() output. */
166 void latency_counter_reset(latency_counter_t *lc) /* {{{ */
171 cdtime_t bin_width = lc->bin_width;
172 cdtime_t max_bin = (lc->max - 1) / lc->bin_width;
175 If max latency is REDUCE_THRESHOLD times less than histogram's range,
176 then cut it in half. REDUCE_THRESHOLD must be >= 2.
177 Value of 4 is selected to reduce frequent changes of bin width.
179 #define REDUCE_THRESHOLD 4
180 if ((lc->num > 0) && (lc->bin_width >= HISTOGRAM_DEFAULT_BIN_WIDTH * 2) &&
181 (max_bin < HISTOGRAM_NUM_BINS / REDUCE_THRESHOLD)) {
182 /* new bin width will be the previous power of 2 */
183 bin_width = bin_width / 2;
185 DEBUG("utils_latency: latency_counter_reset: max_latency = %.3f; "
186 "max_bin = %" PRIu64 "; old_bin_width = %.3f; new_bin_width = %.3f;",
187 CDTIME_T_TO_DOUBLE(lc->max), max_bin,
188 CDTIME_T_TO_DOUBLE(lc->bin_width), CDTIME_T_TO_DOUBLE(bin_width));
191 memset(lc, 0, sizeof(*lc));
193 /* preserve bin width */
194 lc->bin_width = bin_width;
195 lc->start_time = cdtime();
196 } /* }}} void latency_counter_reset */
/* NOTE(review): judging by the name and return type, this reports the
 * smallest latency recorded in lc (presumably lc->min) -- confirm, including
 * the result for lc == NULL. */
198 cdtime_t latency_counter_get_min(latency_counter_t *lc) /* {{{ */
203 } /* }}} cdtime_t latency_counter_get_min */
/* NOTE(review): judging by the name and return type, this reports the
 * largest latency recorded in lc (presumably lc->max) -- confirm, including
 * the result for lc == NULL. */
205 cdtime_t latency_counter_get_max(latency_counter_t *lc) /* {{{ */
210 } /* }}} cdtime_t latency_counter_get_max */
/* NOTE(review): judging by the name and return type, this reports the sum of
 * all latencies recorded in lc (presumably lc->sum) -- confirm, including
 * the result for lc == NULL. */
212 cdtime_t latency_counter_get_sum(latency_counter_t *lc) /* {{{ */
217 } /* }}} cdtime_t latency_counter_get_sum */
/* NOTE(review): judging by the name and return type, this reports how many
 * latencies have been recorded in lc (presumably lc->num) -- confirm,
 * including the result for lc == NULL. */
219 size_t latency_counter_get_num(latency_counter_t *lc) /* {{{ */
224 } /* }}} size_t latency_counter_get_num */
/* Mean of the recorded latencies: sum / num, computed in double precision
 * via CDTIME_T_TO_DOUBLE() and converted back with DOUBLE_TO_CDTIME_T().
 * lc == NULL and an empty counter (num == 0) are special-cased before the
 * division (NOTE(review): presumably by returning 0 -- confirm). */
226 cdtime_t latency_counter_get_average(latency_counter_t *lc) /* {{{ */
230 if ((lc == NULL) || (lc->num == 0))
233 average = CDTIME_T_TO_DOUBLE(lc->sum) / ((double)lc->num);
234 return DOUBLE_TO_CDTIME_T(average);
235 } /* }}} cdtime_t latency_counter_get_average */
/* Estimate the latency below which `percent` percent of the recorded samples
 * fall.
 *
 * lc:      counter to query.
 * percent: requested percentile; the guard accepts only 0 < percent < 100
 *          (NaN fails the test as well).
 *
 * The bins are accumulated in ascending order until the cumulative share of
 * samples reaches `percent`. The result is interpolated linearly inside that
 * bin: i * bin_width + p * bin_width, where
 * p = (percent - percent_lower) / (percent_upper - percent_lower).
 * NOTE(review): the bare `return lc->bin_width` presumably handles a match in
 * the first bin (i == 0) -- confirm. */
237 cdtime_t latency_counter_get_percentile(latency_counter_t *lc, /* {{{ */
239 double percent_upper;
240 double percent_lower;
242 cdtime_t latency_lower;
243 cdtime_t latency_interpolated;
247 if ((lc == NULL) || (lc->num == 0) || !((percent > 0.0) && (percent < 100.0)))
250 /* Find index i so that at least "percent" events are within i+1 ms. */
254 for (i = 0; i < HISTOGRAM_NUM_BINS; i++) {
255 percent_lower = percent_upper;
256 sum += lc->histogram[i];
/* Cumulative share of samples in bins 0..i, in percent. */
260 percent_upper = 100.0 * ((double)sum) / ((double)lc->num);
262 if (percent_upper >= percent)
266 if (i >= HISTOGRAM_NUM_BINS)
269 assert(percent_upper >= percent);
270 assert(percent_lower < percent);
273 return lc->bin_width;
/* Lower edge of the matching bin; p is the fraction of the bin to add. */
275 latency_lower = ((cdtime_t)i) * lc->bin_width;
276 p = (percent - percent_lower) / (percent_upper - percent_lower);
278 latency_interpolated =
279 latency_lower + DOUBLE_TO_CDTIME_T(p * CDTIME_T_TO_DOUBLE(lc->bin_width));
281 DEBUG("latency_counter_get_percentile: latency_interpolated = %.3f",
282 CDTIME_T_TO_DOUBLE(latency_interpolated));
283 return latency_interpolated;
284 } /* }}} cdtime_t latency_counter_get_percentile */
/* Estimate the rate of recorded samples whose latency lies between `lower`
 * (exclusive) and `upper` (inclusive).
 *
 * lc:    counter to query; NULL and an empty counter are special-cased by
 *        the first test.
 * lower: exclusive lower bound of the latency range.
 * upper: inclusive upper bound; the `upper && ...` test treats 0 specially
 *        (NOTE(review): presumably 0 means "no upper limit" -- confirm).
 * now:   current time; the sample count is divided by
 *        CDTIME_T_TO_DOUBLE(now - lc->start_time).
 *
 * The counts of all bins from lower_bin to upper_bin are summed. The parts of
 * the two boundary bins that lie outside the range are then subtracted,
 * in proportion to the share of the bin width they cover.
 * NOTE(review): the final division is not visibly guarded against
 * now == lc->start_time. */
286 double latency_counter_get_rate(const latency_counter_t *lc, /* {{{ */
287 cdtime_t lower, cdtime_t upper,
288 const cdtime_t now) {
289 if ((lc == NULL) || (lc->num == 0))
292 if (upper && (upper < lower))
297 /* Buckets have an exclusive lower bound and an inclusive upper bound. That
298 * means that the first bucket, index 0, represents (0-bin_width]. That means
299 * that latency==bin_width needs to result in bin=0, that's why we need to
300 * subtract one before dividing by bin_width. */
301 cdtime_t lower_bin = 0;
303 /* lower is *exclusive* => determine bucket for lower+1 */
304 lower_bin = ((lower + 1) - 1) / lc->bin_width;
306 /* lower is greater than the longest latency observed => rate is zero. */
307 if (lower_bin >= HISTOGRAM_NUM_BINS)
310 cdtime_t upper_bin = HISTOGRAM_NUM_BINS - 1;
312 upper_bin = (upper - 1) / lc->bin_width;
/* Clamp to the last bin so the summation below stays inside the array. */
314 if (upper_bin >= HISTOGRAM_NUM_BINS) {
315 upper_bin = HISTOGRAM_NUM_BINS - 1;
320 for (size_t i = lower_bin; i <= upper_bin; i++)
321 sum += lc->histogram[i];
324 /* Approximate ratio of requests in lower_bin, that fall between
325 * lower_bin_boundary and lower. This ratio is then subtracted from sum to
326 * increase accuracy. */
327 cdtime_t lower_bin_boundary = lower_bin * lc->bin_width;
328 assert(lower >= lower_bin_boundary);
330 (double)(lower - lower_bin_boundary) / ((double)lc->bin_width);
331 sum -= lower_ratio * lc->histogram[lower_bin];
335 /* As above: approximate ratio of requests in upper_bin, that fall between
336 * upper and upper_bin_boundary. */
337 cdtime_t upper_bin_boundary = (upper_bin + 1) * lc->bin_width;
338 assert(upper <= upper_bin_boundary);
339 double ratio = (double)(upper_bin_boundary - upper) / (double)lc->bin_width;
340 sum -= ratio * lc->histogram[upper_bin];
343 return sum / (CDTIME_T_TO_DOUBLE(now - lc->start_time));
344 } /* }}} double latency_counter_get_rate */