X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=src%2Futils_threshold.c;h=b14c79b5c6005bfdb86b9a3abfebb266a592897e;hb=0afea60611f115a28b8ec331aba610e3038c1ef2;hp=4da8943cdbf3c4a403dfb2e2b7b5ffb5f5405813;hpb=a8d1499f57d3ffaff4c0ef3259a9fbf21b2953c5;p=collectd.git diff --git a/src/utils_threshold.c b/src/utils_threshold.c index 4da8943c..b14c79b5 100644 --- a/src/utils_threshold.c +++ b/src/utils_threshold.c @@ -1,6 +1,9 @@ /** * collectd - src/utils_threshold.c - * Copyright (C) 2007,2008 Florian octo Forster + * Copyright (C) 2007-2009 Florian octo Forster + * Copyright (C) 2008-2009 Sebastian Harl + * Copyright (C) 2009 Andrés J. Díaz + * * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -17,6 +20,8 @@ * * Author: * Florian octo Forster + * Sebastian Harl + * Andrés J. Díaz **/ #include "collectd.h" @@ -24,6 +29,7 @@ #include "plugin.h" #include "utils_avltree.h" #include "utils_cache.h" +#include "utils_threshold.h" #include #include @@ -33,22 +39,7 @@ * {{{ */ #define UT_FLAG_INVERT 0x01 #define UT_FLAG_PERSIST 0x02 - -typedef struct threshold_s -{ - char host[DATA_MAX_NAME_LEN]; - char plugin[DATA_MAX_NAME_LEN]; - char plugin_instance[DATA_MAX_NAME_LEN]; - char type[DATA_MAX_NAME_LEN]; - char type_instance[DATA_MAX_NAME_LEN]; - char data_source[DATA_MAX_NAME_LEN]; - gauge_t warning_min; - gauge_t warning_max; - gauge_t failure_min; - gauge_t failure_max; - int flags; - struct threshold_s *next; -} threshold_t; +#define UT_FLAG_PERCENTAGE 0x04 /* }}} */ /* @@ -184,9 +175,8 @@ static int ut_config_type_instance (threshold_t *th, oconfig_item_t *ci) return (-1); } - strncpy (th->type_instance, ci->values[0].value.string, + sstrncpy (th->type_instance, ci->values[0].value.string, sizeof (th->type_instance)); - th->type_instance[sizeof (th->type_instance) - 1] = '\0'; return (0); } /* int ut_config_type_instance */ @@ -263,6 +253,54 @@ static int ut_config_type_persist (threshold_t *th, oconfig_item_t *ci) return (0); } /* int ut_config_type_persist */ +static int ut_config_type_percentage(threshold_t *th, oconfig_item_t *ci) +{ + if ((ci->values_num != 1) + || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN)) + { + WARNING ("threshold values: The `Percentage' option needs exactly one " + "boolean argument."); + return (-1); + } + + if (ci->values[0].value.boolean) + th->flags |= UT_FLAG_PERCENTAGE; + else + th->flags &= ~UT_FLAG_PERCENTAGE; + + return (0); +} /* int ut_config_type_percentage */ + +static int ut_config_type_hits (threshold_t *th, oconfig_item_t *ci) +{ + if ((ci->values_num != 1) + || (ci->values[0].type != OCONFIG_TYPE_NUMBER)) + { + WARNING ("threshold values: The `%s' option needs exactly one " + "number argument.", ci->key); + return (-1); + } + + th->hits = ci->values[0].value.number; + + return (0); +} /* int ut_config_type_hits */ + +static int ut_config_type_hysteresis (threshold_t *th, oconfig_item_t *ci) +{ + if ((ci->values_num != 1) + || (ci->values[0].type != OCONFIG_TYPE_NUMBER)) + { + WARNING ("threshold values: The `%s' option needs exactly one " + "number argument.", ci->key); + return (-1); + } + + th->hysteresis = ci->values[0].value.number; + + return (0); +} /* int ut_config_type_hysteresis */ + static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci) { int i; @@ -284,13 +322,14 @@ static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci) } memcpy (&th, th_orig, sizeof (th)); - strncpy (th.type, ci->values[0].value.string, sizeof (th.type)); - th.type[sizeof (th.type) - 1] = '\0'; + sstrncpy (th.type, ci->values[0].value.string, sizeof (th.type)); th.warning_min = NAN; th.warning_max = NAN; th.failure_min = NAN; th.failure_max = NAN; + th.hits = 0; + th.hysteresis = 0; for (i = 0; i < ci->children_num; i++) { @@ -311,6 +350,12 @@ static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci) status = ut_config_type_invert (&th, option); else if (strcasecmp ("Persist", option->key) == 0) status = ut_config_type_persist (&th, option); + else if (strcasecmp ("Percentage", option->key) == 0) + status = ut_config_type_percentage (&th, option); + else if (strcasecmp ("Hits", option->key) == 0) + status = ut_config_type_hits (&th, option); + else if (strcasecmp ("Hysteresis", option->key) == 0) + status = ut_config_type_hysteresis (&th, option); else { WARNING ("threshold values: Option `%s' not allowed inside a `Type' " @@ -340,9 +385,8 @@ static int ut_config_plugin_instance (threshold_t *th, oconfig_item_t *ci) return (-1); } - strncpy (th->plugin_instance, ci->values[0].value.string, + sstrncpy (th->plugin_instance, ci->values[0].value.string, sizeof (th->plugin_instance)); - th->plugin_instance[sizeof (th->plugin_instance) - 1] = '\0'; return (0); } /* int ut_config_plugin_instance */ @@ -369,8 +413,7 @@ static int ut_config_plugin (const threshold_t *th_orig, oconfig_item_t *ci) } memcpy (&th, th_orig, sizeof (th)); - strncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin)); - th.plugin[sizeof (th.plugin) - 1] = '\0'; + sstrncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin)); for (i = 0; i < ci->children_num; i++) { @@ -417,8 +460,7 @@ static int ut_config_host (const threshold_t *th_orig, oconfig_item_t *ci) } memcpy (&th, th_orig, sizeof (th)); - strncpy (th.host, ci->values[0].value.string, sizeof (th.host)); - th.host[sizeof (th.host) - 1] = '\0'; + sstrncpy (th.host, ci->values[0].value.string, sizeof (th.host)); for (i = 0; i < ci->children_num; i++) { @@ -472,6 +514,9 @@ int ut_config (const oconfig_item_t *ci) th.warning_max = NAN; th.failure_min = NAN; th.failure_max = NAN; + + th.hits = 0; + th.hysteresis = 0; for (i = 0; i < ci->children_num; i++) { @@ -501,46 +546,45 @@ int ut_config (const oconfig_item_t *ci) */ /* }}} */ -static threshold_t *threshold_search (const data_set_t *ds, - const value_list_t *vl) +static threshold_t *threshold_search (const value_list_t *vl) { threshold_t *th; if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, NULL, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, NULL, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get (vl->host, "", NULL, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, "", NULL, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, NULL, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, NULL, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", "", NULL, - ds->type, vl->type_instance)) != NULL) + vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", "", NULL, - ds->type, NULL)) != NULL) + vl->type, NULL)) != NULL) return (th); return (NULL); @@ -568,6 +612,22 @@ static int ut_report_state (const data_set_t *ds, int status; + /* Check if hits matched */ + if ( (th->hits != 0) ) + { + int hits = uc_get_hits(ds,vl); + /* The STATE_OKAY always reset hits, or if hits reaise the limit */ + if ( (state == STATE_OKAY) || (hits > th->hits) ) + { + DEBUG("ut_report_state: reset uc_get_hits = 0"); + uc_set_hits(ds,vl,0); /* reset hit counter and notify */ + } else { + DEBUG("ut_report_state: th->hits = %d, uc_get_hits = %d",th->hits,uc_get_hits(ds,vl)); + (void) uc_inc_hits(ds,vl,1); /* increase hit counter */ + return (0); + } + } /* end check hits */ + state_old = uc_get_state (ds, vl); /* If the state didn't change, only report if `persistent' is specified and @@ -597,35 +657,43 @@ static int ut_report_state (const data_set_t *ds, n.time = vl->time; - status = snprintf (buf, bufsize, "Host %s, plugin %s", + status = ssnprintf (buf, bufsize, "Host %s, plugin %s", vl->host, vl->plugin); buf += status; bufsize -= status; if (vl->plugin_instance[0] != '\0') { - status = snprintf (buf, bufsize, " (instance %s)", + status = ssnprintf (buf, bufsize, " (instance %s)", vl->plugin_instance); buf += status; bufsize -= status; } - status = snprintf (buf, bufsize, " type %s", ds->type); + status = ssnprintf (buf, bufsize, " type %s", vl->type); buf += status; bufsize -= status; if (vl->type_instance[0] != '\0') { - status = snprintf (buf, bufsize, " (instance %s)", + status = ssnprintf (buf, bufsize, " (instance %s)", vl->type_instance); buf += status; bufsize -= status; } + plugin_notification_meta_add_string (&n, "DataSource", + ds->ds[ds_index].name); + plugin_notification_meta_add_double (&n, "CurrentValue", values[ds_index]); + plugin_notification_meta_add_double (&n, "WarningMin", th->warning_min); + plugin_notification_meta_add_double (&n, "WarningMax", th->warning_max); + plugin_notification_meta_add_double (&n, "FailureMin", th->failure_min); + plugin_notification_meta_add_double (&n, "FailureMax", th->failure_max); + /* Send an okay notification */ if (state == STATE_OKAY) { - status = snprintf (buf, bufsize, ": All data sources are within range again."); + status = ssnprintf (buf, bufsize, ": All data sources are within range again."); buf += status; bufsize -= status; } @@ -641,25 +709,54 @@ static int ut_report_state (const data_set_t *ds, { if (!isnan (min) && !isnan (max)) { - status = snprintf (buf, bufsize, ": Data source \"%s\" is currently " - "%f. That is within the %s region of %f and %f.", - ds->ds[ds_index].name, values[ds_index], - (state == STATE_ERROR) ? "failure" : "warning", - min, max); + status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " + "%f. That is within the %s region of %f%s and %f%s.", + ds->ds[ds_index].name, values[ds_index], + (state == STATE_ERROR) ? "failure" : "warning", + min, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "", + max, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : ""); } else { - status = snprintf (buf, bufsize, ": Data source \"%s\" is currently " - "%f. That is %s the %s threshold of %f.", + status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " + "%f. That is %s the %s threshold of %f%s.", ds->ds[ds_index].name, values[ds_index], isnan (min) ? "below" : "above", (state == STATE_ERROR) ? "failure" : "warning", - isnan (min) ? max : min); + isnan (min) ? max : min, + ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : ""); } } + else if (th->flags & UT_FLAG_PERCENTAGE) + { + gauge_t value; + gauge_t sum; + int i; + + sum = 0.0; + for (i = 0; i < vl->values_len; i++) + { + if (isnan (values[i])) + continue; + + sum += values[i]; + } + + if (sum == 0.0) + value = NAN; + else + value = 100.0 * values[ds_index] / sum; + + status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " + "%g (%.2f%%). That is %s the %s threshold of %.2f%%.", + ds->ds[ds_index].name, values[ds_index], value, + (value < min) ? "below" : "above", + (state == STATE_ERROR) ? "failure" : "warning", + (value < min) ? min : max); + } else /* is not inverted */ { - status = snprintf (buf, bufsize, ": Data source \"%s\" is currently " + status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " "%f. That is %s the %s threshold of %f.", ds->ds[ds_index].name, values[ds_index], (values[ds_index] < min) ? "below" : "above", @@ -672,6 +769,7 @@ static int ut_report_state (const data_set_t *ds, plugin_dispatch_notification (&n); + plugin_notification_meta_free (n.meta); return (0); } /* }}} int ut_report_state */ @@ -686,7 +784,7 @@ static int ut_report_state (const data_set_t *ds, * Does not fail. */ static int ut_check_one_data_source (const data_set_t *ds, - const value_list_t *vl, + const value_list_t __attribute__((unused)) *vl, const threshold_t *th, const gauge_t *values, int ds_index) @@ -694,12 +792,16 @@ static int ut_check_one_data_source (const data_set_t *ds, const char *ds_name; int is_warning = 0; int is_failure = 0; + int prev_state = STATE_OKAY; /* check if this threshold applies to this data source */ - ds_name = ds->ds[ds_index].name; - if ((th->data_source[0] != 0) - && (strcmp (ds_name, th->data_source) != 0)) - return (STATE_OKAY); + if (ds != NULL) + { + ds_name = ds->ds[ds_index].name; + if ((th->data_source[0] != 0) + && (strcmp (ds_name, th->data_source) != 0)) + return (STATE_OKAY); + } if ((th->flags & UT_FLAG_INVERT) != 0) { @@ -707,15 +809,39 @@ static int ut_check_one_data_source (const data_set_t *ds, is_failure--; } - if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index])) - || (!isnan (th->failure_max) && (th->failure_max < values[ds_index]))) - is_failure++; + /* XXX: This is an experimental code, not optimized, not fast, not reliable, + * and probably, do not work as you expect. Enjoy! :D */ + if ( (th->hysteresis > 0) && ((prev_state = uc_get_state(ds,vl)) != STATE_OKAY) ) + { + switch(prev_state) + { + case STATE_ERROR: + if ( (!isnan (th->failure_min) && ((th->failure_min + th->hysteresis) < values[ds_index])) || + (!isnan (th->failure_max) && ((th->failure_max - th->hysteresis) > values[ds_index])) ) + return (STATE_OKAY); + else + is_failure++; + case STATE_WARNING: + if ( (!isnan (th->warning_min) && ((th->warning_min + th->hysteresis) < values[ds_index])) || + (!isnan (th->warning_max) && ((th->warning_max - th->hysteresis) > values[ds_index])) ) + return (STATE_OKAY); + else + is_warning++; + } + } + else { /* no hysteresis */ + if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index])) + || (!isnan (th->failure_max) && (th->failure_max < values[ds_index]))) + is_failure++; + + if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index])) + || (!isnan (th->warning_max) && (th->warning_max < values[ds_index]))) + is_warning++; + } + if (is_failure != 0) return (STATE_ERROR); - if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index])) - || (!isnan (th->warning_max) && (th->warning_max < values[ds_index]))) - is_warning++; if (is_warning != 0) return (STATE_WARNING); @@ -739,12 +865,49 @@ static int ut_check_one_threshold (const data_set_t *ds, int ret = -1; int ds_index = -1; int i; + gauge_t values_copy[ds->ds_num]; + + memcpy (values_copy, values, sizeof (values_copy)); + + if ((th->flags & UT_FLAG_PERCENTAGE) != 0) + { + int num = 0; + gauge_t sum=0.0; + + if (ds->ds_num == 1) + { + WARNING ("ut_check_one_threshold: The %s type has only one data " + "source, but you have configured to check this as a percentage. " + "That doesn't make much sense, because the percentage will always " + "be 100%%!", ds->type); + } + + /* Prepare `sum' and `num'. */ + for (i = 0; i < ds->ds_num; i++) + if (!isnan (values[i])) + { + num++; + sum += values[i]; + } + + if ((num == 0) /* All data sources are undefined. */ + || (sum == 0.0)) /* Sum is zero, cannot calculate percentage. */ + { + for (i = 0; i < ds->ds_num; i++) + values_copy[i] = NAN; + } + else /* We can actually calculate the percentage. */ + { + for (i = 0; i < ds->ds_num; i++) + values_copy[i] = 100.0 * values[i] / sum; + } + } /* if (UT_FLAG_PERCENTAGE) */ for (i = 0; i < ds->ds_num; i++) { int status; - status = ut_check_one_data_source (ds, vl, th, values, i); + status = ut_check_one_data_source (ds, vl, th, values_copy, i); if (ret < status) { ret = status; @@ -783,7 +946,7 @@ int ut_check_threshold (const data_set_t *ds, const value_list_t *vl) /* Is this lock really necessary? So far, thresholds are only inserted at * startup. -octo */ pthread_mutex_lock (&threshold_lock); - th = threshold_search (ds, vl); + th = threshold_search (vl); pthread_mutex_unlock (&threshold_lock); if (th == NULL) return (0); @@ -830,8 +993,18 @@ int ut_check_threshold (const data_set_t *ds, const value_list_t *vl) return (0); } /* }}} int ut_check_threshold */ +/* + * int ut_check_interesting (PUBLIC) + * + * Given an identification returns + * 0: No threshold is defined. + * 1: A threshold has been found. The flag `persist' is off. + * 2: A threshold has been found. The flag `persist' is on. + * (That is, it is expected that many notifications are sent until the + * problem disappears.) + */ int ut_check_interesting (const char *name) -{ +{ /* {{{ */ char *name_copy = NULL; char *host = NULL; char *plugin = NULL; @@ -859,38 +1032,49 @@ int ut_check_interesting (const char *name) if (status != 0) { ERROR ("ut_check_interesting: parse_identifier failed."); + sfree (name_copy); return (-1); } memset (&ds, '\0', sizeof (ds)); memset (&vl, '\0', sizeof (vl)); - strncpy (vl.host, host, sizeof (vl.host)); - vl.host[sizeof (vl.host) - 1] = '\0'; - strncpy (vl.plugin, plugin, sizeof (vl.plugin)); - vl.plugin[sizeof (vl.plugin) - 1] = '\0'; + sstrncpy (vl.host, host, sizeof (vl.host)); + sstrncpy (vl.plugin, plugin, sizeof (vl.plugin)); if (plugin_instance != NULL) - { - strncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance)); - vl.plugin_instance[sizeof (vl.plugin_instance) - 1] = '\0'; - } - strncpy (ds.type, type, sizeof (ds.type)); - ds.type[sizeof (ds.type) - 1] = '\0'; + sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance)); + sstrncpy (ds.type, type, sizeof (ds.type)); + sstrncpy (vl.type, type, sizeof (vl.type)); if (type_instance != NULL) - { - strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance)); - vl.type_instance[sizeof (vl.type_instance) - 1] = '\0'; - } + sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance)); sfree (name_copy); host = plugin = plugin_instance = type = type_instance = NULL; - th = threshold_search (&ds, &vl); + th = threshold_search (&vl); if (th == NULL) return (0); if ((th->flags & UT_FLAG_PERSIST) == 0) return (1); return (2); -} /* int ut_check_interesting */ +} /* }}} int ut_check_interesting */ + +int ut_search_threshold (const value_list_t *vl, /* {{{ */ + threshold_t *ret_threshold) +{ + threshold_t *t; + + if (vl == NULL) + return (EINVAL); + + t = threshold_search (vl); + if (t == NULL) + return (ENOENT); + + memcpy (ret_threshold, t, sizeof (*ret_threshold)); + ret_threshold->next = NULL; + + return (0); +} /* }}} int ut_search_threshold */ -/* vim: set sw=2 ts=8 sts=2 tw=78 fdm=marker : */ +/* vim: set sw=2 ts=8 sts=2 tw=78 et fdm=marker : */