/**
* collectd - src/utils_threshold.c
- * Copyright (C) 2007,2008 Florian octo Forster
+ * Copyright (C) 2007-2009 Florian octo Forster
+ * Copyright (C) 2008-2009 Sebastian Harl
+ * Copyright (C) 2009 Andrés J. Díaz
+ *
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
*
* Author:
* Florian octo Forster <octo at verplant.org>
+ * Sebastian Harl <sh at tokkee.org>
+ * Andrés J. Díaz <ajdiaz at connectical.com>
**/
#include "collectd.h"
#include "plugin.h"
#include "utils_avltree.h"
#include "utils_cache.h"
+#include "utils_threshold.h"
#include <assert.h>
#include <pthread.h>
* {{{ */
#define UT_FLAG_INVERT 0x01
#define UT_FLAG_PERSIST 0x02
-
-typedef struct threshold_s
-{
- char host[DATA_MAX_NAME_LEN];
- char plugin[DATA_MAX_NAME_LEN];
- char plugin_instance[DATA_MAX_NAME_LEN];
- char type[DATA_MAX_NAME_LEN];
- char type_instance[DATA_MAX_NAME_LEN];
- char data_source[DATA_MAX_NAME_LEN];
- gauge_t warning_min;
- gauge_t warning_max;
- gauge_t failure_min;
- gauge_t failure_max;
- int flags;
- struct threshold_s *next;
-} threshold_t;
+#define UT_FLAG_PERCENTAGE 0x04
/* }}} */
/*
return (-1);
}
- strncpy (th->type_instance, ci->values[0].value.string,
+ sstrncpy (th->type_instance, ci->values[0].value.string,
sizeof (th->type_instance));
- th->type_instance[sizeof (th->type_instance) - 1] = '\0';
return (0);
} /* int ut_config_type_instance */
return (0);
} /* int ut_config_type_persist */
+static int ut_config_type_percentage(threshold_t *th, oconfig_item_t *ci)
+{
+ if ((ci->values_num != 1)
+ || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
+ {
+ WARNING ("threshold values: The `Percentage' option needs exactly one "
+ "boolean argument.");
+ return (-1);
+ }
+
+ if (ci->values[0].value.boolean)
+ th->flags |= UT_FLAG_PERCENTAGE;
+ else
+ th->flags &= ~UT_FLAG_PERCENTAGE;
+
+ return (0);
+} /* int ut_config_type_percentage */
+
+static int ut_config_type_hits (threshold_t *th, oconfig_item_t *ci)
+{
+ if ((ci->values_num != 1)
+ || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
+ {
+ WARNING ("threshold values: The `%s' option needs exactly one "
+ "number argument.", ci->key);
+ return (-1);
+ }
+
+ th->hits = ci->values[0].value.number;
+
+ return (0);
+} /* int ut_config_type_hits */
+
+static int ut_config_type_hysteresis (threshold_t *th, oconfig_item_t *ci)
+{
+ if ((ci->values_num != 1)
+ || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
+ {
+ WARNING ("threshold values: The `%s' option needs exactly one "
+ "number argument.", ci->key);
+ return (-1);
+ }
+
+ th->hysteresis = ci->values[0].value.number;
+
+ return (0);
+} /* int ut_config_type_hysteresis */
+
static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci)
{
int i;
}
memcpy (&th, th_orig, sizeof (th));
- strncpy (th.type, ci->values[0].value.string, sizeof (th.type));
- th.type[sizeof (th.type) - 1] = '\0';
+ sstrncpy (th.type, ci->values[0].value.string, sizeof (th.type));
th.warning_min = NAN;
th.warning_max = NAN;
th.failure_min = NAN;
th.failure_max = NAN;
+ th.hits = 0;
+ th.hysteresis = 0;
for (i = 0; i < ci->children_num; i++)
{
status = ut_config_type_invert (&th, option);
else if (strcasecmp ("Persist", option->key) == 0)
status = ut_config_type_persist (&th, option);
+ else if (strcasecmp ("Percentage", option->key) == 0)
+ status = ut_config_type_percentage (&th, option);
+ else if (strcasecmp ("Hits", option->key) == 0)
+ status = ut_config_type_hits (&th, option);
+ else if (strcasecmp ("Hysteresis", option->key) == 0)
+ status = ut_config_type_hysteresis (&th, option);
else
{
WARNING ("threshold values: Option `%s' not allowed inside a `Type' "
return (-1);
}
- strncpy (th->plugin_instance, ci->values[0].value.string,
+ sstrncpy (th->plugin_instance, ci->values[0].value.string,
sizeof (th->plugin_instance));
- th->plugin_instance[sizeof (th->plugin_instance) - 1] = '\0';
return (0);
} /* int ut_config_plugin_instance */
}
memcpy (&th, th_orig, sizeof (th));
- strncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin));
- th.plugin[sizeof (th.plugin) - 1] = '\0';
+ sstrncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin));
for (i = 0; i < ci->children_num; i++)
{
}
memcpy (&th, th_orig, sizeof (th));
- strncpy (th.host, ci->values[0].value.string, sizeof (th.host));
- th.host[sizeof (th.host) - 1] = '\0';
+ sstrncpy (th.host, ci->values[0].value.string, sizeof (th.host));
for (i = 0; i < ci->children_num; i++)
{
th.warning_max = NAN;
th.failure_min = NAN;
th.failure_max = NAN;
+
+ th.hits = 0;
+ th.hysteresis = 0;
for (i = 0; i < ci->children_num; i++)
{
*/
/* }}} */
-static threshold_t *threshold_search (const data_set_t *ds,
- const value_list_t *vl)
+static threshold_t *threshold_search (const value_list_t *vl)
{
threshold_t *th;
if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
else if ((th = threshold_get (vl->host, vl->plugin, NULL,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get (vl->host, vl->plugin, NULL,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
else if ((th = threshold_get (vl->host, "", NULL,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get (vl->host, "", NULL,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
else if ((th = threshold_get ("", vl->plugin, NULL,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get ("", vl->plugin, NULL,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
else if ((th = threshold_get ("", "", NULL,
- ds->type, vl->type_instance)) != NULL)
+ vl->type, vl->type_instance)) != NULL)
return (th);
else if ((th = threshold_get ("", "", NULL,
- ds->type, NULL)) != NULL)
+ vl->type, NULL)) != NULL)
return (th);
return (NULL);
int status;
+ /* Check if hits matched */
+ if ( (th->hits != 0) )
+ {
+ int hits = uc_get_hits(ds,vl);
+ /* The STATE_OKAY always reset hits, or if hits reaise the limit */
+ if ( (state == STATE_OKAY) || (hits > th->hits) )
+ {
+ DEBUG("ut_report_state: reset uc_get_hits = 0");
+ uc_set_hits(ds,vl,0); /* reset hit counter and notify */
+ } else {
+ DEBUG("ut_report_state: th->hits = %d, uc_get_hits = %d",th->hits,uc_get_hits(ds,vl));
+ (void) uc_inc_hits(ds,vl,1); /* increase hit counter */
+ return (0);
+ }
+ } /* end check hits */
+
state_old = uc_get_state (ds, vl);
/* If the state didn't change, only report if `persistent' is specified and
n.time = vl->time;
- status = snprintf (buf, bufsize, "Host %s, plugin %s",
+ status = ssnprintf (buf, bufsize, "Host %s, plugin %s",
vl->host, vl->plugin);
buf += status;
bufsize -= status;
if (vl->plugin_instance[0] != '\0')
{
- status = snprintf (buf, bufsize, " (instance %s)",
+ status = ssnprintf (buf, bufsize, " (instance %s)",
vl->plugin_instance);
buf += status;
bufsize -= status;
}
- status = snprintf (buf, bufsize, " type %s", ds->type);
+ status = ssnprintf (buf, bufsize, " type %s", vl->type);
buf += status;
bufsize -= status;
if (vl->type_instance[0] != '\0')
{
- status = snprintf (buf, bufsize, " (instance %s)",
+ status = ssnprintf (buf, bufsize, " (instance %s)",
vl->type_instance);
buf += status;
bufsize -= status;
}
+ plugin_notification_meta_add_string (&n, "DataSource",
+ ds->ds[ds_index].name);
+ plugin_notification_meta_add_double (&n, "CurrentValue", values[ds_index]);
+ plugin_notification_meta_add_double (&n, "WarningMin", th->warning_min);
+ plugin_notification_meta_add_double (&n, "WarningMax", th->warning_max);
+ plugin_notification_meta_add_double (&n, "FailureMin", th->failure_min);
+ plugin_notification_meta_add_double (&n, "FailureMax", th->failure_max);
+
/* Send an okay notification */
if (state == STATE_OKAY)
{
- status = snprintf (buf, bufsize, ": All data sources are within range again.");
+ status = ssnprintf (buf, bufsize, ": All data sources are within range again.");
buf += status;
bufsize -= status;
}
{
if (!isnan (min) && !isnan (max))
{
- status = snprintf (buf, bufsize, ": Data source \"%s\" is currently "
- "%f. That is within the %s region of %f and %f.",
- ds->ds[ds_index].name, values[ds_index],
- (state == STATE_ERROR) ? "failure" : "warning",
- min, max);
+ status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
+ "%f. That is within the %s region of %f%s and %f%s.",
+ ds->ds[ds_index].name, values[ds_index],
+ (state == STATE_ERROR) ? "failure" : "warning",
+ min, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "",
+ max, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "");
}
else
{
- status = snprintf (buf, bufsize, ": Data source \"%s\" is currently "
- "%f. That is %s the %s threshold of %f.",
+ status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
+ "%f. That is %s the %s threshold of %f%s.",
ds->ds[ds_index].name, values[ds_index],
isnan (min) ? "below" : "above",
(state == STATE_ERROR) ? "failure" : "warning",
- isnan (min) ? max : min);
+ isnan (min) ? max : min,
+ ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "");
}
}
+ else if (th->flags & UT_FLAG_PERCENTAGE)
+ {
+ gauge_t value;
+ gauge_t sum;
+ int i;
+
+ sum = 0.0;
+ for (i = 0; i < vl->values_len; i++)
+ {
+ if (isnan (values[i]))
+ continue;
+
+ sum += values[i];
+ }
+
+ if (sum == 0.0)
+ value = NAN;
+ else
+ value = 100.0 * values[ds_index] / sum;
+
+ status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
+ "%g (%.2f%%). That is %s the %s threshold of %.2f%%.",
+ ds->ds[ds_index].name, values[ds_index], value,
+ (value < min) ? "below" : "above",
+ (state == STATE_ERROR) ? "failure" : "warning",
+ (value < min) ? min : max);
+ }
else /* is not inverted */
{
- status = snprintf (buf, bufsize, ": Data source \"%s\" is currently "
+ status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
"%f. That is %s the %s threshold of %f.",
ds->ds[ds_index].name, values[ds_index],
(values[ds_index] < min) ? "below" : "above",
plugin_dispatch_notification (&n);
+ plugin_notification_meta_free (n.meta);
return (0);
} /* }}} int ut_report_state */
* Does not fail.
*/
static int ut_check_one_data_source (const data_set_t *ds,
- const value_list_t *vl,
+ const value_list_t __attribute__((unused)) *vl,
const threshold_t *th,
const gauge_t *values,
int ds_index)
const char *ds_name;
int is_warning = 0;
int is_failure = 0;
+ int prev_state = STATE_OKAY;
/* check if this threshold applies to this data source */
- ds_name = ds->ds[ds_index].name;
- if ((th->data_source[0] != 0)
- && (strcmp (ds_name, th->data_source) != 0))
- return (STATE_OKAY);
+ if (ds != NULL)
+ {
+ ds_name = ds->ds[ds_index].name;
+ if ((th->data_source[0] != 0)
+ && (strcmp (ds_name, th->data_source) != 0))
+ return (STATE_OKAY);
+ }
if ((th->flags & UT_FLAG_INVERT) != 0)
{
is_failure--;
}
- if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index]))
- || (!isnan (th->failure_max) && (th->failure_max < values[ds_index])))
- is_failure++;
+ /* XXX: This is an experimental code, not optimized, not fast, not reliable,
+ * and probably, do not work as you expect. Enjoy! :D */
+ if ( (th->hysteresis > 0) && ((prev_state = uc_get_state(ds,vl)) != STATE_OKAY) )
+ {
+ switch(prev_state)
+ {
+ case STATE_ERROR:
+ if ( (!isnan (th->failure_min) && ((th->failure_min + th->hysteresis) < values[ds_index])) ||
+ (!isnan (th->failure_max) && ((th->failure_max - th->hysteresis) > values[ds_index])) )
+ return (STATE_OKAY);
+ else
+ is_failure++;
+ case STATE_WARNING:
+ if ( (!isnan (th->warning_min) && ((th->warning_min + th->hysteresis) < values[ds_index])) ||
+ (!isnan (th->warning_max) && ((th->warning_max - th->hysteresis) > values[ds_index])) )
+ return (STATE_OKAY);
+ else
+ is_warning++;
+ }
+ }
+ else { /* no hysteresis */
+ if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index]))
+ || (!isnan (th->failure_max) && (th->failure_max < values[ds_index])))
+ is_failure++;
+
+ if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index]))
+ || (!isnan (th->warning_max) && (th->warning_max < values[ds_index])))
+ is_warning++;
+ }
+
if (is_failure != 0)
return (STATE_ERROR);
- if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index]))
- || (!isnan (th->warning_max) && (th->warning_max < values[ds_index])))
- is_warning++;
if (is_warning != 0)
return (STATE_WARNING);
int ret = -1;
int ds_index = -1;
int i;
+ gauge_t values_copy[ds->ds_num];
+
+ memcpy (values_copy, values, sizeof (values_copy));
+
+ if ((th->flags & UT_FLAG_PERCENTAGE) != 0)
+ {
+ int num = 0;
+ gauge_t sum=0.0;
+
+ if (ds->ds_num == 1)
+ {
+ WARNING ("ut_check_one_threshold: The %s type has only one data "
+ "source, but you have configured to check this as a percentage. "
+ "That doesn't make much sense, because the percentage will always "
+ "be 100%%!", ds->type);
+ }
+
+ /* Prepare `sum' and `num'. */
+ for (i = 0; i < ds->ds_num; i++)
+ if (!isnan (values[i]))
+ {
+ num++;
+ sum += values[i];
+ }
+
+ if ((num == 0) /* All data sources are undefined. */
+ || (sum == 0.0)) /* Sum is zero, cannot calculate percentage. */
+ {
+ for (i = 0; i < ds->ds_num; i++)
+ values_copy[i] = NAN;
+ }
+ else /* We can actually calculate the percentage. */
+ {
+ for (i = 0; i < ds->ds_num; i++)
+ values_copy[i] = 100.0 * values[i] / sum;
+ }
+ } /* if (UT_FLAG_PERCENTAGE) */
for (i = 0; i < ds->ds_num; i++)
{
int status;
- status = ut_check_one_data_source (ds, vl, th, values, i);
+ status = ut_check_one_data_source (ds, vl, th, values_copy, i);
if (ret < status)
{
ret = status;
/* Is this lock really necessary? So far, thresholds are only inserted at
* startup. -octo */
pthread_mutex_lock (&threshold_lock);
- th = threshold_search (ds, vl);
+ th = threshold_search (vl);
pthread_mutex_unlock (&threshold_lock);
if (th == NULL)
return (0);
return (0);
} /* }}} int ut_check_threshold */
+/*
+ * int ut_check_interesting (PUBLIC)
+ *
+ * Given an identification returns
+ * 0: No threshold is defined.
+ * 1: A threshold has been found. The flag `persist' is off.
+ * 2: A threshold has been found. The flag `persist' is on.
+ * (That is, it is expected that many notifications are sent until the
+ * problem disappears.)
+ */
int ut_check_interesting (const char *name)
-{
+{ /* {{{ */
char *name_copy = NULL;
char *host = NULL;
char *plugin = NULL;
memset (&ds, '\0', sizeof (ds));
memset (&vl, '\0', sizeof (vl));
- strncpy (vl.host, host, sizeof (vl.host));
- vl.host[sizeof (vl.host) - 1] = '\0';
- strncpy (vl.plugin, plugin, sizeof (vl.plugin));
- vl.plugin[sizeof (vl.plugin) - 1] = '\0';
+ sstrncpy (vl.host, host, sizeof (vl.host));
+ sstrncpy (vl.plugin, plugin, sizeof (vl.plugin));
if (plugin_instance != NULL)
- {
- strncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
- vl.plugin_instance[sizeof (vl.plugin_instance) - 1] = '\0';
- }
- strncpy (ds.type, type, sizeof (ds.type));
- ds.type[sizeof (ds.type) - 1] = '\0';
+ sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
+ sstrncpy (ds.type, type, sizeof (ds.type));
+ sstrncpy (vl.type, type, sizeof (vl.type));
if (type_instance != NULL)
- {
- strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
- vl.type_instance[sizeof (vl.type_instance) - 1] = '\0';
- }
+ sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
sfree (name_copy);
host = plugin = plugin_instance = type = type_instance = NULL;
- th = threshold_search (&ds, &vl);
+ th = threshold_search (&vl);
if (th == NULL)
return (0);
if ((th->flags & UT_FLAG_PERSIST) == 0)
return (1);
return (2);
-} /* int ut_check_interesting */
+} /* }}} int ut_check_interesting */
+
+int ut_search_threshold (const value_list_t *vl, /* {{{ */
+ threshold_t *ret_threshold)
+{
+ threshold_t *t;
+
+ if (vl == NULL)
+ return (EINVAL);
+
+ t = threshold_search (vl);
+ if (t == NULL)
+ return (ENOENT);
+
+ memcpy (ret_threshold, t, sizeof (*ret_threshold));
+ ret_threshold->next = NULL;
+
+ return (0);
+} /* }}} int ut_search_threshold */
-/* vim: set sw=2 ts=8 sts=2 tw=78 fdm=marker : */
+/* vim: set sw=2 ts=8 sts=2 tw=78 et fdm=marker : */