netapp plugin: Subtract snap_norm_used from norm_used in any case.
[collectd.git] / src / netapp.c
index d8077e2..b11e9fa 100644 (file)
@@ -29,6 +29,7 @@
 #include "utils_ignorelist.h"
 
 #include <netapp_api.h>
+#include <netapp_errno.h>
 
 #define HAS_ALL_FLAGS(has,needs) (((has) & (needs)) == (needs))
 
@@ -189,12 +190,15 @@ typedef struct {
 #define HAVE_VOLUME_USAGE_SNAP_USED     0x0080
 #define HAVE_VOLUME_USAGE_SIS_SAVED     0x0100
 #define HAVE_VOLUME_USAGE_ALL           0x01f0
+#define IS_VOLUME_USAGE_OFFLINE         0x0200
 struct data_volume_usage_s;
 typedef struct data_volume_usage_s data_volume_usage_t;
 struct data_volume_usage_s {
        char *name;
        uint32_t flags;
 
+       na_elem_t *snap_query;
+
        uint64_t norm_free;
        uint64_t norm_used;
        uint64_t snap_reserved;
@@ -343,6 +347,8 @@ static void free_cfg_volume_usage (cfg_volume_usage_t *cvu) /* {{{ */
        {
                data_volume_usage_t *next = data->next;
                sfree (data->name);
+               if (data->snap_query != NULL)
+                       na_elem_free(data->snap_query);
                sfree (data);
                data = next;
        }
@@ -384,6 +390,9 @@ static void free_host_config (host_config_t *hc) /* {{{ */
        free_cfg_volume_usage (hc->cfg_volume_usage);
        free_cfg_system (hc->cfg_system);
 
+       if (hc->srv != NULL)
+               na_server_close (hc->srv);
+
        sfree (hc);
 
        free_host_config (next);
@@ -470,8 +479,14 @@ static data_volume_usage_t *get_volume_usage (cfg_volume_usage_t *cvu, /* {{{ */
 
        if (ignore_capacity == 0)
                new->flags |= CFG_VOLUME_USAGE_DF;
-       if (ignore_snapshot == 0)
+       if (ignore_snapshot == 0) {
                new->flags |= CFG_VOLUME_USAGE_SNAP;
+               new->snap_query = na_elem_new ("snapshot-list-info");
+               na_child_add_string(new->snap_query, "target-type", "volume");
+               na_child_add_string(new->snap_query, "target-name", name);
+       } else {
+               new->snap_query = NULL;
+       }
 
        /* Add to end of list. */
        if (last == NULL)
@@ -716,11 +731,19 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
                data_volume_perf_t *old_data,
                const data_volume_perf_t *new_data)
 {
+       char plugin_instance[DATA_MAX_NAME_LEN];
+
+       if ((hostname == NULL) || (old_data == NULL) || (new_data == NULL))
+               return (-1);
+
+       ssnprintf (plugin_instance, sizeof (plugin_instance),
+                       "volume-%s", old_data->name);
+
        /* Check for and submit disk-octet values */
        if (HAS_ALL_FLAGS (old_data->flags, CFG_VOLUME_PERF_IO)
                        && HAS_ALL_FLAGS (new_data->flags, HAVE_VOLUME_PERF_BYTES_READ | HAVE_VOLUME_PERF_BYTES_WRITE))
        {
-               submit_two_counters (hostname, old_data->name, "disk_octets", /* type instance = */ NULL,
+               submit_two_counters (hostname, plugin_instance, "disk_octets", /* type instance = */ NULL,
                                (counter_t) new_data->read_bytes, (counter_t) new_data->write_bytes, new_data->timestamp);
        }
 
@@ -728,7 +751,7 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
        if (HAS_ALL_FLAGS (old_data->flags, CFG_VOLUME_PERF_OPS)
                        && HAS_ALL_FLAGS (new_data->flags, HAVE_VOLUME_PERF_OPS_READ | HAVE_VOLUME_PERF_OPS_WRITE))
        {
-               submit_two_counters (hostname, old_data->name, "disk_ops", /* type instance = */ NULL,
+               submit_two_counters (hostname, plugin_instance, "disk_ops", /* type instance = */ NULL,
                                (counter_t) new_data->read_ops, (counter_t) new_data->write_ops, new_data->timestamp);
        }
 
@@ -772,7 +795,7 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
                                latency_per_op_write = ((gauge_t) diff_latency_write) / ((gauge_t) diff_ops_write);
                }
 
-               submit_two_gauge (hostname, old_data->name, "disk_latency", /* type instance = */ NULL,
+               submit_two_gauge (hostname, plugin_instance, "disk_latency", /* type instance = */ NULL,
                                latency_per_op_read, latency_per_op_write, new_data->timestamp);
        }
 
@@ -1196,7 +1219,7 @@ static int cna_handle_volume_perf_data (const char *hostname, /* {{{ */
                        continue;
 
                /* get_volume_perf may return NULL if this volume is to be ignored. */
-               v = get_volume_perf (cvp, perf_data.name);
+               v = get_volume_perf (cvp, name);
                if (v == NULL)
                        continue;
 
@@ -1334,30 +1357,74 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
 
        for (v = cfg_volume->volumes; v != NULL; v = v->next)
        {
+               char plugin_instance[DATA_MAX_NAME_LEN];
+
+               uint64_t norm_used = v->norm_used;
+               uint64_t norm_free = v->norm_free;
+               uint64_t sis_saved = v->sis_saved;
+               uint64_t snap_reserve_used = 0;
+               uint64_t snap_reserve_free = v->snap_reserved;
+               uint64_t snap_norm_used = v->snap_used;
+
+               ssnprintf (plugin_instance, sizeof (plugin_instance),
+                               "volume-%s", v->name);
+
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED | HAVE_VOLUME_USAGE_SNAP_RSVD)) {
+                       if (v->snap_reserved > v->snap_used) {
+                               snap_reserve_free = v->snap_reserved - v->snap_used;
+                               snap_reserve_used = v->snap_used;
+                               snap_norm_used = 0;
+                       } else {
+                               snap_reserve_free = 0;
+                               snap_reserve_used = v->snap_reserved;
+                               snap_norm_used = v->snap_used - v->snap_reserved;
+                       }
+               }
+
+               /* The space used by snapshots but not reserved for them is included in
+                * both, norm_used and snap_norm_used. If possible, subtract this here. */
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_USED | HAVE_VOLUME_USAGE_SNAP_USED))
+               {
+                       if (norm_used >= snap_norm_used)
+                               norm_used -= snap_norm_used;
+                       else
+                       {
+                               ERROR ("netapp plugin: (norm_used = %"PRIu64") < (snap_norm_used = "
+                                               "%"PRIu64"). Invalidating both.",
+                                               norm_used, snap_norm_used);
+                               v->flags &= ~(HAVE_VOLUME_USAGE_NORM_USED | HAVE_VOLUME_USAGE_SNAP_USED);
+                       }
+               }
+
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_FREE))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "free",
-                                       (double) v->norm_free, /* timestamp = */ 0);
+                                       (double) norm_free, /* timestamp = */ 0);
+
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SIS_SAVED))
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
+                                       "df_complex", "sis_saved",
+                                       (double) sis_saved, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_USED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "used",
-                                       (double) v->norm_used, /* timestamp = */ 0);
+                                       (double) norm_used, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_RSVD))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "snap_reserved",
-                                       (double) v->snap_reserved, /* timestamp = */ 0);
+                                       (double) snap_reserve_free, /* timestamp = */ 0);
 
-               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
-                                       "df_complex", "snap_used",
-                                       (double) v->snap_used, /* timestamp = */ 0);
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED | HAVE_VOLUME_USAGE_SNAP_RSVD))
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
+                                       "df_complex", "snap_reserve_used",
+                                       (double) snap_reserve_used, /* timestamp = */ 0);
 
-               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SIS_SAVED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
-                                       "df_complex", "sis_saved",
-                                       (double) v->sis_saved, /* timestamp = */ 0);
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED))
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
+                                       "df_complex", "snap_normal_used",
+                                       (double) snap_norm_used, /* timestamp = */ 0);
 
                /* Clear all the HAVE_* flags */
                v->flags &= ~HAVE_VOLUME_USAGE_ALL;
@@ -1366,7 +1433,87 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
        return (0);
 } /* }}} int cna_submit_volume_usage_data */
 
-static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
+/* Toggle the IS_VOLUME_USAGE_OFFLINE flag of volume "v" and dispatch a matching
+ * notification. Returns the result of plugin_dispatch_notification(). */
+static int cna_change_volume_status (const char *hostname, /* {{{ */
+               data_volume_usage_t *v)
+{
+       notification_t n;
+
+       memset (&n, 0, sizeof (n)); /* clear the whole struct, not just sizeof (&n) bytes */
+       n.time = time (NULL);
+       sstrncpy (n.host, hostname, sizeof (n.host));
+       sstrncpy (n.plugin, "netapp", sizeof (n.plugin));
+       sstrncpy (n.plugin_instance, v->name, sizeof (n.plugin_instance));
+
+       if ((v->flags & IS_VOLUME_USAGE_OFFLINE) != 0) { /* was offline: report online */
+               n.severity = NOTIF_OKAY;
+               ssnprintf (n.message, sizeof (n.message),
+                               "Volume %s is now online.", v->name);
+               v->flags &= ~IS_VOLUME_USAGE_OFFLINE;
+       } else { /* was online: report offline */
+               n.severity = NOTIF_WARNING;
+               ssnprintf (n.message, sizeof (n.message),
+                               "Volume %s is now offline.", v->name);
+               v->flags |= IS_VOLUME_USAGE_OFFLINE;
+       }
+
+       return (plugin_dispatch_notification (&n));
+} /* }}} int cna_change_volume_status */
+
+static void cna_handle_volume_snap_usage(const host_config_t *host, /* {{{ */
+               data_volume_usage_t *v)
+{
+       /* Query the snapshot list of volume v and store its usage in v->snap_used. */
+       na_elem_iter_t it;
+       na_elem_t *reply, *snapshots, *snap;
+       uint64_t blocks, max_blocks = 0;
+
+       /* Run the per-volume query stored in v->snap_query. */
+       reply = na_server_invoke_elem(host->srv, v->snap_query);
+       if (na_results_status(reply) != NA_OK)
+       {
+               if (na_results_errno(reply) != EVOLUMEOFFLINE) {
+                       ERROR ("netapp plugin: cna_handle_volume_snap_usage: na_server_invoke_elem for "
+                                       "volume \"%s\" failed with error %d: %s", v->name,
+                                       na_results_errno(reply), na_results_reason(reply));
+               } else if ((v->flags & IS_VOLUME_USAGE_OFFLINE) == 0) {
+                       cna_change_volume_status (host->name, v); /* not yet marked offline */
+               }
+               na_elem_free(reply);
+               return;
+       }
+
+       /* The query succeeded; a volume still marked offline changes state again. */
+       if ((v->flags & IS_VOLUME_USAGE_OFFLINE) != 0)
+               cna_change_volume_status (host->name, v);
+
+       snapshots = na_elem_child (reply, "snapshots");
+       if (snapshots == NULL)
+       {
+               ERROR ("netapp plugin: cna_handle_volume_snap_usage: "
+                               "na_elem_child (\"snapshots\") failed.");
+               na_elem_free(reply);
+               return;
+       }
+
+       /* "cumulative-total" is the size, in 1 KB blocks, of a snapshot plus all
+        * newer ones, so the largest value is the space needed by all snapshots. */
+       it = na_child_iterator (snapshots);
+       while ((snap = na_iterator_next (&it)) != NULL)
+       {
+               blocks = na_child_get_uint64(snap, "cumulative-total", 0);
+               if (max_blocks < blocks)
+                       max_blocks = blocks;
+       }
+       na_elem_free (reply);
+
+       /* Convert 1024-byte blocks to bytes and flag the value as available. */
+       v->snap_used = max_blocks * 1024;
+       v->flags |= HAVE_VOLUME_USAGE_SNAP_USED;
+} /* }}} void cna_handle_volume_snap_usage */
+
+static int cna_handle_volume_usage_data (const host_config_t *host, /* {{{ */
                cfg_volume_usage_t *cfg_volume, na_elem_t *data)
 {
        na_elem_t *elem_volume;
@@ -1386,7 +1533,7 @@ static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
                        elem_volume != NULL;
                        elem_volume = na_iterator_next (&iter_volume))
        {
-               const char *volume_name;
+               const char *volume_name, *state;
 
                data_volume_usage_t *v;
                uint64_t value;
@@ -1399,11 +1546,18 @@ static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
                if (volume_name == NULL)
                        continue;
 
+               state = na_child_get_string (elem_volume, "state");
+               if ((state == NULL) || (strcmp(state, "online") != 0))
+                       continue;
+
                /* get_volume_usage may return NULL if the volume is to be ignored. */
                v = get_volume_usage (cfg_volume, volume_name);
                if (v == NULL)
                        continue;
 
+               if ((v->flags & CFG_VOLUME_USAGE_SNAP) != 0)
+                       cna_handle_volume_snap_usage(host, v);
+               
                if ((v->flags & CFG_VOLUME_USAGE_DF) == 0)
                        continue;
 
@@ -1423,7 +1577,7 @@ static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
                value = na_child_get_uint64(elem_volume, "snapshot-blocks-reserved", UINT64_MAX);
                if (value != UINT64_MAX) {
                        /* 1 block == 1024 bytes  as per API docs */
-                       v->norm_used = 1024 * value;
+                       v->snap_reserved = 1024 * value;
                        v->flags |= HAVE_VOLUME_USAGE_SNAP_RSVD;
                }
 
@@ -1435,13 +1589,9 @@ static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
                if (sis_state == NULL)
                        continue;
 
-               /* If SIS is not enabled, set the HAVE_VOLUME_USAGE_SIS_SAVED flag and set
-                * sis_saved to UINT64_MAX to signal this condition to the submit function. */
-               if (strcmp ("enabled", sis_state) != 0) {
-                       v->sis_saved = UINT64_MAX;
-                       v->flags |= HAVE_VOLUME_USAGE_SIS_SAVED;
+               /* If SIS is not enabled, there's nothing left to do for this volume. */
+               if (strcmp ("enabled", sis_state) != 0)
                        continue;
-               }
 
                sis_saved_reported = na_child_get_uint64(sis, "size-saved", UINT64_MAX);
                if (sis_saved_reported == UINT64_MAX)
@@ -1499,7 +1649,7 @@ static int cna_handle_volume_usage_data (const char *hostname, /* {{{ */
                } /* }}} end of 32-bit workaround */
        } /* for (elem_volume) */
 
-       return (cna_submit_volume_usage_data (hostname, cfg_volume));
+       return (cna_submit_volume_usage_data (host->name, cfg_volume));
 } /* }}} int cna_handle_volume_usage_data */
 
 static int cna_setup_volume_usage (cfg_volume_usage_t *cvu) /* {{{ */
@@ -1517,8 +1667,6 @@ static int cna_setup_volume_usage (cfg_volume_usage_t *cvu) /* {{{ */
                return (-1);
        }
 
-       /* TODO: cvu->snap_query = na_elem_new("snapshot-list-info"); */
-
        return (0);
 } /* }}} int cna_setup_volume_usage */
 
@@ -1554,7 +1702,7 @@ static int cna_query_volume_usage (host_config_t *host) /* {{{ */
                return (-1);
        }
 
-       status = cna_handle_volume_usage_data (host->name, host->cfg_volume_usage, data);
+       status = cna_handle_volume_usage_data (host, host->cfg_volume_usage, data);
 
        if (status == 0)
                host->cfg_volume_usage->interval.last_read = now;
@@ -1757,14 +1905,14 @@ static int cna_config_get_interval (const oconfig_item_t *ci, /* {{{ */
 
        if ((ci->values_num != 1) || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
        {
-               WARNING ("netapp plugin: The `Multiplier' option needs exactly one numeric argument.");
+               WARNING ("netapp plugin: The `Interval' option needs exactly one numeric argument.");
                return (-1);
        }
 
        tmp = (time_t) (ci->values[0].value.number + .5);
        if (tmp < 1)
        {
-               WARNING ("netapp plugin: The `Multiplier' option needs a positive integer argument.");
+               WARNING ("netapp plugin: The `Interval' option needs a positive integer argument.");
                return (-1);
        }
 
@@ -1923,8 +2071,8 @@ static int cna_config_volume_performance (host_config_t *host, /* {{{ */
        return (0);
 } /* }}} int cna_config_volume_performance */
 
-/* Handling of the "Capacity" and "Snapshot" options within a <VolumeUsage />
- * block. */
+/* Handling of the "GetCapacity" and "GetSnapshot" options within a
+ * <VolumeUsage /> block. */
 static void cna_config_volume_usage_option (cfg_volume_usage_t *cvu, /* {{{ */
                const oconfig_item_t *ci)
 {
@@ -1940,9 +2088,9 @@ static void cna_config_volume_usage_option (cfg_volume_usage_t *cvu, /* {{{ */
 
        name = ci->values[0].value.string;
 
-       if (strcasecmp ("Capacity", ci->key) == 0)
+       if (strcasecmp ("GetCapacity", ci->key) == 0)
                il = cvu->il_capacity;
-       else if (strcasecmp ("Snapshot", ci->key) == 0)
+       else if (strcasecmp ("GetSnapshot", ci->key) == 0)
                il = cvu->il_snapshot;
        else
                return;
@@ -2076,17 +2224,17 @@ static int cna_config_wafl(host_config_t *host, oconfig_item_t *ci) /* {{{ */
 
 /*
  * <VolumeUsage>
- *   Capacity "vol0"
- *   Capacity "vol1"
- *   Capacity "vol2"
- *   Capacity "vol3"
- *   Capacity "vol4"
+ *   GetCapacity "vol0"
+ *   GetCapacity "vol1"
+ *   GetCapacity "vol2"
+ *   GetCapacity "vol3"
+ *   GetCapacity "vol4"
  *   IgnoreSelectedCapacity false
  *
- *   Snapshot "vol0"
- *   Snapshot "vol3"
- *   Snapshot "vol4"
- *   Snapshot "vol7"
+ *   GetSnapshot "vol0"
+ *   GetSnapshot "vol3"
+ *   GetSnapshot "vol4"
+ *   GetSnapshot "vol7"
  *   IgnoreSelectedSnapshot false
  * </VolumeUsage>
  */
@@ -2136,9 +2284,9 @@ static int cna_config_volume_usage(host_config_t *host, /* {{{ */
                /* if (!item || !item->key || !*item->key) continue; */
                if (strcasecmp(item->key, "Interval") == 0)
                        cna_config_get_interval (item, &cfg_volume_usage->interval);
-               else if (!strcasecmp(item->key, "Capacity"))
+               else if (!strcasecmp(item->key, "GetCapacity"))
                        cna_config_volume_usage_option (cfg_volume_usage, item);
-               else if (!strcasecmp(item->key, "Snapshot"))
+               else if (!strcasecmp(item->key, "GetSnapshot"))
                        cna_config_volume_usage_option (cfg_volume_usage, item);
                else if (!strcasecmp(item->key, "IgnoreSelectedCapacity"))
                        cna_config_volume_usage_default (cfg_volume_usage, item);
@@ -2399,10 +2547,19 @@ static int cna_read (void) { /* {{{ */
        return 0;
 } /* }}} int cna_read */
 
+static int cna_shutdown (void) /* {{{ */
+{
+       free_host_config (global_host_config); /* release the configured host list */
+       global_host_config = NULL; /* do not keep a pointer to freed memory */
+
+       return (0); /* always reports success */
+} /* }}} int cna_shutdown */
+
 void module_register(void) {
        plugin_register_complex_config("netapp", cna_config);
        plugin_register_init("netapp", cna_init);
        plugin_register_read("netapp", cna_read);
+       plugin_register_shutdown("netapp", cna_shutdown); /* cna_shutdown frees the host configs */
 }
 
 /* vim: set sw=2 ts=2 noet fdm=marker : */