netapp plugin: Use a separate read-thread for each host.
[collectd.git] / src / netapp.c
index 1ef17a5..f01a7b4 100644 (file)
@@ -190,6 +190,7 @@ typedef struct {
 #define HAVE_VOLUME_USAGE_SNAP_USED     0x0080
 #define HAVE_VOLUME_USAGE_SIS_SAVED     0x0100
 #define HAVE_VOLUME_USAGE_ALL           0x01f0
+#define IS_VOLUME_USAGE_OFFLINE         0x0200
 struct data_volume_usage_s;
 typedef struct data_volume_usage_s data_volume_usage_t;
 struct data_volume_usage_s {
@@ -252,11 +253,6 @@ struct host_config_s {
 
        struct host_config_s *next;
 };
-#define HOST_INIT { NULL, NA_SERVER_TRANSPORT_HTTPS, NULL, 0, NULL, NULL, 0, \
-       NULL, NULL, NULL, NULL, NULL, NULL, \
-       NULL}
-
-static host_config_t *global_host_config;
 
 /*
  * Free functions
@@ -730,11 +726,19 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
                data_volume_perf_t *old_data,
                const data_volume_perf_t *new_data)
 {
+       char plugin_instance[DATA_MAX_NAME_LEN];
+
+       if ((hostname == NULL) || (old_data == NULL) || (new_data == NULL))
+               return (-1);
+
+       ssnprintf (plugin_instance, sizeof (plugin_instance),
+                       "volume-%s", old_data->name);
+
        /* Check for and submit disk-octet values */
        if (HAS_ALL_FLAGS (old_data->flags, CFG_VOLUME_PERF_IO)
                        && HAS_ALL_FLAGS (new_data->flags, HAVE_VOLUME_PERF_BYTES_READ | HAVE_VOLUME_PERF_BYTES_WRITE))
        {
-               submit_two_counters (hostname, old_data->name, "disk_octets", /* type instance = */ NULL,
+               submit_two_counters (hostname, plugin_instance, "disk_octets", /* type instance = */ NULL,
                                (counter_t) new_data->read_bytes, (counter_t) new_data->write_bytes, new_data->timestamp);
        }
 
@@ -742,7 +746,7 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
        if (HAS_ALL_FLAGS (old_data->flags, CFG_VOLUME_PERF_OPS)
                        && HAS_ALL_FLAGS (new_data->flags, HAVE_VOLUME_PERF_OPS_READ | HAVE_VOLUME_PERF_OPS_WRITE))
        {
-               submit_two_counters (hostname, old_data->name, "disk_ops", /* type instance = */ NULL,
+               submit_two_counters (hostname, plugin_instance, "disk_ops", /* type instance = */ NULL,
                                (counter_t) new_data->read_ops, (counter_t) new_data->write_ops, new_data->timestamp);
        }
 
@@ -786,7 +790,7 @@ static int submit_volume_perf_data (const char *hostname, /* {{{ */
                                latency_per_op_write = ((gauge_t) diff_latency_write) / ((gauge_t) diff_ops_write);
                }
 
-               submit_two_gauge (hostname, old_data->name, "disk_latency", /* type instance = */ NULL,
+               submit_two_gauge (hostname, plugin_instance, "disk_latency", /* type instance = */ NULL,
                                latency_per_op_read, latency_per_op_write, new_data->timestamp);
        }
 
@@ -1210,7 +1214,7 @@ static int cna_handle_volume_perf_data (const char *hostname, /* {{{ */
                        continue;
 
                /* get_volume_perf may return NULL if this volume is to be ignored. */
-               v = get_volume_perf (cvp, perf_data.name);
+               v = get_volume_perf (cvp, name);
                if (v == NULL)
                        continue;
 
@@ -1348,6 +1352,8 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
 
        for (v = cfg_volume->volumes; v != NULL; v = v->next)
        {
+               char plugin_instance[DATA_MAX_NAME_LEN];
+
                uint64_t norm_used = v->norm_used;
                uint64_t norm_free = v->norm_free;
                uint64_t sis_saved = v->sis_saved;
@@ -1355,6 +1361,9 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
                uint64_t snap_reserve_free = v->snap_reserved;
                uint64_t snap_norm_used = v->snap_used;
 
+               ssnprintf (plugin_instance, sizeof (plugin_instance),
+                               "volume-%s", v->name);
+
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED | HAVE_VOLUME_USAGE_SNAP_RSVD)) {
                        if (v->snap_reserved > v->snap_used) {
                                snap_reserve_free = v->snap_reserved - v->snap_used;
@@ -1364,39 +1373,51 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
                                snap_reserve_free = 0;
                                snap_reserve_used = v->snap_reserved;
                                snap_norm_used = v->snap_used - v->snap_reserved;
-                               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_USED)
-                                               && (norm_used >= snap_norm_used))
-                                       norm_used -= snap_norm_used;
+                       }
+               }
+
+               /* The space used by snapshots but not reserved for them is included in
+                * both, norm_used and snap_norm_used. If possible, subtract this here. */
+               if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_USED | HAVE_VOLUME_USAGE_SNAP_USED))
+               {
+                       if (norm_used >= snap_norm_used)
+                               norm_used -= snap_norm_used;
+                       else
+                       {
+                               ERROR ("netapp plugin: (norm_used = %"PRIu64") < (snap_norm_used = "
+                                               "%"PRIu64"). Invalidating both.",
+                                               norm_used, snap_norm_used);
+                               v->flags &= ~(HAVE_VOLUME_USAGE_NORM_USED | HAVE_VOLUME_USAGE_SNAP_USED);
                        }
                }
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_FREE))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "free",
                                        (double) norm_free, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SIS_SAVED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "sis_saved",
                                        (double) sis_saved, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_NORM_USED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "used",
                                        (double) norm_used, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_RSVD))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "snap_reserved",
                                        (double) snap_reserve_free, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED | HAVE_VOLUME_USAGE_SNAP_RSVD))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "snap_reserve_used",
                                        (double) snap_reserve_used, /* timestamp = */ 0);
 
                if (HAS_ALL_FLAGS (v->flags, HAVE_VOLUME_USAGE_SNAP_USED))
-                       submit_double (hostname, /* plugin instance = */ v->name,
+                       submit_double (hostname, /* plugin instance = */ plugin_instance,
                                        "df_complex", "snap_normal_used",
                                        (double) snap_norm_used, /* timestamp = */ 0);
 
@@ -1407,7 +1428,36 @@ static int cna_submit_volume_usage_data (const char *hostname, /* {{{ */
        return (0);
 } /* }}} int cna_submit_volume_usage_data */
 
-static void cna_handle_volume_snap_usage(const host_config_t *host, data_volume_usage_t *v)
+/* Flip IS_VOLUME_USAGE_OFFLINE in v->flags and dispatch a matching notification
+ * (OKAY: back online, WARNING: went offline). Returns the dispatch status. */
+static int cna_change_volume_status (const char *hostname, /* {{{ */
+               data_volume_usage_t *v)
+{
+       notification_t n;
+
+       memset (&n, 0, sizeof (n)); /* clear the whole struct, not just sizeof a pointer */
+       n.time = time (NULL);
+       sstrncpy (n.host, hostname, sizeof (n.host));
+       sstrncpy (n.plugin, "netapp", sizeof (n.plugin));
+       sstrncpy (n.plugin_instance, v->name, sizeof (n.plugin_instance));
+
+       if ((v->flags & IS_VOLUME_USAGE_OFFLINE) != 0) { /* was offline => now online */
+               n.severity = NOTIF_OKAY;
+               ssnprintf (n.message, sizeof (n.message),
+                               "Volume %s is now online.", v->name);
+               v->flags &= ~IS_VOLUME_USAGE_OFFLINE;
+       } else { /* was online => now offline */
+               n.severity = NOTIF_WARNING;
+               ssnprintf (n.message, sizeof (n.message),
+                               "Volume %s is now offline.", v->name);
+               v->flags |= IS_VOLUME_USAGE_OFFLINE;
+       }
+
+       return (plugin_dispatch_notification (&n));
+} /* }}} int cna_change_volume_status */
+
+static void cna_handle_volume_snap_usage(const host_config_t *host, /* {{{ */
+               data_volume_usage_t *v)
 {
        uint64_t snap_used = 0, value;
        na_elem_t *data, *elem_snap, *elem_snapshots;
@@ -1416,14 +1466,21 @@ static void cna_handle_volume_snap_usage(const host_config_t *host, data_volume_
        data = na_server_invoke_elem(host->srv, v->snap_query);
        if (na_results_status(data) != NA_OK)
        {
-               if (na_results_errno(data) != EVOLUMEOFFLINE)
+               if (na_results_errno(data) == EVOLUMEOFFLINE) {
+                       if ((v->flags & IS_VOLUME_USAGE_OFFLINE) == 0)
+                               cna_change_volume_status (host->name, v);
+               } else {
                        ERROR ("netapp plugin: cna_handle_volume_snap_usage: na_server_invoke_elem for "
                                        "volume \"%s\" failed with error %d: %s", v->name,
                                        na_results_errno(data), na_results_reason(data));
+               }
                na_elem_free(data);
                return;
        }
 
+       if ((v->flags & IS_VOLUME_USAGE_OFFLINE) != 0)
+               cna_change_volume_status (host->name, v);
+
        elem_snapshots = na_elem_child (data, "snapshots");
        if (elem_snapshots == NULL)
        {
@@ -1439,15 +1496,17 @@ static void cna_handle_volume_snap_usage(const host_config_t *host, data_volume_
                        elem_snap = na_iterator_next (&iter_snap))
        {
                value = na_child_get_uint64(elem_snap, "cumulative-total", 0);
+               /* "cumulative-total" is the total size of the oldest snapshot plus all
+                * newer ones in blocks (1KB). We therefore are looking for the highest
+                * number of all snapshots - that's the size required for the snapshots. */
                if (value > snap_used)
                        snap_used = value;
        }
        na_elem_free (data);
-       /* snap_used is the total size of the oldest snapshot plus all
-        * newer ones in blocks (1KB). */
+       /* snap_used is in 1024 byte blocks */
        v->snap_used = snap_used * 1024;
        v->flags |= HAVE_VOLUME_USAGE_SNAP_USED;
-}
+} /* }}} void cna_handle_volume_snap_usage */
 
 static int cna_handle_volume_usage_data (const host_config_t *host, /* {{{ */
                cfg_volume_usage_t *cfg_volume, na_elem_t *data)
@@ -2292,8 +2351,7 @@ static int cna_config_system (host_config_t *host, /* {{{ */
 } /* }}} int cna_config_system */
 
 /* Corresponds to a <Host /> block. */
-static host_config_t *cna_config_host (const oconfig_item_t *ci, /* {{{ */
-               const host_config_t *default_host)
+static host_config_t *cna_config_host (const oconfig_item_t *ci) /* {{{ */
 {
        oconfig_item_t *item;
        host_config_t *host;
@@ -2306,7 +2364,18 @@ static host_config_t *cna_config_host (const oconfig_item_t *ci, /* {{{ */
        }
 
        host = malloc(sizeof(*host));
-       memcpy (host, default_host, sizeof (*host));
+       memset (host, 0, sizeof (*host));
+       host->name = NULL;
+       host->protocol = NA_SERVER_TRANSPORT_HTTPS;
+       host->host = NULL;
+       host->username = NULL;
+       host->password = NULL;
+       host->srv = NULL;
+       host->cfg_wafl = NULL;
+       host->cfg_disk = NULL;
+       host->cfg_volume_perf = NULL;
+       host->cfg_volume_usage = NULL;
+       host->cfg_system = NULL;
 
        status = cf_util_get_string (ci, &host->name);
        if (status != 0)
@@ -2393,15 +2462,36 @@ static host_config_t *cna_config_host (const oconfig_item_t *ci, /* {{{ */
  *
  * Pretty standard stuff here.
  */
-static int cna_init(void) { /* {{{ */
-       char err[256];
-       host_config_t *host;
-       
-       if (!global_host_config) {
-               WARNING("netapp plugin: Plugin loaded but no hosts defined.");
-               return 1;
+static int cna_init_host (host_config_t *host) /* {{{ */
+{ /* Opens and configures host->srv on first use; returns 0 on success. */
+       if (host == NULL)
+               return (EINVAL);
+
+       if (host->srv != NULL) /* handle already set up by an earlier call */
+               return (0);
+
+       /* Request version 1.1 of the ONTAP API */
+       host->srv = na_server_open(host->host,
+                       /* major version = */ 1, /* minor version = */ 1); 
+       if (host->srv == NULL) {
+               ERROR ("netapp plugin: na_server_open (%s) failed.", host->host);
+               return (-1); /* host->srv stays NULL, so the next call tries again */
        }
 
+       na_server_set_transport_type(host->srv, host->protocol,
+                       /* transportarg = */ NULL);
+       na_server_set_port(host->srv, host->port);
+       na_server_style(host->srv, NA_STYLE_LOGIN_PASSWORD);
+       na_server_adminuser(host->srv, host->username, host->password);
+       na_server_set_timeout(host->srv, 5 /* seconds */);
+
+       return 0;
+} /* }}} int cna_init_host */
+
+static int cna_init (void) /* {{{ */
+{
+       char err[256];
+
        memset (err, 0, sizeof (err));
        if (!na_startup(err, sizeof(err))) {
                err[sizeof (err) - 1] = 0;
@@ -2409,84 +2499,76 @@ static int cna_init(void) { /* {{{ */
                return 1;
        }
 
-       for (host = global_host_config; host; host = host->next) {
-               /* Request version 1.1 of the ONTAP API */
-               host->srv = na_server_open(host->host,
-                               /* major version = */ 1, /* minor version = */ 1); 
-               if (host->srv == NULL) {
-                       ERROR ("netapp plugin: na_server_open (%s) failed.", host->host);
-                       continue;
-               }
+       return (0);
+} /* }}} cna_init */
+
+static int cna_read (user_data_t *ud) { /* {{{ */
+       host_config_t *host;
+       int status;
 
-               if (host->interval < interval_g)
-                       host->interval = interval_g;
+       if ((ud == NULL) || (ud->data == NULL))
+               return (-1);
+
+       host = ud->data;
+
+       status = cna_init_host (host);
+       if (status != 0)
+               return (status);
+       
+       cna_query_wafl (host);
+       cna_query_disk (host);
+       cna_query_volume_perf (host);
+       cna_query_volume_usage (host);
+       cna_query_system (host);
 
-               na_server_set_transport_type(host->srv, host->protocol,
-                               /* transportarg = */ NULL);
-               na_server_set_port(host->srv, host->port);
-               na_server_style(host->srv, NA_STYLE_LOGIN_PASSWORD);
-               na_server_adminuser(host->srv, host->username, host->password);
-               na_server_set_timeout(host->srv, 5 /* seconds */);
-       }
        return 0;
-} /* }}} int cna_init */
+} /* }}} int cna_read */
 
 static int cna_config (oconfig_item_t *ci) { /* {{{ */
        int i;
        oconfig_item_t *item;
-       host_config_t default_host = HOST_INIT;
-       
+
        for (i = 0; i < ci->children_num; ++i) {
                item = ci->children + i;
 
-               if (!strcasecmp(item->key, "Host")) {
+               if (strcasecmp(item->key, "Host") == 0)
+               {
                        host_config_t *host;
-                       host_config_t *tmp;
+                       char cb_name[256];
+                       struct timespec interval;
+                       user_data_t ud;
 
-                       host = cna_config_host(item, &default_host);
+                       host = cna_config_host (item);
                        if (host == NULL)
                                continue;
 
-                       for (tmp = global_host_config; tmp != NULL; tmp = tmp->next)
-                       {
-                               if (strcasecmp (host->name, tmp->name) == 0)
-                                       WARNING ("netapp plugin: Duplicate definition of host `%s'. "
-                                                       "This is probably a bad idea.",
-                                                       host->name);
+                       ssnprintf (cb_name, sizeof (cb_name), "netapp-%s", host->name);
 
-                               if (tmp->next == NULL)
-                                       break;
-                       }
+                       memset (&interval, 0, sizeof (interval));
+                       interval.tv_sec = host->interval;
 
-                       host->next = NULL;
-                       if (tmp == NULL)
-                               global_host_config = host;
-                       else
-                               tmp->next = host;
-               } else {
+                       memset (&ud, 0, sizeof (ud));
+                       ud.data = host;
+                       ud.free_func = (void (*) (void *)) free_host_config;
+
+                       plugin_register_complex_read (cb_name,
+                                       /* callback  = */ cna_read, 
+                                       /* interval  = */ (host->interval > 0) ? &interval : NULL,
+                                       /* user data = */ &ud);
+                       continue;
+               }
+               else /* if (item->key != "Host") */
+               {
                        WARNING("netapp plugin: Ignoring unknown config option \"%s\".", item->key);
                }
        }
        return 0;
 } /* }}} int cna_config */
 
-static int cna_read (void) { /* {{{ */
-       host_config_t *host;
-       
-       for (host = global_host_config; host; host = host->next) {
-               cna_query_wafl (host);
-               cna_query_disk (host);
-               cna_query_volume_perf (host);
-               cna_query_volume_usage (host);
-               cna_query_system (host);
-       }
-       return 0;
-} /* }}} int cna_read */
-
 static int cna_shutdown (void) /* {{{ */
 {
-       free_host_config (global_host_config);
-       global_host_config = NULL;
+       /* Clean up system resources and stuff. */
+       na_shutdown ();
 
        return (0);
 } /* }}} int cna_shutdown */
@@ -2494,7 +2576,6 @@ static int cna_shutdown (void) /* {{{ */
 void module_register(void) {
        plugin_register_complex_config("netapp", cna_config);
        plugin_register_init("netapp", cna_init);
-       plugin_register_read("netapp", cna_read);
        plugin_register_shutdown("netapp", cna_shutdown);
 }