write_riemann: do not deadlock when sending from flush
[collectd.git] / src / write_riemann.c
index 63e8918..f836b9d 100644 (file)
@@ -37,6 +37,7 @@
 #include "common.h"
 #include "configfile.h"
 #include "utils_cache.h"
+#include "utils_complain.h"
 #include "write_riemann_threshold.h"
 
 #define RIEMANN_HOST           "localhost"
@@ -45,6 +46,7 @@
 #define RIEMANN_BATCH_MAX      8192
 
 struct riemann_host {
+    c_complain_t init_complaint;
        char                    *name;
        char                    *event_service_prefix;
        pthread_mutex_t  lock;
@@ -60,6 +62,7 @@ struct riemann_host {
        double                   ttl_factor;
     cdtime_t         batch_init;
     int              batch_max;
+    int              batch_timeout;
        int                          reference_count;
   riemann_message_t    *batch_msg;
        char                     *tls_ca_file;
@@ -93,21 +96,25 @@ static int wrr_connect(struct riemann_host *host) /* {{{ */
                                             RIEMANN_CLIENT_OPTION_TLS_KEY_FILE, host->tls_key_file,
                                             RIEMANN_CLIENT_OPTION_NONE);
        if (host->client == NULL) {
-               WARNING("write_riemann plugin: Unable to connect to Riemann at %s:%d",
-                       node, port);
+        c_complain (LOG_ERR, &host->init_complaint,
+                    "write_riemann plugin: Unable to connect to Riemann at %s:%d",
+                    node, port);
                return -1;
        }
        if (host->timeout.tv_sec != 0) {
                if (riemann_client_set_timeout(host->client, &host->timeout) != 0) {
                        riemann_client_free(host->client);
                        host->client = NULL;
-                       WARNING("write_riemann plugin: Unable to connect to Riemann at %s:%d",
-                               node, port);
+            c_complain (LOG_ERR, &host->init_complaint,
+                        "write_riemann plugin: Unable to connect to Riemann at %s:%d",
+                        node, port);
                        return -1;
                }
        }
-       DEBUG("write_riemann plugin: got a successful connection for: %s:%d",
-             node, port);
+
+    c_release (LOG_INFO, &host->init_complaint,
+               "write_riemann plugin: Successfully connected to %s:%d",
+               node, port);
 
        return 0;
 } /* }}} int wrr_connect */
@@ -129,21 +136,18 @@ static int wrr_disconnect(struct riemann_host *host) /* {{{ */
  *
  * Acquires the host lock, disconnects on errors.
  */
-static int wrr_send(struct riemann_host *host, riemann_message_t *msg) /* {{{ */
+static int wrr_send_nolock(struct riemann_host *host, riemann_message_t *msg) /* {{{ */
 {
        int status = 0;
-       pthread_mutex_lock (&host->lock);
 
        status = wrr_connect(host);
        if (status != 0) {
-        pthread_mutex_unlock(&host->lock);
                return status;
     }
 
        status = riemann_client_send_message(host->client, msg);
        if (status != 0) {
                wrr_disconnect(host);
-               pthread_mutex_unlock(&host->lock);
                return status;
        }
 
@@ -159,16 +163,24 @@ static int wrr_send(struct riemann_host *host, riemann_message_t *msg) /* {{{ */
                if (response == NULL)
                {
                        wrr_disconnect(host);
-                       pthread_mutex_unlock(&host->lock);
                        return errno;
                }
                riemann_message_free(response);
        }
 
-       pthread_mutex_unlock (&host->lock);
        return 0;
 } /* }}} int wrr_send */
 
+static int wrr_send(struct riemann_host *host, riemann_message_t *msg) /* Locking wrapper: sends msg via wrr_send_nolock() while holding host->lock. */
+{
+    int status = 0;
+
+    pthread_mutex_lock (&host->lock); /* code that already holds host->lock (e.g. the batch flush path) calls wrr_send_nolock() directly instead */
+    status = wrr_send_nolock(host, msg);
+    pthread_mutex_unlock (&host->lock);
+    return status; /* status of wrr_send_nolock(), passed through unchanged */
+}
+
 static riemann_message_t *wrr_notification_to_message(struct riemann_host *host, /* {{{ */
                notification_t const *n)
 {
@@ -454,7 +466,7 @@ static int wrr_batch_flush_nolock(cdtime_t timeout,
                if ((host->batch_init + timeout) > now)
                        return status;
        }
-       wrr_send(host, host->batch_msg);
+       wrr_send_nolock(host, host->batch_msg);
        riemann_message_free(host->batch_msg);
 
        if (host->client_type != RIEMANN_CLIENT_UDP)
@@ -491,8 +503,11 @@ static int wrr_batch_flush(cdtime_t timeout,
        pthread_mutex_lock(&host->lock);
        status = wrr_batch_flush_nolock(timeout, host);
        if (status != 0)
-               ERROR("write_riemann plugin: riemann_client_send failed with status %i",
-                     status);
+        c_complain (LOG_ERR, &host->init_complaint,
+                    "write_riemann plugin: riemann_client_send failed with status %i",
+                    status);
+    else
+        c_release (LOG_DEBUG, &host->init_complaint, "write_riemann plugin: batch sent.");
 
        pthread_mutex_unlock(&host->lock);
        return status;
@@ -506,6 +521,7 @@ static int wrr_batch_add_value_list(struct riemann_host *host, /* {{{ */
        riemann_message_t *msg;
        size_t len;
        int ret;
+    cdtime_t timeout;
 
        msg = wrr_value_list_to_message(host, ds, vl, statuses);
        if (msg == NULL)
@@ -537,7 +553,12 @@ static int wrr_batch_add_value_list(struct riemann_host *host, /* {{{ */
        ret = 0;
        if ((host->batch_max < 0) || (((size_t) host->batch_max) <= len)) {
                ret = wrr_batch_flush_nolock(0, host);
-       }
+       } else {
+        if (host->batch_timeout > 0) {
+            timeout = TIME_T_TO_CDTIME_T((time_t)host->batch_timeout);
+            ret = wrr_batch_flush_nolock(timeout, host);
+        }
+    }
 
        pthread_mutex_unlock(&host->lock);
        return ret;
@@ -561,8 +582,12 @@ static int wrr_notification(const notification_t *n, user_data_t *ud) /* {{{ */
 
        status = wrr_send(host, msg);
        if (status != 0)
-               ERROR("write_riemann plugin: riemann_client_send failed with status %i",
-                     status);
+        c_complain (LOG_ERR, &host->init_complaint,
+                    "write_riemann plugin: riemann_client_send failed with status %i",
+                    status);
+    else
+        c_release (LOG_DEBUG, &host->init_complaint,
+                   "write_riemann plugin: riemann_client_send succeeded");
 
        riemann_message_free(msg);
        return (status);
@@ -593,9 +618,6 @@ static int wrr_write(const data_set_t *ds, /* {{{ */
       return (-1);
 
     status = wrr_send(host, msg);
-    if (status != 0)
-      ERROR("write_riemann plugin: riemann_client_send failed with status %i",
-            status);
 
     riemann_message_free(msg);
   }
@@ -638,6 +660,7 @@ static int wrr_config_node(oconfig_item_t *ci) /* {{{ */
     return ENOMEM;
   }
   pthread_mutex_init(&host->lock, NULL);
+  C_COMPLAIN_INIT (&host->init_complaint);
   host->reference_count = 1;
   host->node = NULL;
   host->port = 0;
@@ -648,6 +671,7 @@ static int wrr_config_node(oconfig_item_t *ci) /* {{{ */
   host->batch_mode = 1;
   host->batch_max = RIEMANN_BATCH_MAX; /* typical MSS */
   host->batch_init = cdtime();
+  host->batch_timeout = 0;
   host->ttl_factor = RIEMANN_TTL_FACTOR;
   host->client = NULL;
   host->client_type = RIEMANN_CLIENT_TCP;
@@ -693,6 +717,10 @@ static int wrr_config_node(oconfig_item_t *ci) /* {{{ */
       status = cf_util_get_int(child, &host->batch_max);
       if (status != 0)
         break;
+    } else if (strcasecmp("BatchFlushTimeout", child->key) == 0) {
+      status = cf_util_get_int(child, &host->batch_timeout);
+      if (status != 0)
+        break;
     } else if (strcasecmp("Timeout", child->key) == 0) {
       status = cf_util_get_int(child, (int *)&host->timeout.tv_sec);
       if (status != 0)