Added new WriteQueueLengthLimit (drop values when bigger)
authorYves Mettier <ymettier@free.fr>
Thu, 14 Mar 2013 15:45:02 +0000 (16:45 +0100)
committerFlorian Forster <octo@collectd.org>
Sat, 13 Jul 2013 06:40:19 +0000 (08:40 +0200)
Signed-off-by: Florian Forster <octo@collectd.org>
src/collectd.conf.pod
src/configfile.c
src/plugin.c

index ddf3ac6..f2a971f 100644 (file)
@@ -235,6 +235,23 @@ Number of threads to start for dispatching value lists to write plugins. The
 default value is B<5>, but you may want to increase this if you have more than
 five plugins that may take relatively long to write to.
 
+=item B<WriteQueueLengthLimitHigh> I<Num>
+=item B<WriteQueueLengthLimitLow> I<Num>
+
+Default value for high limit is 0 (no limit).
+Default value for low limit is 50% of high limit.
+
+When the write queue size becomes bigger than the high limit, values I<will> be dropped.
+When the write queue size is between low and high, values I<may> be dropped (depending
+on the queue size)
+
+If high limit is set to 0, there is no limit. This is the default.
+If high limit is set, but not low limit, low will be computed as 50% of high.
+
+If you do not want to randomly drop values when the queue size is between low
+and high value, set the same value for low and high. When low=high and when the
+queue size is bigger, values are just dropped until the queue size becomes smaller.
+
 =item B<Hostname> I<Name>
 
 Sets the hostname that identifies a host. If you omit this setting, the
index d6c224f..88bed1c 100644 (file)
@@ -109,6 +109,8 @@ static cf_global_option_t cf_global_options[] =
        {"Interval",    NULL, NULL},
        {"ReadThreads", NULL, "5"},
        {"WriteThreads", NULL, "5"},
+       {"WriteQueueLengthLimitHigh", NULL, NULL},
+       {"WriteQueueLengthLimitLow", NULL, NULL},
        {"Timeout",     NULL, "2"},
        {"AutoLoadPlugin", NULL, "false"},
        {"PreCacheChain",  NULL, "PreCache"},
index 894b0e5..4e106a5 100644 (file)
@@ -108,6 +108,7 @@ static int             read_threads_num = 0;
 
 static write_queue_t  *write_queue_head;
 static write_queue_t  *write_queue_tail;
+static long           write_queue_size = 0;
 static _Bool           write_loop = 1;
 static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t  write_cond = PTHREAD_COND_INITIALIZER;
@@ -117,6 +118,11 @@ static size_t          write_threads_num = 0;
 static pthread_key_t   plugin_ctx_key;
 static _Bool           plugin_ctx_key_initialized = 0;
 
+static long            writequeuelengthlimit_high = 0;
+static long            writequeuelengthlimit_low = 0;
+
+static unsigned int    random_seed;
+
 /*
  * Static functions
  */
@@ -670,11 +676,13 @@ static int plugin_write_enqueue (value_list_t const *vl) /* {{{ */
        {
                write_queue_head = q;
                write_queue_tail = q;
+               write_queue_size = 1;
        }
        else
        {
                write_queue_tail->next = q;
                write_queue_tail = q;
+               write_queue_size += 1;
        }
 
        pthread_cond_signal (&write_cond);
@@ -701,8 +709,11 @@ static value_list_t *plugin_write_dequeue (void) /* {{{ */
 
        q = write_queue_head;
        write_queue_head = q->next;
-       if (write_queue_head == NULL)
+       write_queue_size -= 1;
+       if (write_queue_head == NULL) {
                write_queue_tail = NULL;
+               write_queue_size = 0; /* Maybe instead of setting write_queue_size to 0, we should assert(write_queue_size == 0) ? */
+               }
 
        pthread_mutex_unlock (&write_lock);
 
@@ -805,6 +816,7 @@ static void stop_write_threads (void) /* {{{ */
        }
        write_queue_head = NULL;
        write_queue_tail = NULL;
+       write_queue_size = 0;
        pthread_mutex_unlock (&write_lock);
 
        if (i > 0)
@@ -1437,6 +1449,61 @@ void plugin_init_all (void)
        post_cache_chain = fc_chain_get_by_name (chain_name);
 
        {
+               const char *str_writequeuelengthlimithigh = global_option_get ("WriteQueueLengthLimitHigh");
+               const char *str_writequeuelengthlimitlow = global_option_get ("WriteQueueLengthLimitLow");
+
+               writequeuelengthlimit_high = 0;
+               writequeuelengthlimit_low = 0;
+
+               if(NULL != str_writequeuelengthlimithigh) {
+                       errno = 0;
+                       /* get high limit */
+                       writequeuelengthlimit_high = strtol(str_writequeuelengthlimithigh, NULL, 10);
+                       if ((errno == ERANGE && (writequeuelengthlimit_high == LONG_MAX || writequeuelengthlimit_high == LONG_MIN))
+                                       || (errno != 0 && writequeuelengthlimit_high == 0)
+                          ) {
+                               writequeuelengthlimit_high = 0;
+                               ERROR("Config 'WriteQueueLengthLimitHigh' accepts one integer value only. Running with no limit !");
+                       }
+                       if(writequeuelengthlimit_high < 0) {
+                               ERROR("Config 'WriteQueueLengthLimitHigh' accepts positive values only. Running with no limit !");
+                               writequeuelengthlimit_high = 0;
+                       }
+               }
+
+               if((writequeuelengthlimit_high > 0) && (NULL != str_writequeuelengthlimitlow)) {
+                       errno = 0;
+                       /* get low limit */
+                       writequeuelengthlimit_low = strtol(str_writequeuelengthlimitlow, NULL, 10);
+                       if ((errno == ERANGE && (writequeuelengthlimit_low == LONG_MAX || writequeuelengthlimit_low == LONG_MIN))
+                                       || (errno != 0 && writequeuelengthlimit_low == 0)
+                          ) {
+                               writequeuelengthlimit_low = 0;
+                               ERROR("Config 'WriteQueueLengthLimitLow' accepts one integer value only. Using default low limit instead");
+                       }
+
+                       if(writequeuelengthlimit_low < 0) {
+                               ERROR("Config 'WriteQueueLengthLimitLow' accepts positive values only. Using default low limit instead");
+                               writequeuelengthlimit_low = 0;
+                       } else if(writequeuelengthlimit_low > writequeuelengthlimit_high) {
+                               ERROR("Config 'WriteQueueLengthLimitLow' (%ld) cannot be bigger than high limit (%ld). Using default low limit instead",
+                                               writequeuelengthlimit_low, writequeuelengthlimit_high);
+                               writequeuelengthlimit_low = 0;
+                       }
+               }
+               /* Check/compute low limit if not/badly defined */
+               if(writequeuelengthlimit_high > 0) {
+                       if(0 == writequeuelengthlimit_low) {
+                               writequeuelengthlimit_low = .5 * writequeuelengthlimit_high;
+                       }
+                       INFO("Config 'WriteQueueLengthLimit*' : Running with limits high=%ld low=%ld", writequeuelengthlimit_high, writequeuelengthlimit_low);
+                       random_seed = time(0);
+               } else {
+                       writequeuelengthlimit_low = 0; /* This should be useless, but in case... */
+               }
+       }
+
+       {
                char const *tmp = global_option_get ("WriteThreads");
                int num = atoi (tmp);
 
@@ -1953,15 +2020,60 @@ static int plugin_dispatch_values_internal (value_list_t *vl)
 int plugin_dispatch_values (value_list_t const *vl)
 {
        int status;
+       int wq_size = write_queue_size;
+       /* We store write_queue_size in a local variable because other threads may update write_queue_size.
+        * Having this in a local variable (like a cache) is better : we do not need a lock */
+       short metric_will_be_dropped = 0;
+
+       if((writequeuelengthlimit_high > 0) && (wq_size > writequeuelengthlimit_low)) {
+               if(wq_size >= writequeuelengthlimit_high) {
+                       /* if high == low, we come here too */
+                       metric_will_be_dropped = 1;
+               } else {
+                       /* here, high != low */
+                       long probability_to_drop;
+                       long n;
+
+                       probability_to_drop = (wq_size - writequeuelengthlimit_low);
+
+                       /* We use rand_r with its bad RNG because it's enough for playing dices.
+                        * There is no security consideration here so rand_r() should be enough here.
+                        */
+                       n = rand_r(&random_seed) % (writequeuelengthlimit_high - writequeuelengthlimit_low) ;
+
+                       /* Let's have X = high - low.
+                        *   n is in range [0..X]
+                        *   probability_to_drop is in range [1..X[
+                        *   probability_to_drop gets bigger when wq_size gets bigger.
+                        */
+                       if(n <= probability_to_drop) {
+                               metric_will_be_dropped = 1;
+                       }
+               }
+       }
 
-       status = plugin_write_enqueue (vl);
-       if (status != 0)
+       if( ! metric_will_be_dropped) {
+               status = plugin_write_enqueue (vl);
+               if (status != 0)
+               {
+                       char errbuf[1024];
+                       ERROR ("plugin_dispatch_values: plugin_write_enqueue failed "
+                                       "with status %i (%s).", status,
+                                       sstrerror (status, errbuf, sizeof (errbuf)));
+                       return (status);
+               }
+       }
+       else
        {
-               char errbuf[1024];
-               ERROR ("plugin_dispatch_values: plugin_write_enqueue failed "
-                               "with status %i (%s).", status,
-                               sstrerror (status, errbuf, sizeof (errbuf)));
-               return (status);
+               /* If you want to count dropped metrics, don't forget to add a lock here */
+               /* dropped_metrics++; */
+               ERROR ("plugin_dispatch_values: value dropped (write queue %ld > %ld) : time = %.3f; interval = %.3f ; %s/%s%s%s/%s%s%s",
+                       write_queue_size, writequeuelengthlimit_low,
+                       CDTIME_T_TO_DOUBLE (vl->time),
+                       CDTIME_T_TO_DOUBLE (vl->interval),
+                       vl->host,
+                       vl->plugin, vl->plugin_instance[0]?"-":"", vl->plugin_instance,
+                       vl->type, vl->type_instance[0]?"-":"", vl->type_instance);
        }
 
        return (0);