processes plugin: Implement the "CollectDelayAccounting" option.
[collectd.git] / src / processes.c
index 30c4954..30f4e32 100644 (file)
@@ -1,7 +1,7 @@
 /**
  * collectd - src/processes.c
  * Copyright (C) 2005       Lyonel Vincent
- * Copyright (C) 2006-2010  Florian octo Forster
+ * Copyright (C) 2006-2017  Florian octo Forster
  * Copyright (C) 2008       Oleg King
  * Copyright (C) 2009       Sebastian Harl
  * Copyright (C) 2009       Andrés J. Díaz
 #include "common.h"
 #include "plugin.h"
 
+#if HAVE_LIBTASKSTATS
+#include "utils_complain.h"
+#include "utils_taskstats.h"
+#endif
+
 /* Include header files for the mach system, if they exist.. */
 #if HAVE_THREAD_INFO
 #if HAVE_MACH_MACH_INIT_H
@@ -195,6 +200,11 @@ typedef struct process_entry_s {
   derive_t cswitch_invol;
   _Bool has_cswitch;
 
+#if HAVE_LIBTASKSTATS
+  ts_delay_t delay;
+#endif
+  _Bool has_delay;
+
   _Bool has_fd;
 
   _Bool has_maps;
@@ -221,6 +231,13 @@ typedef struct procstat_entry_s {
   derive_t cswitch_vol;
   derive_t cswitch_invol;
 
+#if HAVE_LIBTASKSTATS
+  value_to_rate_state_t delay_cpu;
+  value_to_rate_state_t delay_blkio;
+  value_to_rate_state_t delay_swapin;
+  value_to_rate_state_t delay_freepages;
+#endif
+
   struct procstat_entry_s *next;
 } procstat_entry_t;
 
@@ -257,9 +274,16 @@ typedef struct procstat {
   derive_t cswitch_vol;
   derive_t cswitch_invol;
 
+  /* Linux Delay Accounting. Unit is ns/s. */
+  gauge_t delay_cpu;
+  gauge_t delay_blkio;
+  gauge_t delay_swapin;
+  gauge_t delay_freepages;
+
   _Bool report_fd_num;
   _Bool report_maps_num;
   _Bool report_ctx_switch;
+  _Bool report_delay;
 
   struct procstat *next;
   struct procstat_entry_s *instances;
@@ -271,6 +295,7 @@ static _Bool want_init = 1;
 static _Bool report_ctx_switch = 0;
 static _Bool report_fd_num = 0;
 static _Bool report_maps_num = 0;
+static _Bool report_delay = 0;
 
 #if HAVE_THREAD_INFO
 static mach_port_t port_host_self;
@@ -304,6 +329,10 @@ int getthrds64(pid_t, void *, int, tid64_t *, int);
 int getargs(void *processBuffer, int bufferLen, char *argsBuffer, int argsLen);
 #endif /* HAVE_PROCINFO_H */
 
+#if HAVE_LIBTASKSTATS
+static ts_t *taskstats_handle = NULL;
+#endif
+
 /* put name of process from config to list_head_g tree
  * list_head_g is a list of 'procstat_t' structs with
  * processes names we want to watch */
@@ -331,6 +360,7 @@ static procstat_t *ps_list_register(const char *name, const char *regexp) {
   new->report_fd_num = report_fd_num;
   new->report_maps_num = report_maps_num;
   new->report_ctx_switch = report_ctx_switch;
+  new->report_delay = report_delay;
 
 #if HAVE_REGEX_H
   if (regexp != NULL) {
@@ -439,6 +469,39 @@ static void ps_update_counter(derive_t *group_counter, derive_t *curr_counter,
   *group_counter += curr_value;
 }
 
+#if HAVE_LIBTASKSTATS
+static void ps_update_delay_one(gauge_t *out_rate_sum,
+                                value_to_rate_state_t *state, uint64_t cnt,
+                                cdtime_t t) { /* add cnt's rate to the sum */
+  gauge_t rate = NAN; /* stays NAN unless value_to_rate() stores a rate */
+  int status = value_to_rate(&rate, (value_t){.counter = (counter_t)cnt},
+                             DS_TYPE_COUNTER, t, state);
+  if ((status != 0) || isnan(rate)) { /* no usable rate: sum is untouched */
+    return;
+  }
+
+  if (isnan(*out_rate_sum)) { /* NAN sum: nothing accumulated so far */
+    *out_rate_sum = rate;
+  } else {
+    *out_rate_sum += rate;
+  }
+}
+
+static void ps_update_delay(procstat_t *out, procstat_entry_t *prev,
+                            process_entry_t *curr) { /* update all 4 delays */
+  cdtime_t now = cdtime(); /* one timestamp shared by all four updates */
+
+  ps_update_delay_one(&out->delay_cpu, &prev->delay_cpu, curr->delay.cpu_ns,
+                      now); /* out: group sum; prev: per-instance rate state */
+  ps_update_delay_one(&out->delay_blkio, &prev->delay_blkio,
+                      curr->delay.blkio_ns, now);
+  ps_update_delay_one(&out->delay_swapin, &prev->delay_swapin,
+                      curr->delay.swapin_ns, now);
+  ps_update_delay_one(&out->delay_freepages, &prev->delay_freepages,
+                      curr->delay.freepages_ns, now);
+}
+#endif
+
 /* add process entry to 'instances' of process 'name' (or refresh it) */
 static void ps_list_add(const char *name, const char *cmdline,
                         process_entry_t *entry) {
@@ -518,6 +581,10 @@ static void ps_list_add(const char *name, const char *cmdline,
                       entry->cpu_user_counter);
     ps_update_counter(&ps->cpu_system_counter, &pse->cpu_system_counter,
                       entry->cpu_system_counter);
+
+#if HAVE_LIBTASKSTATS
+    ps_update_delay(ps, pse, entry);
+#endif
   }
 }
 
@@ -537,6 +604,11 @@ static void ps_list_reset(void) {
     ps->vmem_code = 0;
     ps->stack_size = 0;
 
+    ps->delay_cpu = NAN;
+    ps->delay_blkio = NAN;
+    ps->delay_swapin = NAN;
+    ps->delay_freepages = NAN;
+
     pse_prev = NULL;
     pse = ps->instances;
     while (pse != NULL) {
@@ -573,8 +645,15 @@ static void ps_tune_instance(oconfig_item_t *ci, procstat_t *ps) {
       cf_util_get_boolean(c, &ps->report_fd_num);
     else if (strcasecmp(c->key, "CollectMemoryMaps") == 0)
       cf_util_get_boolean(c, &ps->report_maps_num);
-    else {
-      ERROR("processes plugin: Option `%s' not allowed here.", c->key);
+    else if (strcasecmp(c->key, "CollectDelayAccounting") == 0) {
+#if HAVE_LIBTASKSTATS
+      cf_util_get_boolean(c, &ps->report_delay);
+#else
+      WARNING("processes plugin: The plugin has been compiled without support "
+              "for the \"CollectDelayAccounting\" option.");
+#endif
+    } else {
+      ERROR("processes plugin: Option `%s' not allowed here.", c->key);
     }
   } /* for (ci->children) */
 } /* void ps_tune_instance */
@@ -633,6 +712,13 @@ static int ps_config(oconfig_item_t *ci) {
       cf_util_get_boolean(c, &report_fd_num);
     } else if (strcasecmp(c->key, "CollectMemoryMaps") == 0) {
       cf_util_get_boolean(c, &report_maps_num);
+    } else if (strcasecmp(c->key, "CollectDelayAccounting") == 0) {
+#if HAVE_LIBTASKSTATS
+      cf_util_get_boolean(c, &report_delay);
+#else
+      WARNING("processes plugin: The plugin has been compiled without support "
+              "for the \"CollectDelayAccounting\" option.");
+#endif
     } else {
       ERROR("processes plugin: The `%s' configuration option is not "
             "understood and will be ignored.",
@@ -670,6 +756,15 @@ static int ps_init(void) {
 #elif KERNEL_LINUX
   pagesize_g = sysconf(_SC_PAGESIZE);
   DEBUG("pagesize_g = %li; CONFIG_HZ = %i;", pagesize_g, CONFIG_HZ);
+
+#if HAVE_LIBTASKSTATS
+  if (taskstats_handle == NULL) {
+    taskstats_handle = ts_create();
+    if (taskstats_handle == NULL) {
+      WARNING("processes plugin: Creating taskstats handle failed.");
+    }
+  }
+#endif
 /* #endif KERNEL_LINUX */
 
 #elif HAVE_LIBKVM_GETPROCS &&                                                  \
@@ -804,6 +899,42 @@ static void ps_submit_proc_list(procstat_t *ps) {
     plugin_dispatch_values(&vl);
   }
 
+  /* The ps->delay_* metrics are in nanoseconds per second. This factor converts
+   * them to a percentage. */
+  gauge_t const delay_factor = 100.0 / 1000000000.0;
+
+  if (!isnan(ps->delay_cpu)) {
+    sstrncpy(vl.type, "percent", sizeof(vl.type));
+    sstrncpy(vl.type_instance, "delay-cpu", sizeof(vl.type_instance));
+    vl.values[0].gauge = ps->delay_cpu * delay_factor;
+    vl.values_len = 1;
+    plugin_dispatch_values(&vl);
+  }
+
+  if (!isnan(ps->delay_blkio)) {
+    sstrncpy(vl.type, "percent", sizeof(vl.type));
+    sstrncpy(vl.type_instance, "delay-blkio", sizeof(vl.type_instance));
+    vl.values[0].gauge = ps->delay_blkio * delay_factor;
+    vl.values_len = 1;
+    plugin_dispatch_values(&vl);
+  }
+
+  if (!isnan(ps->delay_swapin)) {
+    sstrncpy(vl.type, "percent", sizeof(vl.type));
+    sstrncpy(vl.type_instance, "delay-swapin", sizeof(vl.type_instance));
+    vl.values[0].gauge = ps->delay_swapin * delay_factor;
+    vl.values_len = 1;
+    plugin_dispatch_values(&vl);
+  }
+
+  if (!isnan(ps->delay_freepages)) {
+    sstrncpy(vl.type, "percent", sizeof(vl.type));
+    sstrncpy(vl.type_instance, "delay-freepages", sizeof(vl.type_instance));
+    vl.values[0].gauge = ps->delay_freepages * delay_factor;
+    vl.values_len = 1;
+    plugin_dispatch_values(&vl);
+  }
+
   DEBUG(
       "name = %s; num_proc = %lu; num_lwp = %lu; num_fd = %lu; num_maps = %lu; "
       "vmem_size = %lu; vmem_rss = %lu; vmem_data = %lu; "
@@ -813,13 +944,16 @@ static void ps_submit_proc_list(procstat_t *ps) {
       "io_rchar = %" PRIi64 "; io_wchar = %" PRIi64 "; "
       "io_syscr = %" PRIi64 "; io_syscw = %" PRIi64 "; "
       "io_diskr = %" PRIi64 "; io_diskw = %" PRIi64 "; "
-      "cswitch_vol = %" PRIi64 "; cswitch_invol = %" PRIi64 ";",
+      "cswitch_vol = %" PRIi64 "; cswitch_invol = %" PRIi64 "; "
+      "delay_cpu = %g; delay_blkio = %g; "
+      "delay_swapin = %g; delay_freepages = %g;",
       ps->name, ps->num_proc, ps->num_lwp, ps->num_fd, ps->num_maps,
       ps->vmem_size, ps->vmem_rss, ps->vmem_data, ps->vmem_code,
       ps->vmem_minflt_counter, ps->vmem_majflt_counter, ps->cpu_user_counter,
       ps->cpu_system_counter, ps->io_rchar, ps->io_wchar, ps->io_syscr,
       ps->io_syscw, ps->io_diskr, ps->io_diskw, ps->cswitch_vol,
-      ps->cswitch_invol);
+      ps->cswitch_invol, ps->delay_cpu, ps->delay_blkio, ps->delay_swapin,
+      ps->delay_freepages);
 
 } /* void ps_submit_proc_list */
 
@@ -1072,6 +1206,33 @@ static int ps_count_fd(int pid) {
   return (count >= 1) ? count : 1;
 } /* int ps_count_fd (pid) */
 
+#if HAVE_LIBTASKSTATS
+static int ps_delay(process_entry_t *ps) { /* fill ps->delay; 0 on success */
+  if (taskstats_handle == NULL) { /* no taskstats handle to query with */
+    return ENOTCONN;
+  }
+
+  int status = ts_delay_by_tgid(taskstats_handle, (uint32_t)ps->id, &ps->delay);
+  if (status == EPERM) {
+    static c_complain_t c; /* static, so this state is kept across calls */
+    /* NOTE(review): c_complain presumably throttles repeats -- confirm. */
+    c_complain(LOG_ERR, &c, "processes plugin: reading delay information "
+                            "failed: \"%s\". This is probably because the "
+                            "taskstats interface requires root privileges.",
+               STRERROR(status));
+    return status;
+  } else if (status != 0) { /* any other failure is logged on every call */
+    ERROR("processes plugin: ts_delay_by_tgid failed: %s", STRERROR(status));
+    return status;
+  }
+
+  return 0;
+}
+#else
+static int ps_delay(__attribute__((unused)) process_entry_t *unused) {
+  return -1; /* stub for builds without libtaskstats: always non-zero */
+}
+#endif
+
 static void ps_fill_details(const procstat_t *ps, process_entry_t *entry) {
   if (entry->has_io == 0) {
     ps_read_io(entry);
@@ -1100,8 +1261,17 @@ static void ps_fill_details(const procstat_t *ps, process_entry_t *entry) {
     }
     entry->has_fd = 1;
   }
+
+#if HAVE_LIBTASKSTATS
+  if (ps->report_delay && !entry->has_delay) {
+    if (ps_delay(entry) == 0) {
+      entry->has_delay = 1;
+    }
+  }
+#endif
 } /* void ps_fill_details (...) */
 
+/* ps_read_process reads process counters on Linux. */
 static int ps_read_process(long pid, process_entry_t *ps, char *state) {
   char filename[64];
   char buffer[1024];