X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=src%2Fprocesses.c;h=30f4e32fb7f3bb36d5fd0a2bf216cb702caefb83;hb=4ea7a57256f5e4d77f4fff052490b7f67a9a3829;hp=727ec7fd7d269854fe8db2e309e81457b8859f24;hpb=43ee9ced15521d541dd3fb0698c9c490339dd35f;p=collectd.git diff --git a/src/processes.c b/src/processes.c index 727ec7fd..30f4e32f 100644 --- a/src/processes.c +++ b/src/processes.c @@ -1,7 +1,7 @@ /** * collectd - src/processes.c * Copyright (C) 2005 Lyonel Vincent - * Copyright (C) 2006-2010 Florian octo Forster + * Copyright (C) 2006-2017 Florian octo Forster * Copyright (C) 2008 Oleg King * Copyright (C) 2009 Sebastian Harl * Copyright (C) 2009 Andrés J. Díaz @@ -33,6 +33,7 @@ * Clément Stenac * Cosmin Ioiart * Pavel Rochnyack + * Wilfried Goesgens **/ #include "collectd.h" @@ -40,6 +41,11 @@ #include "common.h" #include "plugin.h" +#if HAVE_LIBTASKSTATS +#include "utils_complain.h" +#include "utils_taskstats.h" +#endif + /* Include header files for the mach system, if they exist.. */ #if HAVE_THREAD_INFO #if HAVE_MACH_MACH_INIT_H @@ -168,6 +174,7 @@ typedef struct process_entry_s { unsigned long num_proc; unsigned long num_lwp; unsigned long num_fd; + unsigned long num_maps; unsigned long vmem_size; unsigned long vmem_rss; unsigned long vmem_data; @@ -193,7 +200,14 @@ typedef struct process_entry_s { derive_t cswitch_invol; _Bool has_cswitch; +#if HAVE_LIBTASKSTATS + ts_delay_t delay; +#endif + _Bool has_delay; + _Bool has_fd; + + _Bool has_maps; } process_entry_t; typedef struct procstat_entry_s { @@ -217,6 +231,13 @@ typedef struct procstat_entry_s { derive_t cswitch_vol; derive_t cswitch_invol; +#if HAVE_LIBTASKSTATS + value_to_rate_state_t delay_cpu; + value_to_rate_state_t delay_blkio; + value_to_rate_state_t delay_swapin; + value_to_rate_state_t delay_freepages; +#endif + struct procstat_entry_s *next; } procstat_entry_t; @@ -229,6 +250,7 @@ typedef struct procstat { unsigned long num_proc; unsigned long num_lwp; unsigned long num_fd; + unsigned long 
num_maps; unsigned long vmem_size; unsigned long vmem_rss; unsigned long vmem_data; @@ -252,8 +274,16 @@ typedef struct procstat { derive_t cswitch_vol; derive_t cswitch_invol; + /* Linux Delay Accounting. Unit is ns/s. */ + gauge_t delay_cpu; + gauge_t delay_blkio; + gauge_t delay_swapin; + gauge_t delay_freepages; + _Bool report_fd_num; + _Bool report_maps_num; _Bool report_ctx_switch; + _Bool report_delay; struct procstat *next; struct procstat_entry_s *instances; @@ -264,6 +294,8 @@ static procstat_t *list_head_g = NULL; static _Bool want_init = 1; static _Bool report_ctx_switch = 0; static _Bool report_fd_num = 0; +static _Bool report_maps_num = 0; +static _Bool report_delay = 0; #if HAVE_THREAD_INFO static mach_port_t port_host_self; @@ -297,6 +329,10 @@ int getthrds64(pid_t, void *, int, tid64_t *, int); int getargs(void *processBuffer, int bufferLen, char *argsBuffer, int argsLen); #endif /* HAVE_PROCINFO_H */ +#if HAVE_LIBTASKSTATS +static ts_t *taskstats_handle = NULL; +#endif + /* put name of process from config to list_head_g tree * list_head_g is a list of 'procstat_t' structs with * processes names we want to watch */ @@ -322,7 +358,9 @@ static procstat_t *ps_list_register(const char *name, const char *regexp) { new->cswitch_invol = -1; new->report_fd_num = report_fd_num; + new->report_maps_num = report_maps_num; new->report_ctx_switch = report_ctx_switch; + new->report_delay = report_delay; #if HAVE_REGEX_H if (regexp != NULL) { @@ -431,6 +469,39 @@ static void ps_update_counter(derive_t *group_counter, derive_t *curr_counter, *group_counter += curr_value; } +#if HAVE_LIBTASKSTATS +static void ps_update_delay_one(gauge_t *out_rate_sum, + value_to_rate_state_t *state, uint64_t cnt, + cdtime_t t) { + gauge_t rate = NAN; + int status = value_to_rate(&rate, (value_t){.counter = (counter_t)cnt}, + DS_TYPE_COUNTER, t, state); + if ((status != 0) || isnan(rate)) { + return; + } + + if (isnan(*out_rate_sum)) { + *out_rate_sum = rate; + } else { + 
*out_rate_sum += rate; + } +} + +static void ps_update_delay(procstat_t *out, procstat_entry_t *prev, + process_entry_t *curr) { + cdtime_t now = cdtime(); + + ps_update_delay_one(&out->delay_cpu, &prev->delay_cpu, curr->delay.cpu_ns, + now); + ps_update_delay_one(&out->delay_blkio, &prev->delay_blkio, + curr->delay.blkio_ns, now); + ps_update_delay_one(&out->delay_swapin, &prev->delay_swapin, + curr->delay.swapin_ns, now); + ps_update_delay_one(&out->delay_freepages, &prev->delay_freepages, + curr->delay.freepages_ns, now); +} +#endif + /* add process entry to 'instances' of process 'name' (or refresh it) */ static void ps_list_add(const char *name, const char *cmdline, process_entry_t *entry) { @@ -472,6 +543,7 @@ static void ps_list_add(const char *name, const char *cmdline, ps->num_proc += entry->num_proc; ps->num_lwp += entry->num_lwp; ps->num_fd += entry->num_fd; + ps->num_maps += entry->num_maps; ps->vmem_size += entry->vmem_size; ps->vmem_rss += entry->vmem_rss; ps->vmem_data += entry->vmem_data; @@ -493,7 +565,7 @@ static void ps_list_add(const char *name, const char *cmdline, ps_update_counter(&ps->io_diskw, &pse->io_diskw, entry->io_diskw); } - if ((entry->cswitch_vol != -1) && (entry->cswitch_vol != -1)) { + if ((entry->cswitch_vol != -1) && (entry->cswitch_invol != -1)) { ps_update_counter(&ps->cswitch_vol, &pse->cswitch_vol, entry->cswitch_vol); ps_update_counter(&ps->cswitch_invol, &pse->cswitch_invol, @@ -509,6 +581,10 @@ static void ps_list_add(const char *name, const char *cmdline, entry->cpu_user_counter); ps_update_counter(&ps->cpu_system_counter, &pse->cpu_system_counter, entry->cpu_system_counter); + +#if HAVE_LIBTASKSTATS + ps_update_delay(ps, pse, entry); +#endif } } @@ -521,12 +597,18 @@ static void ps_list_reset(void) { ps->num_proc = 0; ps->num_lwp = 0; ps->num_fd = 0; + ps->num_maps = 0; ps->vmem_size = 0; ps->vmem_rss = 0; ps->vmem_data = 0; ps->vmem_code = 0; ps->stack_size = 0; + ps->delay_cpu = NAN; + ps->delay_blkio = NAN; + 
ps->delay_swapin = NAN; + ps->delay_freepages = NAN; + pse_prev = NULL; pse = ps->instances; while (pse != NULL) { @@ -561,8 +643,17 @@ static void ps_tune_instance(oconfig_item_t *ci, procstat_t *ps) { cf_util_get_boolean(c, &ps->report_ctx_switch); else if (strcasecmp(c->key, "CollectFileDescriptor") == 0) cf_util_get_boolean(c, &ps->report_fd_num); - else { - ERROR("processes plugin: Option `%s' not allowed here.", c->key); + else if (strcasecmp(c->key, "CollectMemoryMaps") == 0) + cf_util_get_boolean(c, &ps->report_maps_num); + else if (strcasecmp(c->key, "CollectDelayAccounting") == 0) { +#if HAVE_LIBTASKSTATS + cf_util_get_boolean(c, &ps->report_delay); +#else + WARNING("processes plugin: The plugin has been compiled without support " + "for the \"CollectDelayAccounting\" option."); +#endif + } else { + ERROR("processes plugin: Option `%s' not allowed here.", c->key); } } /* for (ci->children) */ } /* void ps_tune_instance */ @@ -619,6 +710,15 @@ static int ps_config(oconfig_item_t *ci) { cf_util_get_boolean(c, &report_ctx_switch); } else if (strcasecmp(c->key, "CollectFileDescriptor") == 0) { cf_util_get_boolean(c, &report_fd_num); + } else if (strcasecmp(c->key, "CollectMemoryMaps") == 0) { + cf_util_get_boolean(c, &report_maps_num); + } else if (strcasecmp(c->key, "CollectDelayAccounting") == 0) { +#if HAVE_LIBTASKSTATS + cf_util_get_boolean(c, &report_delay); +#else + WARNING("processes plugin: The plugin has been compiled without support " + "for the \"CollectDelayAccounting\" option."); +#endif } else { ERROR("processes plugin: The `%s' configuration option is not " "understood and will be ignored.", @@ -656,6 +756,15 @@ static int ps_init(void) { #elif KERNEL_LINUX pagesize_g = sysconf(_SC_PAGESIZE); DEBUG("pagesize_g = %li; CONFIG_HZ = %i;", pagesize_g, CONFIG_HZ); + +#if HAVE_LIBTASKSTATS + if (taskstats_handle == NULL) { + taskstats_handle = ts_create(); + if (taskstats_handle == NULL) { + WARNING("processes plugin: Creating taskstats handle 
failed."); + } + } +#endif /* #endif KERNEL_LINUX */ #elif HAVE_LIBKVM_GETPROCS && \ @@ -768,6 +877,14 @@ static void ps_submit_proc_list(procstat_t *ps) { plugin_dispatch_values(&vl); } + if (ps->num_maps > 0) { + sstrncpy(vl.type, "file_handles", sizeof(vl.type)); + sstrncpy(vl.type_instance, "mapped", sizeof(vl.type_instance)); + vl.values[0].gauge = ps->num_maps; + vl.values_len = 1; + plugin_dispatch_values(&vl); + } + if ((ps->cswitch_vol != -1) && (ps->cswitch_invol != -1)) { sstrncpy(vl.type, "contextswitch", sizeof(vl.type)); sstrncpy(vl.type_instance, "voluntary", sizeof(vl.type_instance)); @@ -782,20 +899,62 @@ static void ps_submit_proc_list(procstat_t *ps) { plugin_dispatch_values(&vl); } - DEBUG("name = %s; num_proc = %lu; num_lwp = %lu; num_fd = %lu; " - "vmem_size = %lu; vmem_rss = %lu; vmem_data = %lu; " - "vmem_code = %lu; " - "vmem_minflt_counter = %" PRIi64 "; vmem_majflt_counter = %" PRIi64 "; " - "cpu_user_counter = %" PRIi64 "; cpu_system_counter = %" PRIi64 "; " - "io_rchar = %" PRIi64 "; io_wchar = %" PRIi64 "; " - "io_syscr = %" PRIi64 "; io_syscw = %" PRIi64 "; " - "io_diskr = %" PRIi64 "; io_diskw = %" PRIi64 "; " - "cswitch_vol = %" PRIi64 "; cswitch_invol = %" PRIi64 ";", - ps->name, ps->num_proc, ps->num_lwp, ps->num_fd, ps->vmem_size, - ps->vmem_rss, ps->vmem_data, ps->vmem_code, ps->vmem_minflt_counter, - ps->vmem_majflt_counter, ps->cpu_user_counter, ps->cpu_system_counter, - ps->io_rchar, ps->io_wchar, ps->io_syscr, ps->io_syscw, ps->io_diskr, - ps->io_diskw, ps->cswitch_vol, ps->cswitch_invol); + /* The ps->delay_* metrics are in nanoseconds per second. This factor converts + * them to a percentage. 
*/ + gauge_t const delay_factor = 100.0 / 1000000000.0; + + if (!isnan(ps->delay_cpu)) { + sstrncpy(vl.type, "percent", sizeof(vl.type)); + sstrncpy(vl.type_instance, "delay-cpu", sizeof(vl.type_instance)); + vl.values[0].gauge = ps->delay_cpu * delay_factor; + vl.values_len = 1; + plugin_dispatch_values(&vl); + } + + if (!isnan(ps->delay_blkio)) { + sstrncpy(vl.type, "percent", sizeof(vl.type)); + sstrncpy(vl.type_instance, "delay-blkio", sizeof(vl.type_instance)); + vl.values[0].gauge = ps->delay_blkio * delay_factor; + vl.values_len = 1; + plugin_dispatch_values(&vl); + } + + if (!isnan(ps->delay_swapin)) { + sstrncpy(vl.type, "percent", sizeof(vl.type)); + sstrncpy(vl.type_instance, "delay-swapin", sizeof(vl.type_instance)); + vl.values[0].gauge = ps->delay_swapin * delay_factor; + vl.values_len = 1; + plugin_dispatch_values(&vl); + } + + if (!isnan(ps->delay_freepages)) { + sstrncpy(vl.type, "percent", sizeof(vl.type)); + sstrncpy(vl.type_instance, "delay-freepages", sizeof(vl.type_instance)); + vl.values[0].gauge = ps->delay_freepages * delay_factor; + vl.values_len = 1; + plugin_dispatch_values(&vl); + } + + DEBUG( + "name = %s; num_proc = %lu; num_lwp = %lu; num_fd = %lu; num_maps = %lu; " + "vmem_size = %lu; vmem_rss = %lu; vmem_data = %lu; " + "vmem_code = %lu; " + "vmem_minflt_counter = %" PRIi64 "; vmem_majflt_counter = %" PRIi64 "; " + "cpu_user_counter = %" PRIi64 "; cpu_system_counter = %" PRIi64 "; " + "io_rchar = %" PRIi64 "; io_wchar = %" PRIi64 "; " + "io_syscr = %" PRIi64 "; io_syscw = %" PRIi64 "; " + "io_diskr = %" PRIi64 "; io_diskw = %" PRIi64 "; " + "cswitch_vol = %" PRIi64 "; cswitch_invol = %" PRIi64 "; " + "delay_cpu = %g; delay_blkio = %g; " + "delay_swapin = %g; delay_freepages = %g;", + ps->name, ps->num_proc, ps->num_lwp, ps->num_fd, ps->num_maps, + ps->vmem_size, ps->vmem_rss, ps->vmem_data, ps->vmem_code, + ps->vmem_minflt_counter, ps->vmem_majflt_counter, ps->cpu_user_counter, + ps->cpu_system_counter, ps->io_rchar, ps->io_wchar, 
ps->io_syscr, + ps->io_syscw, ps->io_diskr, ps->io_diskw, ps->cswitch_vol, + ps->cswitch_invol, ps->delay_cpu, ps->delay_blkio, ps->delay_swapin, + ps->delay_freepages); + } /* void ps_submit_proc_list */ #if KERNEL_LINUX || KERNEL_SOLARIS @@ -879,9 +1038,7 @@ static int ps_read_tasks_status(process_entry_t *ps) { } /* while (fgets) */ if (fclose(fh)) { - char errbuf[1024]; - WARNING("processes: fclose: %s", - sstrerror(errno, errbuf, sizeof(errbuf))); + WARNING("processes: fclose: %s", STRERRNO); } } closedir(dh); @@ -937,8 +1094,7 @@ static int ps_read_status(long pid, process_entry_t *ps) { } /* while (fgets) */ if (fclose(fh)) { - char errbuf[1024]; - WARNING("processes: fclose: %s", sstrerror(errno, errbuf, sizeof(errbuf))); + WARNING("processes: fclose: %s", STRERRNO); } ps->vmem_data = data * 1024; @@ -998,12 +1154,35 @@ static int ps_read_io(process_entry_t *ps) { } /* while (fgets) */ if (fclose(fh)) { - char errbuf[1024]; - WARNING("processes: fclose: %s", sstrerror(errno, errbuf, sizeof(errbuf))); + WARNING("processes: fclose: %s", STRERRNO); } return 0; } /* int ps_read_io (...) */ +static int ps_count_maps(pid_t pid) { + FILE *fh; + char buffer[1024]; + char filename[64]; + int count = 0; + + snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); + if ((fh = fopen(filename, "r")) == NULL) { + DEBUG("ps_count_maps: Failed to open file `%s'", filename); + return -1; + } + + while (fgets(buffer, sizeof(buffer), fh) != NULL) { + if (strchr(buffer, '\n')) { + count++; + } + } /* while (fgets) */ + + if (fclose(fh)) { + WARNING("processes: fclose: %s", STRERRNO); + } + return count; +} /* int ps_count_maps (...) */ + static int ps_count_fd(int pid) { char dirname[64]; DIR *dh; @@ -1027,6 +1206,33 @@ static int ps_count_fd(int pid) { return (count >= 1) ? 
count : 1; } /* int ps_count_fd (pid) */ +#if HAVE_LIBTASKSTATS +static int ps_delay(process_entry_t *ps) { + if (taskstats_handle == NULL) { + return ENOTCONN; + } + + int status = ts_delay_by_tgid(taskstats_handle, (uint32_t)ps->id, &ps->delay); + if (status == EPERM) { + static c_complain_t c; + c_complain(LOG_ERR, &c, "processes plugin: reading delay information " + "failed: \"%s\". This is probably because the " + "taskstats interface requires root privileges.", + STRERROR(status)); + return status; + } else if (status != 0) { + ERROR("processes plugin: ts_delay_by_tgid failed: %s", STRERROR(status)); + return status; + } + + return 0; +} +#else +static int ps_delay(__attribute__((unused)) process_entry_t *unused) { + return -1; +} +#endif + static void ps_fill_details(const procstat_t *ps, process_entry_t *entry) { if (entry->has_io == 0) { ps_read_io(entry); @@ -1040,6 +1246,14 @@ static void ps_fill_details(const procstat_t *ps, process_entry_t *entry) { } } + if (ps->report_maps_num) { + int num_maps; + if (entry->has_maps == 0 && (num_maps = ps_count_maps(entry->id)) > 0) { + entry->num_maps = num_maps; + } + entry->has_maps = 1; + } + if (ps->report_fd_num) { int num_fd; if (entry->has_fd == 0 && (num_fd = ps_count_fd(entry->id)) > 0) { @@ -1047,8 +1261,17 @@ static void ps_fill_details(const procstat_t *ps, process_entry_t *entry) { } entry->has_fd = 1; } + +#if HAVE_LIBTASKSTATS + if (ps->report_delay && !entry->has_delay) { + if (ps_delay(entry) == 0) { + entry->has_delay = 1; + } + } +#endif } /* void ps_fill_details (...) */ +/* ps_read_process reads process counters on Linux. */ static int ps_read_process(long pid, process_entry_t *ps, char *state) { char filename[64]; char buffer[1024]; @@ -1202,12 +1425,10 @@ static char *ps_get_cmdline(long pid, char *name, char *buf, size_t buf_len) { errno = 0; fd = open(file, O_RDONLY); if (fd < 0) { - char errbuf[4096]; /* ENOENT means the process exited while we were handling it. 
* Don't complain about this, it only fills the logs. */ if (errno != ENOENT) - WARNING("processes plugin: Failed to open `%s': %s.", file, - sstrerror(errno, errbuf, sizeof(errbuf))); + WARNING("processes plugin: Failed to open `%s': %s.", file, STRERRNO); return NULL; } @@ -1222,13 +1443,12 @@ static char *ps_get_cmdline(long pid, char *name, char *buf, size_t buf_len) { status = read(fd, (void *)buf_ptr, len); if (status < 0) { - char errbuf[1024]; if ((EAGAIN == errno) || (EINTR == errno)) continue; WARNING("processes plugin: Failed to read from `%s': %s.", file, - sstrerror(errno, errbuf, sizeof(errbuf))); + STRERRNO); close(fd); return NULL; } @@ -1286,9 +1506,7 @@ static int read_fork_rate(void) { proc_stat = fopen("/proc/stat", "r"); if (proc_stat == NULL) { - char errbuf[1024]; - ERROR("processes plugin: fopen (/proc/stat) failed: %s", - sstrerror(errno, errbuf, sizeof(errbuf))); + ERROR("processes plugin: fopen (/proc/stat) failed: %s", STRERRNO); return -1; } @@ -1421,6 +1639,9 @@ static int ps_read_process(long pid, process_entry_t *ps, char *state) { */ ps->num_fd = 0; + /* Number of memory mappings */ + ps->num_maps = 0; + /* * Calculating input/ouput chars * Formula used is total chars / total blocks => chars/block @@ -1656,6 +1877,9 @@ static int ps_read(void) { /* File descriptor count not implemented */ pse.num_fd = 0; + /* Number of memory mappings */ + pse.num_maps = 0; + pse.vmem_minflt_counter = task_events_info.cow_faults; pse.vmem_majflt_counter = task_events_info.faults; @@ -1794,8 +2018,7 @@ static int ps_read(void) { ps_list_reset(); if ((proc = opendir("/proc")) == NULL) { - char errbuf[1024]; - ERROR("Cannot open `/proc': %s", sstrerror(errno, errbuf, sizeof(errbuf))); + ERROR("Cannot open `/proc': %s", STRERRNO); return -1; } @@ -1961,6 +2184,9 @@ static int ps_read(void) { /* file descriptor count not implemented */ pse.num_fd = 0; + /* Number of memory mappings */ + pse.num_maps = 0; + /* context switch counters not implemented */ 
pse.cswitch_vol = -1; pse.cswitch_invol = -1; @@ -2102,6 +2328,9 @@ static int ps_read(void) { /* file descriptor count not implemented */ pse.num_fd = 0; + /* Number of memory mappings */ + pse.num_maps = 0; + /* context switch counters not implemented */ pse.cswitch_vol = -1; pse.cswitch_invol = -1; @@ -2265,6 +2494,7 @@ static int ps_read(void) { pse.io_diskw = -1; pse.num_fd = 0; + pse.num_maps = 0; pse.cswitch_vol = -1; pse.cswitch_invol = -1;