X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=src%2Fprocesses.c;h=619b6e3777abf1ef2601bf1f1c0746c8b65ebb70;hb=9b3c5eb42021a7043fd0f4d452807df1a6ba877d;hp=dba29eff628c5d058e02b5f14e9d658fbda96b73;hpb=f17fc1f1e4091ab249f88bfc00109c2a90a9bd6a;p=collectd.git diff --git a/src/processes.c b/src/processes.c index dba29eff..619b6e37 100644 --- a/src/processes.c +++ b/src/processes.c @@ -26,47 +26,59 @@ #include "common.h" #include "plugin.h" #include "utils_debug.h" +#include "configfile.h" /* Include header files for the mach system, if they exist.. */ -#if HAVE_MACH_MACH_INIT_H -# include -#endif -#if HAVE_MACH_HOST_PRIV_H -# include -#endif -#if HAVE_MACH_MACH_ERROR_H -# include -#endif -#if HAVE_MACH_MACH_HOST_H -# include -#endif -#if HAVE_MACH_MACH_PORT_H -# include -#endif -#if HAVE_MACH_MACH_TYPES_H -# include -#endif -#if HAVE_MACH_MESSAGE_H -# include -#endif -#if HAVE_MACH_PROCESSOR_SET_H -# include -#endif -#if HAVE_MACH_TASK_H -# include -#endif -#if HAVE_MACH_THREAD_ACT_H -# include -#endif -#if HAVE_MACH_VM_REGION_H -# include -#endif -#if HAVE_MACH_VM_MAP_H -# include -#endif -#if HAVE_MACH_VM_PROT_H -# include -#endif +#if HAVE_THREAD_INFO +# if HAVE_MACH_MACH_INIT_H +# include +# endif +# if HAVE_MACH_HOST_PRIV_H +# include +# endif +# if HAVE_MACH_MACH_ERROR_H +# include +# endif +# if HAVE_MACH_MACH_HOST_H +# include +# endif +# if HAVE_MACH_MACH_PORT_H +# include +# endif +# if HAVE_MACH_MACH_TYPES_H +# include +# endif +# if HAVE_MACH_MESSAGE_H +# include +# endif +# if HAVE_MACH_PROCESSOR_SET_H +# include +# endif +# if HAVE_MACH_TASK_H +# include +# endif +# if HAVE_MACH_THREAD_ACT_H +# include +# endif +# if HAVE_MACH_VM_REGION_H +# include +# endif +# if HAVE_MACH_VM_MAP_H +# include +# endif +# if HAVE_MACH_VM_PROT_H +# include +# endif +/* #endif HAVE_THREAD_INFO */ + +#elif KERNEL_LINUX +# if HAVE_LINUX_CONFIG_H +# include +# endif +# ifndef CONFIG_HZ +# define CONFIG_HZ 100 +# endif +#endif /* KERNEL_LINUX */ #define MODULE_NAME "processes" @@ -78,9 +90,8 @@ #define BUFSIZE 256 -static char *ps_file = "processes.rrd"; - -static char *ds_def[] = +static char *processes_file = "processes.rrd"; +static char *processes_ds_def[] = { "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535", "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535", @@ -90,7 +101,95 @@ static char *ds_def[] = "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535", NULL }; -static int ds_num = 6; +static int processes_ds_num = 6; + +static char *ps_rss_file = "processes/ps_rss-%s.rrd"; +static char *ps_rss_ds_def[] = +{ + /* max = 2^63 - 1 */ + "DS:byte:GAUGE:"COLLECTD_HEARTBEAT":0:9223372036854775807", + NULL +}; +static int ps_rss_ds_num = 1; + +static char *ps_cputime_file = "processes/ps_cputime-%s.rrd"; +static char *ps_cputime_ds_def[] = +{ + /* 1 second in user-mode per second ought to be enough.. */ + "DS:user:COUNTER:"COLLECTD_HEARTBEAT":0:1000000", + "DS:syst:COUNTER:"COLLECTD_HEARTBEAT":0:1000000", + NULL +}; +static int ps_cputime_ds_num = 2; + +static char *ps_count_file = "processes/ps_count-%s.rrd"; +static char *ps_count_ds_def[] = +{ + "DS:processes:GAUGE:"COLLECTD_HEARTBEAT":0:65535", + "DS:threads:GAUGE:"COLLECTD_HEARTBEAT":0:65535", + NULL +}; +static int ps_count_ds_num = 2; + +static char *ps_pagefaults_file = "processes/ps_pagefaults-%s.rrd"; +static char *ps_pagefaults_ds_def[] = +{ + /* max = 2^63 - 1 */ + "DS:minflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807", + "DS:majflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807", + NULL +}; +static int ps_pagefaults_ds_num = 2; + +static char *config_keys[] = +{ + "Process", + NULL +}; +static int config_keys_num = 1; + +typedef struct procstat_entry_s +{ + unsigned long id; + unsigned long age; + + unsigned long num_proc; + unsigned long num_lwp; + unsigned long vmem_rss; + + unsigned long vmem_minflt; + unsigned long vmem_majflt; + unsigned long vmem_minflt_counter; + unsigned long vmem_majflt_counter; + + unsigned long cpu_user; + unsigned long cpu_system; + unsigned long cpu_user_counter; + unsigned long cpu_system_counter; + + struct procstat_entry_s *next; +} procstat_entry_t; + +#define PROCSTAT_NAME_LEN 256 +typedef struct procstat +{ + char name[PROCSTAT_NAME_LEN]; + + unsigned long num_proc; + unsigned long num_lwp; + unsigned long vmem_rss; + + unsigned long vmem_minflt_counter; + unsigned long vmem_majflt_counter; + + unsigned long cpu_user_counter; + unsigned long cpu_system_counter; + + struct procstat *next; + struct procstat_entry_s *instances; +} procstat_t; + +static procstat_t *list_head_g = NULL; #if HAVE_THREAD_INFO static mach_port_t port_host_self; @@ -101,9 +200,226 @@ static mach_msg_type_number_t pset_list_len; /* #endif HAVE_THREAD_INFO */ #elif KERNEL_LINUX -/* No global variables */ +static long pagesize_g; #endif /* KERNEL_LINUX */ +#if HAVE_THREAD_INFO +/* ps_list_* not used yet */ +/* #endif HAVE_THREAD_INFO */ + +#elif KERNEL_LINUX +static void ps_list_register (const char *name) +{ + procstat_t *new; + procstat_t *ptr; + + if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL) + return; + memset (new, 0, sizeof (procstat_t)); + strncpy (new->name, name, PROCSTAT_NAME_LEN); + + for (ptr = list_head_g; ptr != NULL; ptr = ptr->next) + { + if (strcmp (ptr->name, name) == 0) + return; + if (ptr->next == NULL) + break; + } + + if (ptr == NULL) + list_head_g = new; + else + ptr->next = new; +} + +static procstat_t *ps_list_search (const char *name) +{ + procstat_t *ptr; + + for (ptr = list_head_g; ptr != NULL; ptr = ptr->next) + if (strcmp (ptr->name, name) == 0) + break; + + return (ptr); +} + +static void ps_list_add (const char *name, procstat_entry_t *entry) +{ + procstat_t *ps; + procstat_entry_t *pse; + + if (entry->id == 0) + return; + + if ((ps = ps_list_search (name)) == NULL) + return; + + for (pse = ps->instances; pse != NULL; pse = pse->next) + if ((pse->id == entry->id) || (pse->next == NULL)) + break; + + if ((pse == NULL) || (pse->id != entry->id)) + { + procstat_entry_t *new; + + new = (procstat_entry_t *) malloc (sizeof (procstat_entry_t)); + if (new == NULL) + return; + memset (new, 0, sizeof (procstat_entry_t)); + new->id = entry->id; + + if (pse == NULL) + ps->instances = new; + else + pse->next = new; + + pse = new; + } + + pse->age = 0; + pse->num_proc = entry->num_proc; + pse->num_lwp = entry->num_lwp; + pse->vmem_rss = entry->vmem_rss; + + ps->num_proc += pse->num_proc; + ps->num_lwp += pse->num_lwp; + ps->vmem_rss += pse->vmem_rss; + + if ((entry->vmem_minflt_counter == 0) + && (entry->vmem_majflt_counter == 0)) + { + pse->vmem_minflt_counter += entry->vmem_minflt; + pse->vmem_minflt = entry->vmem_minflt; + + pse->vmem_majflt_counter += entry->vmem_majflt; + pse->vmem_majflt = entry->vmem_majflt; + } + else + { + if (entry->vmem_minflt_counter < pse->vmem_minflt_counter) + { + pse->vmem_minflt = entry->vmem_minflt_counter + + (ULONG_MAX - pse->vmem_minflt_counter); + } + else + { + pse->vmem_minflt = entry->vmem_minflt_counter - pse->vmem_minflt_counter; + } + pse->vmem_minflt_counter = entry->vmem_minflt_counter; + + if (entry->vmem_majflt_counter < pse->vmem_majflt_counter) + { + pse->vmem_majflt = entry->vmem_majflt_counter + + (ULONG_MAX - pse->vmem_majflt_counter); + } + else + { + pse->vmem_majflt = entry->vmem_majflt_counter - pse->vmem_majflt_counter; + } + pse->vmem_majflt_counter = entry->vmem_majflt_counter; + } + + ps->vmem_minflt_counter += pse->vmem_minflt; + ps->vmem_majflt_counter += pse->vmem_majflt; + + if ((entry->cpu_user_counter == 0) + && (entry->cpu_system_counter == 0)) + { + pse->cpu_user_counter += entry->cpu_user; + pse->cpu_user = entry->cpu_user; + + pse->cpu_system_counter += entry->cpu_system; + pse->cpu_system = entry->cpu_system; + } + else + { + if (entry->cpu_user_counter < pse->cpu_user_counter) + { + pse->cpu_user = entry->cpu_user_counter + + (ULONG_MAX - pse->cpu_user_counter); + } + else + { + pse->cpu_user = entry->cpu_user_counter - pse->cpu_user_counter; + } + pse->cpu_user_counter = entry->cpu_user_counter; + + if (entry->cpu_system_counter < pse->cpu_system_counter) + { + pse->cpu_system = entry->cpu_system_counter + + (ULONG_MAX - pse->cpu_system_counter); + } + else + { + pse->cpu_system = entry->cpu_system_counter - pse->cpu_system_counter; + } + pse->cpu_system_counter = entry->cpu_system_counter; + } + + ps->cpu_user_counter += pse->cpu_user; + ps->cpu_system_counter += pse->cpu_system; +} + +static void ps_list_reset (void) +{ + procstat_t *ps; + procstat_entry_t *pse; + procstat_entry_t *pse_prev; + + for (ps = list_head_g; ps != NULL; ps = ps->next) + { + ps->num_proc = 0; + ps->num_lwp = 0; + ps->vmem_rss = 0; + + pse_prev = NULL; + pse = ps->instances; + while (pse != NULL) + { + if (pse->age > 10) + { + DBG ("Removing this procstat entry cause it's too old: " + "id = %lu; name = %s;", + pse->id, ps->name); + + if (pse_prev == NULL) + { + ps->instances = pse->next; + free (pse); + pse = ps->instances; + } + else + { + pse_prev->next = pse->next; + free (pse); + pse = pse_prev->next; + } + } + else + { + pse->age++; + pse_prev = pse; + pse = pse->next; + } + } /* while (pse != NULL) */ + } /* for (ps = list_head_g; ps != NULL; ps = ps->next) */ +} +#endif /* KERNEL_LINUX */ + +static int ps_config (char *key, char *value) +{ + if (strcasecmp (key, "Process") == 0) + { + ps_list_register (value); + } + else + { + return (-1); + } + + return (0); +} + static void ps_init (void) { #if HAVE_THREAD_INFO @@ -134,7 +450,9 @@ static void ps_init (void) /* #endif HAVE_THREAD_INFO */ #elif KERNEL_LINUX - /* No init */ + pagesize_g = sysconf(_SC_PAGESIZE); + DBG ("pagesize_g = %li; CONFIG_HZ = %i;", + pagesize_g, CONFIG_HZ); #endif /* KERNEL_LINUX */ return; @@ -142,7 +460,65 @@ static void ps_init (void) static void ps_write (char *host, char *inst, char *val) { - rrd_update_file (host, ps_file, val, ds_def, ds_num); + rrd_update_file (host, processes_file, val, + processes_ds_def, processes_ds_num); +} + +static void ps_rss_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_rss_file, inst); + if ((status < 1) || (status >= 256)) + return; + + rrd_update_file (host, filename, val, ps_rss_ds_def, ps_rss_ds_num); +} + +static void ps_cputime_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_cputime_file, inst); + if ((status < 1) || (status >= 256)) + return; + + DBG ("host = %s; filename = %s; val = %s;", + host, filename, val); + rrd_update_file (host, filename, val, + ps_cputime_ds_def, ps_cputime_ds_num); +} + +static void ps_count_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_count_file, inst); + if ((status < 1) || (status >= 256)) + return; + + DBG ("host = %s; filename = %s; val = %s;", + host, filename, val); + rrd_update_file (host, filename, val, + ps_count_ds_def, ps_count_ds_num); +} + +static void ps_pagefaults_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_pagefaults_file, inst); + if ((status < 1) || (status >= 256)) + return; + + DBG ("host = %s; filename = %s; val = %s;", + host, filename, val); + rrd_update_file (host, filename, val, + ps_pagefaults_ds_def, ps_pagefaults_ds_num); } #if PROCESSES_HAVE_READ @@ -167,6 +543,219 @@ static void ps_submit (int running, plugin_submit (MODULE_NAME, "-", buf); } +static void ps_submit_proc_list (procstat_t *ps) +{ + char buffer[64]; + + if (ps == NULL) + return; + + snprintf (buffer, 64, "%u:%lu", + (unsigned int) curtime, + ps->vmem_rss); + buffer[63] = '\0'; + plugin_submit ("ps_rss", ps->name, buffer); + + snprintf (buffer, 64, "%u:%u:%u", + (unsigned int) curtime, + /* Make the counter overflow */ + (unsigned int) (ps->cpu_user_counter & 0xFFFFFFFF), + (unsigned int) (ps->cpu_system_counter & 0xFFFFFFFF)); + buffer[63] = '\0'; + plugin_submit ("ps_cputime", ps->name, buffer); + + snprintf (buffer, 64, "%u:%lu:%lu", + (unsigned int) curtime, + ps->num_proc, ps->num_lwp); + buffer[63] = '\0'; + plugin_submit ("ps_count", ps->name, buffer); + + snprintf (buffer, 64, "%u:%lu:%lu", + (unsigned int) curtime, + ps->vmem_minflt_counter, ps->vmem_majflt_counter); + buffer[63] = '\0'; + plugin_submit ("ps_pagefaults", ps->name, buffer); + + DBG ("name = %s; num_proc = %lu; num_lwp = %lu; vmem_rss = %lu; " + "vmem_minflt_counter = %i; vmem_majflt_counter = %i; " + "cpu_user_counter = %i; cpu_system_counter = %i;", + ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss, + ps->vmem_minflt_counter, ps->vmem_majflt_counter, ps->cpu_user_counter, + ps->cpu_system_counter); + +} + +#if KERNEL_LINUX +static int *ps_read_tasks (int pid) +{ + int *list = NULL; + int list_size = 1; /* size of allocated space, in elements */ + int list_len = 0; /* number of currently used elements */ + + char dirname[64]; + DIR *dh; + struct dirent *ent; + + snprintf (dirname, 64, "/proc/%i/task", pid); + dirname[63] = '\0'; + + if ((dh = opendir (dirname)) == NULL) + { + syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'", + dirname); + return (NULL); + } + + while ((ent = readdir (dh)) != NULL) + { + if (!isdigit (ent->d_name[0])) + continue; + + if ((list_len + 1) >= list_size) + { + int *new_ptr; + int new_size = 2 * list_size; + /* Comes in sizes: 2, 4, 8, 16, ... */ + + new_ptr = (int *) realloc (list, (size_t) (sizeof (int) * new_size)); + if (new_ptr == NULL) + { + if (list != NULL) + free (list); + syslog (LOG_ERR, "processes plugin: " + "Failed to allocate more memory."); + return (NULL); + } + + list = new_ptr; + list_size = new_size; + + memset (list + list_len, 0, sizeof (int) * (list_size - list_len)); + } + + list[list_len] = atoi (ent->d_name); + if (list[list_len] != 0) + list_len++; + } + + closedir (dh); + + assert (list_len < list_size); + assert (list[list_len] == 0); + + return (list); +} + +int ps_read_process (int pid, procstat_t *ps, char *state) +{ + char filename[64]; + char buffer[1024]; + FILE *fh; + + char *fields[64]; + char fields_len; + + int *tasks; + int i; + + int ppid; + int name_len; + + long long unsigned cpu_user_counter; + long long unsigned cpu_system_counter; + long long unsigned vmem_rss; + + memset (ps, 0, sizeof (procstat_t)); + + snprintf (filename, 64, "/proc/%i/stat", pid); + filename[63] = '\0'; + + if ((fh = fopen (filename, "r")) == NULL) + return (-1); + + if (fgets (buffer, 1024, fh) == NULL) + { + fclose (fh); + return (-1); + } + + fclose (fh); + + fields_len = strsplit (buffer, fields, 64); + if (fields_len < 24) + { + DBG ("`%s' has only %i fields..", + filename, fields_len); + return (-1); + } + else if (fields_len != 41) + { + DBG ("WARNING: (fields_len = %i) != 41", fields_len); + } + + /* copy the name, strip brackets in the process */ + name_len = strlen (fields[1]) - 2; + if ((fields[1][0] != '(') || (fields[1][name_len + 1] != ')')) + { + DBG ("No brackets found in process name: `%s'", fields[1]); + return (-1); + } + fields[1] = fields[1] + 1; + fields[1][name_len] = '\0'; + strncpy (ps->name, fields[1], PROCSTAT_NAME_LEN); + + ppid = atoi (fields[3]); + + if ((tasks = ps_read_tasks (pid)) == NULL) + { + /* This happends for zombied, e.g. */ + DBG ("ps_read_tasks (%i) failed.", pid); + *state = 'Z'; + ps->num_lwp = 0; + ps->num_proc = 0; + } + else + { + *state = '\0'; + ps->num_lwp = 0; + ps->num_proc = 1; + for (i = 0; tasks[i] != 0; i++) + ps->num_lwp++; + + free (tasks); + tasks = NULL; + } + + /* Leave the rest at zero if this is only an LWP */ + if (ps->num_proc == 0) + { + DBG ("This is only an LWP: pid = %i; name = %s;", + pid, ps->name); + return (0); + } + + cpu_user_counter = atoll (fields[13]); + cpu_system_counter = atoll (fields[14]); + vmem_rss = atoll (fields[23]); + ps->vmem_minflt_counter = atol (fields[9]); + ps->vmem_majflt_counter = atol (fields[11]); + + /* Convert jiffies to useconds */ + cpu_user_counter = cpu_user_counter * 1000000 / CONFIG_HZ; + cpu_system_counter = cpu_system_counter * 1000000 / CONFIG_HZ; + vmem_rss = vmem_rss * pagesize_g; + + ps->cpu_user_counter = (unsigned long) cpu_user_counter; + ps->cpu_system_counter = (unsigned long) cpu_system_counter; + ps->vmem_rss = (unsigned long) vmem_rss; + + *state = fields[2][0]; + + /* success */ + return (0); +} /* int ps_read_process (...) */ +#endif /* KERNEL_LINUX */ + static void ps_read (void) { #if HAVE_THREAD_INFO @@ -346,15 +935,19 @@ static void ps_read (void) int paging = 0; int blocked = 0; - char buf[BUFSIZE]; - char filename[20]; /* need 17 bytes */ - char *fields[BUFSIZE]; - struct dirent *ent; - DIR *proc; - FILE *fh; + DIR *proc; + int pid; + + int status; + procstat_t ps; + procstat_entry_t pse; + char state; + + procstat_t *ps_ptr; running = sleeping = zombies = stopped = paging = blocked = 0; + ps_list_reset (); if ((proc = opendir ("/proc")) == NULL) { @@ -367,33 +960,34 @@ static void ps_read (void) if (!isdigit (ent->d_name[0])) continue; - if (snprintf (filename, 20, "/proc/%s/stat", ent->d_name) >= 20) + if ((pid = atoi (ent->d_name)) < 1) continue; - if ((fh = fopen (filename, "r")) == NULL) + status = ps_read_process (pid, &ps, &state); + if (status != 0) { - syslog (LOG_NOTICE, "Cannot open `%s': %s", filename, - strerror (errno)); + DBG ("ps_read_process failed: %i", status); continue; } - if (fgets (buf, BUFSIZE, fh) == NULL) - { - syslog (LOG_NOTICE, "Unable to read from `%s': %s", - filename, strerror (errno)); - fclose (fh); - continue; - } + pse.id = pid; + pse.age = 0; - fclose (fh); + pse.num_proc = ps.num_proc; + pse.num_lwp = ps.num_lwp; + pse.vmem_rss = ps.vmem_rss; - if (strsplit (buf, fields, BUFSIZE) < 3) - { - DBG ("Line has less than three fields."); - continue; - } + pse.vmem_minflt = 0; + pse.vmem_minflt_counter = ps.vmem_minflt_counter; + pse.vmem_majflt = 0; + pse.vmem_majflt_counter = ps.vmem_majflt_counter; + + pse.cpu_user = 0; + pse.cpu_user_counter = ps.cpu_user_counter; + pse.cpu_system = 0; + pse.cpu_system_counter = ps.cpu_system_counter; - switch (fields[2][0]) + switch (state) { case 'R': running++; break; case 'S': sleeping++; break; @@ -402,11 +996,16 @@ static void ps_read (void) case 'T': stopped++; break; case 'W': paging++; break; } + + ps_list_add (ps.name, &pse); } closedir (proc); ps_submit (running, sleeping, zombies, stopped, paging, blocked); + + for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next) + ps_submit_proc_list (ps_ptr); #endif /* KERNEL_LINUX */ } #else @@ -416,6 +1015,11 @@ static void ps_read (void) void module_register (void) { plugin_register (MODULE_NAME, ps_init, ps_read, ps_write); + plugin_register ("ps_rss", NULL, NULL, ps_rss_write); + plugin_register ("ps_cputime", NULL, NULL, ps_cputime_write); + plugin_register ("ps_count", NULL, NULL, ps_count_write); + plugin_register ("ps_pagefaults", NULL, NULL, ps_pagefaults_write); + cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num); } #undef BUFSIZE