Merge branch 'collectd-3.11' into merge/collectd-4
[collectd.git] / src / processes.c
index cb6bb48..c22f0f6 100644 (file)
@@ -1,7 +1,7 @@
 /**
  * collectd - src/processes.c
  * Copyright (C) 2005  Lyonel Vincent
- * Copyright (C) 2006  Florian Forster (Mach code)
+ * Copyright (C) 2006-2007  Florian Forster (Mach code)
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -69,6 +69,9 @@
 #  if HAVE_MACH_VM_PROT_H
 #    include <mach/vm_prot.h>
 #  endif
+#  if HAVE_SYS_SYSCTL_H
+#    include <sys/sysctl.h>
+#  endif
 /* #endif HAVE_THREAD_INFO */
 
 #elif KERNEL_LINUX
 
 #define BUFSIZE 256
 
-static char *processes_file = "processes.rrd";
-static char *processes_ds_def[] =
+/* One gauge per process state; ps_submit_state sends the state name as the
+ * type instance of `ps_state'. */
+static data_source_t state_dsrc[1] =
 {
-       "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:zombies:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:stopped:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:paging:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       NULL
+       {"value", DS_TYPE_GAUGE, 0.0, 65535.0}
+};
+
+static data_set_t state_ds =
+{
+       "ps_state", 1, state_dsrc
 };
-static int processes_ds_num = 6;
 
-static char *ps_rss_file = "processes/ps_rss-%s.rrd";
-static char *ps_rss_ds_def[] =
+/* Resident set size gauge, dispatched by ps_submit_proc_list as type
+ * `ps_rss'. */
+static data_source_t rss_dsrc[1] =
 {
         /* max = 2^63 - 1 */
-       "DS:byte:GAUGE:"COLLECTD_HEARTBEAT":0:9223372036854775807",
-       NULL
+       {"value", DS_TYPE_GAUGE, 0.0, 9223372036854775807.0}
+};
+
+static data_set_t rss_ds =
+{
+       "ps_rss", 1, rss_dsrc
 };
-static int ps_rss_ds_num = 1;
 
-static char *ps_cputime_file = "processes/ps_cputime-%s.rrd";
-static char *ps_cputime_ds_def[] =
+/* User and system CPU time counters, dispatched by ps_submit_proc_list as
+ * type `ps_cputime'. */
+static data_source_t time_dsrc[2] =
 {
         /* 1 second in user-mode per second ought to be enough.. */
-       "DS:user:COUNTER:"COLLECTD_HEARTBEAT":0:1000000",
-       "DS:syst:COUNTER:"COLLECTD_HEARTBEAT":0:1000000",
-       NULL
+       {"user", DS_TYPE_COUNTER, 0.0, 1000000.0},
+       {"syst", DS_TYPE_COUNTER, 0.0, 1000000.0}
 };
-static int ps_cputime_ds_num = 2;
 
-static char *ps_count_file = "processes/ps_count-%s.rrd";
-static char *ps_count_ds_def[] =
+static data_set_t time_ds =
 {
-       "DS:processes:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       "DS:threads:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
-       NULL
+       "ps_cputime", 2, time_dsrc
+};
+
+/* Process and thread counts, dispatched by ps_submit_proc_list as type
+ * `ps_count'. */
+static data_source_t count_dsrc[2] =
+{
+       /* number of processes and number of threads, both reported as gauges */
+       {"processes", DS_TYPE_GAUGE, 0.0, 1000000.0},
+       {"threads",   DS_TYPE_GAUGE, 0.0, 1000000.0}
 };
-static int ps_count_ds_num = 2;
 
-static char *ps_pagefaults_file = "processes/ps_pagefaults-%s.rrd";
-static char *ps_pagefaults_ds_def[] =
+static data_set_t count_ds =
+{
+       "ps_count", 2, count_dsrc
+};
+
+/* Minor and major page-fault counters, dispatched by ps_submit_proc_list as
+ * type `ps_pagefaults'. */
+static data_source_t pagefaults_dsrc[2] =
 {
         /* max = 2^63 - 1 */
-       "DS:minflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807",
-       "DS:majflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807",
-       NULL
+       {"minflt", DS_TYPE_COUNTER, 0.0, 9223372036854775807.0},
+       {"majflt", DS_TYPE_COUNTER, 0.0, 9223372036854775807.0}
+};
+
+static data_set_t pagefaults_ds =
+{
+       "ps_pagefaults", 2, pagefaults_dsrc
 };
-static int ps_pagefaults_ds_num = 2;
 
-static char *config_keys[] =
+#if PROCESSES_HAVE_READ
+#if HAVE_THREAD_INFO | KERNEL_LINUX
+/* Configuration keys passed to plugin_register_config; ps_config handles
+ * "Process". */
+static const char *config_keys[] =
 {
         "Process",
         NULL
 };
 static int config_keys_num = 1;
+#endif
 
 typedef struct procstat_entry_s
 {
@@ -189,7 +201,9 @@ typedef struct procstat
        struct procstat_entry_s *instances;
 } procstat_t;
 
+#if HAVE_THREAD_INFO | KERNEL_LINUX
 static procstat_t *list_head_g = NULL;
+#endif
 
 #if HAVE_THREAD_INFO
 static mach_port_t port_host_self;
@@ -203,6 +217,7 @@ static mach_msg_type_number_t     pset_list_len;
 static long pagesize_g;
 #endif /* KERNEL_LINUX */
 
+#if HAVE_THREAD_INFO | KERNEL_LINUX
 static void ps_list_register (const char *name)
 {
        procstat_t *new;
@@ -400,7 +415,7 @@ static void ps_list_reset (void)
        } /* for (ps = list_head_g; ps != NULL; ps = ps->next) */
 }
 
-static int ps_config (char *key, char *value)
+static int ps_config (const char *key, const char *value)
 {
        if (strcasecmp (key, "Process") == 0)
        {
@@ -413,8 +428,9 @@ static int ps_config (char *key, char *value)
 
        return (0);
 }
+#endif /* HAVE_THREAD_INFO | KERNEL_LINUX */
 
-static void ps_init (void)
+static int ps_init (void)
 {
 #if HAVE_THREAD_INFO
        kern_return_t status;
@@ -439,7 +455,7 @@ static void ps_init (void)
                                mach_error_string (status));
                pset_list = NULL;
                pset_list_len = 0;
-               return;
+               return (-1);
        }
 /* #endif HAVE_THREAD_INFO */
 
@@ -449,135 +465,65 @@ static void ps_init (void)
                        pagesize_g, CONFIG_HZ);
 #endif /* KERNEL_LINUX */
 
-       return;
-}
-
-static void ps_write (char *host, char *inst, char *val)
-{
-       rrd_update_file (host, processes_file, val,
-                       processes_ds_def, processes_ds_num);
-}
-
-static void ps_rss_write (char *host, char *inst, char *val)
-{
-       char filename[256];
-       int status;
-
-       status = snprintf (filename, 256, ps_rss_file, inst);
-       if ((status < 1) || (status >= 256))
-               return;
-
-       rrd_update_file (host, filename, val, ps_rss_ds_def, ps_rss_ds_num);
-}
-
-static void ps_cputime_write (char *host, char *inst, char *val)
-{
-       char filename[256];
-       int status;
-
-       status = snprintf (filename, 256, ps_cputime_file, inst);
-       if ((status < 1) || (status >= 256))
-               return;
-
-       DBG ("host = %s; filename = %s; val = %s;",
-                       host, filename, val);
-       rrd_update_file (host, filename, val,
-                       ps_cputime_ds_def, ps_cputime_ds_num);
-}
-
-static void ps_count_write (char *host, char *inst, char *val)
-{
-       char filename[256];
-       int status;
-
-       status = snprintf (filename, 256, ps_count_file, inst);
-       if ((status < 1) || (status >= 256))
-               return;
-
-       DBG ("host = %s; filename = %s; val = %s;",
-                       host, filename, val);
-       rrd_update_file (host, filename, val,
-                       ps_count_ds_def, ps_count_ds_num);
-}
-
-static void ps_pagefaults_write (char *host, char *inst, char *val)
-{
-       char filename[256];
-       int status;
-
-       status = snprintf (filename, 256, ps_pagefaults_file, inst);
-       if ((status < 1) || (status >= 256))
-               return;
-
-       DBG ("host = %s; filename = %s; val = %s;",
-                       host, filename, val);
-       rrd_update_file (host, filename, val,
-                       ps_pagefaults_ds_def, ps_pagefaults_ds_num);
-}
+       return (0);
+} /* int ps_init */
 
-#if PROCESSES_HAVE_READ
-static void ps_submit (int running,
-               int sleeping,
-               int zombies,
-               int stopped,
-               int paging,
-               int blocked)
+/*
+ * Dispatches a single `ps_state' gauge for the plugin `processes'.
+ *
+ * state - name of the process state; copied into the type instance
+ * value - gauge value to report for that state
+ */
+static void ps_submit_state (const char *state, double value)
 {
-       char buf[BUFSIZE];
+       value_t values[1];
+       value_list_t vl = VALUE_LIST_INIT;
 
-       if (snprintf (buf, BUFSIZE, "%u:%i:%i:%i:%i:%i:%i",
-                               (unsigned int) curtime,
-                               running, sleeping, zombies, stopped, paging,
-                               blocked) >= BUFSIZE)
-               return;
+       values[0].gauge = value;
 
-       DBG ("running = %i; sleeping = %i; zombies = %i; stopped = %i; paging = %i; blocked = %i;",
-                       running, sleeping, zombies, stopped, paging, blocked);
+       vl.values = values;
+       vl.values_len = 1;
+       vl.time = time (NULL);
+       strcpy (vl.host, hostname);
+       strcpy (vl.plugin, "processes");
+       strcpy (vl.plugin_instance, "");
+       strncpy (vl.type_instance, state, sizeof (vl.type_instance));
+       /* strncpy leaves the buffer unterminated if `state' fills it */
+       vl.type_instance[sizeof (vl.type_instance) - 1] = '\0';
 
-       plugin_submit (MODULE_NAME, "-", buf);
+       plugin_dispatch_values ("ps_state", &vl);
 }
 
+/*
+ * Dispatches the statistics collected in `ps' as the types `ps_rss',
+ * `ps_cputime', `ps_count' and `ps_pagefaults'. One value list is reused
+ * for all four; `ps->name' is copied into the plugin instance.
+ *
+ * ps - entry to submit; it is dereferenced unconditionally
+ */
 static void ps_submit_proc_list (procstat_t *ps)
 {
-       char buffer[64];
-
-       if (ps == NULL)
-               return;
-
-       snprintf (buffer, 64, "%u:%lu",
-                       (unsigned int) curtime,
-                       ps->vmem_rss);
-       buffer[63] = '\0';
-       plugin_submit ("ps_rss", ps->name, buffer);
-
-       snprintf (buffer, 64, "%u:%u:%u",
-                       (unsigned int) curtime,
-                       /* Make the counter overflow */
-                       (unsigned int) (ps->cpu_user_counter   & 0xFFFFFFFF),
-                       (unsigned int) (ps->cpu_system_counter & 0xFFFFFFFF));
-       buffer[63] = '\0';
-       plugin_submit ("ps_cputime", ps->name, buffer);
-
-       snprintf (buffer, 64, "%u:%lu:%lu",
-                       (unsigned int) curtime,
-                       ps->num_proc, ps->num_lwp);
-       buffer[63] = '\0';
-       plugin_submit ("ps_count", ps->name, buffer);
-
-       snprintf (buffer, 64, "%u:%lu:%lu",
-                       (unsigned int) curtime,
-                       ps->vmem_minflt_counter, ps->vmem_majflt_counter);
-       buffer[63] = '\0';
-       plugin_submit ("ps_pagefaults", ps->name, buffer);
+       value_t values[2];
+       value_list_t vl = VALUE_LIST_INIT;
+
+       vl.values = values;
+       vl.values_len = 2;
+       vl.time = time (NULL);
+       strcpy (vl.host, hostname);
+       strcpy (vl.plugin, "processes");
+       strncpy (vl.plugin_instance, ps->name, sizeof (vl.plugin_instance));
+       /* strncpy leaves the buffer unterminated if `ps->name' fills it */
+       vl.plugin_instance[sizeof (vl.plugin_instance) - 1] = '\0';
+
+       /* `ps_rss' carries a single value, the other types carry two */
+       vl.values[0].gauge = ps->vmem_rss;
+       vl.values_len = 1;
+       plugin_dispatch_values ("ps_rss", &vl);
+
+       vl.values[0].counter = ps->cpu_user_counter;
+       vl.values[1].counter = ps->cpu_system_counter;
+       vl.values_len = 2;
+       plugin_dispatch_values ("ps_cputime", &vl);
+
+       vl.values[0].gauge = ps->num_proc;
+       vl.values[1].gauge = ps->num_lwp;
+       vl.values_len = 2;
+       plugin_dispatch_values ("ps_count", &vl);
+
+       vl.values[0].counter = ps->vmem_minflt_counter;
+       vl.values[1].counter = ps->vmem_majflt_counter;
+       vl.values_len = 2;
+       plugin_dispatch_values ("ps_pagefaults", &vl);
 
         DBG ("name = %s; num_proc = %lu; num_lwp = %lu; vmem_rss = %lu; "
-                       "vmem_minflt_counter = %i; vmem_majflt_counter = %i; "
-                       "cpu_user_counter = %i; cpu_system_counter = %i;",
+                       "vmem_minflt_counter = %lu; vmem_majflt_counter = %lu; "
+                       "cpu_user_counter = %lu; cpu_system_counter = %lu;",
                         ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss,
-                       ps->vmem_minflt_counter, ps->vmem_majflt_counter, ps->cpu_user_counter,
-                       ps->cpu_system_counter);
-
-}
+                       ps->vmem_minflt_counter, ps->vmem_majflt_counter,
+                       ps->cpu_user_counter, ps->cpu_system_counter);
+} /* void ps_submit_proc_list */
 
 #if KERNEL_LINUX
 static int *ps_read_tasks (int pid)
@@ -595,8 +541,7 @@ static int *ps_read_tasks (int pid)
 
        if ((dh = opendir (dirname)) == NULL)
        {
-               syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'",
-                               dirname);
+               DBG ("Failed to open directory `%s'", dirname);
                return (NULL);
        }
 
@@ -750,7 +695,43 @@ int ps_read_process (int pid, procstat_t *ps, char *state)
 } /* int ps_read_process (...) */
 #endif /* KERNEL_LINUX */
 
-static void ps_read (void)
+#if HAVE_THREAD_INFO
+/*
+ * Looks up the PID and the command name (`p_comm') of the Mach task `t'.
+ *
+ * t            - task to look up
+ * pid          - receives the PID reported by `pid_for_task'
+ * name         - buffer that receives the NUL-terminated command name
+ * name_max_len - size of `name' in bytes; at most MAXCOMLEN + 1 are used
+ *
+ * Returns 0 on success. Returns -1 if an argument is unusable, if
+ * `pid_for_task' or `sysctl' fails, or if `sysctl' returned no data.
+ */
+static int mach_get_task_name (task_t t, int *pid, char *name, size_t name_max_len)
+{
+       int mib[4];
+
+       struct kinfo_proc kp;
+       size_t            kp_size;
+
+       /* `name_max_len - 1' below would wrap around for an empty buffer */
+       if ((pid == NULL) || (name == NULL) || (name_max_len == 0))
+               return (-1);
+
+       mib[0] = CTL_KERN;
+       mib[1] = KERN_PROC;
+       mib[2] = KERN_PROC_PID;
+
+       if (pid_for_task (t, pid) != KERN_SUCCESS)
+               return (-1);
+       mib[3] = *pid;
+
+       memset (&kp, '\0', sizeof (kp));
+       kp_size = sizeof (kp);
+       if (sysctl (mib, 4, &kp, &kp_size, NULL, 0) != 0)
+               return (-1);
+
+       /* `kp_size' now holds the number of bytes copied; zero means that
+        * `kp' was not filled in and there is no name to report. */
+       if (kp_size == 0)
+               return (-1);
+
+       if (name_max_len > (MAXCOMLEN + 1))
+               name_max_len = MAXCOMLEN + 1;
+
+       strncpy (name, kp.kp_proc.p_comm, name_max_len - 1);
+       name[name_max_len - 1] = '\0';
+
+       DBG ("pid = %i; name = %s;", *pid, name);
+
+       /* We don't do the special handling for `p_comm == "LaunchCFMApp"' as
+        * `top' does it, because it is a lot of work and only used when
+        * debugging. -octo */
+
+       return (0);
+}
+#endif /* HAVE_THREAD_INFO */
+
+static int ps_read (void)
 {
 #if HAVE_THREAD_INFO
        kern_return_t            status;
@@ -762,6 +743,9 @@ static void ps_read (void)
        task_array_t             task_list;
        mach_msg_type_number_t   task_list_len;
 
+       int                      task_pid;
+       char                     task_name[MAXCOMLEN + 1];
+
        int                      thread;
        thread_act_array_t       thread_list;
        mach_msg_type_number_t   thread_list_len;
@@ -774,6 +758,11 @@ static void ps_read (void)
        int stopped  = 0;
        int blocked  = 0;
 
+       procstat_t *ps;
+       procstat_entry_t pse;
+
+       ps_list_reset ();
+
        /*
         * The Mach-concept is a little different from the traditional UNIX
         * concept: All the work is done in threads. Threads are contained in
@@ -805,6 +794,71 @@ static void ps_read (void)
 
                for (task = 0; task < task_list_len; task++)
                {
+                       ps = NULL;
+                       if (mach_get_task_name (task_list[task],
+                                               &task_pid,
+                                               task_name, PROCSTAT_NAME_LEN) == 0)
+                               ps = ps_list_search (task_name);
+
+                       /* Collect more detailed statistics for this process */
+                       if (ps != NULL)
+                       {
+                               task_basic_info_data_t        task_basic_info;
+                               mach_msg_type_number_t        task_basic_info_len;
+                               task_events_info_data_t       task_events_info;
+                               mach_msg_type_number_t        task_events_info_len;
+                               task_absolutetime_info_data_t task_absolutetime_info;
+                               mach_msg_type_number_t        task_absolutetime_info_len;
+
+                               memset (&pse, '\0', sizeof (pse));
+                               pse.id = task_pid;
+
+                               task_basic_info_len = TASK_BASIC_INFO_COUNT;
+                               status = task_info (task_list[task],
+                                               TASK_BASIC_INFO,
+                                               (task_info_t) &task_basic_info,
+                                               &task_basic_info_len);
+                               if (status != KERN_SUCCESS)
+                               {
+                                       syslog (LOG_ERR, "task_info failed: %s",
+                                                       mach_error_string (status));
+                                       continue; /* with next thread_list */
+                               }
+
+                               task_events_info_len = TASK_EVENTS_INFO_COUNT;
+                               status = task_info (task_list[task],
+                                               TASK_EVENTS_INFO,
+                                               (task_info_t) &task_events_info,
+                                               &task_events_info_len);
+                               if (status != KERN_SUCCESS)
+                               {
+                                       syslog (LOG_ERR, "task_info failed: %s",
+                                                       mach_error_string (status));
+                                       continue; /* with next thread_list */
+                               }
+
+                               task_absolutetime_info_len = TASK_ABSOLUTETIME_INFO_COUNT;
+                               status = task_info (task_list[task],
+                                               TASK_ABSOLUTETIME_INFO,
+                                               (task_info_t) &task_absolutetime_info,
+                                               &task_absolutetime_info_len);
+                               if (status != KERN_SUCCESS)
+                               {
+                                       syslog (LOG_ERR, "task_info failed: %s",
+                                                       mach_error_string (status));
+                                       continue; /* with next thread_list */
+                               }
+
+                               pse.num_proc++;
+                               pse.vmem_rss = task_basic_info.resident_size;
+
+                               pse.vmem_minflt_counter = task_events_info.cow_faults;
+                               pse.vmem_majflt_counter = task_events_info.faults;
+
+                               pse.cpu_user_counter = task_absolutetime_info.total_user;
+                               pse.cpu_system_counter = task_absolutetime_info.total_system;
+                       }
+
                        status = task_threads (task_list[task], &thread_list,
                                        &thread_list_len);
                        if (status != KERN_SUCCESS)
@@ -831,7 +885,7 @@ static void ps_read (void)
                                                &thread_data_len);
                                if (status != KERN_SUCCESS)
                                {
-                                       syslog (LOG_ERR, "thread_info failed: %s\n",
+                                       syslog (LOG_ERR, "thread_info failed: %s",
                                                        mach_error_string (status));
                                        if (task_list[task] != port_task_self)
                                                mach_port_deallocate (port_task_self,
@@ -839,6 +893,9 @@ static void ps_read (void)
                                        continue; /* with next thread_list */
                                }
 
+                               if (ps != NULL)
+                                       pse.num_lwp++;
+
                                switch (thread_data.run_state)
                                {
                                        case TH_STATE_RUNNING:
@@ -898,6 +955,9 @@ static void ps_read (void)
                                        syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                                        mach_error_string (status));
                        }
+
+                       if (ps != NULL)
+                               ps_list_add (task_name, &pse);
                } /* for (task_list) */
 
                if ((status = vm_deallocate (port_task_self,
@@ -918,7 +978,14 @@ static void ps_read (void)
                }
        } /* for (pset_list) */
 
-       ps_submit (running, sleeping, zombies, stopped, -1, blocked);
+       ps_submit_state ("running", running);
+       ps_submit_state ("sleeping", sleeping);
+       ps_submit_state ("zombies", zombies);
+       ps_submit_state ("stopped", stopped);
+       ps_submit_state ("blocked", blocked);
+
+       for (ps = list_head_g; ps != NULL; ps = ps->next)
+               ps_submit_proc_list (ps);
 /* #endif HAVE_THREAD_INFO */
 
 #elif KERNEL_LINUX
@@ -946,7 +1013,7 @@ static void ps_read (void)
        if ((proc = opendir ("/proc")) == NULL)
        {
                syslog (LOG_ERR, "Cannot open `/proc': %s", strerror (errno));
-               return;
+               return (-1);
        }
 
        while ((ent = readdir (proc)) != NULL)
@@ -996,25 +1063,36 @@ static void ps_read (void)
 
        closedir (proc);
 
-       ps_submit (running, sleeping, zombies, stopped, paging, blocked);
+       ps_submit_state ("running",  running);
+       ps_submit_state ("sleeping", sleeping);
+       ps_submit_state ("zombies",  zombies);
+       ps_submit_state ("stopped",  stopped);
+       ps_submit_state ("paging",   paging);
+       ps_submit_state ("blocked",  blocked);
 
        for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
                ps_submit_proc_list (ps_ptr);
 #endif /* KERNEL_LINUX */
-}
-#else
-# define ps_read NULL
+
+       return (0);
+} /* int ps_read */
 #endif /* PROCESSES_HAVE_READ */
 
+/*
+ * Registers the five data sets defined in this file. If PROCESSES_HAVE_READ
+ * is set, the init and read callbacks are registered as well; the config
+ * callback is registered only when HAVE_THREAD_INFO or KERNEL_LINUX is set.
+ */
 void module_register (void)
 {
-       plugin_register (MODULE_NAME, ps_init, ps_read, ps_write);
-       plugin_register ("ps_rss", NULL, NULL, ps_rss_write);
-       plugin_register ("ps_cputime", NULL, NULL, ps_cputime_write);
-       plugin_register ("ps_count", NULL, NULL, ps_count_write);
-       plugin_register ("ps_pagefaults", NULL, NULL, ps_pagefaults_write);
-       cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num);
+       plugin_register_data_set (&state_ds);
+       plugin_register_data_set (&rss_ds);
+       plugin_register_data_set (&time_ds);
+       plugin_register_data_set (&count_ds );
+       plugin_register_data_set (&pagefaults_ds );
+
+#if PROCESSES_HAVE_READ
+#if HAVE_THREAD_INFO | KERNEL_LINUX
+       plugin_register_config ("processes", ps_config,
+                       config_keys, config_keys_num);
+#endif
+       plugin_register_init ("processes", ps_init);
+       plugin_register_read ("processes", ps_read);
+#endif /* PROCESSES_HAVE_READ */
 }
 
-#undef BUFSIZE
-#undef MODULE_NAME