Plugin process: collect voluntary and involuntary context switches.
[collectd.git] / src / processes.c
index 210e6f1..c1f92a7 100644 (file)
 #  endif
 /* #endif KERNEL_LINUX */
 
-#elif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD
+#elif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD)
 #  include <kvm.h>
 #  include <sys/param.h>
 #  include <sys/sysctl.h>
 #  include <sys/user.h>
 #  include <sys/proc.h>
-/* #endif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD */
+/* #endif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD) */
 
 #elif HAVE_PROCINFO_H
 #  include <procinfo.h>
 /* #endif HAVE_PROCINFO_H */
 
 #elif KERNEL_SOLARIS
+/* Hack: Avoid #error when building a 32-bit binary with
+ * _FILE_OFFSET_BITS=64. There is a reason for this #error, as one
+ * of the structures in <sys/procfs.h> uses an off_t, but that
+ * isn't relevant to our usage of procfs. */
+#if !defined(_LP64) && _FILE_OFFSET_BITS == 64
+#  define SAVE_FOB_64
+#  undef _FILE_OFFSET_BITS
+#endif
+
 # include <procfs.h>
+
+#ifdef SAVE_FOB_64
+#  define _FILE_OFFSET_BITS 64
+#  undef SAVE_FOB_64
+#endif
+
 # include <dirent.h>
 /* #endif KERNEL_SOLARIS */
 
 # include <kstat.h>
 #endif
 
-#ifndef ARG_MAX
-#  define ARG_MAX 4096
+#ifndef CMDLINE_BUFFER_SIZE
+# if defined(ARG_MAX) && (ARG_MAX < 4096)
+#  define CMDLINE_BUFFER_SIZE ARG_MAX
+# else
+#  define CMDLINE_BUFFER_SIZE 4096
+# endif
 #endif
 
 typedef struct procstat_entry_s
@@ -161,6 +180,9 @@ typedef struct procstat_entry_s
        derive_t io_syscr;
        derive_t io_syscw;
 
+       derive_t cswitch_vol;
+       derive_t cswitch_invol;
+
        struct procstat_entry_s *next;
 } procstat_entry_t;
 
@@ -192,12 +214,17 @@ typedef struct procstat
        derive_t io_syscr;
        derive_t io_syscw;
 
+       derive_t cswitch_vol;
+       derive_t cswitch_invol;
+
        struct procstat   *next;
        struct procstat_entry_s *instances;
 } procstat_t;
 
 static procstat_t *list_head_g = NULL;
 
+static _Bool ps_ctx_switch = 0;
+
 #if HAVE_THREAD_INFO
 static mach_port_t port_host_self;
 static mach_port_t port_task_self;
@@ -210,9 +237,9 @@ static mach_msg_type_number_t     pset_list_len;
 static long pagesize_g;
 /* #endif KERNEL_LINUX */
 
-#elif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD
+#elif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD)
 static int pagesize;
-/* #endif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD */
+/* #endif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD) */
 
 #elif HAVE_PROCINFO_H
 static  struct procentry64 procentry[MAXPROCENTRY];
@@ -223,7 +250,7 @@ static int pagesize;
 int     getprocs64 (void *procsinfo, int sizproc, void *fdsinfo, int sizfd, pid_t *index, int count);
 int     getthrds64( pid_t, void *, int, tid64_t *, int );
 #endif
-int getargs (struct procentry64 *processBuffer, int bufferLen, char *argsBuffer, int argsLen);
+int getargs (void *processBuffer, int bufferLen, char *argsBuffer, int argsLen);
 #endif /* HAVE_PROCINFO_H */
 
 /* put name of process from config to list_head_g tree
@@ -335,7 +362,7 @@ static int ps_list_match (const char *name, const char *cmdline, procstat_t *ps)
 static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t *entry)
 {
        procstat_t *ps;
-       procstat_entry_t *pse;
+        procstat_entry_t *pse;
 
        if (entry->id == 0)
                return;
@@ -379,6 +406,8 @@ static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t
                pse->io_wchar   = entry->io_wchar;
                pse->io_syscr   = entry->io_syscr;
                pse->io_syscw   = entry->io_syscw;
+               pse->cswitch_vol   = entry->cswitch_vol;
+               pse->cswitch_invol = entry->cswitch_invol;
 
                ps->num_proc   += pse->num_proc;
                ps->num_lwp    += pse->num_lwp;
@@ -393,6 +422,9 @@ static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t
                ps->io_syscr   += ((pse->io_syscr == -1)?0:pse->io_syscr);
                ps->io_syscw   += ((pse->io_syscw == -1)?0:pse->io_syscw);
 
+               ps->cswitch_vol   += ((pse->cswitch_vol == -1)?0:pse->cswitch_vol);
+               ps->cswitch_invol += ((pse->cswitch_invol == -1)?0:pse->cswitch_invol);
+
                if ((entry->vmem_minflt_counter == 0)
                                && (entry->vmem_majflt_counter == 0))
                {
@@ -466,6 +498,7 @@ static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t
 
                ps->cpu_user_counter   += pse->cpu_user;
                ps->cpu_system_counter += pse->cpu_system;
+
        }
 }
 
@@ -489,6 +522,8 @@ static void ps_list_reset (void)
                ps->io_wchar = -1;
                ps->io_syscr = -1;
                ps->io_syscw = -1;
+               ps->cswitch_vol   = -1;
+               ps->cswitch_invol = -1;
 
                pse_prev = NULL;
                pse = ps->instances;
@@ -573,6 +608,17 @@ static int ps_config (oconfig_item_t *ci)
                        ps_list_register (c->values[0].value.string,
                                        c->values[1].value.string);
                }
+               else if (strcasecmp (c->key, "CollectContextSwitch") == 0)
+               {
+                       if ((c->values_num != 1) 
+                                       || (c->values[0].type != OCONFIG_TYPE_BOOLEAN))
+                       {
+                               ERROR ("processes plugin: `CollectContextSwitch' needs exactly "
+                                               "one boolean argument.");
+                               continue;
+                       }
+                       ps_ctx_switch = c->values[0].value.boolean ? 1 : 0;
+               }
                else
                {
                        ERROR ("processes plugin: The `%s' configuration option is not "
@@ -619,9 +665,9 @@ static int ps_init (void)
                        pagesize_g, CONFIG_HZ);
 /* #endif KERNEL_LINUX */
 
-#elif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD
+#elif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD)
        pagesize = getpagesize();
-/* #endif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD */
+/* #endif HAVE_LIBKVM_GETPROCS && (HAVE_STRUCT_KINFO_PROC_FREEBSD || HAVE_STRUCT_KINFO_PROC_OPENBSD) */
 
 #elif HAVE_PROCINFO_H
        pagesize = getpagesize();
@@ -722,19 +768,34 @@ static void ps_submit_proc_list (procstat_t *ps)
                plugin_dispatch_values (&vl);
        }
 
+       if ( ps_ctx_switch )
+       {
+               sstrncpy (vl.type, "ps_cswitch_vol", sizeof (vl.type));
+               vl.values[0].derive = ps->cswitch_vol;
+               vl.values_len = 1;
+               plugin_dispatch_values (&vl);
+
+               sstrncpy (vl.type, "ps_cswitch_invol", sizeof (vl.type));
+               vl.values[0].derive = ps->cswitch_invol;
+               vl.values_len = 1;
+               plugin_dispatch_values (&vl);
+       }
+
        DEBUG ("name = %s; num_proc = %lu; num_lwp = %lu; "
                        "vmem_size = %lu; vmem_rss = %lu; vmem_data = %lu; "
                        "vmem_code = %lu; "
                        "vmem_minflt_counter = %"PRIi64"; vmem_majflt_counter = %"PRIi64"; "
                        "cpu_user_counter = %"PRIi64"; cpu_system_counter = %"PRIi64"; "
                        "io_rchar = %"PRIi64"; io_wchar = %"PRIi64"; "
-                       "io_syscr = %"PRIi64"; io_syscw = %"PRIi64";",
+                       "io_syscr = %"PRIi64"; io_syscw = %"PRIi64"; "
+                       "cswitch_vol = %"PRIi64"; cswitch_invol = %"PRIi64";",
                        ps->name, ps->num_proc, ps->num_lwp,
                        ps->vmem_size, ps->vmem_rss,
                        ps->vmem_data, ps->vmem_code,
                        ps->vmem_minflt_counter, ps->vmem_majflt_counter,
                        ps->cpu_user_counter, ps->cpu_system_counter,
-                       ps->io_rchar, ps->io_wchar, ps->io_syscr, ps->io_syscw);
+                       ps->io_rchar, ps->io_wchar, ps->io_syscr, ps->io_syscw,
+                       ps->cswitch_vol, ps->cswitch_invol);
 } /* void ps_submit_proc_list */
 
 #if KERNEL_LINUX || KERNEL_SOLARIS
@@ -759,6 +820,89 @@ static void ps_submit_fork_rate (derive_t value)
 
 /* ------- additional functions for KERNEL_LINUX/HAVE_THREAD_INFO ------- */
 #if KERNEL_LINUX
+static procstat_t *ps_read_tasks_status (int pid, procstat_t *ps)
+{
+       char           dirname[64];
+       DIR           *dh;
+       char           filename[64];
+       FILE          *fh;
+       struct dirent *ent;
+       unsigned long long cswitch_vol = 0;
+       unsigned long long cswitch_invol = 0;
+       char buffer[1024];
+       char *fields[8];
+       int numfields;
+
+       ssnprintf (dirname, sizeof (dirname), "/proc/%i/task", pid);
+
+       if ((dh = opendir (dirname)) == NULL)
+       {
+               DEBUG ("Failed to open directory `%s'", dirname);
+               return (NULL);
+       }
+
+       while ((ent = readdir (dh)) != NULL)
+       {
+               char *tpid;
+
+               if (!isdigit ((int) ent->d_name[0]))
+                       continue;
+
+               tpid = ent->d_name;
+
+               ssnprintf (filename, sizeof (filename), "/proc/%i/task/%s/status", pid, tpid);
+               if ((fh = fopen (filename, "r")) == NULL)
+               {
+                       DEBUG ("Failed to open file `%s'", filename);
+                       continue;
+               }
+
+               while (fgets (buffer, sizeof(buffer), fh) != NULL)
+               {
+                       long long tmp;
+                       char *endptr;
+
+                       if (strncmp (buffer, "voluntary_ctxt_switches", 23) != 0
+                               && strncmp (buffer, "nonvoluntary_ctxt_switches", 26) != 0)
+                               continue;
+
+                       numfields = strsplit (buffer, fields,
+                               STATIC_ARRAY_SIZE (fields));
+
+                       if (numfields < 2)
+                               continue;
+
+                       errno = 0;
+                       endptr = NULL;
+                       tmp = strtoll (fields[1], &endptr, /* base = */ 10);
+                       if ((errno == 0) && (endptr != fields[1]))
+                       {
+                               if (strncmp (buffer, "voluntary_ctxt_switches", 23) == 0)
+                               {
+                                       cswitch_vol += tmp;
+                               }
+                               else if (strncmp (buffer, "nonvoluntary_ctxt_switches", 26) == 0)
+                               {
+                                       cswitch_invol += tmp;
+                               }
+                       }
+               } /* while (fgets) */
+
+               if (fclose (fh))
+               {
+                       char errbuf[1024];
+                               WARNING ("processes: fclose: %s",
+                                       sstrerror (errno, errbuf, sizeof (errbuf)));
+               }
+       }
+       closedir (dh);
+
+       ps->cswitch_vol = cswitch_vol;
+       ps->cswitch_invol = cswitch_invol;
+
+       return (ps);
+} /* int *ps_read_tasks_status */
+
 static int ps_read_tasks (int pid)
 {
        char           dirname[64];
@@ -1049,6 +1193,18 @@ int ps_read_process (int pid, procstat_t *ps, char *state)
                DEBUG("ps_read_process: not get io data for pid %i",pid);
        }
 
+       if ( ps_ctx_switch )
+       {
+               if ( (ps_read_tasks_status(pid, ps)) == NULL)
+               {
+                       ps->cswitch_vol = -1;
+                       ps->cswitch_invol = -1;
+
+                       DEBUG("ps_read_tasks_status: not get context "
+                                       "switch data for pid %i",pid);
+               }
+       }
+
        /* success */
        return (0);
 } /* int ps_read_process (...) */
@@ -1195,17 +1351,17 @@ static int read_fork_rate ()
 #endif /*KERNEL_LINUX */
 
 #if KERNEL_SOLARIS
-static const char *ps_get_cmdline (pid_t pid, /* {{{ */
+static const char *ps_get_cmdline (long pid, /* {{{ */
                char *buffer, size_t buffer_size)
 {
        char path[PATH_MAX];
        psinfo_t info;
        int status;
 
-       snprintf(path, sizeof (path), "/proc/%i/psinfo", pid);
+       snprintf(path, sizeof (path), "/proc/%li/psinfo", pid);
 
        status = read_file_contents (path, (void *) &info, sizeof (info));
-       if (status != ((int) buffer_size))
+       if (status != sizeof (info))
        {
                ERROR ("processes plugin: Unexpected return value "
                                "while reading \"%s\": "
@@ -1226,7 +1382,7 @@ static const char *ps_get_cmdline (pid_t pid, /* {{{ */
  * The values for input and ouput chars are calculated "by hand"
  * Added a few "solaris" specific process states as well
  */
-static int ps_read_process(int pid, procstat_t *ps, char *state)
+static int ps_read_process(long pid, procstat_t *ps, char *state)
 {
        char filename[64];
        char f_psinfo[64], f_usage[64];
@@ -1236,9 +1392,9 @@ static int ps_read_process(int pid, procstat_t *ps, char *state)
        psinfo_t *myInfo;
        prusage_t *myUsage;
 
-       snprintf(filename, sizeof (filename), "/proc/%i/status", pid);
-       snprintf(f_psinfo, sizeof (f_psinfo), "/proc/%i/psinfo", pid);
-       snprintf(f_usage, sizeof (f_usage), "/proc/%i/usage", pid);
+       snprintf(filename, sizeof (filename), "/proc/%li/status", pid);
+       snprintf(f_psinfo, sizeof (f_psinfo), "/proc/%li/psinfo", pid);
+       snprintf(f_usage, sizeof (f_usage), "/proc/%li/usage", pid);
 
 
        buffer = malloc(sizeof (pstatus_t));
@@ -1686,7 +1842,7 @@ static int ps_read (void)
        DIR           *proc;
        int            pid;
 
-       char cmdline[ARG_MAX];
+       char cmdline[CMDLINE_BUFFER_SIZE];
 
        int        status;
        procstat_t ps;
@@ -1747,6 +1903,9 @@ static int ps_read (void)
                pse.io_syscr = ps.io_syscr;
                pse.io_syscw = ps.io_syscw;
 
+               pse.cswitch_vol = ps.cswitch_vol;
+               pse.cswitch_invol = ps.cswitch_invol;
+
                switch (state)
                {
                        case 'R': running++;  break;
@@ -1787,7 +1946,7 @@ static int ps_read (void)
        int wait     = 0;
 
        kvm_t *kd;
-       char errbuf[1024];
+       char errbuf[_POSIX2_LINE_MAX];
        struct kinfo_proc *procs;          /* array of processes */
        struct kinfo_proc *proc_ptr = NULL;
        int count;                         /* returns number of processes */
@@ -1799,7 +1958,7 @@ static int ps_read (void)
        ps_list_reset ();
 
        /* Open the kvm interface, get a descriptor */
-       kd = kvm_open (NULL, NULL, NULL, 0, errbuf);
+       kd = kvm_openfiles (NULL, "/dev/null", NULL, 0, errbuf);
        if (kd == NULL)
        {
                ERROR ("processes plugin: Cannot open kvm interface: %s",
@@ -1824,7 +1983,7 @@ static int ps_read (void)
                 * filter out threads (duplicate PID entries). */
                if ((proc_ptr == NULL) || (proc_ptr->ki_pid != procs[i].ki_pid))
                {
-                       char cmdline[ARG_MAX] = "";
+                       char cmdline[CMDLINE_BUFFER_SIZE] = "";
                        _Bool have_cmdline = 0;
 
                        proc_ptr = &(procs[i]);
@@ -1921,6 +2080,138 @@ static int ps_read (void)
                ps_submit_proc_list (ps_ptr);
 /* #endif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_FREEBSD */
 
+#elif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_OPENBSD
+       int running  = 0;
+       int sleeping = 0;
+       int zombies  = 0;
+       int stopped  = 0;
+       int onproc   = 0;
+       int idle     = 0;
+       int dead     = 0;
+
+       kvm_t *kd;
+       char errbuf[1024];
+       struct kinfo_proc *procs;          /* array of processes */
+       struct kinfo_proc *proc_ptr = NULL;
+       int count;                         /* returns number of processes */
+       int i;
+
+       procstat_t *ps_ptr;
+       procstat_entry_t pse;
+
+       ps_list_reset ();
+
+       /* Open the kvm interface, get a descriptor */
+       kd = kvm_open (NULL, NULL, NULL, 0, errbuf);
+       if (kd == NULL)
+       {
+               ERROR ("processes plugin: Cannot open kvm interface: %s",
+                               errbuf);
+               return (0);
+       }
+
+       /* Get the list of processes. */
+       procs = kvm_getprocs(kd, KERN_PROC_ALL, 0, sizeof(struct kinfo_proc), &count);
+       if (procs == NULL)
+       {
+               ERROR ("processes plugin: Cannot get kvm processes list: %s",
+                               kvm_geterr(kd));
+               kvm_close (kd);
+               return (0);
+       }
+
+       /* Iterate through the processes in kinfo_proc */
+       for (i = 0; i < count; i++)
+       {
+               /* Create only one process list entry per _process_, i.e.
+                * filter out threads (duplicate PID entries). */
+               if ((proc_ptr == NULL) || (proc_ptr->p_pid != procs[i].p_pid))
+               {
+                       char cmdline[CMDLINE_BUFFER_SIZE] = "";
+                       _Bool have_cmdline = 0;
+
+                       proc_ptr = &(procs[i]);
+                       /* Don't probe zombie processes  */
+                       if (!P_ZOMBIE(proc_ptr))
+                       {
+                               char **argv;
+                               int argc;
+                               int status;
+
+                               /* retrieve the arguments */
+                               argv = kvm_getargv (kd, proc_ptr, /* nchr = */ 0);
+                               argc = 0;
+                               if ((argv != NULL) && (argv[0] != NULL))
+                               {
+                                       while (argv[argc] != NULL)
+                                               argc++;
+
+                                       status = strjoin (cmdline, sizeof (cmdline), argv, argc, " ");
+                                       if (status < 0)
+                                               WARNING ("processes plugin: Command line did not fit into buffer.");
+                                       else
+                                               have_cmdline = 1;
+                               }
+                       } /* if (process has argument list) */
+
+                       pse.id       = procs[i].p_pid;
+                       pse.age      = 0;
+
+                       pse.num_proc = 1;
+                       pse.num_lwp  = 1; /* XXX: accumulate p_tid values for a single p_pid ? */
+
+                       pse.vmem_rss = procs[i].p_vm_rssize * pagesize;
+                       pse.vmem_data = procs[i].p_vm_dsize * pagesize;
+                       pse.vmem_code = procs[i].p_vm_tsize * pagesize;
+                       pse.stack_size = procs[i].p_vm_ssize * pagesize;
+                       pse.vmem_size = pse.stack_size + pse.vmem_code + pse.vmem_data;
+                       pse.vmem_minflt = 0;
+                       pse.vmem_minflt_counter = procs[i].p_uru_minflt;
+                       pse.vmem_majflt = 0;
+                       pse.vmem_majflt_counter = procs[i].p_uru_majflt;
+
+                       pse.cpu_user = 0;
+                       pse.cpu_system = 0;
+                       pse.cpu_user_counter = procs[i].p_uutime_usec +
+                                               (1000000lu * procs[i].p_uutime_sec);
+                       pse.cpu_system_counter = procs[i].p_ustime_usec +
+                                               (1000000lu * procs[i].p_ustime_sec);
+
+                       /* no I/O data */
+                       pse.io_rchar = -1;
+                       pse.io_wchar = -1;
+                       pse.io_syscr = -1;
+                       pse.io_syscw = -1;
+
+                       ps_list_add (procs[i].p_comm, have_cmdline ? cmdline : NULL, &pse);
+               } /* if ((proc_ptr == NULL) || (proc_ptr->p_pid != procs[i].p_pid)) */
+
+               switch (procs[i].p_stat)
+               {
+                       case SSTOP:     stopped++;      break;
+                       case SSLEEP:    sleeping++;     break;
+                       case SRUN:      running++;      break;
+                       case SIDL:      idle++;         break;
+                       case SONPROC:   onproc++;       break;
+                       case SDEAD:     dead++;         break;
+                       case SZOMB:     zombies++;      break;
+               }
+       }
+
+       kvm_close(kd);
+
+       ps_submit_state ("running",  running);
+       ps_submit_state ("sleeping", sleeping);
+       ps_submit_state ("zombies",  zombies);
+       ps_submit_state ("stopped",  stopped);
+       ps_submit_state ("onproc",   onproc);
+       ps_submit_state ("idle",     idle);
+       ps_submit_state ("dead",     dead);
+
+       for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
+               ps_submit_proc_list (ps_ptr);
+/* #endif HAVE_LIBKVM_GETPROCS && HAVE_STRUCT_KINFO_PROC_OPENBSD */
+
 #elif HAVE_PROCINFO_H
        /* AIX */
        int running  = 0;
@@ -2088,14 +2379,16 @@ static int ps_read (void)
 
        while ((ent = readdir(proc)) != NULL)
        {
-               int pid;
+               long pid;
                struct procstat ps;
                procstat_entry_t pse;
+               char *endptr;
 
                if (!isdigit ((int) ent->d_name[0]))
                        continue;
 
-               if ((pid = atoi (ent->d_name)) < 1)
+               pid = strtol (ent->d_name, &endptr, 10);
+               if (*endptr != 0) /* value didn't completely parse as a number */
                        continue;
 
                status = ps_read_process (pid, &ps, &state);
@@ -2145,8 +2438,7 @@ static int ps_read (void)
 
 
                ps_list_add (ps.name,
-                               ps_get_cmdline ((pid_t) pid,
-                                       cmdline, sizeof (cmdline)),
+                               ps_get_cmdline (pid, cmdline, sizeof (cmdline)),
                                &pse);
        } /* while(readdir) */
        closedir (proc);