processes branch: Change many types and includes.
[collectd.git] / src / processes.c
1 /**
2  * collectd - src/processes.c
3  * Copyright (C) 2005  Lyonel Vincent
4  * Copyright (C) 2006  Florian Forster (Mach code)
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; either version 2 of the License, or (at your
9  * option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  *
20  * Authors:
21  *   Lyonel Vincent <lyonel at ezix.org>
22  *   Florian octo Forster <octo at verplant.org>
23  **/
24
25 #include "collectd.h"
26 #include "common.h"
27 #include "plugin.h"
28 #include "utils_debug.h"
29 #include "configfile.h"
30
31 /* Include header files for the mach system, if they exist.. */
32 #if HAVE_THREAD_INFO
33 #  if HAVE_MACH_MACH_INIT_H
34 #    include <mach/mach_init.h>
35 #  endif
36 #  if HAVE_MACH_HOST_PRIV_H
37 #    include <mach/host_priv.h>
38 #  endif
39 #  if HAVE_MACH_MACH_ERROR_H
40 #    include <mach/mach_error.h>
41 #  endif
42 #  if HAVE_MACH_MACH_HOST_H
43 #    include <mach/mach_host.h>
44 #  endif
45 #  if HAVE_MACH_MACH_PORT_H
46 #    include <mach/mach_port.h>
47 #  endif
48 #  if HAVE_MACH_MACH_TYPES_H
49 #    include <mach/mach_types.h>
50 #  endif
51 #  if HAVE_MACH_MESSAGE_H
52 #    include <mach/message.h>
53 #  endif
54 #  if HAVE_MACH_PROCESSOR_SET_H
55 #    include <mach/processor_set.h>
56 #  endif
57 #  if HAVE_MACH_TASK_H
58 #    include <mach/task.h>
59 #  endif
60 #  if HAVE_MACH_THREAD_ACT_H
61 #    include <mach/thread_act.h>
62 #  endif
63 #  if HAVE_MACH_VM_REGION_H
64 #    include <mach/vm_region.h>
65 #  endif
66 #  if HAVE_MACH_VM_MAP_H
67 #    include <mach/vm_map.h>
68 #  endif
69 #  if HAVE_MACH_VM_PROT_H
70 #    include <mach/vm_prot.h>
71 #  endif
72 /* #endif HAVE_THREAD_INFO */
73
74 #elif KERNEL_LINUX
75 #  if HAVE_LINUX_CONFIG_H
76 #    include <linux/config.h>
77 #  endif
78 #  ifndef CONFIG_HZ
79 #    define CONFIG_HZ 100
80 #  endif
81 #endif /* KERNEL_LINUX */
82
/* Name under which this plugin registers itself and submits its values. */
#define MODULE_NAME "processes"

/* A read callback is only compiled in when one of the two supported
 * backends (Mach `thread_info' or Linux) was detected at build time. */
#if HAVE_THREAD_INFO || KERNEL_LINUX
# define PROCESSES_HAVE_READ 1
#else
# define PROCESSES_HAVE_READ 0
#endif

/* Size of the buffer in which ps_submit formats one update string. */
#define BUFSIZE 256

/* RRD file updated by ps_write, followed by its data source definitions. */
static char *processes_file = "processes.rrd";
static char *processes_ds_def[] =
{
        "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        "DS:zombies:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        "DS:stopped:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        "DS:paging:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
        NULL
};
/* Number of non-NULL entries in processes_ds_def; keep the two in sync. */
static int processes_ds_num = 6;

/* Configuration keys handled by ps_config. */
static char *config_keys[] =
{
        "CollectName",
        NULL
};
/* Number of non-NULL entries in config_keys; keep the two in sync. */
static int config_keys_num = 1;
112
/* Size of the `name' buffer of a procstat_t, terminating NUL included. */
#define PROCSTAT_NAME_LEN 256

typedef struct procstat procstat_t;

/*
 * Statistics of one process, or the sum over all processes sharing a name.
 * Entries are chained through `next' into a singly linked list.
 */
struct procstat
{
        char               name[PROCSTAT_NAME_LEN]; /* process name */
        unsigned int       num_proc;    /* number of processes */
        unsigned int       num_lwp;     /* number of tasks (threads) */
        unsigned long      vmem_rss;    /* resident set size */
        unsigned long      vmem_minflt; /* minor page faults */
        unsigned long      vmem_majflt; /* major page faults */
        unsigned long long cpu_user;    /* CPU time spent in user mode */
        unsigned long long cpu_system;  /* CPU time spent in kernel mode */
        procstat_t        *next;        /* next entry, NULL at the tail */
};
126
/* Head of the list of processes selected with `CollectName'; NULL as long
 * as ps_config has not added any entry. */
static procstat_t *list_head_g = NULL;

#if HAVE_THREAD_INFO
/* Mach ports of this host and of this task, obtained in ps_init. */
static mach_port_t port_host_self;
static mach_port_t port_task_self;

/* Processor sets of this host as returned by host_processor_sets in
 * ps_init, and the number of elements in that array. */
static processor_set_name_array_t pset_list;
static mach_msg_type_number_t     pset_list_len;
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
/* Result of sysconf (_SC_PAGESIZE), set in ps_init. */
static long pagesize_g;
#endif /* KERNEL_LINUX */
140
141 static procstat_t *ps_list_append (procstat_t *list, const char *name)
142 {
143         procstat_t *new;
144         procstat_t *ptr;
145
146         if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL)
147                 return (NULL);
148         memset (new, 0, sizeof (procstat_t));
149         strncpy (new->name, name, PROCSTAT_NAME_LEN);
150
151         for (ptr = list; ptr != NULL; ptr = ptr->next)
152                 if (ptr->next == NULL)
153                         break;
154
155         if (ptr != NULL)
156                 ptr->next = new;
157
158         return (new);
159 }
160
161 static void ps_list_add (procstat_t *list, procstat_t *entry)
162 {
163         procstat_t *ptr;
164
165         ptr = list;
166         while ((ptr != NULL) && (strcmp (ptr->name, entry->name) != 0))
167                 ptr = ptr->next;
168
169         if (ptr == NULL)
170                 return;
171
172         ptr->num_proc    += entry->num_proc;
173         ptr->num_lwp     += entry->num_lwp;
174         ptr->vmem_rss    += entry->vmem_rss;
175         ptr->vmem_minflt += entry->vmem_minflt;
176         ptr->vmem_majflt += entry->vmem_majflt;
177         ptr->cpu_user    += entry->cpu_user;
178         ptr->cpu_system  += entry->cpu_system;
179 }
180
181 static void ps_list_reset (procstat_t *ps)
182 {
183         while (ps != NULL)
184         {
185                 ps->num_proc    = 0;
186                 ps->num_lwp     = 0;
187                 ps->vmem_rss    = 0;
188                 ps->vmem_minflt = 0;
189                 ps->vmem_majflt = 0;
190                 ps->cpu_user    = 0;
191                 ps->cpu_system  = 0;
192                 ps = ps->next;
193         }
194 }
195
196 static int ps_config (char *key, char *value)
197 {
198         if (strcasecmp (key, "CollectName") == 0)
199         {
200                 procstat_t *entry;
201
202                 entry = ps_list_append (list_head_g, value);
203                 if (entry == NULL)
204                 {
205                         syslog (LOG_ERR, "processes plugin: ps_list_append failed.");
206                         return (1);
207                 }
208                 if (list_head_g == NULL)
209                         list_head_g = entry;
210         }
211         else
212         {
213                 return (-1);
214         }
215
216         return (0);
217 }
218
/*
 * Initialization callback.
 *
 * Mach: looks up the host and task ports and (re-)reads the list of
 * processor sets, releasing a list left over from an earlier call.
 * Linux: stores the page size in pagesize_g.
 */
static void ps_init (void)
{
#if HAVE_THREAD_INFO
        kern_return_t rc;

        port_host_self = mach_host_self ();
        port_task_self = mach_task_self ();

        /* Give back the list obtained by a previous initialization. */
        if (pset_list != NULL)
        {
                vm_deallocate (port_task_self,
                                (vm_address_t) pset_list,
                                pset_list_len * sizeof (processor_set_t));
                pset_list = NULL;
                pset_list_len = 0;
        }

        rc = host_processor_sets (port_host_self, &pset_list, &pset_list_len);
        if (rc != KERN_SUCCESS)
        {
                syslog (LOG_ERR, "host_processor_sets failed: %s\n",
                                mach_error_string (rc));
                pset_list = NULL;
                pset_list_len = 0;
        }
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
        pagesize_g = sysconf (_SC_PAGESIZE);
#endif /* KERNEL_LINUX */
}
254
255 static void ps_write (char *host, char *inst, char *val)
256 {
257         rrd_update_file (host, processes_file, val,
258                         processes_ds_def, processes_ds_num);
259 }
260
261 #if PROCESSES_HAVE_READ
262 static void ps_submit (int running,
263                 int sleeping,
264                 int zombies,
265                 int stopped,
266                 int paging,
267                 int blocked)
268 {
269         char buf[BUFSIZE];
270
271         if (snprintf (buf, BUFSIZE, "%u:%i:%i:%i:%i:%i:%i",
272                                 (unsigned int) curtime,
273                                 running, sleeping, zombies, stopped, paging,
274                                 blocked) >= BUFSIZE)
275                 return;
276
277         DBG ("running = %i; sleeping = %i; zombies = %i; stopped = %i; paging = %i; blocked = %i;",
278                         running, sleeping, zombies, stopped, paging, blocked);
279
280         plugin_submit (MODULE_NAME, "-", buf);
281 }
282
283 static void ps_submit_proc (procstat_t *ps)
284 {
285         if (ps == NULL)
286                 return;
287
288         DBG ("name = %s; num_proc = %i; num_lwp = %i; vmem_rss = %i; "
289                         "vmem_minflt = %i; vmem_majflt = %i; "
290                         "cpu_user = %i; cpu_system = %i;",
291                         ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss,
292                         ps->vmem_minflt, ps->vmem_majflt, ps->cpu_user,
293                         ps->cpu_system);
294 }
295
296 #if KERNEL_LINUX
/*
 * Reads the IDs of all tasks (threads) of process `pid' from the directory
 * `/proc/<pid>/task'.
 *
 * Returns a malloc'ed array of task IDs which is terminated by a zero and
 * has to be freed by the caller. Returns NULL if the directory cannot be
 * opened, if it contains no task entries or if memory runs out.
 */
static int *ps_read_tasks (int pid)
{
        int   *list = NULL;
        size_t list_size = 0; /* size of allocated space, in elements */
        size_t list_len = 0;  /* number of currently used elements */

        char           dirname[64];
        DIR           *dh;
        struct dirent *ent;

        snprintf (dirname, sizeof (dirname), "/proc/%i/task", pid);
        dirname[sizeof (dirname) - 1] = '\0';

        if ((dh = opendir (dirname)) == NULL)
        {
                syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'",
                                dirname);
                return (NULL);
        }

        while ((ent = readdir (dh)) != NULL)
        {
                int tid;

                /* <ctype.h> functions require a value in the range of
                 * `unsigned char'. */
                if (!isdigit ((unsigned char) ent->d_name[0]))
                        continue;

                tid = atoi (ent->d_name);
                if (tid == 0)
                        continue;

                /* Always keep one spare element for the terminating zero. */
                if ((list_len + 1) >= list_size)
                {
                        /* Comes in sizes: 2, 4, 8, 16, ... */
                        size_t new_size = (list_size == 0) ? 2 : (2 * list_size);
                        int   *new_ptr;

                        new_ptr = realloc (list, sizeof (*list) * new_size);
                        if (new_ptr == NULL)
                        {
                                syslog (LOG_ERR, "processes plugin: "
                                                "Failed to allocate more memory.");
                                free (list);
                                /* Don't leak the directory handle. */
                                closedir (dh);
                                return (NULL);
                        }

                        list = new_ptr;
                        list_size = new_size;
                }

                list[list_len] = tid;
                list_len++;
                list[list_len] = 0;
        }

        closedir (dh);

        /* `list' is still NULL here if no task entry was found. */
        return (list);
}
356
357 int ps_read_process (int pid, procstat_t *ps, char *state)
358 {
359         char  filename[64];
360         char  buffer[1024];
361         FILE *fh;
362
363         char *fields[64];
364         char  fields_len;
365
366         int  *tasks;
367         int   i;
368
369         int   ppid;
370         int   name_len;
371
372         memset (ps, 0, sizeof (procstat_t));
373
374         snprintf (filename, 64, "/proc/%i/stat", pid);
375         filename[63] = '\0';
376
377         if ((fh = fopen (filename, "r")) == NULL)
378                 return (-1);
379
380         if (fgets (buffer, 1024, fh) == NULL)
381         {
382                 fclose (fh);
383                 return (-1);
384         }
385
386         fclose (fh);
387
388         fields_len = strsplit (buffer, fields, 64);
389         if (fields_len < 24)
390         {
391                 DBG ("`%s' has only %i fields..",
392                                 filename, fields_len);
393                 return (-1);
394         }
395         else if (fields_len != 41)
396         {
397                 DBG ("WARNING: (fields_len = %i) != 41", fields_len);
398         }
399
400         /* copy the name, strip brackets in the process */
401         name_len = strlen (fields[1]) - 2;
402         if ((fields[1][0] != '(') || (fields[1][name_len + 1] != ')'))
403         {
404                 DBG ("No brackets found in process name: `%s'", fields[1]);
405                 return (-1);
406         }
407         fields[1] = fields[1] + 1;
408         fields[1][name_len] = '\0';
409         strncpy (ps->name, fields[1], PROCSTAT_NAME_LEN);
410
411         ppid = atoi (fields[3]);
412
413         if ((tasks = ps_read_tasks (pid)) == NULL)
414         {
415                 DBG ("ps_read_tasks (%i) failed.", pid);
416                 return (-1);
417         }
418
419         *state = '\0';
420         ps->num_lwp  = 0;
421         ps->num_proc = 1;
422         for (i = 0; tasks[i] != 0; i++)
423                 ps->num_lwp++;
424
425         free (tasks);
426         tasks = NULL;
427
428         /* Leave the rest at zero if this is only an LWP */
429         if (ps->num_proc == 0)
430         {
431                 DBG ("This is only an LWP: pid = %i; name = %s;",
432                                 pid, ps->name);
433                 return (0);
434         }
435
436         ps->vmem_minflt = atol  (fields[9]);
437         ps->vmem_majflt = atol  (fields[11]);
438         ps->cpu_user    = atoll (fields[13]);
439         ps->cpu_system  = atoll (fields[14]);
440         ps->vmem_rss    = atol  (fields[23]);
441         
442         /* Convert jiffies to useconds */
443         ps->cpu_user   = ps->cpu_user   * 1000000 / CONFIG_HZ;
444         ps->cpu_system = ps->cpu_system * 1000000 / CONFIG_HZ;
445         ps->vmem_rss   = ps->vmem_rss * pagesize_g;
446
447         *state = fields[2][0];
448
449         /* success */
450         return (0);
451 } /* int ps_read_process (...) */
452 #endif /* KERNEL_LINUX */
453
/*
 * Read callback: counts the processes (Linux) or threads (Mach) of this
 * host by state and submits the totals with ps_submit.
 *
 * Mach: walks all processor sets, their tasks and the threads of each task.
 * A task whose threads cannot be listed is counted as a zombie. `paging' is
 * not available and submitted as -1.
 * Linux: walks the numeric entries of `/proc', reads each process with
 * ps_read_process and additionally accumulates per-name statistics for the
 * processes configured with `CollectName'.
 */
static void ps_read (void)
{
#if HAVE_THREAD_INFO
        kern_return_t            status;

        /* The list lengths are unsigned, so the counters are as well. */
        mach_msg_type_number_t   pset;
        processor_set_t          port_pset_priv;

        mach_msg_type_number_t   task;
        task_array_t             task_list;
        mach_msg_type_number_t   task_list_len;

        mach_msg_type_number_t   thread;
        thread_act_array_t       thread_list;
        mach_msg_type_number_t   thread_list_len;
        thread_basic_info_data_t thread_data;
        mach_msg_type_number_t   thread_data_len;

        int running  = 0;
        int sleeping = 0;
        int zombies  = 0;
        int stopped  = 0;
        int blocked  = 0;

        /*
         * The Mach-concept is a little different from the traditional UNIX
         * concept: All the work is done in threads. Threads are contained in
         * `tasks'. Therefore, `task status' doesn't make much sense, since
         * it's actually a `thread status'.
         * Tasks are assigned to sets of processors, so that's where you go to
         * get a list.
         */
        for (pset = 0; pset < pset_list_len; pset++)
        {
                if ((status = host_processor_set_priv (port_host_self,
                                                pset_list[pset],
                                                &port_pset_priv)) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "host_processor_set_priv failed: %s\n",
                                        mach_error_string (status));
                        continue;
                }

                if ((status = processor_set_tasks (port_pset_priv,
                                                &task_list,
                                                &task_list_len)) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "processor_set_tasks failed: %s\n",
                                        mach_error_string (status));
                        mach_port_deallocate (port_task_self, port_pset_priv);
                        continue;
                }

                for (task = 0; task < task_list_len; task++)
                {
                        status = task_threads (task_list[task], &thread_list,
                                        &thread_list_len);
                        if (status != KERN_SUCCESS)
                        {
                                /* Apple's `top' treats this case as a zombie.
                                 * It makes sense to some extent: A `zombie'
                                 * thread is nonsense, since the task/process
                                 * is dead. */
                                zombies++;
                                DBG ("task_threads failed: %s",
                                                mach_error_string (status));
                                if (task_list[task] != port_task_self)
                                        mach_port_deallocate (port_task_self,
                                                        task_list[task]);
                                continue; /* with next task_list */
                        }

                        for (thread = 0; thread < thread_list_len; thread++)
                        {
                                thread_data_len = THREAD_BASIC_INFO_COUNT;
                                status = thread_info (thread_list[thread],
                                                THREAD_BASIC_INFO,
                                                (thread_info_t) &thread_data,
                                                &thread_data_len);
                                if (status != KERN_SUCCESS)
                                {
                                        syslog (LOG_ERR, "thread_info failed: %s\n",
                                                        mach_error_string (status));
                                        if (task_list[task] != port_task_self)
                                                mach_port_deallocate (port_task_self,
                                                                thread_list[thread]);
                                        continue; /* with next thread_list */
                                }

                                switch (thread_data.run_state)
                                {
                                        case TH_STATE_RUNNING:
                                                running++;
                                                break;
                                        case TH_STATE_STOPPED:
                                        /* What exactly is `halted'? */
                                        case TH_STATE_HALTED:
                                                stopped++;
                                                break;
                                        case TH_STATE_WAITING:
                                                sleeping++;
                                                break;
                                        case TH_STATE_UNINTERRUPTIBLE:
                                                blocked++;
                                                break;
                                        /* There is no `zombie' case here,
                                         * since there are no zombie-threads.
                                         * There's only zombie tasks, which are
                                         * handled above. */
                                        default:
                                                /* `run_state' is an integer;
                                                 * printing it with `%s' would
                                                 * be undefined behaviour. */
                                                syslog (LOG_WARNING,
                                                                "Unknown thread status: %i",
                                                                (int) thread_data.run_state);
                                                break;
                                } /* switch (thread_data.run_state) */

                                if (task_list[task] != port_task_self)
                                {
                                        status = mach_port_deallocate (port_task_self,
                                                        thread_list[thread]);
                                        if (status != KERN_SUCCESS)
                                                syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                                                mach_error_string (status));
                                }
                        } /* for (thread_list) */

                        if ((status = vm_deallocate (port_task_self,
                                                        (vm_address_t) thread_list,
                                                        thread_list_len * sizeof (thread_act_t)))
                                        != KERN_SUCCESS)
                        {
                                syslog (LOG_ERR, "vm_deallocate failed: %s",
                                                mach_error_string (status));
                        }
                        thread_list = NULL;
                        thread_list_len = 0;

                        /* Only deallocate the task port, if it isn't our own.
                         * Don't know what would happen in that case, but this
                         * is what Apple's top does.. ;) */
                        if (task_list[task] != port_task_self)
                        {
                                status = mach_port_deallocate (port_task_self,
                                                task_list[task]);
                                if (status != KERN_SUCCESS)
                                        syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                                        mach_error_string (status));
                        }
                } /* for (task_list) */

                if ((status = vm_deallocate (port_task_self,
                                (vm_address_t) task_list,
                                task_list_len * sizeof (task_t))) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "vm_deallocate failed: %s",
                                        mach_error_string (status));
                }
                task_list = NULL;
                task_list_len = 0;

                if ((status = mach_port_deallocate (port_task_self, port_pset_priv))
                                != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                        mach_error_string (status));
                }
        } /* for (pset_list) */

        ps_submit (running, sleeping, zombies, stopped, -1, blocked);
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
        int running  = 0;
        int sleeping = 0;
        int zombies  = 0;
        int stopped  = 0;
        int paging   = 0;
        int blocked  = 0;

        struct dirent *ent;
        DIR           *proc;
        int            pid;

        int        status;
        procstat_t ps;
        char       state;

        procstat_t *ps_ptr;

        /* The per-name statistics are summed up anew in every iteration. */
        ps_list_reset (list_head_g);

        if ((proc = opendir ("/proc")) == NULL)
        {
                syslog (LOG_ERR, "Cannot open `/proc': %s", strerror (errno));
                return;
        }

        while ((ent = readdir (proc)) != NULL)
        {
                /* <ctype.h> functions require a value in the range of
                 * `unsigned char'. */
                if (!isdigit ((unsigned char) ent->d_name[0]))
                        continue;

                if ((pid = atoi (ent->d_name)) < 1)
                        continue;

                status = ps_read_process (pid, &ps, &state);
                if (status != 0)
                {
                        DBG ("ps_read_process failed: %i", status);
                        continue;
                }

                switch (state)
                {
                        case 'R': running++;  break;
                        case 'S': sleeping++; break;
                        case 'D': blocked++;  break;
                        case 'Z': zombies++;  break;
                        case 'T': stopped++;  break;
                        case 'W': paging++;   break;
                        /* Any other state is not counted. */
                        default:              break;
                }

                if (list_head_g != NULL)
                        ps_list_add (list_head_g, &ps);
        }

        closedir (proc);

        ps_submit (running, sleeping, zombies, stopped, paging, blocked);

        for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
                ps_submit_proc (ps_ptr);
#endif /* KERNEL_LINUX */
}
689 #else
690 # define ps_read NULL
691 #endif /* PROCESSES_HAVE_READ */
692
693 void module_register (void)
694 {
695         plugin_register (MODULE_NAME, ps_init, ps_read, ps_write);
696         cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num);
697 }
698
699 #undef BUFSIZE
700 #undef MODULE_NAME