b4f78a8248d4d19ef2c499a92a19e46340ca1e67
[collectd.git] / src / processes.c
1 /**
2  * collectd - src/processes.c
3  * Copyright (C) 2005  Lyonel Vincent
4  * Copyright (C) 2006  Florian Forster (Mach code)
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; either version 2 of the License, or (at your
9  * option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  *
20  * Authors:
21  *   Lyonel Vincent <lyonel at ezix.org>
22  *   Florian octo Forster <octo at verplant.org>
23  **/
24
25 #include "collectd.h"
26 #include "common.h"
27 #include "plugin.h"
28 #include "utils_debug.h"
29 #include "configfile.h"
30
31 /* Include header files for the mach system, if they exist.. */
32 #if HAVE_MACH_MACH_INIT_H
33 #  include <mach/mach_init.h>
34 #endif
35 #if HAVE_MACH_HOST_PRIV_H
36 #  include <mach/host_priv.h>
37 #endif
38 #if HAVE_MACH_MACH_ERROR_H
39 #  include <mach/mach_error.h>
40 #endif
41 #if HAVE_MACH_MACH_HOST_H
42 #  include <mach/mach_host.h>
43 #endif
44 #if HAVE_MACH_MACH_PORT_H
45 #  include <mach/mach_port.h>
46 #endif
47 #if HAVE_MACH_MACH_TYPES_H
48 #  include <mach/mach_types.h>
49 #endif
50 #if HAVE_MACH_MESSAGE_H
51 #  include <mach/message.h>
52 #endif
53 #if HAVE_MACH_PROCESSOR_SET_H
54 #  include <mach/processor_set.h>
55 #endif
56 #if HAVE_MACH_TASK_H
57 #  include <mach/task.h>
58 #endif
59 #if HAVE_MACH_THREAD_ACT_H
60 #  include <mach/thread_act.h>
61 #endif
62 #if HAVE_MACH_VM_REGION_H
63 #  include <mach/vm_region.h>
64 #endif
65 #if HAVE_MACH_VM_MAP_H
66 #  include <mach/vm_map.h>
67 #endif
68 #if HAVE_MACH_VM_PROT_H
69 #  include <mach/vm_prot.h>
70 #endif
71
72 #define MODULE_NAME "processes"
73
74 #if HAVE_THREAD_INFO || KERNEL_LINUX
75 # define PROCESSES_HAVE_READ 1
76 #else
77 # define PROCESSES_HAVE_READ 0
78 #endif
79
80 #define BUFSIZE 256
81
82 static char *ps_file = "processes.rrd";
83
84 static char *ds_def[] =
85 {
86         "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
87         "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
88         "DS:zombies:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
89         "DS:stopped:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
90         "DS:paging:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
91         "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
92         NULL
93 };
94 static int ds_num = 6;
95
/*
 * Configuration keys registered with cf_register() in module_register().
 * The list is NULL-terminated.
 */
static char *config_keys[] =
{
        "CollectName",
        NULL
};

/* Number of keys in config_keys, not counting the NULL terminator (1). */
static int config_keys_num =
        (int) (sizeof (config_keys) / sizeof (config_keys[0])) - 1;
102
/* Size of the `name' buffer in procstat_t, including the terminating NUL. */
#define PROCSTAT_NAME_LEN 256

/*
 * Accumulated statistics for all processes sharing one name. Entries form
 * a singly linked list via `next'.
 */
typedef struct procstat
{
        char             name[PROCSTAT_NAME_LEN]; /* process name to match */
        unsigned int     num_proc;     /* number of processes */
        unsigned int     num_lwp;      /* number of tasks / threads */
        unsigned int     vmem_rss;     /* resident set size */
        unsigned int     vmem_minflt;  /* minor page faults */
        unsigned int     vmem_majflt;  /* major page faults */
        unsigned int     cpu_user;     /* user mode CPU time */
        unsigned int     cpu_system;   /* kernel mode CPU time */
        struct procstat *next;         /* next entry or NULL */
} procstat_t;

/* Head of the list of configured process names; NULL while empty. */
static procstat_t *list_head_g = NULL;
118
#if HAVE_THREAD_INFO
/* Mach ports of this host and this task; assigned in ps_init(). */
static mach_port_t port_host_self;
static mach_port_t port_task_self;

/* Processor set list and its length, filled in by ps_init(). */
static processor_set_name_array_t pset_list;
static mach_msg_type_number_t     pset_list_len;

#elif KERNEL_LINUX
/* Value of sysconf(_SC_PAGESIZE), stored by ps_init(). */
static long pagesize_g;
#endif /* HAVE_THREAD_INFO || KERNEL_LINUX */
130
131 static procstat_t *ps_list_append (procstat_t *list, const char *name)
132 {
133         procstat_t *new;
134         procstat_t *ptr;
135
136         if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL)
137                 return (NULL);
138         memset (new, 0, sizeof (procstat_t));
139         strncpy (new->name, name, PROCSTAT_NAME_LEN);
140
141         for (ptr = list; ptr != NULL; ptr = ptr->next)
142                 if (ptr->next == NULL)
143                         break;
144
145         if (ptr != NULL)
146                 ptr->next = new;
147
148         return (new);
149 }
150
151 static void ps_list_add (procstat_t *list, procstat_t *entry)
152 {
153         procstat_t *ptr;
154
155         ptr = list;
156         while ((ptr != NULL) && (strcmp (ptr->name, entry->name) != 0))
157                 ptr = ptr->next;
158
159         if (ptr == NULL)
160                 return;
161
162         ptr->num_proc    += entry->num_proc;
163         ptr->num_lwp     += entry->num_lwp;
164         ptr->vmem_rss    += entry->vmem_rss;
165         ptr->vmem_minflt += entry->vmem_minflt;
166         ptr->vmem_majflt += entry->vmem_majflt;
167         ptr->cpu_user    += entry->cpu_user;
168         ptr->cpu_system  += entry->cpu_system;
169 }
170
171 static void ps_list_reset (procstat_t *ps)
172 {
173         while (ps != NULL)
174         {
175                 ps->num_proc    = 0;
176                 ps->num_lwp     = 0;
177                 ps->vmem_rss    = 0;
178                 ps->vmem_minflt = 0;
179                 ps->vmem_majflt = 0;
180                 ps->cpu_user    = 0;
181                 ps->cpu_system  = 0;
182                 ps = ps->next;
183         }
184 }
185
186 static int ps_config (char *key, char *value)
187 {
188         if (strcasecmp (key, "CollectName") == 0)
189         {
190                 procstat_t *entry;
191
192                 entry = ps_list_append (list_head_g, value);
193                 if (entry == NULL)
194                 {
195                         syslog (LOG_ERR, "processes plugin: ps_list_append failed.");
196                         return (1);
197                 }
198                 if (list_head_g == NULL)
199                         list_head_g = entry;
200         }
201         else
202         {
203                 return (-1);
204         }
205
206         return (0);
207 }
208
/*
 * Init callback.
 *
 * Mach: stores the host and task ports and (re-)reads the list of
 * processor sets, releasing a previously obtained list first.
 * Linux: stores the page size reported by sysconf().
 */
static void ps_init (void)
{
#if HAVE_THREAD_INFO
        kern_return_t rc;

        port_host_self = mach_host_self ();
        port_task_self = mach_task_self ();

        /* Release the list left over from an earlier call. */
        if (pset_list != NULL)
        {
                vm_deallocate (port_task_self,
                                (vm_address_t) pset_list,
                                pset_list_len * sizeof (processor_set_t));
                pset_list = NULL;
                pset_list_len = 0;
        }

        rc = host_processor_sets (port_host_self, &pset_list, &pset_list_len);
        if (rc != KERN_SUCCESS)
        {
                syslog (LOG_ERR, "host_processor_sets failed: %s\n",
                                mach_error_string (rc));
                pset_list = NULL;
                pset_list_len = 0;
        }
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
        pagesize_g = sysconf (_SC_PAGESIZE);
#endif /* KERNEL_LINUX */
}
244
245 static void ps_write (char *host, char *inst, char *val)
246 {
247         rrd_update_file (host, ps_file, val, ds_def, ds_num);
248 }
249
250 #if PROCESSES_HAVE_READ
251 static void ps_submit (int running,
252                 int sleeping,
253                 int zombies,
254                 int stopped,
255                 int paging,
256                 int blocked)
257 {
258         char buf[BUFSIZE];
259
260         if (snprintf (buf, BUFSIZE, "%u:%i:%i:%i:%i:%i:%i",
261                                 (unsigned int) curtime,
262                                 running, sleeping, zombies, stopped, paging,
263                                 blocked) >= BUFSIZE)
264                 return;
265
266         DBG ("running = %i; sleeping = %i; zombies = %i; stopped = %i; paging = %i; blocked = %i;",
267                         running, sleeping, zombies, stopped, paging, blocked);
268
269         plugin_submit (MODULE_NAME, "-", buf);
270 }
271
272 static void ps_submit_proc (procstat_t *ps)
273 {
274         if (ps == NULL)
275                 return;
276
277         DBG ("name = %s; num_proc = %i; num_lwp = %i; vmem_rss = %i; "
278                         "vmem_minflt = %i; vmem_majflt = %i; "
279                         "cpu_user = %i; cpu_system = %i;",
280                         ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss,
281                         ps->vmem_minflt, ps->vmem_majflt, ps->cpu_user,
282                         ps->cpu_system);
283 }
284
285 #if KERNEL_LINUX
/*
 * Reads the task IDs of process `pid' from the directory
 * `/proc/<pid>/task'.
 *
 * Returns a malloc'ed array of task IDs terminated by a zero element; the
 * caller has to free() it. Returns NULL if the directory cannot be opened,
 * contains no numeric entries, or memory allocation fails.
 */
static int *ps_read_tasks (int pid)
{
        int *list = NULL;
        int  list_size = 1; /* size of allocated space, in elements */
        int  list_len = 0;  /* number of currently used elements */

        char           dirname[64];
        DIR           *dh;
        struct dirent *ent;

        snprintf (dirname, sizeof (dirname), "/proc/%i/task", pid);
        dirname[sizeof (dirname) - 1] = '\0';

        if ((dh = opendir (dirname)) == NULL)
        {
                syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'",
                                dirname);
                return (NULL);
        }

        while ((ent = readdir (dh)) != NULL)
        {
                /* <ctype.h> functions require a value in the range of
                 * `unsigned char'. */
                if (!isdigit ((unsigned char) ent->d_name[0]))
                        continue;

                /* Always keep one spare element for the zero terminator. */
                if ((list_len + 1) >= list_size)
                {
                        int *new_ptr;
                        int  new_size = 2 * list_size;
                        /* Comes in sizes: 2, 4, 8, 16, ... */

                        new_ptr = (int *) realloc (list, sizeof (int) * (size_t) new_size);
                        if (new_ptr == NULL)
                        {
                                free (list);
                                /* Don't leak the directory handle. */
                                closedir (dh);
                                syslog (LOG_ERR, "processes plugin: "
                                                "Failed to allocate more memory.");
                                return (NULL);
                        }

                        list = new_ptr;
                        list_size = new_size;

                        memset (list + list_len, 0,
                                        sizeof (int) * (size_t) (list_size - list_len));
                }

                /* Entries that do not convert to a non-zero ID are
                 * overwritten by the next entry. */
                list[list_len] = atoi (ent->d_name);
                if (list[list_len] != 0)
                        list_len++;
        }

        closedir (dh);

        /* No numeric entry was found, e.g. because the process vanished
         * while its directory was being read. */
        if (list == NULL)
                return (NULL);

        assert (list_len < list_size);
        assert (list[list_len] == 0);

        return (list);
}
345
346 int ps_read_process (int pid, procstat_t *ps, char *state)
347 {
348         char  filename[64];
349         char  buffer[1024];
350         FILE *fh;
351
352         char *fields[64];
353         char  fields_len;
354
355         int  *tasks;
356         int   i;
357
358         int   ppid;
359         int   name_len;
360
361         memset (ps, 0, sizeof (procstat_t));
362
363         snprintf (filename, 64, "/proc/%i/stat", pid);
364         filename[63] = '\0';
365
366         if ((fh = fopen (filename, "r")) == NULL)
367                 return (-1);
368
369         if (fgets (buffer, 1024, fh) == NULL)
370         {
371                 fclose (fh);
372                 return (-1);
373         }
374
375         fclose (fh);
376
377         fields_len = strsplit (buffer, fields, 64);
378         if (fields_len < 24)
379         {
380                 DBG ("`%s' has only %i fields..",
381                                 filename, fields_len);
382                 return (-1);
383         }
384         else if (fields_len != 41)
385         {
386                 DBG ("WARNING: (fields_len = %i) != 41", fields_len);
387         }
388
389         /* copy the name, strip brackets in the process */
390         name_len = strlen (fields[1]) - 2;
391         if ((fields[1][0] != '(') || (fields[1][name_len + 1] != ')'))
392         {
393                 DBG ("No brackets found in process name: `%s'", fields[1]);
394                 return (-1);
395         }
396         fields[1] = fields[1] + 1;
397         fields[1][name_len] = '\0';
398         strncpy (ps->name, fields[1], PROCSTAT_NAME_LEN);
399
400         ppid = atoi (fields[3]);
401
402         if ((tasks = ps_read_tasks (pid)) == NULL)
403         {
404                 DBG ("ps_read_tasks (%i) failed.", pid);
405                 return (-1);
406         }
407
408         *state = '\0';
409         ps->num_lwp  = 0;
410         ps->num_proc = 1;
411         for (i = 0; tasks[i] != 0; i++)
412                 ps->num_lwp++;
413
414         free (tasks);
415         tasks = NULL;
416
417         /* Leave the rest at zero if this is only an LWP */
418         if (ps->num_proc == 0)
419         {
420                 DBG ("This is only an LWP: pid = %i; name = %s;",
421                                 pid, ps->name);
422                 return (0);
423         }
424
425         ps->vmem_minflt = atoi (fields[9]);
426         ps->vmem_majflt = atoi (fields[11]);
427         ps->cpu_user    = atoi (fields[13]);
428         ps->cpu_system  = atoi (fields[14]);
429         ps->vmem_rss    = atoi (fields[23]) * pagesize_g;
430
431         *state = fields[2][0];
432
433         /* success */
434         return (0);
435 } /* int ps_read_process (...) */
436 #endif /* KERNEL_LINUX */
437
/*
 * Read callback: counts processes (Linux) or threads (Mach) by state and
 * passes the totals to ps_submit().
 *
 * Mach: walks all processor sets, their tasks and the tasks' threads. A
 * task whose threads cannot be listed is counted as a zombie. There is no
 * `paging' state; -1 is submitted in its place.
 *
 * Linux: walks the numeric entries of `/proc', reads each process with
 * ps_read_process() and additionally accumulates the per-name statistics
 * for the configured names, which are then passed to ps_submit_proc().
 */
static void ps_read (void)
{
#if HAVE_THREAD_INFO
        kern_return_t            status;

        int                      pset;
        processor_set_t          port_pset_priv;

        int                      task;
        task_array_t             task_list;
        mach_msg_type_number_t   task_list_len;

        int                      thread;
        thread_act_array_t       thread_list;
        mach_msg_type_number_t   thread_list_len;
        thread_basic_info_data_t thread_data;
        mach_msg_type_number_t   thread_data_len;

        int running  = 0;
        int sleeping = 0;
        int zombies  = 0;
        int stopped  = 0;
        int blocked  = 0;

        /*
         * The Mach-concept is a little different from the traditional UNIX
         * concept: All the work is done in threads. Threads are contained in
         * `tasks'. Therefore, `task status' doesn't make much sense, since
         * it's actually a `thread status'.
         * Tasks are assigned to sets of processors, so that's where you go to
         * get a list.
         */
        for (pset = 0; pset < pset_list_len; pset++)
        {
                if ((status = host_processor_set_priv (port_host_self,
                                                pset_list[pset],
                                                &port_pset_priv)) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "host_processor_set_priv failed: %s\n",
                                        mach_error_string (status));
                        continue;
                }

                if ((status = processor_set_tasks (port_pset_priv,
                                                &task_list,
                                                &task_list_len)) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "processor_set_tasks failed: %s\n",
                                        mach_error_string (status));
                        mach_port_deallocate (port_task_self, port_pset_priv);
                        continue;
                }

                for (task = 0; task < task_list_len; task++)
                {
                        status = task_threads (task_list[task], &thread_list,
                                        &thread_list_len);
                        if (status != KERN_SUCCESS)
                        {
                                /* Apple's `top' treats this case a zombie. It
                                 * makes sense to some extent: A `zombie'
                                 * thread is nonsense, since the task/process
                                 * is dead. */
                                zombies++;
                                DBG ("task_threads failed: %s",
                                                mach_error_string (status));
                                if (task_list[task] != port_task_self)
                                        mach_port_deallocate (port_task_self,
                                                        task_list[task]);
                                continue; /* with next task_list */
                        }

                        for (thread = 0; thread < thread_list_len; thread++)
                        {
                                thread_data_len = THREAD_BASIC_INFO_COUNT;
                                status = thread_info (thread_list[thread],
                                                THREAD_BASIC_INFO,
                                                (thread_info_t) &thread_data,
                                                &thread_data_len);
                                if (status != KERN_SUCCESS)
                                {
                                        syslog (LOG_ERR, "thread_info failed: %s\n",
                                                        mach_error_string (status));
                                        if (task_list[task] != port_task_self)
                                                mach_port_deallocate (port_task_self,
                                                                thread_list[thread]);
                                        continue; /* with next thread_list */
                                }

                                switch (thread_data.run_state)
                                {
                                        case TH_STATE_RUNNING:
                                                running++;
                                                break;
                                        case TH_STATE_STOPPED:
                                        /* What exactly is `halted'? */
                                        case TH_STATE_HALTED:
                                                stopped++;
                                                break;
                                        case TH_STATE_WAITING:
                                                sleeping++;
                                                break;
                                        case TH_STATE_UNINTERRUPTIBLE:
                                                blocked++;
                                                break;
                                        /* There is no `zombie' case here,
                                         * since there are no zombie-threads.
                                         * There's only zombie tasks, which are
                                         * handled above. */
                                        default:
                                                /* `run_state' is an integer,
                                                 * so it must not be printed
                                                 * with `%s'. */
                                                syslog (LOG_WARNING,
                                                                "Unknown thread status: %i",
                                                                (int) thread_data.run_state);
                                                break;
                                } /* switch (thread_data.run_state) */

                                if (task_list[task] != port_task_self)
                                {
                                        status = mach_port_deallocate (port_task_self,
                                                        thread_list[thread]);
                                        if (status != KERN_SUCCESS)
                                                syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                                                mach_error_string (status));
                                }
                        } /* for (thread_list) */

                        if ((status = vm_deallocate (port_task_self,
                                                        (vm_address_t) thread_list,
                                                        thread_list_len * sizeof (thread_act_t)))
                                        != KERN_SUCCESS)
                        {
                                syslog (LOG_ERR, "vm_deallocate failed: %s",
                                                mach_error_string (status));
                        }
                        thread_list = NULL;
                        thread_list_len = 0;

                        /* Only deallocate the task port, if it isn't our own.
                         * Don't know what would happen in that case, but this
                         * is what Apple's top does.. ;) */
                        if (task_list[task] != port_task_self)
                        {
                                status = mach_port_deallocate (port_task_self,
                                                task_list[task]);
                                if (status != KERN_SUCCESS)
                                        syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                                        mach_error_string (status));
                        }
                } /* for (task_list) */

                if ((status = vm_deallocate (port_task_self,
                                (vm_address_t) task_list,
                                task_list_len * sizeof (task_t))) != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "vm_deallocate failed: %s",
                                        mach_error_string (status));
                }
                task_list = NULL;
                task_list_len = 0;

                if ((status = mach_port_deallocate (port_task_self, port_pset_priv))
                                != KERN_SUCCESS)
                {
                        syslog (LOG_ERR, "mach_port_deallocate failed: %s",
                                        mach_error_string (status));
                }
        } /* for (pset_list) */

        ps_submit (running, sleeping, zombies, stopped, -1, blocked);
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
        int running  = 0;
        int sleeping = 0;
        int zombies  = 0;
        int stopped  = 0;
        int paging   = 0;
        int blocked  = 0;

        struct dirent *ent;
        DIR           *proc;
        int            pid;

        int        status;
        procstat_t ps;
        char       state;

        procstat_t *ps_ptr;

        /* Start this iteration with empty per-name statistics. */
        ps_list_reset (list_head_g);

        if ((proc = opendir ("/proc")) == NULL)
        {
                syslog (LOG_ERR, "Cannot open `/proc': %s", strerror (errno));
                return;
        }

        while ((ent = readdir (proc)) != NULL)
        {
                /* <ctype.h> functions require a value in the range of
                 * `unsigned char'. */
                if (!isdigit ((unsigned char) ent->d_name[0]))
                        continue;

                if ((pid = atoi (ent->d_name)) < 1)
                        continue;

                status = ps_read_process (pid, &ps, &state);
                if (status != 0)
                {
                        DBG ("ps_read_process failed: %i", status);
                        continue;
                }

                switch (state)
                {
                        case 'R': running++;  break;
                        case 'S': sleeping++; break;
                        case 'D': blocked++;  break;
                        case 'Z': zombies++;  break;
                        case 'T': stopped++;  break;
                        case 'W': paging++;   break;
                        /* Any other state is not counted. */
                        default:              break;
                }

                if (list_head_g != NULL)
                        ps_list_add (list_head_g, &ps);
        }

        closedir (proc);

        ps_submit (running, sleeping, zombies, stopped, paging, blocked);

        for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
                ps_submit_proc (ps_ptr);
#endif /* KERNEL_LINUX */
}
673 #else
674 # define ps_read NULL
675 #endif /* PROCESSES_HAVE_READ */
676
677 void module_register (void)
678 {
679         plugin_register (MODULE_NAME, ps_init, ps_read, ps_write);
680         cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num);
681 }
682
683 #undef BUFSIZE
684 #undef MODULE_NAME