Merge remote-tracking branch 'origin/pr/1239'
[collectd.git] / src / disk.c
1 /**
2  * collectd - src/disk.c
3  * Copyright (C) 2005-2012  Florian octo Forster
4  * Copyright (C) 2009       Manuel Sanmartin
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; only version 2 of the License is applicable.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
18  *
19  * Authors:
20  *   Florian octo Forster <octo at collectd.org>
21  *   Manuel Sanmartin
22  **/
23
24 #include "collectd.h"
25 #include "common.h"
26 #include "plugin.h"
27 #include "utils_ignorelist.h"
28
29 #if HAVE_MACH_MACH_TYPES_H
30 #  include <mach/mach_types.h>
31 #endif
32 #if HAVE_MACH_MACH_INIT_H
33 #  include <mach/mach_init.h>
34 #endif
35 #if HAVE_MACH_MACH_ERROR_H
36 #  include <mach/mach_error.h>
37 #endif
38 #if HAVE_MACH_MACH_PORT_H
39 #  include <mach/mach_port.h>
40 #endif
41 #if HAVE_COREFOUNDATION_COREFOUNDATION_H
42 #  include <CoreFoundation/CoreFoundation.h>
43 #endif
44 #if HAVE_IOKIT_IOKITLIB_H
45 #  include <IOKit/IOKitLib.h>
46 #endif
47 #if HAVE_IOKIT_IOTYPES_H
48 #  include <IOKit/IOTypes.h>
49 #endif
50 #if HAVE_IOKIT_STORAGE_IOBLOCKSTORAGEDRIVER_H
51 #  include <IOKit/storage/IOBlockStorageDriver.h>
52 #endif
53 #if HAVE_IOKIT_IOBSD_H
54 #  include <IOKit/IOBSD.h>
55 #endif
56 #if KERNEL_FREEBSD
57 #include <devstat.h>
58 #include <libgeom.h>
59 #endif
60
61 #if HAVE_LIMITS_H
62 # include <limits.h>
63 #endif
64 #ifndef UINT_MAX
65 #  define UINT_MAX 4294967295U
66 #endif
67
68 #if HAVE_STATGRAB_H
69 # include <statgrab.h>
70 #endif
71
72 #if HAVE_PERFSTAT
73 # ifndef _AIXVERSION_610
74 # include <sys/systemcfg.h>
75 # endif
76 # include <sys/protosw.h>
77 # include <libperfstat.h>
78 #endif
79
80 #if HAVE_IOKIT_IOKITLIB_H
81 static mach_port_t io_master_port = MACH_PORT_NULL;
82 /* This defaults to false for backwards compatibility. Please fix in the next
83  * major version. */
84 static _Bool use_bsd_name = 0;
85 /* #endif HAVE_IOKIT_IOKITLIB_H */
86
87 #elif KERNEL_LINUX
88 typedef struct diskstats
89 {
90         char *name;
91
92         /* This overflows in roughly 1361 years */
93         unsigned int poll_count;
94
95         derive_t read_sectors;
96         derive_t write_sectors;
97
98         derive_t read_bytes;
99         derive_t write_bytes;
100
101         derive_t read_ops;
102         derive_t write_ops;
103         derive_t read_time;
104         derive_t write_time;
105
106         derive_t avg_read_time;
107         derive_t avg_write_time;
108
109         struct diskstats *next;
110 } diskstats_t;
111
112 static diskstats_t *disklist;
113 /* #endif KERNEL_LINUX */
114 #elif KERNEL_FREEBSD
115 static struct gmesh geom_tree;
116 /* #endif KERNEL_FREEBSD */
117
118 #elif HAVE_LIBKSTAT
119 #define MAX_NUMDISK 1024
120 extern kstat_ctl_t *kc;
121 static kstat_t *ksp[MAX_NUMDISK];
122 static int numdisk = 0;
123 /* #endif HAVE_LIBKSTAT */
124
125 #elif defined(HAVE_LIBSTATGRAB)
/* #endif HAVE_LIBSTATGRAB */
127
128 #elif HAVE_PERFSTAT
129 static perfstat_disk_t * stat_disk;
130 static int numdisk;
131 static int pnumdisk;
132 /* #endif HAVE_PERFSTAT */
133
134 #else
135 # error "No applicable input method."
136 #endif
137
138 #if HAVE_LIBUDEV
139 #include <libudev.h>
140
141 static char *conf_udev_name_attr = NULL;
142 static struct udev *handle_udev;
143 #endif
144
145 static const char *config_keys[] =
146 {
147         "Disk",
148         "UseBSDName",
149         "IgnoreSelected",
150         "UdevNameAttr"
151 };
152 static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
153
154 static ignorelist_t *ignorelist = NULL;
155
156 static int disk_config (const char *key, const char *value)
157 {
158   if (ignorelist == NULL)
159     ignorelist = ignorelist_create (/* invert = */ 1);
160   if (ignorelist == NULL)
161     return (1);
162
163   if (strcasecmp ("Disk", key) == 0)
164   {
165     ignorelist_add (ignorelist, value);
166   }
167   else if (strcasecmp ("IgnoreSelected", key) == 0)
168   {
169     int invert = 1;
170     if (IS_TRUE (value))
171       invert = 0;
172     ignorelist_set_invert (ignorelist, invert);
173   }
174   else if (strcasecmp ("UseBSDName", key) == 0)
175   {
176 #if HAVE_IOKIT_IOKITLIB_H
177     use_bsd_name = IS_TRUE (value) ? 1 : 0;
178 #else
179     WARNING ("disk plugin: The \"UseBSDName\" option is only supported "
180         "on Mach / Mac OS X and will be ignored.");
181 #endif
182   }
183   else if (strcasecmp ("UdevNameAttr", key) == 0)
184   {
185 #if HAVE_LIBUDEV
186     if (conf_udev_name_attr != NULL)
187     {
188       free (conf_udev_name_attr);
189       conf_udev_name_attr = NULL;
190     }
191     if ((conf_udev_name_attr = strdup (value)) == NULL)
192       return (1);
193 #else
194     WARNING ("disk plugin: The \"UdevNameAttr\" option is only supported "
195         "if collectd is built with libudev support");
196 #endif
197   }
198   else
199   {
200     return (-1);
201   }
202
203   return (0);
204 } /* int disk_config */
205
/*
 * Platform-specific initialisation of the disk plugin.
 *
 *  - IOKit:    (re-)acquires the I/O master port.
 *  - Linux:    nothing to do.
 *  - FreeBSD:  loads the GEOM tree and opens the GEOM statistics.
 *  - libkstat: collects up to MAX_NUMDISK kstat entries of type
 *              KSTAT_TYPE_IO whose class starts with "disk" or "partition".
 *
 * Returns 0 on success and -1 on failure.
 */
static int disk_init (void)
{
#if HAVE_IOKIT_IOKITLIB_H
        kern_return_t status;

        /* Drop a port left over from an earlier call before asking for a
         * new one. */
        if (io_master_port != MACH_PORT_NULL)
        {
                mach_port_deallocate (mach_task_self (),
                                io_master_port);
                io_master_port = MACH_PORT_NULL;
        }

        status = IOMasterPort (MACH_PORT_NULL, &io_master_port);
        if (status != kIOReturnSuccess)
        {
                ERROR ("IOMasterPort failed: %s",
                                mach_error_string (status));
                io_master_port = MACH_PORT_NULL;
                return (-1);
        }
/* #endif HAVE_IOKIT_IOKITLIB_H */

#elif KERNEL_LINUX
        /* do nothing */
/* #endif KERNEL_LINUX */

#elif KERNEL_FREEBSD
        int rv;

        rv = geom_gettree(&geom_tree);
        if (rv != 0) {
                ERROR ("geom_gettree() failed, returned %d", rv);
                return (-1);
        }
        rv = geom_stats_open();
        if (rv != 0) {
                ERROR ("geom_stats_open() failed, returned %d", rv);
                /* Initialisation failed: release the tree built above so
                 * it is not leaked. */
                geom_deletetree(&geom_tree);
                return (-1);
        }
/* #endif KERNEL_FREEBSD */

#elif HAVE_LIBKSTAT
        kstat_t *ksp_chain;

        numdisk = 0;

        if (kc == NULL)
                return (-1);

        for (numdisk = 0, ksp_chain = kc->kc_chain;
                        (numdisk < MAX_NUMDISK) && (ksp_chain != NULL);
                        ksp_chain = ksp_chain->ks_next)
        {
                /* Skip entries whose class starts with neither "disk" nor
                 * "partition". */
                if (strncmp (ksp_chain->ks_class, "disk", 4)
                                && strncmp (ksp_chain->ks_class, "partition", 9))
                        continue;
                if (ksp_chain->ks_type != KSTAT_TYPE_IO)
                        continue;
                ksp[numdisk++] = ksp_chain;
        }
#endif /* HAVE_LIBKSTAT */

        return (0);
} /* int disk_init */
270
271 static void disk_submit (const char *plugin_instance,
272                 const char *type,
273                 derive_t read, derive_t write)
274 {
275         value_t values[2];
276         value_list_t vl = VALUE_LIST_INIT;
277
278         /* Both `ignorelist' and `plugin_instance' may be NULL. */
279         if (ignorelist_match (ignorelist, plugin_instance) != 0)
280           return;
281
282         values[0].derive = read;
283         values[1].derive = write;
284
285         vl.values = values;
286         vl.values_len = 2;
287         sstrncpy (vl.host, hostname_g, sizeof (vl.host));
288         sstrncpy (vl.plugin, "disk", sizeof (vl.plugin));
289         sstrncpy (vl.plugin_instance, plugin_instance,
290                         sizeof (vl.plugin_instance));
291         sstrncpy (vl.type, type, sizeof (vl.type));
292
293         plugin_dispatch_values (&vl);
294 } /* void disk_submit */
295
296 #if KERNEL_FREEBSD || KERNEL_LINUX
297 static void submit_io_time (char const *plugin_instance, derive_t io_time, derive_t weighted_time)
298 {
299         value_t values[2];
300         value_list_t vl = VALUE_LIST_INIT;
301
302         if (ignorelist_match (ignorelist, plugin_instance) != 0)
303           return;
304
305         values[0].derive = io_time;
306         values[1].derive = weighted_time;
307
308         vl.values = values;
309         vl.values_len = 2;
310         sstrncpy (vl.host, hostname_g, sizeof (vl.host));
311         sstrncpy (vl.plugin, "disk", sizeof (vl.plugin));
312         sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
313         sstrncpy (vl.type, "disk_io_time", sizeof (vl.type));
314
315         plugin_dispatch_values (&vl);
316 } /* void submit_io_time */
317 #endif /* KERNEL_FREEBSD || KERNEL_LINUX */
318
319 #if KERNEL_LINUX
320 static void submit_in_progress (char const *disk_name, gauge_t in_progress)
321 {
322         value_t v;
323         value_list_t vl = VALUE_LIST_INIT;
324
325         if (ignorelist_match (ignorelist, disk_name) != 0)
326           return;
327
328         v.gauge = in_progress;
329
330         vl.values = &v;
331         vl.values_len = 1;
332         sstrncpy (vl.host, hostname_g, sizeof (vl.host));
333         sstrncpy (vl.plugin, "disk", sizeof (vl.plugin));
334         sstrncpy (vl.plugin_instance, disk_name, sizeof (vl.plugin_instance));
335         sstrncpy (vl.type, "pending_operations", sizeof (vl.type));
336
337         plugin_dispatch_values (&vl);
338 }
339
340
341 static counter_t disk_calc_time_incr (counter_t delta_time, counter_t delta_ops)
342 {
343         double interval = CDTIME_T_TO_DOUBLE (plugin_get_interval ());
344         double avg_time = ((double) delta_time) / ((double) delta_ops);
345         double avg_time_incr = interval * avg_time;
346
347         return ((counter_t) (avg_time_incr + .5));
348 }
349 #endif
350
351 #if HAVE_LIBUDEV
/**
 * Look up an alternative name for a disk instance in a udev property.
 *
 * udev:      libudev context used for the lookup.
 * disk_name: sysname of the device in the "block" subsystem.
 * attr:      name of the udev property to read.
 *
 * On success, returns a strdup()ed copy of the property value; the caller
 * owns the returned string and has to free() it. Returns NULL when the
 * device or the property cannot be found, or when strdup() fails.
 */

static char *disk_udev_attr_name (struct udev *udev, char *disk_name, const char *attr)
{
        struct udev_device *dev;
        const char *prop;
        char *output = NULL;

        dev = udev_device_new_from_subsystem_sysname (udev, "block", disk_name);
        if (dev == NULL)
                return (NULL);

        prop = udev_device_get_property_value (dev, attr);
        if (prop != NULL)
        {
                /* Copy the value before the device reference is dropped
                 * below; the caller receives the copy. */
                output = strdup (prop);
                /* Log only on success: handing a NULL pointer to "%s" is
                 * undefined behaviour. */
                if (output != NULL)
                {
                        DEBUG ("disk plugin: renaming %s => %s", disk_name, output);
                }
        }

        udev_device_unref (dev);
        return output;
}
377 #endif
378
379 #if HAVE_IOKIT_IOKITLIB_H
380 static signed long long dict_get_value (CFDictionaryRef dict, const char *key)
381 {
382         signed long long val_int;
383         CFNumberRef      val_obj;
384         CFStringRef      key_obj;
385
386         /* `key_obj' needs to be released. */
387         key_obj = CFStringCreateWithCString (kCFAllocatorDefault, key,
388                         kCFStringEncodingASCII);
389         if (key_obj == NULL)
390         {
391                 DEBUG ("CFStringCreateWithCString (%s) failed.", key);
392                 return (-1LL);
393         }
394
395         /* get => we don't need to release (== free) the object */
396         val_obj = (CFNumberRef) CFDictionaryGetValue (dict, key_obj);
397
398         CFRelease (key_obj);
399
400         if (val_obj == NULL)
401         {
402                 DEBUG ("CFDictionaryGetValue (%s) failed.", key);
403                 return (-1LL);
404         }
405
406         if (!CFNumberGetValue (val_obj, kCFNumberSInt64Type, &val_int))
407         {
408                 DEBUG ("CFNumberGetValue (%s) failed.", key);
409                 return (-1LL);
410         }
411
412         return (val_int);
413 }
414 #endif /* HAVE_IOKIT_IOKITLIB_H */
415
416 static int disk_read (void)
417 {
418 #if HAVE_IOKIT_IOKITLIB_H
419         io_registry_entry_t     disk;
420         io_registry_entry_t     disk_child;
421         io_iterator_t           disk_list;
422         CFMutableDictionaryRef  props_dict, child_dict;
423         CFDictionaryRef         stats_dict;
424         CFStringRef             tmp_cf_string_ref;
425         kern_return_t           status;
426
427         signed long long read_ops, read_byt, read_tme;
428         signed long long write_ops, write_byt, write_tme;
429
430         int  disk_major, disk_minor;
431         char disk_name[DATA_MAX_NAME_LEN];
432         char child_disk_name_bsd[DATA_MAX_NAME_LEN], props_disk_name_bsd[DATA_MAX_NAME_LEN];
433
434         /* Get the list of all disk objects. */
435         if (IOServiceGetMatchingServices (io_master_port, IOServiceMatching (kIOBlockStorageDriverClass), &disk_list) != kIOReturnSuccess) {
436                 ERROR ("disk plugin: IOServiceGetMatchingServices failed.");
437                 return (-1);
438         }
439
440         while ((disk = IOIteratorNext (disk_list)) != 0) {
441                 props_dict = NULL;
442                 stats_dict = NULL;
443                 child_dict = NULL;
444
445                 /* get child of disk entry and corresponding property dictionary */
446                 if ((status = IORegistryEntryGetChildEntry (disk, kIOServicePlane, &disk_child)) != kIOReturnSuccess) {
447                         /* This fails for example for DVD/CD drives, which we want to ignore anyway */
448                         DEBUG ("IORegistryEntryGetChildEntry (disk) failed: 0x%08x", status);
449                         IOObjectRelease (disk);
450                         continue;
451                 }
452                 if (IORegistryEntryCreateCFProperties (disk_child, (CFMutableDictionaryRef *) &child_dict, kCFAllocatorDefault, kNilOptions) != kIOReturnSuccess || child_dict == NULL) {
453                         ERROR ("disk plugin: IORegistryEntryCreateCFProperties (disk_child) failed.");
454                         IOObjectRelease (disk_child);
455                         IOObjectRelease (disk);
456                         continue;
457                 }
458
459                 /* extract name and major/minor numbers */
460                 memset (child_disk_name_bsd, 0, sizeof (child_disk_name_bsd));
461                 tmp_cf_string_ref = (CFStringRef) CFDictionaryGetValue (child_dict, CFSTR(kIOBSDNameKey));
462                 if (tmp_cf_string_ref) {
463                         assert (CFGetTypeID (tmp_cf_string_ref) == CFStringGetTypeID ());
464                         CFStringGetCString (tmp_cf_string_ref, child_disk_name_bsd, sizeof (child_disk_name_bsd), kCFStringEncodingUTF8);
465                 }
466                 disk_major = (int) dict_get_value (child_dict, kIOBSDMajorKey);
467                 disk_minor = (int) dict_get_value (child_dict, kIOBSDMinorKey);
468                 DEBUG ("disk plugin: child_disk_name_bsd=\"%s\" major=%d minor=%d", child_disk_name_bsd, disk_major, disk_minor);
469                 CFRelease (child_dict);
470                 IOObjectRelease (disk_child);
471
472                 /* get property dictionary of the disk entry itself */
473                 if (IORegistryEntryCreateCFProperties (disk, (CFMutableDictionaryRef *) &props_dict, kCFAllocatorDefault, kNilOptions) != kIOReturnSuccess || props_dict == NULL) {
474                         ERROR ("disk-plugin: IORegistryEntryCreateCFProperties failed.");
475                         IOObjectRelease (disk);
476                         continue;
477                 }
478
479                 /* extract name and stats dictionary */
480                 memset (props_disk_name_bsd, 0, sizeof (props_disk_name_bsd));
481                 tmp_cf_string_ref = (CFStringRef) CFDictionaryGetValue (props_dict, CFSTR(kIOBSDNameKey));
482                 if (tmp_cf_string_ref) {
483                         assert (CFGetTypeID (tmp_cf_string_ref) == CFStringGetTypeID ());
484                         CFStringGetCString (tmp_cf_string_ref, props_disk_name_bsd, sizeof (props_disk_name_bsd), kCFStringEncodingUTF8);
485                 }
486                 stats_dict = (CFDictionaryRef) CFDictionaryGetValue (props_dict, CFSTR (kIOBlockStorageDriverStatisticsKey));
487                 if (stats_dict == NULL) {
488                         ERROR ("disk plugin: CFDictionaryGetValue (%s) failed.", kIOBlockStorageDriverStatisticsKey);
489                         CFRelease (props_dict);
490                         IOObjectRelease (disk);
491                         continue;
492                 }
493                 DEBUG ("disk plugin: props_disk_name_bsd=\"%s\"", props_disk_name_bsd);
494
495                 /* choose name */
496                 if (use_bsd_name) {
497                         if (child_disk_name_bsd[0] != 0)
498                                 sstrncpy (disk_name, child_disk_name_bsd, sizeof (disk_name));
499                         else if (props_disk_name_bsd[0] != 0)
500                                 sstrncpy (disk_name, props_disk_name_bsd, sizeof (disk_name));
501                         else {
502                                 ERROR ("disk plugin: can't find bsd disk name.");
503                                 ssnprintf (disk_name, sizeof (disk_name), "%i-%i", disk_major, disk_minor);
504                         }
505                 }
506                 else
507                         ssnprintf (disk_name, sizeof (disk_name), "%i-%i", disk_major, disk_minor);
508
509                 /* extract the stats */
510                 read_ops  = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsReadsKey);
511                 read_byt  = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsBytesReadKey);
512                 read_tme  = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsTotalReadTimeKey);
513                 write_ops = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsWritesKey);
514                 write_byt = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsBytesWrittenKey);
515                 write_tme = dict_get_value (stats_dict, kIOBlockStorageDriverStatisticsTotalWriteTimeKey);
516                 CFRelease (props_dict);
517                 IOObjectRelease (disk);
518
519                 /* and submit */
520                 DEBUG ("disk plugin: disk_name = \"%s\"", disk_name);
521                 if ((read_byt != -1LL) || (write_byt != -1LL))
522                         disk_submit (disk_name, "disk_octets", read_byt, write_byt);
523                 if ((read_ops != -1LL) || (write_ops != -1LL))
524                         disk_submit (disk_name, "disk_ops", read_ops, write_ops);
525                 if ((read_tme != -1LL) || (write_tme != -1LL))
526                         disk_submit (disk_name, "disk_time", read_tme / 1000, write_tme / 1000);
527
528         }
529         IOObjectRelease (disk_list);
530 /* #endif HAVE_IOKIT_IOKITLIB_H */
531
532 #elif KERNEL_FREEBSD
533         int retry, dirty;
534
535         void *snap = NULL;
536         struct devstat *snap_iter;
537
538         struct gident *geom_id;
539
540         const char *disk_name;
541         long double read_time, write_time, busy_time, total_duration;
542
543         for (retry = 0, dirty = 1; retry < 5 && dirty == 1; retry++) {
544                 if (snap != NULL)
545                         geom_stats_snapshot_free(snap);
546
547                 /* Get a fresh copy of stats snapshot */
548                 snap = geom_stats_snapshot_get();
549                 if (snap == NULL) {
550                         ERROR("disk plugin: geom_stats_snapshot_get() failed.");
551                         return (-1);
552                 }
553
554                 /* Check if we have dirty read from this snapshot */
555                 dirty = 0;
556                 geom_stats_snapshot_reset(snap);
557                 while ((snap_iter = geom_stats_snapshot_next(snap)) != NULL) {
558                         if (snap_iter->id == NULL)
559                                 continue;
560                         geom_id = geom_lookupid(&geom_tree, snap_iter->id);
561
562                         /* New device? refresh GEOM tree */
563                         if (geom_id == NULL) {
564                                 geom_deletetree(&geom_tree);
565                                 if (geom_gettree(&geom_tree) != 0) {
566                                         ERROR("disk plugin: geom_gettree() failed");
567                                         geom_stats_snapshot_free(snap);
568                                         return (-1);
569                                 }
570                                 geom_id = geom_lookupid(&geom_tree, snap_iter->id);
571                         }
572                         /*
573                          * This should be rare: the device come right before we take the
574                          * snapshot and went away right after it.  We will handle this
575                          * case later, so don't mark dirty but silently ignore it.
576                          */
577                         if (geom_id == NULL)
578                                 continue;
579
580                         /* Only collect PROVIDER data */
581                         if (geom_id->lg_what != ISPROVIDER)
582                                 continue;
583
584                         /* Only collect data when rank is 1 (physical devices) */
585                         if (((struct gprovider *)(geom_id->lg_ptr))->lg_geom->lg_rank != 1)
586                                 continue;
587
588                         /* Check if this is a dirty read quit for another try */
589                         if (snap_iter->sequence0 != snap_iter->sequence1) {
590                                 dirty = 1;
591                                 break;
592                         }
593                 }
594         }
595
596         /* Reset iterator */
597         geom_stats_snapshot_reset(snap);
598         for (;;) {
599                 snap_iter = geom_stats_snapshot_next(snap);
600                 if (snap_iter == NULL)
601                         break;
602
603                 if (snap_iter->id == NULL)
604                         continue;
605                 geom_id = geom_lookupid(&geom_tree, snap_iter->id);
606                 if (geom_id == NULL)
607                         continue;
608                 if (geom_id->lg_what != ISPROVIDER)
609                         continue;
610                 if (((struct gprovider *)(geom_id->lg_ptr))->lg_geom->lg_rank != 1)
611                         continue;
612                 /* Skip dirty reads, if present */
613                 if (dirty && (snap_iter->sequence0 != snap_iter->sequence1))
614                         continue;
615
616                 disk_name = ((struct gprovider *)geom_id->lg_ptr)->lg_name;
617
618                 if ((snap_iter->bytes[DEVSTAT_READ] != 0) || (snap_iter->bytes[DEVSTAT_WRITE] != 0)) {
619                         disk_submit(disk_name, "disk_octets",
620                                         (derive_t)snap_iter->bytes[DEVSTAT_READ],
621                                         (derive_t)snap_iter->bytes[DEVSTAT_WRITE]);
622                 }
623
624                 if ((snap_iter->operations[DEVSTAT_READ] != 0) || (snap_iter->operations[DEVSTAT_WRITE] != 0)) {
625                         disk_submit(disk_name, "disk_ops",
626                                         (derive_t)snap_iter->operations[DEVSTAT_READ],
627                                         (derive_t)snap_iter->operations[DEVSTAT_WRITE]);
628                 }
629
630                 read_time = devstat_compute_etime(&snap_iter->duration[DEVSTAT_READ], NULL);
631                 write_time = devstat_compute_etime(&snap_iter->duration[DEVSTAT_WRITE], NULL);
632                 if ((read_time != 0) || (write_time != 0)) {
633                         disk_submit (disk_name, "disk_time",
634                                         (derive_t)(read_time*1000), (derive_t)(write_time*1000));
635                 }
636                 if (devstat_compute_statistics(snap_iter, NULL, 1.0,
637                     DSM_TOTAL_BUSY_TIME, &busy_time,
638                     DSM_TOTAL_DURATION, &total_duration,
639                     DSM_NONE) != 0) {
640                         WARNING("%s", devstat_errbuf);
641                 }
642                 else
643                 {
644                         submit_io_time(disk_name, busy_time, total_duration);
645                 }
646         }
647         geom_stats_snapshot_free(snap);
648
649 #elif KERNEL_LINUX
650         FILE *fh;
651         char buffer[1024];
652
653         char *fields[32];
654         int numfields;
655         int fieldshift = 0;
656
657         int minor = 0;
658
659         derive_t read_sectors  = 0;
660         derive_t write_sectors = 0;
661
662         derive_t read_ops      = 0;
663         derive_t read_merged   = 0;
664         derive_t read_time     = 0;
665         derive_t write_ops     = 0;
666         derive_t write_merged  = 0;
667         derive_t write_time    = 0;
668         gauge_t in_progress    = NAN;
669         derive_t io_time       = 0;
670         derive_t weighted_time = 0;
671         int is_disk = 0;
672
673         diskstats_t *ds, *pre_ds;
674
675         if ((fh = fopen ("/proc/diskstats", "r")) == NULL)
676         {
677                 fh = fopen ("/proc/partitions", "r");
678                 if (fh == NULL)
679                 {
680                         ERROR ("disk plugin: fopen (/proc/{diskstats,partitions}) failed.");
681                         return (-1);
682                 }
683
684                 /* Kernel is 2.4.* */
685                 fieldshift = 1;
686         }
687
688 #if HAVE_LIBUDEV
689         handle_udev = udev_new();
690 #endif
691
692         while (fgets (buffer, sizeof (buffer), fh) != NULL)
693         {
694                 char *disk_name;
695                 char *output_name;
696
697                 numfields = strsplit (buffer, fields, 32);
698
699                 if ((numfields != (14 + fieldshift)) && (numfields != 7))
700                         continue;
701
702                 minor = atoll (fields[1]);
703
704                 disk_name = fields[2 + fieldshift];
705
706                 for (ds = disklist, pre_ds = disklist; ds != NULL; pre_ds = ds, ds = ds->next)
707                         if (strcmp (disk_name, ds->name) == 0)
708                                 break;
709
710                 if (ds == NULL)
711                 {
712                         if ((ds = (diskstats_t *) calloc (1, sizeof (diskstats_t))) == NULL)
713                                 continue;
714
715                         if ((ds->name = strdup (disk_name)) == NULL)
716                         {
717                                 free (ds);
718                                 continue;
719                         }
720
721                         if (pre_ds == NULL)
722                                 disklist = ds;
723                         else
724                                 pre_ds->next = ds;
725                 }
726
727                 is_disk = 0;
728                 if (numfields == 7)
729                 {
730                         /* Kernel 2.6, Partition */
731                         read_ops      = atoll (fields[3]);
732                         read_sectors  = atoll (fields[4]);
733                         write_ops     = atoll (fields[5]);
734                         write_sectors = atoll (fields[6]);
735                 }
736                 else if (numfields == (14 + fieldshift))
737                 {
738                         read_ops  =  atoll (fields[3 + fieldshift]);
739                         write_ops =  atoll (fields[7 + fieldshift]);
740
741                         read_sectors  = atoll (fields[5 + fieldshift]);
742                         write_sectors = atoll (fields[9 + fieldshift]);
743
744                         if ((fieldshift == 0) || (minor == 0))
745                         {
746                                 is_disk = 1;
747                                 read_merged  = atoll (fields[4 + fieldshift]);
748                                 read_time    = atoll (fields[6 + fieldshift]);
749                                 write_merged = atoll (fields[8 + fieldshift]);
750                                 write_time   = atoll (fields[10+ fieldshift]);
751
752                                 in_progress = atof (fields[11 + fieldshift]);
753
754                                 io_time       = atof (fields[12 + fieldshift]);
755                                 weighted_time = atof (fields[13 + fieldshift]);
756                         }
757                 }
758                 else
759                 {
760                         DEBUG ("numfields = %i; => unknown file format.", numfields);
761                         continue;
762                 }
763
764                 {
765                         derive_t diff_read_sectors;
766                         derive_t diff_write_sectors;
767
768                 /* If the counter wraps around, it's only 32 bits.. */
769                         if (read_sectors < ds->read_sectors)
770                                 diff_read_sectors = 1 + read_sectors
771                                         + (UINT_MAX - ds->read_sectors);
772                         else
773                                 diff_read_sectors = read_sectors - ds->read_sectors;
774                         if (write_sectors < ds->write_sectors)
775                                 diff_write_sectors = 1 + write_sectors
776                                         + (UINT_MAX - ds->write_sectors);
777                         else
778                                 diff_write_sectors = write_sectors - ds->write_sectors;
779
780                         ds->read_bytes += 512 * diff_read_sectors;
781                         ds->write_bytes += 512 * diff_write_sectors;
782                         ds->read_sectors = read_sectors;
783                         ds->write_sectors = write_sectors;
784                 }
785
786                 /* Calculate the average time an io-op needs to complete */
787                 if (is_disk)
788                 {
789                         derive_t diff_read_ops;
790                         derive_t diff_write_ops;
791                         derive_t diff_read_time;
792                         derive_t diff_write_time;
793
794                         if (read_ops < ds->read_ops)
795                                 diff_read_ops = 1 + read_ops
796                                         + (UINT_MAX - ds->read_ops);
797                         else
798                                 diff_read_ops = read_ops - ds->read_ops;
799                         DEBUG ("disk plugin: disk_name = %s; read_ops = %"PRIi64"; "
800                                         "ds->read_ops = %"PRIi64"; diff_read_ops = %"PRIi64";",
801                                         disk_name,
802                                         read_ops, ds->read_ops, diff_read_ops);
803
804                         if (write_ops < ds->write_ops)
805                                 diff_write_ops = 1 + write_ops
806                                         + (UINT_MAX - ds->write_ops);
807                         else
808                                 diff_write_ops = write_ops - ds->write_ops;
809
810                         if (read_time < ds->read_time)
811                                 diff_read_time = 1 + read_time
812                                         + (UINT_MAX - ds->read_time);
813                         else
814                                 diff_read_time = read_time - ds->read_time;
815
816                         if (write_time < ds->write_time)
817                                 diff_write_time = 1 + write_time
818                                         + (UINT_MAX - ds->write_time);
819                         else
820                                 diff_write_time = write_time - ds->write_time;
821
822                         if (diff_read_ops != 0)
823                                 ds->avg_read_time += disk_calc_time_incr (
824                                                 diff_read_time, diff_read_ops);
825                         if (diff_write_ops != 0)
826                                 ds->avg_write_time += disk_calc_time_incr (
827                                                 diff_write_time, diff_write_ops);
828
829                         ds->read_ops = read_ops;
830                         ds->read_time = read_time;
831                         ds->write_ops = write_ops;
832                         ds->write_time = write_time;
833                 } /* if (is_disk) */
834
835                 /* Don't write to the RRDs if we've just started.. */
836                 ds->poll_count++;
837                 if (ds->poll_count <= 2)
838                 {
839                         DEBUG ("disk plugin: (ds->poll_count = %i) <= "
840                                         "(min_poll_count = 2); => Not writing.",
841                                         ds->poll_count);
842                         continue;
843                 }
844
845                 if ((read_ops == 0) && (write_ops == 0))
846                 {
847                         DEBUG ("disk plugin: ((read_ops == 0) && "
848                                         "(write_ops == 0)); => Not writing.");
849                         continue;
850                 }
851
852                 output_name = disk_name;
853
854 #if HAVE_LIBUDEV
855                 char *alt_name = disk_udev_attr_name (handle_udev, disk_name, conf_udev_name_attr);
856                 if (alt_name != NULL)
857                         output_name = alt_name;
858 #endif
859
860                 if ((ds->read_bytes != 0) || (ds->write_bytes != 0))
861                         disk_submit (output_name, "disk_octets",
862                                         ds->read_bytes, ds->write_bytes);
863
864                 if ((ds->read_ops != 0) || (ds->write_ops != 0))
865                         disk_submit (output_name, "disk_ops",
866                                         read_ops, write_ops);
867
868                 if ((ds->avg_read_time != 0) || (ds->avg_write_time != 0))
869                         disk_submit (output_name, "disk_time",
870                                         ds->avg_read_time, ds->avg_write_time);
871
872                 if (is_disk)
873                 {
874                         disk_submit (output_name, "disk_merged",
875                                         read_merged, write_merged);
876                         submit_in_progress (output_name, in_progress);
877                         submit_io_time (output_name, io_time, weighted_time);
878                 } /* if (is_disk) */
879
880 #if HAVE_LIBUDEV
881                 /* release udev-based alternate name, if allocated */
882                 sfree (alt_name);
883 #endif
884         } /* while (fgets (buffer, sizeof (buffer), fh) != NULL) */
885
886 #if HAVE_LIBUDEV
887         udev_unref(handle_udev);
888 #endif
889
890         fclose (fh);
891 /* #endif defined(KERNEL_LINUX) */
892
893 #elif HAVE_LIBKSTAT
894 # if HAVE_KSTAT_IO_T_WRITES && HAVE_KSTAT_IO_T_NWRITES && HAVE_KSTAT_IO_T_WTIME
895 #  define KIO_ROCTETS reads
896 #  define KIO_WOCTETS writes
897 #  define KIO_ROPS    nreads
898 #  define KIO_WOPS    nwrites
899 #  define KIO_RTIME   rtime
900 #  define KIO_WTIME   wtime
901 # elif HAVE_KSTAT_IO_T_NWRITTEN && HAVE_KSTAT_IO_T_WRITES && HAVE_KSTAT_IO_T_WTIME
902 #  define KIO_ROCTETS nread
903 #  define KIO_WOCTETS nwritten
904 #  define KIO_ROPS    reads
905 #  define KIO_WOPS    writes
906 #  define KIO_RTIME   rtime
907 #  define KIO_WTIME   wtime
908 # else
909 #  error "kstat_io_t does not have the required members"
910 # endif
911         static kstat_io_t kio;
912         int i;
913
914         if (kc == NULL)
915                 return (-1);
916
917         for (i = 0; i < numdisk; i++)
918         {
919                 if (kstat_read (kc, ksp[i], &kio) == -1)
920                         continue;
921
922                 if (strncmp (ksp[i]->ks_class, "disk", 4) == 0)
923                 {
924                         disk_submit (ksp[i]->ks_name, "disk_octets",
925                                         kio.KIO_ROCTETS, kio.KIO_WOCTETS);
926                         disk_submit (ksp[i]->ks_name, "disk_ops",
927                                         kio.KIO_ROPS, kio.KIO_WOPS);
928                         /* FIXME: Convert this to microseconds if necessary */
929                         disk_submit (ksp[i]->ks_name, "disk_time",
930                                         kio.KIO_RTIME, kio.KIO_WTIME);
931                 }
932                 else if (strncmp (ksp[i]->ks_class, "partition", 9) == 0)
933                 {
934                         disk_submit (ksp[i]->ks_name, "disk_octets",
935                                         kio.KIO_ROCTETS, kio.KIO_WOCTETS);
936                         disk_submit (ksp[i]->ks_name, "disk_ops",
937                                         kio.KIO_ROPS, kio.KIO_WOPS);
938                 }
939         }
940 /* #endif defined(HAVE_LIBKSTAT) */
941
942 #elif defined(HAVE_LIBSTATGRAB)
943         sg_disk_io_stats *ds;
944 # if HAVE_LIBSTATGRAB_0_90
945         size_t disks;
946 # else
947         int disks;
948 #endif
949         int counter;
950         char name[DATA_MAX_NAME_LEN];
951
952         if ((ds = sg_get_disk_io_stats(&disks)) == NULL)
953                 return (0);
954
955         for (counter=0; counter < disks; counter++) {
956                 strncpy(name, ds->disk_name, sizeof(name));
957                 name[sizeof(name)-1] = '\0'; /* strncpy doesn't terminate longer strings */
958                 disk_submit (name, "disk_octets", ds->read_bytes, ds->write_bytes);
959                 ds++;
960         }
961 /* #endif defined(HAVE_LIBSTATGRAB) */
962
963 #elif defined(HAVE_PERFSTAT)
964         derive_t read_sectors;
965         derive_t write_sectors;
966         derive_t read_time;
967         derive_t write_time;
968         derive_t read_ops;
969         derive_t write_ops;
970         perfstat_id_t firstpath;
971         int rnumdisk;
972         int i;
973
974         if ((numdisk = perfstat_disk(NULL, NULL, sizeof(perfstat_disk_t), 0)) < 0)
975         {
976                 char errbuf[1024];
977                 WARNING ("disk plugin: perfstat_disk: %s",
978                                 sstrerror (errno, errbuf, sizeof (errbuf)));
979                 return (-1);
980         }
981
982         if (numdisk != pnumdisk || stat_disk==NULL) {
983                 if (stat_disk!=NULL)
984                         free(stat_disk);
985                 stat_disk = (perfstat_disk_t *)calloc(numdisk, sizeof(perfstat_disk_t));
986         }
987         pnumdisk = numdisk;
988
989         firstpath.name[0]='\0';
990         if ((rnumdisk = perfstat_disk(&firstpath, stat_disk, sizeof(perfstat_disk_t), numdisk)) < 0)
991         {
992                 char errbuf[1024];
993                 WARNING ("disk plugin: perfstat_disk : %s",
994                                 sstrerror (errno, errbuf, sizeof (errbuf)));
995                 return (-1);
996         }
997
998         for (i = 0; i < rnumdisk; i++)
999         {
1000                 read_sectors = stat_disk[i].rblks*stat_disk[i].bsize;
1001                 write_sectors = stat_disk[i].wblks*stat_disk[i].bsize;
1002                 disk_submit (stat_disk[i].name, "disk_octets", read_sectors, write_sectors);
1003
1004                 read_ops = stat_disk[i].xrate;
1005                 write_ops = stat_disk[i].xfers - stat_disk[i].xrate;
1006                 disk_submit (stat_disk[i].name, "disk_ops", read_ops, write_ops);
1007
1008                 read_time = stat_disk[i].rserv;
1009                 read_time *= ((double)(_system_configuration.Xint)/(double)(_system_configuration.Xfrac)) / 1000000.0;
1010                 write_time = stat_disk[i].wserv;
1011                 write_time *= ((double)(_system_configuration.Xint)/(double)(_system_configuration.Xfrac)) / 1000000.0;
1012                 disk_submit (stat_disk[i].name, "disk_time", read_time, write_time);
1013         }
1014 #endif /* defined(HAVE_PERFSTAT) */
1015
1016         return (0);
1017 } /* int disk_read */
1018
1019 void module_register (void)
1020 {
1021   plugin_register_config ("disk", disk_config,
1022       config_keys, config_keys_num);
1023   plugin_register_init ("disk", disk_init);
1024   plugin_register_read ("disk", disk_read);
1025 } /* void module_register */