hashed match: Add a match to simplify load balancing.

[collectd.git] / src / collectd.conf.pod
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod

index c2212a3..2458462 100644 (file)
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
@@ -870,19 +870,44 @@ Select partitions based on the mountpoint.
  
  Select partitions based on the filesystem type.
  
  
  Select partitions based on the filesystem type.
  
-=item B<IgnoreSelected> I<true>|I<false>
+=item B<IgnoreSelected> B<true>|B<false>
  
  Invert the selection: If set to true, all partitions B<except> the ones that
  match any one of the criteria are collected. By default only selected
  partitions are collected if a selection is made. If no selection is configured
  at all, B<all> partitions are selected.
  
  
  Invert the selection: If set to true, all partitions B<except> the ones that
  match any one of the criteria are collected. By default only selected
  partitions are collected if a selection is made. If no selection is configured
  at all, B<all> partitions are selected.
  
-=item B<ReportByDevice> I<true>|I<false>
+=item B<ReportByDevice> B<true>|B<false>
  
  Report using the device name rather than the mountpoint. i.e. with this I<false>,
  (the default), it will report a disk as "root", but with it I<true>, it will be
  "sda1" (or whichever).
  
  
  Report using the device name rather than the mountpoint. i.e. with this I<false>,
  (the default), it will report a disk as "root", but with it I<true>, it will be
  "sda1" (or whichever).
  
+=item B<ReportReserved> B<true>|B<false>
+
+When enabled, the blocks reserved for root are reported separately. When
+disabled (the default for backwards compatibility reasons) the reserved space
+will be included in the "free" space.
+
+When disabled, the "df" type will be used to store "free" and "used" space. The
+mount point or disk name (see option B<ReportByDevice>) is used as type
+instance in this case (again: backwards compatibility).
+
+When enabled, the type "df_complex" is used and three files are created. The
+mount point or disk name is used as plugin instance and the type instance is
+set to "free", "reserved" and "used" as appropriate.
+
+Enabling this option is recommended.
+
+=item B<ReportInodes> B<true>|B<false>
+
+Enables or disables reporting of free, reserved and used inodes. Defaults to
+inode collection being disabled.
+
+Enable this option if inodes are a scarce resource for you, usually because
+many small files are stored on the disk. This is a usual scenario for mail
+transfer agents and web caches.
+
  =back
  
  =head2 Plugin C<disk>
  =back
  
  =head2 Plugin C<disk>
@@ -1667,16 +1692,47 @@ Required capabilities are documented below.
      User          "username"
      Password      "aef4Aebe"
      Interval      30
      User          "username"
      Password      "aef4Aebe"
      Interval      30
-    <GetSystemPerfData>
-    </GetSystemPerfData>
-    <GetWaflPerfData>
-    </GetWaflPerfData>
-    <GetDiskPerfData>
-    </GetDiskPerfData>
-    <GetVolumePerfData>
-    </GetVolumePerfData>
-    <GetVolumeData>
-    </GetVolumeData>
+    
+    <WAFL>
+      Interval 30
+      GetNameCache   true
+      GetDirCache    true
+      GetBufferCache true
+      GetInodeCache  true
+    </WAFL>
+    
+    <Disks>
+      Interval 30
+      GetBusy true
+    </Disks>
+    
+    <VolumePerf>
+      Interval 30
+      GetIO      "volume0"
+      IgnoreSelectedIO      false
+      GetOps     "volume0"
+      IgnoreSelectedOps     false
+      GetLatency "volume0"
+      IgnoreSelectedLatency false
+    </VolumePerf>
+    
+    <VolumeUsage>
+      Interval 30
+      GetCapacity "vol0"
+      GetCapacity "vol1"
+      IgnoreSelectedCapacity false
+      GetSnapshot "vol1"
+      GetSnapshot "vol3"
+      IgnoreSelectedSnapshot false
+    </VolumeUsage>
+    
+    <System>
+      Interval 30
+      GetCPULoad     true
+      GetInterfaces  true
+      GetDiskOps     true
+      GetDiskIO      true
+    </System>
     </Host>
   </Plugin>
  
     </Host>
   </Plugin>
  
@@ -1746,20 +1802,14 @@ The following options are valid inside all blocks:
  
  =over 4
  
  
  =over 4
  
-=item B<Multiplier> I<Multiplier>
-
-The host specific interval between data collections is multiplied by this value
-for collecting these data.
-
-Optional
-
-Type: integer
+=item B<Interval> I<Seconds>
  
  
-Default: 1
+Collect the respective statistics every I<Seconds> seconds. Defaults to the
+host specific setting.
  
  =back
  
  
  =back
  
-=head3 The GetSystemPerfData block
+=head3 The System block
  
  This will collect various performance data about the whole system.
  
  
  This will collect various performance data about the whole system.
  
@@ -1768,6 +1818,10 @@ B<Note:> To get this data the collectd user needs the
  
  =over 4
  
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect system performance statistics every I<Seconds> seconds.
+
  =item B<GetCPULoad> B<true>|B<false>
  
  If you set this option to true the current CPU usage will be read. This will be
  =item B<GetCPULoad> B<true>|B<false>
  
  If you set this option to true the current CPU usage will be read. This will be
@@ -1843,7 +1897,7 @@ type instance.
  
  =back
  
  
  =back
  
-=head3 The GetWaflPerfData block
+=head3 The WAFL block
  
  This will collect various performance data about the WAFL file system. At the
  moment this just means cache performance.
  
  This will collect various performance data about the WAFL file system. At the
  moment this just means cache performance.
@@ -1857,6 +1911,10 @@ releases.
  
  =over 4
  
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect WAFL performance statistics every I<Seconds> seconds.
+
  =item B<GetNameCache> B<true>|B<false>
  
  Optional
  =item B<GetNameCache> B<true>|B<false>
  
  Optional
@@ -1889,7 +1947,7 @@ Default: true
  Result: One value list of type "cache_ratio" and type instance
  "inode_cache_hit".
  
  Result: One value list of type "cache_ratio" and type instance
  "inode_cache_hit".
  
-=item B<GetBufCache> B<true>|B<false>
+=item B<GetBufferCache> B<true>|B<false>
  
  B<Note:> This is the same value that the NetApp CLI command "sysstat" returns
  in the "Cache hit" field.
  
  B<Note:> This is the same value that the NetApp CLI command "sysstat" returns
  in the "Cache hit" field.
@@ -1904,7 +1962,7 @@ Result: One value list of type "cache_ratio" and type instance "buf_hash_hit".
  
  =back
  
  
  =back
  
-=head3 The GetDiskPerfData block
+=head3 The Disks block
  
  This will collect performance data about the individual disks in the NetApp.
  
  
  This will collect performance data about the individual disks in the NetApp.
  
@@ -1913,6 +1971,10 @@ B<Note:> To get this data the collectd user needs the
  
  =over 4
  
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect disk statistics every I<Seconds> seconds.
+
  =item B<GetBusy> B<true>|B<false>
  
  If you set this option to true the busy time of all disks will be calculated
  =item B<GetBusy> B<true>|B<false>
  
  If you set this option to true the busy time of all disks will be calculated
@@ -1931,80 +1993,78 @@ Result: One value list of type "percent" and type instance "disk_busy".
  
  =back
  
  
  =back
  
-=head3 The GetVolumePerfData block
+=head3 The VolumePerf block
  
  This will collect various performance data about the individual volumes.
  
  
  This will collect various performance data about the individual volumes.
  
-All of these options take a list of volumes as parameters. In this case
-"volume" means just the name of the volume, without the "/vol/" prefix or
-anything like that.
-
-The special values "-" and "+" are supported. "-" means "don't collect values
-for any volumes". "+" means "collect values for all volumes, even volumes that
-are created after collectd was started." Additionally you can prefix a volume
-name with a "-" sign to exclude this one volume. Eg '"+" "-vol0"' collectes
-values for all volumes except vol0.  The order of the parameters is important.
-'"-vol0" "+"' doesn't make sense because the "+" overrides the earlier "-vol0".
+You can select which data to collect about which volume using the following
+options. They follow the standard ignorelist semantics.
  
  B<Note:> To get this data the collectd user needs the
  
  B<Note:> To get this data the collectd user needs the
-"api-perf-object-get-instances" capability.
+I<api-perf-object-get-instances> capability.
  
  =over 4
  
  
  =over 4
  
-=item B<GetIO> I<Volume> [I<Volume> ...]
-The current IO throughput will be read for every volume specified here.
+=item B<Interval> I<Seconds>
  
  
-Optional
+Collect volume performance data every I<Seconds> seconds.
  
  
-Type: list of strings
+=item B<GetIO> I<Volume>
  
  
-Default: "+"
+=item B<GetOps> I<Volume>
  
  
-Result: Data sources of type "disk_octets" and the name of the volume as
-plugin_instance.
+=item B<GetLatency> I<Volume>
  
  
-=item B<GetOps> I<Volume> [I<Volume> ...]
+Select the given volume for IO, operations or latency statistics collection.
+The argument is the name of the volume without the C</vol/> prefix.
  
  
-The current number of operation will be read for every volume specified here. 
+Since the standard ignorelist functionality is used here, you can use a string
+starting and ending with a slash to specify regular expression matching: To
+match the volumes "vol0", "vol2" and "vol7", you can use this regular
+expression:
  
  
-Optional
+  GetIO "/^vol[027]$/"
  
  
-Type: list of strings
+If no regular expression is specified, an exact match is required. Both
+regular and exact matching are case sensitive.
  
  
-Default: "+"
+If no volume was specified at all for any of the three options, that data
+will be collected for all available volumes.
  
  
-Result: Data sources of type "disk_ops" and the name of the volume as
-plugin_instance.
+=item B<IgnoreSelectedIO> B<true>|B<false>
  
  
-=item B<GetLatency> I<Volume> [I<Volume> ...]
+=item B<IgnoreSelectedOps> B<true>|B<false>
  
  
-The current latency for volume access in microseconds will be read for every
-volume specified here. 
+=item B<IgnoreSelectedLatency> B<true>|B<false>
  
  
-Optional
+When set to B<true>, the volumes selected for IO, operations or latency
+statistics collection will be ignored and the data will be collected for all
+other volumes.
  
  
-Type: list of strings
+When set to B<false>, data will only be collected for the specified volumes and
+all other volumes will be ignored.
  
  
-Default: "+"
+If no volumes have been specified with the above B<Get*> options, all volumes
+will be collected regardless of the B<IgnoreSelected*> option.
  
  
-Result: Data sources of type "disk_latency" and the name of the volume as
-plugin_instance.
+Defaults to B<false>.
  
  =back
  
  
  =back
  
-=head3 The GetVolumeData block
+=head3 The VolumeUsage block
  
  This will collect capacity data about the individual volumes.
  
  
  This will collect capacity data about the individual volumes.
  
-All of these options take a list of volumes as parameters, just like the
-GetVolumePerfData options.
-
-B<Note:> To get this data the collectd user needs the "api-volume-list-info"
+B<Note:> To get this data the collectd user needs the I<api-volume-list-info>
  capability.
  
  =over 4
  
  capability.
  
  =over 4
  
-=item B<GetDiskUtil>
+=item B<Interval> I<Seconds>
+
+Collect volume usage statistics every I<Seconds> seconds.
+
+=item B<GetCapacity> I<VolumeName>
  
  The current capacity of the volume will be collected. This will result in two
  to four value lists, depending on the configuration of the volume. All data
  
  The current capacity of the volume will be collected. This will result in two
  to four value lists, depending on the configuration of the volume. All data
@@ -2022,15 +2082,38 @@ reported as a 32E<nbsp>bit number. This plugin tries to guess the correct
  number which works most of the time.  If you see strange values here, bug
  NetApp support to fix this.
  
  number which works most of the time.  If you see strange values here, bug
  NetApp support to fix this.
  
-Optional
+Repeat this option to specify multiple volumes.
  
  
-Type: list of strings
+=item B<IgnoreSelectedCapacity> B<true>|B<false>
  
  
-Default: "+"
+Specify whether to collect only the volumes selected by the B<GetCapacity>
+option or to ignore those volumes. B<IgnoreSelectedCapacity> defaults to
+B<false>. However, if no B<GetCapacity> option is specified at all, all
+capacities will be selected anyway.
  
  
-=item B<GetSnapData>
+=item B<GetSnapshot> I<VolumeName>
  
  
-B<TODO>
+Select volumes from which to collect snapshot information.
+
+Usually, the space used for snapshots is included in the space reported as
+"used". If snapshot information is collected as well, the space used for
+snapshots is subtracted from the used space.
+
+To make things even more interesting, it is possible to reserve space to be
+used for snapshots. If the space required for snapshots is less than that
+reserved space, there is "reserved free" and "reserved used" space in addition
+to "free" and "used". If the space required for snapshots exceeds the reserved
+space, that part allocated in the normal space is subtracted from the "used"
+space again.
+
+Repeat this option to specify multiple volumes.
+
+=item B<IgnoreSelectedSnapshot> B<true>|B<false>
+
+Specify whether to collect only the volumes selected by the B<GetSnapshot>
+option or to ignore those volumes. B<IgnoreSelectedSnapshot> defaults to
+B<false>. However, if no B<GetSnapshot> option is specified at all, snapshot
+information will be collected for all volumes anyway.
  
  =back
  
  
  =back
  
@@ -4549,6 +4632,77 @@ time. If the counter is reset for some reason (machine or service restarted,
  usually), the graph will be empty (NAN) for a long time. People may not
  understand why.
  
  usually), the graph will be empty (NAN) for a long time. People may not
  understand why.
  
+=item B<hashed>
+
+Calculates a hash value of the host name and matches values according to that
+hash value. This makes it possible to divide all hosts into groups and match
+only values that are in a specific group. The intended use is in load
+balancing, where you want to handle only part of all data and leave the rest
+for other servers.
+
+The hashing function used tries to distribute the hosts evenly. First, it
+calculates a 32E<nbsp>bit hash value using the characters of the hostname:
+
+  hash_value = 0;
+  for (i = 0; host[i] != 0; i++)
+    hash_value = (hash_value * 251) + host[i];
+
+The constant 251 is a prime number which is supposed to make this hash value
+more random. The code then checks the group for this host according to the
+I<Total> and I<Match> arguments:
+
+  if ((hash_value % Total) == Match)
+    matches;
+  else
+    does not match;
+
+Please note that when you set I<Total> to two (i.E<nbsp>e. you have only two
+groups), then the least significant bit of the hash value will be the XOR of
+all least significant bits in the host name. One consequence is that when you
+have two hosts, "server0.example.com" and "server1.example.com", where the host
+name differs in one digit only and the digits differ by one, those hosts will
+never end up in the same group.
+
+Available options:
+
+=over 4
+
+=item B<Match> I<Match> I<Total>
+
+Divide the data into I<Total> groups and match all hosts in group I<Match> as
+described above. The groups are numbered from zero, i.E<nbsp>e. I<Match> must
+be smaller than I<Total>. I<Total> must be at least one, although only values
+greater than one really do make any sense.
+
+You can repeat this option to match multiple groups, for example:
+
+  Match 3 7
+  Match 5 7
+
+The above config will divide the data into seven groups and match groups three
+and five. One use would be to keep every value on two hosts so that if one
+fails the missing data can later be reconstructed from the second host.
+
+=back
+
+Example:
+
+ # Operate on the pre-cache chain, so that ignored values are not even in the
+ # global cache.
+ <Chain "PreCache">
+   <Rule>
+     <Match "hashed">
+       # Divide all received hosts into seven groups and accept all hosts in
+       # group three.
+       Match 3 7
+     </Match>
+     # If matched: Return and continue.
+     Target "return"
+   </Rule>
+   # If not matched: Return and stop.
+   Target "stop"
+ </Chain>
+
  =back
  
  =head2 Available targets
  =back
  
  =head2 Available targets