hashed match: Add a match to simplify load balancing.

[collectd.git] / src / collectd.conf.pod
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod

index c2212a3..2458462 100644 (file)
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
@@ -870,19 +870,44 @@ Select partitions based on the mountpoint.
  
  Select partitions based on the filesystem type.
  
-=item B<IgnoreSelected> I<true>|I<false>
+=item B<IgnoreSelected> B<true>|B<false>
  
  Invert the selection: If set to true, all partitions B<except> the ones that
  match any one of the criteria are collected. By default only selected
  partitions are collected if a selection is made. If no selection is configured
  at all, B<all> partitions are selected.
  
-=item B<ReportByDevice> I<true>|I<false>
+=item B<ReportByDevice> B<true>|B<false>
  
  Report using the device name rather than the mountpoint. i.e. with this I<false>,
  (the default), it will report a disk as "root", but with it I<true>, it will be
  "sda1" (or whichever).
  
+=item B<ReportReserved> B<true>|B<false>
+
+When enabled, the blocks reserved for root are reported separately. When
+disabled (the default for backwards compatibility reasons) the reserved space
+will be included in the "free" space.
+
+When disabled, the "df" type will be used to store "free" and "used" space. The
+mount point or disk name (see option B<ReportByDevice>) is used as type
+instance in this case (again: backwards compatibility).
+
+When enabled, the type "df_complex" is used and three files are created. The
+mount point or disk name is used as plugin instance and the type instance is
+set to "free", "reserved" and "used" as appropriate.
+
+Enabling this option is recommended.
+
+=item B<ReportInodes> B<true>|B<false>
+
+Enables or disables reporting of free, reserved and used inodes. Defaults to
+inode collection being disabled.
+
+Enable this option if inodes are a scarce resource for you, usually because
+many small files are stored on the disk. This is a usual scenario for mail
+transfer agents and web caches.
+
  =back
  
  =head2 Plugin C<disk>
@@ -1667,16 +1692,47 @@ Required capabilities are documented below.
      User          "username"
      Password      "aef4Aebe"
      Interval      30
-    <GetSystemPerfData>
-    </GetSystemPerfData>
-    <GetWaflPerfData>
-    </GetWaflPerfData>
-    <GetDiskPerfData>
-    </GetDiskPerfData>
-    <GetVolumePerfData>
-    </GetVolumePerfData>
-    <GetVolumeData>
-    </GetVolumeData>
+    
+    <WAFL>
+      Interval 30
+      GetNameCache   true
+      GetDirCache    true
+      GetBufferCache true
+      GetInodeCache  true
+    </WAFL>
+    
+    <Disks>
+      Interval 30
+      GetBusy true
+    </Disks>
+    
+    <VolumePerf>
+      Interval 30
+      GetIO      "volume0"
+      IgnoreSelectedIO      false
+      GetOps     "volume0"
+      IgnoreSelectedOps     false
+      GetLatency "volume0"
+      IgnoreSelectedLatency false
+    </VolumePerf>
+    
+    <VolumeUsage>
+      Interval 30
+      GetCapacity "vol0"
+      GetCapacity "vol1"
+      IgnoreSelectedCapacity false
+      GetSnapshot "vol1"
+      GetSnapshot "vol3"
+      IgnoreSelectedSnapshot false
+    </VolumeUsage>
+    
+    <System>
+      Interval 30
+      GetCPULoad     true
+      GetInterfaces  true
+      GetDiskOps     true
+      GetDiskIO      true
+    </System>
     </Host>
   </Plugin>
  
@@ -1746,20 +1802,14 @@ The following options are valid inside all blocks:
  
  =over 4
  
-=item B<Multiplier> I<Multiplier>
-
-The host specific interval between data collections is multiplied by this value
-for collecting these data.
-
-Optional
-
-Type: integer
+=item B<Interval> I<Seconds>
  
-Default: 1
+Collect the respective statistics every I<Seconds> seconds. Defaults to the
+host specific setting.
  
  =back
  
-=head3 The GetSystemPerfData block
+=head3 The System block
  
  This will collect various performance data about the whole system.
  
@@ -1768,6 +1818,10 @@ B<Note:> To get this data the collectd user needs the
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect system statistics every I<Seconds> seconds.
+
  =item B<GetCPULoad> B<true>|B<false>
  
  If you set this option to true the current CPU usage will be read. This will be
@@ -1843,7 +1897,7 @@ type instance.
  
  =back
  
-=head3 The GetWaflPerfData block
+=head3 The WAFL block
  
  This will collect various performance data about the WAFL file system. At the
  moment this just means cache performance.
@@ -1857,6 +1911,10 @@ releases.
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect WAFL statistics every I<Seconds> seconds.
+
  =item B<GetNameCache> B<true>|B<false>
  
  Optional
@@ -1889,7 +1947,7 @@ Default: true
  Result: One value list of type "cache_ratio" and type instance
  "inode_cache_hit".
  
-=item B<GetBufCache> B<true>|B<false>
+=item B<GetBufferCache> B<true>|B<false>
  
  B<Note:> This is the same value that the NetApp CLI command "sysstat" returns
  in the "Cache hit" field.
@@ -1904,7 +1962,7 @@ Result: One value list of type "cache_ratio" and type instance "buf_hash_hit".
  
  =back
  
-=head3 The GetDiskPerfData block
+=head3 The Disks block
  
  This will collect performance data about the individual disks in the NetApp.
  
@@ -1913,6 +1971,10 @@ B<Note:> To get this data the collectd user needs the
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect disk statistics every I<Seconds> seconds.
+
  =item B<GetBusy> B<true>|B<false>
  
  If you set this option to true the busy time of all disks will be calculated
@@ -1931,80 +1993,78 @@ Result: One value list of type "percent" and type instance "disk_busy".
  
  =back
  
-=head3 The GetVolumePerfData block
+=head3 The VolumePerf block
  
  This will collect various performance data about the individual volumes.
  
-All of these options take a list of volumes as parameters. In this case
-"volume" means just the name of the volume, without the "/vol/" prefix or
-anything like that.
-
-The special values "-" and "+" are supported. "-" means "don't collect values
-for any volumes". "+" means "collect values for all volumes, even volumes that
-are created after collectd was started." Additionally you can prefix a volume
-name with a "-" sign to exclude this one volume. Eg '"+" "-vol0"' collectes
-values for all volumes except vol0.  The order of the parameters is important.
-'"-vol0" "+"' doesn't make sense because the "+" overrides the earlier "-vol0".
+You can select which data to collect about which volume using the following
+options. They follow the standard ignorelist semantics.
  
  B<Note:> To get this data the collectd user needs the
-"api-perf-object-get-instances" capability.
+I<api-perf-object-get-instances> capability.
  
  =over 4
  
-=item B<GetIO> I<Volume> [I<Volume> ...]
-The current IO throughput will be read for every volume specified here.
+=item B<Interval> I<Seconds>
  
-Optional
+Collect volume performance data every I<Seconds> seconds.
  
-Type: list of strings
+=item B<GetIO> I<Volume>
  
-Default: "+"
+=item B<GetOps> I<Volume>
  
-Result: Data sources of type "disk_octets" and the name of the volume as
-plugin_instance.
+=item B<GetLatency> I<Volume>
  
-=item B<GetOps> I<Volume> [I<Volume> ...]
+Select the given volume for IO, operations or latency statistics collection.
+The argument is the name of the volume without the C</vol/> prefix.
  
-The current number of operation will be read for every volume specified here. 
+Since the standard ignorelist functionality is used here, you can use a string
+starting and ending with a slash to specify regular expression matching: To
+match the volumes "vol0", "vol2" and "vol7", you can use this regular
+expression:
  
-Optional
+  GetIO "/^vol[027]$/"
  
-Type: list of strings
+If no regular expression is specified, an exact match is required. Both
+regular and exact matching are case sensitive.
  
-Default: "+"
+If no volume was specified at all for any of the three options, that data
+will be collected for all available volumes.
  
-Result: Data sources of type "disk_ops" and the name of the volume as
-plugin_instance.
+=item B<IgnoreSelectedIO> B<true>|B<false>
  
-=item B<GetLatency> I<Volume> [I<Volume> ...]
+=item B<IgnoreSelectedOps> B<true>|B<false>
  
-The current latency for volume access in microseconds will be read for every
-volume specified here. 
+=item B<IgnoreSelectedLatency> B<true>|B<false>
  
-Optional
+When set to B<true>, the volumes selected for IO, operations or latency
+statistics collection will be ignored and the data will be collected for all
+other volumes.
  
-Type: list of strings
+When set to B<false>, data will only be collected for the specified volumes and
+all other volumes will be ignored.
  
-Default: "+"
+If no volumes have been specified with the above B<Get*> options, all volumes
+will be collected regardless of the B<IgnoreSelected*> option.
  
-Result: Data sources of type "disk_latency" and the name of the volume as
-plugin_instance.
+Defaults to B<false>.
  
  =back
  
-=head3 The GetVolumeData block
+=head3 The VolumeUsage block
  
  This will collect capacity data about the individual volumes.
  
-All of these options take a list of volumes as parameters, just like the
-GetVolumePerfData options.
-
-B<Note:> To get this data the collectd user needs the "api-volume-list-info"
+B<Note:> To get this data the collectd user needs the I<api-volume-list-info>
  capability.
  
  =over 4
  
-=item B<GetDiskUtil>
+=item B<Interval> I<Seconds>
+
+Collect volume usage statistics every I<Seconds> seconds.
+
+=item B<GetCapacity> I<VolumeName>
  
  The current capacity of the volume will be collected. This will result in two
  to four value lists, depending on the configuration of the volume. All data
@@ -2022,15 +2082,38 @@ reported as a 32E<nbsp>bit number. This plugin tries to guess the correct
  number which works most of the time.  If you see strange values here, bug
  NetApp support to fix this.
  
-Optional
+Repeat this option to specify multiple volumes.
  
-Type: list of strings
+=item B<IgnoreSelectedCapacity> B<true>|B<false>
  
-Default: "+"
+Specify whether to collect only the volumes selected by the B<GetCapacity>
+option or to ignore those volumes. B<IgnoreSelectedCapacity> defaults to
+B<false>. However, if no B<GetCapacity> option is specified at all, all
+capacities will be selected anyway.
  
-=item B<GetSnapData>
+=item B<GetSnapshot> I<VolumeName>
  
-B<TODO>
+Select volumes from which to collect snapshot information.
+
+Usually, the space used for snapshots is included in the space reported as
+"used". If snapshot information is collected as well, the space used for
+snapshots is subtracted from the used space.
+
+To make things even more interesting, it is possible to reserve space to be
+used for snapshots. If the space required for snapshots is less than that
+reserved space, there is "reserved free" and "reserved used" space in addition
+to "free" and "used". If the space required for snapshots exceeds the reserved
+space, that part allocated in the normal space is subtracted from the "used"
+space again.
+
+Repeat this option to specify multiple volumes.
+
+=item B<IgnoreSelectedSnapshot> B<true>|B<false>
+
+Specify whether to collect only the volumes selected by the B<GetSnapshot>
+option or to ignore those volumes. B<IgnoreSelectedSnapshot> defaults to
+B<false>. However, if no B<GetSnapshot> option is specified at all, snapshot
+information will be collected for all volumes anyway.
  
  =back
  
@@ -4549,6 +4632,77 @@ time. If the counter is reset for some reason (machine or service restarted,
  usually), the graph will be empty (NAN) for a long time. People may not
  understand why.
  
+=item B<hashed>
+
+Calculates a hash value of the host name and matches values according to that
+hash value. This makes it possible to divide all hosts into groups and match
+only values that are in a specific group. The intended use is in load
+balancing, where you want to handle only part of all data and leave the rest
+for other servers.
+
+The hashing function used tries to distribute the hosts evenly. First, it
+calculates a 32E<nbsp>bit hash value using the characters of the hostname:
+
+  hash_value = 0;
+  for (i = 0; host[i] != 0; i++)
+    hash_value = (hash_value * 251) + host[i];
+
+The constant 251 is a prime number which is supposed to make this hash value
+more random. The code then checks the group for this host according to the
+I<Total> and I<Match> arguments:
+
+  if ((hash_value % Total) == Match)
+    matches;
+  else
+    does not match;
+
+Please note that when you set I<Total> to two (i.E<nbsp>e. you have only two
+groups), then the least significant bit of the hash value will be the XOR of
+the least significant bits of all characters in the host name. One consequence
+is that when you have two hosts, "server0.example.com" and
+"server1.example.com", whose host names differ in one digit only and the
+digits differ by one, those hosts will never end up in the same group.
+
+Available options:
+
+=over 4
+
+=item B<Match> I<Match> I<Total>
+
+Divide the data into I<Total> groups and match all hosts in group I<Match> as
+described above. The groups are numbered from zero, i.E<nbsp>e. I<Match> must
+be smaller than I<Total>. I<Total> must be at least one, although only values
+greater than one really do make any sense.
+
+You can repeat this option to match multiple groups, for example:
+
+  Match 3 7
+  Match 5 7
+
+The above config will divide the data into seven groups and match groups three
+and five. One use would be to keep every value on two hosts so that if one
+fails the missing data can later be reconstructed from the second host.
+
+=back
+
+Example:
+
+ # Operate on the pre-cache chain, so that ignored values are not even in the
+ # global cache.
+ <Chain "PreCache">
+   <Rule>
+     <Match "hashed">
+       # Divide all received hosts into seven groups and accept all hosts in
+       # group three.
+       Match 3 7
+     </Match>
+     # If matched: Return and continue.
+     Target "return"
+   </Rule>
+   # If not matched: Return and stop.
+   Target "stop"
+ </Chain>
+
  =back
  
  =head2 Available targets