hashed match: Add a match to simplify load balancing.

[collectd.git] / src / collectd.conf.pod
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod

index ff44ba1..2458462 100644 (file)
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
@@ -870,19 +870,44 @@ Select partitions based on the mountpoint.
  
  Select partitions based on the filesystem type.
  
-=item B<IgnoreSelected> I<true>|I<false>
+=item B<IgnoreSelected> B<true>|B<false>
  
  Invert the selection: If set to true, all partitions B<except> the ones that
  match any one of the criteria are collected. By default only selected
  partitions are collected if a selection is made. If no selection is configured
  at all, B<all> partitions are selected.
  
-=item B<ReportByDevice> I<true>|I<false>
+=item B<ReportByDevice> B<true>|B<false>
  
  Report using the device name rather than the mountpoint. i.e. with this I<false>,
  (the default), it will report a disk as "root", but with it I<true>, it will be
  "sda1" (or whichever).
  
+=item B<ReportReserved> B<true>|B<false>
+
+When enabled, the blocks reserved for root are reported separately. When
+disabled (the default for backwards compatibility reasons) the reserved space
+will be included in the "free" space.
+
+When disabled, the "df" type will be used to store "free" and "used" space. The
+mount point or disk name (see option B<ReportByDevice>) is used as type
+instance in this case (again: backwards compatibility).
+
+When enabled, the type "df_complex" is used and three files are created. The
+mount point or disk name is used as plugin instance and the type instance is
+set to "free", "reserved" and "used" as appropriate.
+
+Enabling this option is recommended.
+
+=item B<ReportInodes> B<true>|B<false>
+
+Enables or disables reporting of free, reserved and used inodes. Inode
+collection is disabled by default.
+
+Enable this option if inodes are a scarce resource for you, usually because
+many small files are stored on the disk. This is a common scenario for mail
+transfer agents and web caches.
+
  =back
  
  =head2 Plugin C<disk>
@@ -1682,6 +1707,7 @@ Required capabilities are documented below.
      </Disks>
      
      <VolumePerf>
+      Interval 30
        GetIO      "volume0"
        IgnoreSelectedIO      false
        GetOps     "volume0"
@@ -1691,6 +1717,7 @@ Required capabilities are documented below.
      </VolumePerf>
      
      <VolumeUsage>
+      Interval 30
        GetCapacity "vol0"
        GetCapacity "vol1"
        IgnoreSelectedCapacity false
@@ -1775,16 +1802,10 @@ The following options are valid inside all blocks:
  
  =over 4
  
-=item B<Multiplier> I<Multiplier>
-
-The host specific interval between data collections is multiplied by this value
-for collecting these data.
-
-Optional
-
-Type: integer
+=item B<Interval> I<Seconds>
  
-Default: 1
+Collect the respective statistics every I<Seconds> seconds. Defaults to the
+host specific setting.
  
  =back
  
@@ -1984,6 +2005,10 @@ I<api-perf-object-get-instances> capability.
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect volume performance data every I<Seconds> seconds.
+
  =item B<GetIO> I<Volume>
  
  =item B<GetOps> I<Volume>
@@ -2035,6 +2060,10 @@ capability.
  
  =over 4
  
+=item B<Interval> I<Seconds>
+
+Collect volume usage statistics every I<Seconds> seconds.
+
  =item B<GetCapacity> I<VolumeName>
  
  The current capacity of the volume will be collected. This will result in two
@@ -2064,7 +2093,18 @@ capacities will be selected anyway.
  
  =item B<GetSnapshot> I<VolumeName>
  
-B<Defunct!> Select volumes from which to collect snapshot information.
+Select volumes from which to collect snapshot information.
+
+Usually, the space used for snapshots is included in the space reported as
+"used". If snapshot information is collected as well, the space used for
+snapshots is subtracted from the used space.
+
+To make things even more interesting, it is possible to reserve space to be
+used for snapshots. If the space required for snapshots is less than that
+reserved space, there is "reserved free" and "reserved used" space in addition
+to "free" and "used". If the space required for snapshots exceeds the reserved
+space, the excess, which is allocated in the normal space, is subtracted from
+the "used" space again.
  
  Repeat this option to specify multiple volumes.
  
@@ -4592,6 +4632,77 @@ time. If the counter is reset for some reason (machine or service restarted,
  usually), the graph will be empty (NAN) for a long time. People may not
  understand why.
  
+=item B<hashed>
+
+Calculates a hash value of the host name and matches values according to that
+hash value. This makes it possible to divide all hosts into groups and match
+only values that are in a specific group. The intended use is in load
+balancing, where you want to handle only part of all data and leave the rest
+for other servers.
+
+The hashing function used tries to distribute the hosts evenly. First, it
+calculates a 32E<nbsp>bit hash value using the characters of the hostname:
+
+  hash_value = 0;
+  for (i = 0; host[i] != 0; i++)
+    hash_value = (hash_value * 251) + host[i];
+
+The constant 251 is a prime number which is supposed to make this hash value
+more random. The code then checks the group for this host according to the
+I<Total> and I<Match> arguments:
+
+  if ((hash_value % Total) == Match)
+    matches;
+  else
+    does not match;
+
+Please note that when you set I<Total> to two (i.E<nbsp>e. you have only two
+groups), then the least significant bit of the hash value will be the XOR of
+all least significant bits in the host name. One consequence is that when you
+have two hosts, "server0.example.com" and "server1.example.com", where the host
+names differ in one digit only and the digits differ by one, those hosts will
+never end up in the same group.
+
+Available options:
+
+=over 4
+
+=item B<Match> I<Match> I<Total>
+
+Divide the data into I<Total> groups and match all hosts in group I<Match> as
+described above. The groups are numbered from zero, i.E<nbsp>e. I<Match> must
+be smaller than I<Total>. I<Total> must be at least one, although only values
+greater than one really do make any sense.
+
+You can repeat this option to match multiple groups, for example:
+
+  Match 3 7
+  Match 5 7
+
+The above config will divide the data into seven groups and match groups three
+and five. One use would be to keep every value on two hosts so that if one
+fails the missing data can later be reconstructed from the second host.
+
+=back
+
+Example:
+
+ # Operate on the pre-cache chain, so that ignored values are not even in the
+ # global cache.
+ <Chain "PreCache">
+   <Rule>
+     <Match "hashed">
+       # Divide all received hosts into seven groups and accept all hosts in
+       # group three.
+       Match 3 7
+     </Match>
+     # If matched: Return and continue.
+     Target "return"
+   </Rule>
+   # If not matched: Return and stop.
+   Target "stop"
+ </Chain>
+
  =back
  
  =head2 Available targets