3 # collectd - contrib/rrd_filter.px
4 # Copyright (C) 2007-2008 Florian octo Forster
6 # This program is free software; you can redistribute it and/or modify it
7 # under the terms of the GNU General Public License as published by the
8 # Free Software Foundation; only version 2 of the License is applicable.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 # Florian octo Forster <octo at verplant.org>
27 rrd_filter.px - Perform some advanced non-standard operations on an RRD file.
31 rrd_filter.px -i input.rrd -o output.rrd [options]
35 rrd_filter.px requires the RRDTool binary, Perl and the included
36 L<Getopt::Long> module.
40 use Getopt::Long ('GetOptions');
59 The following options can be passed on the command line:
63 =item B<--infile> I<file>
67 Reads from I<file>. If I<file> ends in C<.rrd>, then C<rrdtool dump> is invoked
68 to create an XML dump of the RRD file. Otherwise the XML dump is expected
69 directly. The special filename C<-> can be used to read from STDIN.
71 =item B<--outfile> I<file>
75 Writes output to I<file>. If I<file> ends in C<.rrd>, then C<rrdtool restore>
76 is invoked to create a binary RRD file. Otherwise an XML output is written. The
77 special filename C<-> can be used to write to STDOUT.
79 =item B<--map> I<in_ds>:I<out_ds>
81 =item B<-m> I<in_ds>:I<out_ds>
83 Writes the datasource I<in_ds> to the output and renames it to I<out_ds>. This
84 is useful to extract one DS from an RRD file.
86 =item B<--step> I<seconds>
88 =item B<-s> I<seconds>
90 Changes the step of the output RRD file to be I<seconds>. The new stepsize must
91 be a multiple of the old stepsize or the other way around. When increasing the
92 stepsize the number of PDPs in each RRA must be divisible by the factor by
93 which the stepsize is increased. The length of CDPs and the absolute length of
94 RRAs (and thus the data itself) is not altered.
98 step = 10, rra_steps = 12 => step = 60, rra_steps = 2
99 step = 300, rra_steps = 1 => step = 10, rra_steps = 30
101 =item B<--rra> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>
103 =item B<-a> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>
105 Inserts a new RRA in the generated RRD file. This is done B<after> the step has
106 been adjusted, so take that into account when specifying I<steps> and I<rows>. For
107 an explanation of the format please see L<rrdcreate(1)>.
109 =item B<--scale> I<factor>
111 Scales the values by the factor I<factor>, i.E<nbsp>e. all values are
112 multiplied by I<factor>.
114 =item B<--shift> I<offset>
116 Shifts all values by I<offset>, i.E<nbsp>e. I<offset> is added to all values.
# Command-line handling: GetOptions fills the option variables, the checks
# further down reject incomplete or inconsistent input, and main() then does
# the actual conversion.
122 GetOptions ("infile|i=s" => \$InFile,
123 "outfile|o=s" => \$OutFile,
# --map callback: the argument has the form `in_ds:out_ds'. Both halves are
# appended to the parallel lists $InDS and $OutDS.
126 my ($in_ds, $out_ds) = split (':', $_[1]);
127 if (!defined ($in_ds) || !defined ($out_ds))
129 print STDERR "Argument for `map' incorrect! The format is `--map in_ds:out_ds'\n";
132 push (@$InDS, $in_ds);
133 push (@$OutDS, $out_ds);
135 'step|s=i' => \$Step,
# DS callback: the argument follows the rrdcreate syntax shown below. The
# test on $max verifies that all six colon-separated fields are present.
138 #DS:ds-name:GAUGE | COUNTER | DERIVE | ABSOLUTE:heartbeat:min:max
139 my ($ds, $name, $type, $hb, $min, $max) = split (':', $_[1]);
140 if (($ds ne 'DS') || !defined ($max))
142 print STDERR "Please use the standard RRDTool syntax when adding DSes. I. e. DS:<name>:<type>:<heartbeat>:<min>:<max>.\n";
145 push (@$NewDSes, {name => $name, type => $type, heartbeat => $hb, min => $min, max => $max});
# RRA callback: five colon-separated fields, RRA:cf:xff:steps:rows. The test
# on $rows verifies that all five are present.
149 my ($rra, $cf, $xff, $steps, $rows) = split (':', $_[1]);
150 if (($rra ne 'RRA') || !defined ($rows))
# NOTE(review): the message below prints `<cf><xff>' without the `:' that the
# split above requires between the two fields.
152 print STDERR "Please use the standard RRDTool syntax when adding RRAs. I. e. RRA:<cf><xff>:<steps>:<rows>.\n";
155 push (@$NewRRAs, {cf => $cf, xff => $xff, steps => $steps, rows => $rows});
157 'scale=f' => \$Scale,
# Both an input and an output file are mandatory.
161 if (!$InFile || !$OutFile)
# NOTE(review): the usage text does not mention the mandatory `-o <outfile>'.
163 print STDERR "Usage: $0 -i <infile> -m <in_ds>:<out_ds> -s <step>\n";
# Every mapped input DS needs exactly one output name. The `1 +' on both
# sides cancels out; the comparison is between the two element counts.
166 if ((1 + @$InDS) != (1 + @$OutDS))
168 print STDERR "You need the same amount of in- and out-DSes\n";
171 main ($InFile, $OutFile);
# The DS-map handler (--map): keeps only the data sources named in $InDS,
# renames each one to the matching $OutDS entry and forwards the result to
# handler $index + 1 via post_line(). It works as a state machine over the
# XML dump:
177 # state 0 == searching for DS index
178 # state 1 == parse RRA header
179 # state 2 == parse values
182 sub handle_line_dsmap
# Without a mapping there is nothing to filter: forward the line untouched.
188 if ((@$InDS == 0) || (@$OutDS == 0))
190 post_line ($line, $index + 1);
# First call ($state still undefined): set up the index table.
194 if (!defined ($state))
200 # $ds_index->[new_index] = old_index
202 for (my $i = 0; $i < @$InDS; $i++)
204 print STDOUT "DS map $i: $InDS->[$i] -> $OutDS->[$i]\n" if ($Debug);
# -1 marks a requested DS that has not been found in the input yet.
205 $ds_index->[$i] = -1;
# State 0: every <ds> definition is buffered in $out_cache (one entry per
# input DS) until the first <rra> line shows that the DS section is over.
# An opening <ds> overwrites the cache entry for $current_index.
211 if ($line =~ m/<ds>/)
214 $out_cache->[$current_index] = $line;
216 elsif ($line =~ m#<name>\s*([^<\s]+)\s*</name>#)
218 # old_index == $current_index
220 for (my $i = 0; $i < @$InDS; $i++)
# A requested DS that is already bound to an input DS is not rebound.
222 next if ($ds_index->[$i] >= 0);
224 if ($1 eq $InDS->[$i])
# Rename the DS in the cached line and remember its position in the input.
226 $line =~ s#<name>\s*([^<\s]+)\s*</name>#<name> $OutDS->[$i] </name>#;
227 $ds_index->[$i] = $current_index;
232 $out_cache->[$current_index] .= $line;
# The <last_ds> and <value> fields are not carried over; NaN is cached
# in their place.
234 elsif ($line =~ m#<last_ds>\s*([^\s>]+)\s*</last_ds>#i)
236 $out_cache->[$current_index] .= "\t\t<last_ds> NaN </last_ds>\n";
238 elsif ($line =~ m#<value>\s*([^\s>]+)\s*</value>#i)
240 $out_cache->[$current_index] .= "\t\t<value> NaN </value>\n";
242 elsif ($line =~ m#</ds>#)
244 $out_cache->[$current_index] .= $line;
# The first <rra> ends the DS section: emit the cached definitions in the
# requested (new) order.
246 elsif ($line =~ m#<rra>#)
248 # Print out all the DS definitions we need
249 for (my $new_index = 0; $new_index < @$InDS; $new_index++)
251 my $old_index = $ds_index->[$new_index];
# NOTE(review): a name in $InDS that never matched leaves $old_index at -1,
# which addresses the LAST cache entry here. Confirm this case is rejected
# elsewhere.
# A cache entry holds several lines; they are forwarded one at a time.
252 while ($out_cache->[$old_index] =~ m/^(.*)$/gm)
254 post_line ("$1\n", $index + 1);
258 # Clear the cache - it's used in state1, too.
259 for (my $i = 0; $i <= $current_index; $i++)
261 $out_cache->[$i] = '';
268 elsif ($current_index == -1)
270 # Print all the lines before the first DS definition
275 # Something belonging to a DS-definition
276 $out_cache->[$current_index] .= $line;
# State 1: the same buffering, applied to the per-DS blocks inside
# <cdp_prep> of an RRA header.
281 if ($line =~ m#<ds>#)
284 $out_cache->[$current_index] .= $line;
286 elsif ($line =~ m#<value>\s*([^\s>]+)\s*</value>#i)
288 $out_cache->[$current_index] .= "\t\t\t<value> NaN </value>\n";
# End of <cdp_prep>: emit the cached per-DS blocks in the new order, then
# clear the cache again.
290 elsif ($line =~ m#</cdp_prep>#)
292 # Print out all the DS definitions we need
293 for (my $new_index = 0; $new_index < @$InDS; $new_index++)
295 my $old_index = $ds_index->[$new_index];
296 while ($out_cache->[$old_index] =~ m/^(.*)$/gm)
298 post_line ("$1\n", $index + 1);
303 for (my $i = 0; $i <= $current_index; $i++)
305 $out_cache->[$i] = '';
# NOTE(review): presumably <database> switches to state 2 (see the state
# list at the top) - confirm.
311 elsif ($line =~ m#<database>#)
316 elsif ($current_index == -1)
318 # Print all the lines before the first DS definition
324 # Something belonging to a DS-definition
325 $out_cache->[$current_index] .= $line;
# State 2: rewrite the rows of a <database> section.
# NOTE(review): presumably </database> returns to state 1 - confirm.
330 if ($line =~ m#</database>#)
# Captures the XML comment found on the line.
# NOTE(review): presumably it is copied into $ret - confirm.
343 if ($line =~ m#(<!-- .*? -->)#)
# Walk over all <v> values of the row ...
350 while ($line =~ m#<v>\s*([^<\s]+)\s*</v>#g)
# ... and rebuild the row from the selected columns only, in the new order.
# NOTE(review): an unmatched DS (index -1) selects the last value of the row.
356 for (my $new_index = 0; $new_index < @$InDS; $new_index++)
358 my $old_index = $ds_index->[$new_index];
359 $ret .= '<v> ' . $values[$old_index] . ' </v> ';
371 post_line ($ret, $index + 1);
373 }} # handle_line_dsmap
# State of the step handler (--step): the factor by which the step shrinks.
# Its counterpart $step_factor_up holds the factor by which the step grows.
# A value of 0 means "not determined yet" (see the `||= 0' below).
380 my $step_factor_down;
# Pass-through path.
# NOTE(review): presumably taken when no new step was requested - confirm.
388 post_line ($line, $index + 1);
# Announce the new step once, before any factor has been set.
392 if ($Debug && !defined ($step_factor_up))
394 print STDOUT "New step: $Step\n";
397 $step_factor_up ||= 0;
398 $step_factor_down ||= 0;
# Neither factor is known yet: look for the <step> element of the dump.
400 if (($step_factor_up == 0) && ($step_factor_down == 0))
402 if ($line =~ m#<step>\s*(\d+)\s*</step>#i)
404 my $old_step = 0 + $1;
# Smaller step: the old step must be an integer multiple of the new one.
405 if ($Step < $old_step)
407 $step_factor_down = int ($old_step / $Step);
408 if (($step_factor_down * $Step) != $old_step)
410 print STDERR "The old step ($old_step seconds) "
411 . "is not a multiple of the new step "
412 . "($Step seconds).\n";
415 $line = "<step> $Step </step>\n";
# Larger step: the new step must be an integer multiple of the old one.
417 elsif ($Step > $old_step)
419 $step_factor_up = int ($Step / $old_step);
420 if (($step_factor_up * $old_step) != $Step)
422 print STDERR "The new step ($Step seconds) "
423 . "is not a multiple of the old step "
424 . "($old_step seconds).\n";
427 $line = "<step> $Step </step>\n";
# Rescale the number of PDPs per row so that the time span covered by one
# row stays the same.
435 elsif ($line =~ m#<pdp_per_row>\s*(\d+)\s*</pdp_per_row>#i)
437 my $old_val = 0 + $1;
# Step increased: fewer PDPs per row; the division must be exact.
441 $new_val = int ($old_val / $step_factor_up);
442 if (($new_val * $step_factor_up) != $old_val)
444 print STDERR "Can't divide number of PDPs per row ($old_val) by step-factor ($step_factor_up).\n";
# Step decreased: proportionally more PDPs per row.
450 $new_val = $step_factor_down * $old_val;
452 $line = "<pdp_per_row> $new_val </pdp_per_row>\n";
455 post_line ($line, $index + 1);
456 }} # handle_line_step
459 # The _add DS_ handler
# Adds the data sources collected in $NewDSes to the dump: a <ds> definition
# in front of the first <rra>, an extra per-DS block after each </ds> that
# follows, and extra <v> cells in every <row>.
463 sub handle_line_add_ds
# Helper: forwards each argument as its own line to the next handler.
468 my $post = sub { for (@_) { post_line ($_, $index + 1); } };
# The first <rra> marks the end of the existing DS definitions; the new
# definitions are emitted at this point.
476 if (!$add_ds_done && ($line =~ m#<rra>#i))
478 for (my $i = 0; $i < @$NewDSes; $i++)
480 my $ds = $NewDSes->[$i];
488 print STDOUT "Adding DS: name = $ds->{'name'}, type = $ds->{'type'}, heartbeat = $ds->{'heartbeat'}, min = $ds->{'min'}, max = $ds->{'max'}\n";
# A limit given as `U' (or missing) is not formatted as a number.
# NOTE(review): presumably $min / $max default to NaN in that case - confirm.
492 if (defined ($ds->{'min'}) && ($ds->{'min'} ne 'U'))
494 $min = sprintf ('%.10e', $ds->{'min'});
498 if (defined ($ds->{'max'}) && ($ds->{'max'} ne 'U'))
500 $max = sprintf ('%.10e', $ds->{'max'});
505 "\t\t<name> $ds->{'name'} </name>\n",
506 "\t\t<type> $ds->{'type'} </type>\n",
507 "\t\t<minimal_heartbeat> $ds->{'heartbeat'} </minimal_heartbeat>\n",
508 "\t\t<min> $min </min>\n",
509 "\t\t<max> $max </max>\n",
# The new DS starts without any data: unknown last value, NaN accumulator.
511 "\t\t<!-- PDP Status -->\n",
512 "\t\t<last_ds> UNKN </last_ds>\n",
513 "\t\t<value> NaN </value>\n",
514 "\t\t<unknown_sec> 0 </unknown_sec>\n",
# After the definitions have been written, a </ds> is followed by an
# all-NaN block of CDP fields.
521 elsif ($add_ds_done && ($line =~ m#</ds>#i)) # inside a cdp_prep block
523 $post->("\t\t\t</ds>\n",
525 "\t\t\t<primary_value> NaN </primary_value>\n",
526 "\t\t\t<secondary_value> NaN </secondary_value>\n",
527 "\t\t\t<value> NaN </value>\n",
528 "\t\t\t<unknown_datapoints> 0 </unknown_datapoints>\n");
# Data rows get one NaN cell per new DS, inserted just before </row>.
530 elsif ($line =~ m#<row>#i)
532 my $insert = '<v> NaN </v>' x (0 + @$NewDSes);
533 $line =~ s#</row>#$insert</row>#i;
537 }} # handle_line_add_ds
540 # The _add RRA_ handler
# Emits the round-robin archives collected in $NewRRAs, filled with NaN,
# when the first <rra> line of the dump is seen.
545 sub handle_line_add_rra
# Helper: forwards each argument as its own line to the next handler.
550 my $post = sub { for (@_) { post_line ($_, $index + 1); } };
# Nothing to add, or the new RRAs have already been written.
# NOTE(review): presumably the line is just forwarded here - confirm.
554 if (!@$NewRRAs || $add_rra_done)
# NOTE(review): presumably each <ds> line increments $num_ds, the DS count
# used for the new RRAs below - confirm.
560 if ($line =~ m#<ds>#i)
564 elsif ($line =~ m#<rra>#i)
566 for (my $i = 0; $i < @$NewRRAs; $i++)
568 my $rra = $NewRRAs->[$i];
573 print STDOUT "Adding RRA: CF = $rra->{'cf'}, xff = $rra->{'xff'}, steps = $rra->{'steps'}, rows = $rra->{'rows'}, num_ds = $num_ds\n";
# RRA header: consolidation function, PDPs per row and xff.
577 "\t\t<cf> $rra->{'cf'} </cf>\n",
578 "\t\t<pdp_per_row> $rra->{'steps'} </pdp_per_row>\n",
580 "\t\t\t<xff> $rra->{'xff'} </xff>\n",
# One empty (all-NaN) CDP block per data source.
584 for (my $j = 0; $j < $num_ds; $j++)
586 $post->("\t\t\t<ds>\n",
587 "\t\t\t\t<primary_value> NaN </primary_value>\n",
588 "\t\t\t\t<secondary_value> NaN </secondary_value>\n",
589 "\t\t\t\t<value> NaN </value>\n",
590 "\t\t\t\t<unknown_datapoints> 0 </unknown_datapoints>\n",
594 $post->("\t\t</cdp_prep>\n", "\t\t<database>\n");
# Row template holding $num_ds NaN cells; the loop below runs once per
# configured row.
# NOTE(review): presumably the loop body posts $temp - confirm.
595 $temp = "\t\t\t<row>" . join ('', map { "<v> NaN </v>" } (1 .. $num_ds)) . "</row>\n";
596 for (my $j = 0; $j < $rra->{'rows'}; $j++)
600 $post->("\t\t</database>\n", "\t</rra>\n");
607 }} # handle_line_add_rra
610 # The _scale/shift_ handler
# Computes the replacement text for one XML element. The substitution in
# handle_line_scale_shift calls it as (value, tag, scale, shift). It returns
# the string "<tag> ... </tag>" holding value * scale + shift in `%1.10e'
# notation.
612 sub calculate_scale_shift
# NaN (in any letter case) cannot be scaled and is written back as NaN.
619 if (lc ("$value") eq 'nan')
622 return ("<$tag> NaN </$tag>");
# NOTE(review): `0.0 + $value' turns any other non-numeric text into 0, so a
# <last_ds> of `UNKN' would come out as the plain shift value - confirm that
# such input cannot reach this point.
625 $value = ($scale * (0.0 + $value)) + $shift;
626 return (sprintf ("<%s> %1.10e </%s>", $tag, $value, $tag));
# Applies --scale / --shift to every numeric element of a line and forwards
# the line to the next handler.
629 sub handle_line_scale_shift
# Nothing to rewrite when scale and shift are both neutral.
634 if (($Scale != 1.0) || ($Shift != 0.0))
# Each listed element is replaced by the string returned from
# calculate_scale_shift (value, tag, scale, shift); the /e flag evaluates
# the replacement as code. The closing tag is matched loosely (`</[^>]+>').
636 $line =~ s#<(min|max|last_ds|value|primary_value|secondary_value|v)>\s*([^\s<]+)\s*</[^>]+>#calculate_scale_shift ($2, $1, $Scale, $Shift)#eg;
639 post_line ($line, $index + 1);
643 # The _peak detect_ handler
645 # This filter is unfinished!
# State kept between calls, per value column: the last known value
# ($previous_values) and a smoothed average of the absolute differences
# between consecutive values ($previous_differences).
656 my $previous_differences;
# Reports suspicious jumps on STDERR; every line is forwarded unchanged.
658 sub handle_line_peak_detect
# First call: create the per-column history.
663 if (!$previous_values)
665 $previous_values = [];
666 $previous_differences = [];
# End of an archive: forget the history and print a separator.
669 if ($line =~ m#</database>#i)
671 $previous_values = [];
672 $previous_differences = [];
673 print STDERR "==============================================================================\n";
# Remember the consolidation width of the archive that follows.
675 elsif ($line =~ m#<pdp_per_row>\s*([1-9][0-9]*)\s*</pdp_per_row>#)
677 $pdp_per_row = int ($1);
678 print STDERR "pdp_per_row = $pdp_per_row;\n";
680 elsif ($line =~ m#<row>#)
# Collect the row's values; a cell is stored either as undef or as a number.
# NOTE(review): presumably undef stands for NaN cells - confirm.
683 while ($line =~ m#<v>\s*([^\s>]+)\s*</v>#ig)
687 push (@values, undef);
691 push (@values, 0.0 + $1);
695 for (my $i = 0; $i < @values; $i++)
# An undefined value clears the column's previous value.
697 if (!defined ($values[$i]))
699 $previous_values->[$i] = undef;
# No previous value for this column: just remember the current one.
701 elsif (!defined ($previous_values->[$i]))
703 $previous_values->[$i] = $values[$i];
# No average difference yet: seed it with the current difference.
705 elsif (!defined ($previous_differences->[$i]))
707 $previous_differences->[$i] = abs ($previous_values->[$i] - $values[$i]);
# Compare the current jump with the average jump. The divisor is clamped to
# at least 1.0 so that a very small average cannot inflate the ratio.
711 my $divisor = ($previous_differences->[$i] < 1.0) ? 1.0 : $previous_differences->[$i];
712 my $difference = abs ($previous_values->[$i] - $values[$i]);
713 my $change = $pdp_per_row * $difference / $divisor;
# Note that 10e5 is 1,000,000 (10 * 10^5).
714 if (($divisor > 10.0) && ($change > 10e5))
716 print STDERR "i = $i; average difference = " . $previous_differences->[$i]. "; current difference = " . $difference. "; change = $change;\n";
# Exponential moving average: 95% old average, 5% current difference.
718 $previous_values->[$i] = $values[$i];
719 $previous_differences->[$i] = (0.95 * $previous_differences->[$i]) + (0.05 * $difference);
724 post_line ($line, $index + 1);
725 }} # handle_line_peak_detect
# The output handler.
# NOTE(review): presumably this writes each line to the handle registered
# with set_output() - confirm.
727 sub handle_line_output
# Forwards the line to the next handler index.
734 post_line ($line, $index + 1);
739 }} # handle_line_output
# Handler registration (presumably the body of add_handler - confirm):
# anything that is not a code reference aborts the program; valid handlers
# are appended to @handlers.
750 die unless (ref ($handler) eq 'CODE');
751 push (@handlers, $handler);
# Line dispatch (presumably the body of post_line - confirm): hands $line to
# the handler stored at position $index of @handlers.
763 print "DEBUG: post_line ($copy, $index);\n";
# $index is past the last registered handler.
# NOTE(review): presumably the line is dropped here - confirm.
766 if ($index > $#handlers)
770 $handlers[$index]->($line, $index);
# Pipeline setup and main loop (presumably the body of handle_fh - confirm).
# Handlers are stored in the order they are added; each input line is given
# to the handler at index 0.
778 set_output ($out_fh);
782 add_handler (\&handle_line_dsmap);
787 add_handler (\&handle_line_step);
# Scaling / shifting is only added to the chain when it would change values.
790 if (($Scale != 1.0) || ($Shift != 0.0))
792 add_handler (\&handle_line_scale_shift);
# The peak-detect filter is disabled.
795 #add_handler (\&handle_line_peak_detect);
799 add_handler (\&handle_line_add_ds);
804 add_handler (\&handle_line_add_rra);
# The output handler is added last.
807 add_handler (\&handle_line_output);
# Feed the input line by line into the first handler.
809 while (my $line = <$in_fh>)
811 post_line ($line, 0);
# Opens input and output and runs the filter (presumably the body of main,
# called with the input and output file names - confirm).
818 my $out_file = shift;
# Both flags start out as 1; $out_needs_close is cleared in the `-' output
# branch below.
823 my $in_needs_close = 1;
824 my $out_needs_close = 1;
# Input: a name ending in `.rrd' is read through `rrdtool dump'. The list
# form of open passes the file name as a separate argument.
826 if ($in_file =~ m/\.rrd$/i)
828 open ($in_fh, '-|', 'rrdtool', 'dump', $in_file) or die ("open (rrdtool): $!");
# NOTE(review): presumably `-' selects STDIN - confirm.
830 elsif ($in_file eq '-')
837 open ($in_fh, '<', $in_file) or die ("open ($in_file): $!");
# Output: a name ending in `.rrd' is written through a pipe to
# `rrdtool restore - <file>'.
840 if ($out_file =~ m/\.rrd$/i)
842 open ($out_fh, '|-', 'rrdtool', 'restore', '-', $out_file) or die ("open (rrdtool): $!");
844 elsif ($out_file eq '-')
847 $out_needs_close = 0;
851 open ($out_fh, '>', $out_file) or die ("open ($out_file): $!");
854 handle_fh ($in_fh, $out_fh);
860 if ($out_needs_close)
868 This script is licensed under the GNU General Public License, versionE<nbsp>2
873 Florian octo Forster E<lt>octo at verplant.orgE<gt>