#!/usr/bin/perl

# collectd - contrib/rrd_filter.px
# Copyright (C) 2007-2008  Florian octo Forster
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; only version 2 of the License is applicable.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#
# Authors:
#   Florian octo Forster <octo at verplant.org>

use strict;
use warnings;

=head1 NAME

rrd_filter.px - Perform some advanced non-standard operations on an RRD file.

=head1 SYNOPSIS

  rrd_filter.px -i input.rrd -o output.rrd [options]

=head1 DEPENDENCIES

rrd_filter.px requires the RRDTool binary, Perl and the included
L<Getopt::Long> module.

=cut

use Getopt::Long ('GetOptions');
use Scalar::Util ('looks_like_number');

our $InFile;
our $InDS = [];
our $OutFile;
our $OutDS = [];
our $NewDSes = [];
our $NewRRAs = [];
our $Step = 0;
our $Scale = 1.0;
our $Shift = 0.0;

our $Debug = 0;

=head1 OPTIONS

The following options can be passed on the command line:

=over 4

=item B<--infile> I<file>

=item B<-i> I<file>

Reads from I<file>. If I<file> ends in C<.rrd>, then C<rrdtool dump> is invoked
to create an XML dump of the RRD file. Otherwise the XML dump is expected
directly. The special filename C<-> can be used to read from STDIN.

=item B<--outfile> I<file>

=item B<-o> I<file>

Writes output to I<file>. If I<file> ends in C<.rrd>, then C<rrdtool restore>
is invoked to create a binary RRD file. Otherwise an XML output is written. The
special filename C<-> can be used to write to STDOUT.

=item B<--map> I<in_ds>:I<out_ds>

=item B<-m> I<in_ds>:I<out_ds>

Writes the datasource I<in_ds> to the output and renames it to I<out_ds>. This
is useful to extract one DS from an RRD file.

=item B<--step> I<seconds>

=item B<-s> I<seconds>

Changes the step of the output RRD file to be I<seconds>. The new stepsize must
be a multiple of the old stepsize or the other way around. When increasing the
stepsize the number of PDPs in each RRA must be dividable by the factor by
which the stepsize is increased. The length of CDPs and the absolute length of
RRAs (and thus the data itself) is not altered.

Examples:

  step =  10, rra_steps = 12   =>   step = 60, rra_steps =  2
  step = 300, rra_steps =  1   =>   step = 10, rra_steps = 30

=item B<--ds> B<DS>:I<name>:I<type>:I<heartbeat>:I<min>:I<max>

=item B<-d> B<DS>:I<name>:I<type>:I<heartbeat>:I<min>:I<max>

Adds a new data source to the generated RRD file. All values of the new data
source are set to C<NaN>. If I<min> or I<max> is C<U>, the respective limit is
written as C<NaN>.

=item B<--rra> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>

=item B<-a> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>

Inserts a new RRA in the generated RRD file. This is done B<after> the step has
been adjusted, take that into account when specifying I<steps> and I<rows>. For
an explanation of the format please see L<rrdcreate(1)>.

=item B<--scale> I<factor>

Scales the values by the factor I<factor>, i.e. all values are multiplied by
I<factor>.

=item B<--shift> I<offset>

Shifts all values by I<offset>, i.e. I<offset> is added to all values.

=back

=cut

GetOptions ("infile|i=s" => \$InFile,
  "outfile|o=s" => \$OutFile,
  'map|m=s' => sub
  {
    my ($in_ds, $out_ds) = split (':', $_[1]);
    if (!defined ($in_ds) || !defined ($out_ds))
    {
      print STDERR "Argument for `map' incorrect! The format is `--map in_ds:out_ds'\n";
      exit (1);
    }
    push (@$InDS, $in_ds);
    push (@$OutDS, $out_ds);
  },
  'step|s=i' => \$Step,
  'ds|d=s' => sub
  {
    #DS:ds-name:GAUGE | COUNTER | DERIVE | ABSOLUTE:heartbeat:min:max
    my ($ds, $name, $type, $hb, $min, $max) = split (':', $_[1]);
    # An empty argument splits into an empty list, hence the defined() check.
    if (!defined ($ds) || ($ds ne 'DS') || !defined ($max))
    {
      print STDERR "Please use the standard RRDTool syntax when adding DSes. I. e. DS:<name>:<type>:<heartbeat>:<min>:<max>.\n";
      exit (1);
    }
    push (@$NewDSes, {name => $name, type => $type, heartbeat => $hb, min => $min, max => $max});
  },
  'rra|a=s' => sub
  {
    my ($rra, $cf, $xff, $steps, $rows) = split (':', $_[1]);
    if (!defined ($rra) || ($rra ne 'RRA') || !defined ($rows))
    {
      print STDERR "Please use the standard RRDTool syntax when adding RRAs. I. e. RRA:<cf>:<xff>:<steps>:<rows>.\n";
      exit (1);
    }
    push (@$NewRRAs, {cf => $cf, xff => $xff, steps => $steps, rows => $rows});
  },
  'scale=f' => \$Scale,
  'shift=f' => \$Shift
) or exit (1);

if (!$InFile || !$OutFile)
{
  print STDERR "Usage: $0 -i <infile> -o <outfile> [-m <in_ds>:<out_ds>] [-s <step>]\n";
  exit (1);
}
if (@$InDS != @$OutDS)
{
  print STDERR "You need the same amount of in- and out-DSes\n";
  exit (1);
}

# NOTE: main() runs before the bare blocks below are executed. That is fine:
# the named subs are compiled up front and share the (still undefined/empty)
# lexicals declared in those blocks; none of them relies on an initializer.
main ($InFile, $OutFile);
exit (0);

#
# The _DS map_ handler
#
# Keeps only the data sources named with `--map', in the order given on the
# command line, and renames them. Lines are cached per DS and re-emitted in
# the new order once the end of a group of DS sections is reached.
#
{
my $ds_index;
my $current_index;
# state 0 == searching for DS index
# state 1 == parse RRA header
# state 2 == parse values
my $state;
my $out_cache;

# Posts the cached lines of every mapped DS (in output order) to the handler
# following $index and empties the cache afterwards.
sub _dsmap_flush_cache
{
  my $index = shift;

  for (my $new_index = 0; $new_index < @$InDS; $new_index++)
  {
    my $old_index = $ds_index->[$new_index];
    while ($out_cache->[$old_index] =~ m/^(.*)$/gm)
    {
      post_line ("$1\n", $index + 1);
    }
  }

  for (my $i = 0; $i <= $current_index; $i++)
  {
    $out_cache->[$i] = '';
  }
} # _dsmap_flush_cache

# Filter: $line is one line of the XML dump, $index the position of this
# handler in the handler chain. Output is passed on via post_line().
sub handle_line_dsmap
{
  my $line = shift;
  my $index = shift;
  my $ret = '';

  if ((@$InDS == 0) || (@$OutDS == 0))
  {
    post_line ($line, $index + 1);
    return;
  }

  if (!defined ($state))
  {
    $current_index = -1;
    $state = 0;
    $out_cache = [];

    # $ds_index->[new_index] = old_index
    $ds_index = [];
    for (my $i = 0; $i < @$InDS; $i++)
    {
      print STDOUT "DS map $i: $InDS->[$i] -> $OutDS->[$i]\n" if ($Debug);
      $ds_index->[$i] = -1;
    }
  }

  if ($state == 0)
  {
    if ($line =~ m/<ds>/)
    {
      $current_index++;
      $out_cache->[$current_index] = $line;
    }
    elsif ($line =~ m#<name>\s*([^<\s]+)\s*</name>#)
    {
      my $ds_name = $1;

      # old_index == $current_index
      # new_index == $i
      for (my $i = 0; $i < @$InDS; $i++)
      {
        next if ($ds_index->[$i] >= 0);

        if ($ds_name eq $InDS->[$i])
        {
          $line =~ s#<name>\s*([^<\s]+)\s*</name>#<name> $OutDS->[$i] </name>#;
          $ds_index->[$i] = $current_index;
          last;
        }
      }

      $out_cache->[$current_index] .= $line;
    }
    elsif ($line =~ m#<last_ds>\s*([^\s>]+)\s*</last_ds>#i)
    {
      $out_cache->[$current_index] .= "\t\t<last_ds> NaN </last_ds>\n";
    }
    elsif ($line =~ m#<value>\s*([^\s>]+)\s*</value>#i)
    {
      $out_cache->[$current_index] .= "\t\t<value> NaN </value>\n";
    }
    elsif ($line =~ m#</ds>#)
    {
      $out_cache->[$current_index] .= $line;
    }
    elsif ($line =~ m#<rra>#)
    {
      # All DS definitions have been seen. An index that is still -1 would
      # silently select the *last* cached DS, so refuse to continue.
      for (my $i = 0; $i < @$InDS; $i++)
      {
        next if ($ds_index->[$i] >= 0);
        print STDERR "The data source `$InDS->[$i]' was not found in the input "
          . "(or is mapped more than once).\n";
        exit (1);
      }

      # Print out all the DS definitions we need and clear the cache -
      # it's used in state 1, too.
      _dsmap_flush_cache ($index);

      $ret .= $line;
      $current_index = -1;
      $state = 1;
    }
    elsif ($current_index == -1)
    {
      # Print all the lines before the first DS definition
      $ret .= $line;
    }
    else
    {
      # Something belonging to a DS-definition
      $out_cache->[$current_index] .= $line;
    }
  }
  elsif ($state == 1)
  {
    if ($line =~ m#<ds>#)
    {
      $current_index++;
      $out_cache->[$current_index] .= $line;
    }
    elsif ($line =~ m#<value>\s*([^\s>]+)\s*</value>#i)
    {
      $out_cache->[$current_index] .= "\t\t\t<value> NaN </value>\n";
    }
    elsif ($line =~ m#</cdp_prep>#)
    {
      # Print out all the DS definitions we need
      _dsmap_flush_cache ($index);

      $ret .= $line;
      $current_index = -1;
    }
    elsif ($line =~ m#<database>#)
    {
      $ret .= $line;
      $state = 2;
    }
    elsif ($current_index == -1)
    {
      # Print all the lines before the first DS definition
      # and after cdp_prep
      $ret .= $line;
    }
    else
    {
      # Something belonging to a DS-definition
      $out_cache->[$current_index] .= $line;
    }
  }
  elsif ($state == 2)
  {
    if ($line =~ m#</database>#)
    {
      $ret .= $line;
      $current_index = -1;
      $state = 1;
    }
    elsif ($line !~ m#<row>#)
    {
      # Not a data row (e. g. a blank line): pass it on unchanged instead of
      # turning it into a row of undefined values.
      $ret .= $line;
    }
    else
    {
      my @values = ();

      $ret .= "\t\t";

      # Keep the timestamp comment that precedes the row, if any.
      if ($line =~ m#(<!-- .*? -->)#)
      {
        $ret .= "$1 ";
      }
      $ret .= "<row> ";

      while ($line =~ m#<v>\s*([^<\s]+)\s*</v>#g)
      {
        push (@values, $1);
      }

      for (my $new_index = 0; $new_index < @$InDS; $new_index++)
      {
        my $old_index = $ds_index->[$new_index];
        $ret .= '<v> ' . $values[$old_index] . ' </v> ';
      }
      $ret .= "</row>\n";
    }
  }
  else
  {
    die;
  }

  if ($ret)
  {
    post_line ($ret, $index + 1);
  }
}} # handle_line_dsmap

#
# The _step_ handler
#
# Rewrites <step> and adjusts every <pdp_per_row> so that the absolute length
# of the consolidated data points stays the same.
#
{
my $step_factor_up;
my $step_factor_down;
sub handle_line_step
{
  my $line = shift;
  my $index = shift;

  if (!$Step)
  {
    post_line ($line, $index + 1);
    return;
  }

  if ($Debug && !defined ($step_factor_up))
  {
    print STDOUT "New step: $Step\n";
  }

  $step_factor_up ||= 0;
  $step_factor_down ||= 0;

  if (($step_factor_up == 0) && ($step_factor_down == 0))
  {
    # Still looking for the <step> element to learn the old step size.
    if ($line =~ m#<step>\s*(\d+)\s*</step>#i)
    {
      my $old_step = 0 + $1;
      if ($Step < $old_step)
      {
        $step_factor_down = int ($old_step / $Step);
        if (($step_factor_down * $Step) != $old_step)
        {
          print STDERR "The old step ($old_step seconds) "
            . "is not a multiple of the new step "
            . "($Step seconds).\n";
          exit (1);
        }
        $line = "<step> $Step </step>\n";
      }
      elsif ($Step > $old_step)
      {
        $step_factor_up = int ($Step / $old_step);
        if (($step_factor_up * $old_step) != $Step)
        {
          print STDERR "The new step ($Step seconds) "
            . "is not a multiple of the old step "
            . "($old_step seconds).\n";
          exit (1);
        }
        $line = "<step> $Step </step>\n";
      }
      else
      {
        # Old and new step are identical: turn this handler into a no-op.
        $Step = 0;
      }
    }
  }
  elsif ($line =~ m#<pdp_per_row>\s*(\d+)\s*</pdp_per_row>#i)
  {
    my $old_val = 0 + $1;
    my $new_val;
    if ($step_factor_up)
    {
      $new_val = int ($old_val / $step_factor_up);
      if (($new_val * $step_factor_up) != $old_val)
      {
        print STDERR "Can't divide number of PDPs per row ($old_val) by step-factor ($step_factor_up).\n";
        exit (1);
      }
    }
    else
    {
      $new_val = $step_factor_down * $old_val;
    }

    $line = "<pdp_per_row> $new_val </pdp_per_row>\n";
  }

  post_line ($line, $index + 1);
}} # handle_line_step

#
# The _add DS_ handler
#
# Appends the data sources given with `--ds': their definitions are inserted
# in front of the first <rra>, one empty cdp_prep entry per new DS is added to
# every RRA and every row is extended by one NaN value per new DS.
#
{
my $add_ds_done;
sub handle_line_add_ds
{
  my $line = shift;
  my $index = shift;

  my $post = sub { for (@_) { post_line ($_, $index + 1); } };

  if (!@$NewDSes)
  {
    $post->($line);
    return;
  }

  if (!$add_ds_done && ($line =~ m#<rra>#i))
  {
    for (my $i = 0; $i < @$NewDSes; $i++)
    {
      my $ds = $NewDSes->[$i];
      my $min;
      my $max;

      if ($Debug)
      {
        print STDOUT "Adding DS: name = $ds->{'name'}, type = $ds->{'type'}, heartbeat = $ds->{'heartbeat'}, min = $ds->{'min'}, max = $ds->{'max'}\n";
      }

      $min = 'NaN';
      if (defined ($ds->{'min'}) && ($ds->{'min'} ne 'U'))
      {
        $min = sprintf ('%.10e', $ds->{'min'});
      }

      $max = 'NaN';
      if (defined ($ds->{'max'}) && ($ds->{'max'} ne 'U'))
      {
        $max = sprintf ('%.10e', $ds->{'max'});
      }

      $post->("\t<ds>\n",
        "\t\t<name> $ds->{'name'} </name>\n",
        "\t\t<type> $ds->{'type'} </type>\n",
        "\t\t<minimal_heartbeat> $ds->{'heartbeat'} </minimal_heartbeat>\n",
        "\t\t<min> $min </min>\n",
        "\t\t<max> $max </max>\n",
        "\n",
        "\t\t<!-- PDP Status -->\n",
        "\t\t<last_ds> UNKN </last_ds>\n",
        "\t\t<value> NaN </value>\n",
        "\t\t<unknown_sec> 0 </unknown_sec>\n",
        "\t</ds>\n",
        "\n");
    }

    $add_ds_done = 1;
  }
  elsif ($add_ds_done && ($line =~ m#</cdp_prep>#i))
  {
    # Append exactly one cdp_prep entry per *new* DS. Hooking into every
    # existing </ds> instead would add one entry per *existing* DS, which is
    # only correct when both counts happen to be equal.
    for (my $i = 0; $i < @$NewDSes; $i++)
    {
      $post->("\t\t\t<ds>\n",
        "\t\t\t<primary_value> NaN </primary_value>\n",
        "\t\t\t<secondary_value> NaN </secondary_value>\n",
        "\t\t\t<value> NaN </value>\n",
        "\t\t\t<unknown_datapoints> 0 </unknown_datapoints>\n",
        "\t\t\t</ds>\n");
    }
  }
  elsif ($line =~ m#<row>#i)
  {
    my $insert = '<v> NaN </v>' x (0 + @$NewDSes);
    $line =~ s#</row>#$insert</row>#i;
  }

  $post->($line);
}} # handle_line_add_ds

#
# The _add RRA_ handler
#
# Counts the DS definitions and inserts the RRAs given with `--rra', filled
# with NaN, in front of the first <rra> of the input.
#
{
my $add_rra_done;
my $num_ds;
sub handle_line_add_rra
{
  my $line = shift;
  my $index = shift;

  my $post = sub { for (@_) { post_line ($_, $index + 1); } };

  $num_ds ||= 0;

  if (!@$NewRRAs || $add_rra_done)
  {
    $post->($line);
    return;
  }

  if ($line =~ m#<ds>#i)
  {
    $num_ds++;
  }
  elsif ($line =~ m#<rra>#i)
  {
    for (my $i = 0; $i < @$NewRRAs; $i++)
    {
      my $rra = $NewRRAs->[$i];
      my $temp;

      if ($Debug)
      {
        print STDOUT "Adding RRA: CF = $rra->{'cf'}, xff = $rra->{'xff'}, steps = $rra->{'steps'}, rows = $rra->{'rows'}, num_ds = $num_ds\n";
      }

      $post->("\t<rra>\n",
        "\t\t<cf> $rra->{'cf'} </cf>\n",
        "\t\t<pdp_per_row> $rra->{'steps'} </pdp_per_row>\n",
        "\t\t<params>\n",
        "\t\t\t<xff> $rra->{'xff'} </xff>\n",
        "\t\t</params>\n",
        "\t\t<cdp_prep>\n");

      for (my $j = 0; $j < $num_ds; $j++)
      {
        $post->("\t\t\t<ds>\n",
          "\t\t\t\t<primary_value> NaN </primary_value>\n",
          "\t\t\t\t<secondary_value> NaN </secondary_value>\n",
          "\t\t\t\t<value> NaN </value>\n",
          "\t\t\t\t<unknown_datapoints> 0 </unknown_datapoints>\n",
          "\t\t\t</ds>\n");
      }

      $post->("\t\t</cdp_prep>\n", "\t\t<database>\n");

      # Every row of the new RRA is the same all-NaN line; build it once.
      $temp = "\t\t\t<row>" . join ('', map { "<v> NaN </v>" } (1 .. $num_ds)) . "</row>\n";
      for (my $j = 0; $j < $rra->{'rows'}; $j++)
      {
        $post->($temp);
      }

      $post->("\t\t</database>\n", "\t</rra>\n");
    }

    $add_rra_done = 1;
  }

  $post->($line);
}} # handle_line_add_rra

#
# The _scale/shift_ handler
#

# Returns the XML element "<$tag> ... </$tag>" holding $value * $scale +
# $shift. NaN and values that are not numbers at all (e. g. `UNKN' in
# <last_ds>) are returned unmodified.
sub calculate_scale_shift
{
  my $value = shift;
  my $tag = shift;
  my $scale = shift;
  my $shift = shift;

  if (lc ("$value") eq 'nan')
  {
    return ("<$tag> NaN </$tag>");
  }

  if (!looks_like_number ($value))
  {
    # Treating a marker such as `UNKN' as 0 would invent a measurement.
    return ("<$tag> $value </$tag>");
  }

  $value = ($scale * (0.0 + $value)) + $shift;
  return (sprintf ("<%s> %1.10e </%s>", $tag, $value, $tag));
} # calculate_scale_shift

# Filter: applies $Scale and $Shift to every value-carrying element of $line.
sub handle_line_scale_shift
{
  my $line = shift;
  my $index = shift;

  if (($Scale != 1.0) || ($Shift != 0.0))
  {
    $line =~ s#<(min|max|last_ds|value|primary_value|secondary_value|v)>\s*([^\s<]+)\s*</[^>]+>#calculate_scale_shift ($2, $1, $Scale, $Shift)#eg;
  }

  post_line ($line, $index + 1);
} # handle_line_scale_shift

#
# The _output_ handler
#
{
my $fh;

# Sets the file handle handle_line_output() writes to.
sub set_output
{
  $fh = shift;
} # set_output

#
# The _peak detect_ handler
#
# This filter is unfinished! It is not registered in handle_fh() and only
# prints diagnostics to STDERR.
#
{
my $previous_values;
my $previous_differences;
my $pdp_per_row;
sub handle_line_peak_detect
{
  my $line = shift;
  my $index = shift;

  if (!$previous_values)
  {
    $previous_values = [];
    $previous_differences = [];
  }

  if ($line =~ m#</database>#i)
  {
    # End of an RRA: start over with the next one.
    $previous_values = [];
    $previous_differences = [];
    print STDERR "==============================================================================\n";
  }
  elsif ($line =~ m#<pdp_per_row>\s*([1-9][0-9]*)\s*</pdp_per_row>#)
  {
    $pdp_per_row = int ($1);
    print STDERR "pdp_per_row = $pdp_per_row;\n";
  }
  elsif ($line =~ m#<row>#)
  {
    my @values = ();
    while ($line =~ m#<v>\s*([^\s>]+)\s*</v>#ig)
    {
      if ($1 eq 'NaN')
      {
        push (@values, undef);
      }
      else
      {
        push (@values, 0.0 + $1);
      }
    }

    for (my $i = 0; $i < @values; $i++)
    {
      if (!defined ($values[$i]))
      {
        $previous_values->[$i] = undef;
      }
      elsif (!defined ($previous_values->[$i]))
      {
        $previous_values->[$i] = $values[$i];
      }
      elsif (!defined ($previous_differences->[$i]))
      {
        $previous_differences->[$i] = abs ($previous_values->[$i] - $values[$i]);
      }
      else
      {
        my $divisor = ($previous_differences->[$i] < 1.0) ? 1.0 : $previous_differences->[$i];
        my $difference = abs ($previous_values->[$i] - $values[$i]);
        my $change = $pdp_per_row * $difference / $divisor;
        if (($divisor > 10.0) && ($change > 10e5))
        {
          print STDERR "i = $i; average difference = " . $previous_differences->[$i]. "; current difference = " . $difference. "; change = $change;\n";
        }
        $previous_values->[$i] = $values[$i];
        # Exponentially weighted moving average of the differences.
        $previous_differences->[$i] = (0.95 * $previous_differences->[$i]) + (0.05 * $difference);
      }
    }
  }

  post_line ($line, $index + 1);
}} # handle_line_peak_detect

# Last element of the handler chain: writes $line to the handle registered
# with set_output(). Without such a handle the line is simply passed on.
sub handle_line_output
{
  my $line = shift;
  my $index = shift;

  if (!defined ($fh))
  {
    post_line ($line, $index + 1);
    return;
  }

  print {$fh} $line or die ("print: $!");
}} # handle_line_output

#
# Dispatching logic
#
{
my @handlers;

# Appends a code reference to the handler chain.
sub add_handler
{
  my $handler = shift;

  die unless (ref ($handler) eq 'CODE');
  push (@handlers, $handler);
} # add_handler

# Hands $line to the handler at position $index of the chain. Lines posted
# past the end of the chain are dropped.
sub post_line
{
  my $line = shift;
  my $index = shift;

  if ($index > $#handlers)
  {
    return;
  }
  $handlers[$index]->($line, $index);
}} # post_line

# Builds the handler chain according to the command line options and feeds
# every line read from $in_fh through it; the result is written to $out_fh.
sub handle_fh
{
  my $in_fh = shift;
  my $out_fh = shift;

  set_output ($out_fh);

  if (@$InDS)
  {
    add_handler (\&handle_line_dsmap);
  }

  if ($Step)
  {
    add_handler (\&handle_line_step);
  }

  if (($Scale != 1.0) || ($Shift != 0.0))
  {
    add_handler (\&handle_line_scale_shift);
  }

  #add_handler (\&handle_line_peak_detect);

  if (@$NewDSes)
  {
    add_handler (\&handle_line_add_ds);
  }

  if (@$NewRRAs)
  {
    add_handler (\&handle_line_add_rra);
  }

  add_handler (\&handle_line_output);

  while (my $line = <$in_fh>)
  {
    post_line ($line, 0);
  }
} # handle_fh

# Closes $fh and dies with a meaningful message on failure. For a piped open
# close() also fails when the child exits with a non-zero status; in that
# case $! is 0 and the status is available in $?.
sub close_or_die
{
  my $fh = shift;
  my $what = shift;

  return if (close ($fh));
  die ("close ($what): " . ($! ? "$!" : 'child exited with status ' . ($? >> 8)) . "\n");
} # close_or_die

# Opens input and output (running `rrdtool dump' / `rrdtool restore' for
# names ending in `.rrd', using STDIN / STDOUT for `-') and runs the filters.
sub main
{
  my $in_file = shift;
  my $out_file = shift;

  my $in_fh;
  my $out_fh;

  my $in_needs_close = 1;
  my $out_needs_close = 1;

  if ($in_file =~ m/\.rrd$/i)
  {
    open ($in_fh, '-|', 'rrdtool', 'dump', $in_file) or die ("open (rrdtool): $!");
  }
  elsif ($in_file eq '-')
  {
    $in_fh = \*STDIN;
    $in_needs_close = 0;
  }
  else
  {
    open ($in_fh, '<', $in_file) or die ("open ($in_file): $!");
  }

  if ($out_file =~ m/\.rrd$/i)
  {
    open ($out_fh, '|-', 'rrdtool', 'restore', '-', $out_file) or die ("open (rrdtool): $!");
  }
  elsif ($out_file eq '-')
  {
    $out_fh = \*STDOUT;
    $out_needs_close = 0;
  }
  else
  {
    open ($out_fh, '>', $out_file) or die ("open ($out_file): $!");
  }

  handle_fh ($in_fh, $out_fh);

  # Check the result of close(): buffered write errors and a failing
  # `rrdtool' child only show up here.
  if ($in_needs_close)
  {
    close_or_die ($in_fh, $in_file);
  }
  if ($out_needs_close)
  {
    close_or_die ($out_fh, $out_file);
  }
} # main

=head1 LICENSE

This script is licensed under the GNU general public license, version 2
(GPLv2).

=head1 AUTHOR

Florian octo Forster E<lt>octo at verplant.orgE<gt>

=cut