From f82c759559e3fa4535960eaa20df1761b1763396 Mon Sep 17 00:00:00 2001
From: qMax
Date: Sun, 25 Nov 2007 10:21:08 +0100
Subject: [PATCH] Parser/Sendmail.pm: Added a parser for sendmail.

From: qMax
To: yaala@verplant.org
Subject: [yaala] Sendmail log parser
Date: Tue, 23 Nov 2004 18:15:28 +0600

This is a parser for the sendmail log.

Note:
Each message in the sendmail log forms several records (lines):
one record with a 'from=' field, and one or several with a 'to=' field.

The parser joins each 'from'-part with a 'to'-part by message log id.
However, if there are several recipients, the result will be several
records for the same message: one per recipient.
When a message is counted for the first time, datafield 'uniq' is set to 1.
This is useful to calculate total count/traffic or traffic by type.
But if you count totals by recipient, using this key (as WHERE uniq=='1')
will make yaala ignore all recipients of a message but the first.

Config options:
  sendmail_aliases     - aliases file used to resolve (local senders')
                         addresses
  sendmail_localdomain - local domain to remove from addresses
  sendmail_localrelay  - IP regexp to determine incoming/outgoing/local
                         traffic, e.g. '192.168.1.\d+'

Data fields:
  everything found in the sendmail log, with:
  the timestamp is split into 'date' and 'hour', as usual; the year is
  taken from the current date.
  rrelay is the relay field from the 'to'-part
  uniq = set to 1 when the message is counted for the first time.
  type = "I","O","L","R" for incoming, outgoing, local and relay traffic.
         It is determined using fields 'mailer' and 'relay'.
         (Thus, only applied to actually sent/received messages)

Aggregations:
  size, count, nrcpts

TODO:
  - Properly handle multiple aliases.
  - Split non-local multiple recipients
  - Resolve hosts in relay fields.
---
 lib/Yaala/Parser/Sendmail.pm | 429 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 429 insertions(+)
 create mode 100644 lib/Yaala/Parser/Sendmail.pm

diff --git a/lib/Yaala/Parser/Sendmail.pm b/lib/Yaala/Parser/Sendmail.pm
new file mode 100644
--- /dev/null
+++ b/lib/Yaala/Parser/Sendmail.pm
@@ -0,0 +1,429 @@
+package Yaala::Parser;
+
+=head1 NAME
+
+Yaala::Parser::Sendmail
+
+=head1 DESCRIPTION
+
+Parser for sendmail log.
+
+Note:
+
+Each message in sendmail log forms several records (lines):
+one record with 'from=' field, and one or several with 'to=' field.
+
+Parser joins each 'from'-part with 'to'-part by message log id.
+However, if there are several recipients, the result will be several
+records for the same message: one per recipient.
+When a message is counted for the first time, datafield 'uniq' is set to 1.
+This is useful to calculate total count/traffic or traffic by type.
+But if you count totals by recipient, using this key (as WHERE uniq=='1')
+will make yaala ignore all recipients of a message but the first.
+
+Grand total (extra) counts every message once, when its first from/to pair
+is completed, whatever the delivery status of that pair is.
+
+=head1 CONFIG OPTIONS
+
+  sendmail_aliases     - aliases file used to resolve addresses
+  sendmail_localdomain - local domain (used as a pattern) to remove from
+                         addresses
+  sendmail_localrelay  - IP regexp to determine incoming/outgoing/local
+                         traffic, e.g. '192\.168\.1\.\d+'
+
+=head1 DATA FIELDS
+
+=head2 Key-fields
+
+=over 4
+
+=item id
+
+=item from
+
+=item class
+
+=item msgid
+
+=item bodytype
+
+=item proto
+
+=item daemon
+
+=item relay
+
+=item to
+
+=item delay
+
+=item xdelay
+
+=item mailer
+
+=item pri
+
+=item dsn
+
+=item stat
+
+=item rrelay
+
+=item date
+
+=item hour
+
+=item uniq
+
+=item type
+
+=back
+
+=head2 Aggregation-Fields
+
+=over 4
+
+=item size (bytes)
+
+=item nrcpts
+
+=item count
+
+=back
+
+=head2 Additional Notes
+
+The log timestamp is split into I<date> and I<hour>, as usual; the year is
+taken from the current date.
+
+I<rrelay> is the relay field from the to-part.
+
+I<uniq> is set to 1 when the message is counted for the first time.
+
+I<type> = "I","O","L","R" for incoming, outgoing, local and relay traffic. It
+is determined using fields 'mailer' and 'relay'. (Thus, only applied to
+sent/received messages.) Records that cannot be classified get "UNDEF".
+
+=head1 TODO
+
+=over 4
+
+=item Properly resolve multiple aliases.
+
+=item Split non-local multiple recipients
+
+=back
+
+=head1 AUTHOR
+
+qMax E<lt>qmax-at-mediasoft.ruE<gt>
+
+=cut
+
+use strict;
+use warnings;
+
+use Exporter;
+use Yaala::Parser::WebserverTools qw(%MONTH_NUMBERS);
+use Yaala::Data::Persistent qw#init#;
+use Yaala::Config qw#get_config#;
+
+@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS);
+@Yaala::Parser::ISA = ('Exporter');
+
+# Totals and covered date range reported by extra(), as handed out by
+# Yaala::Data::Persistent (presumably kept between runs -- confirm).
+our $EXTRA = init ('$EXTRA', 'hash');
+
+# Log ids of the messages already added to the grand total.
+my %COUNTED = ();
+
+# Every traffic type gets a counter, "R" and "UNDEF" included, so that
+# nothing is ever added to an undefined value.
+for my $total (qw(totalcount totalamount))
+{
+    $EXTRA->{$total} = {} if (!defined ($EXTRA->{$total}));
+    for my $type (qw(I O L R UNDEF))
+    {
+        $EXTRA->{$total}->{$type} = 0 if (!defined ($EXTRA->{$total}->{$type}));
+    }
+}
+if (!defined ($EXTRA->{'start'})) { $EXTRA->{'start'} = undef; }
+if (!defined ($EXTRA->{'end'}  )) { $EXTRA->{'end'}   = undef; }
+
+our %DATAFIELDS = (
+    # log message id
+    id       => 'key',
+    # 'from' part
+    from     => 'key',
+    size     => 'agg:bytes',
+    class    => 'key',
+    nrcpts   => 'agg',
+    msgid    => 'key',
+    bodytype => 'key',
+    proto    => 'key',
+    daemon   => 'key',
+    relay    => 'key',
+    # 'to' part
+    to       => 'key',
+    delay    => 'key',
+    xdelay   => 'key',
+    mailer   => 'key',
+    pri      => 'key',
+    dsn      => 'key',
+    'stat'   => 'key',
+    rrelay   => 'key',
+    # additional
+    date     => 'key',
+    hour     => 'key',
+    uniq     => 'key',
+    type     => 'key',
+    count    => 'agg'
+);
+
+# This needs to be done at runtime, since Data uses Setup which relies on
+# %DATAFIELDS to be defined -octo
+require Yaala::Data::Core;
+Yaala::Data::Core->import (qw(store));
+
+my $VERSION = 'v 1.1$';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+our %ALIASES;
+my $aliasfile = get_config ("sendmail_aliases");
+if ($aliasfile and load_aliases ($aliasfile))
+{
+    print STDERR $/, __FILE__, ": Loaded aliases from $aliasfile" if ($::DEBUG);
+}
+
+our $localdomain = get_config ("sendmail_localdomain");
+our $localrelay = get_config ("sendmail_localrelay");
+if (defined ($localrelay) and length ($localrelay))
+{
+    print STDERR $/, __FILE__, ": Local relay: $localrelay" if ($::DEBUG);
+    # the configured pattern has to appear between literal square brackets
+    $localrelay = qr/\[$localrelay\]/;
+}
+else
+{
+    # An unset option must stay false; compiled, it would be a pattern object
+    # that is always true and would classify every message as "O" or "R".
+    $localrelay = undef;
+}
+
+our %RECF = (); # from-parts seen so far, by log id
+our %RECT = (); # to-parts still waiting for their from-part, by log id
+
+return (1);
+
+# Feeds one log line to the parser. Only lines written by sm-mta that start
+# with a queue id and a "from=" or "to=" field are used; anything else is
+# silently skipped. Returns nothing.
+sub parse
+{
+    my $line = shift;
+    return if (!$line);
+
+    # "Mon DD HH:MM:SS host sm-mta[pid]: queueid: ..."; the host name may be
+    # fully qualified, hence the dot in the character class.
+    return unless ($line =~ s/^(...\s*\d+ \d\d:\d\d:\d\d) [\w.-]+ sm-mta.*\[\d+\]: ([a-zA-Z0-9]{14}): //);
+    my ($datetime, $id) = ($1, $2);
+
+    if ($line =~ /^from=/)
+    {
+        my $rec = parseline ($line);
+        $rec->{'from'} = resolve_alias ($rec->{'from'});
+        $RECF{$id} = { datetime => $datetime, %$rec };
+    }
+    elsif ($line =~ /^to=/)
+    {
+        my $rec = parseline ($line);
+        $rec->{'to'} = resolve_alias ($rec->{'to'});
+        $RECT{$id} = { datetime => $datetime, %$rec };
+    }
+    else
+    {
+        # some header mangling or mail filter log lines
+        return;
+    }
+
+    checkpair ($id);
+    return;
+}
+
+# Splits the comma separated "key=value" fields of a log line and returns a
+# reference to a hash holding those whose key is listed in %DATAFIELDS.
+sub parseline
+{
+    my $line = shift;
+    my %rec = ();
+    foreach my $field (split (/,\s+/, $line))
+    {
+        next unless ($field =~ m/(.*?)=(.*)/);
+        $rec{$1} = $2 if (exists ($DATAFIELDS{$1}));
+    }
+    return (\%rec);
+}
+
+# Joins the pending to-part of message $id with its from-part, derives the
+# additional fields, updates the grand total and hands the record to store().
+# Does nothing until both parts are known.
+sub checkpair
+{
+    my $id = shift;
+    return unless ($RECF{$id} and $RECT{$id});
+
+    # Only the to-part is used up. The from-part stays, because sendmail logs
+    # one to-part per recipient (or delivery attempt) and each of them has
+    # to be joined with the same from-part.
+    my $from_part = $RECF{$id};
+    my $to_part = delete ($RECT{$id});
+
+    # rename relay in TO-part into rrelay
+    my $rrelay = delete ($to_part->{'relay'});
+    $to_part->{'rrelay'} = $rrelay if ($rrelay);
+
+    my %combined = (%$from_part, %$to_part);
+
+    my ($mon, $day, $hour) = $combined{'datetime'} =~ /(\w\w\w)\s*(\d+) (\d\d):\d\d:\d\d/;
+    if (!defined ($hour))
+    {
+        print STDERR $/, __FILE__, ": Unable to parse time of message $id" if ($::DEBUG);
+        return;
+    }
+    # an unknown month name ends up as month 0 in the date
+    my $month = $MONTH_NUMBERS{$mon} || 0;
+    my $year = (localtime (time))[5] + 1900; # current year
+
+    $combined{'id'} = $id;
+    $combined{'date'} = sprintf ("%04u-%02u-%02u", $year, $month, $day);
+    $combined{'hour'} = $hour;
+    $combined{'count'} = 1;
+    $combined{'uniq'} = (exists ($COUNTED{$id}) ? 0 : 1);
+    # parse() already ran both addresses through resolve_alias(), so this
+    # only makes sure that no angle brackets are left
+    for my $field (qw(to from))
+    {
+        $combined{$field} =~ s/[<>]//g if (defined ($combined{$field}));
+    }
+    $combined{'stat'} =~ s/^((\w+)(\s+\w+)*).*$/$1/ if (defined ($combined{'stat'}));
+
+    my $type = "UNDEF";
+    if ($localrelay and $combined{'relay'} and $combined{'mailer'})
+    {
+        #               mailer is 'local'   other mailer
+        # local relay          L                 O
+        # other relay          I                 R
+        my $from_local = ($combined{'relay'} =~ $localrelay);
+        my $to_local = ($combined{'mailer'} eq 'local');
+        $type = $from_local ? ($to_local ? 'L' : 'O')
+                            : ($to_local ? 'I' : 'R');
+    }
+    $combined{'type'} = $type;
+
+    unless ($COUNTED{$id})
+    {
+        $COUNTED{$id} = 1;
+        $EXTRA->{'totalcount'}->{$type}++;
+        $EXTRA->{'totalamount'}->{$type} += $combined{'size'} if ($combined{'size'});
+    }
+
+    # Dates are compared as one month/day number so that a later month with
+    # a smaller day number does not move the start backwards.
+    # The log has no year, so a range spanning New Year is not recognized.
+    if ($month)
+    {
+        my $stamp = $month * 100 + $day;
+        my $start = $EXTRA->{'start'};
+        my $end = $EXTRA->{'end'};
+        if (!defined ($start) or $stamp < $start->{'m'} * 100 + $start->{'d'})
+        {
+            $EXTRA->{'start'}->{'m'} = $month;
+            $EXTRA->{'start'}->{'d'} = $day;
+        }
+        if (!defined ($end) or $stamp > $end->{'m'} * 100 + $end->{'d'})
+        {
+            $EXTRA->{'end'}->{'m'} = $month;
+            $EXTRA->{'end'}->{'d'} = $day;
+        }
+    }
+
+    store (\%combined);
+    return;
+}
+
+# Reads "alias: address" lines from $file into %ALIASES (lower-cased).
+# Returns 1 on success and 0 if the file cannot be opened.
+sub load_aliases
+{
+    my $file = shift;
+    my $fh;
+    if (!open ($fh, '<', $file))
+    {
+        print STDERR $/, __FILE__, ": Unable to open aliases file `$file': $!";
+        return (0);
+    }
+    while (my $line = <$fh>)
+    {
+        chomp ($line);
+        next if ($line =~ /^\s*#/); # comment
+        # only aliases with a single target are supported
+        next unless ($line =~ /^([^ ]*)\s*:\s*([^, ]*)\s*$/);
+        $ALIASES{lc ($1)} = lc ($2);
+    }
+    close ($fh);
+    return (1);
+}
+
+# Normalizes an address: lower case, no angle brackets, local domain
+# removed, and replaced by its target if it is a known alias.
+sub resolve_alias
+{
+    my ($alias) = @_;
+    return ('') if (!defined ($alias));
+
+    my $addr = lc ($alias);
+    $addr =~ s/[<>]//g;
+    if (defined ($localdomain) and length ($localdomain))
+    {
+        # Only strip the domain at the end of an address (the field may hold
+        # several, comma separated), never in the middle of a longer domain.
+        $addr =~ s/\@(?:$localdomain)(?=,|$)//gi;
+    }
+    if ($ALIASES{$addr})
+    {
+        $addr = $ALIASES{$addr};
+    }
+    return ($addr);
+}
+
+# Copies the date range and the per-type totals into %$::EXTRA. Types
+# without any message are left out.
+sub extra
+{
+    my $start = $EXTRA->{'start'};
+    my $end = $EXTRA->{'end'};
+    if (defined ($start) and defined ($end))
+    {
+        $::EXTRA->{'0. Begin/End date'} = sprintf ("%02d/%02d - %02d/%02d",
+            $start->{'d'}, $start->{'m'}, $end->{'d'}, $end->{'m'});
+    }
+
+    my @rows = (
+        [ 'I',     '1. Incoming mail' ],
+        [ 'O',     '2. Outgoing mail' ],
+        [ 'L',     '3. Local mail'    ],
+        [ 'R',     '4. Relayed mail'  ],
+        [ 'UNDEF', '5. Unknown mail'  ],
+    );
+    for my $row (@rows)
+    {
+        my ($type, $label) = @$row;
+        # each row is shown only if its own counter is non-zero
+        next unless ($EXTRA->{'totalcount'}->{$type});
+        $::EXTRA->{$label} = sprintf ("%5d / %12d Bytes",
+            $EXTRA->{'totalcount'}->{$type}, $EXTRA->{'totalamount'}->{$type});
+    }
+    return;
+}
-- 
2.11.0