--- /dev/null
+package Yaala::Parser;
+
+=head1 NAME
+
+Yaala::Parser::Sendmail
+
+=head1 DESCRIPTION
+
+Parser for sendmail log.
+
+Note:
+
+Each message in sendmail log forms several records (lines):
+one record with 'from=' field, and one or several with 'to=' field.
+
+Parser joins each 'from'-part with 'to'-part by message log id.
+However, if there are several recipients, the result will be several records for the same message:
+one per recipient.
+When a message is counted for the first time, the data field 'uniq' is set to 1.
+This is useful to calculate total count/traffic or traffic by type.
+But if you count totals by recipient, using this key (as WHERE uniq=='1')
+will make yaala ignore all recipients of a message but the first.
+
+Grand total (extra) is counted unique and only with stat=/Sent.*/
+
+=head1 CONFIG OPTIONS
+
+ sendmail_aliases - aliases file used to resolve addresses
+ sendmail_localdomain - local domain to remove from addresses
+ sendmail_localrelay - IP regexp to determine incoming/outgoing/local traffic, e.g. '192.168.1.\d+'
+
+=head1 DATA FIELDS
+
+=head2 Key-fields
+
+=over 4
+
+=item id
+
+=item from
+
+=item class
+
+=item msgid
+
+=item bodytype
+
+=item proto
+
+=item daemon
+
+=item relay
+
+=item to
+
+=item delay
+
+=item xdelay
+
+=item mailer
+
+=item pri
+
+=item dsn
+
+=item stat
+
+=item rrelay
+
+=item date
+
+=item hour
+
+=item uniq
+
+=item type
+
+=back
+
+=head2 Aggregation-Fields
+
+=over 4
+
+=item size (bytes)
+
+=item nrcpts
+
+=item count
+
+=back
+
+=head2 Additional Notes
+
+timedate is split into I<date> and I<hour>; as usual, the year is taken from
+the current date. I<rrelay> is the relay field from the to-part.
+
+I<uniq> is set to 1 when a message is counted for the first time.
+
+I<type> = "I","O","L","R" for incoming, outgoing, local and relay traffic. It
+is determined using fields 'mailer' and 'relay'. (Thus, only applied to
+sent/received messages)
+
+=head1 TODO
+
+=over 4
+
+=item Properly resolve multiple aliases.
+
+=item Split non-local multiple recipients
+
+=back
+
+=head1 AUTHOR
+
+qMax E<lt>qmax-at-mediasoft.ruE<gt>
+
+=cut
+
+use strict;
+use warnings;
+use vars qw(%DATAFIELDS);
+
+use Exporter;
+use Yaala::Parser::WebserverTools qw(%MONTH_NUMBERS);
+use Yaala::Data::Persistent qw#init#;
+use Yaala::Config qw#get_config#;
+
+# Exporter wiring for the parser package: parse(), extra() and %DATAFIELDS
+# may be imported on request.
+@Yaala::Parser::ISA       = ('Exporter');
+@Yaala::Parser::EXPORT_OK = ('parse', 'extra', '%DATAFIELDS');
+
+# Totals and date range reported by extra(). Obtained through init() from
+# Yaala::Data::Persistent and used throughout this file as a hash reference.
+our $EXTRA = init('$EXTRA', 'hash');
+
+# Message ids that have already been added to the totals kept in $EXTRA.
+my %COUNTED;
+
+# Create the per-type counters unless they are already defined.
+for my $slot ('totalcount', 'totalamount') {
+    next if defined $EXTRA->{$slot};
+    $EXTRA->{$slot} = { I => 0, O => 0, L => 0 };
+}
+
+# Make sure the date-range slots exist; they stay undef until the first
+# record has been processed.
+for my $edge ('start', 'end') {
+    $EXTRA->{$edge} = undef unless defined $EXTRA->{$edge};
+}
+
+# Fields a record may carry. 'key' marks the key-fields and 'agg' the
+# aggregation-fields listed under DATA FIELDS in the POD above.
+%DATAFIELDS = (
+    # log message id
+    'id'       => 'key',
+
+    # fields of the 'from=' record
+    'from'     => 'key',
+    'size'     => 'agg:bytes',
+    'class'    => 'key',
+    'nrcpts'   => 'agg',
+    'msgid'    => 'key',
+    'bodytype' => 'key',
+    'proto'    => 'key',
+    'daemon'   => 'key',
+    'relay'    => 'key',
+
+    # fields of the 'to=' record
+    'to'       => 'key',
+    'delay'    => 'key',
+    'xdelay'   => 'key',
+    'mailer'   => 'key',
+    'pri'      => 'key',
+    'dsn'      => 'key',
+    'stat'     => 'key',
+    'rrelay'   => 'key',
+
+    # fields added by the parser itself
+    'date'     => 'key',
+    'hour'     => 'key',
+    'uniq'     => 'key',
+    'type'     => 'key',
+    'count'    => 'agg',
+);
+
+# Yaala::Data::Core has to be pulled in at runtime rather than with "use":
+# Data uses Setup, which relies on %DATAFIELDS being defined already. -octo
+require Yaala::Data::Core;
+Yaala::Data::Core->import('store');
+
+my $VERSION = 'v 1.1$';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+# Alias table filled by load_aliases(): lower-cased alias => lower-cased target.
+our %ALIASES;
+my $aliasfile = get_config("sendmail_aliases");
+if ($aliasfile) {
+    print STDERR $/, __FILE__, ": Loaded aliases from $aliasfile" if ($::DEBUG);
+    load_aliases($aliasfile);
+}
+
+# Local domain that resolve_alias() removes from addresses (optional).
+our $localdomain = get_config("sendmail_localdomain");
+
+# Regexp matching the bracketed relay address of local hosts. It is compiled
+# only when the option is actually set: an unset option must leave
+# $localrelay false so that checkpair() reports type "UNDEF" instead of
+# testing relays against a meaningless pattern that matches a literal "[]".
+our $localrelay = get_config("sendmail_localrelay");
+if (defined $localrelay and length $localrelay) {
+    print STDERR $/, __FILE__, ": Local relay: $localrelay" if ($::DEBUG);
+    $localrelay = qr/\[$localrelay\]/;
+}
+else {
+    $localrelay = undef;
+}
+
+our %RECF = (); # all pending from-parts, keyed by message log id
+our %RECT = (); # all pending to-parts, keyed by message log id
+
+return (1);
+
+# Parse one log line.
+#
+# Lines that carry the expected prefix (timestamp, host, "sm-mta" tag with
+# pid, 14-character message log id) have that prefix stripped. A remaining
+# 'from=' or 'to=' record is split into fields, its address is passed through
+# resolve_alias(), and the result is parked in %RECF or %RECT under the
+# message id; checkpair() then emits a record once both parts are present.
+# All other lines are ignored.
+sub parse
+{
+    my $line = shift or return undef;
+
+    if ( $line =~ s/^(...\s*\d+ \d\d:\d\d:\d\d) [\w-]+ sm-mta.*\[\d+\]: ([a-zA-Z0-9]{14}): // )
+    {
+        my ($stamp, $msg_id) = ($1, $2);
+
+        if ( $line =~ /^(from|to)=/ ) {
+            # Remember which part this is before other regexps reset $1.
+            my $part    = $1;
+            my $fields  = parseline($line);
+            my $pending = ( $part eq 'from' ) ? \%RECF : \%RECT;
+
+            $fields->{$part}    = resolve_alias($fields->{$part});
+            $pending->{$msg_id} = { datetime => $stamp, %$fields };
+            checkpair($msg_id);
+        }
+        # Anything else (header mangling, mail filter log lines, ...) is
+        # deliberately skipped.
+    }
+}
+
+# Split the "name=value, name=value, ..." remainder of a log line into a hash.
+#
+# Only names that appear in %DATAFIELDS are kept. Returns a reference to the
+# resulting hash (empty when nothing matched).
+sub parseline
+{
+    my $text = shift;
+    my %fields;
+
+    for my $chunk ( split /,\s+/, $text ) {
+        # The name is everything up to the first '=', the value is the rest.
+        next unless $chunk =~ m/(.*?)=(.*)/;
+        my ($name, $value) = ($1, $2);
+
+        next unless exists $DATAFIELDS{$name};
+        $fields{$name} = $value;
+    }
+
+    return \%fields;
+}
+
+# Emit a combined record for message $id once both its from-part (%RECF) and
+# its to-part (%RECT) are pending.
+#
+# The two parts are merged, the derived fields (id, date, hour, count, uniq,
+# type) are added, the totals and the date range in $EXTRA are updated, the
+# record is handed to store(), and both pending parts are dropped.
+# Returns nothing when the pair is not complete yet.
+sub checkpair
+{
+    my $id = shift;
+    return unless ( $RECF{$id} and $RECT{$id} );
+
+    # The relay of the to-part is reported as 'rrelay' so that it does not
+    # overwrite the 'relay' of the from-part when the parts are merged.
+    $RECT{$id}->{'rrelay'} = $RECT{$id}->{'relay'} if $RECT{$id}->{'relay'};
+    delete $RECT{$id}->{'relay'};
+
+    # On duplicate keys (e.g. 'datetime') the to-part wins.
+    my %combined = ( %{$RECF{$id}}, %{$RECT{$id}} );
+
+    my ($month, $day, $hour) =
+        $combined{'datetime'} =~ /(\w\w\w)\s*(\d+) (\d\d):\d\d:\d\d/;
+    $month = $MONTH_NUMBERS{$month} if defined $month;
+
+    # Log lines carry no year, so the current one is used.
+    my $year = (localtime)[5] + 1900;
+    my $date = sprintf("%04u-%02u-%02u", $year, $month || 0, $day || 0);
+
+    # The message log id is stripped from the line by parse(), so it has to
+    # be put into the record explicitly.
+    $combined{'id'}    = $id;
+    $combined{'date'}  = $date;
+    $combined{'hour'}  = $hour;
+    $combined{'count'} = 1;
+    $combined{'uniq'}  = (exists($COUNTED{$id}) ? 0 : 1);
+
+    # NOTE(review): as written, these four substitutions replace each angle
+    # bracket with itself and therefore change nothing; they look like an
+    # HTML-escaping step whose replacement text was lost - confirm intent.
+    $combined{'to'} =~ s/\</\</g;
+    $combined{'to'} =~ s/\>/\>/g;
+    $combined{'from'} =~ s/\</\</g;
+    $combined{'from'} =~ s/\>/\>/g;
+
+    # Keep only the leading words of the status, dropping any details.
+    $combined{'stat'} =~ s/^((\w+)(\s+\w+)*).*$/$1/ if defined $combined{'stat'};
+
+    my $type = "UNDEF";
+    if ( $localrelay and $combined{'relay'} and $combined{'mailer'} ) {
+        #
+        # (relay is local / mailer is local)
+        # L: l/l  O: l/-
+        # I: -/l  R: -/-
+        #
+        my $from_local = ( $combined{'relay'} =~ $localrelay );
+        my $to_local   = ( $combined{'mailer'} eq 'local' );
+        $type = $from_local
+            ? ( $to_local ? 'L' : 'O' )
+            : ( $to_local ? 'I' : 'R' );
+    }
+    $combined{'type'} = $type;
+
+    # Each message contributes to the totals only once.
+    unless ( $COUNTED{$id} ) {
+        $COUNTED{$id} = 1;
+        $EXTRA->{'totalcount'}->{$type}++;
+        $EXTRA->{'totalamount'}->{$type} += $combined{'size'} || 0;
+    }
+
+    # Track the earliest and latest day seen. Month and day are compared as
+    # one value: comparing them independently lets e.g. Feb 10 replace a
+    # start of Jan 15 just because 10 < 15.
+    if ( $month and $day ) {
+        my $stamp = $month * 100 + $day;
+
+        my $start = $EXTRA->{'start'};
+        if ( not defined $start or not defined $start->{'m'}
+            or $stamp < $start->{'m'} * 100 + ( $start->{'d'} || 0 ) ) {
+            $EXTRA->{'start'}->{'m'} = $month;
+            $EXTRA->{'start'}->{'d'} = $day;
+        }
+
+        my $end = $EXTRA->{'end'};
+        if ( not defined $end or not defined $end->{'m'}
+            or $stamp > $end->{'m'} * 100 + ( $end->{'d'} || 0 ) ) {
+            $EXTRA->{'end'}->{'m'} = $month;
+            $EXTRA->{'end'}->{'d'} = $day;
+        }
+    }
+
+    #print STDERR "\nParsed $id: ",join(";",map("$_=".$combined{$_}, sort keys %combined));
+    store (\%combined);
+
+    # NOTE(review): the from-part is dropped here too, so a later 'to=' line
+    # for the same id finds no partner and stays pending in %RECT. Verify
+    # against the intended one-record-per-recipient behaviour.
+    delete $RECF{$id};
+    delete $RECT{$id};
+}
+
+# Fill %ALIASES from an aliases file.
+#
+# Only lines of the form "alias: target" with a single target are used; both
+# sides are stored lower-cased. Returns true when the file was read, false
+# (after a warning) when it could not be opened.
+sub load_aliases
+{
+    my $file = shift;
+
+    # Three-argument open with a lexical handle: the configured file name can
+    # never be taken for an open mode, and no global handle is left behind.
+    my $fh;
+    unless ( open($fh, '<', $file) ) {
+        warn __FILE__, ": cannot read aliases file '$file': $!\n";
+        return;
+    }
+
+    # Read into a lexical so the caller's $_ is left untouched.
+    while ( my $line = <$fh> ) {
+        chomp($line);
+        next unless ( $line =~ /^([^ ]*)\s*:\s*([^, ]*)\s*$/ );
+        $ALIASES{lc($1)} = lc($2);
+    }
+    close($fh);
+
+    return 1;
+}
+
+# Normalise an address and map it through %ALIASES.
+#
+# The address is lower-cased, angle brackets are removed, and the configured
+# local domain (if any) is cut off. When the result is a known alias its
+# target is returned, otherwise the normalised address itself.
+sub resolve_alias
+{
+    my ($alias) = @_;
+    my $addr = defined $alias ? lc($alias) : '';
+    $addr =~ s/[<>]//g;
+
+    if ( $localdomain ) {
+        # Remove the domain only where it ends an address (end of string or
+        # just before the comma separating the next recipient). Without that
+        # boundary "bob@example.com.au" would become "bob.au" for a local
+        # domain of "example.com". The option is still interpolated as a
+        # regexp, grouped so that alternations stay intact, and matched
+        # case-insensitively because the address has been lower-cased.
+        $addr =~ s/\@(?:$localdomain)(?=,|\z)//i;
+    }
+
+    if ( $ALIASES{$addr} ) {
+        $addr = $ALIASES{$addr};
+    }
+    return $addr;
+}
+
+# Publish the summary lines into the main program's $::EXTRA hash: the date
+# range covered, followed by "count / bytes" for every traffic type that has
+# a non-zero message count.
+sub extra
+{
+    # Read the range without autovivifying $EXTRA->{'start'} / {'end'}:
+    # creating empty hashes there would make them look "defined" to
+    # checkpair() afterwards. Missing values are shown as 00.
+    my %range;
+    for my $edge ('start', 'end') {
+        my $mark = $EXTRA->{$edge};
+        $range{$edge} = ( ref($mark) eq 'HASH' ) ? $mark : {};
+    }
+
+    $::EXTRA->{'0. Begin/End date'} = sprintf "%02d/%02d - %02d/%02d",
+        $range{'start'}->{'d'} || 0,
+        $range{'start'}->{'m'} || 0,
+        $range{'end'}->{'d'}   || 0,
+        $range{'end'}->{'m'}   || 0;
+
+    # Traffic type => label. Every line is gated on the counter of its own
+    # type (the local-mail line used to be gated on the incoming counter).
+    my @sections = (
+        [ 'I',     '1. Incoming mail' ],
+        [ 'O',     '2. Outgoing mail' ],
+        [ 'L',     '3. Local mail'    ],
+        [ 'R',     '4. Relayed mail'  ],
+        [ 'UNDEF', '5. Unknown mail'  ],
+    );
+
+    for my $section (@sections) {
+        my ($type, $label) = @$section;
+        next unless $EXTRA->{'totalcount'}->{$type};
+
+        $::EXTRA->{$label} = sprintf ("%5d / %12d Bytes",
+            $EXTRA->{'totalcount'}->{$type},
+            $EXTRA->{'totalamount'}->{$type} || 0);
+    }
+}