X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=lib%2FOnis%2FData%2FCore.pm;h=c47af2fea2d39553acc9902e7df8cfacc070a915;hb=fdbd3fe48e5309f6abd357160eb83e1866221f41;hp=29a5b4b1c6794347a4f5977a008702f2441ee08c;hpb=682812f41e5b83e006d5c482eaf8e33883dc7bce;p=onis.git diff --git a/lib/Onis/Data/Core.pm b/lib/Onis/Data/Core.pm index 29a5b4b..c47af2f 100644 --- a/lib/Onis/Data/Core.pm +++ b/lib/Onis/Data/Core.pm @@ -15,9 +15,10 @@ use strict; use warnings; use Exporter; -use Onis::Config qw#get_config#; -use Onis::Users qw#host_to_username nick_to_username#; +use Onis::Config qw(get_config); +use Onis::Users qw(chatter_to_name); use Onis::Data::Persistent; +use Onis::Parser::Persistent qw(get_absolute_time); =head1 NAMING CONVENTION @@ -33,32 +34,70 @@ the F defines a mapping of B -E B. =cut -our $Nick2Ident = Onis::Data::Persistent->new ('Nick2Ident', 'nick', 'ident'); +our $GeneralCounters = Onis::Data::Persistent->new ('GeneralCounters', 'key', 'value'); +our $NickToIdentCache = Onis::Data::Persistent->new ('NickToIdentCache', 'nick', 'ident'); our $ChatterList = Onis::Data::Persistent->new ('ChatterList', 'chatter', 'counter'); our $ChannelNames = Onis::Data::Persistent->new ('ChannelNames', 'channel', 'counter'); +@Onis::Data::Core::EXPORT_OK = +qw( + store unsharp calculate_nicks - -@Onis::Data::Core::EXPORT_OK = qw#get_all_nicks get_channel - nick_to_ident - ident_to_nick ident_to_name - get_main_nick - get_total_lines nick_rename print_output - register_plugin store get_print_name#; + get_all_nicks get_channel get_main_nick + nick_to_ident ident_to_nick + nick_to_name ident_to_name + get_total_lines get_most_recent_time nick_rename print_output register_plugin +); @Onis::Data::Core::ISA = ('Exporter'); -our $DATA = init ('$DATA', 'hash'); +our $LinesThisRun = 0; our $PluginCallbacks = {}; -our $OUTPUT = []; +our $OutputCallbacks = []; our @AllNicks = (); -our @ALLNAMES = (); -our %NickMap = (); + +our %NickToNick = (); our %NickToIdent = (); our %IdentToNick = (); -our $LASTRUN_DAYS = 0; +=head1 CONFIGURATION OPTIONS + +=over 4 + +=item B: I; + +Sets the amount of unsharping onis should do. Valid options are I, +I, I and I. + +=over 4 + +=item I + +does not do any unsharping. + +=item I + +Leaves IP-addresses as they are. The deepest subdomains containing numbers have +those numbers removed. So C becomes +C. +=item I + +Removes the last byte from IP-adresses. So C<84.56.107.131> becomes +C<84.56.107.*>. Hostnames have the deepest subdomains removed if they contain +numers, so C becomes C<*.arcor-ip.net> while +C is not modified. This is the default and recommended +behavior. + +=item I + +Handles IP-addresses as I. Hostnames have all subdomains removed, so +C becomes C<*.t-dialin.net> and C +becomes C<*.franken.de>. + +=back + +=cut our $UNSHARP = 'MEDIUM'; if (get_config ('unsharp')) @@ -80,32 +119,19 @@ if (get_config ('unsharp')) } } -if (!%$DATA) -{ - $DATA->{'idents_of_nick'} = {}; - $DATA->{'channel'} = {}; - $DATA->{'total_lines'} = 0; -} +=item B: I; -if (defined ($DATA->{'lastrun'})) -{ - my $last = $DATA->{'lastrun'}; - my $now = time; +Sets the name of the channel. This is mostly automatically figured out, use +this if onis doesn't get it right or you want another name.. - my $diff = ($now - $last) % 86400; +=back - if ($diff > 0) - { - $DATA->{'lastrun'} = $now; - $LASTRUN_DAYS = $diff; - } -} -else -{ - $DATA->{'lastrun'} = time; -} +=cut + +# TODO +# - lastrun -my $VERSION = '$Id: Core.pm,v 1.14 2004/10/31 15:00:32 octo Exp $'; +my $VERSION = '$Id$'; print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); return (1); @@ -158,16 +184,17 @@ sub store $data->{'user'} = $user; $data->{'ident'} = $ident; - $Nick2Ident->put ($nick, $ident); + $NickToIdentCache->put ($nick, $ident); $chatter = "$nick!$ident"; ($counter) = $ChatterList->get ($chatter); $counter ||= 0; $counter++; $ChatterList->put ($chatter, $counter); } - elsif (($ident) = $Nick2Ident->get ($nick)) + elsif (($ident) = $NickToIdentCache->get ($nick)) { my $chatter = "$nick!$ident"; + my $counter; ($user, $host) = split (m/@/, $ident); $data->{'host'} = $host; @@ -198,6 +225,11 @@ sub store $ChannelNames->put ($chan, $count); } + if (!defined ($data->{'epoch'})) + { + $data->{'epoch'} = get_absolute_time (); + } + if ($::DEBUG & 0x400) { my @keys = keys (%$data); @@ -210,7 +242,19 @@ sub store } } - #$DATA->{'total_lines'}++; + { + my ($counter) = $GeneralCounters->get ('lines_total'); + $counter ||= 0; + $counter++; + $GeneralCounters->put ('lines_total', $counter); + + my ($time) = $GeneralCounters->get ('most_recent_time'); + $time ||= 0; + $time = $data->{'epoch'} if ($time < $data->{'epoch'}); + $GeneralCounters->put ('most_recent_time', $time); + + $LinesThisRun++; + } if (defined ($PluginCallbacks->{$type})) { @@ -229,8 +273,6 @@ Takes an ident (i.e. a user-host-pair, e.g. I or I) and "unsharps it". The unsharp version is then returned. -What unsharp exactly does is described in the F. - =cut sub unsharp @@ -329,51 +371,49 @@ only once and so does every nick. sub calculate_nicks { - my $nicks = {}; - my $idents = {}; + my $nicks = {}; + my $idents = {}; + my $name2nick = {}; + my $name2ident = {}; for ($ChatterList->keys ()) { - my $chatter = shift; + my $chatter = $_; my ($nick, $ident) = split (m/!/, $chatter); - my $name = host_to_username ($chatter); my ($counter) = $ChatterList->get ($chatter); - my $temp = $name ? $name : $ident; - - next if (lc ($name) eq 'ignore'); - - $nicks->{$nick}{$temp} = 0 unless (defined ($nicks->{$nick}{$temp})); - $nicks->{$nick}{$temp} += $counter; + $nicks->{$nick}{$ident} = 0 unless (defined ($nicks->{$nick}{$ident})); + $nicks->{$nick}{$ident} += $counter; } for (keys %$nicks) { my $this_nick = $_; my $this_ident = 'unidentified'; + my $this_name = ''; my $this_total = 0; my $this_max = 0; - my $this_ident_is_user = 0; for (keys %{$nicks->{$this_nick}}) { my $ident = $_; + my $name = chatter_to_name ("$this_nick!$ident"); my $num = $nicks->{$this_nick}{$ident}; $this_total += $num; - if ($ident =~ m/@/) # $ident is a (user)name + if ($name) { - if (($num >= $this_max) or !$this_ident_is_user) + if (($num >= $this_max) or !$this_name) { $this_max = $num; $this_ident = $ident; - $this_ident_is_user = 1; + $this_name = $name; } } else { - if (($num >= $this_max) and !$this_ident_is_user) + if (($num >= $this_max) and !$this_name) { $this_max = $num; $this_ident = $ident; @@ -385,8 +425,19 @@ sub calculate_nicks if ($this_ident ne 'unidentified') { - $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick})); - $idents->{$this_ident}{$this_nick} += $this_total; + if ($this_name) + { + $name2nick->{$this_name}{$this_nick} = 0 unless (defined ($name2nick->{$this_name}{$this_nick})); + $name2nick->{$this_name}{$this_nick} += $this_total; + + $name2ident->{$this_name}{$this_ident} = 0 unless (defined ($name2nick->{$this_name}{$this_ident})); + $name2ident->{$this_name}{$this_ident} += $this_total; + } + else + { + $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick})); + $idents->{$this_ident}{$this_nick} += $this_total; + } } elsif ($::DEBUG & 0x100) { @@ -406,7 +457,7 @@ sub calculate_nicks for (@nicks) { my $nick = $_; - my $num = $nicks_of_ident->{$this_ident}{$nick}; + my $num = $idents->{$this_ident}{$nick}; if ($num > $this_max) { @@ -425,13 +476,70 @@ sub calculate_nicks for (@other_nicks, $this_nick) { push (@AllNicks, $_); - $NickMap{$_} = $this_nick; - # FIXME + $NickToNick{$_} = $this_nick; $NickToIdent{$_} = $this_ident; } $IdentToNick{$this_ident} = $this_nick; } + + for (keys %$name2nick) + { + my $name = $_; + my $max_num = 0; + my $max_nick = ''; + my $max_ident = ''; + + my @other_nicks = (); + my @other_idents = (); + + for (keys %{$name2nick->{$name}}) + { + my $nick = $_; + my $num = $name2nick->{$name}{$nick}; + + if ($num > $max_num) + { + push (@other_nicks, $max_nick) if ($max_nick); + $max_nick = $nick; + $max_num = $num; + } + else + { + push (@other_nicks, $nick); + } + } + + $max_num = 0; + for (keys %{$name2ident->{$name}}) + { + my $ident = $_; + my $num = $name2ident->{$name}{$ident}; + + if ($num > $max_num) + { + push (@other_idents, $max_ident) if ($max_ident); + $max_ident = $ident; + $max_num = $num; + } + else + { + push (@other_idents, $ident); + } + } + + for (@other_nicks, $max_nick) + { + push (@AllNicks, $_); + $NickToNick{$_} = $max_nick; + $NickToIdent{$_} = $max_ident; + } + + for (@other_idents, $max_ident) + { + $IdentToNick{$_} = $max_nick; + } + } } =item I<@nicks> = B () @@ -453,27 +561,30 @@ Returns the name of the channel we're generating stats for. sub get_channel { - my $chan; + my $chan = '#unknown'; if (get_config ('channel')) { $chan = get_config ('channel'); } - elsif (keys (%{$DATA->{'channel'}})) - { - ($chan) = sort - { - $DATA->{'channel'}{$b} <=> $DATA->{'channel'}{$a} - } (keys (%{$DATA->{'channel'}})); - } else { - $chan = '#unknown'; + my $max = 0; + for ($ChannelNames->keys ()) + { + my $c = $_; + my ($num) = $ChannelNames->get ($c); + if (defined ($num) and ($num > $max)) + { + $max = $num; + $chan = $c; + } + } } # Fix network-safe channel named (RFC 2811) - if ($chan =~ m/^![A-Z0-9]{5}.+/) + if ($chan =~ m/^![A-Z0-9]{5}(.+)/) { - $chan =~ s/[A-Z0-9]{5}//; + $chan = '!' . $1; } return ($chan); @@ -488,9 +599,9 @@ Returns the main nick for I<$nick> or an empty string if the nick is unknown.. sub get_main_nick { my $nick = shift; - if (defined ($NickMap{$nick})) + if (defined ($NickToNick{$nick})) { - return ($NickMap{$nick}); + return ($NickToNick{$nick}); } else { @@ -500,16 +611,29 @@ sub get_main_nick =item I<$ident> = B (I<$nick>) -Returns the ident for this nick or an empty string if unknown. +Returns the ident for this nick or an empty string if unknown. Before +B is run it will use the database to find the most recent +mapping. After B is run the calculated mapping will be used. =cut sub nick_to_ident { my $nick = shift; + my $ident = ''; - my ($ident) = $Nick2Ident->get ($nick); - $ident ||= ''; + if (%NickToIdent) + { + if (defined ($NickToIdent{$nick})) + { + $ident = $NickToIdent{$nick}; + } + } + else + { + ($ident) = $NickToIdentCache->get ($nick); + $ident ||= ''; + } return ($ident); } @@ -524,13 +648,7 @@ sub ident_to_nick { my $ident = shift; - if (!defined ($ident) - or (lc ($ident) eq 'ignore') - or (lc ($ident) eq 'unidentified')) - { - return (''); - } - elsif (defined ($IdentToNick{$ident})) + if (defined ($IdentToNick{$ident})) { return ($IdentToNick{$ident}); } @@ -540,53 +658,46 @@ sub ident_to_nick } } -=item I<$name> = B (I<$ident>) +=item I<$name> = B (I<$nick>) -Returns the printable version of the name for the chatter identified by -I<$ident>. Returns an empty string if the ident is not known. +Return the name associated with I<$nick>. =cut -sub ident_to_name +sub nick_to_name { - my $ident = shift; - my $nick = ident_to_nick ($ident); - my $name; - - if (!$nick) + my $nick = shift; + my $ident = nick_to_ident ($nick); + + if ($ident) + { + return (chatter_to_name ("$nick!$ident")); + } + else { return (''); } - - $name = get_print_name ($nick); - - return ($name); } -=item I<$name> = B (I<$nick>) +=item I<$name> = B (I<$ident>) -Returns the printable version of the name for the nick I<$nick> or I<$nick> if -unknown. +Returns the name associated with I<$ident>. =cut -sub get_print_name +sub ident_to_name { - my $nick = shift; - my $ident = ''; - my $name = $nick; + my $ident = shift; + my $nick = ident_to_nick ($ident); - if (defined ($NickToIdent{$nick})) + if ($nick) { - $ident = $NickToIdent{$nick}; + return (chatter_to_name ("$nick!$ident")); } - - if (($ident !~ m/^[^@]+@.+$/) and $ident) + else { - $name = $ident; + return (''); } - - return ($name); } =item I<$lines> = B () @@ -597,7 +708,25 @@ Returns the total number of lines parsed so far. sub get_total_lines { - return ($DATA->{'total_lines'}); + my ($total) = $GeneralCounters->get ('lines_total'); + + return (qw()) unless ($total); + + return ($total, $LinesThisRun); +} + +=item I<$epoch> = B () + +Returns the epoch of the most recent line received from the parser. + +=cut + +sub get_most_recent_time +{ + my ($time) = $GeneralCounters->get ('most_recent_time'); + $time ||= 0; + + return ($time); } =item B (I<$old_nick>, I<$new_nick>) @@ -610,24 +739,13 @@ sub nick_rename { my $old_nick = shift; my $new_nick = shift; + my $ident; - if (defined ($DATA->{'host_cache'}{$old_nick})) - { - my $host = $DATA->{'host_cache'}{$old_nick}; - $DATA->{'host_cache'}{$new_nick} = $host; - - if (!defined ($DATA->{'hosts_of_nick'}{$new_nick}{$host})) - { - $DATA->{'hosts_of_nick'}{$new_nick}{$host} = 1; - } - } + ($ident) = $NickToIdentCache->get ($old_nick); - if (defined ($DATA->{'byident'}{"$old_nick\@unidentified"})) + if (defined ($ident) and ($ident)) { - # Other data may be overwritten, but I don't care here.. - # This should be a extremely rare case.. - $DATA->{'byident'}{"$new_nick\@unidentified"} = $DATA->{'byident'}{"$old_nick\@unidentified"}; - delete ($DATA->{'byident'}{"$old_nick\@unidentified"}); + $NickToIdentCache->put ($new_nick, $ident); } } @@ -639,7 +757,9 @@ Print the output. Should be called only once.. sub print_output { - if (!$DATA->{'total_lines'}) + my ($total, $this) = get_total_lines (); + + if (!$total) { print STDERR <<'MESSAGE'; @@ -655,14 +775,11 @@ MESSAGE } calculate_nicks (); - merge_idents (); - for (@$OUTPUT) + for (@$OutputCallbacks) { &$_ (); } - - delete ($DATA->{'byname'}); } =item I<$data> = B (I<$type>, I<$sub_ref>) @@ -686,7 +803,7 @@ sub register_plugin if ($type eq 'OUTPUT') { - push (@$OUTPUT, $sub_ref); + push (@$OutputCallbacks, $sub_ref); } else { @@ -699,99 +816,6 @@ sub register_plugin push (@{$PluginCallbacks->{$type}}, $sub_ref); print STDERR $/, __FILE__, ': ', scalar (caller ()), " registered for ``$type''." if ($::DEBUG & 0x800); - - return ($DATA); -} - -=item B () - -Merges idents. Does magic, don't interfere ;) - -=cut - -sub merge_idents -{ - my @idents = keys (%IdentToNick); - - for (@idents) - { - my $ident = $_; - my $name = ident_to_name ($ident); - - if (!defined ($DATA->{'byident'}{$ident})) - { - next; - } - - if (!defined ($DATA->{'byname'}{$name})) - { - $DATA->{'byname'}{$name} = {}; - } - - add_hash ($DATA->{'byname'}{$name}, $DATA->{'byident'}{$ident}); - } -} - -sub add_hash -{ - my $dst = shift; - my $src = shift; - - my @keys = keys (%$src); - - for (@keys) - { - my $key = $_; - my $val = $src->{$key}; - - if (!defined ($dst->{$key})) - { - $dst->{$key} = $val; - } - elsif (!ref ($val)) - { - if ($val =~ m/\D/) - { - # FIXME - print STDERR $/, __FILE__, ": ``$key'' = ``$val''" if ($::DEBUG); - } - else - { - $dst->{$key} += $val; - } - } - elsif (ref ($val) ne ref ($dst->{$key})) - { - print STDERR $/, __FILE__, ": Destination and source type do not match!" if ($::DEBUG); - } - elsif (ref ($val) eq "HASH") - { - add_hash ($dst->{$key}, $val); - } - elsif (ref ($val) eq "ARRAY") - { - my $i = 0; - for (@$val) - { - my $j = $_; - if ($j =~ m/\D/) - { - # FIXME - print STDERR $/, __FILE__, ": ``", $key, '[', $i, "]'' = ``$j''" if ($::DEBUG); - } - else - { - $dst->{$key}->[$i] += $j; - } - $i++; - } - } - else - { - my $type = ref ($val); - print STDERR $/, __FILE__, ": Reference type ``$type'' is not supported!", $/; - } - } } =back