1 package Onis::Data::Core;
5 Onis::Data::Core - User management
9 Store data to the internal structure, care about users, nicks and idents and
10 dispatch to plugins. The core of the data even..
18 use Onis::Config qw(get_config);
19 use Onis::Users qw(ident_to_name);
20 use Onis::Data::Persistent;
21 use Onis::Parser::Persistent qw(get_absolute_time);
23 =head1 NAMING CONVENTION
25 Each and every person in the IRC can be identified by a three-tupel: B<nick>,
26 B<user> and B<host>, most often seen as I<nick!user@host>.
28 The combination of B<user> and B<host> is called an B<ident> here and written
29 I<user@host>. The combination of all three parts is called a B<chatter> here,
30 though it's rarely used.
32 A B<name> is the name of the "user" as defined in the F<users.conf>. Therefore,
33 the F<users.conf> defines a mapping of B<chatter> -E<gt> B<name>.
37 our $GeneralCounters = Onis::Data::Persistent->new ('GeneralCounters', 'key', 'value');
38 our $NickToIdentCache = Onis::Data::Persistent->new ('NickToIdentCache', 'nick', 'ident');
39 our $ChatterList = Onis::Data::Persistent->new ('ChatterList', 'chatter', 'counter');
40 our $ChannelNames = Onis::Data::Persistent->new ('ChannelNames', 'channel', 'counter');
42 @Onis::Data::Core::EXPORT_OK =
44 store unsharp calculate_nicks
46 get_all_nicks get_channel get_main_nick nick_to_ident ident_to_nick nick_to_name
47 get_total_lines get_most_recent_time nick_rename print_output register_plugin
49 @Onis::Data::Core::ISA = ('Exporter');
51 our $LinesThisRun = 0;
53 our $PluginCallbacks = {};
54 our $OutputCallbacks = [];
58 our %NickToIdent = ();
59 our %IdentToNick = ();
61 =head1 CONFIGURATION OPTIONS
65 =item B<unsharp>: I<medium>;
67 Sets the amount of unsharping onis should do. Valid options are I<none>,
68 I<light>, I<medium> and I<hard>.
74 does not do any unsharping.
78 Leaves IP-addresses as they are. The deepest subdomains containing numbers have
79 those numbers removed. So C<dsl-084-056-107-131.arcor-ip.net> becomes
80 C<dsl-*-*-*-*.arcor-ip.net>.
84 Removes the last byte from IP-adresses. So C<84.56.107.131> becomes
85 C<84.56.107.*>. Hostnames have the deepest subdomains removed if they contain
86 numers, so C<dsl-084-056-107-131.arcor-ip.net> becomes C<*.arcor-ip.net> while
87 C<shell.franken.de> is not modified. This is the default and recommended
92 Handles IP-addresses as I<medium>. Hostnames have all subdomains removed, so
93 C<p5493EC60.dip.t-dialin.net> becomes C<*.t-dialin.net> and C<shell.franken.de>
94 becomes C<*.franken.de>.
100 our $UNSHARP = 'MEDIUM';
101 if (get_config ('unsharp'))
103 my $tmp = get_config ('unsharp');
107 if ($tmp eq 'NONE' or $tmp eq 'LIGHT'
115 print STDERR $/, __FILE__, ": ``$tmp'' is not a valid value for config option ``unsharp''.",
116 $/, __FILE__, ": Using standard value ``MEDIUM''.";
120 =item B<channel>: I<name>;
122 Sets the name of the channel. This is mostly automatically figured out, use
123 this if onis doesn't get it right or you want another name..
132 my $VERSION = '$Id$';
133 print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
137 =head1 EXPORTED FUNCTIONS
141 =item B<store> (I<$type>, I<$data>)
143 Passes I<$data> (a hashref) to all plugins which registered for I<$type>. This
144 is the actual workhorse when parsing the file since it will be called once for
147 It will fill I<$data> with I<host>, I<user> and I<ident> if these fields are
148 missing but have been seen for this nick before.
155 my $type = $data->{'type'};
156 my ($nick, $user, $host);
159 if (!defined ($type))
161 print STDERR $/, __FILE__, ": Plugin data did not include a type. This line will be skipped." if ($::DEBUG & 0x20);
165 if (!defined ($data->{'nick'}))
167 print STDERR $/, __FILE__, ": Plugin data did not include a nick. This line will be skipped." if ($::DEBUG & 0x20);
171 $nick = $data->{'nick'};
173 if (defined ($data->{'host'}))
178 ($user, $host) = unsharp ($data->{'host'});
179 $ident = "$user\@$host";
181 $data->{'host'} = $host;
182 $data->{'user'} = $user;
183 $data->{'ident'} = $ident;
185 $NickToIdentCache->put ($nick, $ident);
187 $chatter = "$nick!$ident";
188 ($counter) = $ChatterList->get ($chatter);
189 $counter ||= 0; $counter++;
190 $ChatterList->put ($chatter, $counter);
192 elsif (($ident) = $NickToIdentCache->get ($nick))
194 my $chatter = "$nick!$ident";
196 ($user, $host) = split (m/@/, $ident);
198 $data->{'host'} = $host;
199 $data->{'user'} = $user;
200 $data->{'ident'} = $ident;
202 ($counter) = $ChatterList->get ($chatter);
203 $counter ||= 0; $counter++;
204 $ChatterList->put ($chatter, $counter);
208 $data->{'host'} = $host = '';
209 $data->{'user'} = $user = '';
210 $data->{'ident'} = $ident = '';
213 if ($::DEBUG & 0x0100)
215 print STDERR $/, __FILE__, ": id ($nick) = ", $ident;
218 if (defined ($data->{'channel'}))
220 my $chan = lc ($data->{'channel'});
221 my ($count) = $ChannelNames->get ($chan);
222 $count ||= 0; $count++;
223 $ChannelNames->put ($chan, $count);
226 if (!defined ($data->{'epoch'}))
228 $data->{'epoch'} = get_absolute_time ();
231 if ($::DEBUG & 0x400)
233 my @keys = keys (%$data);
237 my $val = $data->{$key};
238 print STDERR $/, __FILE__, ': ';
239 printf STDERR ("%10s: %s", $key, $val);
244 my ($counter) = $GeneralCounters->get ('lines_total');
247 $GeneralCounters->put ('lines_total', $counter);
249 my ($time) = $GeneralCounters->get ('most_recent_time');
251 $time = $data->{'epoch'} if ($time < $data->{'epoch'});
252 $GeneralCounters->put ('most_recent_time', $time);
257 if (defined ($PluginCallbacks->{$type}))
259 for (@{$PluginCallbacks->{$type}})
268 =item (I<$user>, I<$host>) = B<unsharp> (I<$ident>)
270 Takes an ident (i.e. a user-host-pair, e.g. I<user@host.domain.com> or
271 I<login@123.123.123.123>) and "unsharps it". The unsharp version is then
286 print STDERR $/, __FILE__, ": Unsharp ``$ident''" if ($::DEBUG & 0x100);
288 ($user, $host) = split (m/@/, $ident, 2);
290 @parts = split (m/\./, $host);
291 $num_parts = scalar (@parts);
293 if (($UNSHARP ne 'NONE')
294 and ($user =~ m/^[\~\^\-\+\=](.+)$/))
299 if ($UNSHARP eq 'NONE')
301 return ($user, $host);
303 elsif ($host =~ m/^[\d\.]{7,15}$/)
305 if ($UNSHARP ne 'LIGHT')
312 for ($i = 0; $i < ($num_parts - 2); $i++)
314 if ($UNSHARP eq 'LIGHT')
316 if ($parts[$i] !~ s/\d+/*/g)
321 elsif ($UNSHARP eq 'MEDIUM')
323 if ($parts[$i] =~ m/\d/)
332 else # ($UNSHARP eq 'HARD')
339 $host = lc (join ('.', @parts));
340 $host =~ s/\*(?:\.\*)+/*/;
342 print STDERR " -> ``$user\@$host''" if ($::DEBUG & 0x100);
343 return ($user, $host);
346 =item B<calculate_nicks> ()
348 Iterates over all chatters found so far, trying to figure out which belong to
349 the same person. This function has to be called before any calls to
350 B<get_all_nicks>, B<get_main_nick>, B<get_print_name> and B<nick_to_ident>.
352 This is normally the step after having parsed all files and before doing any
353 output. After this function has been run all the other informative functions
354 return actually usefull information..
356 It does the following: First, it iterates over all chatters and splits them up
357 into nicks and idents. If a (user)name is found for the ident it (the ident) is
358 replaced with it (the name).
360 In the second step we iterate over all nicks that have been found and
361 determines the most active ident for each nick. After this has been done each
362 nick is associated with exactly one ident, but B<not> vice versa.
364 The final step is to iterate over all idents and determine the most active nick
365 for each ident. After some thought you will agree that now each ident exists
366 only once and so does every nick.
377 for ($ChatterList->keys ())
380 my ($nick, $ident) = split (m/!/, $chatter);
381 my $name = ident_to_name ($ident);
382 my ($counter) = $ChatterList->get ($chatter);
384 $nicks->{$nick}{$ident} = 0 unless (defined ($nicks->{$nick}{$ident}));
385 $nicks->{$nick}{$ident} += $counter;
391 my $this_ident = 'unidentified';
396 for (keys %{$nicks->{$this_nick}})
399 my $name = ident_to_name ($ident);
400 my $num = $nicks->{$this_nick}{$ident};
406 if (($num >= $this_max) or !$this_name)
409 $this_ident = $ident;
415 if (($num >= $this_max) and !$this_name)
418 $this_ident = $ident;
423 print $/, __FILE__, ": max_ident ($this_nick) = $this_ident" if ($::DEBUG & 0x100);
425 if ($this_ident ne 'unidentified')
429 $name2nick->{$this_name}{$this_nick} = 0 unless (defined ($name2nick->{$this_name}{$this_nick}));
430 $name2nick->{$this_name}{$this_nick} += $this_total;
432 $name2ident->{$this_name}{$this_ident} = 0 unless (defined ($name2nick->{$this_name}{$this_ident}));
433 $name2ident->{$this_name}{$this_ident} += $this_total;
437 $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick}));
438 $idents->{$this_ident}{$this_nick} += $this_total;
441 elsif ($::DEBUG & 0x100)
443 print STDERR $/, __FILE__, ": Ignoring unidentified nick ``$this_nick''";
452 my @other_nicks = ();
454 my @nicks = keys (%{$idents->{$this_ident}});
459 my $num = $idents->{$this_ident}{$nick};
461 if ($num > $this_max)
463 if ($this_nick) { push (@other_nicks, $this_nick); }
469 push (@other_nicks, $nick);
473 print STDERR $/, __FILE__, ": max_nick ($this_ident) = $this_nick" if ($::DEBUG & 0x100);
475 for (@other_nicks, $this_nick)
477 push (@AllNicks, $_);
478 $NickToNick{$_} = $this_nick;
479 $NickToIdent{$_} = $this_ident;
482 $IdentToNick{$this_ident} = $this_nick;
485 for (keys %$name2nick)
492 my @other_nicks = ();
493 my @other_idents = ();
495 for (keys %{$name2nick->{$name}})
498 my $num = $name2nick->{$name}{$nick};
502 push (@other_nicks, $max_nick) if ($max_nick);
508 push (@other_nicks, $nick);
513 for (keys %{$name2ident->{$name}})
516 my $num = $name2ident->{$name}{$ident};
520 push (@other_idents, $max_ident) if ($max_ident);
526 push (@other_idents, $ident);
530 for (@other_nicks, $max_nick)
532 push (@AllNicks, $_);
533 $NickToNick{$_} = $max_nick;
534 $NickToIdent{$_} = $max_ident;
537 for (@other_idents, $max_ident)
539 $IdentToNick{$_} = $max_nick;
544 =item I<@nicks> = B<get_all_nicks> ()
546 Returns an array of all seen nicks.
555 =item I<$channel> = B<get_channel> ()
557 Returns the name of the channel we're generating stats for.
563 my $chan = '#unknown';
564 if (get_config ('channel'))
566 $chan = get_config ('channel');
571 for ($ChannelNames->keys ())
574 my ($num) = $ChannelNames->get ($c);
575 if (defined ($num) and ($num > $max))
583 # Fix network-safe channel named (RFC 2811)
584 if ($chan =~ m/^![A-Z0-9]{5}(.+)/)
592 =item I<$main> = B<get_main_nick> (I<$nick>)
594 Returns the main nick for I<$nick> or an empty string if the nick is unknown..
601 if (defined ($NickToNick{$nick}))
603 return ($NickToNick{$nick});
611 =item I<$ident> = B<nick_to_ident> (I<$nick>)
613 Returns the ident for this nick or an empty string if unknown. Before
614 B<calculate_nicks> is run it will use the database to find the most recent
615 mapping. After B<calculate_nicks> is run the calculated mapping will be used.
626 if (defined ($NickToIdent{$nick}))
628 $ident = $NickToIdent{$nick};
633 ($ident) = $NickToIdentCache->get ($nick);
640 =item I<$nick> = B<ident_to_nick> (I<$ident>)
642 Returns the nick for the given ident or an empty string if unknown.
650 if (defined ($IdentToNick{$ident}))
652 return ($IdentToNick{$ident});
660 =item I<$name> = B<nick_to_name> (I<$nick>)
662 Return the name associated with I<$nick>. This function uses B<ident_to_name>
663 (see L<Onis::Users>).
670 my $ident = nick_to_ident ($nick);
674 return (ident_to_name ($ident));
682 =item I<$lines> = B<get_total_lines> ()
684 Returns the total number of lines parsed so far.
690 my ($total) = $GeneralCounters->get ('lines_total');
692 return (qw()) unless ($total);
694 return ($total, $LinesThisRun);
697 =item I<$epoch> = B<get_most_recent_time> ()
699 Returns the epoch of the most recent line received from the parser.
703 sub get_most_recent_time
705 my ($time) = $GeneralCounters->get ('most_recent_time');
711 =item B<nick_rename> (I<$old_nick>, I<$new_nick>)
713 Keeps track of a nick's hostname if the nick changes.
719 my $old_nick = shift;
720 my $new_nick = shift;
723 ($ident) = $NickToIdentCache->get ($old_nick);
725 if (defined ($ident) and ($ident))
727 $NickToIdentCache->put ($new_nick, $ident);
731 =item B<print_output> ()
733 Print the output. Should be called only once..
740 if (!get_total_lines ())
742 print STDERR <<'MESSAGE';
746 The most common reasons for this are:
747 - The logfile used was empty.
748 - The ``logtype'' setting did not match the logfile.
749 - The logfile did not include a date.
757 for (@$OutputCallbacks)
763 =item I<$data> = B<register_plugin> (I<$type>, I<$sub_ref>)
765 Register a subroutine for the given type. Returns a reference to the internal
766 data object. This will change soon, don't use it anymore if possible.
776 if (ref ($sub_ref) ne "CODE")
778 print STDERR $/, __FILE__, ": Plugin tried to register a non-code reference. Ignoring it.";
782 if ($type eq 'OUTPUT')
784 push (@$OutputCallbacks, $sub_ref);
788 if (!defined ($PluginCallbacks->{$type}))
790 $PluginCallbacks->{$type} = [];
794 push (@{$PluginCallbacks->{$type}}, $sub_ref);
796 print STDERR $/, __FILE__, ': ', scalar (caller ()), " registered for ``$type''." if ($::DEBUG & 0x800);
803 Florian octo Forster E<lt>octo at verplant.orgE<gt>