1 package Onis::Data::Core;
5 Onis::Data::Core - User management
9 Store data to the internal structure, care about users, nicks and idents and
10 dispatch to plugins. The core of the data even..
18 use Onis::Config qw(get_config);
19 use Onis::Users qw(chatter_to_name);
20 use Onis::Data::Persistent;
21 use Onis::Parser::Persistent qw(get_absolute_time);
23 =head1 NAMING CONVENTION
25 Each and every person in the IRC can be identified by a three-tupel: B<nick>,
26 B<user> and B<host>, most often seen as I<nick!user@host>.
28 The combination of B<user> and B<host> is called an B<ident> here and written
29 I<user@host>. The combination of all three parts is called a B<chatter> here,
30 though it's rarely used.
32 A B<name> is the name of the "user" as defined in the F<users.conf>. Therefore,
33 the F<users.conf> defines a mapping of B<chatter> -E<gt> B<name>.
37 our $GeneralCounters = Onis::Data::Persistent->new ('GeneralCounters', 'key', 'value');
38 our $NickToIdentCache = Onis::Data::Persistent->new ('NickToIdentCache', 'nick', 'ident');
39 our $ChatterList = Onis::Data::Persistent->new ('ChatterList', 'chatter', 'counter');
40 our $ChannelNames = Onis::Data::Persistent->new ('ChannelNames', 'channel', 'counter');
42 @Onis::Data::Core::EXPORT_OK =
44 store unsharp calculate_nicks
46 get_all_nicks get_channel get_main_nick
47 nick_to_ident ident_to_nick
48 nick_to_name ident_to_name
49 get_total_lines get_most_recent_time nick_rename print_output register_plugin
51 @Onis::Data::Core::ISA = ('Exporter');
53 our $LinesThisRun = 0;
55 our $PluginCallbacks = {};
56 our $OutputCallbacks = [];
60 our %NickToIdent = ();
61 our %IdentToNick = ();
63 =head1 CONFIGURATION OPTIONS
67 =item B<unsharp>: I<medium>;
69 Sets the amount of unsharping onis should do. Valid options are I<none>,
70 I<light>, I<medium> and I<hard>.
76 does not do any unsharping.
80 Leaves IP-addresses as they are. The deepest subdomains containing numbers have
81 those numbers removed. So C<dsl-084-056-107-131.arcor-ip.net> becomes
82 C<dsl-*-*-*-*.arcor-ip.net>.
86 Removes the last byte from IP-adresses. So C<84.56.107.131> becomes
87 C<84.56.107.*>. Hostnames have the deepest subdomains removed if they contain
88 numers, so C<dsl-084-056-107-131.arcor-ip.net> becomes C<*.arcor-ip.net> while
89 C<shell.franken.de> is not modified. This is the default and recommended
94 Handles IP-addresses as I<medium>. Hostnames have all subdomains removed, so
95 C<p5493EC60.dip.t-dialin.net> becomes C<*.t-dialin.net> and C<shell.franken.de>
96 becomes C<*.franken.de>.
102 our $UNSHARP = 'MEDIUM';
103 if (get_config ('unsharp'))
105 my $tmp = get_config ('unsharp');
109 if ($tmp eq 'NONE' or $tmp eq 'LIGHT'
117 print STDERR $/, __FILE__, ": ``$tmp'' is not a valid value for config option ``unsharp''.",
118 $/, __FILE__, ": Using standard value ``MEDIUM''.";
122 =item B<channel>: I<name>;
124 Sets the name of the channel. This is mostly automatically figured out, use
125 this if onis doesn't get it right or you want another name..
134 my $VERSION = '$Id$';
135 print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
139 =head1 EXPORTED FUNCTIONS
143 =item B<store> (I<$type>, I<$data>)
145 Passes I<$data> (a hashref) to all plugins which registered for I<$type>. This
146 is the actual workhorse when parsing the file since it will be called once for
149 It will fill I<$data> with I<host>, I<user> and I<ident> if these fields are
150 missing but have been seen for this nick before.
157 my $type = $data->{'type'};
158 my ($nick, $user, $host);
161 if (!defined ($type))
163 print STDERR $/, __FILE__, ": Plugin data did not include a type. This line will be skipped." if ($::DEBUG & 0x20);
167 if (!defined ($data->{'nick'}))
169 print STDERR $/, __FILE__, ": Plugin data did not include a nick. This line will be skipped." if ($::DEBUG & 0x20);
173 $nick = $data->{'nick'};
175 if (defined ($data->{'host'}))
180 ($user, $host) = unsharp ($data->{'host'});
181 $ident = "$user\@$host";
183 $data->{'host'} = $host;
184 $data->{'user'} = $user;
185 $data->{'ident'} = $ident;
187 $NickToIdentCache->put ($nick, $ident);
189 $chatter = "$nick!$ident";
190 ($counter) = $ChatterList->get ($chatter);
191 $counter ||= 0; $counter++;
192 $ChatterList->put ($chatter, $counter);
194 elsif (($ident) = $NickToIdentCache->get ($nick))
196 my $chatter = "$nick!$ident";
198 ($user, $host) = split (m/@/, $ident);
200 $data->{'host'} = $host;
201 $data->{'user'} = $user;
202 $data->{'ident'} = $ident;
204 ($counter) = $ChatterList->get ($chatter);
205 $counter ||= 0; $counter++;
206 $ChatterList->put ($chatter, $counter);
210 $data->{'host'} = $host = '';
211 $data->{'user'} = $user = '';
212 $data->{'ident'} = $ident = '';
215 if ($::DEBUG & 0x0100)
217 print STDERR $/, __FILE__, ": id ($nick) = ", $ident;
220 if (defined ($data->{'channel'}))
222 my $chan = lc ($data->{'channel'});
223 my ($count) = $ChannelNames->get ($chan);
224 $count ||= 0; $count++;
225 $ChannelNames->put ($chan, $count);
228 if (!defined ($data->{'epoch'}))
230 $data->{'epoch'} = get_absolute_time ();
233 if ($::DEBUG & 0x400)
235 my @keys = keys (%$data);
239 my $val = $data->{$key};
240 print STDERR $/, __FILE__, ': ';
241 printf STDERR ("%10s: %s", $key, $val);
246 my ($counter) = $GeneralCounters->get ('lines_total');
249 $GeneralCounters->put ('lines_total', $counter);
251 my ($time) = $GeneralCounters->get ('most_recent_time');
253 $time = $data->{'epoch'} if ($time < $data->{'epoch'});
254 $GeneralCounters->put ('most_recent_time', $time);
259 if (defined ($PluginCallbacks->{$type}))
261 for (@{$PluginCallbacks->{$type}})
270 =item (I<$user>, I<$host>) = B<unsharp> (I<$ident>)
272 Takes an ident (i.e. a user-host-pair, e.g. I<user@host.domain.com> or
273 I<login@123.123.123.123>) and "unsharps it". The unsharp version is then
288 print STDERR $/, __FILE__, ": Unsharp ``$ident''" if ($::DEBUG & 0x100);
290 ($user, $host) = split (m/@/, $ident, 2);
292 @parts = split (m/\./, $host);
293 $num_parts = scalar (@parts);
295 if (($UNSHARP ne 'NONE')
296 and ($user =~ m/^[\~\^\-\+\=](.+)$/))
301 if ($UNSHARP eq 'NONE')
303 return ($user, $host);
305 elsif ($host =~ m/^[\d\.]{7,15}$/)
307 if ($UNSHARP ne 'LIGHT')
314 for ($i = 0; $i < ($num_parts - 2); $i++)
316 if ($UNSHARP eq 'LIGHT')
318 if ($parts[$i] !~ s/\d+/*/g)
323 elsif ($UNSHARP eq 'MEDIUM')
325 if ($parts[$i] =~ m/\d/)
334 else # ($UNSHARP eq 'HARD')
341 $host = lc (join ('.', @parts));
342 $host =~ s/\*(?:\.\*)+/*/;
344 print STDERR " -> ``$user\@$host''" if ($::DEBUG & 0x100);
345 return ($user, $host);
348 =item B<calculate_nicks> ()
350 Iterates over all chatters found so far, trying to figure out which belong to
351 the same person. This function has to be called before any calls to
352 B<get_all_nicks>, B<get_main_nick>, B<get_print_name> and B<nick_to_ident>.
354 This is normally the step after having parsed all files and before doing any
355 output. After this function has been run all the other informative functions
356 return actually usefull information..
358 It does the following: First, it iterates over all chatters and splits them up
359 into nicks and idents. If a (user)name is found for the ident it (the ident) is
360 replaced with it (the name).
362 In the second step we iterate over all nicks that have been found and
363 determines the most active ident for each nick. After this has been done each
364 nick is associated with exactly one ident, but B<not> vice versa.
366 The final step is to iterate over all idents and determine the most active nick
367 for each ident. After some thought you will agree that now each ident exists
368 only once and so does every nick.
379 for ($ChatterList->keys ())
382 my ($nick, $ident) = split (m/!/, $chatter);
383 my ($counter) = $ChatterList->get ($chatter);
385 $nicks->{$nick}{$ident} = 0 unless (defined ($nicks->{$nick}{$ident}));
386 $nicks->{$nick}{$ident} += $counter;
392 my $this_ident = 'unidentified';
397 for (keys %{$nicks->{$this_nick}})
400 my $name = chatter_to_name ("$this_nick!$ident");
401 my $num = $nicks->{$this_nick}{$ident};
403 next if ($name eq 'ignore');
409 if (($num >= $this_max) or !$this_name)
412 $this_ident = $ident;
418 if (($num >= $this_max) and !$this_name)
421 $this_ident = $ident;
426 print $/, __FILE__, ": max_ident ($this_nick) = $this_ident" if ($::DEBUG & 0x100);
428 if ($this_ident ne 'unidentified')
432 $name2nick->{$this_name}{$this_nick} = 0 unless (defined ($name2nick->{$this_name}{$this_nick}));
433 $name2nick->{$this_name}{$this_nick} += $this_total;
435 $name2ident->{$this_name}{$this_ident} = 0 unless (defined ($name2nick->{$this_name}{$this_ident}));
436 $name2ident->{$this_name}{$this_ident} += $this_total;
440 $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick}));
441 $idents->{$this_ident}{$this_nick} += $this_total;
444 elsif ($::DEBUG & 0x100)
446 print STDERR $/, __FILE__, ": Ignoring unidentified nick ``$this_nick''";
455 my @other_nicks = ();
457 my @nicks = keys (%{$idents->{$this_ident}});
462 my $num = $idents->{$this_ident}{$nick};
464 if ($num > $this_max)
466 if ($this_nick) { push (@other_nicks, $this_nick); }
472 push (@other_nicks, $nick);
476 print STDERR $/, __FILE__, ": max_nick ($this_ident) = $this_nick" if ($::DEBUG & 0x100);
478 for (@other_nicks, $this_nick)
480 push (@AllNicks, $_);
481 $NickToNick{$_} = $this_nick;
482 $NickToIdent{$_} = $this_ident;
485 $IdentToNick{$this_ident} = $this_nick;
488 for (keys %$name2nick)
495 my @other_nicks = ();
496 my @other_idents = ();
498 for (keys %{$name2nick->{$name}})
501 my $num = $name2nick->{$name}{$nick};
505 push (@other_nicks, $max_nick) if ($max_nick);
511 push (@other_nicks, $nick);
516 for (keys %{$name2ident->{$name}})
519 my $num = $name2ident->{$name}{$ident};
523 push (@other_idents, $max_ident) if ($max_ident);
529 push (@other_idents, $ident);
533 for (@other_nicks, $max_nick)
535 push (@AllNicks, $_);
536 $NickToNick{$_} = $max_nick;
537 $NickToIdent{$_} = $max_ident;
540 for (@other_idents, $max_ident)
542 $IdentToNick{$_} = $max_nick;
547 =item I<@nicks> = B<get_all_nicks> ()
549 Returns an array of all seen nicks.
558 =item I<$channel> = B<get_channel> ()
560 Returns the name of the channel we're generating stats for.
566 my $chan = '#unknown';
567 if (get_config ('channel'))
569 $chan = get_config ('channel');
574 for ($ChannelNames->keys ())
577 my ($num) = $ChannelNames->get ($c);
578 if (defined ($num) and ($num > $max))
586 # Fix network-safe channel named (RFC 2811)
587 if ($chan =~ m/^![A-Z0-9]{5}(.+)/)
595 =item I<$main> = B<get_main_nick> (I<$nick>)
597 Returns the main nick for I<$nick> or an empty string if the nick is unknown..
604 if (defined ($NickToNick{$nick}))
606 return ($NickToNick{$nick});
614 =item I<$ident> = B<nick_to_ident> (I<$nick>)
616 Returns the ident for this nick or an empty string if unknown. Before
617 B<calculate_nicks> is run it will use the database to find the most recent
618 mapping. After B<calculate_nicks> is run the calculated mapping will be used.
629 if (defined ($NickToIdent{$nick}))
631 $ident = $NickToIdent{$nick};
636 ($ident) = $NickToIdentCache->get ($nick);
643 =item I<$nick> = B<ident_to_nick> (I<$ident>)
645 Returns the nick for the given ident or an empty string if unknown.
653 if (defined ($IdentToNick{$ident}))
655 return ($IdentToNick{$ident});
663 =item I<$name> = B<nick_to_name> (I<$nick>)
665 Return the name associated with I<$nick>.
672 my $ident = nick_to_ident ($nick);
676 return (chatter_to_name ("$nick!$ident"));
684 =item I<$name> = B<ident_to_name> (I<$ident>)
686 Returns the name associated with I<$ident>.
693 my $nick = ident_to_nick ($ident);
697 return (chatter_to_name ("$nick!$ident"));
705 =item I<$lines> = B<get_total_lines> ()
707 Returns the total number of lines parsed so far.
713 my ($total) = $GeneralCounters->get ('lines_total');
715 return (qw()) unless ($total);
717 return ($total, $LinesThisRun);
720 =item I<$epoch> = B<get_most_recent_time> ()
722 Returns the epoch of the most recent line received from the parser.
726 sub get_most_recent_time
728 my ($time) = $GeneralCounters->get ('most_recent_time');
734 =item B<nick_rename> (I<$old_nick>, I<$new_nick>)
736 Keeps track of a nick's hostname if the nick changes.
742 my $old_nick = shift;
743 my $new_nick = shift;
746 ($ident) = $NickToIdentCache->get ($old_nick);
748 if (defined ($ident) and ($ident))
750 $NickToIdentCache->put ($new_nick, $ident);
754 =item B<print_output> ()
756 Print the output. Should be called only once..
762 my ($total, $this) = get_total_lines ();
766 print STDERR <<'MESSAGE';
770 The most common reasons for this are:
771 - The logfile used was empty.
772 - The ``logtype'' setting did not match the logfile.
773 - The logfile did not include a date.
781 for (@$OutputCallbacks)
787 =item I<$data> = B<register_plugin> (I<$type>, I<$sub_ref>)
789 Register a subroutine for the given type. Returns a reference to the internal
790 data object. This will change soon, don't use it anymore if possible.
800 if (ref ($sub_ref) ne "CODE")
802 print STDERR $/, __FILE__, ": Plugin tried to register a non-code reference. Ignoring it.";
806 if ($type eq 'OUTPUT')
808 push (@$OutputCallbacks, $sub_ref);
812 if (!defined ($PluginCallbacks->{$type}))
814 $PluginCallbacks->{$type} = [];
818 push (@{$PluginCallbacks->{$type}}, $sub_ref);
820 print STDERR $/, __FILE__, ': ', scalar (caller ()), " registered for ``$type''." if ($::DEBUG & 0x800);
827 Florian octo Forster E<lt>octo at verplant.orgE<gt>