1 package Onis::Data::Core;
5 Onis::Data::Core - User management
9 Store data to the internal structure, care about users, nicks and idents and
10 dispatch to plugins. The core of the data even..
18 use Onis::Config qw#get_config#;
19 use Onis::Users qw#host_to_username nick_to_username#;
20 use Onis::Data::Persistent;
22 =head1 NAMING CONVENTION
24 Each and every person in the IRC can be identified by a three-tupel: B<nick>,
25 B<user> and B<host>, most often seen as I<nick!user@host>.
27 The combination of B<user> and B<host> is called an B<ident> here and written
28 I<user@host>. The combination of all three parts is called a B<chatter> here,
29 though it's rarely used.
31 A B<name> is the name of the "user" as defined in the F<users.conf>. Therefore,
32 the F<users.conf> defines a mapping of B<chatter> -E<gt> B<name>.
36 our $Nick2Ident = Onis::Data::Persistent->new ('Nick2Ident', 'nick', 'ident');
37 our $ChatterList = Onis::Data::Persistent->new ('ChatterList', 'chatter', 'counter');
38 our $ChannelNames = Onis::Data::Persistent->new ('ChannelNames', 'channel', 'counter');
42 @Onis::Data::Core::EXPORT_OK =
44 store unsharp calculate_nicks
46 get_all_nicks get_channel get_main_nick nick_to_ident ident_to_nick
47 ident_to_print_name get_print_name get_total_lines nick_rename
48 print_output register_plugin merge_idents
50 @Onis::Data::Core::ISA = ('Exporter');
52 our $DATA = init ('$DATA', 'hash');
54 our $PluginCallbacks = {};
60 our %NickToIdent = ();
61 our %IdentToNick = ();
63 our $LASTRUN_DAYS = 0;
67 our $UNSHARP = 'MEDIUM';
68 if (get_config ('unsharp'))
70 my $tmp = get_config ('unsharp');
74 if ($tmp eq 'NONE' or $tmp eq 'LIGHT'
82 print STDERR $/, __FILE__, ": ``$tmp'' is not a valid value for config option ``unsharp''.",
83 $/, __FILE__, ": Using standard value ``MEDIUM''.";
89 $DATA->{'idents_of_nick'} = {};
90 $DATA->{'channel'} = {};
91 $DATA->{'total_lines'} = 0;
94 if (defined ($DATA->{'lastrun'}))
96 my $last = $DATA->{'lastrun'};
99 my $diff = ($now - $last) % 86400;
103 $DATA->{'lastrun'} = $now;
104 $LASTRUN_DAYS = $diff;
109 $DATA->{'lastrun'} = time;
112 my $VERSION = '$Id: Core.pm,v 1.14 2004/10/31 15:00:32 octo Exp $';
113 print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
117 =head1 EXPORTED FUNCTIONS
121 =item B<store> (I<$type>, I<$data>)
123 Passes I<$data> (a hashref) to all plugins which registered for I<$type>. This
124 is the actual workhorse when parsing the file since it will be called once for
127 It will fill I<$data> with I<host>, I<user> and I<ident> if these fields are
128 missing but have been seen for this nick before.
135 my $type = $data->{'type'};
136 my ($nick, $user, $host);
139 if (!defined ($type))
141 print STDERR $/, __FILE__, ": Plugin data did not include a type. This line will be skipped." if ($::DEBUG & 0x20);
145 if (!defined ($data->{'nick'}))
147 print STDERR $/, __FILE__, ": Plugin data did not include a nick. This line will be skipped." if ($::DEBUG & 0x20);
151 $nick = $data->{'nick'};
153 if (defined ($data->{'host'}))
158 ($user, $host) = unsharp ($data->{'host'});
159 $ident = "$user\@$host";
161 $data->{'host'} = $host;
162 $data->{'user'} = $user;
163 $data->{'ident'} = $ident;
165 $Nick2Ident->put ($nick, $ident);
167 $chatter = "$nick!$ident";
168 ($counter) = $ChatterList->get ($chatter);
169 $counter ||= 0; $counter++;
170 $ChatterList->put ($chatter, $counter);
172 elsif (($ident) = $Nick2Ident->get ($nick))
174 my $chatter = "$nick!$ident";
175 ($user, $host) = split (m/@/, $ident);
177 $data->{'host'} = $host;
178 $data->{'user'} = $user;
179 $data->{'ident'} = $ident;
181 ($counter) = $ChatterList->get ($chatter);
182 $counter ||= 0; $counter++;
183 $ChatterList->put ($chatter, $counter);
187 $data->{'host'} = $host = '';
188 $data->{'user'} = $user = '';
189 $data->{'ident'} = $ident = '';
192 if ($::DEBUG & 0x0100)
194 print STDERR $/, __FILE__, ": id ($nick) = ", $ident;
197 if (defined ($data->{'channel'}))
199 my $chan = lc ($data->{'channel'});
200 my ($count) = $ChannelNames->get ($chan);
201 $count ||= 0; $count++;
202 $ChannelNames->put ($chan, $count);
205 if ($::DEBUG & 0x400)
207 my @keys = keys (%$data);
211 my $val = $data->{$key};
212 print STDERR $/, __FILE__, ': ';
213 printf STDERR ("%10s: %s", $key, $val);
217 #$DATA->{'total_lines'}++;
219 if (defined ($PluginCallbacks->{$type}))
221 for (@{$PluginCallbacks->{$type}})
230 =item (I<$user>, I<$host>) = B<unsharp> (I<$ident>)
232 Takes an ident (i.e. a user-host-pair, e.g. I<user@host.domain.com> or
233 I<login@123.123.123.123>) and "unsharps it". The unsharp version is then
236 What unsharp exactly does is described in the F<README>.
250 print STDERR $/, __FILE__, ": Unsharp ``$ident''" if ($::DEBUG & 0x100);
252 ($user, $host) = split (m/@/, $ident, 2);
254 @parts = split (m/\./, $host);
255 $num_parts = scalar (@parts);
257 if (($UNSHARP ne 'NONE')
258 and ($user =~ m/^[\~\^\-\+\=](.+)$/))
263 if ($UNSHARP eq 'NONE')
265 return ($user, $host);
267 elsif ($host =~ m/^[\d\.]{7,15}$/)
269 if ($UNSHARP ne 'LIGHT')
276 for ($i = 0; $i < ($num_parts - 2); $i++)
278 if ($UNSHARP eq 'LIGHT')
280 if ($parts[$i] !~ s/\d+/*/g)
285 elsif ($UNSHARP eq 'MEDIUM')
287 if ($parts[$i] =~ m/\d/)
296 else # ($UNSHARP eq 'HARD')
303 $host = lc (join ('.', @parts));
304 $host =~ s/\*(?:\.\*)+/*/;
306 print STDERR " -> ``$user\@$host''" if ($::DEBUG & 0x100);
307 return ($user, $host);
310 =item B<calculate_nicks> ()
312 Iterates over all chatters found so far, trying to figure out which belong to
313 the same person. This function has to be called before any calls to
314 B<get_all_nicks>, B<get_main_nick>, B<get_print_name> and B<nick_to_ident>.
316 This is normally the step after having parsed all files and before doing any
317 output. After this function has been run all the other informative functions
318 return actually usefull information..
320 It does the following: First, it iterates over all chatters and splits them up
321 into nicks and idents. If a (user)name is found for the ident it (the ident) is
322 replaced with it (the name).
324 In the second step we iterate over all nicks that have been found and
325 determines the most active ident for each nick. After this has been done each
326 nick is associated with exactly one ident, but B<not> vice versa.
328 The final step is to iterate over all idents and determine the most active nick
329 for each ident. After some thought you will agree that now each ident exists
330 only once and so does every nick.
341 for ($ChatterList->keys ())
344 my ($nick, $ident) = split (m/!/, $chatter);
345 my $name = host_to_username ($chatter);
346 my ($counter) = $ChatterList->get ($chatter);
348 $nicks->{$nick}{$temp} = 0 unless (defined ($nicks->{$nick}{$temp}));
349 $nicks->{$nick}{$temp} += $counter;
355 my $this_ident = 'unidentified';
360 for (keys %{$nicks->{$this_nick}})
363 my $name = ident_to_name ($ident);
364 my $num = $nicks->{$this_nick}{$ident};
370 if (($num >= $this_max) or !$this_name)
373 $this_ident = $ident;
379 if (($num >= $this_max) and !$this_name)
382 $this_ident = $ident;
387 print $/, __FILE__, ": max_ident ($this_nick) = $this_ident" if ($::DEBUG & 0x100);
389 if ($this_ident ne 'unidentified')
393 $name2nick->{$this_name}{$this_nick} = 0 unless (defined ($names->{$this_name}{$this_nick}));
394 $name2nick->{$this_name}{$this_nick} += $this_total;
396 $name2ident->{$this_name}{$this_ident} = 0 unless (defined ($names->{$this_name}{$this_ident}));
397 $name2ident->{$this_name}{$this_ident} += $this_total;
401 $idents->{$this_ident}{$this_nick} = 0 unless (defined ($idents->{$this_ident}{$this_nick}));
402 $idents->{$this_ident}{$this_nick} += $this_total;
405 elsif ($::DEBUG & 0x100)
407 print STDERR $/, __FILE__, ": Ignoring unidentified nick ``$this_nick''";
416 my @other_nicks = ();
418 my @nicks = keys (%{$idents->{$this_ident}});
423 my $num = $idents->{$this_ident}{$nick};
425 if ($num > $this_max)
427 if ($this_nick) { push (@other_nicks, $this_nick); }
433 push (@other_nicks, $nick);
437 print STDERR $/, __FILE__, ": max_nick ($this_ident) = $this_nick" if ($::DEBUG & 0x100);
439 for (@other_nicks, $this_nick)
441 push (@AllNicks, $_);
442 $NickToNick{$_} = $this_nick;
443 $NickToIdent{$_} = $this_ident;
446 $IdentToNick{$this_ident} = $this_nick;
449 for (keys %$name2nick)
456 my @other_nicks = ();
457 my @other_idents = ();
459 for (keys %{$name2nick->{$name}})
462 my $num = $name2nick->{$name}{$nick};
466 push (@other_nicks, $max_nick) if ($max_nick);
472 push (@other_nicks, $nick);
477 for (keys %{$name2ident->{$name}})
480 my $num = $name2ident->{$name}{$ident};
484 push (@other_idents, $max_ident) if ($max_ident);
490 push (@other_idents, $ident);
494 for (@other_nicks, $max_nick)
496 push (@AllNicks, $_);
497 $NickToNick{$_} = $max_nick;
498 $NickToIdent{$_} = $max_ident;
501 for (@other_idents, $max_ident)
503 $IdentToNick{$_} = $max_nick;
508 =item I<@nicks> = B<get_all_nicks> ()
510 Returns an array of all seen nicks.
519 =item I<$channel> = B<get_channel> ()
521 Returns the name of the channel we're generating stats for.
528 if (get_config ('channel'))
530 $chan = get_config ('channel');
532 elsif (keys (%{$DATA->{'channel'}}))
536 $DATA->{'channel'}{$b} <=> $DATA->{'channel'}{$a}
537 } (keys (%{$DATA->{'channel'}}));
544 # Fix network-safe channel named (RFC 2811)
545 if ($chan =~ m/^![A-Z0-9]{5}.+/)
547 $chan =~ s/[A-Z0-9]{5}//;
553 =item I<$main> = B<get_main_nick> (I<$nick>)
555 Returns the main nick for I<$nick> or an empty string if the nick is unknown..
562 if (defined ($NickToNick{$nick}))
564 return ($NickToNick{$nick});
572 =item I<$ident> = B<nick_to_ident> (I<$nick>)
574 Returns the ident for this nick or an empty string if unknown. Before
575 B<calculate_nicks> is run it will use the database to find the most recent
576 mapping. After B<calculate_nicks> is run the calculated mapping will be used.
587 if (defined ($NickToIdent{$nick}))
589 $ident = $NickToIdent{$nick};
594 ($ident) = $Nick2Ident->get ($nick);
601 =item I<$nick> = B<ident_to_nick> (I<$ident>)
603 Returns the nick for the given ident or an empty string if unknown.
611 if (defined ($IdentToNick{$ident}))
613 return ($IdentToNick{$ident});
621 =item I<$name> = B<ident_to_print_name> (I<$ident>)
623 Returns the printable version of the name for the chatter identified by
624 I<$ident>. Returns an empty string if the ident is not known.
628 sub ident_to_print_name
631 my $nick = ident_to_nick ($ident);
639 $name = get_print_name ($nick);
644 =item I<$name> = B<get_print_name> (I<$nick>)
646 Returns the printable version of the name for the nick I<$nick> or I<$nick> if
657 if (defined ($NickToIdent{$nick}))
659 $ident = $NickToIdent{$nick};
662 if (($ident !~ m/^[^@]+@.+$/) and $ident)
670 =item I<$lines> = B<get_total_lines> ()
672 Returns the total number of lines parsed so far.
678 return ($DATA->{'total_lines'});
681 =item B<nick_rename> (I<$old_nick>, I<$new_nick>)
683 Keeps track of a nick's hostname if the nick changes.
689 my $old_nick = shift;
690 my $new_nick = shift;
692 if (defined ($DATA->{'host_cache'}{$old_nick}))
694 my $host = $DATA->{'host_cache'}{$old_nick};
695 $DATA->{'host_cache'}{$new_nick} = $host;
697 if (!defined ($DATA->{'hosts_of_nick'}{$new_nick}{$host}))
699 $DATA->{'hosts_of_nick'}{$new_nick}{$host} = 1;
703 if (defined ($DATA->{'byident'}{"$old_nick\@unidentified"}))
705 # Other data may be overwritten, but I don't care here..
706 # This should be a extremely rare case..
707 $DATA->{'byident'}{"$new_nick\@unidentified"} = $DATA->{'byident'}{"$old_nick\@unidentified"};
708 delete ($DATA->{'byident'}{"$old_nick\@unidentified"});
712 =item B<print_output> ()
714 Print the output. Should be called only once..
720 if (!$DATA->{'total_lines'})
722 print STDERR <<'MESSAGE';
726 The most common reasons for this are:
727 - The logfile used was empty.
728 - The ``logtype'' setting did not match the logfile.
729 - The logfile did not include a date.
743 delete ($DATA->{'byname'});
746 =item I<$data> = B<register_plugin> (I<$type>, I<$sub_ref>)
748 Register a subroutine for the given type. Returns a reference to the internal
749 data object. This will change soon, don't use it anymore if possible.
759 if (ref ($sub_ref) ne "CODE")
761 print STDERR $/, __FILE__, ": Plugin tried to register a non-code reference. Ignoring it.";
765 if ($type eq 'OUTPUT')
767 push (@$OUTPUT, $sub_ref);
771 if (!defined ($PluginCallbacks->{$type}))
773 $PluginCallbacks->{$type} = [];
777 push (@{$PluginCallbacks->{$type}}, $sub_ref);
779 print STDERR $/, __FILE__, ': ', scalar (caller ()), " registered for ``$type''." if ($::DEBUG & 0x800);
784 =item B<merge_idents> ()
786 Merges idents. Does magic, don't interfere ;)
792 my @idents = keys (%IdentToNick);
797 my $name = ident_to_name ($ident);
799 if (!defined ($DATA->{'byident'}{$ident}))
804 if (!defined ($DATA->{'byname'}{$name}))
806 $DATA->{'byname'}{$name} = {};
809 add_hash ($DATA->{'byname'}{$name}, $DATA->{'byident'}{$ident});
818 my @keys = keys (%$src);
823 my $val = $src->{$key};
825 if (!defined ($dst->{$key}))
834 print STDERR $/, __FILE__, ": ``$key'' = ``$val''" if ($::DEBUG);
838 $dst->{$key} += $val;
841 elsif (ref ($val) ne ref ($dst->{$key}))
843 print STDERR $/, __FILE__, ": Destination and source type do not match!" if ($::DEBUG);
845 elsif (ref ($val) eq "HASH")
847 add_hash ($dst->{$key}, $val);
849 elsif (ref ($val) eq "ARRAY")
858 print STDERR $/, __FILE__, ": ``", $key, '[', $i, "]'' = ``$j''" if ($::DEBUG);
862 $dst->{$key}->[$i] += $j;
869 my $type = ref ($val);
870 print STDERR $/, __FILE__, ": Reference type ``$type'' is not supported!", $/;
879 Florian octo Forster E<lt>octo at verplant.orgE<gt>