README*, config: Removed the version number from these files.
[yaala.git] / lib / Yaala / Parser / Wnserver.pm
1 package Yaala::Parser;
2
3 # ncsa.pm was patched to support wn-server by M. Feenstra on 20/09/2001
4
5 use strict;
6 use warnings;
7 use vars qw(%DATAFIELDS);
8
9 use Exporter;
10 use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS detect_referer detect_browser
11         detect_os extract_data#;
12 use Yaala::Data::Persistent qw#init#;
13
# Exporter wiring: nothing is exported by default, callers ask for what
# they need.
@Yaala::Parser::ISA = ('Exporter');
@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS);

# State obtained from Yaala::Data::Persistent. $LASTDATE is used as a
# reference to a scalar, $EXTRA as a reference to a hash.
# NOTE(review): presumably both are restored from a previous run - confirm
# against Yaala::Data::Persistent.
our $LASTDATE = init ('$LASTDATE', 'scalar');
our $EXTRA = init ('$EXTRA', 'hash');

# Fill in defaults for everything that has not been set yet.
$$LASTDATE = 0 unless ($$LASTDATE);
$EXTRA->{'total'} = 0 unless (defined ($EXTRA->{'total'}));
$EXTRA->{'days'} = {} unless (defined ($EXTRA->{'days'}));
$EXTRA->{'search_terms'} = {} unless (defined ($EXTRA->{'search_terms'}));

# Fields this parser provides, mapped to their type descriptors.
%DATAFIELDS =
(
        # keys
        'host',         'key:host',
        'user',         'key',
        'date',         'key:date',
        'hour',         'key:hour',
        'tld',          'key',
        'file',         'key',
        'status',       'key:numeric',
        'browser',      'key',
        'os',           'key',
        'referer',      'key:url',
        'virtualhost',  'key',

        # aggregations
        'bytes',        'agg:bytes',
        'requests',     'agg'
);

# Yaala::Data::Core must be loaded at runtime, i.e. only after %DATAFIELDS
# has been populated: Data uses Setup, which relies on %DATAFIELDS being
# defined.
require Yaala::Data::Core;
Yaala::Data::Core->import (qw#store#);

my $VERSION = '$Id: Wnserver.pm,v 1.9 2003/12/07 16:48:59 octo Exp $';
if ($::DEBUG)
{
        print STDERR $/, __FILE__, ": $VERSION";
}

# True value for `require'; the subroutines below are compiled regardless.
return (1);
51
# parse (LINE)
#
# Parses one line of a wn-server logfile, derives the fields listed in
# %DATAFIELDS from it and passes them to store ().
#
# Arguments:
#   LINE - the raw logfile line.
#
# Side effects:
#   - advances $$LASTDATE to the timestamp of the accepted line,
#   - updates the counters in $EXTRA ('total', 'days', 'search_terms'),
#   - calls store () with the collected record.
#
# Returns:
#   undef if LINE is empty/false, 0 if the line matched but was rejected
#   (older than $$LASTDATE, malformed date or malformed request). For an
#   accepted line the value of the store () call is returned; lines that
#   do not match the logfile pattern have no defined return value.
sub parse
{
        my $line = shift or return undef;

        if ($line =~ /^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\d+) <[^>]*><([^>]*)> <([^>]*)> <([^>]*)> <([^>]*)>$/)
        {
                # The pattern has eleven capture groups. The last one must
                # be included here, otherwise $virtual is always undefined.
                # $ident and $cookie are captured but not used any further.
                my ($host, $ident, $user, $date, $request, $status,
                        $bytes, $browser, $referer, $cookie, $virtual) =
                ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);

                # Split the timestamp into its components.
                my ($day, $month, $year, $hour, $minute, $second) =
                        $date =~ m#(\d\d)/(\w{3})/(\d{4}):(\d\d):(\d\d):(\d\d)#;

                # A failed match leaves all components undefined. Reject
                # the line instead of storing a bogus ``0000-00-00'' record.
                if (!defined ($day))
                {
                        print STDERR $/, __FILE__, ": Malformed date: ``$date''." if ($::DEBUG);
                        return (0);
                }

                $month = $MONTH_NUMBERS{$month};
                $date = sprintf("%04u-%02u-%02u", $year, $month, $day);

                # Numeric timestamp of the form YYYYMMDDhhmmss.
                my $timestamp = int (sprintf ("%04u%02u%02u%02u%02u%02u",
                                $year, $month, $day, $hour, $minute, $second));

                if ($timestamp < $$LASTDATE)
                {
                        # Older than the newest line seen so far: skip it.
                        # This has to leave the subroutine. A `next' inside
                        # a bare block merely leaves that block, so the
                        # line would be counted and stored anyway.
                        print STDERR $/, __FILE__, ": Skipping.. ($timestamp < $$LASTDATE)" if ($::DEBUG & 0x0200);
                        return (0);
                }
                $$LASTDATE = $timestamp;

                # Only the requested file is of interest; the method ($1)
                # and the query string ($3) are matched but not used.
                my $file;
                if ($request =~ m#(\S+) ([^ \?]+)\??(\S*)#)
                {
                        $file = $2;
                }
                else
                {
                        print STDERR $/, __FILE__, ": Malformed request: ``$request''." if ($::DEBUG);
                        return (0);
                }

                # A username combined with a 4xx status is recorded as
                # invalid rather than under the name that was sent.
                if (($user ne '-') and ($status >= 400) and ($status < 500))
                {
                        $user = '*INVALID*';
                }

                if ($user eq '-') { $user = '*UNKNOWN*'; }
                # The pattern above only admits digits for the byte count,
                # so this is purely defensive.
                if ($bytes eq '-') { $bytes = 0; }

                # Top level domain, if the host ends in an alphabetic label
                # of at least two characters.
                my $tld;
                if ($host =~ m/\.([a-z]{2,})$/i)
                {
                        $tld = lc ($1);
                }
                else
                {
                        $tld = '*UNRESOLVED*';
                }

                my $os = detect_os ($browser);
                my $browser_name = detect_browser ($browser);
                # extract_data () gets the raw referer, i.e. before ``-''
                # is replaced by the empty string.
                my @search_terms = extract_data ($referer);
                if ($referer eq '-') { $referer = ''; }

                $EXTRA->{'total'}++;
                $EXTRA->{'days'}{$date}++;

                if (scalar @search_terms)
                {
                        # Diagnostics go to STDERR, like everywhere else in
                        # this file.
                        print STDERR $/, __FILE__, ": Search Terms: ",
                                join (' ', @search_terms)
                                if ($::DEBUG & 0x1000);

                        $EXTRA->{'search_terms'}{$_}++ for (@search_terms);
                }

                my %combined = (
                                        'host'          =>      $host,
                                        'user'          =>      $user,
                                        'date'          =>      $date,
                                        'hour'          =>      $hour,
                                        'browser'       =>      $browser_name,
                                        'os'            =>      $os,
                                        'tld'           =>      $tld,
                                        'file'          =>      $file,
                                        'referer'       =>      $referer,
                                        'status'        =>      $status,
                                        'bytes'         =>      $bytes,
                                        'virtualhost'   =>      $virtual,
                                        'requests'      =>      1
                                );
                store (\%combined);
        }
        elsif ($::DEBUG)
        {
                chomp ($line);
                print STDERR $/, __FILE__, ": Unable to parse: ``$line''";
        }
}
155
# extra ()
#
# Publishes summary information about the parsed data in the global hash
# referenced by $::EXTRA:
#   'Total requests'           - value of $EXTRA->{'total'},
#   'Average requests per day' - total divided by the number of distinct
#                                days, with one decimal place,
#   'Reporting period'         - the number of distinct days,
#   'Search terms used'        - the most frequent search terms; only set
#                                if at least one term has been recorded.
#
# Takes no arguments. Returns 0 without touching $::EXTRA if no day has
# been recorded yet; the return value is not meaningful otherwise.
sub extra
{
        my $days = scalar (keys (%{$EXTRA->{'days'}}));
        return (0) unless ($days);

        my $average = sprintf ("%.1f", ($EXTRA->{'total'} / $days));

        $::EXTRA->{'Total requests'} = $EXTRA->{'total'};
        $::EXTRA->{'Average requests per day'} = $average;
        $::EXTRA->{'Reporting period'} = "$days days";

        my $counts = $EXTRA->{'search_terms'};

        # Most frequent term first. Equally frequent terms are ordered
        # alphabetically so that the report does not depend on the hash
        # ordering.
        my @sorted_terms = sort
                { $counts->{$b} <=> $counts->{$a} or $a cmp $b }
                (keys %$counts);

        if (@sorted_terms)
        {
                my $max = $counts->{$sorted_terms[0]};
                my @scalar_terms = ();

                # List only terms that have been used more than a tenth as
                # often as the most frequent one.
                while (@sorted_terms and
                        ($counts->{$sorted_terms[0]} / $max) > 0.1)
                {
                        # A lexical is used here on purpose: assigning to
                        # the global $_ would clobber the caller's $_.
                        my $term = shift (@sorted_terms);

                        push (@scalar_terms,
                                sprintf ("%s (%u)", $term, $counts->{$term})
                        );
                }
                # NOTE(review): the terms stem from referer URLs, see
                # parse (), and are joined into HTML without escaping
                # here - confirm that extract_data () or the output
                # module sanitizes them.
                $::EXTRA->{'Search terms used'} = join ("<br />\n      ", @scalar_terms);

                # Whatever is left over is only mentioned as a count.
                if (@sorted_terms)
                {
                        my $skipped = scalar (@sorted_terms);
                        $::EXTRA->{'Search terms used'} .= "<br />\n      $skipped more skipped";
                }
        }
}