1 rizwank 1.1 # TWiki Collaboration Platform, http://TWiki.org/
2 #
3 # Copyright (C) 2000-2004 Peter Thoeny, peter@thoeny.com
4 # Copyright (C) 2002 Richard Donkin, rdonkin@bigfoot.com
5 #
6 # For licensing info read license.txt file in the TWiki root.
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details, published at
16 # http://www.gnu.org/copyleft/gpl.html
17 #
18 =begin twiki
19
20 ---+ TWiki::UI::Statistics
21 Statistics extraction and presentation
22 rizwank 1.1
23 =cut
24 package TWiki::UI::Statistics;
25
26 use strict;
27 use File::Copy qw(copy);
28 use IO::File;
29
30 =pod
31
32 ---++ statistics( $query, $pathInfo, $remoteUser, $topic, $logDate )
33 Generate statistics topic. Optional parameters passed by optional CGI query:
34 | =$query= | |
35 | =$pathInfo= | |
36 | =$remoteUser= | |
37 | =$topic= | |
38 | =$logdate= | date of log to analyse, format "yyyymm" |
39
40 =cut
41 sub statistics {
42 my ( $query, $thePathInfo, $theRemoteUser, $topic, $logDate ) = @_;
43 rizwank 1.1
44 my $tmp = "";
45 my $destWeb = $TWiki::mainWebname; #web to redirect to after finishing
46 $logDate =~ s/[^0-9]//g; # remove all non numerals
47
48 # Set up locale and regexes
49 TWiki::basicInitialize();
50
51 if( $query ) {
52 # running from CGI
53 TWiki::writeHeader( $query );
54 print "<html>\n<head>\n<title>TWiki: Create Usage Statistics</title>\n";
55 print "</head>\n<body>\n";
56 }
57
58 # Initial messages
59 _printMsg( "TWiki: Create Usage Statistics", $query );
60 if( $query ) {
61 print "<h4><font color=\"red\"><span class=\"twikiAlert\">Do not interrupt this script!</span></font> ( Please wait until page download has finished )</h4>\n";
62 }
63
64 rizwank 1.1 unless( $logDate ) {
65 # get current local time and format to "yyyymm" format:
66 my ( $sec, $min, $hour, $mday, $mon, $year) = localtime( time() );
67 $year = sprintf("%.4u", $year + 1900); # Y2K fix
68 $mon = $mon+1;
69 $logDate = sprintf("%.4u%.2u", $year, $mon);
70 }
71
72 my $logMonth;
73 my $logYear;
74 $tmp = $logDate;
75 $tmp =~ s/([0-9]{4})(.*)/$2/g;
76 if( $tmp && $tmp < 13 ) {
77 $logMonth = $TWiki::isoMonth[$tmp-1];
78 } else {
79 $logMonth = "Date error";
80 }
81 $logYear = $logDate;
82 $logYear =~ s/([0-9]{4})(.*)/$1/g;
83 my $logMonthYear = "$logMonth $logYear";
84 _printMsg( "* Statistics for $logMonthYear", $query );
85 rizwank 1.1
86 my $logFile = $TWiki::logFilename;
87 $logFile =~ s/%DATE%/$logDate/g;
88
89 if( -e $logFile ) {
90 # Copy the log file to temp file, since analysis could take some time
91
92 # FIXME move the temp dir stuff to TWiki.cfg
93 my $tmpDir;
94 if ( $TWiki::OS eq "UNIX" ) {
95 $tmpDir = $ENV{'TEMP'} || "/tmp";
96 } elsif ( $TWiki::OS eq "WINDOWS" ) {
97 $tmpDir = $ENV{'TEMP'} || "c:/";
98 } else {
99 # FIXME handle other OSs properly - assume Unix for now.
100 $tmpDir = "/tmp";
101 }
102 my $randNo = int ( rand 1000); # For mod_perl with threading...
103 my $tmpFilename = "$tmpDir/twiki-stats.$$.$randNo";
104 $tmpFilename =~ /(.*)/; $tmpFilename = $1; # Untaint
105
106 rizwank 1.1 File::Copy::copy ($logFile, $tmpFilename)
107 or die "Can't copy $logFile to $tmpFilename - $!"; # FIXME: Never die in a cgi script
108
109 # Open the temp file
110 my $TMPFILE = new IO::File;
111 open $TMPFILE, $tmpFilename
112 or die "Can't open $tmpFilename - $!"; # FIXME: Never die in a cgi script
113
114 # Do a single data collection pass on the temporary copy of logfile,
115 # then call processWeb once for each web of interest.
116 my ($viewRef, $contribRef, $statViewsRef, $statSavesRef,
117 $statUploadsRef) = _collectLogData( $TMPFILE, $logMonthYear );
118
119 =pod
120 # DEBUG ONLY
121 _debugPrintHash($viewRef);
122 _debugPrintHash($contribRef);
123 print "statViews tests===========\n";
124 print "Views in Main = " . ${$statViewsRef}{'Main'} . "\n";
125 print "hash stats (used/avail) = " . %{$statViewsRef}."\n";
126
127 rizwank 1.1 foreach my $web (keys %{$statViewsRef}) {
128 print "Web summary for $web\n";
129 print $statViewsRef->{$web}."\n";
130 print $statSavesRef->{$web}."\n";
131 print $statUploadsRef->{$web}."\n";
132 }
133 =cut
134
135 # Generate WebStatistics topic update for one or more webs
136 if( $thePathInfo =~ /\/./ ) {
137 # do a particular web:
138 $destWeb = _processWeb( $thePathInfo, $theRemoteUser, $topic,
139 $logMonthYear, $viewRef, $contribRef, $statViewsRef,
140 $statSavesRef, $statUploadsRef, $query, 1 );
141 } else {
142 # do all webs:
143 my @weblist = grep{ /^[^\.\_]/ } TWiki::Store::getAllWebs( "" );
144 my $firstTime = 1;
145 foreach my $web ( @weblist ) {
146 if( TWiki::Store::webExists( $web ) ) {
147 $destWeb = _processWeb( "/$web", $theRemoteUser, $topic,
148 rizwank 1.1 $logMonthYear, $viewRef, $contribRef, $statViewsRef,
149 $statSavesRef, $statUploadsRef, $query, $firstTime );
150 $firstTime = 0;
151 } else {
152 _printMsg( " *** Error: $web does not exist", $query );
153 }
154 }
155 }
156 close $TMPFILE; # Shouldn't be necessary with 'my'
157 unlink $tmpFilename; # FIXME: works on Windows??? Unlink before
158 # usage to ensure deleted on crash?
159 } else {
160 _printMsg( " - Note: Log file $logFile does not exist", $query );
161 }
162
163 if( $query ) {
164 $tmp = $TWiki::statisticsTopicname;
165 my $url = &TWiki::getViewUrl( $destWeb, $tmp );
166 _printMsg( "* Go back to <a href=\"$url\">$tmp</a> topic", $query );
167 _printMsg( "End creating usage statistics", $query );
168 print "</body></html>\n";
169 rizwank 1.1 } else {
170 _printMsg( "End creating usage statistics", $query );
171 }
172 }
173
174 # Debug only
175 # Print all entries in a view or contrib hash, sorted by web and item name
176 sub _debugPrintHash {
177 my ($statsRef) = @_;
178 # print "Main.WebHome views = " . ${$statsRef}{'Main'}{'WebHome'}."\n";
179 # print "Main web, TWikiGuest contribs = " . ${$statsRef}{'Main'}{'Main.TWikiGuest'}."\n";
180 foreach my $web ( sort keys %$statsRef) {
181 my $count = 0;
182 print "$web web:\n";
183 # Get reference to the sub-hash for this web
184 my $webhashref = ${$statsRef}{$web};
185 # print "webhashref is " . ref ($webhashref) ."\n";
186 # Items can be topics (for view hash) or users (for contrib hash)
187 foreach my $item ( sort keys %$webhashref ) {
188 print " $item = ";
189 print "" . ( ${$webhashref}{$item} || 0 ) ."\n";
190 rizwank 1.1 $count += ${$webhashref}{$item};
191 }
192 print " WEB TOTAL = $count\n";
193 }
194 }
195
196
197 # =========================
198 # Process the whole log file and collect information in hash tables.
199 # Must build stats for all webs, to handle case of renames into web
200 # requested for a single-web statistics run.
201 #
202 # Main hash tables are divided by web:
203 #
204 # $view{$web}{$TopicName} == number of views, by topic
205 # $contrib{$web}{"Main.".$WikiName} == number of saves/uploads, by user
206
207 sub _collectLogData
208 {
209 my( $TMPFILE, $theLogMonthYear ) = @_;
210
211 rizwank 1.1 # Examples of log file format:
212 # | 03 Feb 2000 - 02:43 | Main.PeterThoeny | view | Know.WebHome | |
213 # | 03 Feb 2000 - 02:43 | Main.PeterThoeny | save | Know.WebHome | |
214 # | 03 Feb 2000 - 02:53 | Main.PeterThoeny | save | Know.WebHome | repRev 1.7 Main.PeterThoeny 2000/02/03 02:43:22 |
215 # | 23 Feb 2002 - 11:07 | Main.TWikiGuest | search | Main | Office *Locations[^A-Za-z] | 127.0.0.1 |
216 # Note: there's no topic name on search log entry
217 # | 23 Feb 2002 - 11:07 | Main.guest | search | Main | Office *Locations[^A-Za-z] | 127.0.0.1 |
218 # | 28 Mar 2002 - 07:11 | Main.FredBloggs | rename | Test.TestTopic7 | moved to Test.TestTopic7New | 127.0.0.1 |
219
220
221 my %view; # Hash of hashes, counts topic views by <web, topic>
222 my %contrib; # Hash of hashes, counts uploads/saves by <web, user>
223
224 # Hashes for each type of statistic, one hash entry per web
225 my %statViews;
226 my %statSaves;
227 my %statUploads;
228
229 # Imported regex objects, supporting I18N
230 my $webNameRegex = $TWiki::regex{webNameRegex};
231 my $wikiWordRegex = $TWiki::regex{wikiWordRegex};
232 rizwank 1.1 my $abbrevRegex = $TWiki::regex{abbrevRegex};
233
234 # Script regexes
235 my $intranetUserRegex = qr/[a-z0-9]+/; # FIXME: should centralise this
236 my $userRegex = qr/(?:$intranetUserRegex|$wikiWordRegex)/;
237 my $opRegex = qr/[a-z0-9]+/; # Operation, no i18n needed
238 # my $topicRegex = qr/(?:$wikiWordRegex|$abbrevRegex)/; # Strict topic names only
239 my $topicRegex = qr/[^ ]+/; # Relaxed topic names - any non-space OK
240 # but won't be auto-linked in WebStatistics
241 my $errorRegex = qr/\(not exist\)/; # Match '(not exist)' flag
242
243 my ($webName, $opName, $topicName, $userName, $newTopicName, $newTopicWeb);
244 binmode $TMPFILE;
245 while ( <$TMPFILE> ) {
246 my $line = $_;
247 $line =~ s/\r*\n$//; # Clean out line endings
248
249 $line =~ /^\|[^\|]*\| ($webNameRegex\.$userRegex) \| ($opRegex) \| ($webNameRegex)[. ]/o;
250 $userName = $1 || ""; # Main.FredBloggs
251 $opName = $2 || "";
252 $webName = $3 || "";
253 rizwank 1.1
254 # Skip bad logfile lines and warn if necessary
255 unless ($userName && $opName && $webName) {
256 if( $TWiki::doDebugStatistics ) {
257 TWiki::writeDebug("Invalid log file line = '$line'");
258 TWiki::writeDebug("userName = '$userName'");
259 TWiki::writeDebug("opName = '$opName'");
260 TWiki::writeDebug("webName = '$webName'");
261 }
262 next;
263 }
264
265 my $logContrib = 0;
266
267 if ($opName eq 'view' ) {
268 $statViews{$webName}++;
269 # Pick up the topic name and any error string
270 $line =~ /^\|[^\|]*\| ($webNameRegex\.$userRegex) \| ($opRegex) \| ($webNameRegex)\.($topicRegex) \| +(${errorRegex}?) */o;
271 $topicName = $4 || "";
272 my $noSuchTopic = $5 || ""; # Set if '(not exist)' matched
273
274 rizwank 1.1 unless( $topicName ) {
275 if( $TWiki::doDebugStatistics ) {
276 TWiki::writeDebug("Invalid log file line = '$line'");
277 TWiki::writeDebug("userName = '$userName'");
278 TWiki::writeDebug("opName = '$opName'");
279 TWiki::writeDebug("webName = '$webName'");
280 TWiki::writeDebug("topicName = '$topicName'");
281 }
282 next;
283 }
284
285 # Skip accesses to non-existent topics
286 if ($noSuchTopic) {
287 next;
288 } else {
289 # Count this topic access
290 $view{$webName}{$topicName}++;
291 }
292
293 } elsif ($opName eq 'save' ) {
294 $statSaves{$webName}++;
295 rizwank 1.1 $logContrib = 1;
296
297 } elsif ($opName eq 'upload' ) {
298 $statUploads{$webName}++;
299 $logContrib = 1;
300
301 } elsif ($opName eq 'rename' ) {
302 # Pick up the old and new topic names
303 $line =~ /^\|[^\|]*\| ($webNameRegex\.$userRegex) \| ($opRegex) \| ($webNameRegex)\.($topicRegex) \| moved to ($webNameRegex)\.($topicRegex) /o;
304 $topicName = $4 || "";
305 $newTopicWeb = $5 || "";
306 $newTopicName = $6 || "";
307 ## TWiki::writeDebug("$topicName renamed to $newTopicWeb.$newTopicName");
308
309 unless ($topicName && $newTopicWeb && $newTopicName) {
310 if( $TWiki::doDebugStatistics ) {
311 TWiki::writeDebug("Invalid log file line (rename) = '$line'");
312 TWiki::writeDebug("userName = '$userName'");
313 TWiki::writeDebug("opName = '$opName'");
314 TWiki::writeDebug("webName = '$webName'");
315 TWiki::writeDebug("topicName= '$topicName'");
316 rizwank 1.1 TWiki::writeDebug("newTopicWeb= '$newTopicWeb'");
317 TWiki::writeDebug("newTopicName = '$newTopicName'");
318 }
319 next;
320 }
321 # Get number of views for old topic this month (may be zero)
322 my $oldViews = $view{$webName}{$topicName} || 0;
323
324 # Transfer views from old to new topic
325 $view{$newTopicWeb}{$newTopicName} = $oldViews;
326 delete $view{$webName}{$topicName};
327
328 # Transfer views from old to new web
329 if ( $newTopicWeb ne $webName ) {
330 $statViews{$webName} -= $oldViews;
331 $statViews{$newTopicWeb} += $oldViews;
332 }
333 }
334 # Record saves and uploads
335 if ($logContrib) {
336 # Record the contribution by user name
337 rizwank 1.1 $contrib{$webName}{$userName}++;
338 }
339 =pod
340 # DEBUG
341 $. <= 5 && print "$line\n";
342 print "$line\n";
343 print "$.: $userName did $opName on $webName";
344 print ".$topicName" if (defined $topicName);
345 print "\n";
346 =cut
347
348 }
349
350 =pod
351
352 print "Main.WebHome views = " . $view{'Main'}{'WebHome'}."\n";
353 print "Main web's contribs = " . $contrib{'Main'}{'Main.RichardDonkin'}."\n";
354 _debugPrintHash(\%view);
355 _debugPrintHash(\%contrib);
356 =cut
357 return \%view, \%contrib, \%statViews, \%statSaves, \%statUploads;
358 rizwank 1.1 }
359
360 # =========================
361 sub _processWeb
362 {
363 my( $thePathInfo, $theRemoteUser, $theTopic, $theLogMonthYear, $viewRef, $contribRef,
364 $statViewsRef, $statSavesRef, $statUploadsRef, $query, $isFirstTime ) = @_;
365
366 my ( $topic, $webName, $dummy, $userName, $dataDir ) =
367 &TWiki::initialize( $thePathInfo, $theRemoteUser, $theTopic, "", $query );
368 $dummy = ""; # to suppress warning
369
370 if( $isFirstTime ) {
371 my $tmp = &TWiki::userToWikiName( $userName, 1 );
372 $tmp .= " as shell script" unless( $query );
373 _printMsg( "* Executed by $tmp", $query );
374 }
375
376 _printMsg( "* Reporting on TWiki.$webName web", $query );
377
378 if( ! &TWiki::Store::webExists( $webName ) ) {
379 rizwank 1.1 _printMsg( " *** Error: Web $webName does not exist", $query );
380 return $TWiki::mainWebname;
381 }
382
383 # Handle null values, print summary message to browser/stdout
384 my $statViews = $statViewsRef->{$webName};
385 my $statSaves = $statSavesRef->{$webName};
386 my $statUploads = $statUploadsRef->{$webName};
387 $statViews ||= 0;
388 $statSaves ||= 0;
389 $statUploads ||= 0;
390 _printMsg( " - view: $statViews, save: $statSaves, upload: $statUploads", $query );
391
392
393 # Get the top N views and contribs in this web
394 my (@topViews) = _getTopList( $TWiki::statsTopViews, $webName, $viewRef );
395 my (@topContribs) = _getTopList( $TWiki::statsTopContrib, $webName, $contribRef );
396
397 # Print information to stdout
398 my $statTopViews = "";
399 my $statTopContributors = "";
400 rizwank 1.1 if( @topViews ) {
401 $statTopViews = join( "<br /> ", @topViews );
402 $topViews[0] =~ s/[\[\]]*//g;
403 _printMsg( " - top view: $topViews[0]", $query );
404 }
405 if( @topContribs ) {
406 $statTopContributors = join( "<br /> ", @topContribs );
407 _printMsg( " - top contributor: $topContribs[0]", $query );
408 }
409
410 # Update the WebStatistics topic
411
412 my $tmp;
413 my $statsTopic = $TWiki::statisticsTopicname;
414 # DEBUG
415 # $statsTopic = "TestStatistics"; # Create this by hand
416 if( &TWiki::Store::topicExists( $webName, $statsTopic ) ) {
417 my( $meta, $text ) = &TWiki::Store::readTopic( $webName, $statsTopic, 1 );
418 my @lines = split( /\n/, $text );
419 my $statLine;
420 my $idxStat = -1;
421 rizwank 1.1 my $idxTmpl = -1;
422 for( my $x = 0; $x < @lines; $x++ ) {
423 $tmp = $lines[$x];
424 # Check for existing line for this month+year
425 if( $tmp =~ /$theLogMonthYear/ ) {
426 $idxStat = $x;
427 } elsif( $tmp =~ /<\!\-\-statDate\-\->/ ) {
428 $statLine = $tmp;
429 $idxTmpl = $x;
430 }
431 }
432 if( ! $statLine ) {
433 $statLine = "| <!--statDate--> | <!--statViews--> | <!--statSaves--> | <!--statUploads--> | <!--statTopViews--> | <!--statTopContributors--> |";
434 }
435 $statLine =~ s/<\!\-\-statDate\-\->/$theLogMonthYear/;
436 $statLine =~ s/<\!\-\-statViews\-\->/ $statViews/;
437 $statLine =~ s/<\!\-\-statSaves\-\->/ $statSaves/;
438 $statLine =~ s/<\!\-\-statUploads\-\->/ $statUploads/;
439 $statLine =~ s/<\!\-\-statTopViews\-\->/$statTopViews/;
440 $statLine =~ s/<\!\-\-statTopContributors\-\->/$statTopContributors/;
441
442 rizwank 1.1 if( $idxStat >= 0 ) {
443 # entry already exists, need to update
444 $lines[$idxStat] = $statLine;
445
446 } elsif( $idxTmpl >= 0 ) {
447 # entry does not exist, add after <!--statDate--> line
448 $lines[$idxTmpl] = "$lines[$idxTmpl]\n$statLine";
449
450 } else {
451 # entry does not exist, add at the end
452 $lines[@lines] = $statLine;
453 }
454 $text = join( "\n", @lines );
455 $text .= "\n";
456
457 &TWiki::Store::saveTopic( $webName, $statsTopic, $text, $meta, "", 1, 1, 1 );
458 _printMsg( " - Topic $statsTopic updated", $query );
459
460 } else {
461 _printMsg( " *** Warning: No updates done, topic $webName.$statsTopic does not exist", $query );
462 }
463 rizwank 1.1
464 return $webName;
465 }
466
467 # =========================
468 # Get the items with top N frequency counts
469 # Items can be topics (for view hash) or users (for contrib hash)
470 sub _getTopList
471 {
472 my( $theMaxNum, $webName, $statsRef ) = @_;
473
474 # Get reference to the sub-hash for this web
475 my $webhashref = $statsRef->{$webName};
476
477 # print "Main.WebHome views = " . $statsRef->{$webName}{'WebHome'}."\n";
478 # print "Main web, TWikiGuest contribs = " . ${$statsRef}{$webName}{'Main.TWikiGuest'}."\n";
479
480 my @list = ();
481 my $topicName;
482 my $statValue;
483
484 rizwank 1.1 # Convert sub hash of item=>statsvalue pairs into an array, @list,
485 # of '$statValue $topicName', ready for sorting.
486 while( ( $topicName, $statValue ) = each( %$webhashref ) ) {
487 # Right-align statistic value for sorting
488 $statValue = sprintf "%7d", $statValue;
489 # Add new array item at end of array
490 if( $topicName =~ /\./ ) {
491 $list[@list] = "$statValue $topicName";
492 } else {
493 $list[@list] = "$statValue [[$topicName]]";
494 }
495 }
496
497 # DEBUG
498 # print " top N list for $webName\n";
499 # print join "\n", @list;
500
501 # Sort @list by frequency and pick the top N entries
502 if( @list ) {
503 # Strip initial spaces
504 @list = map{ s/^\s*//; $_ } @list;
505 rizwank 1.1
506 @list = # Prepend spaces depending on no. of digits
507 map{ s/^([0-9][0-9][^0-9])/\ \;$1/; $_ }
508 map{ s/^([0-9][^0-9])/\ \;\ \;$1/; $_ }
509 # Sort numerically, descending order
510 sort { (split / /, $b)[0] <=> (split / /, $a)[0] } @list;
511
512 if( $theMaxNum >= @list ) {
513 $theMaxNum = @list - 1;
514 }
515 return @list[0..$theMaxNum];
516 }
517 return @list;
518 }
519
520 # =========================
521 sub _printMsg
522 {
523 my( $msg, $query ) = @_;
524 my $htmlMsg = $msg;
525
526 rizwank 1.1 if( $query ) {
527 # TODO: May need to fix this regex if internationalised script
528 # messages are supported in future.
529 if( $htmlMsg =~ /^[A-Z]/ ) {
530 $htmlMsg =~ s/^([A-Z].*)/<h3>$1<\/h3>/go;
531 } else {
532 $htmlMsg =~ s/(\*\*\*.*)/<font color=\"#FF0000\"><span class=\"twikiAlert\">$1<\/span><\/font>/go;
533 $htmlMsg =~ s/^\s\s/ /go;
534 $htmlMsg =~ s/^\s/ /go;
535 $htmlMsg .= "<br />";
536 }
537 $htmlMsg =~ s/==([A-Z]*)==/<font color=\"#FF0000\"><span class=\"twikiAlert\">==$1==<\/span><\/font>/go;
538 print "$htmlMsg\n";
539 } else {
540 $msg =~ s/ / /go;
541 print "$msg\n";
542 }
543 }
544
545 1;
|