#!/usr/bin/perl -w
#
# Convert sendmail, postfix, smail, or qmail logs to common log format so
# they can be processed by standard web log processing software.
#
# Here's a sample log entry, in common log format:
#
# someone@foo.bar - - [31/May/1996:13:55:28 -0400] "GET /fred/" 200 541
#
# Meaning that someone@foo.bar sent mail to fred, on the given date, and the
# message was 541 k long.
#
# Only mail that was successfully sent is logged.
#
# Maillog2Commonlog v. 3.2 is copyright 1995, 1996, 2000 by Joey Hess.
# May be distributed under the terms of the GPL.
#
# Usage:
#	maillog2commonlog [sendmail|smail|newsmail|qmail|postfix] < logfile
#
# Note: if your smail is < version 3.2, then use smail. If it is 3.2 or
# greater, the logfile format changed, and you must use newsmail instead.
#
# Note: it only works for qmail if qmail is set up to log messages via
# syslog. Otherwise, it isn't going to find timestamps.

use strict;
use POSIX qw(strftime);
use URI::Escape;

# The log flavour is the first command line argument. Default to '' so a
# missing argument falls through to the usage message without a warning.
my $logtype = @ARGV ? lc shift @ARGV : '';

my %known_logtype = map { $_ => 1 } qw(sendmail smail newsmail qmail postfix);
unless ($known_logtype{$logtype}) {
    # NOTE(review): the original usage text was lost from this copy of the
    # file; this message is rebuilt from the header comment. Confirm the
    # wording and the exit status against a pristine copy.
    print <<"EOF";
Usage: $0 [sendmail|smail|newsmail|qmail|postfix] < logfile
EOF
    exit 1;
}

# Hosts whose addresses are logged by user name. Addresses at any other host
# are logged as the bare host name (see FixEmail).
# NOTE(review): the original list was lost from this copy of the file; fill
# it in from a pristine copy or local configuration.
my @pub_hosts = qw();
my %pub_hosts_hash = map { $_ => 1 } @pub_hosts;

# Per-message buffer, keyed by message id. The "received" line fills in
# from/size and the "sent" line fills in mon/day/time/to.
my %msg_buf;

# The log timestamps handled below carry no year, so the current one is used.
my $year = (localtime)[5] + 1900;

# NOTE(review): the original source of the zone offset is not visible in
# this copy. This assumes the logs were written in this machine's local
# time -- confirm.
my $timezone = strftime('%z', localtime);

# Numeric month to Mmm translation table, used for the smail formats.
my @date_trans;

# Print the common log format entry for the buffered message $message_id.
#
# NOTE(review): this sub was lost from this copy of the file and is rebuilt
# from the sample entry in the header comment and from the fields the main
# loop stores. Confirm against a pristine copy.
sub Log {
    my ($message_id) = @_;
    my $entry = $msg_buf{$message_id} or return;

    # A size that could not be parsed is written as "-", the common log
    # format placeholder for an unknown value.
    my $size = defined $entry->{size} ? $entry->{size} : '-';

    printf "%s - - [%s/%s/%s:%s %s] \"GET /%s/\" 200 %s\n",
        $entry->{from}, $entry->{day}, $entry->{mon}, $year,
        $entry->{time}, $timezone, $entry->{to}, $size;

    # Forget the delivery that was just logged but keep from/size, so that
    # further recipients of the same message can still be logged and a
    # stale recipient is never paired with a later sender.
    delete @{$entry}{qw(mon day time to)};
    return;
}

# Reduce an email address to the token that gets logged and URI-escape it.
# Angle brackets are stripped. If the address contains an "@", it becomes
# the part before the "@" when the host is listed in @pub_hosts, and the
# bare host otherwise.
#
# NOTE(review): the head of this sub was lost from this copy of the file;
# only "]//g;" of the first substitution survived. The bracket-stripping
# pattern below is inferred from that fragment -- confirm.
sub FixEmail {
    my ($address) = @_;
    $address = '' unless defined $address;

    $address =~ s/[<>]//g;
    if ($address =~ m/\@(.*)$/) {
        my $host = $1;
        if ($pub_hosts_hash{$host}) {
            ($address) = $address =~ m/^(.*)\@/;
        }
        else {
            $address = $host;
        }
    }
    return uri_escape($address);
}

# Now on to actually processing the logs. Sendmail and smail use very
# different file formats, sendmail is all on 1 line, smail is a multi-
# line format that's easier to process, with \n\n separating each multi-
# line record. And newsmail is ugly ('nuff said..)
if ($logtype eq 'smail') {
    # Read in a whole multi-line record at one go.
    $/ = "\n\n";
}

if ($logtype =~ m/smail/) {
    # Index 0 is unused so that month numbers map directly to names.
    @date_trans = (undef, qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec});
}

while (<>) {
    # There are 2 distinct log line types, either mail is being received
    # or sent. We have to combine the 2 lines to get a clear picture of a
    # mail message. For qmail, there are 3 log line types: mail received,
    # delivery started, and delivery completed.
    if (/: from=/ || /\] received\n/m || /\] Received / || /info msg .* from/) {
        # Received mail.
        my ($message_id, $from, $size);
        if (/: from=/) {
            # SENDMAIL and POSTFIX
            ($message_id, $from, $size) =
                m/\w+\s+\d+\s+\d+:\d+:\d+\s+[^ ]+\s+(?:imap\s+)?(?:sendmail|postfix\/qmgr)\[\d+\]:\s+(.*?):\s+from=(.*?),\s+size=(\d+)/;
        }
        elsif (/\] received\n/m) {
            # SMAIL
            ($message_id, $from) =
                m/^\d+\/\d+\/\d+\s+\d+\:\d+\:\d+\:\s+\[(.*?)\]\s+received\n\|\s+from:\s+(.*?)\n/m;
            ($size) = m/\|\s+size:\s+(\d+)\s+bytes\n/m;
        }
        elsif (/\] Received /) {
            # NEWSMAIL
            ($message_id) = m/\[(.*?)\]/;
            ($from)       = m/Received FROM:(.*?) /;
            ($size)       = m/SIZE:(\d+)\s/;
        }
        elsif (/info msg .* from/) {
            # QMAIL
            ($message_id, $size, $from) =
                m/info msg (\d+): bytes (\d+) from <(.*)>/;
        }

        # The line looked like a "received" line but the detailed pattern
        # did not match. Skip it rather than buffer it under an undefined
        # message id.
        next unless defined $message_id;

        $from = "unknown" if !$from;
        $from = FixEmail($from);

        $msg_buf{$message_id}{from} = $from;
        $msg_buf{$message_id}{size} = $size;

        # The delivery line may already have been seen.
        Log($message_id) if $msg_buf{$message_id}{to};
    }
    elsif (/: to=.*stat(us)?=sent/i || /\] delivered\n/m || /\] Delivered / || /starting delivery/) {
        # The line logs mail being sent ok.
        my ($mon, $day, $time, $message_id, $to);
        if (/: to=.*stat(us)?=sent/i) {
            # SENDMAIL and POSTFIX
            ($mon, $day, $time, $message_id, $to) =
                m/(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+[^ ]+\s+(?:imap\s+)?(?:sendmail|postfix\/\w+)\[.*?\]:\s+(.*?):\s+to=(.*?),/;
        }
        elsif (/\] delivered\n/m) {
            # SMAIL
            ($mon, $day, $time, $message_id, $to) =
                m/(\d+)\/(\d+)\/\d+\s+(\d+:\d+:\d+):\s\[(.*?)\] delivered\n\|\s+to:\s+(.*?)\n/m;
            # Translate to Mmm format.
            $mon = $date_trans[$mon] if defined $mon;
        }
        elsif (/\] Delivered /) {
            # NEWSMAIL
            ($mon, $day, $time, $message_id) =
                m/(\d+)\/(\d+)\/\d+\s+(\d+:\d+:\d+):\s\[(.*?)\]/;
            ($to) = m/TO:(.*?)\s/;
            # Translate to Mmm format.
            $mon = $date_trans[$mon] if defined $mon;
        }
        elsif (/starting delivery/) {
            # QMAIL
            ($mon, $day, $time, $message_id, $to) =
                m/^(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+.*\s+msg\s+(\d+)\s+to\s+.*?\s+(.*)$/;
        }

        # Skip lines whose details could not be parsed instead of
        # buffering undefined values.
        next unless defined $message_id
            && defined $mon && defined $day && defined $time
            && defined $to;

        $to  = FixEmail($to);
        $day = "0$day" if length($day) == 1;

        $msg_buf{$message_id}{mon}  = $mon;
        $msg_buf{$message_id}{day}  = $day;
        $msg_buf{$message_id}{time} = $time;
        $msg_buf{$message_id}{to}   = $to;

        # Only log once the matching "received" line has been seen.
        Log($message_id) if $msg_buf{$message_id}{from};
    }
}