Thread: Spam ratio
View Single Post
  #4 (permalink)  
Old 06-30-2009, 12:19 PM
jrefl5 jrefl5 is offline
Advanced Member
 
Posts: 205
Default

You will need a script to process the Zimbra log file.
I use the attached Perl script, then place the results in a spreadsheet.

Code:
#!/usr/bin/perl
# input is the current zimbra logfile
# this will read the zimbra logfile and create a summary report of
# postfix, SpamAssassin, and ClamAV events
# note that the default log rotation for zimbra (/etc/logrotate.d/zimbra in Red Hat clones) needs to have
# "delaycompress" set or I will not be able to read the logfile if used as part of the "postrotate scripts"
# this script is by JCrawford at AZ Structural Pest Control Commission to scratch an itch.
# it is Licensed  as GPL v3 or later  "jrefl5" ( yahoo ) <com> and others, No warranty as to usability
# is provided. james
#
# format of zimbra log line is
# Field				Position
# Month				0-2
# Day				4-5
# Time (HH:MM:SS)   7-14
# Server name		15-(14+length(server name))
# Data              end of SNAme+1-> end of line (\n)
# File-level state shared by the log-reading loop and the report section.
# Every name is declared exactly once; the original list declared $Relay twice
# and $ConfErr three times, which only masked the earlier declarations.

# --- report header ---
my $CVersion    = "1.0";    # version string printed in the report title
my $StDate      = "";       # timestamp prefix of the first log line
my $EDate       = "";       # timestamp prefix of the most recent log line
my $inlines     = 0;        # number of log lines read

# --- postfix: messages and discards ---
my $Messages    = 0;        # cleanup message-ids plus NOQUEUE rejects
my $currID      = "";       # queue id of the message most recently seen in cleanup
my $Sender      = "";       # sender of the current message (not yet filled in by the loop)
my $Dest        = "";       # recipient of the current message (not yet filled in by the loop)
my $Discards    = 0;        # all cleanup "discard" lines
my $DSN         = 0;        # discard: "no third-party DSNs"
my $Crylic      = 0;        # discard: "Crylic"
my $TheBat      = 0;        # discard: "The Bat as a Mailer"

# --- postfix: NOQUEUE rejects ---
my $Rejects     = 0;        # all NOQUEUE: reject lines
my $Security    = 0;        # declared but not updated by the visible code
my $Attach      = 0;        # declared but not updated by the visible code
my $SAddress    = 0;        # bad HELO / sender address rejected
my $DAddress    = 0;        # recipient address rejected
my $Relay       = 0;        # relay access denied
my $Pipe        = 0;        # improper SMTP command pipelining
my $ConfErr     = 0;        # server configuration error
my $donknow     = 0;        # reject reason not recognised

# --- amavis / SpamAssassin / ClamAV ---
my $amavis      = 0;        # amavis "Checking:" lines
my $Blocked     = 0;        # "Blocked SPAM"
my $BlockBounce = 0;        # declared but not updated by the visible code
my $ClamAV      = 0;        # clamd "FOUND" lines
my $Passed      = 0;        # Passed CLEAN + Passed SPAMMY
my $Spammy      = 0;        # Passed SPAMMY
my $Clean       = 0;        # Passed CLEAN

# --- deferred deliveries and quarantine ---
my $DefCount    = 0;        # number of distinct deferred queue ids
my $BHcount     = 0;        # number of bad-header quarantine ids collected
my $VQueCount   = 0;        # number of virus quarantine ids collected

# --- lookup tables ---
my %Senders     = ();       # sender address      -> message count
my %Recivers    = ();       # recipient address   -> message count
my %MailWare    = ();       # malware name        -> hit count
my %Deferred    = ();       # deferred queue id   -> times seen
my %DefMessage  = ();       # deferred queue id   -> last error text
my %DefRelay    = ();       # deferred queue id   -> relay host
my %DefTo       = ();       # deferred queue id   -> recipient address
my %DefTime     = ();       # deferred queue id   -> time of last deferral
my @BadHeader   = ();       # bad-header quarantine ids
my @VirQuar     = ();       # virus quarantine ids
# my $emailsenderformat ="^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$";
# Main pass over the log: read every line from the files named on the command
# line (or STDIN), classify it, and update the file-level counters and tables.
# Each line is matched against an ordered chain of patterns; the first branch
# that matches wins, so the order of the elsif branches matters.
InputLoop: while ( my $line = <> ) {
    # The first 15 characters of a line are its timestamp (Month, Day, Time).
    my $stamp = substr $line, 0, 15;
    $StDate = $stamp if $inlines == 0;    # first line gives the report start
    $inlines += 1;
    $EDate = $stamp;                      # the last line read gives the report end

    if ( $line =~ m!postfix/cleanup!i ) {    # start of new message processing
        # Tally the previous message's addresses in the sender/recipient hashes.
        # (The original indexed undeclared arrays with the address string.)
        # NOTE: nothing in this loop assigns $Sender or $Dest yet, so both
        # guards are always false until address extraction is added.
        $Senders{$Sender} += 1 unless length($Sender) <= 1;
        $Recivers{$Dest}  += 1 unless length($Dest) <= 1;

        if ( $line =~ / ([0-9A-F]{10,}): message-id=/i ) {
            $currID = $1;    # use this as a hash id for message threads?
            $Messages += 1;
            $Sender = "";
            $Dest   = "";
            # add logic to handle multiple messages in the process loop at the same time
        }
        elsif ( $line =~ /discard/i ) {    # input messages that Postfix does not queue for amavis
            $Discards += 1;
            if ( $line =~ /Crylic/i ) {
                $Crylic += 1;
            }
            elsif ( $line =~ /no third-party DSNs/ ) {
                $DSN += 1;
            }
            elsif ( $line =~ /The Bat as a Mailer/ ) {
                $TheBat += 1;
            }
            else {
                print STDOUT "Discard type not Tracked for :\n\t" . $line;
            }
        }
    }
    elsif ( $line =~ /Passed CLEAN/i ) {    # SpamAssassin thinks it is ok
        $Passed += 1;
        $Clean  += 1;
    }
    elsif ( $line =~ /Passed SPAMMY/i ) {    # SpamAssassin thinks it may be spam (junk mail folder)
        $Passed += 1;
        $Spammy += 1;
    }
    elsif ( $line =~ /Passed BAD-HEADER/i ) {    # bad header on incoming message, placed in quarantine
        if ( $line =~ /quarantine: (badh-[-+a-zA-Z0-9]{10,15}), Message-ID:/ ) {
            # push keeps the list dense; storing at a pre-incremented index
            # left element 0 undefined and printed a blank entry in the report.
            push @BadHeader, $1;
            $BHcount += 1;
        }
    }
    elsif ( $line =~ /NOQUEUE: reject/i ) {    # not queued by postfix, bounced back to sending server
        $Messages += 1;
        $Rejects  += 1;
#       Use the first form if we choose to track the invalid senders <jhc>
#       if ($line =~ /Helo command rejected: need fully-qualified hostname; from=<($emailsenderformat)>/i) {
        if ( $line =~ /Helo command rejected: need fully-qualified hostname; from=/i ) {
            $SAddress += 1;
            # Track invalid senders?
        }
        elsif ( $line =~ /Sender address rejected/i ) {
            $SAddress += 1;
            # Track invalid senders?
        }
        elsif ( $line =~ /Recipient address rejected:/i ) {    # we don't have that email address locally
            $DAddress += 1;
            # Track invalid receiver?
        }
        elsif ( $line =~ /Relay access denied/i ) {    # someone wants to relay (ain't no way)
            $Relay += 1;
        }
        elsif ( $line =~ /Server configuration error/i ) {    # we goofed on a change, revert it ASAP
            $ConfErr += 1;
        }
        elsif ( $line =~ /Improper use of SMTP command pipelining/i ) {    # sender is not forming SMTP correctly
            $Pipe += 1;
        }
        else {
            $donknow += 1;
            print STDOUT "Unknown Reject = \n\t" . $line;
        }
    }
    elsif ( $line =~ /Blocked SPAM/ ) {
        $Blocked += 1;
    }
    elsif ( $line =~ /amavis\[\d*\]/i ) {
        if ( $line =~ /Checking:/i ) {
            $amavis += 1;
        }
        elsif ( $line =~ /local delivery: <> -> <virus-quarantine/i ) {    # clamd had problems with it (encrypted zip?)
            if ( $line =~ m!mbx=/opt/zimbra/data/amavisd/quarantine/(virus-[-+a-zA-Z0-9]{10,15})! ) {
                push @VirQuar, $1;
                $VQueCount += 1;
            }
        }
    }
    elsif ( $line =~ /clamd\[\d{1,}\]:/i ) {    # ClamAV message
        if ( $line =~ /FOUND/ ) {    # found malware
            $ClamAV += 1;
            # use "'" as the regexp delimiter instead of "/" because of the path in the pattern <jhc>
            if ( $line =~ m'/opt/zimbra/data/amavisd/tmp/amavis-\d{8,8}T\d{6,6}-(\d{4,5})/parts/\w{1,}:\s{1,}((\w*\.*)*-{0,1}(\w*-*)*)'i ) {
                $MailWare{$2} += 1;    # save the malware name and counter for the report
            }
        }
    }
    elsif ( $line =~ /deferred/ ) {    # a deferred message; we may see the same one many times
        # Captures: queue id, recipient address, relay host name.
        # The pid and queue-id lengths are not fixed, so accept any pid and
        # the same ">= 10 hex digits" queue id the cleanup branch accepts.
        # Only deliveries to port 25 on an IPv4 relay are recognised.
        if ( $line =~ m!postfix/smtp\[\d+\]: ([A-F0-9]{10,}): to=<(\S+?\@\S+?)>, relay=([[:print:]]+?)\[\d+\.\d+\.\d+\.\d+\]:25! ) {
            my ( $msgid, $address, $relay ) = ( $1, $2, $3 );
            my $err = "I missed the error msg";
            if ( $line =~ /said: ([[:print:]]{1,})$/ ) {
                $err = $1;
            }
            elsif ( $line =~ /talk to me: ([[:print:]]{1,})$/ ) {
                $err = $1;
            }
            $Deferred{$msgid}  += 1;
            $DefCount          += 1 unless $Deferred{$msgid} > 1;    # count each queue id once
            $DefMessage{$msgid} = $err;
            $DefRelay{$msgid}   = $relay;
            $DefTo{$msgid}      = $address;
            $DefTime{$msgid}    = substr $EDate, 7, 8;    # HH:MM:SS part of the timestamp
        }
    }
    elsif ( $line =~ /-> <virus-quarantine>/i ) {    # clamd had problems with it (encrypted zip?)
        # Same id pattern and same append logic as the amavis branch above, so
        # the two paths can no longer overwrite each other's entries.
        if ( $line =~ m!mbx=/opt/zimbra/data/amavisd/quarantine/(virus-[-+a-zA-Z0-9]{10,15})! ) {
            push @VirQuar, $1;
            $VQueCount += 1;
        }
#   }   else    {
#       # Unknown Line print it out
#       print STDOUT "Line not counted\n\t>>>" . $line . "<<<\n";
    }
}
# full file processed now for the report
#
# Prints the summary to STDOUT: header, postfix discard/reject counts,
# amavis/SpamAssassin/ClamAV counts, quarantined message ids and finally the
# deferred deliveries.  Reads only the file-level counters and tables.

# Print a list of ids four to a line, each line starting "\n\t\t" and ids
# separated by ", ", then terminate with a newline.  Undefined slots are
# skipped so a sparse list cannot produce blank entries.
sub _print_id_list {
    my @ids = grep { defined } @_;
    while ( my @row = splice @ids, 0, 4 ) {
        print STDOUT "\n\t\t" . join( ", ", @row );
    }
    print STDOUT "\n";
    return;
}

print STDOUT "Mail Log Report Ver. " . $CVersion . " for;\n    " . $StDate . " Through " . $EDate . "\n";
print STDOUT "    Log contains          \t = " . $inlines . " Lines\n";
print STDOUT "\tTotal Messages processed = " . $Messages . "\n";
print STDOUT "\tDiscarded messages       = " . $Discards . "\n";
print STDOUT "\t   Crylic Discards       = " . $Crylic . "\n";
print STDOUT "\t   The Bat X-Mailer      = " . $TheBat . "\n";
print STDOUT "\t   BackScatter           = " . $DSN . "\n";
print STDOUT "\tRejected Messages        = " . $Rejects . "\n";
print STDOUT "\t   Invalid From address  = " . $SAddress . "\n";
print STDOUT "\t   Invalid To Address    = " . $DAddress . "\n";
print STDOUT "\t   Relay Request         = " . $Relay . "\n";
print STDOUT "\t   Improper Pipelining   = " . $Pipe . "\n";
if ( $ConfErr > 0 ) {
    print STDOUT "\t   Config Error !!! !!!  = " . $ConfErr . "\n";
}
# The original tested a misspelled variable here, so this line never printed.
if ( $donknow > 0 ) {
    print STDOUT "\t   Unknown Rej.          = " . $donknow . "\n";
}
print STDOUT "    Messages processed by CLAMAV &\n    SpamAssissin\t\t = " . $amavis . "\n";
print STDOUT "\tBlocked SPAM  **         = " . $Blocked . "\n";
print STDOUT "\tPassed Messages          = " . $Passed . "\n";
print STDOUT "\t     Passed Clean ?      = " . $Clean . "\n";
print STDOUT "\t     Passed Spammy       = " . $Spammy . "\n";
print STDOUT "\tClamAV hits **           = " . $ClamAV . "\n";
if ( $ClamAV > 0 ) {
    # Sorted so the report order is the same from run to run.
    foreach my $kval ( sort keys %MailWare ) {
        my $hits   = $MailWare{$kval};
        my $plural = ( $hits < 2 ) ? "" : "s";
        print STDOUT "\t\t" . $kval . " found " . $hits . " time" . $plural . "\n";
    }
    print STDOUT " ** Note that ClamAV hits are also counted as SPAM\n";
}
if ( ( $BHcount > 0 ) || ( $VQueCount > 0 ) ) {
    print STDOUT "\tQuarantined messages\t= " . ( $BHcount + $VQueCount ) . "\n";
    print STDOUT "\t  messages can be examined in /opt/zimbra/data/amavisd/quarantine/ \n";
    if ( $BHcount > 0 ) {
        print STDOUT "\t     Bad Headers\t\t= " . $BHcount;
        _print_id_list(@BadHeader);
    }
    if ( $VQueCount > 0 ) {
        print STDOUT "\t     Virus\t\t= " . $VQueCount;
        _print_id_list(@VirQuar);
    }
}
if ( $DefCount > 0 ) {    # we had 1 or more deferred emails
    print STDOUT "Unique Deferred messages\t = " . $DefCount . "\n";
    foreach my $msgid ( sort keys %Deferred ) {
        print STDOUT "\t" . $msgid . " occured " . $Deferred{$msgid} . " times\n\t\taddressed to ";
        print STDOUT $DefTo{$msgid} . "\n\t\tvia relay " . $DefRelay{$msgid} . "\n\t\tLast responce at " . $DefTime{$msgid} . " was:\n";
        print STDOUT "\t" . $DefMessage{$msgid} . "\n";
    }
}

Last edited by jrefl5; 06-30-2009 at 12:20 PM.. Reason: the attachement did not attach
Reply With Quote