#!/usr/bin/perl

# Copyright (c) 2006 Bob Beck <beck@openbsd.org>.  All rights reserved.
# Copyright (c) 2009,2013,2014 Jim Razmus II <jim@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use strict;
use warnings;
use IO::Handle;
use Net::DNS;
use Email::Valid;
use POSIX qw(setgid setsid setuid);
use Sys::Syslog qw(:DEFAULT setlogsock);

use constant {
    STATE     => 0,
    IP        => 1,
    HOST_NAME => 2,
    MAIL_FROM => 3,
    RCPT_TO   => 4,
    PASSED    => 0,
    FAILED    => 1
};

my $SCAN_INTERVAL            = 600;
my $DNS_SOCK_MAX             = 50;
my $SUSPECT_TUPLES           = 5;
my $MAX_DOMAINS              = 3;
my $MAX_SENDERS_RATIO        = 0.75;
my @BAD                      = ();
my @GOOD                     = ();
my $EXTERNAL_ADDRESS_CHECKER = q{};
my $COMPREHENSIVE            = 0;

eval `cat /etc/mail/greyscanner.conf`;

if ( !$ENV{DEBUG} ) {
    chdir '/' or die q{can't chdir to /};
    open STDIN,  '<',  '/dev/null' or die q{can't open /dev/null};
    open STDOUT, '>>', '/dev/null' or die q{can't open /dev/null};
    open STDERR, '>>', '/dev/null' or die q{can't open /dev/null};
    defined( my $pid = fork ) or die "Can't fork: $!";
    exit if ($pid);
    setsid or die "can't setsid: $!";
}

setlogsock('unix');
openlog( 'greyscanner', 'pid', 'mail' ) || die q{can't openlog};

while (1) {
    ### Beginning a Scan

    pipe( READER, WRITER );
    WRITER->autoflush(1);

    my $pid = fork;

    if ( $pid == 0 ) {
        close READER;
        &child;
        close WRITER;
        exit;
    }
    elsif ($pid) {
        close WRITER;
        &parent;
        close READER;
        wait;
    }
    elsif ( not defined $pid ) {
        syslog( 'err', 'Failed to fork' );
        close READER;
        close WRITER;
    }

    ### Scan completed: "Sleeping for $SCAN_INTERVAL secs."
    sleep $SCAN_INTERVAL;
}

closelog();

exit 0;

sub check_dns {
    my ($senders_href) = @_;
    my @domains = ( keys %$senders_href );
    my @ready   = ();
    my $res     = Net::DNS::Resolver->new;
    my $sel     = IO::Select->new;
    my %strikes = ();
    my $timeout = 5;
    my $unready = 0;

    while ( ( scalar @domains > 0 ) || $sel->count ) {

        ### Attempt to queue queries
        while ( $sel->count < $DNS_SOCK_MAX ) {
            my $d = pop @domains;
            last unless defined $d;
            $sel->add( $res->bgsend( $d, "A" ) );
            $sel->add( $res->bgsend( $d, "MX" ) );
        }

        ### Attempt to dequeue answers
        @ready = $sel->can_read($timeout);
        if (@ready) {
            foreach my $sock (@ready) {
                my $packet = $res->bgread($sock);
                if ( $packet->header->ancount == 0 ) {
                    my @q = $packet->question;
                    my $d = $q[0]->qname;
                    $strikes{"$d"}++;
                }
                $sel->remove($sock);
                $sock = undef;
            }
        }
        else {
            $unready++;
        }

        ### Check how many times we've been un-ready
        last if ( $unready == $timeout );
    }

    ### Trap everyone with neither an MX or A record
    foreach my $h ( keys %strikes ) {
        next unless ( $strikes{"$h"} == 2 );
        my @IPs = split m/\t/s, $senders_href->{"$h"};
        foreach my $IP (@IPs) {
            print WRITER qq{$IP\tMailed from sender $h with neither MX or A\n};
        }
    }

    return;
}

sub check_recipients {
    my ($recipients) = @_;
    my $m            = q{};    # addr that failed the checks or err message
    my $r            = -1;

    # XXX: filter for domains we host so we only process "our" email addresses
    my @q = split m/\t/s, $recipients;
    if ( !@q ) {
        $m = '*no recipients*';
        $r = FAILED;
    }
    else {
        foreach my $addr (@q) {
            ### Recipient: $addr
            foreach (@GOOD) {
                if ( $addr =~ $_ ) {
                    $m = q{};
                    $r = PASSED;
                    last;
                }
            }
            next if $r == PASSED;
            if ( $COMPREHENSIVE == 1 ) {
                $m = $addr;
                $r = FAILED;
            }
            last if $r == FAILED;
            foreach (@BAD) {
                if ( $addr =~ $_ ) {
                    $r = FAILED;
                    last;
                }
            }
            last if $r == FAILED;
            if ( -x $EXTERNAL_ADDRESS_CHECKER ) {
                if ( system( ( "$EXTERNAL_ADDRESS_CHECKER", "$addr" ) ) != 0 ) {
                    $r = FAILED;
                }
            }
            last if $r == FAILED;
        }
    }
    if ( $r == -1 ) { $r = PASSED; }
    return ( $r, $m );
}

sub check_senders {
    my ($senders) = @_;
    my $m         = q{};      # addr that failed the checks or err message
    my $r         = PASSED;
    my @q = split m/\t/s, $senders;
    if ( !@q ) {
        $m = '*no senders*';
        $r = FAILED;
    }
    else {
        foreach my $addr (@q) {
            ### Sender: $addr
            next
              if (
                Email::Valid->address(
                    -address     => "$addr",
                    -fudge       => 1,
                    -local_rules => 1
                )
              );
            $m = "$addr ($Email::Valid::Details)";
            $r = FAILED;
            last;
        }
    }
    return ( $r, $m );
}

sub check_thresholds {
    my ( $senders, $recipients ) = @_;
    my $m     = q{};
    my $r     = PASSED;
    my @qr    = split m/\t/s, $recipients;
    my @qs    = split m/\t/s, $senders;
    my $count = @qs;

    if ( $count > $SUSPECT_TUPLES ) {
        my %R = ();
        my %S = ();
        my %D = ();

        # count the unique senders, recipients, and domains.
        foreach my $i (@qr) {
            $R{"$i"}++;
        }
        foreach my $i (@qs) {
            $S{"$i"}++;
            $i =~ s/[^\@]+\@//;
            $D{"$i"}++;
        }

        my $rcount = keys %R;
        my $scount = keys %S;
        my $dcount = keys %D;

        if ( $dcount > $MAX_DOMAINS ) {
            $m = "sending from $dcount domains (> $MAX_DOMAINS)";
            $r = FAILED;
        }
        elsif ( $scount / $count > $MAX_SENDERS_RATIO ) {
            $m = "ratio is $scount/$count (> $MAX_SENDERS_RATIO)";
            $r = FAILED;
        }

    }
    return ( $r, $m );
}

sub child {

    my ( $uid, $gid ) = ( getpwnam '_greyscanner' )[ 2, 3 ];
    unless ( $uid and $gid ) {
        syslog( 'err', '_greyscanner uid and gid not found' );
        return 0;
    }

    setgid($gid);    # change real and effective group id's
    $) = "$gid $gid";    # ~= setgroups
    setuid($uid);        # change real and effective user id's
    unless ( $( == $gid && $) == $gid && $< == $uid && $> == $uid ) {
        syslog( 'err', 'Failed to setgid or setuid' );
        return 0;
    }

    my %WHITE;
    my %GREY;
    my %TRAPPED;
    my %FROM;
    my %RCPT;
    my %SENDERS;
    my @line;
    my $spamdb;

    unless ( open $spamdb, q{-|}, '/usr/sbin/spamdb' ) {
        syslog( 'err', q{can't invoke spamdb!} );
        return 0;
    }
    while (<$spamdb>) {
        chomp;
        @line = split m/[|]/s;

        if ( $line[STATE] eq 'WHITE' ) {

            $WHITE{"$line[IP]"}++;

        }
        elsif ( $line[STATE] eq 'TRAPPED' ) {

            $TRAPPED{"$line[IP]"}++;

        }
        elsif ( $line[STATE] eq 'GREY' ) {

            $line[MAIL_FROM] =~ s/^<//;
            $line[MAIL_FROM] =~ s/>$//;
            $line[RCPT_TO] =~ s/^<//;
            $line[RCPT_TO] =~ s/>$//;

            if ( $GREY{"$line[IP]"} ) {
                $FROM{"$line[IP]"} .= "\t$line[MAIL_FROM]";
                $RCPT{"$line[IP]"} .= "\t$line[RCPT_TO]";
            }
            else {
                $GREY{"$line[IP]"}++;
                $FROM{"$line[IP]"} = "$line[MAIL_FROM]";
                $RCPT{"$line[IP]"} = "$line[RCPT_TO]";
            }

        }

    }
    close $spamdb;

    my $wi = keys %WHITE;
    my $tr = keys %TRAPPED;
    my $gr = keys %GREY;

    ### Loaded: "$wi whitelisted, $tr trapped, $gr unique greys"

    my ( $r, $m );

    foreach my $grey ( keys %GREY ) {

        ### Analyzing Grey IP: $grey

        ### Ignore if it's already done
        next if ( $TRAPPED{$grey} || $WHITE{$grey} );

        ### Check the recipients.
        ( $r, $m ) = &check_recipients( $RCPT{$grey} );
        if ( $r == FAILED ) {
            print WRITER qq{$grey\tfailed recipient check: $m\n};
            $TRAPPED{$grey}++;
        }
        next if $TRAPPED{$grey};

        ### Check the senders.
        ( $r, $m ) = &check_senders( $FROM{$grey} );
        if ( $r == FAILED ) {
            print WRITER qq{$grey\tfailed sender check: $m\n};
            $TRAPPED{$grey}++;
        }
        next if $TRAPPED{$grey};

        ### Check if host has queued up more than our suspect threshold
        ( $r, $m ) = &check_thresholds( $RCPT{$grey}, $FROM{$grey} );
        if ( $r == FAILED ) {
            print WRITER qq{$grey\tfailed threshold check: $m\n};
            $TRAPPED{$grey}++;
        }
        next if $TRAPPED{$grey};

        next if ( $DNS_SOCK_MAX == 0 );    # skip rest if not using DNS checks;

        ### Create hash of senders keyed by the host part of the address
        # so we only look each host part up once, no matter how many hosts
        # are sending mail with it as the sender.
        my %done = ();
        my @senders = split m/\t/s, $FROM{$grey};
        foreach my $s (@senders) {

            # extract the host part.
            my $h = ( $s =~ /^.*@(.*)$/ ? $1 : $s );
            $h =~ s/\s_+//g;

            next if $done{"$h"};

            if ( $SENDERS{"$h"} ) {
                $SENDERS{"$h"} .= "\t$grey";
            }
            else {
                $SENDERS{"$h"} = "$grey";
            }

            $done{"$h"}++;
        }

    }

    return if ( $DNS_SOCK_MAX == 0 );

    ### Check DNS for hosts in the SENDERS hash
    &check_dns( \%SENDERS );

    return;
}

sub parent {
    my $line;
    my $ip_regex =
'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';
    while ( $line = <READER> ) {
        chomp $line;
        if ( $line =~ m/^($ip_regex)\t(.*)$/s ) {
            my $ip     = $1;
            my $reason = $2;
            system "/usr/sbin/spamdb -t -a $ip\n";
            syslog( 'info', "Trapped $ip: $reason" );
            ### trapped: "$ip for $reason"
        }
        else {
            syslog( 'info', "Malformed line: $line" );
            ### malformed line: $line
        }
    }
    return;
}

__END__

=head1 NAME

greyscanner - Grey trapping daemon for OpenBSD spamd

=head1 SYNOPSIS

B<greyscanner>

=head1 DESCRIPTION

greyscanner complements OpenBSD spamd(8) greylisting by applying
additional heuristics to greylisted hosts.  Additional heuristics
include: confirm senders email address is valid, confirm existence of
the senders MX or A record in DNS, confirm recipient address(es) is
valid, and more.  Offending hosts are flagged as 'trapped' in the spamd
database.

=head1 FILES

F</etc/mail/greyscanner.conf> optional config file

=head1 SEE ALSO

spamd(8), spamdb(8), L<http://bitbucket.org/bonetruck/greyscanner>

=head1 HISTORY

Bob Beck created greyscanner in 2006.  Jim Razmus II revised the
program, added documentation, and packaged it for OpenBSD in 2009.
