#!/usr/bin/perl
# Program:	pemf
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Version:	2.04
# Description:	Perl Extensible Mail Filter
#
# Installation:
# 1) Install this and make it readable/executable
# 2) Install the Filter.pm in the same directory as pemf or in your home
#    directory and make it readable
# 3) Make a gobally readable .forward file in your home directory containing:
#    "|/path/to/pemf -l"
#
# Credit:	Originally idea from mailfilt by Michael Fisk
#		(http://www.nmt.edu/~mfisk/mailfilt.html)
use strict;
use Fcntl ':flock';
my $PROGNAME = $0; $PROGNAME =~ s|.*/||;

# TODO:
#  Clean logs on occasion
#  Add PGP support

#########################
# User/home directory
#########################
my ($USER,$HOME,$HOST) = (getpwuid($>))[0,7];
Die("Can't get home directory\n") if (! ($HOME = (getpwuid($>))[7]));
chomp($HOST =`hostname`) if (! ($HOST = $ENV{'HOST'}) );

##################################################
# Include the personal filter module
##################################################
my $FILTER = "Filter";		# Current dir, same dir as "pemf" or home dir

my $basename = $0; $basename =~ s|/[^/]+$||;

try_filter("./${FILTER}") ||
  try_filter("./${FILTER}.pm") ||
  try_filter("$basename/${FILTER}") ||
  try_filter("$basename/${FILTER}.pm") ||
  try_filter("$HOME/${FILTER}") ||
  try_filter("$HOME/${FILTER}.pm") ||
  Die("Couldn't find a filter [$FILTER] to require\n");

sub try_filter {
  my ($f) = @_;
  return (-f $f && -r $f) ? (require $f) : 0;
}

##################################################
# Default filter info (the personal filter should/could override any of this)
##################################################

# Mailbox
sub inbox { -d "/var/spool/mail" ? "/var/spool/mail/$USER" : "/usr/mail/$USER"; }
sub save_mail { "$HOME/Mail"; } 

# Logging
sub logfile { "$HOME/Mail/pemf.log"; }

# Autorespond
sub my_name { my ($home,$user) = @_;  $user; }
sub auto_respond_from { return $_[0]; }
sub respond_key { "[pemf_resp]"; }

# Spam
sub is_spam_to { 0 }
sub is_spam_from { 0 }
sub is_spam_subject { 0 }
sub is_spam_hdr { 0 }
sub is_never_spam { 0 }
sub never_spam_from { 0 }

# Specific filter control
sub filter_mime { 0 }	# Filter out mime-version header
sub filter_urgent { 1 }	# Filter out urgent priority
sub filter_pgp { 0 }	# Filter out PGP signatures	(broken??)

# Inbound mail
sub save_inbound { 0 }
sub save_inbound_lines { -1 }

# Sendmail binary
sub sendmail {
  my $s = "/usr/bin/forge_pipe";
  $s = "/usr/sbin/forge_pipe" if (! -x $s);
  $s = "/usr/local/sbin/forge_pipe" if (! -x $s);
  $s = "/usr/local/bin/forge_pipe" if (! -x $s);
  $s = "/usr/sbin/sendmail" if (! -x $s);
  $s = "/usr/lib/sendmail" if (! -x $s);
  $s = "/usr/bin/sendmail" if (! -x $s);
  $s;
}

# The filter itself
sub mail_filter { my ($msg) = @_; send_to($msg); }

#########################
# Path settings
#########################
$ENV{PATH}	= "/bin:/usr/local/bin:/usr/bin:$HOME/bin";
$ENV{PGPPATH}	= "$HOME/.pgp";

##################################################
##################################################
# Be wary, all ye who enter here...
##################################################
##################################################
sub usage {
  foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }
  print STDERR "\n";
  print STDERR "Usage:\t$PROGNAME [-d] <mail>\n";
  print STDERR "\tFilter a mail message\n";
  print STDERR "\n";
  print STDERR "\t-l\tLog output [", logfile($HOME,$USER), "]\n";
  print STDERR "\t-spam\tThis is spam\n";
  print STDERR "\t-why\tTell me why a message was marked as spam\n";
  print STDERR "\t-d\tSet debug mode\n";
  print STDERR "\n";
  exit -1;
}

sub use_logfile() {
  my $log = logfile($HOME,$USER);
  open(STDERR,">>$log") || Die("Can't open logfile [$log]\n");
}

sub parse_args {
  my $file;
  while (my $arg=shift(@ARGV)) {
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; }
    if ($arg =~ /^-l$/) { use_logfile; next; }
    if ($arg =~ /^-spam$/) { $MAIN::IS_SPAM=1; next; }
    if ($arg =~ /^-w(hy)?$/) { $MAIN::SPAM_WHY=1; next; }
    if ($arg =~ /^-/) { usage("Unknown option: $arg"); }
    usage("Too many messages specified [$arg and $file]") if $file;
    $file=$arg;
  }

  push(@ARGV,$file) if $file;
}
parse_args();

sub debug {
  return unless $MAIN::DEBUG;
  foreach my $msg (@_) { print STDERR "\n[$PROGNAME] $msg\n"; }
}

#########################
# Read in headers
#########################
sub read_headers {
  my %msg;

  my $hdr;

  my $found_body=0;
  while (<>) {
    if (/^\s*$/) {
      # Done with headers
      # Set up 'rcpt' key
      my @rcpt;
      add_list($msg{headers}{to},\@rcpt);
      add_list($msg{headers}{cc},\@rcpt);
      add_list($1,\@rcpt) if $msg{headers}{x_apparently_to} =~ /^(\S+)/;
      $msg{headers}{rcpt}=join(',',@rcpt);
      # And 'RCPT' key, which includes all the "Received: .. for <...>"
      add_list($1,\@rcpt) while ($msg{headers}{received} =~ /for\s+<?([^>;]+)[>;]/g);
      $msg{headers}{RCPT}=join(',',@rcpt);
      $msg{headers}{RCPT_LIST}=\@rcpt;
      $found_body=1;
      last;
    }

    # Add to headers array (text) and hash (for search)
    push(@{$msg{header_list}},$_) unless ( (filter_mime() && /^mime-version:/i) ||
                            (filter_urgent() && /^priority:\s*urgent/i) );
    chomp;
    if ($. == 1 && /^From (.+)\s+(\S+\s+\S+\s+\d+\s+[\d:]+\s+\d+)\s*$/) {
      ($msg{headers}{FROM},$msg{headers}{DATE}) = ($1,$2);
    } elsif (/^(\S+):\s+(\S.*)$/) {
      # Normal header
      $hdr = lc($1);  my $val = $2;
      $hdr =~ s/-/_/g;
      $msg{headers}{$hdr} = $msg{headers}{$hdr} ?  "$msg{headers}{$hdr}\n$val" : $val;
    } else {
      # Continuation of previous header
      s/^\s+/ /;
      $msg{headers}{$hdr} .= $_;
    }
  }

  # Boundary?
  $msg{boundary} = $1
    if $msg{headers}{content_type} =~ /multipart.alternative.*boundary="([^"\s]+)"/;
  $msg{boundary} = $1 if !$msg{boundary} && 
    $msg{headers}{content_type} =~ /multipart.alternative.*boundary=(\S+)/;

  print STDERR "-"x50,"\n";
  print STDERR "[Message with no body?]\n" if (!$found_body);
  print STDERR "From:    $msg{headers}{from}\n";
  print STDERR "To:      $msg{headers}{rcpt}\n";
  print STDERR "Subject: $msg{headers}{subject}\n";

  \%msg;
}

sub change_header {
  my ($msg,$head,$str) = @_;
  for (my $i=0; $i<=$#{$msg->{header_list}}; $i++) {
    $msg->{header_list}[$i] = "$1 $str\n"
      if $msg->{header_list}[$i] =~ /^(${head}:)/i;
  }
}

sub get_body {
  my ($msg) = @_;
  return @{$msg->{body}} if $msg->{body};
  @{$msg->{body}} = <>;
  @{$msg->{body}};
}

my $GBL;
sub start_get_body_line { $GBL = 0; }
sub get_body_line {
  my ($msg) = @_;
  $msg->{body} ? ($_ = $msg->{body}[$GBL++]) : ($_ = scalar <>);
  $_;
}

#########################
# Send a canned response to an email
#########################
sub add_list {
  my ($addstr,$arr) = @_;

  my @add = split(",",$addstr);
  @add = map {
    s/\([^\)]+\)//g;			# Ignore everything in (parens)
    s/\s+//g;				# Ignore whitespace
    s/.*<([^>]+\@[^>]+)>.*/$1/g;	# Pull addresses out of brackets
    s/(.+)<[^>]+>(.*)/$1$2/g;		# Ignore everything else in <brackets>??
    s/(.*)<[^>]+>(.+)/$1$2/g;
    s/^<//;				# Erase beginning/ending <,>
    s/>$//;
    $_; } @add;
  @add = grep $_, @add;

  # Add unique entries
  foreach my $add ( @add ) {
    push(@$arr,$add) unless grep(lc($add) eq lc($_), @$arr);
  }
}

# Check headers and body for auto response indicators
sub is_auto_respond {
  my ($msg) = @_;

  my $key = respond_key();

  return 1 if $msg->{headers}{Subject} =~ /\Q$key\E/;

  my @body = get_body($msg);
  return 1 if grep(/X-MFLoop-Cnt/, @body);
  return 1 if grep(/\Q$key\E/, @body);
  0;
}

# "ignoring_from_re" is a regular expression of "from" to ignore, useful
# for auto respond spam warnings where we want to ignore any forges from
# our own domain(s)
sub auto_respond_mail {
  my ($msg,$subject,$message,$ignoring_from_re) = @_;

  # Who do we reply to?
  my @reply;
  add_list($msg->{headers}{FROM}, \@reply);
  add_list($1, \@reply) if ($msg->{headers}{reply_to} =~ /^([^<]+@\S+\.[^<]+)$/);
  add_list($1, \@reply) if ($msg->{headers}{reply_to} =~ /<([^<\s]+@[^<\s]+\.[^<\s]+)>/);
  add_list($1, \@reply) if ($msg->{headers}{received} =~ /sent by <([^>]+)>/);
  add_list($1, \@reply) if ($msg->{headers}{from} =~ /^([^<]+@\S+\.[^<]+)$/);
  add_list($1, \@reply) if ($msg->{headers}{from} =~ /<([^<\s]+@[^<\s]+\.[^<\s]+)>/);
  @reply = grep(!/$ignoring_from_re/, @reply);
  my $reply = join(',',@reply);

  # Who do we claim the message is from?
  my $from = auto_respond_from(@{$msg->{headers}{RCPT_LIST}});
  $from=$1 if $from =~ /<(.+)>/;	# Just the email address
  $from=$1 if $from =~ /^([^\s,]+)[\s,]/;
  my $from_full = my_name($HOME,$USER)."'s Auto Responder";

  my $loop = $msg->{headers}{'x-mfloop-cnt'} + 1;

  print STDERR "Auto-respond-from: $from ($from_full)\n";
  debug("Auto-respond-to: $reply\n");
  return print STDERR "No email to reply to (forged?)\n" unless $reply;
  return if ($MAIN::DEBUG);

  my $sendmail = sendmail();
  my $key = respond_key();
  return unless open(RESPOND,"|$sendmail -i -f \"$from\" -F \"$from_full\" $reply");
  print RESPOND <<RESPOND_MESSAGE;
From: $from_full <$from>
Subject: $key $subject
To: $reply
X-Mailer: $PROGNAME
X-MFLoop-Cnt: $loop
In-Reply-To: $msg->{headers}{message_id}

$message

RESPOND_MESSAGE
  close(RESPOND);
}

#########################
# Locking
#########################
my $UNDELIV = "$HOME/UNDELIVERED_MAIL";
my $LOCK_TIMEOUT = 5;

# Attempt something every second for a number of seconds
sub attempts {
  my ($num,$sub) = @_;
  my $ret;
  while (!($ret = &$sub) && $num-->0) { sleep 1; }
  $ret;
}

my $FILELOCK;
sub get_filelock {
  my ($box,$msg) = @_;
  my $filelock = "$box.lock";
  send_die($msg,$box,"Gave up waiting on $filelock")
    unless attempts($LOCK_TIMEOUT, sub { ! -e "$filelock" });
  send_die($msg,$box,"Couldn't open $filelock")
    unless attempts($LOCK_TIMEOUT, sub { open(LOCK,">$filelock") });
  $FILELOCK = $filelock;
  print LOCK $$;
  close LOCK;
}

sub remove_filelock { unlink $FILELOCK if $FILELOCK; undef $FILELOCK;}

sub get_folder_lock {
  my ($box,$msg) = @_;

  # No locking, just do it
  if ($box eq $UNDELIV) {
    send_die($msg,$box,"Can't even open [$box]!") unless open(FOLDER,">>$box");
    return 0;
  }

  # First the file .lock
  get_filelock($box,$msg);

  # Open it
  send_die($msg,$box,"Can't open $box.")
    unless attempts($LOCK_TIMEOUT, sub { open(FOLDER,">>$box") });

  # Flock it, in an alarmed eval
  eval {
    local $SIG{ALRM} = sub { die "Alarm timeout waiting for flock [$box]\n" };
    alarm $LOCK_TIMEOUT;
    send_die($msg,$box,"Flock failed: [$!]")
      unless (flock(FOLDER,LOCK_EX));
    alarm 0;
  };
  send_die($msg,$box,$@) if $@;

  # In case someone appended while we were waiting
  seek(FOLDER,0,2) || print STDERR "Seek failed [$!]\n";

#    select((select(FOLDER), $|=1)[0]);	# Don't buffer box

  1;
}

#########################
# Write the mail somewhere
#########################
sub send_die {
  my ($msg,$box,$err) = @_;

  remove_filelock();

  # First we try the default mailbox, then we try the undelivered mail file
  if ($box eq $UNDELIV) {
    Die($err);
  } elsif ($box eq inbox($HOME,$USER) || !$box) {
    print STDERR "$err\n";
    print STDERR "Attempting undelivered mail file [$UNDELIV]\n";
    $box = $UNDELIV;
    push(@{$msg->{header_list}},"PEMF-send_die: ERROR: see log, attempting $box\n");
  } else {
    print STDERR "$err\n";
    print STDERR "Attempting default box\n";
    undef $box;		# Default
    push(@{$msg->{header_list}},"PEMF-send_die: ERROR: see log, attempting default box\n");
  }

  send_to($msg,$box);
}

sub send_to {
  my ($msg,$box,$no_quit) = @_;

  my ($lock,$remail,$lines)=(0,0,0);

  #########################
  # Open up INBOUND
  #########################
  my $inbound = $msg->{no_save_inbound} ? 0 : save_inbound($HOME,$USER);
  $inbound = 0 unless $msg->{header_list};
  $inbound = 0 if $MAIN::DEBUG;
  if ($inbound) {
    if (!open(INBOUND,">>$inbound")) {
      print STDERR "Open inbound failed: [$inbound: $!]\n";
      $inbound = 0;
    } else {
      if (!flock(INBOUND,LOCK_EX)) {
        print STDERR "Inbound flock failed: [$inbound: $!]\n";
        $inbound = 0;
      } else {
        seek(INBOUND, 0, 2);	# In case someone appended while we were waiting
        $inbound = save_inbound_lines();
      }
    }
  }

  #########################
  # Open the box
  #########################
  if ($box =~ /^\|(.+)/) {
    # Command
    my $pipe=$1;
    print STDERR "Command: [$pipe]";
    ($no_quit ? return : exit(0)) if $MAIN::DEBUG;
    $pipe =~ /^(\S+)/; send_die($msg,$box,"Can't find command [$1]") if (! -x $1);
    open(FOLDER,"|$pipe") || send_die($msg,$box,"Can't run [$pipe]");
  } elsif ($box =~ /\@/) {
    # Address
    print STDERR "Remail:  [$box]";
    ($no_quit ? return : exit(0)) if $MAIN::DEBUG;
    my $sendmail = sendmail();
    open(FOLDER,"|$sendmail $box") || send_die($msg,$box,"Can't run sendmail to [$box]");
    $remail=1;
  } else {
    # File
    $box=inbox($HOME,$USER) unless $box;
    $box=save_mail($HOME,$USER)."/$box" if ($box !~ m|^/|);
    print STDERR "File:    [$box]";
    ($no_quit ? return : exit(0)) if $MAIN::DEBUG;

    # Lock
    $lock = get_folder_lock($box,$msg);
  }

  #########################
  # Write it
  #########################
  my $lines = 0;
  if ($msg->{header_list}) {
    print FOLDER @{$msg->{header_list}},"\n";
    print INBOUND @{$msg->{header_list}},"\n" if $inbound;
    $lines=@{$msg->{header_list}} + 1;
  }
  if ($remail) {
    print FOLDER "X-Forwarded-By: $USER\@$HOST\n";
    print FOLDER "X-Filter: $PROGNAME\n";
  }
  my $pgp;

  get_body($msg) if $no_quit;	# We'll probably be back, so save the body

  start_get_body_line();
  while (get_body_line($msg)) {
    print INBOUND if ($inbound==-1 || $.<=$inbound);

    # START PGP
    if (filter_pgp() && /^-+BEGIN PGP SIGNED/) {
      $pgp++;
      if (!open(PGP,"|pgp -f >/tmp/mailfilt.$$.$pgp 2>&1 >/dev/null")) {
        warn "PGP: $!";
        $pgp--;
      }
    }
    # IN PGP
    if (filter_pgp() && $pgp) {
      filter_pgp || print PGP || warn $!;
      if (/^-+END PGP/) {
# Blah - need to fix this
        print FOLDER "----- $PROGNAME [pgp]: 0\n" || warn $!;
        $pgp--;
      }

    # NORMAL MAIL TEXT
    } else {
      # Clean equals?
      undef($msg->{in_boundary}) if $msg->{boundary} && /^(--)?$msg->{boundary}/;
      if ($msg->{clean_equals} || $msg->{in_boundary}{clean_equals}) {
        chomp, chop if (/=$/);
        # Non =escapes, but probably just as unreadable
        s/\x92/'/g;
        s/\x93/"/g;   # double left quote  ``?
        s/\x94/"/g;   # double right quote ''?
        s/\x96/--/g;  # em-dash

        # =escapes
        s/=(20|0D|85|96)//g;	# LF, CR, ??, ??
        s/=91/`/g;  s/=92/'/g;	# Quotes
        s/=93/  /g; s/=94/\n    /g;	# <p> and </p>
        s/=97/ /g;		# ??
        s/=B7/-/g;		# '·' doesn't show up with less
        s/=([0-9A-F]{2})/sprintf('%c',hex($1))/eg;
      }
      $msg->{in_boundary}{clean_equals} = 1
        if $msg->{boundary} && /Content-Transfer-Encoding:\s+quoted-printable/i;

# I need a good piece of HTML conversion code for this..
#      # HTML?
#      $msg->{in_boundary}{unhtml} = 1
#        if $msg->{boundary} && /Content-Type:\s+text\/html/i;


      s/–//g if $msg->{clean_v};	# Yahoo has been using this troublesome char.

      print FOLDER || warn $!;
    }

    $lines++;
  }
  print FOLDER "\n";
  print INBOUND "\n" if ($inbound);

  #########################
  # Unlock and close it
  #########################
  if ($inbound) {
    flock(INBOUND,LOCK_UN) || print STDERR "Inbound unlock failure: $!\n";
    close INBOUND;
  }

  !$lock || flock(FOLDER,LOCK_UN) || print STDERR "Unlock failure: $!\n";
  close(FOLDER) || send_die($msg,$box,"Close failure: $!");
  remove_filelock();

  print STDERR " [$lines lines]\n";
  exit 0 unless $no_quit;
}

#########################
# Fatal
#########################
sub Die {
  $! = 75;
  foreach ( @_ ) { print STDERR "[$PROGNAME] $_\n"; }
  die("[$PROGNAME] ".$_[0]."\n");
}

##################################################
# Spam checking
##################################################
sub is_spam {
  my ($msg) = @_;

  my $reason;

  my $is_spam = $MAIN::IS_SPAM;

  # Spam triggers (use "$is_spam ||" for efficiencies sake)
  $is_spam = $is_spam || $msg->{headers}{from} eq "" && ($reason = "from eq \"\"");
  $is_spam = $is_spam || $msg->{headers}{from} eq "<>" && ($reason = "from eq <>");
  $is_spam = $is_spam || is_spam_to($msg->{headers}{RCPT},$msg->{headers}{to}) && ($reason = "is_spam_to:\n  $msg->{headers}{RCPT}\n  $msg->{headers}{to}");
  $is_spam = $is_spam || is_spam_from($msg->{headers}{FROM}) && ($reason = "is_spam_from($msg->{headers}{FROM})");
  $is_spam = $is_spam || is_spam_subject($msg->{headers}{subject}) && ($reason = "is_spam_subject($msg->{headers}{subject})");
  $is_spam = $is_spam || is_spam_hdr($msg) && ($reason = "is_spam_hdr()");

  # Special, non-spam cases
  unless ($MAIN::IS_SPAM) {
    $is_spam = 0 if is_never_spam($msg);
    $is_spam = 0 if never_spam_from($msg->{headers}{FROM});
  }

  die("\n\nSPAM (-why): $reason\n") if ($is_spam && $reason && $MAIN::SPAM_WHY);
  $is_spam;
}

##################################################
# Main
##################################################
sub main {
  $!="";
  my ($msg) = read_headers();

  # Infinite loop avoidance
  exit 0 if ($msg->{headers}{'x-mfloop-cnt'} > 5);		# Using my LOOP_CNT header
  exit 0 if (grep(/^Received:/, @{$msg->{header_list}}) > 200);	# More than 200 Received headers!
  mail_filter($msg);
  exit 0;
} main();

