Code examples from 'Internet Forensics'
Chapter 10 (Patterns of Activity)

Example 10-1: extract_match_string.pl
#!/usr/bin/perl -w
# Example 10-1: extract_match_string.pl

# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)

if(@ARGV == 0 or @ARGV > 2) {
   die "Usage: $0 <pattern> [<mail file>]\n";
} elsif(@ARGV == 1) {
   $ARGV[1] = '-';
}
my $pattern = $ARGV[0];
my $flag = 0;
my $separator = 0;
my $text = '';

open INPUT, "< $ARGV[1]" or die "$0: Unable to open file $ARGV[1]\n";
while(<INPUT>) {
    if(/^From\s.*200\d$/ and $separator == 1) {
        $separator = 0;
        if($flag) { # print previous message if it matched
           print $text;
           $flag = 0;
        }
        $text = '';
    } elsif(/^\s*$/) {
        $separator = 1;
    } else {
        $separator = 0;
        if(/$pattern/) {
           $flag++;
        }
    }
    $text .= $_;
}
if($flag) {
   print $text;
}
close INPUT;

Example 10-2: search_mailfile.pl
#!/usr/bin/perl -w
# Example 10-2: search_mailfile.pl

# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)


my $minLength = 5;
if(@ARGV < 2 or @ARGV > 3) {
   die "Usage: $0 <message> <mail file> [<cutoff score>]\n";
}

my $cutoff = -1;
my $mode = 'score';
if(@ARGV == 3) {
   $cutoff = $ARGV[2];
   $mode = 'select';
}

my %msg0 = ();
my %histogram = ();
open INPUT, "< $ARGV[0]" or die "$0: Unable to open file $ARGV[0]\n";
while(<INPUT>) {
   my $block = loadBlock(\%msg0);
}
close INPUT;

open INPUT, "< $ARGV[1]" or die "$0: Unable to open file $ARGV[1]\n";
while(<INPUT>) {
   my %msg1 = ();
   my $block = loadBlock(\%msg1);
   my $score = compareWordSets(\%msg0, \%msg1);
   if($mode eq 'score') {
      $histogram{$score}++;
   } else {
      if($score >= $cutoff) {
         print "# Score: $score\n";
         print "$block\n";
      } 
   }
}
close INPUT;

if($mode eq 'score') {
   foreach my $score (sort {$a <=> $b} keys %histogram) {
      printf "%-5d  %d\n", $score, $histogram{$score};
   }
}

sub loadBlock {
   my $words = shift;
   my $block = '';
   my $body = 0;

   while(<INPUT>) {
      if($body == 0 and /^\s*$/) {
          $body = 1;
      } elsif($body == 1 and /^From\s/) {
          last;
      } elsif($body == 1) {
          my $line = lc $_;
          # fix any quoted-printable encoding
          $line =~ s/\=([0-7][0-9a-f])/chr hex $1/ge;
          # convert any punctuation to whitespace
          $line =~ s/[^a-zA-Z0-9]/ /g;
          foreach $word (split /\s+/, $line) {
             if(length $word >= $minLength) {
                $words->{$word}++;
             }
          }
      }
      $block .= $_;
   }
   $block;
}

sub compareWordSets {
   my $msg0 = shift;
   my $msg1 = shift;
   my $score = 0;
   foreach my $word (keys %$msg0) {
      if(exists $msg1->{$word}) {
         $score++;
      }
   }
   $score;
}