Code examples from 'Internet Forensics'
Chapter 5 (Web Sites)

Example 5-1: readable_html.pl
#!/usr/bin/perl -w
# Example 5-1: readable_html.pl 

# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)

use strict;
use warnings;

# Make HTML easier to read by emitting a newline after every closing tag.
#
# Usage: readable_html.pl [<html file>]
#   With no argument, or with '-', the HTML is read from standard input.
#   The reformatted text is written to standard output.
die "Usage: $0 <html file>\n" unless @ARGV < 2;

my $path = @ARGV ? $ARGV[0] : '-';

my $in;
if ($path eq '-') {
    # Three-argument open has no '-' magic, so select STDIN explicitly.
    $in = \*STDIN;
}
else {
    # Three-argument open takes the filename literally: no whitespace
    # trimming and no special meaning for a leading '&', unlike the
    # two-argument form.
    open $in, '<', $path
        or die "$0: Unable to open html file $path: $!\n";
}

while (my $line = <$in>) {
    # Non-greedy match so each closing tag on the line gets its own newline.
    $line =~ s{(</.*?>)}{$1\n}g;
    print $line;
}

close $in unless $path eq '-';

Example 5-2: extract_links.pl
#!/usr/bin/perl -w
# Example 5-2: extract_links.pl

# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)

use HTML::LinkExtor;
use LWP::Simple;
use strict;
use warnings;

# Fetch the page at the given URL and list every link found in it, one per
# line, as "<tag>  <url>", sorted by tag name and then by URL.
#
# Usage: extract_links.pl <url>
die "Usage: $0 <url>\n" unless @ARGV == 1;

my $url = $ARGV[0];

# get() signals failure with undef; test definedness so that a successfully
# fetched but empty (or "0") document is not reported as an error.
my $doc = get($url);
die "$0: Unable to get url: $url\n" unless defined $doc;

# The page URL is passed as the base so relative links come back absolute.
my $parser = HTML::LinkExtor->new(undef, $url);
$parser->parse($doc)->eof;

# Maps each distinct URL to the tag it was found in; if a URL occurs under
# several tags, the last one seen wins.
my %tag_for = ();

foreach my $link ($parser->links) {
    # Each entry is [ $tag, $attr => $url, $attr => $url, ... ].  One tag
    # may carry several link attributes, so record every URL, not just
    # the first.
    my ($tag, %url_for_attr) = @{$link};
    foreach my $found (values %url_for_attr) {
        $tag_for{$found} = $tag;
    }
}

foreach my $key (sort { $tag_for{$a} cmp $tag_for{$b} or $a cmp $b }
                 keys %tag_for) {
    printf qq[%-6s  %s\n], $tag_for{$key}, $key;
}

Example 5-3: extract_form_elements.pl
#!/usr/bin/perl -w
# Example 5-3: extract_form_elements.pl

# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)

use HTML::TokeParser;
# Print the source text of every form-related start tag in an HTML file,
# one per line, plus each closing </form> tag followed by a blank line.
#
# Usage: extract_form_elements.pl <html file>
die "Usage: $0 <html file>\n" unless @ARGV == 1;

my $html = HTML::TokeParser->new($ARGV[0]) || die "Can't open: $!";

# Start tags whose text is echoed to the output.
my %is_form_tag = map { $_ => 1 }
                  qw(form button input select option textarea);

while (my $token = $html->get_token) {
    my $type = $token->[0];

    if ($type eq 'S') {
        # Start-tag tokens keep the original tag text in element 4.
        print $token->[4] . "\n" if $is_form_tag{ $token->[1] };
    }
    elsif ($type eq 'E' and $token->[1] eq 'form') {
        # End-tag tokens keep the original tag text in element 2.
        print $token->[2] . "\n\n";
    }
}