#!/usr/bin/perl
# dbxrecover, a program for recovering mail from damaged Outlook dbx files.
# Copyright (C) 2005 Piotr Pawlow <pp@siedziba.pl>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
use strict;
use warnings;
{
package DBX::ChunkList;
sub new
{
my $class = shift;
my $self = {};
bless ($self, $class);
$self->{list} = [ @_ ];
return $self;
}
sub join
{
my $self = shift;
my $data = '';
$self->join_int(\$data);
return $data;
}
sub join_int
{
no warnings qw(recursion);
my $self = shift;
my $dataref = shift;
my $list = $self->{list};
for my $elem (@{$list})
{
if (ref($elem))
{
$elem->join_int($dataref)
}
else
{
$$dataref .= $elem;
}
}
}
sub first
{
my $self = shift;
my $list = $self->{list};
my $elem = ${$list}[0];
$elem = $elem->first() if ref($elem);
return $elem;
}
sub push
{
my $self = shift;
my $a1 = $self->{list};
my $a2 = shift;
my $a2_list = $a2->{list};
if (scalar @{$a2_list} < 32) # FIXME: Arbitrary value choosen without much care. Probably not optimal.
{
push @{$a1}, @{$a2_list};
}
else
{
push @{$a1}, $a2;
}
}
}
{
package DBX::ChainFinder;
use BerkeleyDB;
use File::Temp;
sub new
{
my $class = shift;
my $self = {};
bless ($self, $class);
$self->reset;
return $self;
}
sub reset
{
my $self = shift;
$self->{id} = {};
$self->{next} = {};
$self->{stats} =
{ 'chunkstot' => 0,
'chainfrag' => 0,
'chainscompl' => 0,
};
}
# FIXME: this function is too convoluted
sub combine
{
my $self = shift;
my $hash = shift;
my $otherhash = shift;
my $key = shift;
my $data = shift;
my $idx = shift;
while (defined($hash->{$key}))
{
my @chunkkeys = sort keys %{$hash->{$key}};
my $chunkkey = pop @chunkkeys;
my $chunkref = $hash->{$key}->{$chunkkey};
my $chunkid = $chunkref->{head}->[$idx];
delete $hash->{$key}->{$chunkkey};
delete $hash->{$key} if (!scalar %{$hash->{$key}});
delete $otherhash->{$chunkid}->{$chunkkey};
delete $otherhash->{$chunkid} if (!scalar %{$otherhash->{$chunkid}});
if ($idx == 1)
{
$data->push($chunkref);
}
else
{
$chunkref->push($data);
$data = $chunkref;
}
$key = $chunkid;
$self->{stats}->{chainfrag}--;
}
return ($key, $data);
}
sub add
{
my $self = shift;
my $id = shift;
my $next = shift;
my $data = new DBX::ChunkList(shift);
my $chunkid = $self->{stats}->{chunkstot}++;
my $idhash = $self->{id};
my $nexthash = $self->{next};
($next, $data) = $self->combine($idhash, $nexthash, $next, $data, 1);
($id, $data) = $self->combine($nexthash, $idhash, $id, $data, 0);
# TODO: better test for first chunk. Maybe a grace period to keep seemingly complete messages, in case of false positives.
$self->{stats}->{chainscompl}++,return($data->join()) if (($next == 0) && ($data->first() =~ /^(From|From:|Return-Path:|Received:) /));
$idhash->{$id} = {} if (!defined($idhash->{$id}));
$nexthash->{$next} = {} if (!defined($nexthash->{$next}));
$data->{head} = [$id, $next];
$idhash->{$id}->{$chunkid} = $data;
$nexthash->{$next}->{$chunkid} = $data;
$self->{stats}->{chainfrag}++;
return undef;
}
sub remaining
{
my $self = shift;
return undef if (!scalar %{$self->{id}});
while (1)
{
(my $id, my $list) = each %{$self->{id}} or next;
(my $cid, my $chain) = each %{$list} or next;
my $data = $chain->join();
delete $self->{id}->{$id}->{$cid};
delete $self->{id}->{$id} if (!scalar %{$self->{id}->{$id}});
return $data;
}
}
sub stats
{
my $self = shift;
return $self->{stats};
}
}
{
package DBX::Scan;
sub new
{
my $class = shift;
my $self = {};
$self->{file} = shift;
$self->{cf} = DBX::ChainFinder->new;
bless ($self, $class);
return $self;
}
sub scan
{
my $self = shift;
my $file = $self->{file};
$file->binmode();
$self->{cf}->reset;
my $time = 0;
my $tmp;
my $buffer = '';
my $bufsize = 64*1024;
while ($file->read($tmp, $bufsize))
{
$buffer .= $tmp;
my $buflen = length $buffer;
my $idx = 0;
my $header;
while (($buflen - $idx) >= 16)
{
$header = substr($buffer, $idx, 16);
my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $header);
# test for a valid header
# chunk size is always 512 bytes in all dbx files I have seen
if (
($chunksize == 512) &&
($datasize <= $chunksize) &&
($datasize > 0) &&
($id != 0) &&
(($next == 0) || ($datasize == $chunksize))
)
{
last if ($buflen - $idx - 16 < $chunksize);
my $data = substr($buffer, $idx+16, $datasize);
# if the header seems valid, we skip the whole chunk
# the chance we miss a start of another chunk is low, because
# the test above is pretty strict and false positives are quite rare
# it also helps in cases when there are dbx files contained inside dbx
$idx += $chunksize + 16;
my $message = $self->{cf}->add($id, $next, $data);
$self->printmsg($message);
if (time - $time > 1)
{
$self->printstats;
$time = time;
}
}
else
{
# skip 4 bytes and try again
# headers were always at 4 byte boundary in every dbx file I have seen
$idx += 4;
}
}
$buffer = substr($buffer, $idx);
}
$self->printstats;
print STDERR "\nPrinting remaining chains...";
my $message;
do {
$message = $self->{cf}->remaining;
$self->printmsg($message);
} while defined($message);
print STDERR "\n";
}
sub printmsg
{
my $self = shift;
my $message = shift;
if (defined($message))
{
$message =~ s/^>*From />$&/mg; # mbox "From " escaping
# we don't have the original envelope address, so just output a fake one
print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n";
print $message;
print "\r\n\r\n";
}
}
sub printstats
{
my $self = shift;
my $stats = $self->{cf}->stats;
print STDERR "Chunks found: ".$stats->{chunkstot}."; ";
print STDERR "Chain fragments: ".$stats->{chainfrag}."; ";
print STDERR "Chains completed: ".$stats->{chainscompl}."\r";
}
}
use IO::File;
my $dbxname=($ARGV[0] || "-");
print STDERR "Opening $dbxname for reading...\n";
my $dbxfile = IO::File->new("<$dbxname") or die("Error opening file!\n");
my $dbxscanner = DBX::Scan->new($dbxfile);
print STDERR "Searching for messages...\n";
$dbxscanner->scan;
print STDERR "\n";