#!/usr/bin/perl # dbxrecover, a program for recovering mail from damaged Outlook dbx files. # Copyright (C) 2005 Piotr Pawlow <pp@siedziba.pl> # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. use strict; use warnings; { package DBX::ChunkList; sub new { my $class = shift; my $self = {}; bless ($self, $class); $self->{list} = [ @_ ]; return $self; } sub join { my $self = shift; my $data = ''; $self->join_int(\$data); return $data; } sub join_int { no warnings qw(recursion); my $self = shift; my $dataref = shift; my $list = $self->{list}; for my $elem (@{$list}) { if (ref($elem)) { $elem->join_int($dataref) } else { $$dataref .= $elem; } } } sub first { my $self = shift; my $list = $self->{list}; my $elem = ${$list}[0]; $elem = $elem->first() if ref($elem); return $elem; } sub push { my $self = shift; my $a1 = $self->{list}; my $a2 = shift; my $a2_list = $a2->{list}; if (scalar @{$a2_list} < 32) # FIXME: Arbitrary value choosen without much care. Probably not optimal. { push @{$a1}, @{$a2_list}; } else { push @{$a1}, $a2; } } } { package DBX::ChainFinder; use BerkeleyDB; use File::Temp; sub new { my $class = shift; my $self = {}; bless ($self, $class); $self->reset; return $self; } sub reset { my $self = shift; $self->{id} = {}; $self->{next} = {}; $self->{stats} = { 'chunkstot' => 0, 'chainfrag' => 0, 'chainscompl' => 0, }; } # FIXME: this function is too convoluted sub combine { my $self = shift; my $hash = shift; my $otherhash = shift; my $key = shift; my $data = shift; my $idx = shift; while (defined($hash->{$key})) { my @chunkkeys = sort keys %{$hash->{$key}}; my $chunkkey = pop @chunkkeys; my $chunkref = $hash->{$key}->{$chunkkey}; my $chunkid = $chunkref->{head}->[$idx]; delete $hash->{$key}->{$chunkkey}; delete $hash->{$key} if (!scalar %{$hash->{$key}}); delete $otherhash->{$chunkid}->{$chunkkey}; delete $otherhash->{$chunkid} if (!scalar %{$otherhash->{$chunkid}}); if ($idx == 1) { $data->push($chunkref); } else { $chunkref->push($data); $data = $chunkref; } $key = $chunkid; $self->{stats}->{chainfrag}--; } return ($key, $data); } sub add { my $self = shift; my $id = shift; my $next = shift; my $data = new DBX::ChunkList(shift); my $chunkid = $self->{stats}->{chunkstot}++; my $idhash = $self->{id}; my $nexthash = $self->{next}; ($next, $data) = $self->combine($idhash, $nexthash, $next, $data, 1); ($id, $data) = $self->combine($nexthash, $idhash, $id, $data, 0); # the test for first chunk is very simple and naive, but it works well enough $self->{stats}->{chainscompl}++,return($data->join()) if (($next == 0) && ($data->first() =~ /^(From|From:|Return-Path:|Received:) /)); $idhash->{$id} = {} if (!defined($idhash->{$id})); $nexthash->{$next} = {} if (!defined($nexthash->{$next})); $data->{head} = [$id, $next]; $idhash->{$id}->{$chunkid} = $data; $nexthash->{$next}->{$chunkid} = $data; $self->{stats}->{chainfrag}++; return undef; } sub remaining { my $self = shift; return undef if (!scalar %{$self->{id}}); while (1) { (my $id, my $list) = each %{$self->{id}} or next; (my $cid, my $chain) = each %{$list} or next; my $data = $chain->join(); delete $self->{id}->{$id}->{$cid}; delete $self->{id}->{$id} if (!scalar %{$self->{id}->{$id}}); return $data; } } sub stats { my $self = shift; return $self->{stats}; } } { package DBX::Scan; sub new { my $class = shift; my $self = {}; $self->{file} = shift; $self->{cf} = DBX::ChainFinder->new; bless ($self, $class); return $self; } sub scan { my $self = shift; my $file = $self->{file}; $file->binmode(); $self->{cf}->reset; my $time = 0; my $header; $file->read($header, 16); while ((length $header) == 16) { my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $header); # test for a valid header # chunk size is always 512 bytes in all dbx files I have seen if ( ($chunksize == 512) && ($datasize <= $chunksize) && ($datasize > 0) && ($id != 0) && (($next == 0) || ($datasize == $chunksize)) ) { my $data; $file->read($data, $datasize); # if the header seems valid, we skip the whole chunk # the chance we miss a start of another chunk is low, because # the test above is pretty strict and false positives are quite rare # it also helps in cases when there are dbx files contained inside dbx my $tmp; $file->read($tmp, $chunksize-$datasize); my $message = $self->{cf}->add($id, $next, $data); $self->printmsg($message); if (time - $time > 1) { $self->printstats; $time = time; } $file->read($header, 16); } else { # skip 4 bytes and try again # headers were always at 4 byte boundary in every dbx file I have seen my $tmp; $file->read($tmp, 4); $header = substr($header, 4).$tmp; } } $self->printstats; print STDERR "\nPrinting remaining chains..."; my $message; do { $message = $self->{cf}->remaining; $self->printmsg($message); } while defined($message); print STDERR "\n"; } sub printmsg { my $self = shift; my $message = shift; if (defined($message)) { $message =~ s/^>*From />$&/m; # mbox "From " escaping # we don't have the original envelope address, so just output a fake one print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n"; print $message; print "\r\n\r\n"; } } sub printstats { my $self = shift; my $stats = $self->{cf}->stats; print STDERR "Chunks found: ".$stats->{chunkstot}."; "; print STDERR "Chain fragments: ".$stats->{chainfrag}."; "; print STDERR "Chains completed: ".$stats->{chainscompl}."\r"; } } use IO::File; my $dbxname=($ARGV[0] || "-"); print STDERR "Opening $dbxname for reading...\n"; my $dbxfile = IO::File->new("<$dbxname") or die("Error opening file!\n"); my $dbxscanner = DBX::Scan->new($dbxfile); print STDERR "Searching for messages...\n"; $dbxscanner->scan; print STDERR "\n";