diff options
Diffstat (limited to 'dbxrecover-1p')
| -rwxr-xr-x | dbxrecover-1p | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/dbxrecover-1p b/dbxrecover-1p new file mode 100755 index 0000000..db37ab6 --- /dev/null +++ b/dbxrecover-1p @@ -0,0 +1,310 @@ +#!/usr/bin/perl + +# dbxrecover, a program for recovering mail from damaged Outlook dbx files. +# Copyright (C) 2005 Piotr Pawlow <pp@siedziba.pl> + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +use strict; +use warnings; + +{ + package DBX::ChunkList; + + sub new + { + my $class = shift; + my $self = {}; + bless ($self, $class); + $self->{list} = [ @_ ]; + return $self; + } + + sub join + { + my $self = shift; + my $data = ''; + $self->join_int(\$data); + return $data; + } + + sub join_int + { + no warnings qw(recursion); + + my $self = shift; + my $dataref = shift; + my $list = $self->{list}; + for my $elem (@{$list}) + { + if (ref($elem)) + { + $elem->join_int($dataref) + } + else + { + $$dataref .= $elem; + } + } + } + + sub first + { + my $self = shift; + my $list = $self->{list}; + my $elem = ${$list}[0]; + $elem = $elem->first() if ref($elem); + return $elem; + } + + sub push + { + my $self = shift; + my $a1 = $self->{list}; + my $a2 = shift; + my $a2_list = $a2->{list}; + if (scalar @{$a2_list} < 32) # FIXME: Arbitrary value choosen without much care. Probably not optimal. + { + push @{$a1}, @{$a2_list}; + } + else + { + push @{$a1}, $a2; + } + } +} + +{ + package DBX::ChainFinder; + use BerkeleyDB; + use File::Temp; + + sub new + { + my $class = shift; + my $self = {}; + bless ($self, $class); + $self->reset; + return $self; + } + + sub reset + { + my $self = shift; + + $self->{id} = {}; + $self->{next} = {}; + $self->{stats} = + { 'chunkstot' => 0, + 'chainfrag' => 0, + 'chainscompl' => 0, + }; + } + + # FIXME: this function is too convoluted + sub combine + { + my $self = shift; + my $hash = shift; + my $otherhash = shift; + my $key = shift; + my $data = shift; + my $idx = shift; + + while (defined($hash->{$key})) + { + my @chunkkeys = sort keys %{$hash->{$key}}; + my $chunkkey = pop @chunkkeys; + my $chunkref = $hash->{$key}->{$chunkkey}; + my $chunkid = $chunkref->{head}->[$idx]; + delete $hash->{$key}->{$chunkkey}; + delete $hash->{$key} if (!scalar %{$hash->{$key}}); + delete $otherhash->{$chunkid}->{$chunkkey}; + delete $otherhash->{$chunkid} if (!scalar %{$otherhash->{$chunkid}}); + if ($idx == 1) + { + $data->push($chunkref); + } + else + { + $chunkref->push($data); + $data = $chunkref; + } + $key = $chunkid; + $self->{stats}->{chainfrag}--; + } + return ($key, $data); + } + + sub add + { + my $self = shift; + my $id = shift; + my $next = shift; + my $data = new DBX::ChunkList(shift); + my $chunkid = $self->{stats}->{chunkstot}++; + my $idhash = $self->{id}; + my $nexthash = $self->{next}; + + ($next, $data) = $self->combine($idhash, $nexthash, $next, $data, 1); + ($id, $data) = $self->combine($nexthash, $idhash, $id, $data, 0); + + # the test for first chunk is very simple and naive, but it works well enough + $self->{stats}->{chainscompl}++,return($data->join()) if (($next == 0) && ($data->first() =~ /^(From|From:|Return-Path:|Received:) /)); + $idhash->{$id} = {} if (!defined($idhash->{$id})); + $nexthash->{$next} = {} if (!defined($nexthash->{$next})); + $data->{head} = [$id, $next]; + $idhash->{$id}->{$chunkid} = $data; + $nexthash->{$next}->{$chunkid} = $data; + $self->{stats}->{chainfrag}++; + return undef; + } + + sub remaining + { + my $self = shift; + return undef if (!scalar %{$self->{id}}); + while (1) + { + (my $id, my $list) = each %{$self->{id}} or next; + (my $cid, my $chain) = each %{$list} or next; + my $data = $chain->join(); + delete $self->{id}->{$id}->{$cid}; + delete $self->{id}->{$id} if (!scalar %{$self->{id}->{$id}}); + return $data; + } + } + + sub stats + { + my $self = shift; + return $self->{stats}; + } +} + +{ + package DBX::Scan; + sub new + { + my $class = shift; + my $self = {}; + $self->{file} = shift; + $self->{cf} = DBX::ChainFinder->new; + bless ($self, $class); + return $self; + } + + sub scan + { + my $self = shift; + my $file = $self->{file}; + $file->binmode(); + $self->{cf}->reset; + my $time = 0; + my $header; + + $file->read($header, 16); + + while ((length $header) == 16) + { + my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $header); + + # test for a valid header + # chunk size is always 512 bytes in all dbx files I have seen + if ( + ($chunksize == 512) && + ($datasize <= $chunksize) && + ($datasize > 0) && + ($id != 0) && + (($next == 0) || ($datasize == $chunksize)) + ) + { + my $data; + $file->read($data, $datasize); + + # if the header seems valid, we skip the whole chunk + + # the chance we miss a start of another chunk is low, because + # the test above is pretty strict and false positives are quite rare + + # it also helps in cases when there are dbx files contained inside dbx + + my $tmp; + $file->read($tmp, $chunksize-$datasize); + + my $message = $self->{cf}->add($id, $next, $data); + $self->printmsg($message); + + if (time - $time > 1) + { + $self->printstats; + $time = time; + } + + $file->read($header, 16); + } + else + { + # skip 4 bytes and try again + # headers were always at 4 byte boundary in every dbx file I have seen + + my $tmp; + $file->read($tmp, 4); + $header = substr($header, 4).$tmp; + } + } + $self->printstats; + print STDERR "\nPrinting remaining chains..."; + my $message; + do { + $message = $self->{cf}->remaining; + $self->printmsg($message); + } while defined($message); + print STDERR "\n"; + } + + sub printmsg + { + my $self = shift; + my $message = shift; + if (defined($message)) + { + $message =~ s/^>*From />$&/m; # mbox "From " escaping + # we don't have the original envelope address, so just output a fake one + print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n"; + print $message; + print "\r\n\r\n"; + } + } + + sub printstats + { + my $self = shift; + my $stats = $self->{cf}->stats; + print STDERR "Chunks found: ".$stats->{chunkstot}."; "; + print STDERR "Chain fragments: ".$stats->{chainfrag}."; "; + print STDERR "Chains completed: ".$stats->{chainscompl}."\r"; + } +} + +use IO::File; + +my $dbxname=($ARGV[0] || "-"); +print STDERR "Opening $dbxname for reading...\n"; +my $dbxfile = IO::File->new("<$dbxname") or die("Error opening file!\n"); +my $dbxscanner = DBX::Scan->new($dbxfile); +print STDERR "Searching for messages...\n"; +$dbxscanner->scan; +print STDERR "\n"; |
