diff options
| author | pp <pp@455248ca-bdda-0310-9134-f4ebb693071a> | 2005-11-30 20:36:50 +0000 |
|---|---|---|
| committer | pp <pp@455248ca-bdda-0310-9134-f4ebb693071a> | 2005-11-30 20:36:50 +0000 |
| commit | 98badafe03c0707ca73833e285e619ba0fda8270 (patch) | |
| tree | d864bd739498b68975fd9dadbbb12d82a6940178 /dbxrecover | |
- initial import
git-svn-id: https://siedziba.pl:790/svn/repos/dbxrecover@222 455248ca-bdda-0310-9134-f4ebb693071a
Diffstat (limited to 'dbxrecover')
| -rwxr-xr-x | dbxrecover | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/dbxrecover b/dbxrecover new file mode 100755 index 0000000..0dffecc --- /dev/null +++ b/dbxrecover @@ -0,0 +1,290 @@ +#!/usr/bin/perl + +# dbxrecover, a program for recovering mail from damaged Outlook dbx files. +# Copyright (C) 2005 Piotr Pawłow <pp@siedziba.pl> + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +use strict; + +{ + package DBX::Chain; + + sub new + { + my $class = shift; + my $self = {}; + my $arrayref = shift; + my @ca = @{$arrayref}; + $self->{chunks} = \@ca; + $self->{idx} = scalar @ca; + bless ($self, $class); + return $self; + } + + # we assume here the single message is small enough to fit into memory + # this may not be a case for publishing companies or other businesses sending huge amounts of data by mail :) + sub data + { + my $self = shift; + my $data = ""; + for(@{$self->{chunks}}) + { + $data .= $_->data; + } + return $data; + } + + sub broken + { + my $self = shift; + return ($self->{chunks}->[$self->{idx}-1]->next != 0); + } +} + +{ + package DBX::ChainFinder; + + sub new + { + my $class = shift; + my $self = {}; + bless ($self, $class); + $self->reset; + return $self; + } + + sub reset + { + my $self = shift; + $self->{chunks} = {}; + $self->{first} = {}; + } + + sub add + { + my $self = shift; + my $chunk = shift; + my $id = $chunk->id; + my $next = $chunk->next; + if (!defined($self->{chunks}->{$id})) + { + $self->{chunks}->{$id} = []; + } + else + { + for (@{$self->{chunks}->{$id}}) + { + return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present + } + } + push @{$self->{chunks}->{$id}}, $chunk; + + # we store if there is any other chunk pointing to this one + if (!defined($self->{first}->{$id})) { $self->{first}->{$id} = 1 }; + $self->{first}->{$next} = 0; + + # adding a new chunk requires rescanning for chains + $self->{chains} = undef; + } + + sub find + { + my $self = shift; + $self->{chains} = []; + + for(keys %{$self->{first}}) + { + if ($self->{first}->{$_} == 1) + { + # if there is no other chunk pointing to this one, start following it + $self->walk($_); + } + } + return $self->{chains}; + } + + sub walk + { + my $self = shift; + my $id = shift; + my $chain = shift; + my $seen = shift; + my $combinations = shift; + return if ($combinations > 16); # we don't extract the message if there are too many branches + if (!defined($chain)) { $chain = [] }; + if (!defined($seen)) { $seen = {} }; + if (!defined($combinations)) { $combinations = 1 }; + $seen->{$id} = 1; # note chunk identifiers for infinite loop testing + for(@{$self->{chunks}->{$id}}) + { + my $chunk = $_; + push @{$chain}, $chunk; + if (defined($self->{chunks}->{$chunk->next}) && !$seen->{$chunk->next}) + { + $self->walk($chunk->next, $chain, $seen, $combinations * scalar @{$self->{chunks}->{$chunk->next}}); + } + else + { + push @{$self->{chains}}, DBX::Chain->new($chain); + } + pop @{$chain}; + } + $seen->{$id} = 0; + } +} + +{ + package DBX::Chunk; + use Digest::SHA1 qw(sha1); + + sub new + { + my $class = shift; + my $self = {}; + $self->{file} = shift; + return if ($self->{file}->read($self->{header}, 16) != 16); + my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $self->{header}); + + # test for a valid header + # chunk size is always 512 bytes in all dbx files I have seen + + if ( + ($chunksize == 512) && + ($datasize <= $chunksize) && + ($datasize > 0) && + ($id != 0) && + (($next == 0) || ($datasize == $chunksize)) + ) + { + $self->{filepos} = $self->{file}->getpos; + + # if the header seems valid, we skip the whole chunk + + # there's a low chance we miss a start of another chunk, + # as the test above is pretty strict and false positives are quite rare + + # it also helps in cases when there are dbx files contained inside dbx + + $self->{file}->seek($chunksize, 1); + $self->{hash} = undef; + ($self->{id}, $self->{chunksize}, $self->{datasize}, $self->{next}) = ($id, $chunksize, $datasize, $next); + bless ($self, $class); + return $self; + } + else + { + + # skip 4 bytes and try again + # headers were always at 4 byte boundary in every dbx file I have seen + + $self->{file}->seek(-12, 1); + return 0; + } + } + + sub datahash + { + my $self = shift; + if (!defined($self->{hash})) { $self->{hash} = sha1($self->data) }; + return $self->{hash}; + } + + sub differ + { + my $self = shift; + my $other = shift; + return 1 if (($self->{header}) ne ($other->{header})); + # use a hash for comparision, to avoid storing message data, or retrieving message data on each comparision + return 1 if ($self->datahash ne $other->datahash); + return 0; + } + + sub data + { + my $self = shift; + my $data; + $self->{file}->setpos($self->{filepos}); + $self->{file}->read($data, $self->{datasize}); + return $data; + } + + sub id + { + my $self = shift; + return $self->{id}; + } + + sub next + { + my $self = shift; + return $self->{next}; + } + +} + +{ + package DBX::Scan; + + sub new + { + my $class = shift; + my $self = {}; + $self->{file} = shift; + $self->{cf} = DBX::ChainFinder->new; + bless ($self, $class); + return $self; + } + + sub scan + { + my $self = shift; + $self->{file}->binmode(); + $self->{file}->seek(0, 0); + $self->{cf}->reset; + + while (defined(my $chunk = DBX::Chunk->new($self->{file}))) + { + $self->{cf}->add($chunk) if ($chunk); + } + } + + sub chains + { + my $self = shift; + return $self->{cf}->find; + } +} + +use IO::File; + +(my $dbxname=$ARGV[0]) and ((scalar @ARGV) == 1) or print(STDERR "Usage: $0 input.dbx >output.mbox\n"),exit; +print STDERR "Opening $dbxname for reading...\n"; +my $dbxfile = IO::File->new($dbxname, "r") or die("Error opening file!\n"); +my $dbxscanner = DBX::Scan->new($dbxfile); +print STDERR "Searching for chunks...\n"; +$dbxscanner->scan; +print STDERR "Reconstructing chains...\n"; +my $chains = $dbxscanner->chains; +print STDERR "Printing...\n"; +for(@{$chains}) +{ + my $message = $_->data; + $message =~ s/^>*From />$&/m; # mbox "From " escaping + # we don't have the original envelope address, so just output a fake one + print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n"; + print $message; + print "\r\n\r\n"; +} |
