diff options
Diffstat (limited to 'dbxrecover')
| -rwxr-xr-x | dbxrecover | 368 |
1 files changed, 0 insertions, 368 deletions
diff --git a/dbxrecover b/dbxrecover deleted file mode 100755 index 6d2f71c..0000000 --- a/dbxrecover +++ /dev/null @@ -1,368 +0,0 @@ -#!/usr/bin/perl - -# dbxrecover, a program for recovering mail from damaged Outlook dbx files. -# Copyright (C) 2005 Piotr Pawlow <pp@siedziba.pl> - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -use strict; -use warnings; - -{ - package DBX::Chain; - - sub new - { - my $class = shift; - my $self = {}; - my $arrayref = shift; - my @ca = @{$arrayref}; - $self->{chunks} = \@ca; - $self->{idx} = scalar @ca; - bless ($self, $class); - return $self; - } - - # we assume here the single message is small enough to fit into memory - # this may not be a case for publishing companies or other businesses sending huge amounts of data by mail :) - sub data - { - my $self = shift; - my $data = ""; - for(@{$self->{chunks}}) - { - $data .= $_->data; - } - return $data; - } - - sub broken - { - my $self = shift; - return ($self->{chunks}->[$self->{idx}-1]->next ne "\x00\x00\x00\x00"); - } -} - -{ - package DBX::ChainFinder; - - sub new - { - my $class = shift; - my $self = {}; - bless ($self, $class); - $self->reset; - return $self; - } - - sub reset - { - my $self = shift; - $self->{chunks} = {}; - $self->{first} = {}; - $self->{stats} = - { 'chunkstot' => 0, - 'chunksdupl' => 0, - 'chunkidsdupl' => 0, - 'chainstot' => 0, - 'chainloops' => 0, - 'chainsign' => 0, - 'chainsbroken' => 0, - }; - } - - sub add - { - my $self = shift; - my $chunk = shift; - my $id = $chunk->id; - my $next = $chunk->next; - $self->{stats}->{chunkstot}++; - my $chunks = $self->{chunks}->{$id}; - if (defined($chunks)) - { - my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks); - for (@chunks) - { - $self->{stats}->{chunksdupl}++,return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present - } - $self->{stats}->{chunkidsdupl}++; - push @chunks, $chunk; - $self->{chunks}->{$id} = \@chunks; - } - else - { - $self->{chunks}->{$id} = $chunk; - } - - # we store if there is any other chunk pointing to this one - if (!defined($self->{first}->{$id})) { $self->{first}->{$id} = 1 }; - $self->{first}->{$next} = 0; - } - - sub find - { - my $self = shift; - my $first = $self->{first}; - my @ids; - while (my ($key, $value) = each %$first) - { - push @ids,$key if $value == 1; - } - return \@ids; - } - - sub walk_int - { - no warnings qw(recursion); - - my $self = shift; - my $id = shift; - my $chain = shift; - my $chains = shift; - my $seen = shift; - my $combinations = shift; - $self->{stats}->{chainsign}++,return if ($combinations > 16); # we don't extract the message if there are too many branches - $seen->{$id} = 1; # note chunk identifiers for infinite loop testing - my $chunks = $self->{chunks}->{$id}; - my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks); - for(@chunks) - { - my $chunk = $_; - push @{$chain}, $chunk; - if (defined($self->{chunks}->{$chunk->next}) && !$seen->{$chunk->next}) - { - my $nextchunks = $self->{chunks}->{$chunk->next}; - my $nextcount = (ref($nextchunks) eq "ARRAY") ? scalar @{$nextchunks} : 1; - $self->walk_int($chunk->next, $chain, $chains, $seen, $combinations * $nextcount); - } - else - { - $self->{stats}->{chainloops}++ if $seen->{$chunk->next}; - my $chainobj = DBX::Chain->new($chain); - $self->{stats}->{chainsbroken}++ if $chainobj->broken; - $self->{stats}->{chainstot}++; - push @{$chains}, $chainobj; - } - pop @{$chain}; - } - $seen->{$id} = 0; - } - - sub walk - { - my $self = shift; - my $id = shift; - my $chains = []; - $self->walk_int($id, [], $chains, {}, 1); - return $chains; - } - - sub stats - { - my $self = shift; - return $self->{stats}; - } -} - -{ - package DBX::Chunk; - use Digest::SHA1 qw(sha1); - - sub new - { - my $class = shift; - my $self = {}; - $self->{f} = shift; - return if ($self->{f}->read($self->{h}, 16) != 16); - my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $self->{h}); - - # test for a valid header - # chunk size is always 512 bytes in all dbx files I have seen - - if ( - ($chunksize == 512) && - ($datasize <= $chunksize) && - ($datasize > 0) && - ($id != 0) && - (($next == 0) || ($datasize == $chunksize)) - ) - { - $self->{p} = $self->{f}->getpos; - - # if the header seems valid, we skip the whole chunk - - # the chance we miss a start of another chunk is low, because - # the test above is pretty strict and false positives are quite rare - - # it also helps in cases when there are dbx files contained inside dbx - - $self->{f}->seek($chunksize, 1); - bless ($self, $class); - return $self; - } - else - { - - # skip 4 bytes and try again - # headers were always at 4 byte boundary in every dbx file I have seen - - $self->{f}->seek(-12, 1); - return 0; - } - } - - sub datahash - { - my $self = shift; - if (!defined($self->{hash})) { $self->{hash} = sha1($self->data) }; - return $self->{hash}; - } - - sub differ - { - my $self = shift; - my $other = shift; - return 1 if (($self->{h}) ne ($other->{h})); - # use a hash for comparision, to avoid storing message data, or retrieving message data on each comparision - return 1 if ($self->datahash ne $other->datahash); - return 0; - } - - sub data - { - my $self = shift; - my $datasize = unpack("V", substr($self->{h}, 8, 4)); - my $data; - my $pos = $self->{f}->getpos; - $self->{f}->setpos($self->{p}); - $self->{f}->read($data, $datasize); - $self->{f}->setpos($pos); - return $data; - } - - sub id - { - my $self = shift; - return substr($self->{h}, 0, 4); - } - - sub next - { - my $self = shift; - return substr($self->{h}, 12, 4); - } - -} - -{ - package DBX::Scan; - - sub new - { - my $class = shift; - my $self = {}; - $self->{file} = shift; - $self->{cf} = DBX::ChainFinder->new; - bless ($self, $class); - return $self; - } - - sub scan - { - my $self = shift; - $self->{file}->binmode(); - $self->{file}->seek(0, 0); - $self->{cf}->reset; - my $time = 0; - - while (defined(my $chunk = DBX::Chunk->new($self->{file}))) - { - if ($chunk) - { - $self->{cf}->add($chunk); - if (time - $time > 1) - { - $self->printchunkstats; - $time = time; - } - } - } - $self->printchunkstats; - print STDERR "\n"; - } - - sub chains - { - my $self = shift; - my $chainids = $self->{cf}->find; - my $time = 0; - my $tot = scalar @{$chainids}; - my $idx = 0; - for (@{$chainids}) - { - my $chains = $self->{cf}->walk($_); - $idx++; - for(@{$chains}) - { - my $message = $_->data; - $message =~ s/^>*From />$&/m; # mbox "From " escaping - # we don't have the original envelope address, so just output a fake one - print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n"; - print $message; - print "\r\n\r\n"; - } - if (time - $time > 1) - { - $self->printchainstats(int($idx/$tot*100)); - $time = time; - } - } - $self->printchainstats(100); - } - - sub printchunkstats - { - my $self = shift; - my $stats = $self->{cf}->stats; - print STDERR "Chunks found: ".$stats->{chunkstot}."; "; - print STDERR "Duplicates: ".$stats->{chunksdupl}."; "; - print STDERR "Duplicate IDs: ".$stats->{chunkidsdupl}."\r"; - } - - sub printchainstats - { - my $self = shift; - my $percdone = shift; - my $stats = $self->{cf}->stats; - print STDERR "Chains found: ".$stats->{chainstot}."; "; - print STDERR "Broken: ".$stats->{chainsbroken}."; "; - print STDERR "Loops: ".$stats->{chainloops}."; "; - print STDERR "Dropped: ".$stats->{chainsign}."; "; - print STDERR $percdone."% done\r"; - } -} - -use IO::File; - -(my $dbxname=$ARGV[0]) and ((scalar @ARGV) == 1) or print(STDERR "Usage: $0 input.dbx >output.mbox\n"),exit; -print STDERR "Opening $dbxname for reading...\n"; -my $dbxfile = IO::File->new($dbxname, "r") or die("Error opening file!\n"); -my $dbxscanner = DBX::Scan->new($dbxfile); -print STDERR "Searching for chunks...\n"; -$dbxscanner->scan; -print STDERR "Searching for chains and printing...\n"; -$dbxscanner->chains; -print STDERR "\n"; |
