summaryrefslogtreecommitdiff
path: root/dbxrecover
diff options
context:
space:
mode:
Diffstat (limited to 'dbxrecover')
-rwxr-xr-xdbxrecover368
1 files changed, 0 insertions, 368 deletions
diff --git a/dbxrecover b/dbxrecover
deleted file mode 100755
index 6d2f71c..0000000
--- a/dbxrecover
+++ /dev/null
@@ -1,368 +0,0 @@
-#!/usr/bin/perl
-
-# dbxrecover, a program for recovering mail from damaged Outlook dbx files.
-# Copyright (C) 2005 Piotr Pawlow <pp@siedziba.pl>
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-use strict;
-use warnings;
-
-{
- package DBX::Chain;
-
- sub new
- {
- my $class = shift;
- my $self = {};
- my $arrayref = shift;
- my @ca = @{$arrayref};
- $self->{chunks} = \@ca;
- $self->{idx} = scalar @ca;
- bless ($self, $class);
- return $self;
- }
-
- # we assume here the single message is small enough to fit into memory
- # this may not be a case for publishing companies or other businesses sending huge amounts of data by mail :)
- sub data
- {
- my $self = shift;
- my $data = "";
- for(@{$self->{chunks}})
- {
- $data .= $_->data;
- }
- return $data;
- }
-
- sub broken
- {
- my $self = shift;
- return ($self->{chunks}->[$self->{idx}-1]->next ne "\x00\x00\x00\x00");
- }
-}
-
-{
- package DBX::ChainFinder;
-
- sub new
- {
- my $class = shift;
- my $self = {};
- bless ($self, $class);
- $self->reset;
- return $self;
- }
-
- sub reset
- {
- my $self = shift;
- $self->{chunks} = {};
- $self->{first} = {};
- $self->{stats} =
- { 'chunkstot' => 0,
- 'chunksdupl' => 0,
- 'chunkidsdupl' => 0,
- 'chainstot' => 0,
- 'chainloops' => 0,
- 'chainsign' => 0,
- 'chainsbroken' => 0,
- };
- }
-
- sub add
- {
- my $self = shift;
- my $chunk = shift;
- my $id = $chunk->id;
- my $next = $chunk->next;
- $self->{stats}->{chunkstot}++;
- my $chunks = $self->{chunks}->{$id};
- if (defined($chunks))
- {
- my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks);
- for (@chunks)
- {
- $self->{stats}->{chunksdupl}++,return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present
- }
- $self->{stats}->{chunkidsdupl}++;
- push @chunks, $chunk;
- $self->{chunks}->{$id} = \@chunks;
- }
- else
- {
- $self->{chunks}->{$id} = $chunk;
- }
-
- # we store if there is any other chunk pointing to this one
- if (!defined($self->{first}->{$id})) { $self->{first}->{$id} = 1 };
- $self->{first}->{$next} = 0;
- }
-
- sub find
- {
- my $self = shift;
- my $first = $self->{first};
- my @ids;
- while (my ($key, $value) = each %$first)
- {
- push @ids,$key if $value == 1;
- }
- return \@ids;
- }
-
- sub walk_int
- {
- no warnings qw(recursion);
-
- my $self = shift;
- my $id = shift;
- my $chain = shift;
- my $chains = shift;
- my $seen = shift;
- my $combinations = shift;
- $self->{stats}->{chainsign}++,return if ($combinations > 16); # we don't extract the message if there are too many branches
- $seen->{$id} = 1; # note chunk identifiers for infinite loop testing
- my $chunks = $self->{chunks}->{$id};
- my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks);
- for(@chunks)
- {
- my $chunk = $_;
- push @{$chain}, $chunk;
- if (defined($self->{chunks}->{$chunk->next}) && !$seen->{$chunk->next})
- {
- my $nextchunks = $self->{chunks}->{$chunk->next};
- my $nextcount = (ref($nextchunks) eq "ARRAY") ? scalar @{$nextchunks} : 1;
- $self->walk_int($chunk->next, $chain, $chains, $seen, $combinations * $nextcount);
- }
- else
- {
- $self->{stats}->{chainloops}++ if $seen->{$chunk->next};
- my $chainobj = DBX::Chain->new($chain);
- $self->{stats}->{chainsbroken}++ if $chainobj->broken;
- $self->{stats}->{chainstot}++;
- push @{$chains}, $chainobj;
- }
- pop @{$chain};
- }
- $seen->{$id} = 0;
- }
-
- sub walk
- {
- my $self = shift;
- my $id = shift;
- my $chains = [];
- $self->walk_int($id, [], $chains, {}, 1);
- return $chains;
- }
-
- sub stats
- {
- my $self = shift;
- return $self->{stats};
- }
-}
-
-{
- package DBX::Chunk;
- use Digest::SHA1 qw(sha1);
-
- sub new
- {
- my $class = shift;
- my $self = {};
- $self->{f} = shift;
- return if ($self->{f}->read($self->{h}, 16) != 16);
- my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $self->{h});
-
- # test for a valid header
- # chunk size is always 512 bytes in all dbx files I have seen
-
- if (
- ($chunksize == 512) &&
- ($datasize <= $chunksize) &&
- ($datasize > 0) &&
- ($id != 0) &&
- (($next == 0) || ($datasize == $chunksize))
- )
- {
- $self->{p} = $self->{f}->getpos;
-
- # if the header seems valid, we skip the whole chunk
-
- # the chance we miss a start of another chunk is low, because
- # the test above is pretty strict and false positives are quite rare
-
- # it also helps in cases when there are dbx files contained inside dbx
-
- $self->{f}->seek($chunksize, 1);
- bless ($self, $class);
- return $self;
- }
- else
- {
-
- # skip 4 bytes and try again
- # headers were always at 4 byte boundary in every dbx file I have seen
-
- $self->{f}->seek(-12, 1);
- return 0;
- }
- }
-
- sub datahash
- {
- my $self = shift;
- if (!defined($self->{hash})) { $self->{hash} = sha1($self->data) };
- return $self->{hash};
- }
-
- sub differ
- {
- my $self = shift;
- my $other = shift;
- return 1 if (($self->{h}) ne ($other->{h}));
- # use a hash for comparision, to avoid storing message data, or retrieving message data on each comparision
- return 1 if ($self->datahash ne $other->datahash);
- return 0;
- }
-
- sub data
- {
- my $self = shift;
- my $datasize = unpack("V", substr($self->{h}, 8, 4));
- my $data;
- my $pos = $self->{f}->getpos;
- $self->{f}->setpos($self->{p});
- $self->{f}->read($data, $datasize);
- $self->{f}->setpos($pos);
- return $data;
- }
-
- sub id
- {
- my $self = shift;
- return substr($self->{h}, 0, 4);
- }
-
- sub next
- {
- my $self = shift;
- return substr($self->{h}, 12, 4);
- }
-
-}
-
-{
- package DBX::Scan;
-
- sub new
- {
- my $class = shift;
- my $self = {};
- $self->{file} = shift;
- $self->{cf} = DBX::ChainFinder->new;
- bless ($self, $class);
- return $self;
- }
-
- sub scan
- {
- my $self = shift;
- $self->{file}->binmode();
- $self->{file}->seek(0, 0);
- $self->{cf}->reset;
- my $time = 0;
-
- while (defined(my $chunk = DBX::Chunk->new($self->{file})))
- {
- if ($chunk)
- {
- $self->{cf}->add($chunk);
- if (time - $time > 1)
- {
- $self->printchunkstats;
- $time = time;
- }
- }
- }
- $self->printchunkstats;
- print STDERR "\n";
- }
-
- sub chains
- {
- my $self = shift;
- my $chainids = $self->{cf}->find;
- my $time = 0;
- my $tot = scalar @{$chainids};
- my $idx = 0;
- for (@{$chainids})
- {
- my $chains = $self->{cf}->walk($_);
- $idx++;
- for(@{$chains})
- {
- my $message = $_->data;
- $message =~ s/^>*From />$&/m; # mbox "From " escaping
- # we don't have the original envelope address, so just output a fake one
- print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n";
- print $message;
- print "\r\n\r\n";
- }
- if (time - $time > 1)
- {
- $self->printchainstats(int($idx/$tot*100));
- $time = time;
- }
- }
- $self->printchainstats(100);
- }
-
- sub printchunkstats
- {
- my $self = shift;
- my $stats = $self->{cf}->stats;
- print STDERR "Chunks found: ".$stats->{chunkstot}."; ";
- print STDERR "Duplicates: ".$stats->{chunksdupl}."; ";
- print STDERR "Duplicate IDs: ".$stats->{chunkidsdupl}."\r";
- }
-
- sub printchainstats
- {
- my $self = shift;
- my $percdone = shift;
- my $stats = $self->{cf}->stats;
- print STDERR "Chains found: ".$stats->{chainstot}."; ";
- print STDERR "Broken: ".$stats->{chainsbroken}."; ";
- print STDERR "Loops: ".$stats->{chainloops}."; ";
- print STDERR "Dropped: ".$stats->{chainsign}."; ";
- print STDERR $percdone."% done\r";
- }
-}
-
-use IO::File;
-
-(my $dbxname=$ARGV[0]) and ((scalar @ARGV) == 1) or print(STDERR "Usage: $0 input.dbx >output.mbox\n"),exit;
-print STDERR "Opening $dbxname for reading...\n";
-my $dbxfile = IO::File->new($dbxname, "r") or die("Error opening file!\n");
-my $dbxscanner = DBX::Scan->new($dbxfile);
-print STDERR "Searching for chunks...\n";
-$dbxscanner->scan;
-print STDERR "Searching for chains and printing...\n";
-$dbxscanner->chains;
-print STDERR "\n";