diff --git a/dbxrecover b/dbxrecover
index 2d703aa..081556a 100755
--- a/dbxrecover
+++ b/dbxrecover
@@ -18,6 +18,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
use strict;
+use warnings;
{
package DBX::Chain;
@@ -50,7 +51,7 @@ use strict;
sub broken
{
my $self = shift;
- return ($self->{chunks}->[$self->{idx}-1]->next != 0);
+ return ($self->{chunks}->[$self->{idx}-1]->next ne "\x00\x00\x00\x00");
}
}
@@ -78,6 +79,7 @@ use strict;
'chainstot' => 0,
'chainloops' => 0,
'chainsign' => 0,
+ 'chainsbroken' => 0,
};
}
@@ -88,78 +90,88 @@ use strict;
my $id = $chunk->id;
my $next = $chunk->next;
$self->{stats}->{chunkstot}++;
- if (!defined($self->{chunks}->{$id}))
+ my $chunks = $self->{chunks}->{$id};
+ if (defined($chunks))
{
- $self->{chunks}->{$id} = [];
- }
- else
- {
- for (@{$self->{chunks}->{$id}})
+ my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks);
+ for (@chunks)
{
$self->{stats}->{chunksdupl}++,return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present
}
+ $self->{stats}->{chunkidsdupl}++;
+ push @chunks, $chunk;
+ $self->{chunks}->{$id} = \@chunks;
+ }
+ else
+ {
+ $self->{chunks}->{$id} = $chunk;
}
- push @{$self->{chunks}->{$id}}, $chunk;
# we store if there is any other chunk pointing to this one
if (!defined($self->{first}->{$id})) { $self->{first}->{$id} = 1 };
$self->{first}->{$next} = 0;
-
- # adding a new chunk requires rescanning for chains
- $self->{chains} = undef;
}
sub find
{
my $self = shift;
- $self->{chains} = [];
-
- for(keys %{$self->{first}})
+ my $first = $self->{first};
+ my @ids;
+ while (my ($key, $value) = each %$first)
{
- if ($self->{first}->{$_} == 1)
- {
- # if there is no other chunk pointing to this one, start following it
- $self->walk($_);
- }
+ push @ids,$key if $value == 1;
}
- return $self->{chains};
+ return \@ids;
}
- sub walk
+ sub walk_int
{
+ no warnings qw(recursion);
+
my $self = shift;
my $id = shift;
my $chain = shift;
my $seen = shift;
my $combinations = shift;
$self->{stats}->{chainsign}++,return if ($combinations > 16); # we don't extract the message if there are too many branches
- if (!defined($chain)) { $chain = [] };
- if (!defined($seen)) { $seen = {} };
- if (!defined($combinations)) { $combinations = 1 };
$seen->{$id} = 1; # note chunk identifiers for infinite loop testing
- for(@{$self->{chunks}->{$id}})
+ my $chunks = $self->{chunks}->{$id};
+ my @chunks = (ref($chunks) eq "ARRAY") ? @{$chunks} : ($chunks);
+ for(@chunks)
{
my $chunk = $_;
push @{$chain}, $chunk;
if (defined($self->{chunks}->{$chunk->next}) && !$seen->{$chunk->next})
{
- $self->walk($chunk->next, $chain, $seen, $combinations * scalar @{$self->{chunks}->{$chunk->next}});
+ my $nextchunks = $self->{chunks}->{$chunk->next};
+ my $nextcount = (ref($nextchunks) eq "ARRAY") ? scalar @{$nextchunks} : 1;
+ $self->walk_int($chunk->next, $chain, $seen, $combinations * $nextcount);
}
else
{
$self->{stats}->{chainloops}++ if $seen->{$chunk->next};
- push @{$self->{chains}}, DBX::Chain->new($chain);
+ my $chainobj = DBX::Chain->new($chain);
+ $self->{stats}->{chainsbroken}++ if $chainobj->broken;
+ $self->{stats}->{chainstot}++;
+ push @{$self->{chains}}, $chainobj;
}
pop @{$chain};
}
$seen->{$id} = 0;
}
+ sub walk
+ {
+ my $self = shift;
+ my $id = shift;
+ $self->{chains} = [];
+ $self->walk_int($id, [], {}, 1);
+ return $self->{chains};
+ }
+
sub stats
{
my $self = shift;
- $self->{stats}->{chunkidsdupl} = $self->{stats}->{chunkstot} - scalar keys %{$self->{chunks}};
- $self->{stats}->{chainstot} = scalar @{$self->{chains}} if ($self->{chains});
return $self->{stats};
}
}
@@ -172,9 +184,9 @@ use strict;
{
my $class = shift;
my $self = {};
- $self->{file} = shift;
- return if ($self->{file}->read($self->{header}, 16) != 16);
- my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $self->{header});
+ $self->{f} = shift;
+ return if ($self->{f}->read($self->{h}, 16) != 16);
+ my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $self->{h});
# test for a valid header
# chunk size is always 512 bytes in all dbx files I have seen
@@ -187,7 +199,7 @@ use strict;
(($next == 0) || ($datasize == $chunksize))
)
{
- $self->{filepos} = $self->{file}->getpos;
+ $self->{p} = $self->{f}->getpos;
# if the header seems valid, we skip the whole chunk
@@ -196,7 +208,7 @@ use strict;
# it also helps in cases when there are dbx files contained inside dbx
- $self->{file}->seek($chunksize, 1);
+ $self->{f}->seek($chunksize, 1);
bless ($self, $class);
return $self;
}
@@ -206,7 +218,7 @@ use strict;
# skip 4 bytes and try again
# headers were always at 4 byte boundary in every dbx file I have seen
- $self->{file}->seek(-12, 1);
+ $self->{f}->seek(-12, 1);
return 0;
}
}
@@ -222,7 +234,7 @@ use strict;
{
my $self = shift;
my $other = shift;
- return 1 if (($self->{header}) ne ($other->{header}));
+ return 1 if (($self->{h}) ne ($other->{h}));
# use a hash for comparision, to avoid storing message data, or retrieving message data on each comparision
return 1 if ($self->datahash ne $other->datahash);
return 0;
@@ -231,25 +243,25 @@ use strict;
sub data
{
my $self = shift;
- my $datasize = unpack("V", substr($self->{header}, 8, 4));
+ my $datasize = unpack("V", substr($self->{h}, 8, 4));
my $data;
- my $pos = $self->{file}->getpos;
- $self->{file}->setpos($self->{filepos});
- $self->{file}->read($data, $datasize);
- $self->{file}->setpos($pos);
+ my $pos = $self->{f}->getpos;
+ $self->{f}->setpos($self->{p});
+ $self->{f}->read($data, $datasize);
+ $self->{f}->setpos($pos);
return $data;
}
sub id
{
my $self = shift;
- return scalar unpack("V", $self->{header});
+ return substr($self->{h}, 0, 4);
}
sub next
{
my $self = shift;
- return scalar unpack("V", substr($self->{header}, 12, 4));
+ return substr($self->{h}, 12, 4);
}
}
@@ -273,15 +285,18 @@ use strict;
$self->{file}->binmode();
$self->{file}->seek(0, 0);
$self->{cf}->reset;
- my $time;
+ my $time = 0;
while (defined(my $chunk = DBX::Chunk->new($self->{file})))
{
- $self->{cf}->add($chunk) if ($chunk);
- if (time - $time > 1)
+ if ($chunk)
{
- $self->printchunkstats;
- $time = time;
+ $self->{cf}->add($chunk);
+ if (time - $time > 1)
+ {
+ $self->printchunkstats;
+ $time = time;
+ }
}
}
$self->printchunkstats;
@@ -291,9 +306,30 @@ use strict;
sub chains
{
my $self = shift;
- my $chains = $self->{cf}->find;
- $self->printchainstats;
- return $chains;
+ my $chainids = $self->{cf}->find;
+ my $time = 0;
+ my $tot = scalar @{$chainids};
+ my $idx = 0;
+ for (@{$chainids})
+ {
+ my $chains = $self->{cf}->walk($_);
+ $idx++;
+ for(@{$chains})
+ {
+ my $message = $_->data;
+ $message =~ s/^>*From />$&/m; # mbox "From " escaping
+ # we don't have the original envelope address, so just output a fake one
+ print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n";
+ print $message;
+ print "\r\n\r\n";
+ }
+ if (time - $time > 1)
+ {
+ $self->printchainstats(int($idx/$tot*100));
+ $time = time;
+ }
+ }
+ $self->printchainstats(100);
}
sub printchunkstats
@@ -308,10 +344,13 @@ use strict;
sub printchainstats
{
my $self = shift;
+ my $percdone = shift;
my $stats = $self->{cf}->stats;
- print STDERR "Chains found: ".$stats->{chainstot}."\n";
- print STDERR "Chain loops found: ".$stats->{chainloops}."\n";
- print STDERR "Chains ignored due to duplicate chunk ids: ".$stats->{chainsign}."\n";
+ print STDERR "Chains found: ".$stats->{chainstot}."; ";
+ print STDERR "Broken: ".$stats->{chainsbroken}."; ";
+ print STDERR "Loops: ".$stats->{chainloops}."; ";
+ print STDERR "Dropped: ".$stats->{chainsign}."; ";
+ print STDERR $percdone."% done\r";
}
}
@@ -323,15 +362,6 @@ my $dbxfile = IO::File->new($dbxname, "r") or die("Error opening file!\n");
my $dbxscanner = DBX::Scan->new($dbxfile);
print STDERR "Searching for chunks...\n";
$dbxscanner->scan;
-print STDERR "Reconstructing chains...\n";
-my $chains = $dbxscanner->chains;
-print STDERR "Printing...\n";
-for(@{$chains})
-{
- my $message = $_->data;
- $message =~ s/^>*From />$&/m; # mbox "From " escaping
- # we don't have the original envelope address, so just output a fake one
- print "From unknown\@unknown.invalid Mon Jan 1 00:00:00 1970\r\n";
- print $message;
- print "\r\n\r\n";
-}
+print STDERR "Searching for chains and printing...\n";
+$dbxscanner->chains;
+print STDERR "\n";