- statistics

pp [2006-04-14 18:31:56]
- statistics


git-svn-id: https://siedziba.pl:790/svn/repos/dbxrecover@231 455248ca-bdda-0310-9134-f4ebb693071a
Filename
dbxrecover
diff --git a/dbxrecover b/dbxrecover
index bda8fca..2d703aa 100755
--- a/dbxrecover
+++ b/dbxrecover
@@ -71,6 +71,14 @@ use strict;
     my $self = shift;
     $self->{chunks} = {};
     $self->{first} = {};
+    $self->{stats} =
+    { 'chunkstot'    => 0,
+      'chunksdupl'   => 0,
+      'chunkidsdupl' => 0,
+      'chainstot'    => 0,
+      'chainloops'   => 0,
+      'chainsign'    => 0,
+    };
   }

   sub add
@@ -79,6 +87,7 @@ use strict;
     my $chunk = shift;
     my $id = $chunk->id;
     my $next = $chunk->next;
+    $self->{stats}->{chunkstot}++;
     if (!defined($self->{chunks}->{$id}))
     {
       $self->{chunks}->{$id} = [];
@@ -87,7 +96,7 @@ use strict;
     {
       for (@{$self->{chunks}->{$id}})
       {
-        return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present
+        $self->{stats}->{chunksdupl}++,return if (!$chunk->differ($_)); # don't add chunk if there is a duplicate present
       }
     }
     push @{$self->{chunks}->{$id}}, $chunk;
@@ -123,7 +132,7 @@ use strict;
     my $chain = shift;
     my $seen = shift;
     my $combinations = shift;
-    return if ($combinations > 16); # we don't extract the message if there are too many branches
+    $self->{stats}->{chainsign}++,return if ($combinations > 16); # we don't extract the message if there are too many branches
     if (!defined($chain)) { $chain = [] };
     if (!defined($seen)) { $seen = {} };
     if (!defined($combinations)) { $combinations = 1 };
@@ -138,12 +147,21 @@ use strict;
       }
       else
       {
+        $self->{stats}->{chainloops}++ if $seen->{$chunk->next};
         push @{$self->{chains}}, DBX::Chain->new($chain);
       }
       pop @{$chain};
     }
     $seen->{$id} = 0;
   }
+
+  sub stats  # Refresh the two derived counters, then return the stats hashref.
+  {
+    my $self = shift;
+    $self->{stats}->{chunkidsdupl} = $self->{stats}->{chunkstot} - scalar keys %{$self->{chunks}};  # total chunks seen minus distinct ids stored; chunkstot is bumped before the duplicate check, so rejected exact duplicates are counted here too
+    $self->{stats}->{chainstot} = scalar @{$self->{chains}} if ($self->{chains});  # keeps its previous value while $self->{chains} is false
+    return $self->{stats};  # the live hashref, not a copy
+  }
 }

 {
@@ -255,17 +273,45 @@ use strict;
     $self->{file}->binmode();
     $self->{file}->seek(0, 0);
     $self->{cf}->reset;
+    my $time;

     while (defined(my $chunk = DBX::Chunk->new($self->{file})))
     {
       $self->{cf}->add($chunk) if ($chunk);
+      if (time - $time > 1)
+      {
+        $self->printchunkstats;
+        $time = time;
+      }
     }
+    $self->printchunkstats;
+    print STDERR "\n";
   }

   sub chains
   {
     my $self = shift;
-    return $self->{cf}->find;
+    my $chains = $self->{cf}->find;  # NOTE(review): find is now called in scalar context; the removed line passed the caller's context through - confirm find returns a single scalar/ref
+    $self->printchainstats;  # report chain statistics on STDERR before handing the result back
+    return $chains;
+  }
+
+  sub printchunkstats  # Write a one-line chunk progress summary to STDERR.
+  {
+    my $self = shift;
+    my $stats = $self->{cf}->stats;  # stats recomputes chunkidsdupl (and chainstot) on every call
+    print STDERR "Chunks found: ".$stats->{chunkstot}."; ";
+    print STDERR "Duplicates: ".$stats->{chunksdupl}."; ";
+    print STDERR "Duplicate IDs: ".$stats->{chunkidsdupl}."\r";  # ends with a carriage return, not a newline, so on a terminal the next call overwrites this line
+  }
+
+  sub printchainstats  # Write the chain statistics to STDERR, one per line.
+  {
+    my $self = shift;
+    my $stats = $self->{cf}->stats;  # stats refreshes chainstot from $self->{chains} when that is set
+    print STDERR "Chains found: ".$stats->{chainstot}."\n";
+    print STDERR "Chain loops found: ".$stats->{chainloops}."\n";  # incremented when $seen holds a true value for a chunk's next id
+    print STDERR "Chains ignored due to duplicate chunk ids: ".$stats->{chainsign}."\n";  # incremented on the early return taken when $combinations > 16
   }
 }
ViewGit