pp [2006-05-13 22:58:24]
- read bigger chunks of data at once to make the program faster


git-svn-id: https://siedziba.pl:790/svn/repos/dbxrecover@238 455248ca-bdda-0310-9134-f4ebb693071a
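
The change below replaces many small 16-byte reads with one large buffered read per iteration, scanning records out of an in-memory buffer and carrying any incomplete tail over to the next read. A minimal, self-contained sketch of that pattern is shown here (the file name and the fixed 16-byte record size are illustrative assumptions, not the recovery logic itself):

#!/usr/bin/perl
use strict;
use warnings;
use IO::File;

# Hypothetical input file; the real script works on Outlook Express dbx files.
my $file = IO::File->new('example.dbx', 'r') or die "open: $!";
$file->binmode();

my $bufsize = 64 * 1024;   # read 64 KiB at a time instead of 16 bytes
my $buffer  = '';
my $tmp;

while ($file->read($tmp, $bufsize))
{
  $buffer .= $tmp;
  my $idx = 0;

  # consume complete 16-byte records from the buffer
  while (length($buffer) - $idx >= 16)
  {
    my $record = substr($buffer, $idx, 16);
    $idx += 16;
    # ... process $record here ...
  }

  # keep any partial record for the next read
  $buffer = substr($buffer, $idx);
}
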
diff --git a/dbxrecover-1p b/dbxrecover-1p
index db37ab6..0ed226c 100755
--- a/dbxrecover-1p
+++ b/dbxrecover-1p
@@ -213,57 +213,61 @@ use warnings;
     $file->binmode();
     $self->{cf}->reset;
     my $time = 0;
-    my $header;
+    my $tmp;
+    my $buffer = '';
+    my $bufsize = 64*1024;

-    $file->read($header, 16);
-
-    while ((length $header) == 16)
+    while ($file->read($tmp, $bufsize))
     {
-      my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $header);
-
-      # test for a valid header
-      # chunk size is always 512 bytes in all dbx files I have seen
-      if (
-           ($chunksize == 512) &&
-           ($datasize <= $chunksize) &&
-           ($datasize > 0) &&
-           ($id != 0) &&
-           (($next == 0) || ($datasize == $chunksize))
-         )
+      $buffer .= $tmp;
+      my $buflen = length $buffer;
+      my $idx = 0;
+      my $header;
+      while (($buflen - $idx) >= 16)
       {
-        my $data;
-        $file->read($data, $datasize);
+        $header = substr($buffer, $idx, 16);
+        my ($id, $chunksize, $datasize, $next) = unpack("VVVV", $header);
+
+        # test for a valid header
+        # chunk size is always 512 bytes in all dbx files I have seen
+        if (
+             ($chunksize == 512) &&
+             ($datasize <= $chunksize) &&
+             ($datasize > 0) &&
+             ($id != 0) &&
+             (($next == 0) || ($datasize == $chunksize))
+           )
+        {
+          last if ($buflen - $idx - 16 < $chunksize);
+          my $data = substr($buffer, $idx+16, $datasize);

-        # if the header seems valid, we skip the whole chunk
+          # if the header seems valid, we skip the whole chunk

-        # the chance we miss a start of another chunk is low, because
-        # the test above is pretty strict and false positives are quite rare
+          # the chance we miss a start of another chunk is low, because
+          # the test above is pretty strict and false positives are quite rare

-        # it also helps in cases when there are dbx files contained inside dbx
+          # it also helps in cases when there are dbx files contained inside dbx

-        my $tmp;
-        $file->read($tmp, $chunksize-$datasize);
+          $idx += $chunksize + 16;

-        my $message = $self->{cf}->add($id, $next, $data);
-        $self->printmsg($message);
+          my $message = $self->{cf}->add($id, $next, $data);
+          $self->printmsg($message);

-        if (time - $time > 1)
-        {
-          $self->printstats;
-          $time = time;
+          if (time - $time > 1)
+          {
+            $self->printstats;
+            $time = time;
+          }
         }
+        else
+        {
+          # skip 4 bytes and try again
+          # headers were always at 4 byte boundary in every dbx file I have seen

-        $file->read($header, 16);
-      }
-      else
-      {
-        # skip 4 bytes and try again
-        # headers were always at 4 byte boundary in every dbx file I have seen
-
-        my $tmp;
-        $file->read($tmp, 4);
-        $header = substr($header, 4).$tmp;
+          $idx += 4;
+        }
       }
+      $buffer = substr($buffer, $idx);
     }
     $self->printstats;
     print STDERR "\nPrinting remaining chains...";