#!/usr/bin/perl
# Split a large mbox file into consecutive parts of at most max_bytes each,
# cutting only at message boundaries so that no message is divided.
#
# Usage: <script> mbox_file_name [max_bytes]
#
# Parts are written next to the input as "<mbox_file_name>.1", ".2", ...
# max_bytes defaults to 1_500_000_000.
use strict;
use warnings;
use utf8;
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);
use IO::Handle;
use Scalar::Util qw(looks_like_number);

# Progress lines end in "\r", so they must reach the terminal unbuffered.
STDOUT->autoflush(1);

# A message boundary is a blank line followed by a line starting "From - ",
# with CRLF line endings.
# NOTE(review): presumably this matches the mbox dialect of the mail client
# the files come from; LF-only mbox files will not match -- confirm.
my $BOUNDARY_HEAD = "\r\n\r\n";
my $BOUNDARY      = $BOUNDARY_HEAD . "From - ";

# Number of bytes read per I/O operation, both when scanning and copying.
my $CHUNK_SIZE = 128 * 1024;

# Find the last message boundary lying entirely inside [$floor, $limit).
#
# The range is scanned backwards in windows of at most $CHUNK_SIZE bytes.
# Consecutive windows overlap by length($BOUNDARY) - 1 bytes so a boundary
# straddling two windows is still seen in one of them.
#
# Parameters:
#   $in    - readable, seekable handle on the mbox file (binary mode)
#   $floor - absolute offset where the current part starts
#   $limit - absolute offset the current part must not exceed
# Returns the absolute offset at which the next message's "From - " line
# starts, or an empty list / undef when the range holds no boundary.
# Dies when a seek or read fails.
sub find_split_point {
    my ($in, $floor, $limit) = @_;

    my $overlap     = length($BOUNDARY) - 1;
    my $window_end  = $limit;
    my $last_report = time;

    while ($window_end - $floor >= length($BOUNDARY)) {
        my $window_start = $window_end - $CHUNK_SIZE;
        $window_start = $floor if $window_start < $floor;
        my $want = $window_end - $window_start;

        seek($in, $window_start, SEEK_SET) or die("Seek failed: $!\n");
        my $got = read($in, my $buffer, $want);
        defined $got  or die("Read failed: $!\n");
        $got == $want or die("Read failed: unexpected end of file\n");

        my $pos = rindex($buffer, $BOUNDARY);
        if ($pos >= 0) {
            # Split after the blank line, i.e. right where "From - " begins.
            return $window_start + $pos + length($BOUNDARY_HEAD);
        }

        last if $window_start == $floor;

        # Report progress at most once per second.
        if (time >= $last_report + 1) {
            print " $window_start \r";
            $last_report = time;
        }

        # Next window ends $overlap bytes into this one (see header comment).
        $window_end = $window_start + $overlap;
    }

    return;
}

my $usage = "Usage: $0 mbox_file_name [max_bytes]\n";

my $mbox = $ARGV[0];
(defined($mbox) && length($mbox)) or die($usage);

my $max_size = $ARGV[1] || 1_500_000_000;
(looks_like_number($max_size) && $max_size >= 1) or die($usage);
$max_size = int($max_size);

open(my $in, '<', $mbox) or die("Open failed: $!\n");
# Offsets and CRLF sequences must be seen exactly as stored on disk.
binmode($in);

seek($in, 0, SEEK_END) or die("Seek failed: $!\n");
my $in_size = tell($in);
print "File size: $in_size\n";

# Each entry is the absolute end offset (exclusive) of one output part.
my @split_points;
print "Searching for split points\n";
my $part_start = 0;
while ($part_start + $max_size < $in_size) {
    my $split = find_split_point($in, $part_start, $part_start + $max_size);
    defined $split
        or die("Can't split, message size larger than split size\n");
    print " $split \n";
    push @split_points, $split;
    $part_start = $split;
}
push @split_points, $in_size;

# Copy the input sequentially into the numbered part files.
seek($in, 0, SEEK_SET) or die("Seek failed: $!\n");
my $copied = 0;
my $n      = 1;
for my $end (@split_points) {
    my $out_f = $mbox . '.' . $n++;
    print "Writing $out_f\n";
    open(my $out, '>', $out_f) or die("Open failed: $!\n");
    binmode($out);

    my $remaining = $end - $copied;
    while ($remaining > 0) {
        my $want = $remaining < $CHUNK_SIZE ? $remaining : $CHUNK_SIZE;
        my $got = read($in, my $buffer, $want);
        defined $got  or die("Read failed: $!\n");
        $got == $want or die("Read failed: unexpected end of file\n");
        print {$out} $buffer or die("Write failed: $!\n");
        $remaining -= $got;
    }

    # Buffered write errors only surface at close time.
    close($out) or die("Close failed: $!\n");
    $copied = $end;
}

close($in) or die("Close failed: $!\n");