Last commit for dbxrecover.d: f05e533f206e5ac5f51c606d6e034bbff9187392

- don't compare file offsets when testing if chunks are different

pp [2007-02-08 07:51:11]
- don't compare file offsets when testing if chunks are different


git-svn-id: https://siedziba.pl:790/svn/repos/dbxrecover@274 455248ca-bdda-0310-9134-f4ebb693071a
import std.stream;
import std.stdio;
import std.c.stdlib;
import std.system;
import std.c.string;
import std.c.time;
import std.regexp;
import std.string;

// Chunk header as it is laid out in the input file. main() reads 16 raw bytes
// straight into this struct and then byte-swaps it as four 4-byte fields, so
// the field order and the 4-byte alignment must not be changed.
struct Header
{
	align(4)
	{
		uint id;        // chunk identifier; main() rejects headers where this is 0
		uint chunksize; // main() only accepts headers where this equals 512
		uint datasize;  // payload bytes in use; accepted only when 0 < datasize <= chunksize
		uint next;      // id of the chunk that continues the chain; 0 when nothing follows
	}
};

// Per-chunk record kept in memory for every header that main() accepts.
// Chunk.differ() memcmp()s the first 8 bytes of this struct (that is, `next`
// and `datasize`), so those two fields have to stay first and in this order;
// `fileoffset` is deliberately left out of that comparison.
struct ChunkInfo
{
	uint next;       // copied from Header.next
	uint datasize;   // copied from Header.datasize
	long fileoffset; // stream position right after the header, where the payload starts
};

// Stateless helpers that operate on a ChunkInfo plus the stream it came from.
class Chunk
{
	// Returns true when the two chunks are not the same chunk content-wise.
	// Only the `next` link, the `datasize` and the payload bytes take part in
	// the comparison; where the chunks sit in the file is ignored.
	static bool differ(ChunkInfo info1, ChunkInfo info2, Stream s)
	{
		bool sameLink = (info1.next == info2.next);
		bool sameSize = (info1.datasize == info2.datasize);
		if (!(sameLink && sameSize)) return true;

		// Metadata matches, so the payloads decide.
		return data(info1, s) != data(info2, s);
	}

	// Reads the payload of `info` from `s` and returns it. The stream position
	// is saved before and restored after the read. When fewer than
	// info.datasize bytes can be read the result is shortened accordingly.
	static char[] data(ChunkInfo info, Stream s)
	{
		long saved = s.position();

		s.seekSet(info.fileoffset);
		char[] payload = new char[info.datasize];
		size_t got = s.readBlock(payload.ptr, info.datasize);
		payload.length = got;

		s.seekSet(saved);
		return payload;
	}
}

// Collects chunks by id, remembers which ids can start a chain, and lets the
// caller foreach over every chain (as a Message) that can be assembled.
class ChainFinder
{
	private:
		Stream s;                  // stream the chunk payloads are read from
		ChunkInfo[][uint] infos;   // chunk id -> all distinct chunks seen with that id
		bool[uint] first;          // id -> true while no chunk names it as its `next`

		// Starts a walk at `id` with a fresh, empty Message.
		int walk(uint id, int delegate(inout Message) dg)
		{
			auto m = new Message(s);
			return walk(id, &infos[id], dg, m, 1);
		}

		// Depth-first enumeration of the chains that pass through `id`.
		//
		// Every candidate chunk for `id` is pushed onto `m`; if its `next` id is
		// known and not already part of `m`, the walk continues there, otherwise
		// the chain ends and `m` is handed to `dg`.
		//
		// `combinations` is the running product of the candidate counts along
		// the path; once it exceeds 16 the branch is counted as dropped and
		// abandoned.
		//
		// Returns the first non-zero value produced by `dg`, or 0 when the whole
		// subtree was visited. On a non-zero return `m` is left as it was at that
		// moment (it is not unwound).
		int walk(uint id, ChunkInfo[]* infosp, int delegate(inout Message) dg, Message m, uint combinations)
		{
			if (combinations > 16)
			{
				chainstats.dropped++;
				return 0;
			}
			foreach(ChunkInfo info; *infosp)
			{
				m.push(id, info);
				bool seen = m.seen(info.next);
				ChunkInfo[]* nextp = info.next in infos;
				if (nextp && (!seen))
				{
					// A non-zero result means the caller's foreach body asked to
					// stop; it has to travel all the way up instead of being
					// dropped here.
					int result = walk(info.next, nextp, dg, m, combinations * nextp.length);
					if (result) return result;
				}
				else
				{
					// End of chain: either the successor is unknown, or following
					// it would revisit an id already in this chain.
					if (seen) chainstats.loops++;
					if (m.broken()) chainstats.broken++;
					chainstats.count++;
					int result = dg(m);
					if (result) return result;
				}
				m.pop(id);
			}
			return 0;
		}

	public:
		// Counters updated by add().
		struct ChunkStatistics
		{
			uint count;          // every chunk passed to add()
			uint duplicates;     // chunks discarded because an identical one was already stored
			uint id_duplicates;  // chunks stored under an id that already had a different chunk
		};
		// Counters updated while iterating with opApply().
		struct ChainStatistics
		{
			uint count;             // chains handed to the delegate
			uint loops;             // chains that ended because the next id was already in the chain
			uint dropped;           // branches abandoned by the combination limit
			uint broken;            // chains whose last chunk still has a non-zero `next`
			uint progress_total;    // number of ids to visit, plus one
			uint progress_current;  // starts at 1 and is incremented after each id
		};

		ChunkStatistics chunkstats;
		ChainStatistics chainstats;

		// `s` is the stream chunk payloads are read from.
		this(Stream s)
		{
			this.s = s;
		}

		// Registers a chunk under `id`. A chunk for which Chunk.differ() reports
		// no difference to an already stored chunk of the same id is only
		// counted as a duplicate and not stored again.
		void add(uint id, ChunkInfo info)
		{
			chunkstats.count++;
			if (id in infos) {
				foreach (ChunkInfo info2; infos[id])
				{
					if (!Chunk.differ(info, info2, s))
					{
						chunkstats.duplicates++;
						return;
					}
				}
			}
			else
			{
				ChunkInfo[] tmp;
				infos[id] = tmp;
			}
			int len = (infos[id].length = infos[id].length + 1);
			infos[id][len-1] = info;
			if (len > 1) chunkstats.id_duplicates++;
			// An id starts a chain only as long as no chunk points at it.
			if (!(id in first)) first[id] = true;
			first[info.next] = false;
		}

		// foreach support: calls `dg` once per chain found, starting from every
		// id that no chunk names as its successor. Stops early and returns the
		// delegate's value as soon as it is non-zero.
		int opApply(int delegate(inout Message) dg)
		{
			uint[] ids = first.keys;
			chainstats.progress_total = ids.length+1;
			chainstats.progress_current = 1;

			foreach(uint id; ids)
			{
				if (first[id])
				{
					int result = walk(id, dg);
					if (result) return result;
				}
				chainstats.progress_current++;
			}
			return 0;
		}
};

// An ordered list of chunks that together form one message. Iterating over it
// with foreach yields the concatenated payload split into lines.
class Message
{
	private:
		ChunkInfo[] infos;      // chunks in chain order
		bool[uint] seen_tab;    // ids currently on the chain
		Stream s;               // stream the payloads are read from
	public:
		this(Stream s)
		{
			this.s = s;
		}

		// Appends `info` to the chain and marks `id` as present.
		void push(uint id, ChunkInfo info)
		{
			infos ~= info;
			seen_tab[id] = true;
		}

		// Undoes the most recent push(); `id` must be the id given to it.
		void pop(uint id)
		{
			seen_tab.remove(id);
			infos.length = infos.length - 1;
		}

		// True when `id` is currently on the chain.
		bool seen(uint id)
		{
			return (id in seen_tab) !is null;
		}

		// True when the last chunk still points at a successor (next != 0).
		bool broken()
		{
			ChunkInfo last = infos[$-1];
			return last.next != 0;
		}

		// foreach support: feeds `dg` one line at a time. A line ends with
		// character 10 and may span chunk boundaries; whatever is left after
		// the last chunk is delivered as a final call, even if it is empty.
		int opApply(int delegate(inout char[]) dg)
		{
			char[] pending = "";
			foreach (ChunkInfo chunk; infos)
			{
				char[] payload = Chunk.data(chunk, s);
				int start = 0;
				for (;;)
				{
					int nl = std.string.find(payload[start..length], 10);
					if (nl == -1) break;

					// Include the terminator itself in the delivered line.
					int stop = start + nl + 1;
					pending ~= payload[start..stop];
					int rc = dg(pending);
					if (rc) return rc;
					pending.length = 0;
					start = stop;
				}
				// Unterminated tail carries over into the next chunk.
				pending ~= payload[start..length];
			}
			return dg(pending);
		}
};

// Entry point. Pass 1 scans the input file for chunk headers and registers the
// plausible ones; pass 2 writes every chain that can be assembled to stdout in
// mbox form. Progress is reported on stderr.
int main (char[][] args)
{
	// Exactly one argument, the input file, is expected.
	if (args.length != 2)
	{
		fwritef(stderr, "Usage: %s input.dbx >output.mbox\n", args[0]);
		exit(1);
	}

	auto input = new EndianStream(new BufferedFile(args[1], FileMode.In), Endian.LittleEndian);
	auto cf = new ChainFinder(input);

	// Time of the most recent throttled progress report.
	time_t lastReport;

	// Evaluates `func` only if at least `seconds` have passed since the
	// previous report made through this helper.
	void every(uint seconds, lazy void func)
	{
		time_t now = time(null);
		if (now - lastReport < seconds) return;
		lastReport = now;
		func();
	}

	// Progress line for pass 1.
	void showChunkProgress() {
		fwritef(stderr,"Bytes: %d; Chunks: %d; Duplicates: %d; Duplicate IDs: %d; %d%% done\r",
			input.position(), cf.chunkstats.count, cf.chunkstats.duplicates, cf.chunkstats.id_duplicates, (input.position()+1)*100/(input.size+1));
	}

	// Progress line for pass 2.
	void showChainProgress() {
		fwritef(stderr,"Chains: %d; Broken: %d; Loops: %d; Dropped: %d; %d%% done\r",
			cf.chainstats.count, cf.chainstats.broken, cf.chainstats.loops, cf.chainstats.dropped, cf.chainstats.progress_current*100/cf.chainstats.progress_total);
	}

	fwritef(stderr,"Pass 1/2...\n");
	for (;;)
	{
		Header h;
		if (input.readBlock(&h, 16) != 16) break;
		input.fixBlockBO(&h, 4, 4);

		bool plausible =
			(h.chunksize == 512) &&
			(h.datasize <= h.chunksize) &&
			(h.datasize > 0) &&
			(h.id != 0) &&
			((h.next == 0) || (h.datasize == h.chunksize));

		if (!plausible)
		{
			// Not a header: step back so the next attempt starts 4 bytes
			// after where this one started.
			input.seekCur(-12);
			continue;
		}

		ChunkInfo info;
		info.next = h.next;
		info.datasize = h.datasize;
		info.fileoffset = input.position();
		// Skip over the chunk body before looking for the next header.
		input.seekCur(h.chunksize);
		cf.add(h.id, info);
		every(2, showChunkProgress());
	}
	showChunkProgress();
	fwritef(stderr,"\n");

	fwritef(stderr,"Pass 2/2...\n");
	foreach (Message m; cf)
	{
		writef("From unknown@unknown.invalid Mon Jan 1 00:00:00 1970\r\n");
		foreach (char[] line; m)
		{
			// Quote body lines that would otherwise look like a message separator.
			char[] quoted = sub(line, "^>*From ", ">$&");
			fwrite(cast(void*)quoted, quoted.length, char.sizeof, stdout);
		}
		writef("\r\n\r\n");
		every(2, showChainProgress());
	}
	showChainProgress();
	fwritef(stderr,"\n");

	return 0;
};
ViewGit