Work-in-progress:

* Started a file consistency checker; right now, it just computes a SHA-256 hash value for every file and dumps it to stdout.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@28406 a95241bf-73f2-0310-859d-f6bbb57e9c96
2008-10-31 17:05:49 +00:00 · 2008-10-31 17:05:49 +00:00 · d6013f6096
commit d6013f6096
parent e3bcc1dc09
5 changed files with 700 additions and 0 deletions
--- a/src/tests/add-ons/kernel/file_systems/Jamfile
+++ b/src/tests/add-ons/kernel/file_systems/Jamfile
@ -2,6 +2,7 @@ SubDir HAIKU_TOP src tests add-ons kernel file_systems ;

 SubInclude HAIKU_TOP src tests add-ons kernel file_systems bfs ;
 SubInclude HAIKU_TOP src tests add-ons kernel file_systems cdda ;
+SubInclude HAIKU_TOP src tests add-ons kernel file_systems consistency_check ;
 SubInclude HAIKU_TOP src tests add-ons kernel file_systems fs_shell ;
 SubInclude HAIKU_TOP src tests add-ons kernel file_systems fragmenter ;
 #SubInclude HAIKU_TOP src tests add-ons kernel file_systems iso9660 ;
--- a/src/tests/add-ons/kernel/file_systems/consistency_check/Jamfile
+++ b/src/tests/add-ons/kernel/file_systems/consistency_check/Jamfile
@ -0,0 +1,7 @@
+SubDir HAIKU_TOP src tests add-ons kernel file_systems consistency_check ;
+
+# Test program that computes a SHA-256 hash for every file it is given.
+SimpleTest generate_hashs :
+	generate_hashs.cpp
+	SHA256.cpp
+	: be $(TARGET_LIBSTDC++)
+;
--- a/src/tests/add-ons/kernel/file_systems/consistency_check/SHA256.cpp
+++ b/src/tests/add-ons/kernel/file_systems/consistency_check/SHA256.cpp
@ -0,0 +1,190 @@
+/*
+ * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
+ * Distributed under the terms of the MIT License.
+ */
+
+
+#include "SHA256.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <ByteOrder.h>
+
+
+// Size of one message chunk; the input is always hashed one chunk at a time.
+static const uint32 kChunkSize = 64;	// 64 bytes == 512 bits
+
+// Per-round constants, one for each of the 64 rounds in _ProcessChunk()
+// (the K constants of the SHA-256 specification).
+static const uint32 kRounds[64] = {
+   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+// Initial hash state; SHA256::Init() copies it into fHash.
+static const uint32 kHash[8] = {
+	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+
+// Rotates the 32 bit \a value to the right by \a bits positions. All callers
+// in this file pass a constant between 1 and 31.
+static inline uint32
+rotate_right(uint32 value, int bits)
+{
+	const uint32 shiftedOut = value << (32 - bits);
+	const uint32 remaining = value >> bits;
+	return remaining | shiftedOut;
+}
+
+
+//	#pragma mark -
+
+
+// Creates a hasher that is immediately ready for Update() calls.
+SHA256::SHA256()
+{
+	Init();
+}
+
+
+// Nothing to do here; the destructor body is intentionally empty.
+SHA256::~SHA256()
+{
+}
+
+
+// Resets the object to hash a new message: no pending bytes, zero message
+// size, no cached digest, and the hash state set to its initial values.
+void
+SHA256::Init()
+{
+	fDigested = false;
+	fMessageSize = 0;
+	fBytesInBuffer = 0;
+
+	memcpy(fHash, kHash, sizeof(fHash));
+}
+
+
+// Feeds \a size bytes starting at \a _buffer into the hash. Complete chunks
+// are processed right away, an incomplete tail is kept in fBuffer until more
+// data arrive or Digest() is called.
+void
+SHA256::Update(const void* _buffer, size_t size)
+{
+	const uint8* input = (const uint8*)_buffer;
+	uint8* chunk = (uint8*)fBuffer;
+
+	fMessageSize += size;
+
+	// As long as the pending bytes plus the remaining input fill a whole
+	// chunk, complete the chunk and hash it.
+	while (fBytesInBuffer + size >= kChunkSize) {
+		const size_t missing = kChunkSize - fBytesInBuffer;
+		memcpy(chunk + fBytesInBuffer, input, missing);
+		_ProcessChunk();
+		fBytesInBuffer = 0;
+
+		input += missing;
+		size -= missing;
+	}
+
+	// Keep whatever is left for the next Update() or Digest() call.
+	if (size == 0)
+		return;
+
+	memcpy(chunk + fBytesInBuffer, input, size);
+	fBytesInBuffer += size;
+}
+
+
+// Finishes the hash computation and returns a pointer to the digest
+// (DigestLength() bytes, owned by this object). The digest is computed only
+// once; further calls return the cached result until Init() is called.
+const uint8*
+SHA256::Digest()
+{
+	if (fDigested)
+		return (uint8*)fDigest;
+
+	// We need to append a 1 bit, append padding with 0 bits, and append
+	// the message size in bits (64 bit big-endian int), so that the whole
+	// is chunk-aligned. So we either have to process one last chunk or two
+	// chunks.
+	uint8* chunk = (uint8*)fBuffer;
+
+	// append the 1 bit
+	chunk[fBytesInBuffer] = 0x80;
+	fBytesInBuffer++;
+
+	// if the message size doesn't fit anymore, we pad the chunk and
+	// process it
+	if (fBytesInBuffer > kChunkSize - 8) {
+		memset(chunk + fBytesInBuffer, 0, kChunkSize - fBytesInBuffer);
+		_ProcessChunk();
+		fBytesInBuffer = 0;
+	}
+
+	// pad the buffer
+	if (fBytesInBuffer < kChunkSize - 8) {
+		memset(chunk + fBytesInBuffer, 0,
+			kChunkSize - 8 - fBytesInBuffer);
+	}
+
+	// Write the (big-endian) message size in bits. It is copied bytewise
+	// instead of being stored through a uint64 pointer: fBuffer is an array
+	// of uint32, so such a store would violate the aliasing rules and is not
+	// guaranteed to be suitably aligned.
+	const uint64 bitCount
+		= B_HOST_TO_BENDIAN_INT64((uint64)fMessageSize * 8);
+	memcpy(chunk + kChunkSize - 8, &bitCount, sizeof(bitCount));
+
+	_ProcessChunk();
+
+	// set digest
+	for (int i = 0; i < 8; i++)
+		fDigest[i] = B_HOST_TO_BENDIAN_INT32(fHash[i]);
+
+	fDigested = true;
+
+	return (uint8*)fDigest;
+}
+
+
+// Hashes the chunk currently stored in the first 16 words of fBuffer and
+// adds the result to the hash state in fHash. The contents of fBuffer are
+// overwritten in the process.
+void
+SHA256::_ProcessChunk()
+{
+	// The chunk is defined as a sequence of 32 bit big-endian integers, so
+	// on little-endian hosts the words have to be swapped first.
+	#if B_HOST_IS_LENDIAN
+		for (int word = 0; word < (int)kChunkSize / 4; word++)
+			fBuffer[word] = B_SWAP_INT32(fBuffer[word]);
+	#endif
+
+	// Extend the 16 message words to the 64 words used by the rounds.
+	for (int word = 16; word < 64; word++) {
+		const uint32 older = fBuffer[word - 15];
+		const uint32 newer = fBuffer[word - 2];
+		const uint32 sigma0 = rotate_right(older, 7) ^ rotate_right(older, 18)
+			^ (older >> 3);
+		const uint32 sigma1 = rotate_right(newer, 17)
+			^ rotate_right(newer, 19) ^ (newer >> 10);
+		fBuffer[word] = fBuffer[word - 16] + sigma0 + fBuffer[word - 7]
+			+ sigma1;
+	}
+
+	// Working copy of the hash state: state[0] is "a", state[7] is "h".
+	uint32 state[8];
+	memcpy(state, fHash, sizeof(state));
+
+	for (int round = 0; round < 64; round++) {
+		const uint32 a = state[0];
+		const uint32 b = state[1];
+		const uint32 c = state[2];
+		const uint32 e = state[4];
+		const uint32 f = state[5];
+		const uint32 g = state[6];
+
+		const uint32 sum0 = rotate_right(a, 2) ^ rotate_right(a, 13)
+			^ rotate_right(a, 22);
+		const uint32 majority = (a & b) ^ (a & c) ^ (b & c);
+		const uint32 temp2 = sum0 + majority;
+		const uint32 sum1 = rotate_right(e, 6) ^ rotate_right(e, 11)
+			^ rotate_right(e, 25);
+		const uint32 choice = (e & f) ^ (~e & g);
+		const uint32 temp1 = state[7] + sum1 + choice + kRounds[round]
+			+ fBuffer[round];
+
+		// Every variable takes the value of its predecessor ...
+		for (int k = 7; k > 0; k--)
+			state[k] = state[k - 1];
+
+		// ... except for "e" (old "d" plus temp1) and "a".
+		state[4] += temp1;
+		state[0] = temp1 + temp2;
+	}
+
+	for (int k = 0; k < 8; k++)
+		fHash[k] += state[k];
+}
--- a/src/tests/add-ons/kernel/file_systems/consistency_check/SHA256.h
+++ b/src/tests/add-ons/kernel/file_systems/consistency_check/SHA256.h
@ -0,0 +1,39 @@
+/*
+ * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
+ * Distributed under the terms of the MIT License.
+ */
+#ifndef SHA_256_H
+#define SHA_256_H
+
+
+#include <SupportDefs.h>
+
+
+#define SHA_DIGEST_LENGTH	32
+
+
+// Incremental SHA-256 computation: call Update() any number of times, then
+// Digest() to retrieve the hash. Init() resets the object for a new message.
+class SHA256 {
+public:
+								SHA256();
+								~SHA256();
+
+			void				Init();
+			void				Update(const void* buffer, size_t size);
+			const uint8*		Digest();
+			size_t				DigestLength() const
+									{ return SHA_DIGEST_LENGTH; }
+
+private:
+			void				_ProcessChunk();
+
+private:
+			// current hash state
+			uint32				fHash[8];
+			// final digest in big-endian byte order, valid once fDigested
+			uint32				fDigest[8];
+			// The first 16 words hold the pending chunk; _ProcessChunk()
+			// extends them to 64 words while hashing.
+			uint32				fBuffer[64];
+			// number of pending bytes in fBuffer
+			size_t				fBytesInBuffer;
+			// Total number of bytes passed to Update(). Kept as a 64 bit
+			// value, so that messages of 4 GiB and more are still counted
+			// correctly where size_t is only 32 bits wide.
+			uint64				fMessageSize;
+			// whether fDigest has been computed already
+			bool				fDigested;
+};
+
+
+#endif	// SHA_256_H
--- a/src/tests/add-ons/kernel/file_systems/consistency_check/generate_hashs.cpp
+++ b/src/tests/add-ons/kernel/file_systems/consistency_check/generate_hashs.cpp
@ -0,0 +1,463 @@
+/*
+ * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
+ * Distributed under the terms of the MIT License.
+ */
+
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <OS.h>
+
+#include "SHA256.h"
+
+
+// Debug tracing: swap the two definitions to have the TRACE() calls print
+// via printf(); as defined now they expand to an empty statement.
+//#define TRACE(x...) printf(x)
+#define TRACE(x...) ;
+
+
+// Name of the running program for the usage message. __progname is only
+// declared here -- presumably it is provided by the C runtime.
+extern const char *__progname;
+static const char *kProgramName = __progname;
+
+// Buffer sizes passed to AdaptiveBuffering by SHAProcessor: the size reads
+// start out with, and the size each buffer is allocated with (which is also
+// the upper limit for the adapted read size).
+const size_t kInitialBufferSize = 1 * 1024 * 1024;
+const size_t kMaxBufferSize = 10 * 1024 * 1024;
+
+
+// Reads data in the thread calling Run() and hands them over to a separate
+// writer thread through a ring of buffers. Subclasses provide the actual
+// Read() and Write() hooks. The amount of data requested per Read() is
+// adjusted in Run() based on how long reading and writing take.
+class AdaptiveBuffering {
+public:
+							AdaptiveBuffering(size_t initialBufferSize,
+								size_t maxBufferSize, uint32 count);
+	virtual					~AdaptiveBuffering();
+
+	// Allocates the buffers and semaphores, and starts the writer thread.
+	virtual status_t		Init();
+
+	// Hooks: Read() fills \a buffer with up to \c *_length bytes and stores
+	// the number of bytes actually read in \c *_length (0 ends the run).
+	// Write() consumes \a length bytes; it is called in the writer thread.
+	virtual status_t		Read(uint8* buffer, size_t* _length);
+	virtual status_t		Write(uint8* buffer, size_t length);
+
+	// Pumps data from Read() to Write() until Read() delivers no more data.
+			status_t		Run();
+
+private:
+			void			_QuitWriter();
+			status_t		_Writer();
+	static	status_t		_Writer(void* self);
+
+			// the writer thread, spawned by Init()
+			thread_id		fWriterThread;
+			// fBufferCount buffers of fMaxBufferSize bytes each
+			uint8**			fBuffers;
+			// number of valid bytes in each of the buffers
+			size_t*			fReadBytes;
+			uint32			fBufferCount;
+			// index of the buffer to read into/write from next
+			uint32			fReadIndex;
+			uint32			fWriteIndex;
+			// number of reads/writes done in the current run
+			uint32			fReadCount;
+			uint32			fWriteCount;
+			size_t			fMaxBufferSize;
+			// number of bytes requested per Read(); adapted by Run()
+			size_t			fCurrentBufferSize;
+			// released by Run() for every buffer filled
+			sem_id			fReadSem;
+			// released by the writer thread for every buffer consumed
+			sem_id			fWriteSem;
+			// released by the writer thread; Run() waits on it at its end
+			sem_id			fFinishedSem;
+			// result of the most recent Write() call
+			status_t		fWriteStatus;
+			// duration of the most recent Write() call, measured with
+			// system_time()
+			uint32			fWriteTime;
+			// set by Run() once Read() delivered zero bytes
+			bool			fFinished;
+			// tells the writer thread to exit
+			bool			fQuit;
+};
+
+// AdaptiveBuffering specialization that reads from a file descriptor and
+// feeds everything read into a SHA-256 computation.
+class SHAProcessor : public AdaptiveBuffering {
+public:
+	SHAProcessor()
+		: AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
+		fFile(-1)
+	{
+	}
+
+	virtual ~SHAProcessor()
+	{
+		Unset();
+	}
+
+	// Closes the file currently owned by this object, if any.
+	void Unset()
+	{
+		if (fFile >= 0) {
+			close(fFile);
+			// Forget the descriptor, so that another Unset() (e.g. from the
+			// destructor) cannot close it a second time -- by then the
+			// number may already belong to a different file.
+			fFile = -1;
+		}
+	}
+
+	// Hashes the contents of \a file from its current position to its end.
+	// The object takes over ownership of the descriptor; it is closed by the
+	// next Process() or Unset() call, or on destruction.
+	// Returns the result of Run().
+	status_t Process(int file)
+	{
+		Unset();
+		fSHA.Init();
+		fFile = file;
+
+		return Run();
+	}
+
+	// Reads up to \c *_length bytes from the file; stores the number of
+	// bytes read in \c *_length, or returns errno if read() failed.
+	virtual status_t Read(uint8* buffer, size_t* _length)
+	{
+		ssize_t bytes = read(fFile, buffer, *_length);
+		if (bytes < B_OK)
+			return errno;
+
+		*_length = bytes;
+		return B_OK;
+	}
+
+	// Adds the data to the hash; never fails.
+	virtual status_t Write(uint8* buffer, size_t length)
+	{
+		fSHA.Update(buffer, length);
+		return B_OK;
+	}
+
+	const uint8* Digest() { return fSHA.Digest(); }
+	size_t DigestLength() const	{ return fSHA.DigestLength(); }
+
+private:
+	SHA256	fSHA;
+	int		fFile;
+};
+
+// A hashed file: its path and the SHA-256 digest of its contents.
+struct file_entry {
+	uint8			hash[SHA_DIGEST_LENGTH];
+	std::string		path;
+
+	// Entries are ordered by their path only.
+	bool operator<(const struct file_entry& other) const
+	{
+		return path.compare(other.path) < 0;
+	}
+
+	// Returns the hash as a string of lower case hexadecimal digits, two
+	// for every byte.
+	std::string HashString() const
+	{
+		static const char kHexDigits[] = "0123456789abcdef";
+
+		std::string text;
+		text.reserve(SHA_DIGEST_LENGTH * 2);
+
+		for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
+			text += kHexDigits[hash[i] >> 4];
+			text += kHexDigits[hash[i] & 0x0f];
+		}
+
+		return text;
+	}
+};
+
+typedef std::vector<file_entry> FileList;
+
+// Declared up front, since process_directory() and process_file() call each
+// other.
+void process_file(const char* path);
+
+
+// The hashing pipeline used for all files; initialized in main().
+SHAProcessor gSHA;
+// All files hashed so far; process_file() appends to it, main() sorts it.
+FileList gFiles;
+
+
+// Only stores the parameters and puts all members into a defined state;
+// buffers, semaphores, and the writer thread are created by Init().
+// \a count is the number of buffers, each of which will be \a maxBufferSize
+// bytes large; \a initialBufferSize is the amount of data requested by the
+// first Read() calls.
+AdaptiveBuffering::AdaptiveBuffering(size_t initialBufferSize,
+		size_t maxBufferSize, uint32 count)
+	:
+	fWriterThread(-1),
+	fBuffers(NULL),
+	fReadBytes(NULL),
+	fBufferCount(count),
+	fReadIndex(0),
+	fWriteIndex(0),
+	fReadCount(0),
+	fWriteCount(0),
+	fMaxBufferSize(maxBufferSize),
+	fCurrentBufferSize(initialBufferSize),
+	fReadSem(-1),
+	fWriteSem(-1),
+	fFinishedSem(-1),
+	fWriteStatus(B_OK),
+	fWriteTime(0),
+	fFinished(false),
+	fQuit(false)
+{
+}
+
+
+// Stops the writer thread and frees all semaphores and buffers.
+AdaptiveBuffering::~AdaptiveBuffering()
+{
+	// The thread has to be gone before anything it uses is deleted.
+	_QuitWriter();
+
+	// All three semaphores created by Init() have to go -- fFinishedSem
+	// included, or it is leaked with every object destroyed.
+	delete_sem(fReadSem);
+	delete_sem(fWriteSem);
+	delete_sem(fFinishedSem);
+
+	if (fBuffers != NULL) {
+		// A NULL entry marks the end of the buffers that were allocated.
+		for (uint32 i = 0; i < fBufferCount; i++) {
+			if (fBuffers[i] == NULL)
+				break;
+
+			free(fBuffers[i]);
+		}
+
+		free(fBuffers);
+	}
+
+	free(fReadBytes);
+}
+
+
+// Allocates the buffers, creates the semaphores, and spawns and starts the
+// writer thread. Returns B_OK on success, B_NO_MEMORY or the error of the
+// failed kernel call otherwise. Whatever has been allocated before a failure
+// is released by the destructor.
+status_t
+AdaptiveBuffering::Init()
+{
+	fReadBytes = (size_t*)malloc(fBufferCount * sizeof(size_t));
+	if (fReadBytes == NULL)
+		return B_NO_MEMORY;
+
+	// The pointer array must start out zeroed: when one of the allocations
+	// below fails, the destructor frees the buffers up to the first NULL
+	// entry, and would otherwise free uninitialized pointers.
+	fBuffers = (uint8**)calloc(fBufferCount, sizeof(uint8*));
+	if (fBuffers == NULL)
+		return B_NO_MEMORY;
+
+	for (uint32 i = 0; i < fBufferCount; i++) {
+		fBuffers[i] = (uint8*)malloc(fMaxBufferSize);
+		if (fBuffers[i] == NULL)
+			return B_NO_MEMORY;
+	}
+
+	fReadSem = create_sem(0, "reader");
+	if (fReadSem < B_OK)
+		return fReadSem;
+
+	// The reader may be at most fBufferCount - 1 buffers ahead of the writer.
+	fWriteSem = create_sem(fBufferCount - 1, "writer");
+	if (fWriteSem < B_OK)
+		return fWriteSem;
+
+	fFinishedSem = create_sem(0, "finished");
+	if (fFinishedSem < B_OK)
+		return fFinishedSem;
+
+	fWriterThread = spawn_thread(&_Writer, "buffer reader", B_LOW_PRIORITY,
+		this);
+	if (fWriterThread < B_OK)
+		return fWriterThread;
+
+	return resume_thread(fWriterThread);
+}
+
+
+// Default implementation: delivers no data at all, which Run() takes as the
+// end of the input.
+status_t
+AdaptiveBuffering::Read(uint8* /*buffer*/, size_t* _length)
+{
+	*_length = 0;
+	return B_OK;
+}
+
+
+// Default implementation: ignores the data and reports success.
+status_t
+AdaptiveBuffering::Write(uint8* /*buffer*/, size_t /*length*/)
+{
+	return B_OK;
+}
+
+
+// Reads data via Read() until it delivers zero bytes or fails, and passes
+// every buffer read on to the writer thread. Returns only after the writer
+// thread is done with all buffers of this run (or has failed).
+// Returns the error of a failed Read(), otherwise the status of the last
+// Write().
+//
+// Protocol between the two threads: for every buffer filled -- including the
+// final empty one, which serves as end marker -- fReadSem is released once.
+// The writer releases fWriteSem once per buffer consumed, and fFinishedSem
+// exactly once per run: after having consumed the end marker, or when a
+// Write() failed.
+status_t
+AdaptiveBuffering::Run()
+{
+	// The read size is adapted in steps of, and never drops below, this size.
+	// Without a lower bound it would eventually be rounded down to zero, and
+	// a zero-sized read is indistinguishable from the end of the input.
+	const size_t kSizeGranularity = 65536;
+
+	fReadIndex = 0;
+	fWriteIndex = 0;
+	fReadCount = 0;
+	fWriteCount = 0;
+	fWriteStatus = B_OK;
+	fWriteTime = 0;
+	fFinished = false;
+		// must be reset, or every run but the first one would stop after
+		// its first buffer
+
+	status_t readStatus = B_OK;
+
+	while (fWriteStatus >= B_OK) {
+		bigtime_t start = system_time();
+		int32 index = fReadIndex;
+
+		TRACE("%ld. read index %lu, buffer size %lu\n", fReadCount, index,
+			fCurrentBufferSize);
+
+		fReadBytes[index] = fCurrentBufferSize;
+		readStatus = Read(fBuffers[index], &fReadBytes[index]);
+		if (readStatus < B_OK) {
+			// Don't return right away: the writer thread may still be busy
+			// with earlier buffers. Pass it an end marker instead and wait
+			// for it below like at the regular end of the input.
+			fReadBytes[index] = 0;
+		}
+
+		TRACE("%ld. read -> %lu bytes\n", fReadCount, fReadBytes[index]);
+
+		fReadCount++;
+		fReadIndex = (index + 1) % fBufferCount;
+		if (fReadBytes[index] == 0)
+			fFinished = true;
+		release_sem(fReadSem);
+
+		while (acquire_sem(fWriteSem) == B_INTERRUPTED)
+			;
+
+		if (fFinished)
+			break;
+
+		// Adapt the read size: shrink it when writing took longer than
+		// reading (including the wait for a free buffer), grow it otherwise.
+		bigtime_t readTime = system_time() - start;
+		uint32 writeTime = fWriteTime;
+		if (writeTime) {
+			if (writeTime > readTime) {
+				fCurrentBufferSize = fCurrentBufferSize * 8/9;
+				fCurrentBufferSize &= ~(kSizeGranularity - 1);
+
+				if (fCurrentBufferSize < kSizeGranularity)
+					fCurrentBufferSize = kSizeGranularity;
+			} else {
+				fCurrentBufferSize = fCurrentBufferSize * 9/8;
+				fCurrentBufferSize = (fCurrentBufferSize + kSizeGranularity
+					- 1) & ~(kSizeGranularity - 1);
+			}
+
+			// never request more than a buffer can hold
+			if (fCurrentBufferSize > fMaxBufferSize)
+				fCurrentBufferSize = fMaxBufferSize;
+		}
+	}
+
+	// wait until the writer thread has caught up (or given up)
+	while (acquire_sem(fFinishedSem) == B_INTERRUPTED)
+		;
+
+	if (readStatus < B_OK)
+		return readStatus;
+
+	return fWriteStatus;
+}
+
+
+// Tells the writer thread to quit and waits until it is gone. Does nothing
+// if the thread has not been started.
+void
+AdaptiveBuffering::_QuitWriter()
+{
+	if (fWriterThread >= B_OK) {
+		fQuit = true;
+		// wake the thread up, so that it notices the flag
+		release_sem(fReadSem);
+
+		status_t status;
+		wait_for_thread(fWriterThread, &status);
+
+		fWriterThread = -1;
+	}
+}
+
+
+// Main loop of the writer thread: waits for a filled buffer, passes it to
+// Write(), and returns it to the reader. Runs until _QuitWriter() asks it to
+// quit (returns B_OK), or until Write() fails (returns its error).
+status_t
+AdaptiveBuffering::_Writer()
+{
+	while (true) {
+		while (acquire_sem(fReadSem) == B_INTERRUPTED)
+			;
+		if (fQuit)
+			break;
+
+		bigtime_t start = system_time();
+
+		// Remember the size now: as soon as fWriteSem has been released, the
+		// reader may reuse the buffer and its fReadBytes slot.
+		size_t bytes = fReadBytes[fWriteIndex];
+
+		TRACE("%ld. write index %lu, %p, bytes %lu\n", fWriteCount, fWriteIndex,
+			fBuffers[fWriteIndex], bytes);
+
+		fWriteStatus = Write(fBuffers[fWriteIndex], bytes);
+
+		TRACE("%ld. write done\n", fWriteCount);
+
+		fWriteIndex = (fWriteIndex + 1) % fBufferCount;
+		fWriteTime = uint32(system_time() - start);
+		fWriteCount++;
+
+		release_sem(fWriteSem);
+
+		if (fWriteStatus < B_OK) {
+			// Run() stops reading on a write error and then waits for us --
+			// don't let it wait forever.
+			release_sem(fFinishedSem);
+			return fWriteStatus;
+		}
+
+		// Only the empty end marker ends a run. Checking fFinished here
+		// instead would signal the end too early whenever the reader sets
+		// the flag while data buffers are still waiting to be written.
+		if (bytes == 0)
+			release_sem(fFinishedSem);
+	}
+
+	return B_OK;
+}
+
+
+// Thread entry function; \a self is the AdaptiveBuffering object whose
+// writer loop is to be run.
+/*static*/ status_t
+AdaptiveBuffering::_Writer(void* self)
+{
+	AdaptiveBuffering* buffering = (AdaptiveBuffering*)self;
+	return buffering->_Writer();
+}
+
+
+//	#pragma mark -
+
+
+// Passes every entry of the directory \a path -- except for "." and ".." --
+// to process_file(), which in turn descends into subdirectories.
+void
+process_directory(const char* path)
+{
+	DIR* dir = opendir(path);
+	if (dir == NULL) {
+		// report the failure like process_file() does for files
+		fprintf(stderr, "Could not open directory \"%s\": %s\n", path,
+			strerror(errno));
+		return;
+	}
+
+	while (struct dirent* entry = readdir(dir)) {
+		if (!strcmp(entry->d_name, ".")
+			|| !strcmp(entry->d_name, ".."))
+			continue;
+
+		// Build the path in a string rather than a fixed-size array, so
+		// that deeply nested entries are not silently truncated.
+		std::string fullPath(path);
+		fullPath += '/';
+		fullPath += entry->d_name;
+
+		process_file(fullPath.c_str());
+	}
+
+	closedir(dir);
+}
+
+
+// Hashes the file at \a path, prints hash and path to stdout, and appends
+// the result to gFiles. Directories are handed over to process_directory()
+// instead. Failures are reported on stderr and the entry is skipped.
+void
+process_file(const char* path)
+{
+	struct stat info;
+	if (::stat(path, &info) != 0) {
+		fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
+			strerror(errno));
+		return;
+	}
+
+	if (S_ISDIR(info.st_mode)) {
+		process_directory(path);
+		return;
+	}
+
+	const int fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Could not open file \"%s\": %s\n", path,
+			strerror(errno));
+		return;
+	}
+
+	// gSHA owns the descriptor from here on
+	const status_t result = gSHA.Process(fd);
+	if (result != B_OK) {
+		fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
+			strerror(result));
+		return;
+	}
+
+	file_entry entry;
+	entry.path = path;
+	memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
+
+	printf("%s  %s\n", entry.HashString().c_str(), path);
+
+	gFiles.push_back(entry);
+}
+
+
+// Hashes all files and directories given on the command line, and prints
+// how long that took. Returns 1 on usage or initialization errors, 0
+// otherwise.
+// NOTE(review): the usage text mentions a <hash-file>, but all arguments are
+// currently processed as files to hash -- presumably work in progress.
+int
+main(int argc, char** argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <hash-file> <files>\n", kProgramName);
+		return 1;
+	}
+
+	status_t status = gSHA.Init();
+	if (status != B_OK) {
+		fprintf(stderr, "Could not initialize SHA processor: %s\n",
+			strerror(status));
+		return 1;
+	}
+
+	bigtime_t start = system_time();
+
+	for (int i = 1; i < argc; i++) {
+		process_file(argv[i]);
+	}
+
+	std::sort(gFiles.begin(), gFiles.end());
+
+	bigtime_t runtime = system_time() - start;
+	if (!gFiles.empty()) {
+		// size() is unsigned; cast it to match the conversion specifier
+		printf("Generated hashes for %lu files in %g seconds, %g msec per "
+			"file.\n", (unsigned long)gFiles.size(), runtime / 1000000.0,
+			runtime / 1000.0 / gFiles.size());
+	}
+
+	return 0;
+}