Work-in-progress:

* Started a file consistency checker; right now, it will just compute a SHA256
  hash value for every file, and dump it to stdout.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@28406 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2008-10-31 17:05:49 +00:00
parent e3bcc1dc09
commit d6013f6096
5 changed files with 700 additions and 0 deletions

View File

@ -2,6 +2,7 @@ SubDir HAIKU_TOP src tests add-ons kernel file_systems ;
SubInclude HAIKU_TOP src tests add-ons kernel file_systems bfs ;
SubInclude HAIKU_TOP src tests add-ons kernel file_systems cdda ;
SubInclude HAIKU_TOP src tests add-ons kernel file_systems consistency_check ;
SubInclude HAIKU_TOP src tests add-ons kernel file_systems fs_shell ;
SubInclude HAIKU_TOP src tests add-ons kernel file_systems fragmenter ;
#SubInclude HAIKU_TOP src tests add-ons kernel file_systems iso9660 ;

View File

@ -0,0 +1,7 @@
SubDir HAIKU_TOP src tests add-ons kernel file_systems consistency_check ;
# Test program that hashes files: built from the command line driver and the
# SHA-256 implementation. The names after the second ':' are presumably the
# libraries to link against (libbe and the target's C++ standard library) --
# confirm against the SimpleTest rule.
SimpleTest generate_hashs :
generate_hashs.cpp
SHA256.cpp
: be $(TARGET_LIBSTDC++)
;

View File

@ -0,0 +1,190 @@
/*
* Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#include "SHA256.h"
#include <stdio.h>
#include <string.h>
#include <ByteOrder.h>
// Size of one message block in bytes. Update() and Digest() fill fBuffer up
// to this size before handing it to _ProcessChunk().
static const uint32 kChunkSize = 64; // 64 bytes == 512 bits
// Per-round additive constants, indexed by the round number in
// _ProcessChunk(). These are the SHA-256 constants K[0..63] from FIPS 180-4.
static const uint32 kRounds[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
// Initial hash state; Init() copies it into fHash. These are the SHA-256
// initial hash values H(0) from FIPS 180-4.
static const uint32 kHash[8] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
// Rotates the 32 bit \a value to the right by \a bits positions: the bits
// shifted out at the low end are fed back in at the high end.
// All callers in this file pass constants between 2 and 25.
static inline uint32
rotate_right(uint32 value, int bits)
{
	const uint32 shiftedDown = value >> bits;
	const uint32 wrappedAround = value << (32 - bits);
	return shiftedDown | wrappedAround;
}
// #pragma mark -
// Constructs the hasher and puts it into its initial state via Init(), so
// that Update() can be called right away.
SHA256::SHA256()
{
Init();
}
// Nothing to release; the destructor body is intentionally empty.
SHA256::~SHA256()
{
}
// Resets the object so that a new message can be hashed: the buffer and the
// message size counter are emptied, the "digest computed" flag is cleared,
// and the hash state is reloaded from kHash.
void
SHA256::Init()
{
	fDigested = false;
	fMessageSize = 0;
	fBytesInBuffer = 0;

	for (int i = 0; i < 8; i++)
		fHash[i] = kHash[i];
}
void
SHA256::Update(const void* _buffer, size_t size)
{
const uint8* buffer = (const uint8*)_buffer;
fMessageSize += size;
while (fBytesInBuffer + size >= kChunkSize) {
size_t toCopy = kChunkSize - fBytesInBuffer;
memcpy((uint8*)fBuffer + fBytesInBuffer, buffer, toCopy);
buffer += toCopy;
size -= toCopy;
_ProcessChunk();
fBytesInBuffer = 0;
}
if (size > 0) {
memcpy((uint8*)fBuffer + fBytesInBuffer, buffer, size);
fBytesInBuffer += size;
}
}
// Finishes the hash computation and returns a pointer to the digest
// (SHA_DIGEST_LENGTH bytes, stored in fDigest and owned by this object).
// The padding and final processing are done only on the first call; later
// calls return the cached digest until Init() is called again.
const uint8*
SHA256::Digest()
{
	if (!fDigested) {
		// We need to append a 1 bit, append padding with 0 bits, and append
		// the message size in bits (64 bit big-endian int), so that the whole
		// is chunk-aligned. So we either have to process one last chunk or two
		// chunks.
		uint8* const bytes = (uint8*)fBuffer;

		// append the 1 bit
		bytes[fBytesInBuffer] = 0x80;
		fBytesInBuffer++;

		// if the message size doesn't fit anymore, we pad the chunk and
		// process it
		if (fBytesInBuffer > kChunkSize - 8) {
			memset(bytes + fBytesInBuffer, 0, kChunkSize - fBytesInBuffer);
			_ProcessChunk();
			fBytesInBuffer = 0;
		}

		// pad the buffer
		if (fBytesInBuffer < kChunkSize - 8) {
			memset(bytes + fBytesInBuffer, 0,
				kChunkSize - 8 - fBytesInBuffer);
		}

		// write the (big-endian) message size in bits; copied bytewise since
		// fBuffer is an array of uint32 and storing through a uint64 pointer
		// would violate aliasing rules and is not guaranteed to be aligned
		const uint64 messageBits
			= B_HOST_TO_BENDIAN_INT64((uint64)fMessageSize * 8);
		memcpy(bytes + kChunkSize - 8, &messageBits, sizeof(messageBits));

		_ProcessChunk();

		// set digest
		for (int i = 0; i < 8; i++)
			fDigest[i] = B_HOST_TO_BENDIAN_INT32(fHash[i]);

		fDigested = true;
	}

	return (uint8*)fDigest;
}
void
SHA256::_ProcessChunk()
{
// convert endianess -- the data are supposed to be a stream of
// 32 bit big-endian integers
#if B_HOST_IS_LENDIAN
for (int i = 0; i < (int)kChunkSize / 4; i++)
fBuffer[i] = B_SWAP_INT32(fBuffer[i]);
#endif
// pre-process buffer (extend to 64 elements)
for (int i = 16; i < 64; i++) {
uint32 v0 = fBuffer[i - 15];
uint32 v1 = fBuffer[i - 2];
uint32 s0 = rotate_right(v0, 7) ^ rotate_right(v0, 18) ^ (v0 >> 3);
uint32 s1 = rotate_right(v1, 17) ^ rotate_right(v1, 19) ^ (v1 >> 10);
fBuffer[i] = fBuffer[i - 16] + s0 + fBuffer[i - 7] + s1;
}
uint32 a = fHash[0];
uint32 b = fHash[1];
uint32 c = fHash[2];
uint32 d = fHash[3];
uint32 e = fHash[4];
uint32 f = fHash[5];
uint32 g = fHash[6];
uint32 h = fHash[7];
// process the buffer
for (int i = 0; i < 64; i++) {
uint32 s0 = rotate_right(a, 2) ^ rotate_right(a, 13)
^ rotate_right(a, 22);
uint32 maj = (a & b) ^ (a & c) ^ (b & c);
uint32 t2 = s0 + maj;
uint32 s1 = rotate_right(e, 6) ^ rotate_right(e, 11)
^ rotate_right(e, 25);
uint32 ch = (e & f) ^ (~e & g);
uint32 t1 = h + s1 + ch + kRounds[i] + fBuffer[i];
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
fHash[0] += a;
fHash[1] += b;
fHash[2] += c;
fHash[3] += d;
fHash[4] += e;
fHash[5] += f;
fHash[6] += g;
fHash[7] += h;
}

View File

@ -0,0 +1,39 @@
/*
* Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#ifndef SHA_256_H
#define SHA_256_H
#include <SupportDefs.h>
#define SHA_DIGEST_LENGTH 32
class SHA256 {
public:
SHA256();
~SHA256();
void Init();
void Update(const void* buffer, size_t size);
const uint8* Digest();
size_t DigestLength() const
{ return SHA_DIGEST_LENGTH; }
private:
void _ProcessChunk();
private:
uint32 fHash[8];
uint32 fDigest[8];
uint32 fBuffer[64];
size_t fBytesInBuffer;
size_t fMessageSize;
bool fDigested;
};
#endif // SHA_256_H

View File

@ -0,0 +1,463 @@
/*
* Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
* Distributed under the terms of the MIT License.
*/
#include <algorithm>
#include <string>
#include <vector>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <OS.h>
#include "SHA256.h"
// Debug tracing: swap the two definitions below to have TRACE() print via
// printf(). As it stands, TRACE() expands to an empty statement.
//#define TRACE(x...) printf(x)
#define TRACE(x...) ;
// Name of the running program, taken from the runtime's __progname; in this
// file it is used for the usage message printed by main().
extern const char *__progname;
static const char *kProgramName = __progname;
// Read size the SHAProcessor starts out with, and the size each of its
// buffers is allocated with (which also caps how far the read size may grow).
const size_t kInitialBufferSize = 1 * 1024 * 1024;
const size_t kMaxBufferSize = 10 * 1024 * 1024;
// Moves data from a source to a sink using a ring of buffers and two threads:
// the thread calling Run() fills buffers via Read(), while a writer thread
// spawned by Init() hands the filled buffers to Write(). Run() adjusts the
// read size depending on how the read and write times compare.
// Subclasses override Read() and Write(); the base versions do nothing.
class AdaptiveBuffering {
public:
AdaptiveBuffering(size_t initialBufferSize,
size_t maxBufferSize, uint32 count);
virtual ~AdaptiveBuffering();
// Allocates the buffers and semaphores and starts the writer thread.
virtual status_t Init();
// Fills \a buffer with up to \a *_length bytes and stores the number of
// bytes actually provided in \a *_length; Run() treats 0 as end of data.
virtual status_t Read(uint8* buffer, size_t* _length);
// Consumes \a length bytes from \a buffer; called in the writer thread.
virtual status_t Write(uint8* buffer, size_t length);
// Processes one complete stream; returns once the writer has signalled
// that it is finished, or as soon as Read() fails.
status_t Run();
private:
void _QuitWriter();
status_t _Writer();
static status_t _Writer(void* self);
// thread running _Writer(), -1 if none
thread_id fWriterThread;
// fBufferCount buffers of fMaxBufferSize bytes each
uint8** fBuffers;
// for each buffer, the number of bytes the last Read() put into it
size_t* fReadBytes;
uint32 fBufferCount;
// index of the buffer to be filled next / to be written next
uint32 fReadIndex;
uint32 fWriteIndex;
// number of Read() / Write() calls done in the current Run()
uint32 fReadCount;
uint32 fWriteCount;
size_t fMaxBufferSize;
// number of bytes currently requested per Read()
size_t fCurrentBufferSize;
// released by Run() for every filled buffer, acquired by the writer
sem_id fReadSem;
// released by the writer for every written buffer, acquired by Run()
sem_id fWriteSem;
// released by the writer when it is done, acquired at the end of Run()
sem_id fFinishedSem;
// result of the most recent Write()
status_t fWriteStatus;
// duration of the most recent Write(), as measured via system_time()
uint32 fWriteTime;
// set by Run() when Read() delivered no more data
bool fFinished;
// set by _QuitWriter() to make the writer thread leave its loop
bool fQuit;
};
// Computes the SHA-256 hash of a file: the AdaptiveBuffering base reads the
// file through Read(), and the writer thread feeds the data into the hash
// through Write().
class SHAProcessor : public AdaptiveBuffering {
public:
	SHAProcessor()
		: AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
		fFile(-1)
	{
	}

	virtual ~SHAProcessor()
	{
		Unset();
	}

	// Closes the file currently owned by this object, if any.
	void Unset()
	{
		if (fFile >= 0) {
			close(fFile);
			// forget the descriptor, so that another Unset() (e.g. the one
			// in the destructor) does not close it a second time
			fFile = -1;
		}
	}

	// Hashes the contents of the file descriptor \a file, starting at its
	// current position. The object takes over ownership of \a file; it is
	// closed by the next Process()/Unset() call or by the destructor.
	// Returns the result of Run().
	status_t Process(int file)
	{
		Unset();
		fSHA.Init();
		fFile = file;

		return Run();
	}

	// Reads up to \a *_length bytes from the file into \a buffer and stores
	// the number of bytes read in \a *_length (0 at the end of the file).
	// Returns errno if read() fails.
	virtual status_t Read(uint8* buffer, size_t* _length)
	{
		ssize_t bytes = read(fFile, buffer, *_length);
		if (bytes < B_OK)
			return errno;

		*_length = bytes;
		return B_OK;
	}

	// Adds \a length bytes from \a buffer to the hash; always succeeds.
	virtual status_t Write(uint8* buffer, size_t length)
	{
		fSHA.Update(buffer, length);
		return B_OK;
	}

	// Digest of the data processed by the last Process() call.
	const uint8* Digest() { return fSHA.Digest(); }
	size_t DigestLength() const { return fSHA.DigestLength(); }

private:
	SHA256	fSHA;
	int		fFile;
		// descriptor of the file being hashed, -1 if there is none
};
// A hashed file: its path and the SHA-256 digest of its contents.
struct file_entry {
	uint8		hash[SHA_DIGEST_LENGTH];
	std::string	path;

	// Orders entries by path only; the hash does not take part.
	bool operator<(const struct file_entry& other) const
	{
		return path.compare(other.path) < 0;
	}

	// Returns the hash as a string of lower case hexadecimal digits, two
	// per byte.
	std::string HashString() const
	{
		static const char kHexDigits[] = "0123456789abcdef";

		std::string text;
		text.reserve(SHA_DIGEST_LENGTH * 2);

		for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
			text += kHexDigits[hash[i] >> 4];
			text += kHexDigits[hash[i] & 0x0f];
		}

		return text;
	}
};
typedef std::vector<file_entry> FileList;
// Forward declaration: process_directory() and process_file() call each
// other.
void process_file(const char* path);
// The hasher shared by all process_file() calls; initialized in main().
SHAProcessor gSHA;
// All files hashed so far, in processing order until main() sorts the list.
FileList gFiles;
// Only stores the parameters and puts all other members into a defined
// "nothing allocated yet" state (NULL pointers, -1 for thread and semaphore
// IDs). The actual resources are set up by Init().
// \a initialBufferSize is the size of the first read request,
// \a maxBufferSize the size each buffer is allocated with, and \a count the
// number of buffers.
AdaptiveBuffering::AdaptiveBuffering(size_t initialBufferSize,
size_t maxBufferSize, uint32 count)
:
fWriterThread(-1),
fBuffers(NULL),
fReadBytes(NULL),
fBufferCount(count),
fReadIndex(0),
fWriteIndex(0),
fReadCount(0),
fWriteCount(0),
fMaxBufferSize(maxBufferSize),
fCurrentBufferSize(initialBufferSize),
fReadSem(-1),
fWriteSem(-1),
fFinishedSem(-1),
fWriteStatus(B_OK),
fWriteTime(0),
fFinished(false),
fQuit(false)
{
}
// Stops the writer thread and releases everything Init() has set up. Also
// works when Init() was never called or failed halfway.
AdaptiveBuffering::~AdaptiveBuffering()
{
	// the writer thread must be gone before its semaphores and buffers are
	_QuitWriter();

	delete_sem(fReadSem);
	delete_sem(fWriteSem);
	delete_sem(fFinishedSem);

	if (fBuffers != NULL) {
		// the first NULL entry marks the point where allocation stopped
		for (uint32 i = 0; i < fBufferCount; i++) {
			if (fBuffers[i] == NULL)
				break;

			free(fBuffers[i]);
		}

		free(fBuffers);
	}

	free(fReadBytes);
}
// Allocates the buffers, creates the semaphores, and starts the writer
// thread. Must succeed before Run() may be called.
// Returns B_OK on success, B_BAD_VALUE if the object was constructed with a
// buffer count of 0, B_NO_MEMORY if an allocation failed, or the error code
// of the failed semaphore/thread call. Whatever has been allocated before a
// failure is released by the destructor.
status_t
AdaptiveBuffering::Init()
{
	// at least one buffer is needed; the initial fWriteSem count
	// (fBufferCount - 1) and the index arithmetic in Run() rely on it
	if (fBufferCount == 0)
		return B_BAD_VALUE;

	fReadBytes = (size_t*)malloc(fBufferCount * sizeof(size_t));
	if (fReadBytes == NULL)
		return B_NO_MEMORY;

	// zero-initialized, since the destructor takes the first NULL entry as
	// the end of the successfully allocated buffers
	fBuffers = (uint8**)calloc(fBufferCount, sizeof(uint8*));
	if (fBuffers == NULL)
		return B_NO_MEMORY;

	for (uint32 i = 0; i < fBufferCount; i++) {
		fBuffers[i] = (uint8*)malloc(fMaxBufferSize);
		if (fBuffers[i] == NULL)
			return B_NO_MEMORY;
	}

	// counts the filled buffers waiting for the writer
	fReadSem = create_sem(0, "reader");
	if (fReadSem < B_OK)
		return fReadSem;

	// lets the reader run ahead of the writer
	fWriteSem = create_sem(fBufferCount - 1, "writer");
	if (fWriteSem < B_OK)
		return fWriteSem;

	fFinishedSem = create_sem(0, "finished");
	if (fFinishedSem < B_OK)
		return fFinishedSem;

	// NOTE(review): the thread runs _Writer(), yet is named "buffer reader"
	fWriterThread = spawn_thread(&_Writer, "buffer reader", B_LOW_PRIORITY,
		this);
	if (fWriterThread < B_OK)
		return fWriterThread;

	return resume_thread(fWriterThread);
}
// Default data source: provides no data at all. It sets \a *_length to 0,
// which Run() interprets as the end of the data, and returns B_OK.
status_t
AdaptiveBuffering::Read(uint8* /*buffer*/, size_t* _length)
{
*_length = 0;
return B_OK;
}
// Default data sink: ignores the data and returns B_OK.
status_t
AdaptiveBuffering::Write(uint8* /*buffer*/, size_t /*length*/)
{
return B_OK;
}
// Processes one complete stream: buffers are filled via Read() and passed on
// to the writer thread until Read() delivers no more data or a Write()
// fails; then waits for the writer to signal that it is done.
// The read size is adapted on the way: it shrinks when writing a buffer took
// longer than reading it and grows otherwise, in multiples of 64 KiB.
// Returns the error of a failed Read() right away, the status of the last
// Write() otherwise. Init() must have succeeded before.
status_t
AdaptiveBuffering::Run()
{
	// the read size never leaves this range; a size of 0 in particular
	// would make Read() return 0 bytes, which means "end of data"
	const size_t kMinBufferSize = 65536;

	fReadIndex = 0;
	fWriteIndex = 0;
	fReadCount = 0;
	fWriteCount = 0;
	fWriteStatus = B_OK;
	fWriteTime = 0;
	fFinished = false;
		// a flag left over from the previous run would end this one after
		// the very first buffer

	while (fWriteStatus >= B_OK) {
		bigtime_t start = system_time();
		int32 index = fReadIndex;

		TRACE("%ld. read index %lu, buffer size %lu\n", fReadCount, index,
			fCurrentBufferSize);

		fReadBytes[index] = fCurrentBufferSize;
		status_t status = Read(fBuffers[index], &fReadBytes[index]);
		if (status < B_OK)
			return status;

		TRACE("%ld. read -> %lu bytes\n", fReadCount, fReadBytes[index]);

		fReadCount++;
		fReadIndex = (index + 1) % fBufferCount;
		if (fReadBytes[index] == 0)
			fFinished = true;

		// hand the buffer over to the writer, and wait until there is a
		// free buffer again
		release_sem(fReadSem);

		while (acquire_sem(fWriteSem) == B_INTERRUPTED)
			;

		if (fFinished)
			break;

		bigtime_t readTime = system_time() - start;
		uint32 writeTime = fWriteTime;
		if (writeTime) {
			if (writeTime > readTime) {
				fCurrentBufferSize = fCurrentBufferSize * 8/9;
				fCurrentBufferSize &= ~65535;
			} else {
				fCurrentBufferSize = fCurrentBufferSize * 9/8;
				fCurrentBufferSize = (fCurrentBufferSize + 65535) & ~65535;
			}

			// the upper limit is applied last, as the buffers are only
			// fMaxBufferSize bytes large
			if (fCurrentBufferSize < kMinBufferSize)
				fCurrentBufferSize = kMinBufferSize;
			if (fCurrentBufferSize > fMaxBufferSize)
				fCurrentBufferSize = fMaxBufferSize;
		}
	}

	while (acquire_sem(fFinishedSem) == B_INTERRUPTED)
		;

	return fWriteStatus;
}
void
AdaptiveBuffering::_QuitWriter()
{
if (fWriterThread >= B_OK) {
fQuit = true;
release_sem(fReadSem);
status_t status;
wait_for_thread(fWriterThread, &status);
fWriterThread = -1;
}
}
// Main loop of the writer thread: waits for filled buffers, passes them to
// Write() in the order they were filled, and returns each buffer to the
// reader afterwards.
// The end of a run is signalled via fFinishedSem exactly once: either after
// the last buffer filled by the reader has been written, or when a Write()
// failed -- in that case the thread ends and returns the error.
// Returns B_OK when asked to quit via fQuit.
status_t
AdaptiveBuffering::_Writer()
{
	while (true) {
		while (acquire_sem(fReadSem) == B_INTERRUPTED)
			;
		if (fQuit)
			break;

		bigtime_t start = system_time();

		TRACE("%ld. write index %lu, %p, bytes %lu\n", fWriteCount, fWriteIndex,
			fBuffers[fWriteIndex], fReadBytes[fWriteIndex]);

		fWriteStatus = Write(fBuffers[fWriteIndex], fReadBytes[fWriteIndex]);

		TRACE("%ld. write done\n", fWriteCount);

		fWriteIndex = (fWriteIndex + 1) % fBufferCount;
		fWriteTime = uint32(system_time() - start);
		fWriteCount++;

		release_sem(fWriteSem);

		if (fWriteStatus < B_OK) {
			// Run() leaves its loop on a failed write and then waits for
			// fFinishedSem; without this it would wait forever
			release_sem(fFinishedSem);
			return fWriteStatus;
		}

		// The reader may be several buffers ahead, so fFinished alone does
		// not mean that everything has been written yet. Only signal once
		// we have caught up with the reader.
		if (fFinished && fWriteCount == fReadCount)
			release_sem(fFinishedSem);
	}

	return B_OK;
}
// Thread entry point: \a self is the AdaptiveBuffering object passed to
// spawn_thread(); forwards to its _Writer() member function.
/*static*/ status_t
AdaptiveBuffering::_Writer(void* self)
{
	AdaptiveBuffering* buffering = static_cast<AdaptiveBuffering*>(self);
	return buffering->_Writer();
}
// #pragma mark -
// Processes all entries of the directory \a path (except "." and "..") by
// passing their full paths to process_file(), which in turn descends into
// subdirectories.
// A directory that cannot be opened is reported on stderr and skipped.
void
process_directory(const char* path)
{
	DIR* dir = opendir(path);
	if (dir == NULL) {
		fprintf(stderr, "Could not open directory \"%s\": %s\n", path,
			strerror(errno));
		return;
	}

	// built as std::string, so that long paths are never truncated
	const std::string prefix = std::string(path) + "/";

	while (struct dirent* entry = readdir(dir)) {
		if (!strcmp(entry->d_name, ".")
			|| !strcmp(entry->d_name, ".."))
			continue;

		const std::string fullPath = prefix + entry->d_name;
		process_file(fullPath.c_str());
	}

	closedir(dir);
}
void
process_file(const char* path)
{
struct stat stat;
if (::stat(path, &stat) != 0) {
fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
strerror(errno));
return;
}
if (S_ISDIR(stat.st_mode)) {
process_directory(path);
return;
}
int file = open(path, O_RDONLY);
if (file < 0) {
fprintf(stderr, "Could not open file \"%s\": %s\n", path,
strerror(errno));
return;
}
status_t status = gSHA.Process(file);
if (status != B_OK) {
fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
strerror(status));
return;
}
file_entry entry;
memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
entry.path = path;
printf("%s %s\n", entry.HashString().c_str(), path);
gFiles.push_back(entry);
}
// Hashes all files and directories given on the command line, prints one
// "<hash> <path>" line per file, and finally a short timing summary.
// Returns 1 if no arguments were given or the hasher could not be
// initialized, 0 otherwise.
int
main(int argc, char** argv)
{
	// NOTE(review): the usage text mentions a <hash-file>, but all arguments
	// are treated as files to be hashed for now
	if (argc < 2) {
		fprintf(stderr, "usage: %s <hash-file> <files>\n", kProgramName);
		return 1;
	}

	status_t status = gSHA.Init();
	if (status != B_OK) {
		fprintf(stderr, "Could not initialize SHA processor: %s\n",
			strerror(status));
		return 1;
	}

	bigtime_t start = system_time();

	for (int i = 1; i < argc; i++) {
		process_file(argv[i]);
	}

	// orders the entries by path, see file_entry::operator<()
	std::sort(gFiles.begin(), gFiles.end());

	bigtime_t runtime = system_time() - start;

	if (gFiles.size() > 0) {
		// the count is passed as unsigned long to match the format
		printf("Generated hashes for %lu files in %g seconds, %g msec per "
			"file.\n", (unsigned long)gFiles.size(), runtime / 1000000.0,
			runtime / 1000.0 / gFiles.size());
	}

	return 0;
}