From a2e7c7417b968392fd1756b6b097eb047003e6c7 Mon Sep 17 00:00:00 2001 From: Ingo Weinhold Date: Mon, 12 Jul 2010 13:21:42 +0000 Subject: [PATCH] * Added Transaction class which wraps a block cache transaction and performs all other operations required to roll back a transaction. Transactions are fully serialized -- due to limitations of our block cache and also to keep things simple. * Use a transaction for all write operations. * Implemented the directory entry management code (a simple tree algorithm). * Finished/implemented the FS hooks for directory entry lookup, directory iteration, creation, and removal. * Added non-persistent support for node access times. * Set the user and group IDs on node creation. * Added permission checks to several FS hooks. * BlockAllocator::_Free(): The number of freed blocks was subtracted from fFreeBlocks instead of added. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@37478 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- .../kernel/file_corruption/checksumfs.h | 10 +- .../system/kernel/file_corruption/fs/Block.h | 50 +- .../file_corruption/fs/BlockAllocator.cpp | 84 +- .../file_corruption/fs/BlockAllocator.h | 39 +- .../kernel/file_corruption/fs/Directory.cpp | 1470 +++++++++++++++++ .../kernel/file_corruption/fs/Directory.h | 11 + .../system/kernel/file_corruption/fs/Jamfile | 3 + .../system/kernel/file_corruption/fs/Node.cpp | 77 +- .../system/kernel/file_corruption/fs/Node.h | 27 +- .../kernel/file_corruption/fs/Transaction.cpp | 196 +++ .../kernel/file_corruption/fs/Transaction.h | 62 + .../kernel/file_corruption/fs/Volume.cpp | 72 +- .../system/kernel/file_corruption/fs/Volume.h | 24 + .../kernel/file_corruption/fs/checksumfs.cpp | 363 +++- .../file_corruption/fs/userland/Jamfile | 3 + 15 files changed, 2392 insertions(+), 99 deletions(-) create mode 100644 src/tests/system/kernel/file_corruption/fs/Transaction.cpp create mode 100644 src/tests/system/kernel/file_corruption/fs/Transaction.h diff --git 
a/src/tests/system/kernel/file_corruption/checksumfs.h b/src/tests/system/kernel/file_corruption/checksumfs.h index 13ce788903..4295d814c1 100644 --- a/src/tests/system/kernel/file_corruption/checksumfs.h +++ b/src/tests/system/kernel/file_corruption/checksumfs.h @@ -56,9 +56,17 @@ struct checksumfs_node { } _PACKED; +static const uint32 kCheckSumFSMaxDirEntryTreeDepth = 24; + +struct checksumfs_dir_entry_tree { + uint16 depth; +} _PACKED; + + struct checksumfs_dir_entry_block { uint16 entryCount; - uint16 nameEnds[0]; // end (in-block) offsets of the names, + uint16 nameEnds[0]; // end offsets of the names (relative to the + // start of the first name), // e.g. nameEnds[0] == length of first name // char names[]; // string of all (unterminated) names, // directly follows the nameEnds array diff --git a/src/tests/system/kernel/file_corruption/fs/Block.h b/src/tests/system/kernel/file_corruption/fs/Block.h index 778d6bcdad..d8baf19006 100644 --- a/src/tests/system/kernel/file_corruption/fs/Block.h +++ b/src/tests/system/kernel/file_corruption/fs/Block.h @@ -8,6 +8,7 @@ #include +#include "Transaction.h" #include "Volume.h" @@ -25,28 +26,62 @@ public: Put(); } + void TransferFrom(Block& other) + { + Put(); + + fVolume = other.fVolume; + fData = other.fData; + fIndex = other.fIndex; + fWritable = other.fWritable; + + other.fVolume = NULL; + other.fData = NULL; + } + bool GetReadable(Volume* volume, uint64 blockIndex) { Put(); return _Init(volume, blockIndex, - block_cache_get(volume->BlockCache(), blockIndex)); + block_cache_get(volume->BlockCache(), blockIndex), false); } - bool GetWritable(Volume* volume, uint64 blockIndex) + bool GetWritable(Volume* volume, uint64 blockIndex, + Transaction& transaction) { Put(); return _Init(volume, blockIndex, - block_cache_get_writable(volume->BlockCache(), blockIndex, -1)); + block_cache_get_writable(volume->BlockCache(), blockIndex, + transaction.ID()), + true); } - bool GetZero(Volume* volume, uint64 blockIndex) + bool 
GetZero(Volume* volume, uint64 blockIndex, Transaction& transaction) { Put(); return _Init(volume, blockIndex, - block_cache_get_empty(volume->BlockCache(), blockIndex, -1)); + block_cache_get_empty(volume->BlockCache(), blockIndex, + transaction.ID()), + true); + } + + status_t MakeWritable(Transaction& transaction) + { + if (fVolume == NULL) + return B_BAD_VALUE; + if (fWritable) + return B_OK; + + status_t error = block_cache_make_writable(fVolume->BlockCache(), + fIndex, transaction.ID()); + if (error != B_OK) + return error; + + fWritable = true; + return B_OK; } void Put() @@ -78,7 +113,8 @@ public: } private: - bool _Init(Volume* volume, uint64 blockIndex, const void* data) + bool _Init(Volume* volume, uint64 blockIndex, const void* data, + bool writable) { if (data == NULL) return false; @@ -86,6 +122,7 @@ private: fVolume = volume; fData = const_cast(data); fIndex = blockIndex; + fWritable = writable; return true; } @@ -95,6 +132,7 @@ private: Volume* fVolume; void* fData; uint64 fIndex; + bool fWritable; }; diff --git a/src/tests/system/kernel/file_corruption/fs/BlockAllocator.cpp b/src/tests/system/kernel/file_corruption/fs/BlockAllocator.cpp index 776bd8ac72..f2654035d3 100644 --- a/src/tests/system/kernel/file_corruption/fs/BlockAllocator.cpp +++ b/src/tests/system/kernel/file_corruption/fs/BlockAllocator.cpp @@ -77,7 +77,7 @@ BlockAllocator::Init(uint64 blockBitmap, uint64 freeBlocks) status_t -BlockAllocator::Initialize() +BlockAllocator::Initialize(Transaction& transaction) { status_t error = Init(kCheckSumFSSuperBlockOffset / B_PAGE_SIZE + 1, fTotalBlocks); @@ -93,7 +93,7 @@ dprintf("fBitmapBlockCount: %llu\n", fBitmapBlockCount); // clear the block bitmap for (uint64 i = 0; i < fBitmapBlockCount; i++) { Block block; - if (!block.GetZero(fVolume, fBitmapBlock + i)) + if (!block.GetZero(fVolume, fBitmapBlock + i, transaction)) return B_ERROR; } @@ -102,8 +102,10 @@ dprintf("fBitmapBlockCount: %llu\n", fBitmapBlockCount); uint32 partialBitmapBlock = 
fTotalBlocks % kBlocksPerBitmapBlock; if (partialBitmapBlock != 0) { Block block; - if (!block.GetZero(fVolume, fBitmapBlock + fBitmapBlockCount - 1)) + if (!block.GetZero(fVolume, fBitmapBlock + fBitmapBlockCount - 1, + transaction)) { return B_ERROR; + } // set full uint32s uint32* bits = (uint32*)block.Data(); @@ -121,7 +123,7 @@ dprintf("fBitmapBlockCount: %llu\n", fBitmapBlockCount); uint32 partialGroup = fTotalBlocks % kBlocksPerGroup; for (uint64 i = 0; i < fAllocationGroupCount; i++) { Block block; - if (!block.GetZero(fVolume, fAllocationGroupBlock + i)) + if (!block.GetZero(fVolume, fAllocationGroupBlock + i, transaction)) return B_ERROR; uint16* counts = (uint16*)block.Data(); @@ -143,7 +145,7 @@ dprintf("fBitmapBlockCount: %llu\n", fBitmapBlockCount); } // mark all blocks we already use used - error = AllocateExactly(0, fBitmapBlock + fBitmapBlockCount); + error = AllocateExactly(0, fBitmapBlock + fBitmapBlockCount, transaction); if (error != B_OK) return error; @@ -159,8 +161,8 @@ dprintf("fBitmapBlockCount: %llu\n", fBitmapBlockCount); status_t -BlockAllocator::Allocate(uint64 baseHint, uint64 count, uint64& _allocatedBase, - uint64& _allocatedCount) +BlockAllocator::Allocate(uint64 baseHint, uint64 count, + Transaction& transaction, uint64& _allocatedBase, uint64& _allocatedCount) { MutexLocker locker(fLock); dprintf("BlockAllocator::Allocate(%llu, %llu)\n", baseHint, count); @@ -172,29 +174,32 @@ dprintf("BlockAllocator::Allocate(%llu, %llu)\n", baseHint, count); baseHint = 0; // search from base hint to end - status_t error = _Allocate(baseHint, fTotalBlocks, count, &_allocatedBase, - _allocatedCount); + status_t error = _Allocate(baseHint, fTotalBlocks, count, transaction, + &_allocatedBase, _allocatedCount); if (error == B_OK || baseHint == 0) return error; // search from 0 to hint - return _Allocate(0, baseHint, count, &_allocatedBase, _allocatedCount); + return _Allocate(0, baseHint, count, transaction, &_allocatedBase, + _allocatedCount); } 
status_t -BlockAllocator::AllocateExactly(uint64 base, uint64 count) +BlockAllocator::AllocateExactly(uint64 base, uint64 count, + Transaction& transaction) { MutexLocker locker(fLock); dprintf("BlockAllocator::AllocateExactly(%llu, %llu)\n", base, count); uint64 allocated; - status_t error = _Allocate(base, fTotalBlocks, count, NULL, allocated); + status_t error = _Allocate(base, fTotalBlocks, count, transaction, NULL, + allocated); if (error != B_OK) return error; if (allocated < count) { - _Free(base, allocated); + _Free(base, allocated, transaction); return B_BUSY; } @@ -203,11 +208,20 @@ dprintf("BlockAllocator::AllocateExactly(%llu, %llu)\n", base, count); status_t -BlockAllocator::Free(uint64 base, uint64 count) +BlockAllocator::Free(uint64 base, uint64 count, Transaction& transaction) { MutexLocker locker(fLock); - return _Free(base, count); + return _Free(base, count, transaction); +} + + +void +BlockAllocator::ResetFreeBlocks(uint64 count) +{ + MutexLocker locker(fLock); + + fFreeBlocks = count; } @@ -231,7 +245,7 @@ BlockAllocator::Free(uint64 base, uint64 count) */ status_t BlockAllocator::_Allocate(uint64 base, uint64 searchEnd, uint64 count, - uint64* _allocatedBase, uint64& _allocatedCount) + Transaction& transaction, uint64* _allocatedBase, uint64& _allocatedCount) { ASSERT(base <= fTotalBlocks); ASSERT(searchEnd <= fTotalBlocks); @@ -249,7 +263,7 @@ BlockAllocator::_Allocate(uint64 base, uint64 searchEnd, uint64 count, uint32 allocated; status_t error = _AllocateInGroup(base, searchEnd, toAllocate, - _allocatedBase, allocated); + transaction, _allocatedBase, allocated); if (error == B_OK) { fFreeBlocks -= toAllocate; @@ -281,8 +295,8 @@ BlockAllocator::_Allocate(uint64 base, uint64 searchEnd, uint64 count, while (remaining > 0 && base < searchEnd) { uint64 toAllocate = std::min(remaining, kBlocksPerGroup - groupOffset); uint32 allocated; - status_t error = _AllocateInGroup(base, searchEnd, toAllocate, NULL, - allocated); + status_t error = 
_AllocateInGroup(base, searchEnd, toAllocate, + transaction, NULL, allocated); if (error != B_OK) break; @@ -327,7 +341,7 @@ BlockAllocator::_Allocate(uint64 base, uint64 searchEnd, uint64 count, */ status_t BlockAllocator::_AllocateInGroup(uint64 base, uint64 searchEnd, uint32 count, - uint64* _allocatedBase, uint32& _allocatedCount) + Transaction& transaction, uint64* _allocatedBase, uint32& _allocatedCount) { dprintf("BlockAllocator::_AllocateInGroup(%llu, %lu)\n", base, count); ASSERT(count <= kBlocksPerGroup); @@ -338,7 +352,7 @@ dprintf("BlockAllocator::_AllocateInGroup(%llu, %lu)\n", base, count); Block block; if (!block.GetWritable(fVolume, - fAllocationGroupBlock + base / kBlocksPerGroup)) { + fAllocationGroupBlock + base / kBlocksPerGroup, transaction)) { return B_ERROR; } @@ -354,8 +368,8 @@ dprintf("BlockAllocator::_AllocateInGroup(%llu, %lu)\n", base, count); if (inBlockOffset != 0) { if (counts[blockIndex] < kBlocksPerBitmapBlock) { uint32 allocated; - if (_AllocateInBitmapBlock(base, count, _allocatedBase, - allocated) == B_OK) { + if (_AllocateInBitmapBlock(base, count, transaction, + _allocatedBase, allocated) == B_OK) { if (inBlockOffset + allocated < kBlocksPerBitmapBlock || allocated == remaining) { _allocatedCount = allocated; @@ -407,7 +421,7 @@ dprintf("BlockAllocator::_AllocateInGroup(%llu, %lu)\n", base, count); kBlocksPerBitmapBlock - inBlockOffset); uint32 allocated; - status_t error = _AllocateInBitmapBlock(base, toAllocate, + status_t error = _AllocateInBitmapBlock(base, toAllocate, transaction, _allocatedBase, allocated); if (error != B_OK) break; @@ -453,7 +467,7 @@ dprintf("BlockAllocator::_AllocateInGroup(%llu, %lu)\n", base, count); */ status_t BlockAllocator::_AllocateInBitmapBlock(uint64 base, uint32 count, - uint64* _allocatedBase, uint32& _allocatedCount) + Transaction& transaction, uint64* _allocatedBase, uint32& _allocatedCount) { dprintf("BlockAllocator::_AllocateInBitmapBlock(%llu, %lu)\n", base, count); ASSERT(count <= 
kBlocksPerBitmapBlock); @@ -461,7 +475,7 @@ dprintf("BlockAllocator::_AllocateInBitmapBlock(%llu, %lu)\n", base, count); Block block; if (!block.GetWritable(fVolume, - fBitmapBlock + base / kBlocksPerBitmapBlock)) { + fBitmapBlock + base / kBlocksPerBitmapBlock, transaction)) { return B_ERROR; } @@ -546,7 +560,7 @@ dprintf("BlockAllocator::_AllocateInBitmapBlock(%llu, %lu)\n", base, count); status_t -BlockAllocator::_Free(uint64 base, uint64 count) +BlockAllocator::_Free(uint64 base, uint64 count, Transaction& transaction) { if (count == 0) return B_OK; @@ -561,11 +575,11 @@ dprintf("BlockAllocator::_Free(%llu, %llu)\n", base, count); while (remaining > 0) { uint64 toFree = std::min(remaining, kBlocksPerGroup - groupOffset); - status_t error = _FreeInGroup(base, toFree); + status_t error = _FreeInGroup(base, toFree, transaction); if (error != B_OK) return error; - fFreeBlocks -= toFree; + fFreeBlocks += toFree; remaining -= toFree; base += toFree; groupOffset = 0; @@ -576,7 +590,8 @@ dprintf("BlockAllocator::_Free(%llu, %llu)\n", base, count); status_t -BlockAllocator::_FreeInGroup(uint64 base, uint32 count) +BlockAllocator::_FreeInGroup(uint64 base, uint32 count, + Transaction& transaction) { if (count == 0) return B_OK; @@ -587,7 +602,7 @@ dprintf("BlockAllocator::_FreeInGroup(%llu, %lu)\n", base, count); Block block; if (!block.GetWritable(fVolume, - fAllocationGroupBlock + base / kBlocksPerGroup)) { + fAllocationGroupBlock + base / kBlocksPerGroup, transaction)) { return B_ERROR; } @@ -604,7 +619,7 @@ dprintf("BlockAllocator::_FreeInGroup(%llu, %lu)\n", base, count); if (counts[blockIndex] + toFree > kBlocksPerBitmapBlock) return B_BAD_VALUE; - status_t error = _FreeInBitmapBlock(base, toFree); + status_t error = _FreeInBitmapBlock(base, toFree, transaction); if (error != B_OK) return error; @@ -620,7 +635,8 @@ dprintf("BlockAllocator::_FreeInGroup(%llu, %lu)\n", base, count); status_t -BlockAllocator::_FreeInBitmapBlock(uint64 base, uint32 count) 
+BlockAllocator::_FreeInBitmapBlock(uint64 base, uint32 count, + Transaction& transaction) { dprintf("BlockAllocator::_FreeInBitmapBlock(%llu, %lu)\n", base, count); ASSERT(count <= kBlocksPerBitmapBlock); @@ -628,7 +644,7 @@ dprintf("BlockAllocator::_FreeInBitmapBlock(%llu, %lu)\n", base, count); Block block; if (!block.GetWritable(fVolume, - fBitmapBlock + base / kBlocksPerBitmapBlock)) { + fBitmapBlock + base / kBlocksPerBitmapBlock, transaction)) { return B_ERROR; } diff --git a/src/tests/system/kernel/file_corruption/fs/BlockAllocator.h b/src/tests/system/kernel/file_corruption/fs/BlockAllocator.h index 394227333b..3b797352f6 100644 --- a/src/tests/system/kernel/file_corruption/fs/BlockAllocator.h +++ b/src/tests/system/kernel/file_corruption/fs/BlockAllocator.h @@ -9,6 +9,7 @@ #include +struct Transaction; struct Volume; @@ -22,29 +23,40 @@ public: uint64 FreeBlocks() const { return fFreeBlocks; } status_t Init(uint64 blockBitmap, uint64 freeBlocks); - status_t Initialize(); + status_t Initialize(Transaction& transaction); status_t Allocate(uint64 baseHint, uint64 count, + Transaction& transaction, uint64& _allocatedBase, uint64& _allocatedCount); status_t AllocateExactly(uint64 base, - uint64 count); - status_t Free(uint64 base, uint64 count); + uint64 count, Transaction& transaction); + status_t Free(uint64 base, uint64 count, + Transaction& transaction); + + void ResetFreeBlocks(uint64 count); + // interface for Transaction only private: status_t _Allocate(uint64 base, uint64 searchEnd, - uint64 count, uint64* _allocatedBase, + uint64 count, Transaction& transaction, + uint64* _allocatedBase, uint64& _allocatedCount); status_t _AllocateInGroup(uint64 base, uint64 searchEnd, - uint32 count, uint64* _allocatedBase, + uint32 count, Transaction& transaction, + uint64* _allocatedBase, uint32& _allocatedCount); status_t _AllocateInBitmapBlock(uint64 base, - uint32 count, uint64* _allocatedBase, + uint32 count, Transaction& transaction, + uint64* _allocatedBase, 
uint32& _allocatedCount); - status_t _Free(uint64 base, uint64 count); - status_t _FreeInGroup(uint64 base, uint32 count); - status_t _FreeInBitmapBlock(uint64 base, uint32 count); + status_t _Free(uint64 base, uint64 count, + Transaction& transaction); + status_t _FreeInGroup(uint64 base, uint32 count, + Transaction& transaction); + status_t _FreeInBitmapBlock(uint64 base, uint32 count, + Transaction& transaction); private: mutex fLock; @@ -60,9 +72,10 @@ private: class AllocatedBlock { public: - AllocatedBlock(BlockAllocator* allocator) + AllocatedBlock(BlockAllocator* allocator, Transaction& transaction) : fAllocator(allocator), + fTransaction(transaction), fIndex(0) { } @@ -70,7 +83,7 @@ public: ~AllocatedBlock() { if (fIndex > 0) - fAllocator->Free(fIndex, 1); + fAllocator->Free(fIndex, 1, fTransaction); } uint64 Index() const @@ -81,7 +94,8 @@ public: status_t Allocate(uint64 baseHint = 0) { uint64 allocatedBlocks; - status_t error = fAllocator->Allocate(0, 1, fIndex, allocatedBlocks); + status_t error = fAllocator->Allocate(0, 1, fTransaction, fIndex, + allocatedBlocks); if (error != B_OK) fIndex = 0; return error; @@ -96,6 +110,7 @@ public: private: BlockAllocator* fAllocator; + Transaction& fTransaction; uint64 fIndex; }; diff --git a/src/tests/system/kernel/file_corruption/fs/Directory.cpp b/src/tests/system/kernel/file_corruption/fs/Directory.cpp index b234527d70..f7049b7377 100644 --- a/src/tests/system/kernel/file_corruption/fs/Directory.cpp +++ b/src/tests/system/kernel/file_corruption/fs/Directory.cpp @@ -6,6 +6,1433 @@ #include "Directory.h" +#include +#include + +#include + +#include "Block.h" +#include "BlockAllocator.h" +#include "DebugSupport.h" + + +class DirEntryBlock { +public: + DirEntryBlock(); + DirEntryBlock( + checksumfs_dir_entry_block* entryBlock, + size_t entryBlockSize); + + void SetTo(checksumfs_dir_entry_block* entryBlock, + size_t entryBlockSize); + + inline int32 EntryCount() const; + inline size_t BytesUsedFor(int32 entryCount) 
const; + inline size_t BytesUsed() const; + inline size_t FreeSpace() const; + + inline uint64 BlockIndexAt(int32 index) const; + const char* NameAt(int32 index, size_t& _nameLength) const; + + int32 FindInsertionIndex(const char* name, + size_t nameLength, bool& _exactMatch) const; + + int32 FindSplitIndex(int32 index, + size_t bytesNeeded) const; + + void InsertEntry(int32 index, const char* name, + size_t nameLength, uint64 blockIndex); + void ReplaceEntryName(int32 index, const char* name, + size_t nameLength); + void RemoveEntry(int32 index); + + void SplitBlock(int32 splitIndex, + DirEntryBlock& other); + + bool Check() const; + +private: + checksumfs_dir_entry_block* fBlock; + size_t fBlockSize; +}; + + +class DirEntryTree { +public: + DirEntryTree(Directory* directory); + + status_t LookupEntry(const char* name, + uint64& _blockIndex); + status_t LookupNextEntry(const char* name, + char* foundName, size_t& _foundNameLength, + uint64& _blockIndex); + + status_t InsertEntry(const char* name, uint64 blockIndex, + Transaction& transaction); + status_t RemoveEntry(const char* name, + Transaction& transaction); + + bool Check(); + +private: + struct LevelInfo { + Block block; + DirEntryBlock entryBlock; + int32 index; + bool exactMatch; + }; + +private: + status_t _InitReadOnly(); + status_t _InitWritable(Transaction& transaction); + status_t _InitCommon(); + + status_t _UpdateOrInsertKey(LevelInfo* infos, + int32 level, const char* name, + size_t nameLength, uint64 blockIndex, + bool insertKey, Transaction& transaction); + + status_t _InsertEntryIncrementDepth(LevelInfo* infos, + Transaction& transaction); + status_t _InsertEntrySplitBlock(int32 level, + LevelInfo& info, size_t needed, + Transaction& transaction, Block& newBlock, + int32& _splitIndex); + + bool _Check(int32 level, uint64 blockIndex, + const char* key, size_t keyLength, + DirEntryBlock& entryBlock); + + inline uint16 _Depth() const { return fTree->depth; } + +private: + Directory* fDirectory; + 
Block fRootBlock; + checksumfs_dir_entry_tree* fTree; + checksumfs_dir_entry_block* fRootEntryBlock; + size_t fRootEntryBlockSize; +}; + + +// #pragma mark - + + +static int +compare_names(const char* a, size_t lengthA, const char* b, size_t lengthB) +{ + int cmp = strncmp(a, b, std::min(lengthA, lengthB)); + if (cmp != 0) + return cmp; + + return (int)lengthA - (int)lengthB; + // assumes we don't overflow 31 bits +} + + +// #pragma mark - DirEntryBlock + + +DirEntryBlock::DirEntryBlock() + : + fBlock(NULL), + fBlockSize(0) +{ +} + + +DirEntryBlock::DirEntryBlock(checksumfs_dir_entry_block* entryBlock, + size_t entryBlockSize) + : + fBlock(entryBlock), + fBlockSize(entryBlockSize) +{ +} + + +void +DirEntryBlock::SetTo(checksumfs_dir_entry_block* entryBlock, + size_t entryBlockSize) +{ + fBlock = entryBlock; + fBlockSize = entryBlockSize; +} + + +int32 +DirEntryBlock::EntryCount() const +{ + return fBlock->entryCount; +} + + +size_t +DirEntryBlock::BytesUsedFor(int32 entryCount) const +{ + if (entryCount == 0) + return sizeof(*fBlock); + return sizeof(*fBlock) + 10 * entryCount + + fBlock->nameEnds[entryCount - 1]; +} + + +size_t +DirEntryBlock::BytesUsed() const +{ + return BytesUsedFor(EntryCount()); +} + + +size_t +DirEntryBlock::FreeSpace() const +{ + return fBlockSize - BytesUsed(); +} + + +uint64 +DirEntryBlock::BlockIndexAt(int32 index) const +{ + uint64* blockIndices + = (uint64*)((uint8*)fBlock + fBlockSize) - 1; + return blockIndices[-index]; +} + + +const char* +DirEntryBlock::NameAt(int32 index, size_t& _nameLength) const +{ + int32 entryCount = EntryCount(); + if (index < 0 || index >= entryCount) + return NULL; + + uint32 nameOffset = index > 0 ? 
fBlock->nameEnds[index - 1] : 0; + _nameLength = fBlock->nameEnds[index] - nameOffset; + return (const char*)(fBlock->nameEnds + entryCount) + nameOffset; +} + + +int32 +DirEntryBlock::FindInsertionIndex(const char* name, size_t nameLength, + bool& _exactMatch) const +{ + int32 entryCount = EntryCount(); + if (entryCount == 0) { + _exactMatch = false; + return 0; + } + + const char* entryNames = (char*)(fBlock->nameEnds + entryCount); + uint32 nameOffset = 0; + + int32 index = 0; + int cmp = -1; + + // TODO: Binary search! + for (; index < entryCount; index++) { + const char* entryName = entryNames + nameOffset; + size_t entryNameLength = fBlock->nameEnds[index] - nameOffset; + + cmp = compare_names(entryName, entryNameLength, name, nameLength); + if (cmp >= 0) + break; + + nameOffset += entryNameLength; + } + + _exactMatch = cmp == 0; + return index; +} + + +/*! Finds a good split index for an insertion of \a bytesNeeded bytes at + index \a index. +*/ +int32 +DirEntryBlock::FindSplitIndex(int32 index, size_t bytesNeeded) const +{ + size_t splitSize = (BytesUsed() + bytesNeeded) / 2; + + int32 entryCount = EntryCount(); + for (int32 i = 0; i < entryCount; i++) { + size_t bytesUsed = BytesUsedFor(i + 1); + if (i == index) + bytesUsed += bytesNeeded; + if (bytesUsed > splitSize) + return i; + } + + // This should never happen. + return entryCount; +} + + +void +DirEntryBlock::InsertEntry(int32 index, const char* name, size_t nameLength, + uint64 blockIndex) +{ + uint64* blockIndices = (uint64*)((uint8*)fBlock + fBlockSize) - 1; + int32 entryCount = fBlock->entryCount; + char* entryNames = (char*)(fBlock->nameEnds + entryCount); + + uint32 nameOffset = index == 0 ? 0 : fBlock->nameEnds[index - 1]; + uint32 lastNameEnd = entryCount == 0 + ? 
0 : fBlock->nameEnds[entryCount - 1]; + + if (index < entryCount) { + // make room in the block indices array + memmove(&blockIndices[-entryCount], &blockIndices[1 - entryCount], + 8 * (entryCount - index)); + + // make room in the name array -- we also move 2 bytes more for the + // new entry in the nameEnds array + memmove(entryNames + nameOffset + nameLength + 2, + entryNames + nameOffset, lastNameEnd - nameOffset); + + // move the names < index by 2 bytes + if (index > 0) + memmove(entryNames + 2, entryNames, nameOffset); + + // move and update the nameEnds entries > index + for (int32 i = entryCount; i > index; i--) + fBlock->nameEnds[i] = fBlock->nameEnds[i - 1] + nameLength; + } else if (entryCount > 0) { + // the nameEnds array grows -- move all names + memmove(entryNames + 2, entryNames, lastNameEnd); + } + + // we have made room -- insert the entry + entryNames += 2; + memcpy(entryNames + nameOffset, name, nameLength); + fBlock->nameEnds[index] = nameOffset + nameLength; + blockIndices[-index] = blockIndex; + fBlock->entryCount++; +ASSERT(Check()); +} + + +void +DirEntryBlock::ReplaceEntryName(int32 index, const char* name, + size_t nameLength) +{ + int32 entryCount = fBlock->entryCount; + char* entryNames = (char*)(fBlock->nameEnds + entryCount); + + ASSERT(index >= 0 && index < entryCount); + + uint32 nameOffset = index == 0 ? 
0 : fBlock->nameEnds[index - 1]; + uint32 oldNameLength = fBlock->nameEnds[index] - nameOffset; + uint32 lastNameEnd = fBlock->nameEnds[entryCount - 1]; + + if (oldNameLength != nameLength) { + int32 lengthDiff = (int32)nameLength - (int32)oldNameLength; + + ASSERT(lengthDiff <= (int32)FreeSpace()); + + // move names after the changing name + if (index + 1 < entryCount) { + memmove(entryNames + nameOffset + nameLength, + entryNames + nameOffset + oldNameLength, + lastNameEnd - nameOffset - oldNameLength); + } + + // update the name ends + for (int32 i = index; i < entryCount; i++) + fBlock->nameEnds[i] = (int32)fBlock->nameEnds[i] + lengthDiff; + } + + // copy the name + memcpy(entryNames + nameOffset, name, nameLength); +ASSERT(Check()); +} + + +void +DirEntryBlock::RemoveEntry(int32 index) +{ + ASSERT(index >= 0 && index < EntryCount()); + + int32 entryCount = EntryCount(); + if (entryCount == 1) { + // simple case -- removing the last entry + fBlock->entryCount = 0; + return; + } + + uint64* blockIndices = (uint64*)((uint8*)fBlock + fBlockSize) - 1; + char* entryNames = (char*)(fBlock->nameEnds + entryCount); + + uint32 nameOffset = index == 0 ? 0 : fBlock->nameEnds[index - 1]; + uint32 nameEnd = fBlock->nameEnds[index]; + uint32 lastNameEnd = entryCount == 0 + ? 
0 : fBlock->nameEnds[entryCount - 1]; + + if (index < entryCount - 1) { + uint32 nameLength = nameEnd - nameOffset; + + // remove the element from the block indices array + memmove(&blockIndices[-entryCount + 2], &blockIndices[-entryCount + 1], + 8 * (entryCount - index - 1)); + + // move and update the nameEnds entries > index + for (int32 i = index + 1; i < entryCount; i++) + fBlock->nameEnds[i - 1] = fBlock->nameEnds[i] - nameLength; + + // move the names < index by 2 bytes + if (index > 0) + memmove(entryNames - 2, entryNames, nameOffset); + + // move the names > index + memmove(entryNames - 2 + nameOffset, entryNames + nameEnd, + lastNameEnd - nameEnd); + } else { + // the nameEnds array shrinks -- move all names + memmove(entryNames - 2, entryNames, nameOffset); + } + + // we have removed the entry + fBlock->entryCount--; +ASSERT(Check()); +} + + +/*! Moves all entries beyond \a splitIndex (inclusively) to the empty block + \a other. +*/ +void +DirEntryBlock::SplitBlock(int32 splitIndex, DirEntryBlock& other) +{ + ASSERT(other.EntryCount() == 0); + ASSERT(splitIndex <= EntryCount()); + + int32 entryCount = EntryCount(); + if (splitIndex == entryCount) + return; + int32 otherEntryCount = entryCount - splitIndex; + + // copy block indices + uint64* blockIndices = (uint64*)((uint8*)fBlock + fBlockSize); + uint64* otherBlockIndices + = (uint64*)((uint8*)other.fBlock + other.fBlockSize); + // note: both point after the last entry, unlike in other methods + memcpy(otherBlockIndices - otherEntryCount, blockIndices - entryCount, + 8 * otherEntryCount); + + // copy the name end offsets + uint32 namesOffset = splitIndex > 0 + ? 
fBlock->nameEnds[splitIndex - 1] : 0; + for (int32 i = splitIndex; i < entryCount; i++) { + other.fBlock->nameEnds[i - splitIndex] = fBlock->nameEnds[i] + - namesOffset; + } + + // copy the names + char* entryNames = (char*)(fBlock->nameEnds + entryCount); + char* otherEntryNames + = (char*)(other.fBlock->nameEnds + otherEntryCount); + memcpy(otherEntryNames, entryNames + namesOffset, + fBlock->nameEnds[entryCount - 1] - namesOffset); + + // the name ends array shrinks -- move the names + if (splitIndex > 0) { + char* newEntryNames = (char*)(fBlock->nameEnds + splitIndex); + memmove(newEntryNames, entryNames, namesOffset); + } + + // update the entry counts + fBlock->entryCount = splitIndex; + other.fBlock->entryCount = otherEntryCount; +ASSERT(Check()); +ASSERT(other.Check()); +} + + +bool +DirEntryBlock::Check() const +{ + int32 entryCount = EntryCount(); + if (entryCount == 0) + return true; + + // Check size: Both name ends and block index arrays must fit and we need + // at least one byte per name. 
+ size_t size = sizeof(*fBlock) + entryCount * 10; + if (size + entryCount > fBlockSize) { + ERROR("Invalid dir entry block: entry count %d requires minimum size " + "of %" B_PRIuSIZE " + %d bytes, but block size is %" B_PRIuSIZE + "\n", (int)entryCount, size, (int)entryCount, fBlockSize); + return false; + } + + // check the name ends and block indices arrays and the names + const char* entryNames = (char*)(fBlock->nameEnds + entryCount); + const uint64* blockIndices = (uint64*)((uint8*)fBlock + fBlockSize) - 1; + const char* previousName = NULL; + uint16 previousNameLength = 0; + uint16 previousEnd = 0; + + for (int32 i = 0; i < entryCount; i++) { + // check name end + uint16 nameEnd = fBlock->nameEnds[i]; + if (nameEnd <= previousEnd || nameEnd > fBlockSize - size) { + ERROR("Invalid dir entry block: name end offset of entry %" B_PRId32 + ": %u, previous: %u\n", i, nameEnd, previousEnd); + return false; + } + + // check name length + uint16 nameLength = nameEnd - previousEnd; + if (nameLength > kCheckSumFSNameLength) { + ERROR("Invalid dir entry block: name of entry %" B_PRId32 " too " + "long: %u\n", i, nameLength); + return false; + } + + // verify that the name doesn't contain a null char + const char* name = entryNames + previousEnd; + if (strnlen(name, nameLength) != nameLength) { + ERROR("Invalid dir entry block: name of entry %" B_PRId32 + " contains a null char\n", i); + return false; + } + + // compare the name with the previous name + if (i > 0) { + int cmp = compare_names(previousName, previousNameLength, name, + nameLength); + if (cmp == 0) { + ERROR("Invalid dir entry block: entries %" B_PRId32 "/%" + B_PRId32 " have the same name: \"%.*s\"\n", i - 1, i, + (int)nameLength, name); + return false; + } else if (cmp > 0) { + ERROR("Invalid dir entry block: entries %" B_PRId32 "/%" + B_PRId32 " out of order: \"%.*s\" > \"%.*s\"\n", i - 1, i, + (int)previousNameLength, previousName, (int)nameLength, + name); + return false; + } + } + + // check the block 
index + if (blockIndices[-i] < kCheckSumFSSuperBlockOffset / B_PAGE_SIZE) { + ERROR("Invalid dir entry block: entry %" B_PRId32 + " has invalid block index: %" B_PRIu64, i, blockIndices[-i]); + return false; + } + + previousName = name; + previousNameLength = nameLength; + previousEnd = nameEnd; + } + + return true; +} + + +// #pragma mark - DirEntryTree + + +DirEntryTree::DirEntryTree(Directory* directory) + : + fDirectory(directory) +{ +} + + +status_t +DirEntryTree::LookupEntry(const char* name, uint64& _blockIndex) +{ + FUNCTION("name: \"%s\"\n", name); + + status_t error = _InitReadOnly(); + if (error != B_OK) + RETURN_ERROR(error); + + size_t nameLength = strlen(name); + if (nameLength > kCheckSumFSNameLength) + RETURN_ERROR(B_ENTRY_NOT_FOUND); + + uint32 depth = _Depth(); + + DirEntryBlock entryBlock(fRootEntryBlock, fRootEntryBlockSize); +ASSERT(entryBlock.Check()); + + Block block; + + for (uint32 level = 0; level <= depth; level++) { + if (entryBlock.EntryCount() == 0) + RETURN_ERROR(level == 0 ? B_ENTRY_NOT_FOUND : B_BAD_DATA); + + bool exactMatch; + int32 index = entryBlock.FindInsertionIndex(name, nameLength, + exactMatch); + + // If we haven't found an exact match, the index points to the first + // entry that is greater or after the last entry. + if (!exactMatch) { + if (index == 0) { + // The first entry is already greater, so the branch doesn't + // contain the entry we're looking for. + RETURN_ERROR(B_ENTRY_NOT_FOUND); + } + + index--; + } + + PRINT(" level %" B_PRId32 " -> index: %" B_PRId32 " %sexact\n", level, + index, exactMatch ? 
"" : " not "); + + uint64 blockIndex = entryBlock.BlockIndexAt(index); + + if (level == depth) { + // final level -- here we should have an exact match + if (!exactMatch) + RETURN_ERROR(B_ENTRY_NOT_FOUND); + + _blockIndex = blockIndex; + return B_OK; + } + + // not the final level -- load the block and descend to the next + // level + if (!block.GetReadable(fDirectory->GetVolume(), blockIndex)) + RETURN_ERROR(B_ERROR); + + entryBlock.SetTo((checksumfs_dir_entry_block*)block.Data(), + B_PAGE_SIZE); +ASSERT(entryBlock.Check()); + } + + // cannot get here, but keep the compiler happy + RETURN_ERROR(B_ENTRY_NOT_FOUND); +} + + +status_t +DirEntryTree::LookupNextEntry(const char* name, char* foundName, + size_t& _foundNameLength, uint64& _blockIndex) +{ + FUNCTION("name: \"%s\"\n", name); + + status_t error = _InitReadOnly(); + if (error != B_OK) + RETURN_ERROR(error); + + size_t nameLength = strlen(name); + if (nameLength > kCheckSumFSNameLength) + RETURN_ERROR(B_ENTRY_NOT_FOUND); + + int32 depth = _Depth(); + + LevelInfo* infos = new(std::nothrow) LevelInfo[ + kCheckSumFSMaxDirEntryTreeDepth + 1]; + if (infos == NULL) + RETURN_ERROR(B_NO_MEMORY); + ArrayDeleter infosDeleter(infos); + + infos[0].entryBlock.SetTo(fRootEntryBlock, fRootEntryBlockSize); +ASSERT(infos[0].entryBlock.Check()); + + // descend the tree + for (int32 level = 0; level <= depth; level++) { + LevelInfo& info = infos[level]; + + if (info.entryBlock.EntryCount() == 0) { + if (level == 0) { + // directory is empty + return B_ENTRY_NOT_FOUND; + } + + RETURN_ERROR(B_BAD_DATA); + } + + info.index = info.entryBlock.FindInsertionIndex(name, nameLength, + info.exactMatch); + + PRINT(" level %" B_PRId32 " -> index: %" B_PRId32 " %sexact\n", level, + info.index, info.exactMatch ? "" : " not "); + + if (level == depth) + break; + + // If we haven't found an exact match, the index points to the first + // entry that is greater or after the last entry. 
+ if (!info.exactMatch && info.index > 0) + info.index--; + + uint64 nextBlockIndex = info.entryBlock.BlockIndexAt(info.index); + + // not the final level -- load the block and descend to the next + // level + LevelInfo& nextInfo = infos[level + 1]; + if (!nextInfo.block.GetReadable(fDirectory->GetVolume(), + nextBlockIndex)) { + RETURN_ERROR(B_ERROR); + } + + nextInfo.entryBlock.SetTo( + (checksumfs_dir_entry_block*)nextInfo.block.Data(), + B_PAGE_SIZE); +ASSERT(nextInfo.entryBlock.Check()); + } + + if (infos[depth].exactMatch) + infos[depth].index++; + + if (infos[depth].index >= infos[depth].entryBlock.EntryCount()) { + // We're at the end of the last block -- we need to track back to find a + // greater branch. + PRINT(" searching for greater branch\n"); + + int32 level; + for (level = depth - 1; level >= 0; level--) { + LevelInfo& info = infos[level]; + if (++info.index < info.entryBlock.EntryCount()) { + PRINT(" found greater branch: level: %" B_PRId32 " -> index: %" + B_PRId32 "\n", level, info.index); + break; + } + } + + if (level < 0) + return B_ENTRY_NOT_FOUND; + + // We've found a greater branch -- get the first entry in that branch. 
+ for (level++; level <= depth; level++) { + LevelInfo& previousInfo = infos[level - 1]; + LevelInfo& info = infos[level]; + + uint64 nextBlockIndex = previousInfo.entryBlock.BlockIndexAt( + previousInfo.index); + + // load the block + if (!info.block.GetReadable(fDirectory->GetVolume(), + nextBlockIndex)) { + RETURN_ERROR(B_ERROR); + } + + info.entryBlock.SetTo( + (checksumfs_dir_entry_block*)info.block.Data(), B_PAGE_SIZE); +ASSERT(info.entryBlock.Check()); + + info.index = 0; + if (info.entryBlock.EntryCount() == 0) + RETURN_ERROR(B_BAD_DATA); + } + } + + // get and check the name + LevelInfo& info = infos[depth]; + + name = info.entryBlock.NameAt(info.index, nameLength); + if (nameLength > kCheckSumFSNameLength + || strnlen(name, nameLength) != nameLength) { + RETURN_ERROR(B_BAD_DATA); + } + + // set the return values + memcpy(foundName, name, nameLength); + foundName[nameLength] = '\0'; + _foundNameLength = nameLength; + _blockIndex = info.entryBlock.BlockIndexAt(info.index); + + PRINT(" found entry: \"%s\" -> %" B_PRIu64 "\n", foundName, _blockIndex); + + return B_OK; +} + + +status_t +DirEntryTree::InsertEntry(const char* name, uint64 blockIndex, + Transaction& transaction) +{ + FUNCTION("name: \"%s\", blockIndex: %" B_PRIu64 "\n", name, blockIndex); + + status_t error = _InitWritable(transaction); + if (error != B_OK) + RETURN_ERROR(error); + + size_t nameLength = strlen(name); + if (nameLength == 0) + RETURN_ERROR(B_BAD_VALUE); + if (nameLength > kCheckSumFSNameLength) + RETURN_ERROR(B_NAME_TOO_LONG); + + int32 depth = _Depth(); + + LevelInfo* infos = new(std::nothrow) LevelInfo[ + kCheckSumFSMaxDirEntryTreeDepth + 1]; + if (infos == NULL) + RETURN_ERROR(B_NO_MEMORY); + ArrayDeleter infosDeleter(infos); + + infos[0].entryBlock.SetTo(fRootEntryBlock, fRootEntryBlockSize); + + for (int32 level = 0; level <= depth; level++) { + LevelInfo& info = infos[level]; + + if (info.entryBlock.EntryCount() == 0) { + if (level == 0) { + PRINT(" directory is empty\n"); + 
// directory is empty + info.index = 0; + break; + } + + RETURN_ERROR(B_BAD_DATA); + } + + info.index = info.entryBlock.FindInsertionIndex(name, nameLength, + info.exactMatch); + + PRINT(" level %" B_PRId32 ", block %" B_PRIu64 " -> index: %" B_PRId32 + " %sexact\n", level, + level == 0 ? fDirectory->BlockIndex() : info.block.Index(), + info.index, info.exactMatch ? "" : " not "); + + // Finding an exact match -- even in the non-final level -- means + // that there's an entry with that name. + if (info.exactMatch) + RETURN_ERROR(B_FILE_EXISTS); + + if (level == depth) { + // final level -- here we need to insert the entry + break; + } + + // Since we haven't found an exact match, the index points to the + // first entry that is greater or after the last entry. Descend into + // the preceding branch. If the name sorts before all keys (index 0), + // descend into the first branch instead of letting the index + // underflow to -1. + if (info.index > 0) + info.index--; + + uint64 nextBlockIndex = info.entryBlock.BlockIndexAt(info.index); + + // not the final level -- load the block and descend to the next + // level + LevelInfo& nextInfo = infos[level + 1]; + if (!nextInfo.block.GetReadable(fDirectory->GetVolume(), + nextBlockIndex)) { + RETURN_ERROR(B_ERROR); + } + + nextInfo.entryBlock.SetTo( + (checksumfs_dir_entry_block*)nextInfo.block.Data(), + B_PAGE_SIZE); +ASSERT(nextInfo.entryBlock.Check()); + } + + // We've found the insertion point. Insert the key and iterate backwards + // to perform the potentially necessary updates. Insertion at index 0 of + // the block changes the block's key, requiring an update in the parent + // block. Insertion or key update can cause the block to be split (if + // there's not enough space left in it), requiring an insertion in the + // parent block. So we start with a pending insertion in the leaf block + // and work our way upwards, performing key updates and insertions as + // necessary.
+ + return _UpdateOrInsertKey(infos, depth, name, nameLength, blockIndex, true, + transaction); +} + + +status_t +DirEntryTree::RemoveEntry(const char* name, Transaction& transaction) +{ + FUNCTION("name: \"%s\"\n", name); + + status_t error = _InitWritable(transaction); + if (error != B_OK) + RETURN_ERROR(error); + + size_t nameLength = strlen(name); + if (nameLength == 0) + RETURN_ERROR(B_BAD_VALUE); + if (nameLength > kCheckSumFSNameLength) + RETURN_ERROR(B_ENTRY_NOT_FOUND); + + int32 depth = _Depth(); + + LevelInfo* infos = new(std::nothrow) LevelInfo[ + kCheckSumFSMaxDirEntryTreeDepth + 1]; + if (infos == NULL) + RETURN_ERROR(B_NO_MEMORY); + ArrayDeleter infosDeleter(infos); + + infos[0].entryBlock.SetTo(fRootEntryBlock, fRootEntryBlockSize); + + for (int32 level = 0; level <= depth; level++) { + LevelInfo& info = infos[level]; + + if (info.entryBlock.EntryCount() == 0) { + if (level == 0) { + // directory is empty + PRINT(" directory is empty\n"); + RETURN_ERROR(B_ENTRY_NOT_FOUND); + } + + RETURN_ERROR(B_BAD_DATA); + } + + info.index = info.entryBlock.FindInsertionIndex(name, nameLength, + info.exactMatch); + + PRINT(" level %" B_PRId32 ", block %" B_PRIu64 " -> index: %" B_PRId32 + " %sexact\n", level, + level == 0 ? fDirectory->BlockIndex() : info.block.Index(), + info.index, info.exactMatch ? "" : " not "); + + if (level == depth) { + // final level -- here the entry should be found + if (!info.exactMatch) + RETURN_ERROR(B_ENTRY_NOT_FOUND); + break; + } + + // If we haven't found an exact match, the index points to the first + // entry that is greater or after the last entry. + if (!info.exactMatch) { + if (info.index == 0) { + // The first entry is already greater, so the branch doesn't + // contain the entry we're looking for. 
+ RETURN_ERROR(B_ENTRY_NOT_FOUND); + } + + info.index--; + } + + uint64 nextBlockIndex = info.entryBlock.BlockIndexAt(info.index); + + // not the final level -- load the block and descend to the next + // level + LevelInfo& nextInfo = infos[level + 1]; + if (!nextInfo.block.GetReadable(fDirectory->GetVolume(), + nextBlockIndex)) { + RETURN_ERROR(B_ERROR); + } + + nextInfo.entryBlock.SetTo( + (checksumfs_dir_entry_block*)nextInfo.block.Data(), + B_PAGE_SIZE); +ASSERT(nextInfo.entryBlock.Check()); + } + + // We've found the entry. Remove the key and iterate backwards to perform + // the potentially necessary updates. Removal at index 0 of the block + // changes the block's key, requiring an update in the parent block. + // Removal of the last entry will require removal of the block from its + // parent. Key update can cause the block to be split (if there's not + // enough space left in it), requiring an insertion in the parent block. + // We start with a pending removal in the leaf block and work our way + // upwards as long as the blocks become empty. As soon as a key update is + // required, we delegate the rest to the update/insert backwards loop. + + for (int32 level = depth; level >= 0; level--) { + LevelInfo& info = infos[level]; + + // make the block writable + if (level > 0) { + error = info.block.MakeWritable(transaction); + if (error != B_OK) + RETURN_ERROR(error); + } + + PRINT(" level: %" B_PRId32 ", index: %" B_PRId32 ": removing key " + "\"%.*s\" (%" B_PRIuSIZE ")\n", level, info.index, (int)nameLength, + name, nameLength); + + if (info.entryBlock.EntryCount() == 1) { + // That's the last key in the block. Unless that's the root level, + // we remove the block completely. 
+ PRINT(" -> block is empty\n"); + if (level == 0) { + // The root block loses its last entry, i.e. the tree is + // empty. Reset the depth, so that the next insertion starts + // with the root block as the leaf level again. + info.entryBlock.RemoveEntry(info.index); + fTree->depth = 0; + return B_OK; + } + + error = fDirectory->GetVolume()->GetBlockAllocator()->Free( + info.block.Index(), 1, transaction); + if (error != B_OK) + RETURN_ERROR(error); + fDirectory->SetSize(fDirectory->Size() - B_PAGE_SIZE); + + // remove the key (the same one) from the parent block + continue; + } + + // There are more entries, so just remove the entry in question. If it + // is not the first one, we're done, otherwise we have to update the + // block's key in the parent block. + info.entryBlock.RemoveEntry(info.index); + + if (info.index > 0 || level == 0) + return B_OK; + + name = info.entryBlock.NameAt(0, nameLength); + return _UpdateOrInsertKey(infos, level - 1, name, nameLength, 0, false, + transaction); + } + + return B_OK; +} + + +bool +DirEntryTree::Check() +{ + if (_InitReadOnly() != B_OK) { + ERROR("DirEntryTree::Check(): Init failed!\n"); + return false; + } + + DirEntryBlock entryBlock(fRootEntryBlock, fRootEntryBlockSize); + return _Check(0, fDirectory->BlockIndex(), NULL, 0, entryBlock); +} + + +status_t +DirEntryTree::_InitReadOnly() +{ + if (!fRootBlock.GetReadable(fDirectory->GetVolume(), + fDirectory->BlockIndex())) { + RETURN_ERROR(B_ERROR); + } + + return _InitCommon(); +} + + +status_t +DirEntryTree::_InitWritable(Transaction& transaction) +{ + if (!fRootBlock.GetWritable(fDirectory->GetVolume(), + fDirectory->BlockIndex(), transaction)) { + RETURN_ERROR(B_ERROR); + } + + return _InitCommon(); +} + + +status_t +DirEntryTree::_InitCommon() +{ + fTree = (checksumfs_dir_entry_tree*) + ((uint8*)fRootBlock.Data() + sizeof(checksumfs_node)); + + fRootEntryBlock = (checksumfs_dir_entry_block*)(fTree + 1); + fRootEntryBlockSize = B_PAGE_SIZE + - ((addr_t)fRootEntryBlock - 
(addr_t)fRootBlock.Data()); + + if (_Depth() > kCheckSumFSMaxDirEntryTreeDepth) + RETURN_ERROR(B_BAD_DATA); + + return B_OK; +} + + +status_t +DirEntryTree::_UpdateOrInsertKey(LevelInfo*
infos, int32 level, + const char* name, size_t nameLength, uint64 blockIndex, bool insertKey, + Transaction& transaction) +{ + FUNCTION("level: %" B_PRId32 ": %s name: \"%.*s\" (%" B_PRIuSIZE "), " + "blockIndex: %" B_PRIu64 "\n", level, insertKey ? "insert" : "update", + (int)nameLength, name, nameLength, blockIndex); + + // Some temporary blocks: newBlock is used when a block is split. The + // other three are used when a key update or an insertion in the parent + // block becomes necessary. We only need them because the name we + // update/insert potentially points into a block, and instead of cloning + // the name, we simply postpone putting the block until we don't need + // the name anymore. + Block newBlock; + Block tempBlockUpdate; + Block tempBlockUpdateInsert; + Block tempBlockInsert; + + int32 depth = _Depth(); + status_t error; + + bool updateNextKey = !insertKey; + bool insertNextKey = insertKey; + const char* nameToUpdate = name; + size_t nameToUpdateLength = nameLength; + const char* nextNameToInsert = name; + size_t nextNameToInsertLength = nameLength; + uint64 nextBlockIndexToInsert = blockIndex; + + for (; level >= 0; level--) { + LevelInfo& info = infos[level]; + + bool updateThisKey = updateNextKey; + bool insertThisKey = insertNextKey; + + if (!updateThisKey && !insertThisKey) + return B_OK; + + updateNextKey = false; + insertNextKey = false; + + blockIndex = nextBlockIndexToInsert; + name = nextNameToInsert; + nameLength = nextNameToInsertLength; + + // make the block writable + if (level > 0) { + error = info.block.MakeWritable(transaction); + if (error != B_OK) + RETURN_ERROR(error); + } + + if (updateThisKey) { + PRINT(" level: %" B_PRId32 ", index: %" B_PRId32 ": updating key " + "to \"%.*s\" (%" B_PRIuSIZE ")\n", level, info.index, + (int)nameToUpdateLength, nameToUpdate, nameToUpdateLength); + + size_t oldNameLength; + info.entryBlock.NameAt(info.index, oldNameLength); + size_t spaceNeeded = oldNameLength < nameToUpdateLength + ?
nameToUpdateLength - oldNameLength : 0; + + if (spaceNeeded <= info.entryBlock.FreeSpace()) { + info.entryBlock.ReplaceEntryName(info.index, nameToUpdate, + nameToUpdateLength); + if (info.index == 0) { + // we updated at index 0, so we need to update this + // block's key in the parent block + updateNextKey = true; + nameToUpdate = info.entryBlock.NameAt(0, + nameToUpdateLength); + + // make sure the new block is kept until we no longer + // use the name in the next iteration + tempBlockUpdate.TransferFrom(info.block); + } + } else if (level == 0) { + // We need to split the root block -- clone it first. + error = _InsertEntryIncrementDepth(infos, transaction); + if (error != B_OK) + RETURN_ERROR(error); + + level = 2; + // _InsertEntryIncrementDepth() moved the root block + // content to level 1, where we want to continue. + updateNextKey = true; + insertNextKey = insertThisKey; + continue; + } else { + // We need to split this non-root block. + int32 splitIndex; + error = _InsertEntrySplitBlock(level, info, spaceNeeded, + transaction, newBlock, splitIndex); + if (error != B_OK) + RETURN_ERROR(error); + + nextBlockIndexToInsert = newBlock.Index(); + + DirEntryBlock newEntryBlock( + (checksumfs_dir_entry_block*)newBlock.Data(), + B_PAGE_SIZE); +ASSERT(newEntryBlock.Check()); + + if (info.index < splitIndex) { + ASSERT(info.entryBlock.FreeSpace() >= spaceNeeded); + + info.entryBlock.ReplaceEntryName(info.index, + nameToUpdate, nameToUpdateLength); + if (info.index == 0) { + // we updated at index 0, so we need to update this + // block's key in the parent block + updateNextKey = true; + nameToUpdate = info.entryBlock.NameAt(0, + nameToUpdateLength); + + // make sure the new block is kept until we no + // longer use the name in the next iteration + tempBlockUpdate.TransferFrom(info.block); + } + } else { + ASSERT(newEntryBlock.FreeSpace() >= spaceNeeded); + + // we need to transfer the block to the info, in case we + // also need to insert a key below + 
info.block.TransferFrom(newBlock); + info.entryBlock.SetTo( + (checksumfs_dir_entry_block*)info.block.Data(), + B_PAGE_SIZE); +ASSERT(info.entryBlock.Check()); + + info.index -= splitIndex; + + info.entryBlock.ReplaceEntryName(info.index, nameToUpdate, + nameToUpdateLength); + } + + // the newly created block needs to be inserted in the + // parent + insertNextKey = true; + nextNameToInsert = newEntryBlock.NameAt(0, + nextNameToInsertLength); + + // make sure the new block is kept until we no longer use + // the name in the next iteration (might already have been + // transferred to entry.block) + tempBlockUpdateInsert.TransferFrom(newBlock); + } + } + + if (insertThisKey) { + // insert after the block we descended + if (level < depth) + info.index++; + + PRINT(" level: %" B_PRId32 ", index: %" B_PRId32 ": inserting key " + "\"%.*s\" (%" B_PRIuSIZE "), blockIndex: %" B_PRIu64 "\n", + level, info.index, (int)nameLength, name, nameLength, + blockIndex); + + if (info.entryBlock.FreeSpace() >= nameLength + 10) { + info.entryBlock.InsertEntry(info.index, name, + nameLength, blockIndex); + if (info.index == 0) { + // we inserted at index 0, so we need to update this + // block's key in the parent block + updateNextKey = true; + nameToUpdate = info.entryBlock.NameAt(0, + nameToUpdateLength); + + // make sure the new block is kept until we no longer + // use the name in the next iteration + tempBlockUpdate.TransferFrom(info.block); + } + continue; + } + + // Not enough space left in the block -- we need to split it. + ASSERT(!insertNextKey); + + // for level == 0 we need to clone the block first + if (level == 0) { + error = _InsertEntryIncrementDepth(infos, transaction); + if (error != B_OK) + RETURN_ERROR(error); + + level = 2; + // _InsertEntryIncrementDepth() moved the root block + // content to level 1, where we want to continue. 
+ updateNextKey = false; + insertNextKey = true; + continue; + } + + int32 splitIndex; + error = _InsertEntrySplitBlock(level, info, nameLength + 10, + transaction, newBlock, splitIndex); + if (error != B_OK) + RETURN_ERROR(error); + + DirEntryBlock newEntryBlock( + (checksumfs_dir_entry_block*)newBlock.Data(), + B_PAGE_SIZE); +ASSERT(newEntryBlock.Check()); + + if (info.index < splitIndex) { + ASSERT(info.entryBlock.FreeSpace() >= nameLength + 10); + + info.entryBlock.InsertEntry(info.index, name, + nameLength, blockIndex); + if (info.index == 0) { + // we inserted at index 0, so we need to update this + // block's key in the parent block + updateNextKey = true; + nameToUpdate = info.entryBlock.NameAt(0, + nameToUpdateLength); + + // make sure the new block is kept until we no longer + // use the name in the next iteration + tempBlockUpdate.TransferFrom(info.block); + } + } else { + ASSERT(newEntryBlock.FreeSpace() >= nameLength + 10); + + info.index -= splitIndex; + + newEntryBlock.InsertEntry(info.index, name, nameLength, + blockIndex); + } + + // the newly created block needs to be inserted in the parent + insertNextKey = true; + nextNameToInsert = newEntryBlock.NameAt(0, nextNameToInsertLength); + nextBlockIndexToInsert = newBlock.Index(); + + // make sure the new block is kept until we no longer use + // the name in the next iteration + tempBlockInsert.TransferFrom(newBlock); + } + } + + return B_OK; +} + + +status_t +DirEntryTree::_InsertEntryIncrementDepth(LevelInfo* infos, + Transaction& transaction) +{ + FUNCTION("depth: %u -> %u\n", _Depth(), _Depth() + 1); + + if (_Depth() >= kCheckSumFSMaxDirEntryTreeDepth) + RETURN_ERROR(B_DEVICE_FULL); + + // allocate a new block + AllocatedBlock allocatedBlock( + fDirectory->GetVolume()->GetBlockAllocator(), transaction); + + status_t error = allocatedBlock.Allocate(fDirectory->BlockIndex()); + if (error != B_OK) + RETURN_ERROR(error); + fDirectory->SetSize(fDirectory->Size() + B_PAGE_SIZE); + + LevelInfo& newInfo = 
infos[1]; + if (!newInfo.block.GetZero(fDirectory->GetVolume(), + allocatedBlock.Index(), transaction)) { + RETURN_ERROR(B_ERROR); + } + + allocatedBlock.Detach(); + + newInfo.entryBlock.SetTo( + (checksumfs_dir_entry_block*)newInfo.block.Data(), B_PAGE_SIZE); +ASSERT(newInfo.entryBlock.Check()); + + // move the old root block contents to the new block + LevelInfo& rootInfo = infos[0]; + rootInfo.entryBlock.SplitBlock(0, newInfo.entryBlock); + + // add an entry for the new block to the root block + size_t nameLength; + const char* name = newInfo.entryBlock.NameAt(0, nameLength); + rootInfo.entryBlock.InsertEntry(0, name, nameLength, + newInfo.block.Index()); + + PRINT(" -> new block: %" B_PRIu64 "\n", newInfo.block.Index()); + + newInfo.index = rootInfo.index; + rootInfo.index = 0; + fTree->depth++; + + return B_OK; +} + + +status_t +DirEntryTree::_InsertEntrySplitBlock(int32 level, LevelInfo& info, + size_t needed, Transaction& transaction, Block& newBlock, + int32& _splitIndex) +{ + int32 splitIndex = info.entryBlock.FindSplitIndex(info.index, + needed); + + FUNCTION("level: %" B_PRId32 ", size needed: %" B_PRIuSIZE ", split index: " + "%" B_PRId32 "/%" B_PRId32 "\n", level, needed, splitIndex, + info.entryBlock.EntryCount()); + + // allocate a new block + AllocatedBlock allocatedBlock( + fDirectory->GetVolume()->GetBlockAllocator(), transaction); + + status_t error = allocatedBlock.Allocate(fDirectory->BlockIndex()); + if (error != B_OK) + RETURN_ERROR(error); + fDirectory->SetSize(fDirectory->Size() + B_PAGE_SIZE); + + if (!newBlock.GetZero(fDirectory->GetVolume(), allocatedBlock.Index(), + transaction)) { + RETURN_ERROR(B_ERROR); + } + + allocatedBlock.Detach(); + + // split the old block + DirEntryBlock newEntryBlock( + (checksumfs_dir_entry_block*)newBlock.Data(), B_PAGE_SIZE); +ASSERT(newEntryBlock.Check()); + info.entryBlock.SplitBlock(splitIndex, newEntryBlock); + + PRINT(" -> new block: %" B_PRIu64 "\n", newBlock.Index()); + + _splitIndex = splitIndex; + 
return B_OK; +} + + +bool +DirEntryTree::_Check(int32 level, uint64 blockIndex, const char* key, + size_t keyLength, DirEntryBlock& entryBlock) +{ + // check block for validity + if (!entryBlock.Check()) { + ERROR("DirEntryTree::Check(): level %" B_PRIu32 ": block %" + B_PRIu64 " not valid!\n", level, blockIndex); + return false; + } + + // The root block is allowed to be empty. For all other blocks that is an + // error. + uint32 entryCount = entryBlock.EntryCount(); + if (entryCount == 0) { + if (level == 0) + return true; + + ERROR("DirEntryTree::Check(): level %" B_PRIu32 ": block %" + B_PRIu64 " empty!\n", level, blockIndex); + return false; + } + + // Verify that the block's first entry matches the key with which the + // parent block refers to it. + if (level > 0) { + size_t nameLength; + const char* name = entryBlock.NameAt(0, nameLength); + if (nameLength != keyLength || strncmp(name, key, keyLength) != 0) { + ERROR("DirEntryTree::Check(): level %" B_PRIu32 ": block %" + B_PRIu64 " key mismatch: is \"%.*s\", should be \"%.*s\"\n", + level, blockIndex, (int)keyLength, key, (int)nameLength, name); + return false; + } + } + + if (level == _Depth()) + return true; + + // not the final level -- recurse + for (uint32 i = 0; i < entryCount; i++) { + size_t nameLength; + const char* name = entryBlock.NameAt(i, nameLength); + uint64 childBlockIndex = entryBlock.BlockIndexAt(i); + + Block childBlock; + if (!childBlock.GetReadable(fDirectory->GetVolume(), childBlockIndex)) { + ERROR("DirEntryTree::Check(): level %" B_PRIu32 ": block %" + B_PRIu64 " failed to get child block %" B_PRIu64 " (entry %" + B_PRIu32 ")\n", level, blockIndex, childBlockIndex, i); + // we cannot check a block we failed to get + return false; + } + + DirEntryBlock childEntryBlock( + (checksumfs_dir_entry_block*)childBlock.Data(), B_PAGE_SIZE); + + if (!_Check(level + 1, childBlockIndex, name, nameLength, + childEntryBlock)) { + return false; + } + } + + return true; +} + + +// #pragma mark - 
Directory + Directory::Directory(Volume* volume, uint64
blockIndex, const checksumfs_node& nodeData) @@ -25,3 +1452,46 @@ Directory::Directory(Volume* volume, uint64 blockIndex, mode_t mode) Directory::~Directory() { } + + +status_t +Directory::LookupEntry(const char* name, uint64& _blockIndex) +{ + DirEntryTree entryTree(this); + return entryTree.LookupEntry(name, _blockIndex); +} + + +status_t +Directory::LookupNextEntry(const char* name, char* foundName, + size_t& _foundNameLength, uint64& _blockIndex) +{ + DirEntryTree entryTree(this); + return entryTree.LookupNextEntry(name, foundName, _foundNameLength, + _blockIndex); +} + + +status_t +Directory::InsertEntry(const char* name, uint64 blockIndex, + Transaction& transaction) +{ + DirEntryTree entryTree(this); + + status_t error = entryTree.InsertEntry(name, blockIndex, transaction); + if (error == B_OK) + ASSERT(entryTree.Check()); + return error; +} + + +status_t +Directory::RemoveEntry(const char* name, Transaction& transaction) +{ + DirEntryTree entryTree(this); + + status_t error = entryTree.RemoveEntry(name, transaction); + if (error == B_OK) + ASSERT(entryTree.Check()); + return error; +} diff --git a/src/tests/system/kernel/file_corruption/fs/Directory.h b/src/tests/system/kernel/file_corruption/fs/Directory.h index d44b334fd6..4d7547d3ef 100644 --- a/src/tests/system/kernel/file_corruption/fs/Directory.h +++ b/src/tests/system/kernel/file_corruption/fs/Directory.h @@ -16,6 +16,17 @@ public: Directory(Volume* volume, uint64 blockIndex, mode_t mode); virtual ~Directory(); + + status_t LookupEntry(const char* name, + uint64& _blockIndex); + status_t LookupNextEntry(const char* name, + char* foundName, size_t& _foundNameLength, + uint64& _blockIndex); + + status_t InsertEntry(const char* name, uint64 blockIndex, + Transaction& transaction); + status_t RemoveEntry(const char* name, + Transaction& transaction); }; diff --git a/src/tests/system/kernel/file_corruption/fs/Jamfile b/src/tests/system/kernel/file_corruption/fs/Jamfile index 245cbf1d65..d9689886fd 100644 
--- a/src/tests/system/kernel/file_corruption/fs/Jamfile +++ b/src/tests/system/kernel/file_corruption/fs/Jamfile @@ -13,6 +13,8 @@ UseHeaders [ FDirName $(HAIKU_TOP) src add-ons kernel file_systems ] ; DEFINES += DEBUG_APP="\\\"checksumfs\\\"" ; +SubDirC++Flags -Werror ; + HAIKU_CHECKSUM_FS_SOURCES = BlockAllocator.cpp @@ -20,6 +22,7 @@ HAIKU_CHECKSUM_FS_SOURCES = Directory.cpp Node.cpp SuperBlock.cpp + Transaction.cpp Volume.cpp ; diff --git a/src/tests/system/kernel/file_corruption/fs/Node.cpp b/src/tests/system/kernel/file_corruption/fs/Node.cpp index ec77f88401..9826e3b0b1 100644 --- a/src/tests/system/kernel/file_corruption/fs/Node.cpp +++ b/src/tests/system/kernel/file_corruption/fs/Node.cpp @@ -8,10 +8,21 @@ #include #include +#include #include "Block.h" +static inline uint64 +current_time_nanos() +{ + timeval time; + gettimeofday(&time, NULL); + + return (uint64)time.tv_sec * 1000000000 + (uint64)time.tv_usec * 1000; +} + + Node::Node(Volume* volume, uint64 blockIndex, const checksumfs_node& nodeData) : fVolume(volume), @@ -20,6 +31,8 @@ Node::Node(Volume* volume, uint64 blockIndex, const checksumfs_node& nodeData) fNodeDataDirty(false) { _Init(); + + fAccessedTime = ModificationTime(); } @@ -35,14 +48,18 @@ Node::Node(Volume* volume, uint64 blockIndex, mode_t mode) fNode.mode = mode; + // set user/group + fNode.uid = geteuid(); + fNode.gid = getegid(); + // set the times timeval time; gettimeofday(&time, NULL); - fNode.creationTime = (uint64)time.tv_sec * 1000000000 - + (uint64)time.tv_usec * 1000; - fNode.modificationTime = fNode.creationTime; - fNode.changeTime = fNode.creationTime; + fAccessedTime = current_time_nanos(); + fNode.creationTime = fAccessedTime; + fNode.modificationTime = fAccessedTime; + fNode.changeTime = fAccessedTime; } @@ -52,14 +69,62 @@ Node::~Node() } +void +Node::SetParentDirectory(uint32 blockIndex) +{ + fNode.parentDirectory = blockIndex; + fNodeDataDirty = true; +} + + +void +Node::SetHardLinks(uint32 value) +{ + 
fNode.hardLinks = value; + fNodeDataDirty = true; +} + + +void +Node::SetSize(uint64 size) +{ + fNode.size = size; + + // Mark the node data dirty like the other setters do. Otherwise Flush() + // may return early without writing the new size. + fNodeDataDirty = true; +} + + +void +Node::Touched(int32 mode) +{ + fAccessedTime = current_time_nanos(); + + switch (mode) { + default: + case NODE_MODIFIED: + fNode.modificationTime = fAccessedTime; + case NODE_STAT_CHANGED: + fNode.changeTime = fAccessedTime; + case NODE_ACCESSED: + break; + } +} + + +void +Node::RevertNodeData(const checksumfs_node& nodeData) +{ + fNode = nodeData; + fNodeDataDirty = false; +} + + status_t -Node::Flush() +Node::Flush(Transaction& transaction) { if (!fNodeDataDirty) return B_OK; Block block; - if (!block.GetWritable(fVolume, fBlockIndex)) + if (!block.GetWritable(fVolume, fBlockIndex, transaction)) return B_ERROR; memcpy(block.Data(), &fNode, sizeof(fNode)); diff --git a/src/tests/system/kernel/file_corruption/fs/Node.h b/src/tests/system/kernel/file_corruption/fs/Node.h index 453393be52..ef38ce03cf 100644 --- a/src/tests/system/kernel/file_corruption/fs/Node.h +++ b/src/tests/system/kernel/file_corruption/fs/Node.h @@ -8,14 +8,24 @@ #include +#include + #include #include "checksumfs.h" +class Transaction; class Volume; +enum { + NODE_ACCESSED, + NODE_STAT_CHANGED, + NODE_MODIFIED +}; + + class Node { public: Node(Volume* volume, uint64 blockIndex, @@ -24,6 +34,7 @@ public: mode_t mode); virtual ~Node(); + inline const checksumfs_node& NodeData() const { return fNode; } inline Volume* GetVolume() const { return fVolume; } inline uint64 BlockIndex() const { return fBlockIndex; } inline uint32 Mode() const { return fNode.mode; } @@ -32,16 +43,25 @@ public: inline uint32 UID() const { return fNode.uid; } inline uint32 GID() const { return fNode.gid; } inline uint64 Size() const { return fNode.size; } + inline uint64 AccessedTime() const { return fAccessedTime; } inline uint64 
CreationTime() const; inline uint64 ModificationTime() const; inline uint64 ChangeTime() const; + void SetParentDirectory(uint32 blockIndex); + void SetHardLinks(uint32
value); + void SetSize(uint64 size); + + void Touched(int32 mode); + inline bool ReadLock(); inline void ReadUnlock(); inline bool WriteLock(); inline void WriteUnlock(); - status_t Flush(); + void RevertNodeData(const checksumfs_node& nodeData); + + status_t Flush(Transaction& transaction); private: void _Init(); @@ -50,6 +70,7 @@ private: rw_lock fLock; Volume* fVolume; uint64 fBlockIndex; + uint64 fAccessedTime; checksumfs_node fNode; bool fNodeDataDirty; }; @@ -111,4 +132,8 @@ Node::WriteUnlock() } +typedef AutoLocker > NodeReadLocker; +typedef AutoLocker > NodeWriteLocker; + + #endif // NODE_H diff --git a/src/tests/system/kernel/file_corruption/fs/Transaction.cpp b/src/tests/system/kernel/file_corruption/fs/Transaction.cpp new file mode 100644 index 0000000000..c311079098 --- /dev/null +++ b/src/tests/system/kernel/file_corruption/fs/Transaction.cpp @@ -0,0 +1,196 @@ +/* + * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de. + * Distributed under the terms of the MIT License. + */ + + +#include "Transaction.h" + +#include + +#include "BlockAllocator.h" +#include "Volume.h" + + +static inline bool +swap_if_greater(Node*& a, Node*& b) +{ + if (a->BlockIndex() <= b->BlockIndex()) + return false; + + std::swap(a, b); + return true; +} + + +// #pragma mark - Transaction + + +Transaction::Transaction(Volume* volume) + : + fVolume(volume), + fID(-1) +{ +} + + +Transaction::~Transaction() +{ + Abort(); +} + + +status_t +Transaction::Start() +{ + ASSERT(fID < 0); + + fVolume->TransactionStarted(); + + fID = cache_start_transaction(fVolume->BlockCache()); + if (fID < 0) { + fVolume->TransactionFinished(); + return fID; + } + + fOldFreeBlockCount = fVolume->GetBlockAllocator()->FreeBlocks(); + + return B_OK; +} + + +status_t +Transaction::Commit() +{ + ASSERT(fID >= 0); + + // flush the nodes + for (NodeInfoList::Iterator it = fNodeInfos.GetIterator(); + NodeInfo* info = it.Next();) { + status_t error = info->node->Flush(*this); + if (error != B_OK) { + Abort(); + 
return error; + } + } + + // commit the cache transaction + status_t error = cache_end_transaction(fVolume->BlockCache(), fID, NULL, + NULL); + if (error != B_OK) { + Abort(); + return error; + } + + // clean up + _DeleteNodeInfosAndUnlock(false); + + fVolume->TransactionFinished(); + fID = -1; + + return B_OK; +} + + +void +Transaction::Abort() +{ + if (fID < 0) + return; + + // abort the cache transaction + cache_abort_transaction(fVolume->BlockCache(), fID); + + // revert the nodes + for (NodeInfoList::Iterator it = fNodeInfos.GetIterator(); + NodeInfo* info = it.Next();) { + info->node->RevertNodeData(info->oldNodeData); + } + + // clean up + _DeleteNodeInfosAndUnlock(true); + + fVolume->GetBlockAllocator()->ResetFreeBlocks(fOldFreeBlockCount); + + fVolume->TransactionFinished(); + fID = -1; +} + + +status_t +Transaction::AddNode(Node* node, uint32 flags) +{ + NodeInfo* info = _GetNodeInfo(node); + if (info != NULL) + return B_OK; + + info = new(std::nothrow) NodeInfo; + if (info == NULL) + return B_NO_MEMORY; + + node->WriteLock(); + + info->node = node; + info->oldNodeData = node->NodeData(); + info->flags = flags; + + fNodeInfos.Add(info); + + return B_OK; +} + + +status_t +Transaction::AddNodes(Node* node1, Node* node2, Node* node3) +{ + // sort the nodes + swap_if_greater(node1, node2); + if (node3 != NULL && swap_if_greater(node2, node3)) + swap_if_greater(node1, node2); + + // add them + status_t error = AddNode(node1); + if (error == B_OK) + error = AddNode(node2); + if (error == B_OK && node3 != NULL) + AddNode(node3); + + return error; +} + + +void +Transaction::KeepNode(Node* node) +{ + NodeInfo* info = _GetNodeInfo(node); + if (info == NULL) + return; + + info->flags &= ~(uint32)TRANSACTION_DELETE_NODE; +} + + +Transaction::NodeInfo* +Transaction::_GetNodeInfo(Node* node) const +{ + for (NodeInfoList::ConstIterator it = fNodeInfos.GetIterator(); + NodeInfo* info = it.Next();) { + if (node == info->node) + return info; + } + + return NULL; +} + + 
+void +Transaction::_DeleteNodeInfosAndUnlock(bool failed) +{ + while (NodeInfo* info = fNodeInfos.RemoveHead()) { + if ((info->flags & TRANSACTION_DELETE_NODE) != 0) + delete info->node; + else + info->node->WriteUnlock(); + delete info; + } +} diff --git a/src/tests/system/kernel/file_corruption/fs/Transaction.h b/src/tests/system/kernel/file_corruption/fs/Transaction.h new file mode 100644 index 0000000000..a5071aa0df --- /dev/null +++ b/src/tests/system/kernel/file_corruption/fs/Transaction.h @@ -0,0 +1,62 @@ +/* + * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de. + * Distributed under the terms of the MIT License. + */ +#ifndef TRANSACTION_H +#define TRANSACTION_H + + +#include + +#include + +#include "Node.h" + + +class Volume; + + +enum { + TRANSACTION_DELETE_NODE = 0x1 +}; + + +class Transaction { +public: + explicit Transaction(Volume* volume); + ~Transaction(); + + int32 ID() const { return fID; } + + status_t Start(); + status_t Commit(); + void Abort(); + + status_t AddNode(Node* node, uint32 flags = 0); + status_t AddNodes(Node* node1, Node* node2, + Node* node3 = NULL); + + void KeepNode(Node* node); + +private: + struct NodeInfo : DoublyLinkedListLinkImpl { + Node* node; + checksumfs_node oldNodeData; + uint32 flags; + }; + + typedef DoublyLinkedList NodeInfoList; + +private: + NodeInfo* _GetNodeInfo(Node* node) const; + void _DeleteNodeInfosAndUnlock(bool failed); + +private: + Volume* fVolume; + int32 fID; + NodeInfoList fNodeInfos; + uint64 fOldFreeBlockCount; +}; + + +#endif // TRANSACTION_H diff --git a/src/tests/system/kernel/file_corruption/fs/Volume.cpp b/src/tests/system/kernel/file_corruption/fs/Volume.cpp index ee80024760..0e5c3be1c3 100644 --- a/src/tests/system/kernel/file_corruption/fs/Volume.cpp +++ b/src/tests/system/kernel/file_corruption/fs/Volume.cpp @@ -38,6 +38,7 @@ Volume::Volume(uint32 flags) fBlockAllocator(NULL), fRootDirectory(NULL) { + mutex_init(&fTransactionLock, "checksumfs transaction"); } @@ -53,6 +54,8 @@ 
Volume::~Volume() close(fFD); free(fName); + + mutex_destroy(&fTransactionLock); } @@ -170,30 +173,41 @@ Volume::Initialize(const char* name) if (fName == NULL) return B_NO_MEMORY; - status_t error = fBlockAllocator->Initialize(); + Transaction transaction(this); + status_t error = transaction.Start(); + if (error != B_OK) + return error; + + error = fBlockAllocator->Initialize(transaction); if (error != B_OK) return error; // create the root directory error = CreateDirectory(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH, - fRootDirectory); + transaction, fRootDirectory); if (error != B_OK) return error; - error = fRootDirectory->Flush(); - if (error != B_OK) - return error; + transaction.KeepNode(fRootDirectory); + fRootDirectory->SetHardLinks(1); // write the super block Block block; - if (!block.GetZero(this, kCheckSumFSSuperBlockOffset / B_PAGE_SIZE)) + if (!block.GetZero(this, kCheckSumFSSuperBlockOffset / B_PAGE_SIZE, + transaction)) { return B_ERROR; + } SuperBlock* superBlock = (SuperBlock*)block.Data(); superBlock->Initialize(this); block.Put(); + // commit the transaction and flush the block cache + error = transaction.Commit(); + if (error != B_OK) + return error; + return block_cache_sync(fBlockCache); } @@ -235,6 +249,13 @@ Volume::PutNode(Node* node) } +status_t +Volume::RemoveNode(Node* node) +{ + return remove_vnode(fFSVolume, node->BlockIndex()); +} + + status_t Volume::ReadNode(uint64 blockIndex, Node*& _node) { @@ -271,10 +292,11 @@ Volume::ReadNode(uint64 blockIndex, Node*& _node) status_t -Volume::CreateDirectory(mode_t mode, Directory*& _directory) +Volume::CreateDirectory(mode_t mode, Transaction& transaction, + Directory*& _directory) { // allocate a free block - AllocatedBlock allocatedBlock(fBlockAllocator); + AllocatedBlock allocatedBlock(fBlockAllocator, transaction); status_t error = allocatedBlock.Allocate(); if (error != B_OK) return error; @@ -285,6 +307,13 @@ Volume::CreateDirectory(mode_t mode, Directory*& _directory) if 
(directory == NULL) return B_NO_MEMORY; + // attach the directory to the transaction + error = transaction.AddNode(directory, TRANSACTION_DELETE_NODE); + if (error != B_OK) { + delete directory; + return error; + } + allocatedBlock.Detach(); _directory = directory; @@ -292,6 +321,33 @@ Volume::CreateDirectory(mode_t mode, Directory*& _directory) } +status_t +Volume::DeleteNode(Node* node) +{ + Transaction transaction(this); + status_t error = transaction.Start(); + if (error == B_OK) { + error = fBlockAllocator->Free(node->BlockIndex(), 1, transaction); + if (error == B_OK) { + error = transaction.Commit(); + if (error != B_OK) { + ERROR("Failed to commit transaction for delete node at %" + B_PRIu64 "\n", node->BlockIndex()); + } + } else { + ERROR("Failed to free block for node at %" B_PRIu64 "\n", + node->BlockIndex()); + } + } else { + ERROR("Failed to start transaction for delete node at %" B_PRIu64 "\n", + node->BlockIndex()); + } + + delete node; + return error; +} + + status_t Volume::_Init(uint64 totalBlocks) { diff --git a/src/tests/system/kernel/file_corruption/fs/Volume.h b/src/tests/system/kernel/file_corruption/fs/Volume.h index 3de563bd89..af38006c49 100644 --- a/src/tests/system/kernel/file_corruption/fs/Volume.h +++ b/src/tests/system/kernel/file_corruption/fs/Volume.h @@ -10,10 +10,13 @@ #include #include +#include + class BlockAllocator; class Directory; class Node; +class Transaction; class Volume { @@ -34,11 +37,17 @@ public: status_t PublishNode(Node* node, uint32 flags); status_t GetNode(uint64 blockIndex, Node*& _node); status_t PutNode(Node* node); + status_t RemoveNode(Node* node); status_t ReadNode(uint64 blockIndex, Node*& _node); status_t CreateDirectory(mode_t mode, + Transaction& transaction, Directory*& _directory); + status_t DeleteNode(Node* node); + + inline void TransactionStarted(); + inline void TransactionFinished(); inline dev_t ID() const { return fFSVolume->id; } inline bool IsReadOnly() const; @@ -62,9 +71,24 @@ private: 
char* fName; BlockAllocator* fBlockAllocator; Directory* fRootDirectory; + mutex fTransactionLock; }; +void +Volume::TransactionStarted() +{ + mutex_lock(&fTransactionLock); +} + + +void +Volume::TransactionFinished() +{ + mutex_unlock(&fTransactionLock); +} + + bool Volume::IsReadOnly() const { diff --git a/src/tests/system/kernel/file_corruption/fs/checksumfs.cpp b/src/tests/system/kernel/file_corruption/fs/checksumfs.cpp index 6ee54305d0..15536fe9cd 100644 --- a/src/tests/system/kernel/file_corruption/fs/checksumfs.cpp +++ b/src/tests/system/kernel/file_corruption/fs/checksumfs.cpp @@ -6,18 +6,24 @@ #include #include +#include +#include #include #include #include +#include + +#include #include "checksumfs.h" #include "checksumfs_private.h" #include "DebugSupport.h" #include "Directory.h" #include "SuperBlock.h" +#include "Transaction.h" #include "Volume.h" @@ -34,6 +40,166 @@ set_timespec(timespec& time, uint64 nanos) } +struct PutNode { + inline void operator()(Node* node) + { + if (node != NULL) + node->GetVolume()->PutNode(node); + } +}; + +typedef BPrivate::AutoDeleter NodePutter; + + +static bool +is_user_in_group(gid_t gid) +{ + gid_t groups[NGROUPS_MAX]; + int groupCount = getgroups(NGROUPS_MAX, groups); + for (int i = 0; i < groupCount; i++) { + if (gid == groups[i]) + return true; + } + + return gid == getegid(); +} + + +static status_t +check_access(Node* node, uint32 accessFlags) +{ + // Note: we assume that the access flags are compatible with the permission + // bits. 
+ STATIC_ASSERT(R_OK == S_IROTH && W_OK == S_IWOTH && X_OK == S_IXOTH); + + // get node permissions + int userPermissions = (node->Mode() & S_IRWXU) >> 6; + int groupPermissions = (node->Mode() & S_IRWXG) >> 3; + int otherPermissions = node->Mode() & S_IRWXO; + + // get the permissions for this uid/gid + int permissions = 0; + uid_t uid = geteuid(); + + if (uid == 0) { + // user is root + // root has always read/write permission, but at least one of the + // X bits must be set for execute permission + permissions = userPermissions | groupPermissions | otherPermissions + | R_OK | W_OK; + } else if (uid == node->UID()) { + // user is node owner + permissions = userPermissions; + } else if (is_user_in_group(node->GID())) { + // user is in owning group + permissions = groupPermissions; + } else { + // user is one of the others + permissions = otherPermissions; + } + + return (accessFlags & ~permissions) == 0 ? B_OK : B_NOT_ALLOWED; +} + + +status_t +remove_entry(fs_volume* fsVolume, fs_vnode* parent, const char* name, + bool removeDirectory) +{ + Volume* volume = (Volume*)fsVolume->private_volume; + Directory* directory + = dynamic_cast((Node*)parent->private_node); + if (directory == NULL) + return B_NOT_A_DIRECTORY; + + if (volume->IsReadOnly()) + return B_READ_ONLY_DEVICE; + + // Since we need to lock both nodes (the directory and the entry's), this + // is a bit cumbersome. We first look up the entry while having the + // directory read-locked, then drop the read lock, write-lock both nodes + // and check whether anything has changed. 
+ Transaction transaction(volume); + Node* childNode; + NodePutter childNodePutter; + + while (true) { + // look up the entry + NodeReadLocker directoryLocker(directory); + + uint64 blockIndex; + status_t error = directory->LookupEntry(name, blockIndex); + if (error != B_OK) + RETURN_ERROR(error); + + directoryLocker.Unlock(); + + // get the entry's node + error = volume->GetNode(blockIndex, childNode); + if (error != B_OK) + RETURN_ERROR(error); + childNodePutter.SetTo(childNode); + + // start the transaction + error = transaction.Start(); + if (error != B_OK) + RETURN_ERROR(error); + + // write-lock the nodes + error = transaction.AddNodes(directory, childNode); + if (error != B_OK) + RETURN_ERROR(error); + + // check the situation again + error = directory->LookupEntry(name, blockIndex); + if (error != B_OK) + RETURN_ERROR(error); + if (blockIndex != childNode->BlockIndex()) { + transaction.Abort(); + continue; + } + + break; + } + + // check permissions + status_t error = check_access(directory, W_OK); + if (error != B_OK) + return error; + + // check whether the child node type agrees with our caller + if (removeDirectory) { + if (!S_ISDIR(childNode->Mode())) + RETURN_ERROR(B_NOT_A_DIRECTORY); + + // directory must be empty + if (childNode->Size() > 0) + RETURN_ERROR(B_DIRECTORY_NOT_EMPTY); + } else if (S_ISDIR(childNode->Mode())) + RETURN_ERROR(B_IS_A_DIRECTORY); + + // remove the entry + error = directory->RemoveEntry(name, transaction); + if (error != B_OK) + RETURN_ERROR(error); + + // update stat data + childNode->SetHardLinks(childNode->HardLinks() - 1); + + directory->Touched(NODE_MODIFIED); + + // remove the child node, if no longer referenced + if (childNode->HardLinks() == 0) { + error = volume->RemoveNode(childNode); + if (error != B_OK) + return error; + } + + // commit the transaction + return transaction.Commit(); +} + + // #pragma mark - FS operations @@ -211,6 +377,12 @@ checksumfs_lookup(fs_volume* fsVolume, fs_vnode* fsDir, const char* name, 
if (directory == NULL) return B_NOT_A_DIRECTORY; + status_t error = check_access(directory, X_OK); + if (error != B_OK) + return error; + + NodeReadLocker nodeLocker(node); + uint64 blockIndex; if (strcmp(name, ".") == 0) { @@ -218,13 +390,14 @@ checksumfs_lookup(fs_volume* fsVolume, fs_vnode* fsDir, const char* name, } else if (strcmp(name, "..") == 0) { blockIndex = directory->ParentDirectory(); } else { - // TODO: Implement! - return B_ENTRY_NOT_FOUND; + status_t error = directory->LookupEntry(name, blockIndex); + if (error != B_OK) + return error; } // get the node Node* childNode; - status_t error = volume->GetNode(blockIndex, childNode); + error = volume->GetNode(blockIndex, childNode); if (error != B_OK) return error; @@ -242,6 +415,15 @@ checksumfs_put_vnode(fs_volume* fsVolume, fs_vnode* vnode, bool reenter) } +static status_t +checksumfs_remove_vnode(fs_volume* fsVolume, fs_vnode* vnode, bool reenter) +{ + Volume* volume = (Volume*)fsVolume->private_volume; + Node* node = (Node*)vnode->private_node; + return volume->DeleteNode(node); +} + + // #pragma mark - common operations @@ -250,6 +432,8 @@ checksumfs_read_stat(fs_volume* fsVolume, fs_vnode* vnode, struct stat* st) { Node* node = (Node*)vnode->private_node; + NodeReadLocker nodeLocker(node); + st->st_mode = node->Mode(); st->st_nlink = node->HardLinks(); st->st_uid = node->UID(); @@ -259,12 +443,11 @@ checksumfs_read_stat(fs_volume* fsVolume, fs_vnode* vnode, struct stat* st) set_timespec(st->st_mtim, node->ModificationTime()); set_timespec(st->st_ctim, node->ChangeTime()); set_timespec(st->st_crtim, node->CreationTime()); - st->st_atim = st->st_ctim; - // we don't support access time + set_timespec(st->st_atim, node->AccessedTime()); st->st_type = 0; /* attribute/index type */ st->st_blocks = 1 + (st->st_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE; // TODO: That does neither count management structures for the content - // nor attributes. + // (for files) nor attributes. 
return B_OK; } @@ -288,7 +471,35 @@ static status_t checksumfs_open(fs_volume* fsVolume, fs_vnode* vnode, int openMode, void** _cookie) { - // TODO: Check permissions! + Volume* volume = (Volume*)fsVolume->private_volume; + Node* node = (Node*)vnode->private_node; + + NodeReadLocker nodeLocker(node); + + // check the open mode and permissions + uint32 accessFlags = 0; + switch (openMode & O_RWMASK) { + case O_RDONLY: + accessFlags = R_OK; + break; + case O_WRONLY: + accessFlags = W_OK; + break; + case O_RDWR: + accessFlags = R_OK | W_OK; + break; + } + + if ((accessFlags & W_OK) != 0) { + if (S_ISDIR(node->Mode())) + return B_IS_A_DIRECTORY; + if (volume->IsReadOnly()) + return B_READ_ONLY_DEVICE; + } + + status_t error = check_access(node, accessFlags); + if (error != B_OK) + return error; FileCookie* cookie = new(std::nothrow) FileCookie(openMode); if (cookie == NULL) @@ -319,55 +530,65 @@ checksumfs_free_cookie(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie) struct DirCookie { - enum { - DOT, - DOT_DOT, - OTHERS - }; - - Directory* directory; - int iterationState; - DirCookie(Directory* directory) : - directory(directory), - iterationState(DOT) + fDirectory(directory) { + Rewind(); + } + + Directory* GetDirectory() const + { + return fDirectory; } status_t ReadNextEntry(struct dirent* buffer, size_t size, uint32& _countRead) { const char* name; + size_t nameLength; uint64 blockIndex; int nextIterationState = OTHERS; - switch (iterationState) { + switch (fIterationState) { case DOT: name = "."; - blockIndex = directory->BlockIndex(); + nameLength = 1; + blockIndex = fDirectory->BlockIndex(); nextIterationState = DOT_DOT; break; case DOT_DOT: name = ".."; - blockIndex = directory->ParentDirectory(); + nameLength = 2; + blockIndex = fDirectory->ParentDirectory(); break; default: - // TODO: Implement! 
- _countRead = 0; - return B_OK; + { + status_t error = fDirectory->LookupNextEntry(fEntryName, + fEntryName, nameLength, blockIndex); + if (error != B_OK) { + if (error != B_ENTRY_NOT_FOUND) + return error; + + _countRead = 0; + return B_OK; + } + + name = fEntryName; + break; + } } - size_t entrySize = sizeof(dirent) + strlen(name); + size_t entrySize = sizeof(dirent) + nameLength; if (entrySize > size) return B_BUFFER_OVERFLOW; - buffer->d_dev = directory->GetVolume()->ID(); + buffer->d_dev = fDirectory->GetVolume()->ID(); buffer->d_ino = blockIndex; buffer->d_reclen = entrySize; strcpy(buffer->d_name, name); - iterationState = nextIterationState; + fIterationState = nextIterationState; _countRead = 1; return B_OK; @@ -375,11 +596,81 @@ struct DirCookie { void Rewind() { - iterationState = DOT; + fIterationState = DOT; + fEntryName[0] = '\0'; } + +private: + enum { + DOT, + DOT_DOT, + OTHERS + }; + + Directory* fDirectory; + int fIterationState; + char fEntryName[kCheckSumFSNameLength + 1]; }; +status_t +checksumfs_create_dir(fs_volume* fsVolume, fs_vnode* parent, const char* name, + int perms) +{ + Volume* volume = (Volume*)fsVolume->private_volume; + Directory* directory + = dynamic_cast((Node*)parent->private_node); + if (directory == NULL) + return B_NOT_A_DIRECTORY; + + if (volume->IsReadOnly()) + return B_READ_ONLY_DEVICE; + + status_t error = check_access(directory, W_OK); + if (error != B_OK) + return error; + + // start a transaction + Transaction transaction(volume); + error = transaction.Start(); + if (error != B_OK) + return error; + + // attach the directory to the transaction (write locks it, too) + error = transaction.AddNode(directory); + if (error != B_OK) + return error; + + // create a directory node + Directory* newDirectory; + error = volume->CreateDirectory(perms, transaction, newDirectory); + if (error != B_OK) + return error; + + // insert the new directory + error = directory->InsertEntry(name, newDirectory->BlockIndex(), + transaction); 
+ if (error != B_OK) + return error; + + // update stat data + newDirectory->SetHardLinks(1); + newDirectory->SetParentDirectory(directory->BlockIndex()); + + directory->Touched(NODE_MODIFIED); + + // commit the transaction + return transaction.Commit(); +} + + +status_t +checksumfs_remove_dir(fs_volume* volume, fs_vnode* parent, const char* name) +{ + return remove_entry(volume, parent, name, true); +} + + static status_t checksumfs_open_dir(fs_volume* fsVolume, fs_vnode* vnode, void** _cookie) { @@ -387,6 +678,10 @@ checksumfs_open_dir(fs_volume* fsVolume, fs_vnode* vnode, void** _cookie) if (directory == NULL) return B_NOT_A_DIRECTORY; + status_t error = check_access(directory, R_OK); + if (error != B_OK) + return error; + DirCookie* cookie = new(std::nothrow) DirCookie(directory); if (cookie == NULL) return B_NO_MEMORY; @@ -420,6 +715,9 @@ checksumfs_read_dir(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie, return B_OK; DirCookie* cookie = (DirCookie*)_cookie; + + NodeReadLocker nodeLocker(cookie->GetDirectory()); + return cookie->ReadNextEntry(buffer, bufferSize, *_num); } @@ -428,6 +726,9 @@ static status_t checksumfs_rewind_dir(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie) { DirCookie* cookie = (DirCookie*)_cookie; + + NodeReadLocker nodeLocker(cookie->GetDirectory()); + cookie->Rewind(); return B_OK; } @@ -547,7 +848,7 @@ fs_vnode_ops gCheckSumFSVnodeOps = { NULL, // get_vnode_name checksumfs_put_vnode, - NULL, // checksumfs_remove_vnode, + checksumfs_remove_vnode, /* VM file access */ NULL, // can_page @@ -588,8 +889,8 @@ fs_vnode_ops gCheckSumFSVnodeOps = { NULL, // checksumfs_write, /* directory operations */ - NULL, // checksumfs_create_dir, - NULL, // checksumfs_remove_dir, + checksumfs_create_dir, + checksumfs_remove_dir, checksumfs_open_dir, checksumfs_close_dir, checksumfs_free_dir_cookie, diff --git a/src/tests/system/kernel/file_corruption/fs/userland/Jamfile b/src/tests/system/kernel/file_corruption/fs/userland/Jamfile index 
15dc07e601..3767d10c37 100644 --- a/src/tests/system/kernel/file_corruption/fs/userland/Jamfile +++ b/src/tests/system/kernel/file_corruption/fs/userland/Jamfile @@ -17,6 +17,9 @@ DEFINES += DEBUG_APP="\\\"checksumfs\\\"" ; SEARCH_SOURCE += [ FDirName $(SUBDIR) $(DOTDOT) ] ; +SubDirC++Flags -Werror ; + + Addon checksumfs : $(HAIKU_CHECKSUM_FS_SOURCES)