Initial revision

git-svn-id: file:///srv/svn/repos/haiku/trunk/current@10020 a95241bf-73f2-0310-859d-f6bbb57e9c96
2004-11-19 15:23:44 +00:00 · 2004-11-19 15:23:44 +00:00 · efce0b7346
commit efce0b7346
parent f9081c794e
33 changed files with 15573 additions and 0 deletions
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BPlusTree.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BPlusTree.cpp
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BPlusTree.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BPlusTree.h
@ -0,0 +1,491 @@
+#ifndef B_PLUS_TREE_H
+#define B_PLUS_TREE_H
+/* BPlusTree - BFS B+Tree implementation
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** Roughly based on 'btlib' written by Marcus J. Ranum
+** 
+** Copyright (c) 2001-2004 pinc Software. All Rights Reserved.
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "bfs.h"
+#include "Journal.h"
+#include "Chain.h"
+
+#include <string.h>
+
+
+//****************** on-disk structures ********************
+
+// Sentinel link values. BPLUSTREE_NULL is accepted by
+// bplustree_header::IsValidLink() and marks a leaf node when found in
+// bplustree_node::overflow_link (see IsLeaf()).
+// NOTE(review): BPLUSTREE_FREE presumably marks nodes on the free list - confirm.
+#define BPLUSTREE_NULL			-1LL
+#define BPLUSTREE_FREE			-2LL
+
+/**	On-disk header of a B+tree.
+ *	The accessors below run each raw field through the matching
+ *	BFS_ENDIAN_TO_HOST_* macro; prefer them over reading the fields directly.
+ */
+struct bplustree_header {
+	uint32		magic;
+	uint32		node_size;
+	uint32		max_number_of_levels;
+	uint32		data_type;
+	off_t		root_node_pointer;
+	off_t		free_node_pointer;
+	off_t		maximum_size;
+
+	// byte-order converting accessors, one per field
+	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
+	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
+	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
+	off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
+	off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
+	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
+	uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
+
+	// true for BPLUSTREE_NULL and for positive offsets that leave room for
+	// one more node below MaximumSize()
+	inline bool IsValidLink(off_t link);
+} _PACKED;
+
+// BPLUSTREE_MAGIC is the value a valid bplustree_header::magic is compared
+// against; BPLUSTREE_NODE_SIZE is the default node size of the BPlusTree
+// constructor and SetTo().
+// NOTE(review): the key length limits are presumably in bytes - confirm.
+#define BPLUSTREE_MAGIC 			0x69f6c2e8
+#define BPLUSTREE_NODE_SIZE 		1024
+#define BPLUSTREE_MAX_KEY_LENGTH	256
+#define BPLUSTREE_MIN_KEY_LENGTH	1
+
+/**	Key types a tree can hold. The typed BPlusTree::Insert()/Remove()
+ *	wrappers compare the header's data type against these values and fail
+ *	with B_BAD_TYPE on a mismatch.
+ */
+enum bplustree_types {
+	BPLUSTREE_STRING_TYPE	= 0,
+	BPLUSTREE_INT32_TYPE	= 1,
+	BPLUSTREE_UINT32_TYPE	= 2,
+	BPLUSTREE_INT64_TYPE	= 3,
+	BPLUSTREE_UINT64_TYPE	= 4,
+	BPLUSTREE_FLOAT_TYPE	= 5,
+	BPLUSTREE_DOUBLE_TYPE	= 6
+};
+
+struct sorted_array;
+typedef sorted_array duplicate_array;
+
+/**	On-disk header of a single B+tree node.
+ *	As laid out by the inline accessors: the key data starts right after this
+ *	header (Keys()), the key length array follows the keys at a round_up()'d
+ *	offset (KeyLengths()), and the values follow the key lengths (Values()).
+ */
+struct bplustree_node {
+	off_t	left_link;
+	off_t	right_link;
+	off_t	overflow_link;
+	uint16	all_key_count;
+	uint16	all_key_length;
+
+	// byte-order converting accessors for the raw fields above
+	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
+	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
+	off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
+	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
+	uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
+
+	// pointers into the variable sized part that follows the header
+	inline uint16 *KeyLengths() const;
+	inline off_t *Values() const;
+	inline uint8 *Keys() const;
+	// number of bytes occupied by header, keys, key lengths, and values
+	inline int32 Used() const;
+	uint8 *KeyAt(int32 index, uint16 *keyLength) const;
+
+	// a node is a leaf if its overflow link is BPLUSTREE_NULL
+	inline bool IsLeaf() const;
+
+	void Initialize();
+	uint8 CountDuplicates(off_t offset, bool isFragment) const;
+	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
+	int32 FragmentsUsed(uint32 nodeSize);
+	inline duplicate_array *FragmentAt(int8 index);
+	inline duplicate_array *DuplicateArray();
+
+	// helpers for links that carry a type in their two topmost bits and a
+	// fragment index in their lowest ten bits
+	static inline uint8 LinkType(off_t link);
+	static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
+	static inline bool IsDuplicate(off_t link);
+	static inline off_t FragmentOffset(off_t link);
+	static inline uint32 FragmentIndex(off_t link);
+
+#ifdef DEBUG
+	void CheckIntegrity(uint32 nodeSize);
+#endif
+} _PACKED;
+
+//#define BPLUSTREE_NODE 0
+// Link types as returned by bplustree_node::LinkType(), i.e. the value of
+// the two topmost bits of a link.
+#define BPLUSTREE_DUPLICATE_NODE 2
+#define BPLUSTREE_DUPLICATE_FRAGMENT 3
+
+// bplustree_node::FragmentAt() spaces fragments (NUM_FRAGMENT_VALUES + 1)
+// off_t entries apart.
+// NOTE(review): NUM_DUPLICATE_VALUES is presumably the capacity of a full
+// duplicate node - confirm.
+#define NUM_FRAGMENT_VALUES 7
+#define NUM_DUPLICATE_VALUES 125
+
+//**************************************
+
+/**	Arguments for TreeIterator: the first pair is passed as direction to
+ *	Traverse(), the second pair as target to Goto().
+ *	Note that BPLUSTREE_FORWARD and BPLUSTREE_END share the value 1, so the
+ *	two pairs cannot be told apart by value.
+ */
+enum bplustree_traversing {
+	BPLUSTREE_FORWARD = 1,
+	BPLUSTREE_BACKWARD = -1,
+	
+	BPLUSTREE_BEGIN = 0,
+	BPLUSTREE_END = 1
+};
+
+
+//****************** in-memory structures ********************
+
+template<class T> class Stack;
+class BPlusTree;
+class TreeIterator;
+class CachedNode;
+class Inode;
+
+// needed for searching (utilizing a stack)
+// Element type of the Stack<> that BPlusTree::SeekDown() fills: a node
+// offset together with a key index within that node.
+struct node_and_key {
+	off_t	nodeOffset;
+	uint16	keyIndex;
+};
+
+
+//***** Cache handling *****
+
+/**	Gives access to a single node (or the header) of a BPlusTree.
+ *	The destructor calls Unset(), so a CachedNode on the stack lets go of
+ *	whatever it currently refers to when it goes out of scope.
+ */
+class CachedNode {
+	public:
+		/** Creates an object that does not refer to any node yet. */
+		CachedNode(BPlusTree *tree)
+			:
+			fTree(tree),
+			fNode(NULL),
+			fBlock(NULL),
+			fBlockNumber(0)
+				// was left uninitialized before
+		{
+		}
+
+		/** Creates the object and immediately calls SetTo(offset, check);
+		 *	use Node() to find out whether a node could be retrieved.
+		 */
+		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
+			:
+			fTree(tree),
+			fNode(NULL),
+			fBlock(NULL),
+			fBlockNumber(0)
+		{
+			SetTo(offset, check);
+		}
+
+		~CachedNode()
+		{
+			Unset();
+		}
+
+		bplustree_node *SetTo(off_t offset, bool check = true);
+		bplustree_header *SetToHeader();
+		void Unset();
+
+		status_t Free(Transaction *transaction, off_t offset);
+		status_t Allocate(Transaction *transaction, bplustree_node **node, off_t *offset);
+		status_t WriteBack(Transaction *transaction);
+
+		/** The node this object currently refers to, NULL if there is none. */
+		bplustree_node *Node() const { return fNode; }
+
+	protected:
+		bplustree_node	*InternalSetTo(off_t offset);
+
+		BPlusTree		*fTree;
+		bplustree_node	*fNode;
+		uint8			*fBlock;
+		off_t			fBlockNumber;
+};
+
+
+//******** B+tree class *********
+
+/**	The B+tree itself. It operates on an Inode stream, and offers typed
+ *	Insert()/Remove() convenience wrappers next to the generic versions that
+ *	take the key as raw bytes.
+ *	The class cannot be copied (copy constructor and assignment operator are
+ *	private and not implemented).
+ */
+class BPlusTree {
+	public:
+		BPlusTree(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
+		BPlusTree(Inode *stream);
+		BPlusTree();
+		~BPlusTree();
+
+		status_t	SetTo(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
+		status_t	SetTo(Inode *stream);
+		status_t	SetStream(Inode *stream);
+
+		status_t	InitCheck();
+		status_t	Validate();
+
+		// generic versions, the key is passed as raw bytes
+		status_t	Remove(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
+		status_t	Insert(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
+
+		// typed wrappers (inline, see below); they return B_BAD_TYPE if
+		// the tree's data type does not match the key's type
+		status_t	Remove(Transaction *transaction, const char *key, off_t value);
+		status_t	Insert(Transaction *transaction, const char *key, off_t value);
+		status_t	Insert(Transaction *transaction, int32 key, off_t value);
+		status_t	Insert(Transaction *transaction, uint32 key, off_t value);
+		status_t	Insert(Transaction *transaction, int64 key, off_t value);
+		status_t	Insert(Transaction *transaction, uint64 key, off_t value);
+		status_t	Insert(Transaction *transaction, float key, off_t value);
+		status_t	Insert(Transaction *transaction, double key, off_t value);
+
+		status_t	Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
+		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);
+
+		static int32 TypeCodeToKeyType(type_code code);
+		static int32 ModeToKeyType(mode_t mode);
+
+	private:
+		BPlusTree(const BPlusTree &);
+		BPlusTree &operator=(const BPlusTree &);
+			// no implementation
+
+		int32		CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
+		status_t	FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength,
+						uint16 *index = NULL, off_t *next = NULL);
+		status_t	SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);
+
+		status_t	FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached,
+						off_t *_offset, bplustree_node **_fragment, uint32 *_index);
+		status_t	InsertDuplicate(Transaction *transaction, CachedNode *cached,
+						bplustree_node *node, uint16 index, off_t value);
+		void		InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength,
+						off_t value);
+		status_t	SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other,
+						off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
+						off_t *_value);
+
+		status_t	RemoveDuplicate(Transaction *transaction, bplustree_node *node,
+						CachedNode *cached, uint16 keyIndex, off_t value);
+		void		RemoveKey(bplustree_node *node, uint16 index);
+
+		void		UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex,
+						uint16 splitAt, int8 change);
+		void		AddIterator(TreeIterator *iterator);
+		void		RemoveIterator(TreeIterator *iterator);
+
+	private:
+		// a friend declaration without class-key is only valid since C++11
+		friend class TreeIterator;
+		friend class CachedNode;
+
+		Inode		*fStream;
+		bplustree_header *fHeader;
+		CachedNode	fCachedHeader;
+		int32		fNodeSize;
+		bool		fAllowDuplicates;
+		status_t	fStatus;
+		SimpleLock	fIteratorLock;
+		Chain<TreeIterator> fIterators;
+};
+
+
+//***** helper classes/functions *****
+
+// Free-standing key comparison for the given key type; only declared here,
+// the implementation is not part of this header.
+extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
+				const void *key2, int keyLength2);
+
+/**	Iterates over the entries of a BPlusTree.
+ *	Iterators are linked into a Chain<TreeIterator> via their fNext member;
+ *	BPlusTree is a friend and reaches the private Update() and Stop() hooks.
+ */
+class TreeIterator {
+	public:
+		TreeIterator(BPlusTree *tree);
+		~TreeIterator();
+
+		status_t	Goto(int8 to);
+		status_t	Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength,
+						off_t *value, uint16 *duplicate = NULL);
+		status_t	Find(const uint8 *key, uint16 keyLength);
+
+		// convenience wrappers around Goto() and Traverse() (inline, see below)
+		status_t	Rewind();
+		status_t	GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
+						off_t *value, uint16 *duplicate = NULL);
+		status_t	GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
+						off_t *value, uint16 *duplicate = NULL);
+		void		SkipDuplicates();
+
+#ifdef DEBUG
+		void		Dump();
+#endif
+
+	private:
+		BPlusTree	*fTree;
+
+		off_t		fCurrentNodeOffset;	// traverse position
+		int32		fCurrentKey;
+		off_t		fDuplicateNode;
+		uint16		fDuplicate, fNumDuplicates;
+		bool		fIsFragment;
+
+	private:
+		// a friend declaration without class-key is only valid since C++11
+		friend class Chain<TreeIterator>;
+		friend class BPlusTree;
+
+		void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change);
+		void Stop();
+		TreeIterator *fNext;
+};
+
+// BPlusTree's inline functions (most of them may not be needed)
+
+/**	Removes the string key/value pair from the tree.
+ *	Returns B_BAD_TYPE if the tree does not hold string keys; otherwise the
+ *	result of the generic Remove(), called with strlen(key) as key length.
+ *	\a key must not be NULL.
+ */
+inline status_t
+BPlusTree::Remove(Transaction *transaction, const char *key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_STRING_TYPE)
+		return B_BAD_TYPE;
+	return Remove(transaction, (const uint8 *)key, strlen(key), value);
+}
+
+/**	Inserts the string key/value pair into the tree.
+ *	Returns B_BAD_TYPE if the tree does not hold string keys; otherwise the
+ *	result of the generic Insert(), called with strlen(key) as key length.
+ *	\a key must not be NULL.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, const char *key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_STRING_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (const uint8 *)key, strlen(key), value);
+}
+
+/**	Inserts an int32 key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold int32 keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, int32 key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_INT32_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+/**	Inserts a uint32 key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold uint32 keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, uint32 key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_UINT32_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+/**	Inserts an int64 key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold int64 keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, int64 key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_INT64_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+/**	Inserts a uint64 key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold uint64 keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, uint64 key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_UINT64_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+/**	Inserts a float key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold float keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, float key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_FLOAT_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+/**	Inserts a double key; its in-memory bytes are handed to the generic
+ *	Insert(). Returns B_BAD_TYPE if the tree does not hold double keys.
+ */
+inline status_t
+BPlusTree::Insert(Transaction *transaction, double key, off_t value)
+{
+	// the accessor converts the on-disk field to host byte order first
+	if (fHeader->DataType() != BPLUSTREE_DOUBLE_TYPE)
+		return B_BAD_TYPE;
+	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
+}
+
+
+/************************ TreeIterator inline functions ************************/
+//	#pragma mark -
+
+/**	Shortcut for Goto(BPLUSTREE_BEGIN); hands back Goto()'s status. */
+inline status_t
+TreeIterator::Rewind()
+{
+	const status_t status = Goto(BPLUSTREE_BEGIN);
+	return status;
+}
+
+/**	Shortcut for Traverse() in BPLUSTREE_FORWARD direction; all other
+ *	arguments are passed through unchanged.
+ */
+inline status_t
+TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
+	off_t *value, uint16 *duplicate)
+{
+	const status_t status = Traverse(BPLUSTREE_FORWARD, key, keyLength,
+		maxLength, value, duplicate);
+	return status;
+}
+
+/**	Shortcut for Traverse() in BPLUSTREE_BACKWARD direction; all other
+ *	arguments are passed through unchanged.
+ */
+inline status_t
+TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
+	off_t *value, uint16 *duplicate)
+{
+	const status_t status = Traverse(BPLUSTREE_BACKWARD, key, keyLength,
+		maxLength, value, duplicate);
+	return status;
+}
+
+/************************ bplustree_header inline functions ************************/
+//	#pragma mark -
+
+
+/**	A link is valid if it is BPLUSTREE_NULL, or if it is positive and not
+ *	larger than MaximumSize() - NodeSize().
+ */
+inline bool
+bplustree_header::IsValidLink(off_t link)
+{
+	if (link == BPLUSTREE_NULL)
+		return true;
+
+	// any other link has to be a positive offset
+	if (link <= 0)
+		return false;
+
+	return link <= MaximumSize() - NodeSize();
+}
+
+
+/************************ bplustree_node inline functions ************************/
+//	#pragma mark -
+
+
+/**	Returns the key length array, which starts at the round_up()'d end of
+ *	the key data that follows the node header.
+ */
+inline uint16 *
+bplustree_node::KeyLengths() const
+{
+	char *base = (char *)this;
+	return (uint16 *)(base + round_up(sizeof(bplustree_node) + AllKeyLength()));
+}
+
+
+/**	Returns the value array; it directly follows the NumKeys() entries of
+ *	the key length array.
+ */
+inline off_t *
+bplustree_node::Values() const
+{
+	char *lengths = (char *)KeyLengths();
+	return (off_t *)(lengths + NumKeys() * sizeof(uint16));
+}
+
+
+/**	Returns the key data, which begins right behind the node header. */
+inline uint8 *
+bplustree_node::Keys() const
+{
+	uint8 *base = (uint8 *)this;
+	return base + sizeof(bplustree_node);
+}
+
+
+/**	Returns the number of bytes in use: the round_up()'d size of header plus
+ *	key data, and one uint16 length and one off_t value for every key.
+ */
+inline int32
+bplustree_node::Used() const
+{
+	const size_t perKey = sizeof(uint16) + sizeof(off_t);
+	return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() * perKey;
+}
+
+
+/**	A node counts as leaf if its overflow link is BPLUSTREE_NULL. */
+inline bool 
+bplustree_node::IsLeaf() const
+{
+	const off_t overflow = OverflowLink();
+	return overflow == BPLUSTREE_NULL;
+}
+
+
+/**	Returns the fragment with the given index; the node is treated as an
+ *	array of off_t in which fragments are (NUM_FRAGMENT_VALUES + 1) entries
+ *	apart.
+ */
+inline duplicate_array *
+bplustree_node::FragmentAt(int8 index)
+{
+	off_t *entries = (off_t *)this;
+	return (duplicate_array *)&entries[index * (NUM_FRAGMENT_VALUES + 1)];
+}
+
+
+/**	Returns the duplicate array of this node, which starts at the
+ *	overflow_link field.
+ */
+inline duplicate_array *
+bplustree_node::DuplicateArray()
+{
+	off_t *start = &overflow_link;
+	return (duplicate_array *)start;
+}
+
+
+/**	Returns the type of a link, which is stored in its two topmost bits. */
+inline uint8
+bplustree_node::LinkType(off_t link)
+{
+	const uint64 bits = (uint64)link;
+	return bits >> 62;
+}
+
+
+/**	Builds a link out of its parts: \a type goes into the two topmost bits,
+ *	\a fragmentIndex into the ten lowest bits, and \a link provides the bits
+ *	in between.
+ */
+inline off_t
+bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
+{
+	// shift as unsigned value; shifting a signed 64 bit value into its sign
+	// bit is undefined behaviour
+	const uint64 typeBits = (uint64)type << 62;
+
+	return (off_t)typeBits | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff);
+}
+
+
+/**	Returns true if the link's type shares at least one bit with
+ *	BPLUSTREE_DUPLICATE_NODE or BPLUSTREE_DUPLICATE_FRAGMENT.
+ */
+inline bool 
+bplustree_node::IsDuplicate(off_t link)
+{
+	const uint8 mask = BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT;
+	return (LinkType(link) & mask) != 0;
+}
+
+
+/**	Returns the link with the two type bits and the ten fragment index bits
+ *	masked out.
+ */
+inline off_t
+bplustree_node::FragmentOffset(off_t link)
+{
+	const off_t offsetMask = 0x3ffffffffffffc00LL;
+	return link & offsetMask;
+}
+
+
+/**	Returns the fragment index, i.e. the ten lowest bits of the link. */
+inline uint32
+bplustree_node::FragmentIndex(off_t link)
+{
+	const off_t index = link & 0x3ff;
+	return (uint32)index;
+}
+
+#endif	/* B_PLUS_TREE_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BlockAllocator.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BlockAllocator.cpp
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BlockAllocator.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BlockAllocator.h
@ -0,0 +1,66 @@
+#ifndef BLOCK_ALLOCATOR_H
+#define BLOCK_ALLOCATOR_H
+/* BlockAllocator - block bitmap handling and allocation policies
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Lock.h"
+
+
+class AllocationGroup;
+class Transaction;
+class Volume;
+class Inode;
+struct disk_super_block;
+struct block_run;
+struct check_control;
+struct check_cookie;
+
+
+/**	Declares the block allocator of a Volume (see the file header: block
+ *	bitmap handling and allocation policies). Only the interface is visible
+ *	here; the implementation lives in BlockAllocator.cpp.
+ */
+class BlockAllocator {
+	public:
+		BlockAllocator(Volume *volume);
+		~BlockAllocator();
+
+		status_t Initialize(bool full = true);
+		status_t InitializeAndClearBitmap(Transaction &transaction);
+
+		// allocating and freeing block runs
+		status_t AllocateForInode(Transaction *transaction, const block_run *parent,
+					mode_t type, block_run &run);
+		status_t Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks,
+					block_run &run, uint16 minimum = 1);
+		status_t Free(Transaction *transaction, block_run run);
+
+		status_t AllocateBlocks(Transaction *transaction, int32 group, uint16 start,
+					uint16 numBlocks, uint16 minimum, block_run &run);
+
+		// NOTE(review): presumably the file system check interface driven
+		// via check_control - confirm against BlockAllocator.cpp
+		status_t StartChecking(check_control *control);
+		status_t StopChecking(check_control *control);
+		status_t CheckNextNode(check_control *control);
+
+		status_t CheckBlockRun(block_run run, const char *type = NULL, check_control *control = NULL, bool allocated = true);
+		status_t CheckInode(Inode *inode, check_control *control = NULL);
+
+		size_t BitmapSize() const;
+
+	private:
+		bool IsValidCheckControl(check_control *control);
+		bool CheckBitmapIsUsedAt(off_t block) const;
+		void SetCheckBitmapAt(off_t block);
+
+		static status_t initialize(BlockAllocator *);
+
+		Volume			*fVolume;
+		Semaphore		fLock;
+		AllocationGroup	*fGroups;
+		int32			fNumGroups;
+		uint32			fBlocksPerGroup;
+
+		uint32			*fCheckBitmap;
+		check_cookie	*fCheckCookie;
+};
+
+#endif	/* BLOCK_ALLOCATOR_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BufferPool.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BufferPool.cpp
@ -0,0 +1,162 @@
+/* BufferPool - a buffer pool for uncached file access
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "BufferPool.h"
+#include "Debug.h"
+
+#include <util/kernel_cpp.h>
+
+
+// Number of buffers RequestBuffers() adds to the pool, and ReleaseBuffers()
+// removes from it, per call.
+const uint32 kNumBuffers = 8;
+
+
+/**	Creates the two semaphores of the pool: "buffer lock" (initial count 1)
+ *	protects the free list, "free buffers" (initial count 0) counts the
+ *	buffers in it. Whether creation succeeded is reported by InitCheck().
+ */
+BufferPool::BufferPool()
+{
+	// the pool starts without any buffers
+	fFirstFree = NULL;
+
+	fLock = create_sem(1, "buffer lock");
+	fFreeBuffers = create_sem(0, "free buffers");
+
+#ifndef USER
+	set_sem_owner(fLock, B_SYSTEM_TEAM);
+	set_sem_owner(fFreeBuffers, B_SYSTEM_TEAM);
+#endif
+}
+
+
+/**	Deletes both semaphores and frees every buffer that is still part of
+ *	the free list.
+ */
+BufferPool::~BufferPool()
+{
+	delete_sem(fFreeBuffers);
+
+	acquire_sem(fLock);
+		// the return value doesn't interest us anymore
+
+	// each free buffer holds the pointer to its successor in its first bytes
+	for (void **current = fFirstFree; current != NULL;) {
+		void **following = (void **)*current;
+		free(current);
+		current = following;
+	}
+
+	delete_sem(fLock);
+}
+
+
+/**	Returns B_OK if both semaphores could be created in the constructor,
+ *	B_ERROR otherwise.
+ */
+status_t 
+BufferPool::InitCheck()
+{
+	const bool valid = fLock >= B_OK && fFreeBuffers >= B_OK;
+
+	return valid ? B_OK : B_ERROR;
+}
+
+
+/**	Allocates kNumBuffers buffers of \a blockSize bytes each, and adds them
+ *	to the free list.
+ *	Returns B_BAD_VALUE if \a blockSize is too small to hold the free list
+ *	link, B_NO_MEMORY if an allocation failed, or the error from acquiring
+ *	the lock; in all of these cases no buffer is added to the pool and
+ *	nothing is leaked.
+ */
+status_t 
+BufferPool::RequestBuffers(uint32 blockSize)
+{
+	// a free buffer stores the pointer to the next one in its first bytes
+	if (blockSize < sizeof(void *)) {
+		RETURN_ERROR(B_BAD_VALUE);
+	}
+
+	void **buffers[kNumBuffers];
+
+	// allocate and connect buffers
+
+	for (uint32 i = 0; i < kNumBuffers; i++) {
+		buffers[i] = (void **)malloc(blockSize);
+		if (buffers[i] == NULL) {
+			// free already allocated buffers, from the last one down to
+			// the first (the former loop never made any progress, and
+			// freed the same buffer over and over again)
+			while (i-- > 0)
+				free(buffers[i]);
+			RETURN_ERROR(B_NO_MEMORY);
+		}
+		if (i > 0)
+			*(buffers[i]) = buffers[i - 1];
+	}
+
+	// add the buffers to the free buffers queue
+
+	status_t status = acquire_sem(fLock);
+	if (status == B_OK) {
+		// the chain runs from the last buffer down to the first one, which
+		// gets connected to the former head of the list
+		*(buffers[0]) = fFirstFree;
+		fFirstFree = buffers[kNumBuffers - 1];
+		release_sem(fLock);
+		release_sem_etc(fFreeBuffers, kNumBuffers, B_DO_NOT_RESCHEDULE);
+	} else {
+		for (uint32 i = 0; i < kNumBuffers; i++)
+			free(buffers[i]);
+	}
+
+	RETURN_ERROR(status);
+}
+
+
+/**	Takes kNumBuffers buffers out of the pool and frees them; waits on the
+ *	"free buffers" semaphore until that many are available.
+ *	Returns the error from acquiring either semaphore; in this case the pool
+ *	is left unchanged.
+ */
+status_t 
+BufferPool::ReleaseBuffers()
+{
+	status_t status = acquire_sem_etc(fFreeBuffers, kNumBuffers, 0, 0);
+	if (status < B_OK)
+		return status;
+
+	status = acquire_sem(fLock);
+	if (status < B_OK) {
+		// the buffers stay in the list, so they have to be accounted for
+		// again (GetBuffer() does the same on this error path)
+		release_sem_etc(fFreeBuffers, kNumBuffers, B_DO_NOT_RESCHEDULE);
+		return status;
+	}
+
+	void **buffer = fFirstFree;
+	for (uint32 i = 0; i < kNumBuffers && buffer; i++) {
+		void **nextBuffer = (void **)*buffer;
+
+		free(buffer);
+		buffer = nextBuffer;
+	}
+	// whatever follows the freed buffers is the new head of the list
+	fFirstFree = buffer;
+
+	release_sem(fLock);
+	return B_OK;
+}
+
+
+/**	Takes the first buffer off the free list and stores it in \a _buffer.
+ *	Waits on the "free buffers" semaphore first; if the lock cannot be
+ *	acquired afterwards, that semaphore is released again and the error is
+ *	returned.
+ */
+status_t
+BufferPool::GetBuffer(void **_buffer)
+{
+	status_t status = acquire_sem(fFreeBuffers);
+	if (status < B_OK)
+		return status;
+
+	status = acquire_sem(fLock);
+	if (status < B_OK) {
+		release_sem(fFreeBuffers);
+		return status;
+	}
+
+	// unlink the head of the list
+	void **head = fFirstFree;
+	fFirstFree = (void **)*head;
+
+	release_sem(fLock);
+
+	*_buffer = head;
+	return B_OK;
+}
+
+
+/**	Puts \a _buffer back at the head of the free list and releases the
+ *	"free buffers" semaphore once.
+ *	Returns B_BAD_VALUE for a NULL buffer, or the error from acquiring the
+ *	lock.
+ */
+status_t 
+BufferPool::PutBuffer(void *_buffer)
+{
+	if (_buffer == NULL)
+		return B_BAD_VALUE;
+
+	const status_t status = acquire_sem(fLock);
+	if (status < B_OK)
+		return status;
+
+	// the first bytes of the buffer take the link to the former head
+	void **link = (void **)_buffer;
+	*link = fFirstFree;
+	fFirstFree = link;
+
+	release_sem(fLock);
+	release_sem(fFreeBuffers);
+
+	return B_OK;
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/BufferPool.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/BufferPool.h
@ -0,0 +1,31 @@
+#ifndef BUFFER_POOL_H
+#define BUFFER_POOL_H
+/* BufferPool - a buffer pool for uncached file access
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <OS.h>
+
+
+/**	A pool of equally handled buffers that are kept in a singly linked free
+ *	list. RequestBuffers()/ReleaseBuffers() grow and shrink the pool,
+ *	GetBuffer()/PutBuffer() borrow and return a single buffer.
+ */
+class BufferPool {
+	public:
+		BufferPool();
+		~BufferPool();
+
+		// B_OK if both semaphores could be created
+		status_t InitCheck();
+
+		status_t RequestBuffers(uint32 blockSize);
+		status_t ReleaseBuffers();
+
+		status_t GetBuffer(void **_buffer);
+		status_t PutBuffer(void *buffer);
+
+	private:
+		// fLock guards fFirstFree, fFreeBuffers counts the buffers in the list
+		sem_id	fLock, fFreeBuffers;
+		// head of the free list; every free buffer starts with the pointer
+		// to the next one
+		void	**fFirstFree;
+};
+
+#endif	/* BUFFER_POOL_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Chain.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Chain.h
@ -0,0 +1,55 @@
+#ifndef CHAIN_H
+#define CHAIN_H
+/* Chain - a chain implementation; it's used for the callback management
+**		throughout the code (currently TreeIterator, and AttributeIterator).
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+/** The Link class you want to use with the Chain class needs to have
+ *	a "fNext" member which is accessible from within the Chain class.
+ */
+
+/**	A minimal, intrusive, singly linked list. The Link class has to provide
+ *	an "fNext" pointer member the chain can access. The chain never owns
+ *	its links, and does no locking of its own.
+ */
+template<class Link> class Chain {
+	public:
+		Chain()
+			:
+			fFirst(NULL)
+		{
+		}
+
+		/** Puts \a link at the head of the chain. */
+		void Add(Link *link)
+		{
+			link->fNext = fFirst;
+			fFirst = link;
+		}
+
+		/** Unlinks \a link from the chain. Does nothing if \a link is NULL
+		 *	or not part of the chain (the former version dereferenced a
+		 *	NULL pointer in both cases).
+		 */
+		void Remove(Link *link)
+		{
+			// search list for the correct callback to remove
+			Link *last = NULL;
+			Link *entry = fFirst;
+			while (entry != NULL && entry != link) {
+				last = entry;
+				entry = entry->fNext;
+			}
+
+			if (entry == NULL) {
+				// end of chain reached without finding the link
+				return;
+			}
+
+			if (last != NULL)
+				last->fNext = entry->fNext;
+			else
+				fFirst = entry->fNext;
+		}
+
+		/** Returns the link that follows \a last, or the first link of the
+		 *	chain if \a last is NULL.
+		 */
+		Link *Next(Link *last)
+		{
+			if (last == NULL)
+				return fFirst;
+
+			return last->fNext;
+		}
+
+	private:
+		Link	*fFirst;
+};
+
+#endif	/* CHAIN_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Debug.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Debug.cpp
@ -0,0 +1,298 @@
+/* Debug - debug stuff
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** Some code is based on work previously done by Marcus Overhagen
+**
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Debug.h"
+#include "BPlusTree.h"
+#include "Inode.h"
+
+#include <KernelExport.h>
+
+#include <time.h>
+
+#define Print __out
+
+
+/**	Converts the four bytes of \a id (most significant byte first) into a
+ *	printable, null terminated string; every byte that is not a printable
+ *	ASCII character is replaced by a '.'.
+ *	The result lives in a static buffer that is overwritten by the next
+ *	call, so the function is neither reentrant nor thread-safe.
+ */
+char *
+get_tupel(uint32 id)
+{
+	static unsigned char tupel[5];
+
+	tupel[0] = 0xff & (id >> 24);
+	tupel[1] = 0xff & (id >> 16);
+	tupel[2] = 0xff & (id >> 8);
+	tupel[3] = 0xff & (id);
+	tupel[4] = 0;
+	for (int16 i = 0;i < 4;i++) {
+		// 126 ('~') is the last printable ASCII character; the former
+		// bound of 128 let DEL (127) and 0x80 pass
+		if (tupel[i] < ' ' || tupel[i] > 126)
+			tupel[i] = '.';
+	}
+
+	return (char *)tupel;
+}
+
+
+/**	Prints \a prefix followed by the run as
+ *	"(allocation_group, start, length)" and a newline.
+ */
+void
+dump_block_run(const char *prefix, block_run &run)
+{
+	Print("%s(%ld, %d, %d)\n", prefix, run.allocation_group, run.start, run.length);
+}
+
+
+/**	Prints the address and the member variables of the in-memory Inode
+ *	object (as opposed to dump_inode(bfs_inode *), which prints the on-disk
+ *	structure).
+ */
+void 
+dump_inode(Inode &inode)
+{
+	Print("Inode (%p) {\n", &inode);
+	Print("\tfVolume = %p\n", inode.fVolume);
+	Print("\tfBlockNumber = 0x%16Lx\n", inode.fBlockNumber);
+	Print("\tfNode = %p\n", inode.Node());
+	Print("\tfTree = %p\n", inode.fTree);
+	Print("\tfAttributes = %p\n", inode.fAttributes);
+	Print("\tfOldSize = 0x%16Lx\n", inode.fOldSize);
+	Print("\tfOldLastModified = 0x%16Lx\n", inode.fOldLastModified);
+	Print("}\n");
+}
+
+
+/**	Prints all fields of the super block. The three magic values are also
+ *	shown as characters (via get_tupel()), and are marked "valid" or
+ *	"INVALID" depending on whether they match the SUPER_BLOCK_MAGIC*
+ *	constants. The fields are printed as stored, without byte-order
+ *	conversion.
+ */
+void
+dump_super_block(disk_super_block *superBlock)
+{
+	Print("disk_super_block:\n");
+	Print("  name           = %s\n", superBlock->name);
+	Print("  magic1         = %#08lx (%s) %s\n", superBlock->magic1, get_tupel(superBlock->magic1), (superBlock->magic1 == SUPER_BLOCK_MAGIC1 ? "valid" : "INVALID"));
+	Print("  fs_byte_order  = %#08lx (%s)\n", superBlock->fs_byte_order, get_tupel(superBlock->fs_byte_order));
+	Print("  block_size     = %lu\n", superBlock->block_size);
+	Print("  block_shift    = %lu\n", superBlock->block_shift);
+	Print("  num_blocks     = %Lu\n", superBlock->num_blocks);
+	Print("  used_blocks    = %Lu\n", superBlock->used_blocks);
+	Print("  inode_size     = %lu\n", superBlock->inode_size);
+	Print("  magic2         = %#08lx (%s) %s\n", superBlock->magic2, get_tupel(superBlock->magic2), (superBlock->magic2 == (int)SUPER_BLOCK_MAGIC2 ? "valid" : "INVALID"));
+	Print("  blocks_per_ag  = %lu\n", superBlock->blocks_per_ag);
+	Print("  ag_shift       = %lu (%ld bytes)\n", superBlock->ag_shift, 1L << superBlock->ag_shift);
+	Print("  num_ags        = %lu\n", superBlock->num_ags);
+	Print("  flags          = %#08lx (%s)\n", superBlock->flags, get_tupel(superBlock->flags));
+	dump_block_run("  log_blocks     = ", superBlock->log_blocks);
+	Print("  log_start      = %Lu\n", superBlock->log_start);
+	Print("  log_end        = %Lu\n", superBlock->log_end);
+	Print("  magic3         = %#08lx (%s) %s\n", superBlock->magic3, get_tupel(superBlock->magic3), (superBlock->magic3 == SUPER_BLOCK_MAGIC3 ? "valid" : "INVALID"));
+	dump_block_run("  root_dir       = ", superBlock->root_dir);
+	dump_block_run("  indices        = ", superBlock->indices);
+}
+
+
+/**	Prints the data stream: the direct, indirect, and double indirect block
+ *	runs (runs for which IsZero() is true are left out), the maximum range
+ *	of each level, and the stream's size.
+ */
+void
+dump_data_stream(data_stream *stream)
+{
+	Print("data_stream:\n");
+	for (int i = 0; i < NUM_DIRECT_BLOCKS; i++) {
+		if (!stream->direct[i].IsZero()) {
+			Print("  direct[%02d]                = ",i);
+			dump_block_run("",stream->direct[i]);
+		}
+	}
+	Print("  max_direct_range          = %Lu\n", stream->max_direct_range);
+
+	if (!stream->indirect.IsZero())
+		dump_block_run("  indirect                  = ", stream->indirect);
+
+	Print("  max_indirect_range        = %Lu\n", stream->max_indirect_range);
+
+	if (!stream->double_indirect.IsZero())
+		dump_block_run("  double_indirect           = ", stream->double_indirect);
+
+	Print("  max_double_indirect_range = %Lu\n", stream->max_double_indirect_range);
+	Print("  size                      = %Lu\n", stream->size);
+}
+
+
+/**	Prints all fields of the on-disk inode, including its data stream (via
+ *	dump_data_stream()) and the four pad values.
+ *	The short symlink is only printed if the mode says S_ISLNK() and the
+ *	INODE_LONG_SYMLINK flag is not set; "-" is printed otherwise.
+ */
+void
+dump_inode(bfs_inode *inode)
+{
+	Print("inode:\n");
+	Print("  magic1             = %08lx (%s) %s\n", inode->magic1,
+		get_tupel(inode->magic1), (inode->magic1 == INODE_MAGIC1 ? "valid" : "INVALID"));
+	dump_block_run(	"  inode_num          = ", inode->inode_num);
+	Print("  uid                = %lu\n", inode->uid);
+	Print("  gid                = %lu\n", inode->gid);
+	Print("  mode               = %08lx\n", inode->mode);
+	Print("  flags              = %08lx\n", inode->flags);
+	// the times are printed raw, and shifted right by INODE_TIME_SHIFT
+	Print("  create_time        = %Ld (%Ld)\n", inode->create_time,
+		inode->create_time >> INODE_TIME_SHIFT);
+	Print("  last_modified_time = %Ld (%Ld)\n", inode->last_modified_time,
+		inode->last_modified_time >> INODE_TIME_SHIFT);
+	dump_block_run(	"  parent             = ", inode->parent);
+	dump_block_run(	"  attributes         = ", inode->attributes);
+	Print("  type               = %lu\n", inode->type);
+	Print("  inode_size         = %lu\n", inode->inode_size);
+	Print("  etc                = %#08lx\n", inode->etc);
+	Print("  short_symlink      = %s\n",
+		S_ISLNK(inode->mode) && (inode->flags & INODE_LONG_SYMLINK) == 0 ?
+			inode->short_symlink : "-");
+	dump_data_stream(&(inode->data));
+	Print("  --\n  pad[0]             = %08lx\n", inode->pad[0]);
+	Print("  pad[1]             = %08lx\n", inode->pad[1]);
+	Print("  pad[2]             = %08lx\n", inode->pad[2]);
+	Print("  pad[3]             = %08lx\n", inode->pad[3]);
+}
+
+
+/**	Prints all fields of the B+tree header; the magic is marked "valid" or
+ *	"INVALID" depending on whether it equals BPLUSTREE_MAGIC. The raw fields
+ *	are printed, not the values of the byte-order converting accessors.
+ */
+void
+dump_bplustree_header(bplustree_header *header)
+{
+	Print("bplustree_header:\n");
+	Print("  magic                = %#08lx (%s) %s\n", header->magic,
+		get_tupel(header->magic), (header->magic == BPLUSTREE_MAGIC ? "valid" : "INVALID"));
+	Print("  node_size            = %lu\n", header->node_size);
+	Print("  max_number_of_levels = %lu\n", header->max_number_of_levels);
+	Print("  data_type            = %lu\n", header->data_type);
+	Print("  root_node_pointer    = %Ld\n", header->root_node_pointer);
+	Print("  free_node_pointer    = %Ld\n", header->free_node_pointer);
+	Print("  maximum_size         = %Lu\n", header->maximum_size);
+}
+
+
+#define DUMPED_BLOCK_SIZE 16
+
+/**	Prints a hex dump of \a size bytes starting at \a buffer.
+ *	Every output line covers DUMPED_BLOCK_SIZE bytes: first as two-digit hex
+ *	values in groups of four (padded with spaces if the data ends early),
+ *	then as characters, with a '.' for everything that is not printable
+ *	ASCII.
+ */
+void
+dump_block(const char *buffer,int size)
+{
+	for (int start = 0; start < size; start += DUMPED_BLOCK_SIZE) {
+		const int end = start + DUMPED_BLOCK_SIZE;
+
+		// hex column
+		for (int i = start; i < end; i++) {
+			if (!(i % 4))
+				Print(" ");
+
+			if (i >= size)
+				Print("  ");
+			else
+				Print("%02x", (unsigned char)buffer[i]);
+		}
+		Print("  ");
+
+		// character column; the byte is looked at as unsigned value, so
+		// that the result does not depend on the signedness of char
+		for (int i = start; i < end && i < size; i++) {
+			const unsigned char c = (unsigned char)buffer[i];
+
+			if (c < 0x20 || c > 0x7e)
+				Print(".");
+			else
+				Print("%c", c);
+		}
+		Print("\n");
+	}
+}
+
+
+/**	Prints the header fields of a B+tree node, and, if \a header is given,
+ *	its keys and values as well.
+ *	Nodes with an implausible key count are only dumped as raw block. Keys
+ *	are interpreted according to the header's data_type (string, int32,
+ *	uint32, and int64 are supported, everything else is shown as "???").
+ *	If \a volume is given, the value is also printed as block run.
+ */
+void
+dump_bplustree_node(bplustree_node *node,bplustree_header *header,Volume *volume)
+{
+	Print("bplustree_node:\n");
+	Print("  left_link      = %Ld\n", node->left_link);
+	Print("  right_link     = %Ld\n", node->right_link);
+	Print("  overflow_link  = %Ld\n", node->overflow_link);
+	Print("  all_key_count  = %u\n", node->all_key_count);
+	Print("  all_key_length = %u\n", node->all_key_length);
+
+	if (header == NULL)
+		return;
+
+	// nodes that fail these plausibility checks are only dumped as raw data
+	if (node->all_key_count > node->all_key_length
+		|| uint32(node->all_key_count * 10) > (uint32)header->node_size
+		|| node->all_key_count == 0) {
+		Print("\n");
+		dump_block((char *)node, header->node_size/*, sizeof(off_t)*/);
+		return;
+	}
+
+	// The bounds of the node as real pointers; the former check compared
+	// addresses that were truncated to 32 bits.
+	const uint8 *nodeStart = (const uint8 *)node;
+	const uint8 *nodeEnd = nodeStart + header->node_size;
+
+	Print("\n");
+	for (int32 i = 0;i < node->all_key_count;i++) {
+		uint16 length;
+		char buffer[256], *key = (char *)node->KeyAt(i, &length);
+		if (length > 255 || length == 0) {
+			Print("  %2ld. Invalid length (%u)!!\n", i, length);
+			dump_block((char *)node, header->node_size/*, sizeof(off_t)*/);
+			break;
+		}
+		memcpy(buffer, key, length);
+		buffer[length] = '\0';
+
+		off_t *value = node->Values() + i;
+		const uint8 *valueStart = (const uint8 *)value;
+
+		// the complete value has to lie within the node
+		if (valueStart < nodeStart || valueStart + sizeof(off_t) > nodeEnd)
+			Print("  %2ld. Invalid Offset!!\n", i);
+		else {
+			Print("  %2ld. ", i);
+			if (header->data_type == BPLUSTREE_STRING_TYPE)
+				Print("\"%s\"", buffer);
+			else if (header->data_type == BPLUSTREE_INT32_TYPE)
+				Print("int32 = %ld (0x%lx)", *(int32 *)&buffer, *(int32 *)&buffer);
+			else if (header->data_type == BPLUSTREE_UINT32_TYPE)
+				Print("uint32 = %lu (0x%lx)", *(uint32 *)&buffer, *(uint32 *)&buffer);
+			else if (header->data_type == BPLUSTREE_INT64_TYPE)
+				Print("int64 = %Ld (0x%Lx)", *(int64 *)&buffer, *(int64 *)&buffer);
+			else
+				Print("???");
+
+			// strip the link type stored in the two topmost bits
+			off_t offset = *value & 0x3fffffffffffffffLL;
+			Print(" (%d bytes) -> %Ld", length, offset);
+			if (volume != NULL) {
+				block_run run = volume->ToBlockRun(offset);
+				Print(" (%ld, %d)", run.allocation_group, run.start);
+			}
+			if (bplustree_node::LinkType(*value) == BPLUSTREE_DUPLICATE_FRAGMENT)
+				Print(" (duplicate fragment %Ld)\n", *value & 0x3ff);
+			else if (bplustree_node::LinkType(*value) == BPLUSTREE_DUPLICATE_NODE)
+				Print(" (duplicate node)\n");
+			else
+				Print("\n");
+		}
+	}
+}
+
+
+//	#pragma mark -
+
+
+#ifndef USER
+//#warn Don't mount more than once... would register twice the debugger commands!
+
+/**	Kernel debugger command "obfsinode": evaluates its first argument as
+ *	the address of an Inode object, and dumps that object.
+ *	Prints a usage message if the argument is missing, and refuses to
+ *	dereference a NULL address. Always returns 0.
+ */
+static int
+dbg_inode(int argc, char **argv)
+{
+	if (argc < 2) {
+		kprintf("usage: obfsinode ptr-to-inode\n");
+		return 0;
+	}
+
+	Inode *inode = (Inode *)parse_expression(argv[1]);
+	if (inode == NULL) {
+		// dereferencing it would fault within the kernel debugger
+		kprintf("obfsinode: invalid (NULL) inode address\n");
+		return 0;
+	}
+
+	dump_inode(*inode);
+
+	return B_OK;
+}
+
+#endif
+
+/**	Unregisters the "obfsinode" kernel debugger command again; does nothing
+ *	if USER is defined.
+ */
+void
+remove_debugger_commands()
+{
+#ifndef USER
+	remove_debugger_command("obfsinode", dbg_inode);
+#endif
+}
+
+
+/**	Registers dbg_inode() as kernel debugger command "obfsinode"; does
+ *	nothing if USER is defined.
+ */
+void
+add_debugger_commands()
+{
+#ifndef USER
+	add_debugger_command("obfsinode", dbg_inode, "dump an Inode object");
+#endif
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Debug.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Debug.h
@ -0,0 +1,107 @@
+#ifndef DEBUG_H
+#define DEBUG_H
+/* Debug - debug stuff
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+
+#ifdef DEBUG
+#	include <string.h>
+#endif
+
+// __out is the printf()-style function all output macros below print with:
+// printf() if USER is defined, dprintf() otherwise.
+#ifdef USER
+#	include <stdio.h>
+#	define __out printf
+#else
+#	include <null.h>
+#	define __out dprintf
+#endif
+
+// Which debugger should be used when?
+// The DEBUGGER() macro actually has no effect if DEBUG is not defined,
+// use the DIE() macro if you really want to die.
+// The argument of both macros is pasted directly behind the function name,
+// so it has to be a complete parenthesized argument list, e.g.
+// DEBUGGER(("message")).
+#ifdef DEBUG
+#	ifdef USER
+#		define DEBUGGER(x) debugger x
+#	else
+#		define DEBUGGER(x) kernel_debugger x
+#	endif
+#else
+#	define DEBUGGER(x) ;
+#endif
+
+// DIE() enters the debugger regardless of whether DEBUG is defined
+#ifdef USER
+#	define DIE(x) debugger x
+#else
+#	define DIE(x) kernel_debugger x
+#endif
+
+// Short overview of the debug output macros:
+//	PRINT()
+//		is for general messages that should not appear in a release build
+//		(it expands to nothing unless DEBUG is defined)
+//	FATAL()
+//		this is for fatal messages, when something has really gone wrong
+//	INFORM()
+//		general information, such as disk size, etc.
+//	REPORT_ERROR(status_t)
+//		prints out error information
+//	RETURN_ERROR(status_t)
+//		calls REPORT_ERROR() and returns the value
+//	D()
+//		the statements in D() are only included if DEBUG is defined
+
+// Note: most of these macros expand to a braced block or to a lone ';'
+// rather than to a single expression. The macros taking printf()-style
+// arguments (PRINT, FATAL, INFORM, FUNCTION_START) paste their argument
+// directly behind __out, so it must be a parenthesized argument list:
+// PRINT(("value = %d\n", value));
+#ifdef DEBUG
+	// debug build: every macro is active
+	#define PRINT(x) { __out("bfs: "); __out x; }
+	// prints the current function, the line, and strerror() of the status
+	#define REPORT_ERROR(status) \
+		__out("bfs: %s:%d: %s\n", __FUNCTION__, __LINE__, strerror(status));
+	// evaluates "err" exactly once, reports it if it is below B_OK, and
+	// returns it from the calling function
+	#define RETURN_ERROR(err) { status_t _status = err; if (_status < B_OK) REPORT_ERROR(_status); return _status;}
+	#define FATAL(x) { __out("bfs: "); __out x; }
+	#define INFORM(x) { __out("bfs: "); __out x; }
+//	#define FUNCTION() __out("bfs: %s()\n",__FUNCTION__);
+	#define FUNCTION_START(x) { __out("bfs: %s() ",__FUNCTION__); __out x; }
+	#define FUNCTION() ;
+//	#define FUNCTION_START(x) ;
+	#define D(x) {x;};
+	// enters the debugger (via DEBUGGER()) if the condition does not hold
+	#define ASSERT(x) { if (!(x)) DEBUGGER(("bfs: assert failed: " #x "\n")); }
+#else
+	// non-debug build: only FATAL() and INFORM() still produce output,
+	// RETURN_ERROR() is a plain return, everything else expands to nothing
+	#define PRINT(x) ;
+	#define REPORT_ERROR(status) ;
+	#define RETURN_ERROR(status) return status;
+	#define FATAL(x) { __out("bfs: "); __out x; }
+	#define INFORM(x) { __out("bfs: "); __out x; }
+	#define FUNCTION() ;
+	#define FUNCTION_START(x) ;
+	#define D(x) ;
+	#define ASSERT(x) ;
+#endif
+
+// The declarations below are only visible if DEBUG is defined
+#ifdef DEBUG
+	// forward declarations of the types used in the prototypes below
+	struct block_run;
+	struct bplustree_header;
+	struct bplustree_node;
+	struct data_stream;
+	struct bfs_inode;
+	struct disk_super_block;
+	class Inode;
+	class Volume;
+	
+	// some structure dump functions
+	extern void dump_block_run(const char *prefix, block_run &run);
+	extern void dump_inode(Inode &inode);
+	extern void dump_super_block(disk_super_block *superBlock);
+	extern void dump_data_stream(data_stream *stream);
+	extern void dump_inode(bfs_inode *inode);
+	extern void dump_bplustree_header(bplustree_header *header);
+	extern void dump_bplustree_node(bplustree_node *node,
+					bplustree_header *header = NULL, Volume *volume = NULL);
+	extern void dump_block(const char *buffer, int size);
+
+	// registration of the kernel debugger commands
+	extern void remove_debugger_commands();
+	extern void add_debugger_commands();
+#endif
+
+#endif	/* DEBUG_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Index.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Index.cpp
@ -0,0 +1,372 @@
+/* Index - index access functions
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Debug.h"
+#include "Index.h"
+#include "Volume.h"
+#include "Inode.h"
+#include "BPlusTree.h"
+
+#include <util/kernel_cpp.h>
+#include <TypeConstants.h>
+
+// B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
+// don't need the whole file here; the type can't change anyway
+#ifndef _MIME_H
+#	define B_MIME_STRING_TYPE 'MIMS'
+#endif
+
+
+/**	Creates an index object for the given volume that is not attached to
+ *	any index yet; use SetTo() or Create() to attach it to one.
+ */
+
+Index::Index(Volume *volume)
+	:
+	fVolume(volume),
+	fNode(NULL),
+	fName(NULL)
+		// Update() compares the requested name against fName even if SetTo()
+		// has never been called, so it must not be left uninitialized
+{
+}
+
+
+/**	Puts back the vnode of the index node, if the object holds one. */
+
+Index::~Index()
+{
+	if (fNode != NULL)
+		put_vnode(fVolume->ID(), fNode->ID());
+}
+
+
+/**	Detaches the object from its current index: puts back the vnode of
+ *	the index node and forgets both the node and the name.
+ *	Does nothing if no index node is set.
+ */
+
+void
+Index::Unset()
+{
+	if (fNode != NULL) {
+		put_vnode(fVolume->ID(), fNode->ID());
+
+		fNode = NULL;
+		fName = NULL;
+	}
+}
+
+
+/** Sets the index to specified one. Returns an error if the index could
+ *	not be found or initialized.
+ *	Note, Index::Update() may be called on the object even if this method
+ *	failed previously. In this case, it will only update live queries for
+ *	the updated attribute.
+ */
+
+status_t 
+Index::SetTo(const char *name)
+{
+	// remove the old node, if the index is set for the second time
+	Unset();
+	
+	fName = name;
+		// only stores the pointer, so it assumes that it will stay constant
+		// in further comparisons (currently only used in Index::Update())
+
+	// Note, the name is saved even if the index couldn't be initialized!
+	// This is used to optimize Index::Update() in case there is no index
+
+	Inode *indices = fVolume->IndicesNode();
+	if (indices == NULL)
+		return B_ENTRY_NOT_FOUND;
+
+	BPlusTree *tree;
+	if (indices->GetTree(&tree) != B_OK)
+		return B_BAD_VALUE;
+
+	vnode_id id;
+	status_t status = tree->Find((uint8 *)name, (uint16)strlen(name), &id);
+	if (status != B_OK)
+		return status;
+
+	Vnode vnode(fVolume, id);
+	if (vnode.Get(&fNode) != B_OK)
+		return B_ENTRY_NOT_FOUND;
+
+	if (fNode == NULL) {
+		FATAL(("fatal error at Index::InitCheck(), get_vnode() returned NULL pointer\n"));
+		return B_ERROR;
+	}
+
+	vnode.Keep();
+	return B_OK;
+}
+
+
+/**	Maps the index type flag stored in the mode of the index node to the
+ *	corresponding standard type code (B_INT32_TYPE, B_STRING_TYPE, ...).
+ *	Returns zero if no index node is set, or if the type is not known (can
+ *	only happen if the mode field is corrupted somehow or not that of an
+ *	index).
+ */
+
+uint32
+Index::Type()
+{
+	if (fNode == NULL)
+		return 0;
+
+	const int32 kTypeMask = S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX
+		| S_LONG_LONG_INDEX | S_ULONG_LONG_INDEX | S_FLOAT_INDEX
+		| S_DOUBLE_INDEX;
+
+	switch (fNode->Mode() & kTypeMask) {
+		case S_STR_INDEX:
+			return B_STRING_TYPE;
+		case S_INT_INDEX:
+			return B_INT32_TYPE;
+		case S_UINT_INDEX:
+			return B_UINT32_TYPE;
+		case S_LONG_LONG_INDEX:
+			return B_INT64_TYPE;
+		case S_ULONG_LONG_INDEX:
+			return B_UINT64_TYPE;
+		case S_FLOAT_INDEX:
+			return B_FLOAT_TYPE;
+		case S_DOUBLE_INDEX:
+			return B_DOUBLE_TYPE;
+
+		default:
+			FATAL(("index has unknown type!\n"));
+			return 0;
+	}
+}
+
+
+/**	Returns the fixed size of the keys of this index in bytes.
+ *	Returns zero if no index node is set, for string indices (which have
+ *	no fixed key size), and if the index type is not known.
+ */
+
+size_t
+Index::KeySize()
+{
+	if (fNode == NULL)
+		return 0;
+
+	int32 indexType = fNode->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX
+		| S_LONG_LONG_INDEX | S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX);
+
+	switch (indexType) {
+		case S_STR_INDEX:
+			// string indices don't have a fixed key size
+			return 0;
+
+		case S_INT_INDEX:
+		case S_UINT_INDEX:
+			return sizeof(int32);
+
+		case S_LONG_LONG_INDEX:
+		case S_ULONG_LONG_INDEX:
+			return sizeof(int64);
+
+		case S_FLOAT_INDEX:
+			return sizeof(float);
+
+		case S_DOUBLE_INDEX:
+			return sizeof(double);
+
+		default:
+			FATAL(("index has unknown type!\n"));
+			return 0;
+	}
+}
+
+
+/**	Creates a new index with the given name and type below the indices
+ *	node of the volume; the indices root is created first if the volume
+ *	does not have one yet. On success, the new index node is stored in
+ *	this object.
+ *	Returns B_BAD_TYPE if the type is not one of the supported type codes.
+ */
+
+status_t
+Index::Create(Transaction *transaction, const char *name, uint32 type)
+{
+	Unset();
+
+	// translate the type code into the matching index mode flag
+	int32 indexMode = 0;
+	switch (type) {
+		case B_STRING_TYPE:
+		case B_MIME_STRING_TYPE:
+			// B_MIME_STRING_TYPE is the only supported non-standard type, but
+			// will be handled like a B_STRING_TYPE internally
+			indexMode = S_STR_INDEX;
+			break;
+
+		case B_INT32_TYPE:
+			indexMode = S_INT_INDEX;
+			break;
+		case B_UINT32_TYPE:
+			indexMode = S_UINT_INDEX;
+			break;
+
+		case B_INT64_TYPE:
+			indexMode = S_LONG_LONG_INDEX;
+			break;
+		case B_UINT64_TYPE:
+			indexMode = S_ULONG_LONG_INDEX;
+			break;
+
+		case B_FLOAT_TYPE:
+			indexMode = S_FLOAT_INDEX;
+			break;
+		case B_DOUBLE_TYPE:
+			indexMode = S_DOUBLE_INDEX;
+			break;
+
+		default:
+			return B_BAD_TYPE;
+	}
+
+	// do we need to create the index directory first?
+	if (fVolume->IndicesNode() == NULL) {
+		status_t status = fVolume->CreateIndicesRoot(transaction);
+		if (status < B_OK)
+			RETURN_ERROR(status);
+	}
+
+	// Inode::Create() will keep the inode locked for us
+	return Inode::Create(transaction, fVolume->IndicesNode(), name,
+		S_INDEX_DIR | S_DIRECTORY | indexMode, 0, type, NULL, &fNode);
+}
+
+
+/**	Updates the specified index, the oldKey will be removed from, the newKey
+ *	inserted into the tree.
+ *	Either key may be NULL (but not both): a NULL oldKey only inserts, a
+ *	NULL newKey only removes. A non-NULL key must have a non-zero length.
+ *	A "type" of 0 means the type is not known by the caller; in this case
+ *	the type of the index itself is used once the index has been set.
+ *	Live queries are notified about the change before the tree is touched.
+ *
+ *	Returns B_BAD_VALUE for invalid arguments, B_OK if both keys are
+ *	identical (nothing is done then), otherwise the status of the tree
+ *	operations.
+ *	If the method returns B_BAD_INDEX, it means the index couldn't be found -
+ *	the most common reason will be that the index doesn't exist.
+ *	You may not want to let the whole transaction fail because of that.
+ */
+
+status_t
+Index::Update(Transaction *transaction, const char *name, int32 type, const uint8 *oldKey,
+	uint16 oldLength, const uint8 *newKey, uint16 newLength, Inode *inode)
+{
+	if (name == NULL
+		|| oldKey == NULL && newKey == NULL
+		|| oldKey != NULL && oldLength == 0
+		|| newKey != NULL && newLength == 0)
+		return B_BAD_VALUE;
+
+	// B_MIME_STRING_TYPE is the only supported non-standard type
+	if (type == B_MIME_STRING_TYPE)
+		type = B_STRING_TYPE;
+
+	// If the two keys are identical, don't do anything - only compare if the
+	// type has been set, until we have a real type code, we can't do much
+	// about the comparison here
+	if (type != 0 && !compareKeys(type, oldKey, oldLength, newKey, newLength))
+		return B_OK;
+
+	// update all live queries about the change, if they have an index or not
+	if (type != 0)
+		fVolume->UpdateLiveQueries(inode, name, type, oldKey, oldLength, newKey, newLength);
+
+	// Switch to the requested index if this object is not already set to it.
+	// NOTE(review): because of the "||", strcmp() is only reached when
+	// name == fName, where it always yields 0 - so effectively only the
+	// pointers are compared, and SetTo() is called again for an equal name
+	// stored at a different address. Confirm whether a content comparison
+	// was intended before changing this; fName only stores a pointer whose
+	// lifetime is controlled by the caller.
+	status_t status;
+	if (((name != fName || strcmp(name, fName)) && (status = SetTo(name)) < B_OK)
+		|| fNode == NULL)
+		return B_BAD_INDEX;
+
+	// now that we have the type, check again for equality
+	if (type == 0 && !compareKeys(Type(), oldKey, oldLength, newKey, newLength))
+		return B_OK;
+
+	// same for the live query update
+	if (type == 0)
+		fVolume->UpdateLiveQueries(inode, name, Type(), oldKey, oldLength, newKey, newLength);
+		
+	BPlusTree *tree;
+	if ((status = Node()->GetTree(&tree)) < B_OK)
+		return status;
+
+	// remove the old key from the tree
+
+	if (oldKey != NULL) {
+		status = tree->Remove(transaction, (const uint8 *)oldKey, oldLength, inode->ID());
+		if (status == B_ENTRY_NOT_FOUND) {
+			// That's not nice, but should be no reason to let the whole thing fail
+			// NOTE(review): if there is no newKey, this B_ENTRY_NOT_FOUND is
+			// nevertheless what gets returned at the end of the method
+			FATAL(("Could not find value in index \"%s\"!\n", name));
+		} else if (status < B_OK)
+			return status;
+	}
+
+	// add the new key to the tree
+
+	if (newKey != NULL)
+		status = tree->Insert(transaction, (const uint8 *)newKey, newLength, inode->ID());
+
+	return status;
+}
+
+
+/**	Adds the given name for the inode to the "name" index; implemented as
+ *	UpdateName() without an old name.
+ */
+
+status_t 
+Index::InsertName(Transaction *transaction, const char *name, Inode *inode)
+{
+	return UpdateName(transaction, NULL, name, inode);
+}
+
+
+/**	Removes the given name for the inode from the "name" index; implemented
+ *	as UpdateName() without a new name.
+ */
+
+status_t 
+Index::RemoveName(Transaction *transaction, const char *name, Inode *inode)
+{
+	return UpdateName(transaction, name, NULL, inode);
+}
+
+
+/**	Replaces oldName with newName for the inode in the "name" index.
+ *	Either name may be NULL, in which case it is passed on to Update()
+ *	as a missing key with length 0.
+ */
+
+status_t
+Index::UpdateName(Transaction *transaction, const char *oldName, const char *newName, Inode *inode)
+{
+	uint16 oldLength = 0;
+	if (oldName)
+		oldLength = strlen(oldName);
+
+	uint16 newLength = 0;
+	if (newName)
+		newLength = strlen(newName);
+
+	return Update(transaction, "name", B_STRING_TYPE,
+		(uint8 *)oldName, oldLength, (uint8 *)newName, newLength, inode);
+}
+
+
+/**	Adds the current size of the inode to the "size" index. */
+
+status_t
+Index::InsertSize(Transaction *transaction, Inode *inode)
+{
+	off_t newSize = inode->Size();
+
+	return Update(transaction, "size", B_INT64_TYPE, NULL, 0,
+		(uint8 *)&newSize, sizeof(int64), inode);
+}
+
+
+/**	Removes the inode from the "size" index, using Inode::OldSize() as
+ *	the key, since that is the size that's in the index.
+ */
+
+status_t
+Index::RemoveSize(Transaction *transaction, Inode *inode)
+{
+	off_t indexedSize = inode->OldSize();
+
+	return Update(transaction, "size", B_INT64_TYPE,
+		(uint8 *)&indexedSize, sizeof(int64), NULL, 0, inode);
+}
+
+
+/**	Moves the inode in the "size" index from its old size to its current
+ *	one. The old size remembered by the inode is only brought up to date
+ *	if the update succeeded.
+ */
+
+status_t
+Index::UpdateSize(Transaction *transaction, Inode *inode)
+{
+	off_t indexedSize = inode->OldSize();
+	off_t currentSize = inode->Size();
+
+	status_t status = Update(transaction, "size", B_INT64_TYPE,
+		(uint8 *)&indexedSize, sizeof(int64),
+		(uint8 *)&currentSize, sizeof(int64), inode);
+	if (status != B_OK)
+		return status;
+
+	inode->UpdateOldSize();
+	return B_OK;
+}
+
+
+/**	Adds the last modification time of the inode to the "last_modified"
+ *	index.
+ */
+
+status_t
+Index::InsertLastModified(Transaction *transaction, Inode *inode)
+{
+	off_t newTime = inode->LastModified();
+
+	return Update(transaction, "last_modified", B_INT64_TYPE, NULL, 0,
+		(uint8 *)&newTime, sizeof(int64), inode);
+}
+
+
+/**	Removes the inode from the "last_modified" index, using
+ *	Inode::OldLastModified() as the key, since that is the value which is
+ *	in the index.
+ */
+
+status_t
+Index::RemoveLastModified(Transaction *transaction, Inode *inode)
+{
+	off_t indexedTime = inode->OldLastModified();
+
+	return Update(transaction, "last_modified", B_INT64_TYPE,
+		(uint8 *)&indexedTime, sizeof(int64), NULL, 0, inode);
+}
+
+
+/**	Moves the inode in the "last_modified" index to a new modification
+ *	time. If "modified" is -1, the current time (shifted by
+ *	INODE_TIME_SHIFT) is used; in any case the bits selected by
+ *	INODE_TIME_MASK of a unique ID from the volume are or'ed into the value.
+ *	The new time is written to the inode even if updating the index
+ *	failed, but the old value remembered by the inode is only brought up
+ *	to date on success.
+ */
+
+status_t
+Index::UpdateLastModified(Transaction *transaction, Inode *inode, off_t modified)
+{
+	off_t indexedTime = inode->OldLastModified();
+
+	if (modified == -1)
+		modified = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
+	modified |= fVolume->GetUniqueID() & INODE_TIME_MASK;
+
+	status_t status = Update(transaction, "last_modified", B_INT64_TYPE,
+		(uint8 *)&indexedTime, sizeof(int64),
+		(uint8 *)&modified, sizeof(int64), inode);
+
+	// the inode gets the new time regardless of the outcome
+	inode->Node()->last_modified_time = modified;
+
+	if (status != B_OK)
+		return status;
+
+	inode->UpdateOldLastModified();
+	return B_OK;
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Index.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Index.h
@ -0,0 +1,55 @@
+#ifndef INDEX_H
+#define INDEX_H
+/* Index - index access functions
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+
+class Transaction;
+class Volume;
+class Inode;
+
+
+// The Index class gives access to a single index of a volume: it looks the
+// index up by name, and keeps it up to date when the indexed values of an
+// inode change.
+
+class Index {
+	public:
+		Index(Volume *volume);
+		~Index();
+
+		// attaching to/detaching from an index
+		status_t SetTo(const char *name);
+		void Unset();
+
+		Inode *Node() const { return fNode; }
+		uint32 Type();
+		size_t KeySize();
+
+		status_t Create(Transaction *transaction, const char *name,
+					uint32 type);
+
+		// generic update: removes oldKey and inserts newKey for the inode
+		status_t Update(Transaction *transaction, const char *name,
+					int32 type, const uint8 *oldKey, uint16 oldLength,
+					const uint8 *newKey, uint16 newLength, Inode *inode);
+
+		// "name" index
+		status_t InsertName(Transaction *transaction, const char *name,
+					Inode *inode);
+		status_t RemoveName(Transaction *transaction, const char *name,
+					Inode *inode);
+		status_t UpdateName(Transaction *transaction, const char *oldName,
+					const char *newName, Inode *inode);
+
+		// "size" index
+		status_t InsertSize(Transaction *transaction, Inode *inode);
+		status_t RemoveSize(Transaction *transaction, Inode *inode);
+		status_t UpdateSize(Transaction *transaction, Inode *inode);
+
+		// "last_modified" index
+		status_t InsertLastModified(Transaction *transaction, Inode *inode);
+		status_t RemoveLastModified(Transaction *transaction, Inode *inode);
+		status_t UpdateLastModified(Transaction *transaction, Inode *inode,
+					off_t modified = -1);
+
+	private:
+		// copying is not supported - no implementation
+		Index(const Index &);
+		Index &operator=(const Index &);
+
+		Volume		*fVolume;
+		Inode		*fNode;
+		const char	*fName;
+};
+
+#endif	/* INDEX_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Inode.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Inode.cpp
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Inode.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Inode.h
@ -0,0 +1,404 @@
+#ifndef INODE_H
+#define INODE_H
+/* Inode - inode access functions
+**
+** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+#ifdef USER
+#	include "myfs.h"
+#	include <stdio.h>
+#endif
+
+#ifndef _IMPEXP_KERNEL
+#	define _IMPEXP_KERNEL
+#endif
+
+#include <lock.h>
+#include <cache.h>
+
+#include <string.h>
+#include <unistd.h>
+
+#include "Volume.h"
+#include "Journal.h"
+#include "Lock.h"
+#include "Chain.h"
+#include "Debug.h"
+
+
+class BPlusTree;
+class TreeIterator;
+class AttributeIterator;
+class InodeAllocator;
+
+
+// Shorter names for the file type bits of an inode's mode, plus two
+// combined masks.
+enum inode_type {
+	S_DIRECTORY		= S_IFDIR,
+	S_FILE			= S_IFREG,
+	S_SYMLINK		= S_IFLNK,
+
+	// all bits of the three types above
+	S_REGULAR		= (S_DIRECTORY | S_FILE | S_SYMLINK),
+	// all bits that denote the key type of an index
+	S_INDEX_TYPES	= (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX
+						| S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)
+};
+
+
+// The CachedBlock class is completely implemented as inlines.
+// It should be used when cache single blocks to make sure they
+// will be properly released after use (and it's also very
+// convenient to use them).
+// The block held by the object is released by the destructor, by Unset(),
+// and whenever SetTo() switches to another block - unless Keep() has been
+// called before.
+
+class CachedBlock {
+	public:
+		CachedBlock(Volume *volume);
+		CachedBlock(Volume *volume, off_t block, bool empty = false);
+		CachedBlock(Volume *volume, block_run run, bool empty = false);
+		CachedBlock(CachedBlock *cached);
+			// takes over the block of "cached"
+		~CachedBlock();
+
+		inline void Keep();
+			// forget the block without releasing it
+		inline void Unset();
+		inline uint8 *SetTo(off_t block, bool empty = false);
+		inline uint8 *SetTo(block_run run, bool empty = false);
+			// "empty" requests the block via get_empty_block() instead
+			// of get_block()
+		inline status_t WriteBack(Transaction *transaction);
+
+		uint8 *Block() const { return fBlock; }
+		off_t BlockNumber() const { return fBlockNumber; }
+		uint32 BlockSize() const { return fVolume->BlockSize(); }
+		uint32 BlockShift() const { return fVolume->BlockShift(); }
+
+	private:
+		CachedBlock(const CachedBlock &);
+		CachedBlock &operator=(const CachedBlock &);
+			// no implementation
+
+	protected:
+		Volume	*fVolume;
+		off_t	fBlockNumber;
+			// only set by SetTo() and the CachedBlock(CachedBlock *) constructor
+		uint8	*fBlock;
+			// NULL if no block is held
+};
+
+//--------------------------------------
+
+// The Inode class is the in-memory representation of an inode. It is a
+// CachedBlock, that is, it holds the block containing the on-disk
+// bfs_inode structure (see Node()) for as long as the object exists.
+
+class Inode : public CachedBlock {
+	public:
+		Inode(Volume *volume, vnode_id id, bool empty = false, uint8 reenter = 0);
+		Inode(CachedBlock *cached);
+		~Inode();
+
+		bfs_inode *Node() const { return (bfs_inode *)fBlock; }
+		vnode_id ID() const { return fVolume->ToVnode(fBlockNumber); }
+
+		ReadWriteLock &Lock() { return fLock; }
+		SimpleLock &SmallDataLock() { return fSmallDataLock; }
+
+		mode_t Mode() const { return Node()->Mode(); }
+		uint32 Type() const { return Node()->Type(); }
+		int32 Flags() const { return Node()->Flags(); }
+		bool IsContainer() const { return Mode() & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR); }
+			// note, that this test will also be true for S_IFBLK (not that it's used in the fs :)
+		bool IsDirectory() const { return (Mode() & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY; }
+		bool IsIndex() const { return (Mode() & (S_INDEX_DIR | 0777)) == S_INDEX_DIR; }
+			// that's a stupid check, but AFAIK the only possible method...
+
+		bool IsAttribute() const { return Mode() & S_ATTR; }
+		bool IsFile() const { return Mode() & S_IFREG; }
+			// NOTE(review): this is a plain bit test, unlike the S_ISREG()
+			// used by HasUserAccessableStream() below - confirm whether
+			// other types sharing bits with S_IFREG are meant to match
+		bool IsRegularNode() const { return (Mode() & (S_ATTR_DIR | S_INDEX_DIR | S_ATTR)) == 0; }
+			// a regular node in the standard namespace (i.e. not an index or attribute)
+		bool IsSymLink() const { return S_ISLNK(Mode()); }
+		bool HasUserAccessableStream() const { return S_ISREG(Mode()); }
+			// currently only files can be accessed with bfs_read()/bfs_write()
+
+		off_t Size() const { return Node()->data.Size(); }
+		off_t LastModified() const { return Node()->last_modified_time; }
+			// NOTE(review): reads the field directly, while
+			// UpdateOldLastModified() below goes through
+			// bfs_inode::LastModifiedTime() - confirm this is intended
+
+		block_run &BlockRun() const { return Node()->inode_num; }
+		block_run &Parent() const { return Node()->parent; }
+		block_run &Attributes() const { return Node()->attributes; }
+		Volume *GetVolume() const { return fVolume; }
+
+		status_t InitCheck(bool checkNode = true);
+
+		status_t CheckPermissions(int accessMode) const;
+
+		// small_data access methods
+		status_t MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 length);
+		status_t RemoveSmallData(Transaction *transaction, const char *name);
+		status_t AddSmallData(Transaction *transaction, const char *name, uint32 type,
+					const uint8 *data, size_t length, bool force = false);
+		status_t GetNextSmallData(small_data **_smallData) const;
+		small_data *FindSmallData(const char *name) const;
+		const char *Name() const;
+		status_t GetName(char *buffer) const;
+		status_t SetName(Transaction *transaction, const char *name);
+
+		// high-level attribute methods
+		status_t ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length);
+		status_t WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos, const uint8 *buffer, size_t *_length);
+		status_t RemoveAttribute(Transaction *transaction, const char *name);
+
+		// attribute methods
+		status_t GetAttribute(const char *name, Inode **attribute);
+		void ReleaseAttribute(Inode *attribute);
+		status_t CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute);
+
+		// for directories only:
+		status_t GetTree(BPlusTree **);
+		bool IsEmpty();
+
+		// manipulating the data stream
+		status_t FindBlockRun(off_t pos, block_run &run, off_t &offset);
+
+		status_t ReadAt(off_t pos, uint8 *buffer, size_t *length);
+		status_t WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *length);
+		status_t FillGapWithZeros(off_t oldSize, off_t newSize);
+
+		status_t SetFileSize(Transaction *transaction, off_t size);
+		status_t Append(Transaction *transaction, off_t bytes);
+		status_t Trim(Transaction *transaction);
+
+		status_t Free(Transaction *transaction);
+		status_t Sync();
+
+		// create/remove inodes
+		status_t Remove(Transaction *transaction, const char *name, off_t *_id = NULL,
+					bool isDirectory = false);
+		static status_t Create(Transaction *transaction, Inode *parent, const char *name,
+					int32 mode, int omode, uint32 type, off_t *_id = NULL, Inode **_inode = NULL);
+
+		// index maintaining helper
+		void UpdateOldSize() { fOldSize = Size(); }
+		void UpdateOldLastModified() { fOldLastModified = Node()->LastModifiedTime(); }
+		off_t OldSize() { return fOldSize; }
+		off_t OldLastModified() { return fOldLastModified; }
+
+		// file cache
+		void *FileCache() const { return fCache; }
+		void SetFileCache(void *cache) { fCache = cache; }
+
+	private:
+		Inode(const Inode &);
+		Inode &operator=(const Inode &);
+			// no implementation
+
+		friend void dump_inode(Inode &inode);
+		// friend declarations of classes need the "class" key to be valid
+		// in C++ versions before C++11
+		friend class AttributeIterator;
+		friend class InodeAllocator;
+
+		void Initialize();
+
+		status_t RemoveSmallData(small_data *item, int32 index);
+
+		void AddIterator(AttributeIterator *iterator);
+		void RemoveIterator(AttributeIterator *iterator);
+
+		status_t FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run,
+					off_t size, off_t offset, off_t &max);
+		status_t FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength,
+					off_t size, off_t &offset, off_t &max);
+		status_t AllocateBlockArray(Transaction *transaction, block_run &run);
+		status_t GrowStream(Transaction *transaction, off_t size);
+		status_t ShrinkStream(Transaction *transaction, off_t size);
+
+		BPlusTree		*fTree;
+		Inode			*fAttributes;
+		ReadWriteLock	fLock;
+		off_t			fOldSize;			// we need those values to ensure we will remove
+		off_t			fOldLastModified;	// the correct keys from the indices
+		void			*fCache;
+
+		mutable SimpleLock	fSmallDataLock;
+		Chain<AttributeIterator> fIterators;
+};
+
+
+// The Vnode class provides a convenience layer upon get_vnode(), so that
+// you don't have to call put_vnode() anymore, which may make code more
+// readable in some cases.
+// The destructor puts the vnode back, unless Put() or Keep() has been
+// called before, or Get() has failed (in which case there is no reference
+// that could be put back).
+
+class Vnode {
+	public:
+		Vnode(Volume *volume, vnode_id id)
+			:
+			fVolume(volume),
+			fID(id)
+		{
+		}
+
+		Vnode(Volume *volume, block_run run)
+			:
+			fVolume(volume),
+			fID(volume->ToVnode(run))
+		{
+		}
+
+		~Vnode()
+		{
+			Put();
+		}
+
+		// Acquires the vnode and stores the Inode in "inode"; returns the
+		// status of get_vnode(). After a failure, or after Put()/Keep(),
+		// the object is no longer usable and B_BAD_VALUE is returned.
+		status_t Get(Inode **inode)
+		{
+			if (fVolume == NULL)
+				return B_BAD_VALUE;
+
+			// should we check inode against NULL here? it should not be necessary
+#ifdef UNSAFE_GET_VNODE
+			RecursiveLocker locker(fVolume->Lock());
+#endif
+			status_t status = get_vnode(fVolume->ID(), fID, (void **)inode);
+			if (status < B_OK) {
+				// no reference has been acquired, so the destructor must
+				// not call put_vnode() for it
+				fVolume = NULL;
+			}
+			return status;
+		}
+
+		// Puts the vnode back; any further Put() is a no-op
+		void Put()
+		{
+			if (fVolume)
+				put_vnode(fVolume->ID(), fID);
+			fVolume = NULL;
+		}
+
+		// Makes the object forget the vnode without putting it back; the
+		// caller is then responsible for calling put_vnode() itself
+		void Keep()
+		{
+			fVolume = NULL;
+		}
+
+	private:
+		Volume		*fVolume;
+			// NULL once the vnode is no longer owned by this object
+		vnode_id	fID;
+};
+
+
+// Iterates over the attributes of an inode; GetNext() returns the name,
+// type, and vnode ID of one attribute per call.
+
+class AttributeIterator {
+	public:
+		AttributeIterator(Inode *inode);
+		~AttributeIterator();
+
+		status_t Rewind();
+		status_t GetNext(char *name, size_t *length, uint32 *type, vnode_id *id);
+
+	private:
+		int32		fCurrentSmallData;
+		Inode		*fInode, *fAttributes;
+		TreeIterator *fIterator;
+		void		*fBuffer;
+
+	private:
+		// friend declarations of classes need the "class" key to be valid
+		// in C++ versions before C++11
+		friend class Chain<AttributeIterator>;
+		friend class Inode;
+
+		void Update(uint16 index, int8 change);
+		AttributeIterator *fNext;
+			// link used by Chain<AttributeIterator>
+};
+
+
+//--------------------------------------
+// inlines
+
+
+/**	Creates an object for the given volume that does not hold a block yet;
+ *	fBlockNumber is not initialized until SetTo() is called.
+ */
+
+inline
+CachedBlock::CachedBlock(Volume *volume)
+	:
+	fVolume(volume),
+	fBlock(NULL)
+{
+}
+
+
+/**	Creates an object and immediately tries to get the specified block via
+ *	SetTo(); Block() returns NULL afterwards if that failed.
+ */
+
+inline
+CachedBlock::CachedBlock(Volume *volume, off_t block, bool empty)
+	:
+	fVolume(volume),
+	fBlock(NULL)
+{
+	SetTo(block, empty);
+}
+
+
+/**	Same as the off_t version, but takes a block_run that is converted to
+ *	a block number using Volume::ToBlock().
+ */
+
+inline
+CachedBlock::CachedBlock(Volume *volume, block_run run, bool empty)
+	:
+	fVolume(volume),
+	fBlock(NULL)
+{
+	SetTo(volume->ToBlock(run), empty);
+}
+
+
+/**	Takes over the block held by "cached": volume, block number, and block
+ *	pointer are copied, and Keep() is called on the source so that it won't
+ *	release the block anymore.
+ */
+
+inline 
+CachedBlock::CachedBlock(CachedBlock *cached)
+	:
+	fVolume(cached->fVolume),
+	fBlockNumber(cached->BlockNumber()),
+	fBlock(cached->fBlock)
+{
+	cached->Keep();
+}
+
+
+/**	Releases the block via Unset(), if one is still held. */
+
+inline
+CachedBlock::~CachedBlock()
+{
+	Unset();
+}
+
+
+/**	Makes the object forget its block without releasing it; neither
+ *	Unset() nor the destructor will release it afterwards.
+ */
+
+inline void
+CachedBlock::Keep()
+{
+	fBlock = NULL;
+}
+
+
+/**	Releases the block currently held, if any. Afterwards the object no
+ *	longer refers to a block, so calling Unset() again (for example from
+ *	the destructor after an explicit Unset()) does nothing instead of
+ *	releasing the same block a second time.
+ */
+
+inline void
+CachedBlock::Unset()
+{
+	if (fBlock != NULL) {
+		release_block(fVolume->Device(), fBlockNumber);
+		fBlock = NULL;
+	}
+}
+
+
+/**	Releases the block currently held (if any), and gets the specified
+ *	one instead - via get_empty_block() if "empty" is true, via get_block()
+ *	otherwise. Returns the pointer to the block as delivered by those
+ *	functions, which is also what Block() returns from now on.
+ */
+
+inline uint8 *
+CachedBlock::SetTo(off_t block, bool empty)
+{
+	Unset();
+	fBlockNumber = block;
+
+	if (empty)
+		fBlock = (uint8 *)get_empty_block(fVolume->Device(), block, BlockSize());
+	else
+		fBlock = (uint8 *)get_block(fVolume->Device(), block, BlockSize());
+
+	return fBlock;
+}
+
+
+/**	Same as SetTo(off_t, bool), but takes a block_run that is converted to
+ *	a block number using Volume::ToBlock().
+ */
+
+inline uint8 *
+CachedBlock::SetTo(block_run run, bool empty)
+{
+	return SetTo(fVolume->ToBlock(run), empty);
+}
+
+
+/**	Passes the block held by this object to Transaction::WriteBlocks().
+ *	Returns B_BAD_VALUE if there is no transaction or no block, otherwise
+ *	the result of WriteBlocks().
+ */
+
+inline status_t
+CachedBlock::WriteBack(Transaction *transaction)
+{
+	if (transaction != NULL && fBlock != NULL)
+		return transaction->WriteBlocks(fBlockNumber, fBlock);
+
+	RETURN_ERROR(B_BAD_VALUE);
+}
+
+
+/**	Translates the open mode ("omode") passed to bfs_open() into the
+ *	access mode needed to open the file that way: O_RDONLY needs read
+ *	access (R_OK), O_WRONLY needs write access (W_OK), and everything
+ *	else needs both.
+ */
+
+inline int
+oModeToAccess(int omode)
+{
+	switch (omode & O_RWMASK) {
+		case O_RDONLY:
+			return R_OK;
+		case O_WRONLY:
+			return W_OK;
+
+		default:
+			return R_OK | W_OK;
+	}
+}
+
+#endif	/* INODE_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Jamfile
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Jamfile
@ -0,0 +1,78 @@
+SubDir OBOS_TOP src tests add-ons kernel file_systems bfs r5 ;
+
+# save original optimization level
+# (it may get changed below, and is restored at the end of this file)
+oldOPTIM = $(OPTIM) ;
+
+# set some additional defines
+{
+	# defines that are always set; BFS_BIG_ENDIAN_ONLY is commented out
+	local defines =
+		KEEP_WRONG_DIRENT_RECLEN
+		UNSAFE_GET_VNODE
+		#BFS_BIG_ENDIAN_ONLY
+		;
+
+	# By default, the R5 API version is used unless you define this
+	if $(COMPILE_FOR_ZETA) {
+		defines += COMPILE_FOR_ZETA ;
+	}
+
+	# Enable OpenBFS to be compiled as a full BFS replacement. Will
+	# report itself as "bfs" instead of "obfs" (only R5 version)
+	if $(BFS_REPLACEMENT) {
+		defines += BFS_REPLACEMENT ;
+		bfsAddOnName = bfs ;
+	} else {
+		bfsAddOnName = obfs ;
+	}
+
+	if $(DEBUG) = 0 {
+		# the gcc on BeOS doesn't compile BFS correctly with -O2 or more
+		OPTIM = -O1 ;
+	}
+
+	# turn the list of names into compiler flags, and use them for both
+	# the C and the C++ sources of this directory
+	defines = [ FDefines $(defines) ] ;
+	SubDirCcFlags $(defines) -Wall -Wno-multichar ;
+	SubDirC++Flags $(defines) -Wall -Wno-multichar -fno-rtti ;
+}
+
+UsePrivateHeaders [ FDirName kernel ] ;			# For kernel_cpp.cpp
+
+# all file system sources except for the kernel interface are put into a
+# static library
+KernelStaticLibrary libbfs :
+	BlockAllocator.cpp
+	BPlusTree.cpp
+	kernel_cpp.cpp
+	Debug.cpp
+	Index.cpp
+	Inode.cpp
+	Journal.cpp
+	Query.cpp
+	Utility.cpp
+	Volume.cpp
+	BufferPool.cpp
+	;
+
+# the add-on itself consists of the kernel interface and the library above;
+# it is called "obfs_r5", or "bfs_r5" if BFS_REPLACEMENT is set
+R5KernelAddon $(bfsAddOnName)_r5 : kernel file_systems :
+	kernel_interface_r5.cpp
+	: libbfs.a
+	;
+
+# kernel_cpp.cpp is not part of this directory, it is taken from the kernel
+# sources
+SEARCH on [ FGristFiles
+		kernel_cpp.cpp 
+	] = [ FDirName $(OBOS_TOP) src kernel core util ] ;
+
+#-----------------------------------------------------
+
+# "jam install" copies the add-on to the user's file system add-ons directory
+rule InstallBFS
+{
+	Depends $(<) : $(>) ;
+}
+
+# "ignore" lets the build go on even if the copy fails
+actions ignore InstallBFS
+{
+	cp $(>) /boot/home/config/add-ons/kernel/file_systems/
+}
+
+# NOTE(review): this depends on and copies a target called "obfs", while the
+# add-on built above is called $(bfsAddOnName)_r5 - verify that the names
+# are supposed to differ
+InstallBFS install : obfs ;
+
+# restore original optimization level
+OPTIM = $(oldOPTIM) ;
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Journal.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Journal.cpp
@ -0,0 +1,468 @@
+/* Journal - transaction and logging
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Journal.h"
+#include "Inode.h"
+#include "Debug.h"
+
+#include <Drivers.h>
+#include <util/kernel_cpp.h>
+
+
+/**	Creates the journal for the given volume.
+ *	The size of the log is taken from the volume's log block run; a log
+ *	entry is written once the batched transactions reach
+ *	fMaxTransactionSize blocks (see TransactionDone()).
+ *	All members are initialized here, so that none of them is ever read
+ *	with an indeterminate value.
+ */
+
+Journal::Journal(Volume *volume)
+	:
+	fVolume(volume),
+	fLock("bfs journal"),
+	fOwner(NULL),
+	fArray(volume->BlockSize()),
+	fLogSize(volume->Log().length),
+	fMaxTransactionSize(fLogSize / 4 - 5),
+	fUsed(0),
+	fTransactionsInEntry(0),
+	fCurrent(NULL),
+	fHasChangedBlocks(false),
+	fTimestamp(0)
+{
+	// fMaxTransactionSize is unsigned: for very small logs the
+	// subtraction above wraps around to a huge value, which is what
+	// this check catches.
+	// NOTE(review): for logs smaller than 10 blocks the replacement
+	// value wraps around as well - confirm that such logs cannot occur.
+	if (fMaxTransactionSize > fLogSize / 2)
+		fMaxTransactionSize = fLogSize / 2 - 5;
+}
+
+
+/**	Writes a pending log entry and flushes the device before the
+ *	journal goes away. The result of the flush is not evaluated here.
+ */
+
+Journal::~Journal()
+{
+	FlushLogAndBlocks();
+}
+
+
+/**	Checks whether the log has to be replayed, and does so if needed.
+ *	Returns B_OK if the log start and end pointers match, otherwise
+ *	the result of ReplayLog().
+ */
+
+status_t
+Journal::InitCheck()
+{
+	// matching log pointers mean that there is nothing to replay
+	if (fVolume->LogStart() == fVolume->LogEnd())
+		return B_OK;
+
+	// the log is not empty - this is only expected on a dirty disk
+	if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY)
+		FATAL(("log_start and log_end differ, but disk is marked clean - trying to replay log...\n"));
+
+	return ReplayLog();
+}
+
+
+/**	Placeholder for validating a log entry before it is replayed.
+ *	Currently, it only prints the number of entries and the first array
+ *	value in debug builds, and always returns B_OK.
+ */
+
+status_t
+Journal::CheckLogEntry(int32 count, off_t *array)
+{
+	// ToDo: check log entry integrity (block numbers and entry size)
+	PRINT(("Log entry has %ld entries (%Ld)\n", count, array[0]));
+	return B_OK;
+}
+
+
+/**	Replays a single log entry: every block stored in the entry is
+ *	copied from the log to its final position on the device.
+ *	A log entry starts with one or more array blocks - the first value
+ *	is the number of logged blocks, followed by their block numbers -
+ *	and is followed by the logged blocks themselves.
+ *
+ *	\param _start the position of the entry in the log (in blocks); it
+ *		is advanced past the entry while it is processed, and is not
+ *		wrapped around here (the caller takes care of that).
+ *	\return B_OK on success, B_BAD_DATA if the entry looks invalid,
+ *		B_IO_ERROR if a block could not be read or written.
+ */
+
+status_t
+Journal::ReplayLogEntry(int32 *_start)
+{
+	PRINT(("ReplayLogEntry(start = %ld)\n", *_start));
+
+	off_t logOffset = fVolume->ToBlock(fVolume->Log());
+	off_t arrayBlock = (*_start % fLogSize) + logOffset;
+	int32 blockSize = fVolume->BlockSize();
+	int32 count = 1, valuesInBlock = blockSize / sizeof(off_t);
+	int32 numArrayBlocks = 0;
+	off_t blockNumber = 0;
+	bool first = true;
+
+	CachedBlock cached(fVolume);
+	while (count > 0) {
+		off_t *array = (off_t *)cached.SetTo(arrayBlock);
+		if (array == NULL)
+			return B_IO_ERROR;
+
+		int32 index = 0;
+		if (first) {
+			if (array[0] < 1 || array[0] >= fLogSize)
+				return B_BAD_DATA;
+
+			count = array[0];
+			first = false;
+
+			numArrayBlocks = ((count + 1) * sizeof(off_t) + blockSize - 1) / blockSize;
+			blockNumber = (*_start + numArrayBlocks) % fLogSize;
+				// first real block in this log entry
+			*_start += count;
+			index++;
+				// the first entry in the first block is the number
+				// of blocks in that log entry
+		}
+		(*_start)++;
+
+		if (CheckLogEntry(count, array + 1) < B_OK)
+			return B_BAD_DATA;
+
+		CachedBlock cachedCopy(fVolume);
+		for (; index < valuesInBlock && count-- > 0; index++) {
+			PRINT(("replay block %Ld in log at %Ld!\n", array[index], blockNumber));
+
+			uint8 *copy = cachedCopy.SetTo(logOffset + blockNumber);
+			if (copy == NULL)
+				RETURN_ERROR(B_IO_ERROR);
+
+			ssize_t written = write_pos(fVolume->Device(),
+						array[index] << fVolume->BlockShift(), copy, blockSize);
+			if (written != blockSize)
+				RETURN_ERROR(B_IO_ERROR);
+
+			blockNumber = (blockNumber + 1) % fLogSize;
+		}
+
+		// The log occupies the blocks logOffset to logOffset + fLogSize - 1,
+		// so the array has to wrap around as soon as it reaches the end
+		// (and not only one block after it)
+		arrayBlock++;
+		if (arrayBlock >= logOffset + fLogSize)
+			arrayBlock = logOffset;
+	}
+	return B_OK;
+}
+
+
+/**	Replays all log entries - this will put the disk into a
+ *	consistent and clean state, if it was not correctly unmounted
+ *	before.
+ *	This method is called by Journal::InitCheck() if the log start
+ *	and end pointer don't match.
+ *	On success, the log start pointer is set to the log end, the disk
+ *	is marked clean, and the super block is written back; the result
+ *	of that write is returned. B_ERROR is returned if an entry could
+ *	not be replayed, or if replaying did not advance in the log.
+ */
+
+status_t
+Journal::ReplayLog()
+{
+	INFORM(("Replay log, disk was not correctly unmounted...\n"));
+
+	int32 start = fVolume->LogStart();
+	int32 lastStart = -1;
+	while (true) {
+		// stop if the log is completely flushed
+		if (start == fVolume->LogEnd())
+			break;
+
+		if (start == lastStart) {
+			// strange, flushing the log hasn't changed the log_start pointer
+			return B_ERROR;
+		}
+		lastStart = start;
+
+		status_t status = ReplayLogEntry(&start);
+		if (status < B_OK) {
+			// ReplayLogEntry() may already have advanced "start", so
+			// report the position the failed entry began at
+			FATAL(("replaying log entry from %ld failed: %s\n", lastStart, strerror(status)));
+			return B_ERROR;
+		}
+		start = start % fLogSize;
+	}
+	
+	PRINT(("replaying worked fine!\n"));
+	fVolume->SuperBlock().log_start = fVolume->LogEnd();
+	fVolume->LogStart() = fVolume->LogEnd();
+	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_CLEAN;
+
+	return fVolume->WriteSuperBlock();
+}
+
+
+/**	This is a callback function that is called by the cache, whenever
+ *	a block is flushed to disk that was updated as part of a transaction.
+ *	This is necessary to keep track of completed transactions, to be
+ *	able to update the log start pointer.
+ *	"arg" is the log_entry that was passed to set_blocks_info() in
+ *	WriteLogEntry(); the entry is removed from the list and freed
+ *	here once all of its blocks have been reported.
+ */
+
+void
+Journal::blockNotify(off_t blockNumber, size_t numBlocks, void *arg)
+{
+	log_entry *logEntry = (log_entry *)arg;
+
+	// cached_blocks is unsigned, so this test only checks for "not zero"
+	logEntry->cached_blocks -= numBlocks;
+	if (logEntry->cached_blocks > 0) {
+		// nothing to do yet...
+		return;
+	}
+
+	Journal *journal = logEntry->journal;
+	disk_super_block &superBlock = journal->fVolume->SuperBlock();
+	bool update = false;
+
+	// Set log_start pointer if possible...
+	// (only the oldest entry in the list may move the log start forward)
+	// NOTE(review): the list is read here without holding fEntriesLock,
+	// only the removal below is protected - confirm that this is safe.
+
+	if (logEntry == journal->fEntries.head) {
+		if (logEntry->Next() != NULL) {
+			int32 length = logEntry->next->start - logEntry->start;
+			superBlock.log_start = (superBlock.log_start + length) % journal->fLogSize;
+		} else
+			superBlock.log_start = journal->fVolume->LogEnd();
+
+		update = true;
+	}
+	journal->fUsed -= logEntry->length;
+
+	journal->fEntriesLock.Lock();
+	logEntry->Remove();
+	journal->fEntriesLock.Unlock();
+
+	// the entry was allocated with malloc() in WriteLogEntry()
+	free(logEntry);
+
+	// update the super block, and change the disk's state, if necessary
+
+	if (update) {
+		journal->fVolume->LogStart() = superBlock.log_start;
+
+		if (superBlock.log_start == superBlock.log_end)
+			superBlock.flags = SUPER_BLOCK_DISK_CLEAN;
+
+		journal->fVolume->WriteSuperBlock();
+	}
+}
+
+
+/**	Writes the blocks of the current (batched) transactions as one log
+ *	entry into the log: first the array with the block numbers, then
+ *	the contents of the blocks themselves. Afterwards, the log end
+ *	pointer in the super block is updated and written back.
+ *	Returns B_OK if there was nothing to write, B_BAD_DATA if there was
+ *	not enough space in the log (the volume panics in this case),
+ *	B_IO_ERROR if a logged block could not be retrieved from the cache,
+ *	or the result of writing the super block.
+ */
+
+status_t
+Journal::WriteLogEntry()
+{
+	fTransactionsInEntry = 0;
+	fHasChangedBlocks = false;
+
+	sorted_array *array = fArray.Array();
+	if (array == NULL || array->count == 0)
+		return B_OK;
+
+	// Make sure there is enough space in the log.
+	// If that fails for whatever reason, panic!
+	force_cache_flush(fVolume->Device(), false);
+	int32 tries = fLogSize / 2 + 1;
+	while (TransactionSize() > FreeLogBlocks() && tries-- > 0)
+		force_cache_flush(fVolume->Device(), true);
+
+	// Test the actual condition instead of the retry counter: the counter
+	// also reaches zero when the very last flush freed enough space
+	if (TransactionSize() > FreeLogBlocks()) {
+		fVolume->Panic();
+		return B_BAD_DATA;
+	}
+
+	int32 blockShift = fVolume->BlockShift();
+	off_t logOffset = fVolume->ToBlock(fVolume->Log()) << blockShift;
+	off_t logStart = fVolume->LogEnd();
+	off_t logPosition = logStart % fLogSize;
+
+	// Write disk block array
+	// NOTE(review): the results of the write_pos() calls below are not
+	// checked - a failed log write goes unnoticed.
+
+	uint8 *arrayBlock = (uint8 *)array;
+
+	for (int32 size = fArray.BlocksUsed(); size-- > 0;) {
+		write_pos(fVolume->Device(), logOffset + (logPosition << blockShift),
+			arrayBlock, fVolume->BlockSize());
+
+		logPosition = (logPosition + 1) % fLogSize;
+		arrayBlock += fVolume->BlockSize();
+	}
+
+	// Write logged blocks into the log
+
+	CachedBlock cached(fVolume);
+	for (int32 i = 0;i < array->count;i++) {
+		// ToDo: combine blocks if possible (using iovecs)!
+
+		uint8 *block = cached.SetTo(array->values[i]);
+		if (block == NULL)
+			return B_IO_ERROR;
+
+		write_pos(fVolume->Device(), logOffset + (logPosition << blockShift),
+			block, fVolume->BlockSize());
+		logPosition = (logPosition + 1) % fLogSize;
+	}
+
+	// Remember the entry, so that blockNotify() can move the log start
+	// pointer once all of its blocks have been written back.
+	// If the allocation fails, the entry is simply not tracked.
+	log_entry *logEntry = (log_entry *)malloc(sizeof(log_entry));
+	if (logEntry != NULL) {
+		logEntry->start = logStart;
+		logEntry->length = TransactionSize();
+		logEntry->cached_blocks = array->count;
+		logEntry->journal = this;
+
+		fEntriesLock.Lock();
+		fEntries.Add(logEntry);
+		fEntriesLock.Unlock();
+
+		fCurrent = logEntry;
+		fUsed += logEntry->length;
+
+		set_blocks_info(fVolume->Device(), &array->values[0], array->count, blockNotify, logEntry);
+	}
+
+	// If the log goes to the next round (the log is written as a
+	// circular buffer), all blocks will be flushed out which is
+	// possible because we don't have any locked blocks at this
+	// point.
+	if (logPosition < logStart)
+		fVolume->FlushDevice();
+
+	// We need to flush the drives own cache here to ensure
+	// disk consistency.
+	// If that call fails, we can't do anything about it anyway
+	ioctl(fVolume->Device(), B_FLUSH_DRIVE_CACHE);
+
+	fArray.MakeEmpty();
+
+	// Update the log end pointer in the super block
+	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_DIRTY;
+	fVolume->SuperBlock().log_end = logPosition;
+	fVolume->LogEnd() = logPosition;
+
+	return fVolume->WriteSuperBlock();
+}
+
+
+/**	Writes the pending log entry (if any) to disk, and flushes the
+ *	device afterwards. The journal is locked while this happens.
+ *	Returns the error of Lock() if locking failed, otherwise the
+ *	result of flushing the device - a failure to write the log entry
+ *	is only reported via FATAL().
+ */
+
+status_t
+Journal::FlushLogAndBlocks()
+{
+	// the journal itself acts as the owner of the lock here
+	Transaction *self = (Transaction *)this;
+
+	status_t status = Lock(self);
+	if (status != B_OK)
+		return status;
+
+	// write the current log entry to disk
+	if (TransactionSize() != 0) {
+		status = WriteLogEntry();
+		if (status < B_OK)
+			FATAL(("writing current log entry failed: %s\n", strerror(status)));
+	}
+
+	// the result of the flush replaces the one of the log entry
+	status = fVolume->FlushDevice();
+	Unlock(self, true);
+
+	return status;
+}
+
+
+/**	Locks the journal for the specified transaction.
+ *	If \a owner already holds the journal, nothing is done. Otherwise,
+ *	this waits for the journal lock; the batched transactions are
+ *	written to the log if the last one is older than two seconds.
+ *	Returns B_OK on success, or the error of the lock if it could not
+ *	be acquired - in this case, the journal is left untouched.
+ */
+
+status_t
+Journal::Lock(Transaction *owner)
+{
+	if (owner == fOwner)
+		return B_OK;
+
+	// Never report success (or touch the log) without holding the lock
+	status_t status = fLock.Lock();
+	if (status != B_OK)
+		return status;
+
+	fOwner = owner;
+
+	// if the last transaction is older than 2 secs, start a new one
+	if (fTransactionsInEntry != 0 && system_time() - fTimestamp > 2000000L)
+		WriteLogEntry();
+
+	return B_OK;
+}
+
+
+/**	Ends the transaction of \a owner, and releases the journal lock.
+ *	Calls from anyone but the current owner are ignored.
+ *	\a success tells whether the transaction's changes should be kept.
+ */
+
+void
+Journal::Unlock(Transaction *owner, bool success)
+{
+	if (owner == fOwner) {
+		TransactionDone(success);
+
+		// remembered for the "older than 2 secs" check in Lock()
+		fTimestamp = system_time();
+		fOwner = NULL;
+		fLock.Unlock();
+	}
+}
+
+
+/**	Gives access to the transaction that currently owns the journal,
+ *	provided that the journal lock can be acquired without waiting
+ *	(which is the case if the calling thread already holds it).
+ *	Returns NULL if the lock is not available.
+ */
+
+Transaction *
+Journal::CurrentTransaction()
+{
+	Transaction *current = NULL;
+
+	if (fLock.LockWithTimeout(0) == B_OK) {
+		current = fOwner;
+		fLock.Unlock();
+	}
+
+	return current;
+}
+
+
+/**	Called by Unlock() when a transaction ends.
+ *	A failed transaction is aborted if no other transaction has been
+ *	batched into the current log entry yet. Otherwise, the transaction
+ *	is either batched with later ones, or - once the maximum transaction
+ *	size is reached - written to the log.
+ */
+
+status_t
+Journal::TransactionDone(bool success)
+{
+	const bool abortable = !success && fTransactionsInEntry == 0;
+
+	if (abortable) {
+		// release the lock for all blocks in the array (we don't need
+		// to be notified when they are actually written to disk)
+		sorted_array *blocks = fArray.Array();
+		if (blocks != NULL) {
+			for (int32 index = 0; index < blocks->count; index++)
+				release_block(fVolume->Device(), blocks->values[index]);
+		}
+
+		return B_OK;
+	}
+
+	// the entry is full, it has to go to the log now
+	if (!(TransactionSize() < fMaxTransactionSize))
+		return WriteLogEntry();
+
+	// there is still room left: batch this transaction with the next
+	// ones to improve speed
+	fTransactionsInEntry++;
+	fHasChangedBlocks = false;
+
+	return B_OK;
+}
+
+
+/**	Adds \a numBlocks blocks, starting at \a blockNumber, to the current
+ *	transaction; \a buffer holds the new contents of these blocks.
+ *	Returns B_DEVICE_FULL if the transaction would not fit into the log
+ *	anymore, or the error of the cache if a block could not be written.
+ */
+
+status_t
+Journal::LogBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks)
+{
+	// ToDo: that's for now - we should change the log file size here
+	if (TransactionSize() + numBlocks + 1 > fLogSize)
+		return B_DEVICE_FULL;
+
+	fHasChangedBlocks = true;
+	int32 blockSize = fVolume->BlockSize();
+
+	while (numBlocks-- > 0) {
+		status_t status;
+
+		if (fArray.Find(blockNumber) >= 0) {
+			// The block is already part of the transaction, so only its
+			// data has to be updated. This only matters if the buffer
+			// differs from the cached block buffer - which is unlikely,
+			// but this way we are on the safe side (at the cost of one
+			// cache lookup).
+			status = cached_write(fVolume->Device(), blockNumber, buffer, 1, blockSize);
+		} else {
+			// A new block: remember it in the transaction's array, and
+			// write the changes back into the locked cache buffer
+			fArray.Insert(blockNumber);
+			status = cached_write_locked(fVolume->Device(), blockNumber, buffer, 1, blockSize);
+		}
+
+		if (status < B_OK)
+			return status;
+
+		blockNumber++;
+		buffer += blockSize;
+	}
+
+	// If necessary, flush the log, so that we have enough space for this transaction
+	if (TransactionSize() > FreeLogBlocks())
+		force_cache_flush(fVolume->Device(), true);
+
+	return B_OK;
+}
+
+
+//	#pragma mark -
+
+
+/**	Starts the transaction by locking the journal that is responsible
+ *	for \a refBlock. Does nothing if the transaction is already running.
+ *	Returns B_ERROR if there is no journal, or if it could not be locked.
+ */
+
+status_t
+Transaction::Start(Volume *volume, off_t refBlock)
+{
+	// has it already been started?
+	if (fJournal != NULL)
+		return B_OK;
+
+	fJournal = volume->GetJournal(refBlock);
+	if (fJournal == NULL || fJournal->Lock(this) != B_OK) {
+		// leave the transaction in the "not started" state
+		fJournal = NULL;
+		return B_ERROR;
+	}
+
+	return B_OK;
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Journal.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Journal.h
@ -0,0 +1,159 @@
+#ifndef JOURNAL_H
+#define JOURNAL_H
+/* Journal - transaction and logging
+**
+** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+
+#ifdef USER
+#	include "myfs.h"
+#	include <stdio.h>
+#endif
+
+#ifndef _IMPEXP_KERNEL
+#	define _IMPEXP_KERNEL
+#endif
+
+#include <lock.h>
+#include <cache.h>
+
+#include "Volume.h"
+#include "Chain.h"
+#include "Utility.h"
+
+
+// Describes one entry that has been written to the log, but whose blocks
+// have not all been written back to their final location yet. The entries
+// are created in Journal::WriteLogEntry(), and freed in
+// Journal::blockNotify().
+
+struct log_entry : node<log_entry> {
+	uint16		start;			// log position the entry starts at (in blocks)
+	uint16		length;			// size of the entry in the log (in blocks)
+	uint32		cached_blocks;	// blocks not yet reported by the cache
+	Journal		*journal;		// the journal this entry belongs to
+};
+
+
+// Locking policy in BFS: if you need both, the volume lock and the
+//	journal lock, you must lock the volume first - or else you will
+//	end up in a deadlock.
+//	That is, if you start a transaction, and will need to lock the
+//	volume while the transaction is in progress (for the unsafe
+//	get_vnode() call, for example), you must lock the volume before
+//	starting the transaction.
+
+// The Journal collects the blocks changed by transactions, and writes them
+// as log entries into the log area of the volume, before they are written
+// to their final location. It also replays the log when needed.
+
+class Journal {
+	public:
+		Journal(Volume *);
+		~Journal();
+		
+		status_t InitCheck();
+
+		// only one transaction can own the journal at a time
+		status_t Lock(Transaction *owner);
+		void Unlock(Transaction *owner, bool success);
+
+		status_t CheckLogEntry(int32 count, off_t *array);
+		status_t ReplayLogEntry(int32 *start);
+		status_t ReplayLog();
+
+		status_t WriteLogEntry();
+		status_t LogBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks);
+
+		Transaction *CurrentTransaction();
+		// number of blocks the current entry would occupy in the log:
+		// the logged blocks plus the blocks needed for the array itself
+		uint32 TransactionSize() const { return fArray.CountItems() + fArray.BlocksUsed(); }
+
+		status_t FlushLogAndBlocks();
+		Volume *GetVolume() const { return fVolume; }
+
+		inline uint32 FreeLogBlocks() const;
+
+	private:
+		friend log_entry;
+
+		static void blockNotify(off_t blockNumber, size_t numBlocks, void *arg);
+		status_t TransactionDone(bool success);
+
+		Volume		*fVolume;
+		RecursiveLock	fLock;
+		Transaction *fOwner;
+			// the transaction that currently holds fLock
+		BlockArray	fArray;
+			// block numbers of the log entry that is currently built
+		uint32		fLogSize, fMaxTransactionSize, fUsed;
+			// fLogSize: length of the log in blocks,
+			// fUsed: sum of the lengths of all pending log entries
+		int32		fTransactionsInEntry;
+			// number of transactions batched into the current entry
+		SimpleLock	fEntriesLock;
+			// held while entries are added to/removed from fEntries
+		list<log_entry>	fEntries;
+		log_entry	*fCurrent;
+			// the log entry that was written last
+		bool		fHasChangedBlocks;
+		bigtime_t	fTimestamp;
+			// system time of the last Unlock()
+};
+
+
+// Returns the number of blocks between the log end and the log start
+// pointer, taking into account that the log is a circular buffer.
+// If both pointers are equal, the whole log is reported as free.
+
+inline uint32
+Journal::FreeLogBlocks() const
+{
+	// the used part wraps around the end of the log
+	if (fVolume->LogStart() > fVolume->LogEnd())
+		return fVolume->LogStart() - fVolume->LogEnd();
+
+	return fLogSize - fVolume->LogEnd() + fVolume->LogStart();
+}
+
+
+// A Transaction holds the journal lock from Start() until Done() or until
+// it is destroyed. All blocks written via WriteBlocks() are handed over
+// to Journal::LogBlocks().
+// If the object is destroyed without Done() having been called, the
+// journal is unlocked with "success" set to false.
+
+class Transaction {
+	public:
+		// starts the transaction right away - use IsStarted() to find
+		// out if that worked
+		Transaction(Volume *volume, off_t refBlock)
+			:
+			fJournal(NULL)
+		{
+			Start(volume, refBlock);
+		}
+
+		Transaction(Volume *volume, block_run refRun)
+			:
+			fJournal(NULL)
+		{
+			Start(volume, volume->ToBlock(refRun));
+		}
+
+		// creates a transaction that has to be started explicitly
+		Transaction()
+			:
+			fJournal(NULL)
+		{
+		}
+
+		~Transaction()
+		{
+			// Done() has not been called, so this is reported as failure
+			if (fJournal)
+				fJournal->Unlock(this, false);
+		}
+
+		status_t Start(Volume *volume, off_t refBlock);
+		bool IsStarted() const { return fJournal != NULL; }
+
+		// ends the transaction successfully; it can be started again
+		// afterwards
+		void Done()
+		{
+			if (fJournal != NULL)
+				fJournal->Unlock(this, true);
+			fJournal = NULL;
+		}
+
+		// returns B_NO_INIT if the transaction has not been started
+		status_t WriteBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks = 1)
+		{
+			if (fJournal == NULL)
+				return B_NO_INIT;
+
+			return fJournal->LogBlocks(blockNumber, buffer, numBlocks);
+		}
+
+		Volume	*GetVolume() { return fJournal != NULL ? fJournal->GetVolume() : NULL; }
+
+	private:
+		Transaction(const Transaction &);
+		Transaction &operator=(const Transaction &);
+			// no implementation
+
+		Journal	*fJournal;
+			// the locked journal, NULL as long as the transaction
+			// is not started
+};
+
+#endif	/* JOURNAL_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Lock.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Lock.h
@ -0,0 +1,529 @@
+#ifndef LOCK_H
+#define LOCK_H
+/* Lock - simple semaphores, read/write lock implementation
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** Roughly based on a Be sample code written by Nathan Schrenk.
+**
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+#include <stdio.h>
+#include "Utility.h"
+#include "Debug.h"
+
+
+// Configure here if and when real benaphores should be used
+#define USE_BENAPHORE
+	// if defined, benaphores are used for the Semaphore/RecursiveLock classes
+#ifdef USER
+//#	define FAST_LOCK
+	// the ReadWriteLock class uses a second Semaphore to
+	// speed up locking - only makes sense if USE_BENAPHORE
+	// is defined, too.
+#endif
+
+
+// A simple lock built on a kernel semaphore.
+// If USE_BENAPHORE is defined, the semaphore is only touched when there
+// is contention: fCount starts at 1, and the semaphore is only acquired
+// or released when the counter shows that another thread holds the lock
+// or is waiting for it (this relies on atomic_add() returning the
+// previous value of the counter).
+
+class Semaphore {
+	public:
+		Semaphore(const char *name)
+			:
+#ifdef USE_BENAPHORE
+			fSemaphore(create_sem(0, name)),
+			fCount(1)
+#else
+			fSemaphore(create_sem(1, name))
+#endif
+		{
+#ifndef USER
+			set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
+#endif
+		}
+
+		~Semaphore()
+		{
+			delete_sem(fSemaphore);
+		}
+
+		// returns the error of create_sem() if the semaphore could not
+		// be created
+		status_t InitCheck()
+		{
+			if (fSemaphore < B_OK)
+				return fSemaphore;
+			
+			return B_OK;
+		}
+
+		status_t Lock()
+		{
+#ifdef USE_BENAPHORE
+			// only wait on the semaphore if the lock is already taken
+			if (atomic_add(&fCount, -1) <= 0)
+#endif
+				return acquire_sem(fSemaphore);
+#ifdef USE_BENAPHORE
+			return B_OK;
+#endif
+		}
+	
+		status_t Unlock()
+		{
+#ifdef USE_BENAPHORE
+			// only release the semaphore if someone is waiting for it
+			if (atomic_add(&fCount, 1) < 0)
+#endif
+				return release_sem(fSemaphore);
+#ifdef USE_BENAPHORE
+			return B_OK;
+#endif
+		}
+
+	private:
+		sem_id	fSemaphore;
+#ifdef USE_BENAPHORE
+		vint32	fCount;
+#endif
+};
+
+// a convenience class to lock a Semaphore object
+
+class Locker {
+	public:
+		// locks the semaphore for the lifetime of this object
+		Locker(Semaphore &lock)
+			:
+			fLock(lock),
+			fStatus(lock.Lock())
+		{
+			ASSERT(fStatus == B_OK);
+		}
+
+		// only unlocks if locking was successful
+		~Locker()
+		{
+			if (fStatus == B_OK)
+				fLock.Unlock();
+		}
+
+		// the result of the lock attempt
+		status_t Status() const { return fStatus; }
+
+	private:
+		Semaphore	&fLock;
+		status_t	fStatus;
+};
+
+
+//**** Recursive Lock
+
+// A lock that can be acquired several times by the thread that already
+// holds it; it has to be unlocked as often as it was locked.
+// Like the Semaphore class, it only touches its semaphore on contention
+// if USE_BENAPHORE is defined.
+
+class RecursiveLock {
+	public:
+		RecursiveLock(const char *name)
+			:
+#ifdef USE_BENAPHORE
+			fSemaphore(create_sem(0, name)),
+			fCount(1),
+#else
+			fSemaphore(create_sem(1, name)),
+#endif
+			fOwner(-1),
+			fOwnerCount(0)
+		{
+#ifndef USER
+			set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
+#endif
+		}
+
+		// The semaphore has to be deleted explicitly, or else it would
+		// be leaked (the Semaphore and ReadWriteLock classes do the same)
+		~RecursiveLock()
+		{
+			delete_sem(fSemaphore);
+		}
+
+		// Tries to acquire the lock within "timeout" microseconds.
+		// Returns B_OK if the lock is held by the calling thread
+		// afterwards, or the error of acquire_sem_etc() otherwise.
+		status_t LockWithTimeout(bigtime_t timeout)
+		{
+			thread_id thread = find_thread(NULL);
+			if (thread == fOwner) {
+				// nested lock of the owner
+				fOwnerCount++;
+				return B_OK;
+			}
+
+			status_t status;
+#ifdef USE_BENAPHORE
+			if (atomic_add(&fCount, -1) > 0)
+				status = B_OK;
+			else
+#endif
+				status = acquire_sem_etc(fSemaphore, 1, B_RELATIVE_TIMEOUT, timeout);
+
+			if (status == B_OK) {
+				fOwner = thread;
+				fOwnerCount = 1;
+			}
+
+			return status;
+		}
+
+		status_t Lock()
+		{
+			return LockWithTimeout(B_INFINITE_TIMEOUT);
+		}
+
+		// Releases one nesting level of the lock; the lock is only
+		// given up when the outermost level is unlocked.
+		// Unlocking from a thread that does not own the lock is
+		// treated as a fatal error.
+		status_t Unlock()
+		{
+			thread_id thread = find_thread(NULL);
+			if (thread != fOwner) {
+				#if __MWERKS__ && !USER //--- The R5 PowerPC kernel doesn't have panic()
+					char blip[255];
+					sprintf(blip,"RecursiveLock unlocked by %ld, owned by %ld\n", thread, fOwner);
+					kernel_debugger(blip);
+				#else
+					panic("RecursiveLock unlocked by %ld, owned by %ld\n", thread, fOwner);
+				#endif
+			}
+
+			if (--fOwnerCount == 0) {
+				fOwner = -1;
+#ifdef USE_BENAPHORE
+				if (atomic_add(&fCount, 1) < 0)
+#endif
+					return release_sem(fSemaphore);
+			}
+
+			return B_OK;
+		}
+
+	private:
+		sem_id	fSemaphore;
+#ifdef USE_BENAPHORE
+		vint32	fCount;
+#endif
+		thread_id	fOwner;
+			// the thread holding the lock, -1 if there is none
+		int32		fOwnerCount;
+			// how often the owner has locked it
+};
+
+// a convenience class to lock a RecursiveLock object
+
+class RecursiveLocker {
+	public:
+		// holds the lock for the lifetime of this object
+		RecursiveLocker(RecursiveLock &lock)
+			:
+			fLock(lock),
+			fStatus(lock.Lock())
+		{
+			ASSERT(fStatus == B_OK);
+		}
+
+		// only unlocks if locking was successful
+		~RecursiveLocker()
+		{
+			if (fStatus == B_OK)
+				fLock.Unlock();
+		}
+
+		// the result of the lock attempt
+		status_t Status() const { return fStatus; }
+
+	private:
+		RecursiveLock	&fLock;
+		status_t		fStatus;
+};
+
+
+//**** Many Reader/Single Writer Lock
+
+// This is a "fast" implementation of a single writer/many reader
+// locking scheme. It's fast because it uses the benaphore idea
+// to do lazy semaphore locking - in most cases it will only have
+// to do some simple integer arithmetic.
+// The second semaphore (fWriteLock) is needed to prevent the situation
+// that a second writer can acquire the lock when there are still readers
+// holding it.
+
+#define MAX_READERS 100000
+
+// Note: this code will break if you actually have 100000 readers
+// at once. With the current thread/... limits in BeOS you can't
+// touch that value, but it might be possible in the future.
+// Also, you can only have about 20000 concurrent writers until
+// the semaphore count exceeds the int32 bounds
+
+// Timeouts:
+// It may be a good idea to have timeouts for the WriteLocked class,
+// in case something went wrong - we'll see if this is necessary,
+// but it would be a somewhat poor work-around for a deadlock...
+// But the only real problem with timeouts could be for things like
+// "chkbfs" - because such a tool may need to lock for some more time
+
+
+// define if you want to have fast locks as the foundation for the
+// ReadWriteLock class - the benefit is that acquire_sem() doesn't
+// have to be called when there is no one waiting.
+// The disadvantage is the use of 2 real semaphores which is quite
+// expensive regarding that BeOS only allows for a total of 64k
+// semaphores (since every open BFS inode has such a lock).
+
+#ifdef FAST_LOCK
+// ReadWriteLock variant that only touches its semaphore on contention.
+// fCount starts at MAX_READERS; every reader takes one unit of it, a
+// writer takes all MAX_READERS units at once. fWriteLock serializes the
+// writers while they acquire the lock.
+
+class ReadWriteLock {
+	public:
+		ReadWriteLock(const char *name)
+			:
+			fWriteLock(name)
+		{
+			Initialize(name);
+		}
+
+		// Creates an uninitialized lock, Initialize() has to be called
+		// before it can be used. Until then, the semaphore is invalid,
+		// so that InitCheck() fails, and the destructor does not delete
+		// an arbitrary semaphore.
+		ReadWriteLock()
+			:
+			fSemaphore(-1),
+			fCount(MAX_READERS),
+			fWriteLock("bfs r/w w-lock")
+		{
+		}
+
+		~ReadWriteLock()
+		{
+			delete_sem(fSemaphore);
+		}
+
+		// returns the semaphore ID, or the error of create_sem()
+		status_t Initialize(const char *name = "bfs r/w lock")
+		{
+			fSemaphore = create_sem(0, name);
+			fCount = MAX_READERS;
+#ifndef USER
+			set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
+#endif
+			return fSemaphore;
+		}
+
+		status_t InitCheck()
+		{
+			if (fSemaphore < B_OK)
+				return fSemaphore;
+
+			return B_OK;
+		}
+
+		// acquires the lock for reading
+		status_t Lock()
+		{
+			// only wait if a writer has taken all units
+			if (atomic_add(&fCount, -1) <= 0)
+				return acquire_sem(fSemaphore);
+
+			return B_OK;
+		}
+
+		void Unlock()
+		{
+			// a writer is waiting for the readers to leave
+			if (atomic_add(&fCount, 1) < 0)
+				release_sem(fSemaphore);
+		}
+
+		// acquires the lock for writing
+		status_t LockWrite()
+		{
+			if (fWriteLock.Lock() < B_OK)
+				return B_ERROR;
+
+			int32 readers = atomic_add(&fCount, -MAX_READERS);
+			status_t status = B_OK;
+
+			if (readers < MAX_READERS) {
+				// Acquire sem for all readers currently not using a semaphore.
+				// But if we are not the only write lock in the queue, just get
+				// the one for us
+				status = acquire_sem_etc(fSemaphore, readers <= 0 ? 1 : MAX_READERS - readers, 0, 0);
+			}
+			fWriteLock.Unlock();
+
+			return status;
+		}
+
+		void UnlockWrite()
+		{
+			int32 readers = atomic_add(&fCount, MAX_READERS);
+			if (readers < 0) {
+				// release sem for all readers only when we were the only writer
+				release_sem_etc(fSemaphore, readers <= -MAX_READERS ? 1 : -readers, 0);
+			}
+		}
+
+	private:
+		friend class ReadLocked;
+		friend class WriteLocked;
+
+		sem_id		fSemaphore;
+		vint32		fCount;
+		Semaphore	fWriteLock;
+};
+#else	// FAST_LOCK
+// ReadWriteLock variant that is based on a single semaphore with a count
+// of MAX_READERS: a reader acquires one unit of it, a writer acquires
+// all of them.
+
+class ReadWriteLock {
+	public:
+		ReadWriteLock(const char *name)
+		{
+			Initialize(name);
+		}
+
+		// Creates an uninitialized lock, Initialize() has to be called
+		// before it can be used. Until then, the semaphore is invalid,
+		// so that InitCheck() fails, and the destructor does not delete
+		// an arbitrary semaphore.
+		ReadWriteLock()
+			:
+			fSemaphore(-1)
+		{
+		}
+
+		~ReadWriteLock()
+		{
+			delete_sem(fSemaphore);
+		}
+
+		// returns the semaphore ID, or the error of create_sem()
+		status_t Initialize(const char *name = "bfs r/w lock")
+		{
+			fSemaphore = create_sem(MAX_READERS, name);
+#ifndef USER
+			set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
+#endif
+			return fSemaphore;
+		}
+
+		status_t InitCheck()
+		{
+			if (fSemaphore < B_OK)
+				return fSemaphore;
+
+			return B_OK;
+		}
+
+		// acquires the lock for reading
+		status_t Lock()
+		{
+			return acquire_sem(fSemaphore);
+		}
+
+		void Unlock()
+		{
+			release_sem(fSemaphore);
+		}
+
+		// acquires the lock for writing
+		status_t LockWrite()
+		{
+			return acquire_sem_etc(fSemaphore, MAX_READERS, 0, 0);
+		}
+
+		void UnlockWrite()
+		{
+			release_sem_etc(fSemaphore, MAX_READERS, 0);
+		}
+
+	private:
+		friend class ReadLocked;
+		friend class WriteLocked;
+
+		sem_id		fSemaphore;
+};
+#endif	// FAST_LOCK
+
+
+// Holds a ReadWriteLock for reading as long as the object exists.
+
+class ReadLocked {
+	public:
+		ReadLocked(ReadWriteLock &lock)
+			:
+			fLock(lock),
+			fStatus(lock.Lock())
+		{
+		}
+
+		// only unlocks if locking was successful
+		~ReadLocked()
+		{
+			if (fStatus == B_OK)
+				fLock.Unlock();
+		}
+
+	private:
+		ReadWriteLock	&fLock;
+		status_t		fStatus;
+};
+
+
+// Holds a ReadWriteLock for writing as long as the object exists.
+
+class WriteLocked {
+	public:
+		WriteLocked(ReadWriteLock &lock)
+			:
+			fLock(lock),
+			fStatus(lock.LockWrite())
+		{
+		}
+
+		// only unlocks if locking was successful
+		~WriteLocked()
+		{
+			if (fStatus == B_OK)
+				fLock.UnlockWrite();
+		}
+
+		// Despite its name, this returns the status of the lock attempt,
+		// that is B_OK (which is zero) if the lock is held
+		status_t IsLocked() { return fStatus; }
+
+	private:
+		ReadWriteLock	&fLock;
+		status_t		fStatus;
+};
+
+
+// A simple locking structure that doesn't use a semaphore - it's useful
+// if you have to protect critical parts with a short runtime.
+// It also allows to nest several locks for the same thread.
+
+class SimpleLock {
+	public:
+		SimpleLock()
+			:
+			fHolder(-1),
+			fCount(0)
+		{
+		}
+
+		// Waits until the lock is free or already held by the calling
+		// thread, polling every "time" microseconds.
+		status_t Lock(bigtime_t time = 500)
+		{
+			int32 thisThread = find_thread(NULL);
+			int32 current;
+			while (1) {
+				/*if (fHolder == -1) {
+					current = fHolder;
+					fHolder = thisThread;
+				}*/
+				// presumably the atomic version of the code above: sets
+				// fHolder to thisThread if it was -1, and returns its
+				// previous value - confirm against _atomic_test_and_set()
+				current = _atomic_test_and_set(&fHolder, thisThread, -1);
+				if (current == -1)
+					break;
+				if (current == thisThread)
+					break;
+					
+				snooze(time);
+			}
+
+			// ToDo: the lock cannot fail currently! We may want
+			// to change this
+			atomic_add(&fCount, 1);
+			return B_OK;
+		}
+
+		// gives up the lock when the outermost nesting level is left
+		void Unlock()
+		{
+			if (atomic_add(&fCount, -1) == 1)
+				_atomic_set(&fHolder, -1);
+		}
+
+		// true if the calling thread holds the lock
+		bool IsLocked() const
+		{
+			return fHolder == find_thread(NULL);
+		}
+
+	private:
+		vint32	fHolder;
+			// ID of the thread holding the lock, -1 if it is free
+		vint32	fCount;
+			// nesting count of the holder
+};
+
+// A convenience class to lock the SimpleLock, note the
+// different timing compared to the direct call
+
+class SimpleLocker {
+	public:
+		// locks the SimpleLock, polling every "time" microseconds
+		SimpleLocker(SimpleLock &lock,bigtime_t time = 1000)
+			:
+			fLock(lock)
+		{
+			fLock.Lock(time);
+		}
+
+		// the lock is released when the object goes out of scope
+		~SimpleLocker() { fLock.Unlock(); }
+
+	private:
+		SimpleLock	&fLock;
+};
+
+#endif	/* LOCK_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Query.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Query.cpp
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Query.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Query.h
@ -0,0 +1,77 @@
+#ifndef QUERY_H
+#define QUERY_H
+/* Query - query parsing and evaluation
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <SupportDefs.h>
+
+#include "Index.h"
+#include "Stack.h"
+#include "Chain.h"
+
+class Volume;
+class Term;
+class Equation;
+class TreeIterator;
+class Query;
+
+
+// Represents a query expression string; Root() gives access to the Term
+// that resulted from it. The parsing itself is not part of this header -
+// judging by their names, the Parse...() methods handle the different
+// levels of the expression (TODO confirm against Query.cpp).
+
+class Expression {
+	public:
+		Expression(char *expr);
+		~Expression();
+
+		status_t InitCheck();
+		const char *Position() const { return fPosition; }
+		Term *Root() const { return fTerm; }
+
+	protected:
+		Term *ParseOr(char **expr);
+		Term *ParseAnd(char **expr);
+		Term *ParseEquation(char **expr);
+
+		bool IsOperator(char **expr,char op);
+
+	private:
+		Expression(const Expression &);
+		Expression &operator=(const Expression &);
+			// no implementation
+
+		char *fPosition;
+		Term *fTerm;
+};
+
+// A query that is run on a volume using the given Expression; the
+// results are retrieved one by one via GetNextEntry().
+// SetLiveMode() stores a port and a token for the query - presumably the
+// target for the notifications sent by LiveUpdate() (TODO confirm against
+// Query.cpp).
+
+class Query {
+	public:
+		Query(Volume *volume, Expression *expression, uint32 flags);
+		~Query();
+
+		status_t GetNextEntry(struct dirent *,size_t size);
+
+		void SetLiveMode(port_id port,int32 token);
+		void LiveUpdate(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength);
+
+		Expression *GetExpression() const { return fExpression; }
+
+	private:
+		Volume			*fVolume;
+		Expression		*fExpression;
+		Equation		*fCurrent;
+		TreeIterator	*fIterator;
+		Index			fIndex;
+		Stack<Equation *> fStack;
+
+		uint32			fFlags;
+		port_id			fPort;
+		int32			fToken;
+
+	private:
+		// the queries are linked together using the Chain class
+		friend Chain<Query>;
+		Query			*fNext;
+};
+
+#endif	/* QUERY_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Stack.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Stack.h
@ -0,0 +1,58 @@
+#ifndef STACK_H
+#define STACK_H
+/* Stack - a template stack class
+**
+** Copyright 2001 pinc Software. All Rights Reserved.
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <SupportDefs.h>
+
+
+// A simple stack that grows in steps of 16 elements.
+// The elements are kept in a malloc()'ed array and are copied by plain
+// assignment.
+
+template<class T> class Stack {
+	public:
+		Stack()
+			:
+			fArray(NULL),
+			fUsed(0),
+			fMax(0)
+		{
+		}
+
+		~Stack()
+		{
+			if (fArray)
+				free(fArray);
+		}
+
+		// Puts the value on top of the stack.
+		// Returns B_NO_MEMORY if the stack could not be enlarged - the
+		// stack is left unchanged in this case.
+		status_t Push(T value)
+		{
+			if (fUsed >= fMax) {
+				// The new capacity must only be taken over when the
+				// array could actually be resized, or else a later
+				// Push() would write beyond the end of the old array
+				int32 newMax = fMax + 16;
+				T *newArray = (T *)realloc(fArray, newMax * sizeof(T));
+				if (newArray == NULL)
+					return B_NO_MEMORY;
+
+				fArray = newArray;
+				fMax = newMax;
+			}
+			fArray[fUsed++] = value;
+			return B_OK;
+		}
+
+		// Removes the topmost value from the stack, and stores it in
+		// "value". Returns false if the stack is empty.
+		bool Pop(T *value)
+		{
+			if (fUsed == 0)
+				return false;
+
+			*value = fArray[--fUsed];
+			return true;
+		}
+
+	private:
+		T		*fArray;
+		int32	fUsed;
+			// number of elements on the stack
+		int32	fMax;
+			// number of elements the array has space for
+};
+
+#endif	/* STACK_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Stream.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Stream.h
@ -0,0 +1,636 @@
+/* Stream - inode stream access functions
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <Inode.h>
+
+
+// The classes in the namespace "Access" provide different types of access
+// to the inode's data stream.
+// Uncached accesses the underlying device directly, Cached uses the
+// standard cache, while Logged directs write accesses through the log.
+//
+// The classes' interface is similar to that of the CachedBlock class,
+// but adds two other (static) functions for reading/writing several
+// blocks at once.
+// We don't use a real pure virtual interface as the class base, but we
+// provide the same mechanism using templates.
+
+namespace Access {
+
+// Block accessor that bypasses the block cache: it takes a buffer from
+// the volume's buffer pool and transfers a single block with
+// read_pos()/write_pos() directly from/to the volume's device.
+class Uncached {
+	public:
+		Uncached(Volume *volume);
+		Uncached(Volume *volume, off_t block, bool empty = false);
+		Uncached(Volume *volume, block_run run, bool empty = false);
+		~Uncached();
+
+		// Returns the current buffer to the volume's pool.
+		void Unset();
+		// Point the object to another block; if "empty" is true, the
+		// buffer is zeroed instead of being read from the device.
+		// Both return the block buffer, or NULL if none could be had.
+		uint8 *SetTo(off_t block, bool empty = false);
+		uint8 *SetTo(block_run run, bool empty = false);
+		// Writes the buffer back to the device; the transaction
+		// argument is not used by this class.
+		status_t WriteBack(Transaction *transaction);
+
+		uint8 *Block() const { return fBlock; }
+		off_t BlockNumber() const { return fBlockNumber; }
+		uint32 BlockSize() const { return fVolume->BlockSize(); }
+		uint32 BlockShift() const { return fVolume->BlockShift(); }
+
+		// Transfer a whole block_run at once between "buffer" and the
+		// device, without going through an Uncached object.
+		static status_t Read(Volume *volume, block_run run, uint8 *buffer);
+		static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
+
+	private:
+		Volume	*fVolume;
+		off_t	fBlockNumber;
+		uint8	*fBlock;
+};
+
+// Block accessor built on CachedBlock; writes go through
+// Volume::WriteBlocks() (see the method definitions in this file).
+class Cached : public CachedBlock {
+	public:
+		Cached(Volume *volume);
+		Cached(Volume *volume, off_t block, bool empty = false);
+		Cached(Volume *volume, block_run run, bool empty = false);
+
+		// Writes the single block held by this object.
+		status_t WriteBack(Transaction *transaction);
+		// Read/write a complete block_run from/into "buffer".
+		static status_t Read(Volume *volume, block_run run, uint8 *buffer);
+		static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
+};
+
+// Block accessor built on CachedBlock whose multi-block Write() is
+// routed through the transaction (Transaction::WriteBlocks()).
+// It does not declare its own WriteBack(), so the inherited one (if
+// CachedBlock provides it) is used.
+class Logged : public CachedBlock {
+	public:
+		Logged(Volume *volume);
+		Logged(Volume *volume,off_t block, bool empty = false);
+		Logged(Volume *volume, block_run run, bool empty = false);
+
+		// Read/write a complete block_run from/into "buffer".
+		static status_t Read(Volume *volume, block_run run, uint8 *buffer);
+		static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
+};
+
+
+/**	Creates an accessor that does not refer to any block yet; call
+ *	SetTo() before using Block().
+ */
+Uncached::Uncached(Volume *volume)
+	:
+	fVolume(volume),
+	// no block has been chosen yet - use an invalid block number so
+	// that BlockNumber() does not return an indeterminate value
+	fBlockNumber(-1),
+	fBlock(NULL)
+{
+}
+
+
+/**	Creates an accessor and immediately points it to \a block (which is
+ *	zeroed instead of read if \a empty is true). Check Block() for NULL
+ *	to find out whether this worked.
+ */
+Uncached::Uncached(Volume *volume, off_t block, bool empty)
+	: fVolume(volume), fBlock(NULL)
+{
+	SetTo(block, empty);
+}
+
+
+/**	Creates an accessor and immediately points it to the first block of
+ *	\a run, as translated by Volume::ToBlock().
+ */
+Uncached::Uncached(Volume *volume, block_run run, bool empty)
+	: fVolume(volume), fBlock(NULL)
+{
+	off_t block = volume->ToBlock(run);
+	SetTo(block, empty);
+}
+
+
+/**	Gives the block buffer (if there is one) back via Unset(). */
+Uncached::~Uncached()
+{
+	this->Unset();
+}
+
+
+/**	Returns the block buffer to the volume's buffer pool.
+ *	It is safe to call this method more than once.
+ */
+void
+Uncached::Unset()
+{
+	if (fBlock == NULL)
+		return;
+
+	fVolume->Pool().PutBuffer((void *)fBlock);
+
+	// Forget the buffer: without this, Block() would keep returning
+	// memory we no longer own, and the next Unset() (for example from
+	// the destructor after a failed SetTo()) would release it twice.
+	fBlock = NULL;
+}
+
+
+/**	Points the accessor to \a block.
+ *	A buffer is taken from the volume's pool and either cleared (if
+ *	\a empty is true) or filled directly from the device.
+ *
+ *	\return the block buffer, or NULL if no buffer was available or the
+ *		block could not be read completely.
+ */
+uint8 *
+Uncached::SetTo(off_t block, bool empty)
+{
+	Unset();
+	// make sure no stale pointer survives if we bail out below
+	fBlock = NULL;
+	fBlockNumber = block;
+
+	if (fVolume->Pool().GetBuffer((void **)&fBlock) < B_OK) {
+		fBlock = NULL;
+		return NULL;
+	}
+
+	if (empty) {
+		memset(fBlock, 0, BlockSize());
+		return fBlock;
+	}
+
+	// An error or a short read would leave (parts of) the buffer with
+	// whatever the pool handed out, so don't pass that on as block data.
+	ssize_t bytesRead = read_pos(fVolume->Device(), fBlockNumber << BlockShift(),
+		fBlock, BlockSize());
+	if (bytesRead < (ssize_t)BlockSize()) {
+		fVolume->Pool().PutBuffer((void *)fBlock);
+		fBlock = NULL;
+		return NULL;
+	}
+
+	return fBlock;
+}
+
+
+/**	Convenience version of SetTo() that takes a block_run; only the
+ *	block that Volume::ToBlock() returns for \a run is accessed.
+ */
+uint8 *
+Uncached::SetTo(block_run run, bool empty)
+{
+	off_t block = fVolume->ToBlock(run);
+	return SetTo(block, empty);
+}
+
+
+/**	Writes the block buffer directly back to the device.
+ *	\a transaction is not used; the parameter only exists so that all
+ *	access classes share the same interface.
+ *
+ *	\return B_OK if the whole block was written, B_BAD_VALUE if there is
+ *		no block, the error reported by write_pos(), or B_IO_ERROR if the
+ *		block was only partially written.
+ */
+status_t
+Uncached::WriteBack(Transaction *transaction)
+{
+	if (fBlock == NULL)
+		RETURN_ERROR(B_BAD_VALUE);
+
+	// write_pos() returns the number of bytes written - that must not
+	// be handed out as a status_t, or success would never equal B_OK
+	ssize_t bytesWritten = write_pos(fVolume->Device(), fBlockNumber << BlockShift(),
+		fBlock, BlockSize());
+	if (bytesWritten < B_OK)
+		return bytesWritten;
+
+	return (uint32)bytesWritten == BlockSize() ? B_OK : B_IO_ERROR;
+}
+
+
+/**	Reads all blocks of \a run directly from the device into \a buffer,
+ *	which must be large enough to hold them.
+ *
+ *	\return B_OK if everything was read, the error reported by
+ *		read_pos(), or B_IO_ERROR on a short read.
+ */
+status_t
+Uncached::Read(Volume *volume, block_run run, uint8 *buffer)
+{
+	size_t length = run.Length() << volume->BlockShift();
+
+	// read_pos() returns a byte count, which is translated into a
+	// proper status code here (as the other access classes return)
+	ssize_t bytesRead = read_pos(volume->Device(),
+		volume->ToBlock(run) << volume->BlockShift(), buffer, length);
+	if (bytesRead < B_OK)
+		return bytesRead;
+
+	return (size_t)bytesRead == length ? B_OK : B_IO_ERROR;
+}
+
+
+/**	Writes all blocks of \a run from \a buffer directly to the device.
+ *	\a transaction is not used by this access class.
+ *
+ *	\return B_OK if everything was written, the error reported by
+ *		write_pos(), or B_IO_ERROR on a short write.
+ */
+status_t
+Uncached::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
+{
+	size_t length = run.Length() << volume->BlockShift();
+
+	// write_pos() returns a byte count, which is translated into a
+	// proper status code here (as the other access classes return)
+	ssize_t bytesWritten = write_pos(volume->Device(),
+		volume->ToBlock(run) << volume->BlockShift(), buffer, length);
+	if (bytesWritten < B_OK)
+		return bytesWritten;
+
+	return (size_t)bytesWritten == length ? B_OK : B_IO_ERROR;
+}
+
+
+//	#pragma mark -
+
+
+/**	Creates an accessor that is not set to any block yet. */
+Cached::Cached(Volume *volume)
+	:
+	CachedBlock(volume)
+{
+}
+
+
+/**	Creates an accessor for \a block; all arguments are passed on to
+ *	the CachedBlock constructor.
+ */
+Cached::Cached(Volume *volume, off_t block, bool empty)
+	:
+	CachedBlock(volume, block, empty)
+{
+}
+
+
+/**	Creates an accessor for \a run; all arguments are passed on to
+ *	the CachedBlock constructor.
+ */
+Cached::Cached(Volume *volume, block_run run, bool empty)
+	:
+	CachedBlock(volume, run, empty)
+{
+}
+
+
+/**	Writes the block held by this object using Volume::WriteBlocks().
+ *	Fails with B_BAD_VALUE if there is no transaction or no block.
+ */
+status_t
+Cached::WriteBack(Transaction *transaction)
+{
+	if (transaction != NULL && fBlock != NULL)
+		return fVolume->WriteBlocks(fBlockNumber, fBlock, 1);
+
+	RETURN_ERROR(B_BAD_VALUE);
+}
+
+
+/**	Reads all blocks of \a run into \a buffer with cached_read(). */
+status_t
+Cached::Read(Volume *volume, block_run run, uint8 *buffer)
+{
+	return cached_read(volume->Device(),
+		volume->ToBlock(run),
+		buffer,
+		run.Length(),
+		volume->BlockSize());
+}
+
+
+/**	Writes all blocks of \a run from \a buffer using
+ *	Volume::WriteBlocks(); \a transaction is not used here.
+ */
+status_t
+Cached::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
+{
+	return volume->WriteBlocks(volume->ToBlock(run),
+		buffer,
+		run.Length());
+}
+
+
+//	#pragma mark -
+
+
+/**	Creates an accessor that is not set to any block yet. */
+Logged::Logged(Volume *volume)
+	:
+	CachedBlock(volume)
+{
+}
+
+
+/**	Creates an accessor for \a block; all arguments are passed on to
+ *	the CachedBlock constructor.
+ */
+Logged::Logged(Volume *volume, off_t block, bool empty)
+	:
+	CachedBlock(volume, block, empty)
+{
+}
+
+
+/**	Creates an accessor for \a run; all arguments are passed on to
+ *	the CachedBlock constructor.
+ */
+Logged::Logged(Volume *volume, block_run run, bool empty)
+	:
+	CachedBlock(volume, run, empty)
+{
+}
+
+
+/**	Reads all blocks of \a run into \a buffer with cached_read(). */
+status_t
+Logged::Read(Volume *volume, block_run run, uint8 *buffer)
+{
+	return cached_read(volume->Device(),
+		volume->ToBlock(run),
+		buffer,
+		run.Length(),
+		volume->BlockSize());
+}
+
+
+/**	Writes all blocks of \a run from \a buffer through the given
+ *	transaction (Transaction::WriteBlocks()).
+ *	\a transaction is dereferenced unconditionally and must not be NULL.
+ */
+status_t
+Logged::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
+{
+	return transaction->WriteBlocks(volume->ToBlock(run),
+		buffer,
+		run.Length());
+}
+
+};	// namespace Access
+
+
+//	#pragma mark -
+
+
+// The Stream template class allows to have only one straight-forward
+// implementation of the FindBlockRun(), ReadAt(), and WriteAt() methods.
+// They will access the disk through the given cache class only, which
+// means either uncached, cached, or logged (see above).
+
+// Stream adds no data members to Inode; it only supplies the three
+// stream methods below, parameterized by the access class "Cache"
+// (one of the classes from the Access namespace).
+// NOTE(review): presumably an Inode pointer is cast to Stream<Cache> by
+// the callers - confirm in Inode.cpp.
+template<class Cache>
+class Stream : public Inode {
+	private:
+		// The constructor only exists to make the compiler happy - it
+		// is never called in the code itself
+		Stream() : Inode(NULL, -1) {}
+
+	public:
+		// Finds the block_run containing "pos", and the stream offset
+		// at which that run starts.
+		status_t FindBlockRun(off_t pos, block_run &run, off_t &offset);
+		// Read/write "*length" bytes at "pos"; on return, "*length"
+		// holds the number of bytes actually transferred.
+		status_t ReadAt(off_t pos, uint8 *buffer, size_t *length);
+		status_t WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *length);
+};
+
+
+/**	Finds the block_run of the data stream that contains the byte
+ *	position \a pos (see Inode::FindBlockRun() for the documentation of
+ *	this method).
+ *
+ *	Depending on \a pos, the run is looked up in the direct runs of the
+ *	inode, in the blocks of the indirect run, or via the two levels of
+ *	the double indirect run. Array blocks are accessed through \c Cache.
+ *
+ *	\param pos the position in the stream to look up
+ *	\param run is set to the block_run containing \a pos
+ *	\param offset is set to the stream position at which \a run starts
+ *	\return the result of Volume::ValidateBlockRun() for the run found,
+ *		B_ENTRY_NOT_FOUND if the direct runs end before \a pos, or
+ *		another error if an array block could not be accessed.
+ */
+
+template<class Cache>
+status_t
+Stream<Cache>::FindBlockRun(off_t pos, block_run &run, off_t &offset)
+{
+	data_stream *data = &Node()->data;
+
+	// find matching block run
+
+	if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) {
+		if (data->MaxDoubleIndirectRange() > 0 && pos >= data->MaxIndirectRange()) {
+			// access to double indirect blocks
+
+			Cache cached(fVolume);
+
+			// position relative to the start of the double indirect range
+			off_t start = pos - data->MaxIndirectRange();
+			// number of bytes covered by one entry of the first level array
+			int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + cached.BlockShift()))
+				* (fVolume->BlockSize() / sizeof(block_run));
+			// number of bytes covered by one entry of the second level array
+			int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift();
+			int32 index = start / indirectSize;
+			int32 runsPerBlock = cached.BlockSize() / sizeof(block_run);
+
+			block_run *indirect = (block_run *)cached.SetTo(
+					fVolume->ToBlock(data->double_indirect) + index / runsPerBlock);
+			if (indirect == NULL)
+				RETURN_ERROR(B_ERROR);
+
+			int32 current = (start % indirectSize) / directSize;
+
+			indirect = (block_run *)cached.SetTo(
+					fVolume->ToBlock(indirect[index % runsPerBlock]) + current / runsPerBlock);
+			if (indirect == NULL)
+				RETURN_ERROR(B_ERROR);
+
+			run = indirect[current % runsPerBlock];
+
+			// The products have to be computed in 64 bit: "index *
+			// indirectSize" exceeds the int32 range as soon as the
+			// position is 2 GB or more into the double indirect range.
+			offset = data->MaxIndirectRange() + ((off_t)index * indirectSize)
+				+ ((off_t)current * directSize);
+		} else {
+			// access to indirect blocks
+
+			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
+			off_t runBlockEnd = data->MaxDirectRange();
+
+			Cache cached(fVolume);
+			off_t block = fVolume->ToBlock(data->indirect);
+
+			// walk through all array blocks of the indirect run, and sum
+			// up the run lengths until the one containing "pos" is reached
+			for (int32 i = 0; i < data->indirect.Length(); i++) {
+				block_run *indirect = (block_run *)cached.SetTo(block + i);
+				if (indirect == NULL)
+					RETURN_ERROR(B_IO_ERROR);
+
+				int32 current = -1;
+				while (++current < runsPerBlock) {
+					// an empty run marks the end of the array
+					if (indirect[current].IsZero())
+						break;
+
+					runBlockEnd += indirect[current].Length() << cached.BlockShift();
+					if (runBlockEnd > pos) {
+						run = indirect[current];
+						offset = runBlockEnd - (run.Length() << cached.BlockShift());
+						return fVolume->ValidateBlockRun(run);
+					}
+				}
+			}
+			RETURN_ERROR(B_ERROR);
+		}
+	} else {
+		// access from direct blocks
+
+		off_t runBlockEnd = 0LL;
+		int32 current = -1;
+
+		while (++current < NUM_DIRECT_BLOCKS) {
+			// an empty run marks the end of the array
+			if (data->direct[current].IsZero())
+				break;
+
+			runBlockEnd += data->direct[current].Length() << fVolume->BlockShift();
+			if (runBlockEnd > pos) {
+				run = data->direct[current];
+				offset = runBlockEnd - (run.Length() << fVolume->BlockShift());
+				return fVolume->ValidateBlockRun(run);
+			}
+		}
+		return B_ENTRY_NOT_FOUND;
+	}
+
+	// only the double indirect case gets here
+	return fVolume->ValidateBlockRun(run);
+}
+
+
+/**	Reads up to \a *_length bytes from the data stream, starting at
+ *	\a pos, into \a buffer.
+ *	The request is clipped to the size of the stream (data.Size()).
+ *	A leading and a trailing partial block are copied via a \c Cache
+ *	object, complete blocks in between are read with Cache::Read()
+ *	directly into \a buffer.
+ *
+ *	\param _length in: number of bytes to read, out: number of bytes
+ *		that have actually been read
+ *	\return B_OK on success (also if nothing was read because \a pos is
+ *		at or behind the end of the stream), B_BAD_VALUE if \a pos is
+ *		negative, or if a block run or block could not be retrieved.
+ */
+template<class Cache>
+status_t
+Stream<Cache>::ReadAt(off_t pos, uint8 *buffer, size_t *_length)
+{
+	size_t length = *_length;
+
+	// set/check boundaries for pos/length
+	if (pos < 0)
+		return B_BAD_VALUE;
+	if (pos >= Node()->data.Size() || length == 0) {
+		*_length = 0;
+		return B_NO_ERROR;
+	}
+
+	if (pos + length > Node()->data.Size())
+		length = Node()->data.Size() - pos;
+
+	block_run run;
+	off_t offset;
+	if (FindBlockRun(pos, run, offset) < B_OK) {
+		*_length = 0;
+		RETURN_ERROR(B_BAD_VALUE);
+	}
+
+	uint32 bytesRead = 0;
+	uint32 blockSize = fVolume->BlockSize();
+	uint32 blockShift = fVolume->BlockShift();
+	uint8 *block;
+
+	// the first block_run we read could not be aligned to the block_size boundary
+	// (read partial block at the beginning)
+
+	// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
+	if (pos % blockSize != 0) {
+		// let the run start at the block that contains "pos"
+		run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
+		run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
+
+		Cache cached(fVolume,run);
+		if ((block = cached.Block()) == NULL) {
+			*_length = 0;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+
+		// copy from "pos" up to the end of this block at most
+		bytesRead = blockSize - (pos % blockSize);
+		if (length < bytesRead)
+			bytesRead = length;
+
+		memcpy(buffer, block + (pos % blockSize), bytesRead);
+		pos += bytesRead;
+
+		length -= bytesRead;
+		if (length == 0) {
+			*_length = bytesRead;
+			return B_OK;
+		}
+
+		// "pos" is block aligned now; look up the run it belongs to
+		if (FindBlockRun(pos, run, offset) < B_OK) {
+			*_length = bytesRead;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+	}
+
+	// the first block_run is already filled in at this point
+	// read the following complete blocks using cached_read(),
+	// the last partial block is read using the generic Cache class
+
+	bool partial = false;
+
+	while (length > 0) {
+		// offset is the offset to the current pos in the block_run
+		run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
+		run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
+
+		if (uint32(run.Length() << blockShift) > length) {
+			// the rest of the run is larger than what is left to read
+			if (length < blockSize) {
+				// less than a block remains: copy it out of a single
+				// block, and we're done
+				Cache cached(fVolume, run);
+				if ((block = cached.Block()) == NULL) {
+					*_length = bytesRead;
+					RETURN_ERROR(B_BAD_VALUE);
+				}
+				memcpy(buffer + bytesRead, block, length);
+				bytesRead += length;
+				break;
+			}
+			// read only the complete blocks now, the remainder is
+			// handled in the next iteration
+			run.length = HOST_ENDIAN_TO_BFS_INT16(length >> blockShift);
+			partial = true;
+		}
+
+		if (Cache::Read(fVolume, run, buffer + bytesRead) < B_OK) {
+			*_length = bytesRead;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+
+		int32 bytes = run.Length() << blockShift;
+#ifdef DEBUG
+		if ((uint32)bytes > length)
+			DEBUGGER(("bytes greater than length"));
+#endif
+		length -= bytes;
+		bytesRead += bytes;
+		if (length == 0)
+			break;
+
+		pos += bytes;
+
+		if (partial) {
+			// if the last block was read only partially, point block_run
+			// to the remaining part
+			run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + run.Length());
+			run.length = HOST_ENDIAN_TO_BFS_INT16(1);
+			offset = pos;
+		} else if (FindBlockRun(pos, run, offset) < B_OK) {
+			*_length = bytesRead;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+	}
+
+	*_length = bytesRead;
+	return B_OK;
+}
+
+
+/**	Writes \a *_length bytes from \a buffer to the data stream, starting
+ *	at \a pos.
+ *	If the write reaches beyond the current size, the stream is grown
+ *	with SetFileSize() first (not possible for INODE_NO_CACHE inodes),
+ *	and FillGapWithZeros() is called for the range between the old size
+ *	and \a pos.
+ *	A leading and a trailing partial block are modified via a \c Cache
+ *	object and its WriteBack(), complete blocks in between are written
+ *	with Cache::Write() directly from \a buffer.
+ *
+ *	\param _length in: number of bytes to write, out: number of bytes
+ *		that have actually been written
+ *	\return B_OK on success, B_BAD_VALUE if \a pos is negative, if an
+ *		INODE_NO_CACHE inode would have to grow, or if a block run or
+ *		block could not be retrieved/written; the error of SetFileSize()
+ *		if growing the stream failed.
+ *
+ *	NOTE(review): \a transaction is dereferenced without a NULL check,
+ *	and the results of FillGapWithZeros() and cached.WriteBack() are not
+ *	looked at - confirm that this is intended.
+ */
+template<class Cache>
+status_t
+Stream<Cache>::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length)
+{
+	size_t length = *_length;
+
+	// set/check boundaries for pos/length
+	if (pos < 0)
+		return B_BAD_VALUE;
+
+	if (pos + length > Size()) {
+		off_t oldSize = Size();
+
+		// uncached files can't be resized (Inode::SetFileSize() also
+		// doesn't allow this, but this way we don't have to start a
+		// transaction to find out).
+		if (Flags() & INODE_NO_CACHE)
+			return B_BAD_VALUE;
+
+		// the transaction doesn't have to be started already
+		// ToDo: what's that INODE_NO_TRANSACTION flag good for again?
+		if ((Flags() & INODE_NO_TRANSACTION) == 0
+			&& !transaction->IsStarted())
+			transaction->Start(fVolume, BlockNumber());
+
+		// let's grow the data stream to the size needed
+		status_t status = SetFileSize(transaction, pos + length);
+		if (status < B_OK) {
+			*_length = 0;
+			RETURN_ERROR(status);
+		}
+		// If the position of the write was beyond the file size, we
+		// have to fill the gap between that position and the old file
+		// size with zeros.
+		FillGapWithZeros(oldSize, pos);
+	}
+
+	// If we don't want to write anything, we can now return (we may
+	// just have changed the file size using the position parameter)
+	if (length == 0)
+		return B_OK;
+
+	block_run run;
+	off_t offset;
+	if (FindBlockRun(pos, run, offset) < B_OK) {
+		*_length = 0;
+		RETURN_ERROR(B_BAD_VALUE);
+	}
+
+	// streams of inodes with the INODE_LOGGED flag need a running
+	// transaction for the writes below
+	bool logStream = (Flags() & INODE_LOGGED) == INODE_LOGGED;
+	if (logStream
+		&& !transaction->IsStarted())
+		transaction->Start(fVolume, BlockNumber());
+
+	uint32 bytesWritten = 0;
+	uint32 blockSize = fVolume->BlockSize();
+	uint32 blockShift = fVolume->BlockShift();
+	uint8 *block;
+
+	// the first block_run we write could not be aligned to the block_size boundary
+	// (write partial block at the beginning)
+
+	// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
+	if (pos % blockSize != 0) {
+		// let the run start at the block that contains "pos"
+		run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
+		run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
+
+		Cache cached(fVolume, run);
+		if ((block = cached.Block()) == NULL) {
+			*_length = 0;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+
+		// change the block from "pos" up to its end at most
+		bytesWritten = blockSize - (pos % blockSize);
+		if (length < bytesWritten)
+			bytesWritten = length;
+
+		memcpy(block + (pos % blockSize),buffer,bytesWritten);
+
+		cached.WriteBack(transaction);
+
+		pos += bytesWritten;
+		
+		length -= bytesWritten;
+		if (length == 0) {
+			*_length = bytesWritten;
+			return B_OK;
+		}
+
+		// "pos" is block aligned now; look up the run it belongs to
+		if (FindBlockRun(pos, run, offset) < B_OK) {
+			*_length = bytesWritten;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+	}
+
+	// the first block_run is already filled in at this point
+	// write the following complete blocks using Volume::WriteBlocks(),
+	// the last partial block is written using the generic Cache class
+
+	bool partial = false;
+
+	while (length > 0) {
+		// offset is the offset to the current pos in the block_run
+		run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
+		run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
+
+		if (uint32(run.Length() << blockShift) > length) {
+			// the rest of the run is larger than what is left to write
+			if (length < blockSize) {
+				// less than a block remains: change only the start of
+				// a single block, and we're done
+				Cache cached(fVolume,run);
+				if ((block = cached.Block()) == NULL) {
+					*_length = bytesWritten;
+					RETURN_ERROR(B_BAD_VALUE);
+				}
+				memcpy(block, buffer + bytesWritten, length);
+
+				cached.WriteBack(transaction);
+
+				bytesWritten += length;
+				break;
+			}
+			// write only the complete blocks now, the remainder is
+			// handled in the next iteration
+			run.length = HOST_ENDIAN_TO_BFS_INT16(length >> blockShift);
+			partial = true;
+		}
+
+		if (Cache::Write(transaction, fVolume, run, buffer + bytesWritten) < B_OK) {
+			*_length = bytesWritten;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+
+		int32 bytes = run.Length() << blockShift;
+		length -= bytes;
+		bytesWritten += bytes;
+		if (length == 0)
+			break;
+
+		pos += bytes;
+
+		if (partial) {
+			// if the last block was written only partially, point block_run
+			// to the remaining part
+			run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + run.Length());
+			run.length = HOST_ENDIAN_TO_BFS_INT16(1);
+			offset = pos;
+		} else if (FindBlockRun(pos, run, offset) < B_OK) {
+			*_length = bytesWritten;
+			RETURN_ERROR(B_BAD_VALUE);
+		}
+	}
+
+	*_length = bytesWritten;
+
+	return B_OK;
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/ToDo
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/ToDo
@ -0,0 +1,84 @@
+BFS - ToDo, May 26th, 2003
+-----
+
+BlockAllocator
+
+ - the BlockAllocator is only slightly optimized and probably slow
+ - the first free and the largest range are currently not correctly maintained (only efficiency suffers - it does work correctly)
+ - the allocation policies will have to stand against some real world tests
+ - the access to the block bitmap is currently managed using a global lock (doesn't matter as long as transactions are serialized)
+
+
+DataStream
+
+ - only files are trimmed back (in bfs_close()), but every inode has a preallocated stream...
+ - Inode::GrowStream(): merging of block_runs doesn't work between range/block boundaries
+ - check the array block size in BFS for different block sizes
+
+
+Queries
+
+ - There shouldn't be any cases where you can speed up a query by reordering the query expression - test it
+ - Check permissions of the parent directories
+ - Add protection against crashing applications which had a query open - at least the original BeOS kernel does not free the cookie (which throws some memory away *and* prevents unmounting the disk), but that won't be needed for OpenBeOS
+ - the query set for "!=" and last_modified/size is not the same as for "="; last_modified/size don't contain directories
+ - check if the query has to be checked for a live update
+ - accept hex values 0x...
+
+
+Journal
+
+ - Check if there are any standard and often-happening cases for a transaction to fail, and if so, start the transaction only when necessary
+ - if the system crashes between bfs_unlink() and bfs_remove_vnode(), the inode can be removed from the tree, but its memory is still allocated - this can happen if the inode is still in use by someone (and that's what the "chkbfs" utility is for, mainly).
+ - add delayed index updating (+ delete actions to solve the issue above)
+ - multiple log files, parallel transactions? (note that parallel transactions would require more locking to be done)
+ - variable sized log file
+ - as long as we have a fixed-sized log file, it should be possible to reserve space for a transaction to be able to decide if batching it is possible
+
+
+BPlusTree
+
+ - BPlusTree::Remove() could trigger CachedNode::Free() to go through the free nodes list and free all pages at the end of the data stream
+ - BPlusTree::Remove() could let the tree shrink (simple kind of reorganization)
+ - updating the TreeIterators doesn't work yet for duplicates (which may be a problem if a duplicate node will go away after a remove)
+ - BPlusTree::RemoveDuplicate() could merge the contents of duplicate node with only a few entries to save some space (right now, only empty nodes are freed)
+
+
+Inode
+
+ - exchange Inode::OldLastModified() with Inode::NewLastModified(), and don't change the last_modified field directly in Inode::WriteAt() for consistency in case of a crash
+ - the size is only updated in bfs_close() - but if the system crashes before, the entry in the size index doesn't match the one in the inode anymore - it would be better to let the data.size not reflect the real file size in this case (since the max_xxx_range entries are always correct)
+ - Inode::FillGapWithZeros() is currently disabled; apart from being slow, it really shouldn't be executed while a transaction is running, because that stops all other threads from doing anything (which can be a long time for a 100 MB file)
+ - need better locking mechanism in combination with B+trees etc.!
+
+
+Indices
+
+ - consider Index::UpdateLastModified() writing back the updated inode
+ - clean up Index::Update() and the live query update (both seem to be a bit confusing right now)
+ - investigate adding an index cache to improve performance
+
+Attributes
+
+ - Inode::WriteAttribute() doesn't check if the attribute data may fit into the small_data region if there already is that attribute as an attribute file
+ - for indices, we could get the old data from there when doing a query update
+
+
+Volume
+
+
+kernel_interface
+
+ - missing functions, maybe they are not really needed: bfs_rename_attr(), bfs_rename_index(), bfs_initialize(), bfs_link()
+ - bfs_rename() currently doesn't respect any permissions
+
+
+general stuff
+
+ - There are also some comments with a leading "ToDo:" directly in the code which may not be mentioned here.
+ - implement mkbfs (try to do it in OpenBeOS style directly - only write the super block from user space)
+
+
+-----
+Axel Dörfler
+axeld@pinc-software.de
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Utility.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Utility.cpp
@ -0,0 +1,158 @@
+/* Utility - some helper classes
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Utility.h"
+#include "Debug.h"
+
+#include <util/kernel_cpp.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+
+/**	Binary search for \a value in the sorted values.
+ *	Returns true if it was found, with \a index set to its position.
+ *	If it is not found, \a index is left at the last position probed
+ *	(and is not touched at all if the array is empty).
+ */
+bool
+sorted_array::FindInternal(off_t value, int32 &index) const
+{
+	int32 low = 0;
+	int32 high = count - 1;
+
+	while (low <= high) {
+		index = (low + high) / 2;
+
+		off_t difference = values[index] - value;
+		if (difference == 0)
+			return true;
+
+		if (difference < 0)
+			low = index + 1;
+		else
+			high = index - 1;
+	}
+
+	return false;
+}
+
+
+/**	Inserts \a value at its sorted position.
+ *	The caller has to make sure that there is room for one more entry
+ *	behind the current values.
+ */
+void
+sorted_array::Insert(off_t value)
+{
+	int32 position;
+
+	if (count > 8) {
+		// larger arrays: let the binary search find the spot; if the
+		// value isn't there yet, and the last probed entry is not
+		// greater, the new value belongs behind that entry
+		bool found = FindInternal(value, position);
+		if (!found && values[position] <= value)
+			position++;
+	} else {
+		// small arrays: skip linearly over all entries that are not
+		// greater than the new value
+		position = 0;
+		while (position < count && values[position] <= value)
+			position++;
+	}
+
+	// open a gap at the insertion point
+	memmove(values + position + 1, values + position,
+		(count - position) * sizeof(off_t));
+	values[position] = value;
+	count++;
+}
+
+
+/**	Removes \a value from the array.
+ *	\return true if the value was found and removed, false if not.
+ */
+bool
+sorted_array::Remove(off_t value)
+{
+	int32 index = Find(value);
+	if (index == -1)
+		return false;
+
+	// Close the gap: only the (count - index - 1) entries behind the
+	// removed one have to move. Moving (count - index) entries would
+	// read one element past the last valid value - and past the end of
+	// the buffer if the array is completely filled.
+	memmove(&values[index], &values[index + 1],
+		(count - index - 1) * sizeof(off_t));
+	count--;
+
+	return true;
+}
+
+
+//	#pragma mark -
+
+
+/**	Creates an empty array; no memory is allocated until the first
+ *	Insert(). \a blockSize is the step in bytes by which the array grows.
+ */
+BlockArray::BlockArray(int32 blockSize)
+	:
+	fArray(NULL),
+	fBlockSize(blockSize),
+	fSize(0),
+	// there is no room for any entry before the first allocation
+	fMaxBlocks(0)
+{
+}
+
+
+/**	Frees the array, if one has been allocated. */
+BlockArray::~BlockArray()
+{
+	if (fArray != NULL)
+		free(fArray);
+}
+
+
+/**	Returns the index of \a value in the array, or -1 if the array
+ *	does not exist yet or does not contain the value.
+ */
+int32
+BlockArray::Find(off_t value)
+{
+	return fArray != NULL ? fArray->Find(value) : -1;
+}
+
+
+/**	Inserts \a value into the sorted array, enlarging the array by
+ *	another "blockSize" bytes if it is full (or doesn't exist yet).
+ *	\return B_OK on success, B_NO_MEMORY if the array could not grow.
+ */
+status_t
+BlockArray::Insert(off_t value)
+{
+	bool needsSpace = fArray == NULL || fArray->count + 1 > fMaxBlocks;
+
+	if (needsSpace) {
+		int32 newSize = fSize + fBlockSize;
+
+		sorted_array *grown = (sorted_array *)realloc(fArray, newSize);
+		if (grown == NULL)
+			return B_NO_MEMORY;
+
+		// a freshly allocated array doesn't contain anything yet
+		if (fArray == NULL)
+			grown->count = 0;
+
+		fArray = grown;
+		fSize = newSize;
+		// one off_t sized slot is occupied by the "count" field
+		fMaxBlocks = fSize / sizeof(off_t) - 1;
+	}
+
+	fArray->Insert(value);
+	return B_OK;
+}
+
+
+/**	Removes \a value from the array.
+ *	\return B_OK if it was removed, B_ENTRY_NOT_FOUND if the array does
+ *		not exist yet or does not contain the value.
+ */
+status_t
+BlockArray::Remove(off_t value)
+{
+	if (fArray != NULL && fArray->Remove(value))
+		return B_OK;
+
+	return B_ENTRY_NOT_FOUND;
+}
+
+
+/**	Removes all entries from the array; the memory is kept for reuse. */
+void
+BlockArray::MakeEmpty()
+{
+	// The array is only allocated by the first Insert(), so there may
+	// be nothing to empty yet (all other methods check this as well).
+	if (fArray != NULL)
+		fArray->count = 0;
+}
+
+
+//	#pragma mark -
+
+
+/**	Copies at most \a length - 1 characters from \a source to \a dest,
+ *	and always null-terminates \a dest (unless \a length is 0, in which
+ *	case \a dest is not touched).
+ *	\return the length of \a source, so that truncation can be detected.
+ */
+extern "C" size_t
+strlcpy(char *dest, char const *source, size_t length)
+{
+	if (length == 0)
+		return strlen(source);
+
+	size_t copied = 0;
+	while (copied + 1 < length && source[copied] != '\0') {
+		dest[copied] = source[copied];
+		copied++;
+	}
+	dest[copied] = '\0';
+
+	// add whatever did not fit to get the complete source length
+	return copied + strlen(source + copied);
+}
+
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Utility.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Utility.h
@ -0,0 +1,201 @@
+#ifndef UTILITY_H
+#define UTILITY_H
+/* Utility - some helper classes
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <SupportDefs.h>
+
+
+// Simple array, used for the duplicate handling in the B+Tree,
+// and for the log entries.
+
+// A counted array of off_t values that is kept in ascending order.
+// The structure is only a header: the values follow "count" directly
+// in memory, and whoever allocates it has to reserve the space for them
+// (see BlockArray for an example).
+struct sorted_array {
+	public:
+		// number of valid entries in "values"
+		off_t	count;
+		
+		// trailing variable sized array; Metrowerks does not accept a
+		// zero sized array, hence the one element variant
+		#if __MWERKS__
+			off_t	values[1];
+		#else
+			off_t	values[0];
+		#endif
+
+		// returns the index of "value", or -1 if it is not in the array
+		inline int32 Find(off_t value) const;
+		// inserts "value" at its sorted position (space must be there)
+		void Insert(off_t value);
+		// removes "value", returns false if it was not in the array
+		bool Remove(off_t value);
+
+	private:
+		// binary search; "index" receives the position found or probed last
+		bool FindInternal(off_t value,int32 &index) const;
+};
+
+
+// Returns the index of "value" in the array, or -1 if it is not there.
+inline int32
+sorted_array::Find(off_t value) const
+{
+	int32 index;
+	if (FindInternal(value, index))
+		return index;
+
+	return -1;
+}
+
+
+// The BlockArray reserves memory in multiples of "blockSize", and
+// grows the array as needed for new entries.
+// This is used for the in-memory log entries before they
+// are written to disk.
+
+// Owns a heap allocated sorted_array that grows in steps of "blockSize"
+// bytes.
+class BlockArray {
+	public:
+		BlockArray(int32 blockSize);
+		~BlockArray();
+
+		int32 Find(off_t value);
+		status_t Insert(off_t value);
+		status_t Remove(off_t value);
+
+		void MakeEmpty();
+
+		// number of values stored (0 if nothing has been allocated yet)
+		int32 CountItems() const { return fArray != NULL ? fArray->count : 0; }
+		// number of "blockSize" sized blocks needed to hold the count
+		// field plus all values, rounded up
+		int32 BlocksUsed() const { return fArray != NULL ? ((fArray->count + 1) * sizeof(off_t) + fBlockSize - 1) / fBlockSize : 0; }
+		// direct access to the array (NULL before the first Insert())
+		sorted_array *Array() const { return fArray; }
+		// number of bytes currently allocated for the array
+		int32 Size() const { return fSize; }
+
+	private:
+		sorted_array *fArray;
+		int32	fBlockSize;
+		int32	fSize;
+		// number of values that fit into the current allocation
+		int32	fMaxBlocks;
+};
+
+
+// Doubly linked list
+
+// Link part of a doubly linked list element; works together with the
+// "list" template below, whose head and tail are sentinel nodes.
+// NOTE(review): presumably "Node" is expected to derive from node<Node>
+// with the links as its first members - confirm against the users.
+template<class Node> struct node {
+	Node *next,*prev;
+
+	// Unlinks this node from its neighbours. Both neighbours must
+	// exist (they are dereferenced without a check); the node's own
+	// next/prev pointers are left as they are.
+	void
+	Remove()
+	{
+		prev->next = next;
+		next->prev = prev;
+	}
+
+	// Returns the following node, or NULL if the successor is a node
+	// without a successor of its own (i.e. the tail sentinel of "list").
+	Node *
+	Next()
+	{
+		if (next && next->next != NULL)
+			return next;
+
+		return NULL;
+	}
+};
+
+// Head of a doubly linked list of node<> elements.
+// The three pointers are used as two overlapping sentinel nodes:
+// (head, tail) is treated as the next/prev pair of a head node, and
+// (tail, last) as the next/prev pair of a tail node - "tail" always
+// stays NULL. This relies on "Node" starting with the next/prev
+// pointers of node<Node> (TODO confirm for all users).
+template<class Node> struct list {
+	Node *head,*tail,*last;
+
+	// Sets up the empty list: the head sentinel points to the tail
+	// sentinel, and vice versa.
+	list()
+	{
+		head = (Node *)&tail;
+		tail = NULL;
+		last = (Node *)&head;
+	}
+
+	// Appends "entry" to the end of the list, i.e. between the current
+	// last element (or the head sentinel) and the tail sentinel.
+	void
+	Add(Node *entry)
+	{
+		entry->next = (Node *)&tail;
+		entry->prev = last;
+		last->next = entry;
+		last = entry;
+	}
+};
+
+
+// Some atomic operations that are somehow missing in BeOS:
+//
+//	_atomic_test_and_set(value, newValue, testAgainst)
+//		sets "value" to "newValue", if "value" is equal to "testAgainst"
+//	_atomic_set(value, newValue)
+//		sets "value" to "newValue"
+
+// Three variants follow, selected at compile time: plain C++ (not
+// atomic at all), x86 inline assembly, and PowerPC for the Metrowerks
+// compiler. Any other target ends in the #error at the bottom.
+#if _NO_INLINE_ASM
+	// Note that these atomic versions *don't* work as expected!
+	// They are only used for single processor user space tests
+	// (and don't even work correctly there)
+	inline int32
+	_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
+	{
+		// returns the previous value, whether or not it was replaced
+		int32 oldValue = *value;
+		if (oldValue == testAgainst)
+			*value = newValue;
+
+		return oldValue;
+	}
+
+	inline void
+	_atomic_set(volatile int32 *value, int32 newValue)
+	{
+		*value = newValue;
+	}
+#elif __INTEL__
+	inline int32
+	_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
+	{
+		// cmpxchg compares %eax (testAgainst) with *value, stores %ecx
+		// (newValue) if they are equal, and leaves the previous content
+		// of *value in %eax, which is returned.
+		// NOTE(review): there is no "memory"/"cc" clobber declared -
+		// check whether the compiler in use needs one.
+		int32 oldValue;
+		asm volatile("lock; cmpxchg %%ecx, (%%edx)"
+			: "=a" (oldValue) : "a" (testAgainst), "c" (newValue), "d" (value));
+		return oldValue;
+	}
+
+	inline void
+	_atomic_set(volatile int32 *value, int32 newValue)
+	{
+		// NOTE(review): xchg also writes the old content of *value back
+		// into %eax, which is declared as an input only - confirm that
+		// this is safe with the compiler in use.
+		asm volatile("lock; xchg %%eax, (%%edx)"
+			: : "a" (newValue), "d" (value));
+	}
+#elif __POWERPC__ && __MWERKS__ /* GCC has different assembler syntax */
+// NOTE(review): unlike the variants above, this _atomic_set() is
+// declared to return int32, and copies r5 into the return register
+// although the function has only two parameters - verify.
+inline asm int32
+	_atomic_set(volatile int32 *value, int32)
+	{
+		loop:
+			dcbf	r0, r3;
+			lwarx	r0, 0, r3;
+			stwcx.	r4, 0, r3;
+			bc        5, 2, loop
+		mr r3,r5;
+		isync;
+		blr;	
+	}
+	
+// lwarx/stwcx. loop: stores newValue only if the loaded value equals
+// testAgainst, and returns the loaded value in either case.
+inline asm int32
+	_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
+	{
+		loop:
+			dcbf	r0, r3;
+			lwarx	r0, 0, r3;
+			cmpw	r5, r0;
+			bne		no_dice;
+			stwcx.	r4, 0, r3;
+			bc      5, 2, loop
+			
+		mr 		r3,r0;
+		isync;
+		blr;
+		
+		no_dice:
+			stwcx.	r0, 0, r3;
+			mr 		r3,r0;
+			isync;
+			blr;
+	}
+			
+#else
+#	error The macros _atomic_set(), and _atomic_test_and_set() are not defined for the target processor
+#endif
+
+
+extern "C" size_t strlcpy(char *dest, char const *source, size_t length);
+
+
+#endif	/* UTILITY_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Volume.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Volume.cpp
@ -0,0 +1,653 @@
+/* Volume - BFS super block, mounting, etc.
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include "Debug.h"
+#include "Volume.h"
+#include "Journal.h"
+#include "Inode.h"
+#include "Query.h"
+
+#include <util/kernel_cpp.h>
+#include <KernelExport.h>
+#include <Drivers.h>
+#include <fs_volume.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+
+static const int32 kDesiredAllocationGroups = 56;
+	// This is the number of allocation groups we try to achieve
+	// for newly initialized disks.
+	// That's only relevant for smaller disks, though, since any
+	// of today's disk sizes already reach the maximum length
+	// of an allocation group (65536 blocks).
+	// It seems to create appropriate numbers for smaller disks
+	// with this setting, though (i.e. you can create a 400 MB
+	// file on a 1 GB disk without the need for double indirect
+	// blocks).
+
+
+/** Helper that opens a device and remembers whether a block cache has
+ *	been initialized for it. The destructor closes the descriptor again
+ *	(and removes the cache, if any), unless Keep() has been called before.
+ */
+
+class DeviceOpener {
+	public:
+		DeviceOpener(const char *device, int mode);
+		~DeviceOpener();
+
+		// (re-)opens the device, returns the new descriptor
+		int Open(const char *device, int mode);
+		status_t InitCache(off_t numBlocks);
+		void RemoveCache(int mode);
+
+		// makes the destructor leave the descriptor alone
+		void Keep();
+
+		int Device() const { return fDevice; }
+
+		status_t GetSize(off_t *_size, uint32 *_blockSize = NULL);
+
+	private:
+		int		fDevice;
+			// descriptor as returned by open(), or -1 after Keep()
+		bool	fCached;
+			// true while a cache set up by InitCache() is active
+};
+
+
+/** Opens \a device using the open() \a mode given; the result can be
+ *	checked via Device(). No cache is set up at this point.
+ */
+
+DeviceOpener::DeviceOpener(const char *device, int mode)
+	: fCached(false)
+{
+	fDevice = open(device, mode);
+}
+
+
+/** Releases the device, unless Keep() has been called (or opening it
+ *	failed in the first place). A cache that is still attached is
+ *	dropped without writing anything back.
+ */
+
+DeviceOpener::~DeviceOpener()
+{
+	if (fDevice < B_OK)
+		return;
+
+	// The cache has to be removed while the descriptor is still open;
+	// once closed, the number may already refer to another file.
+	if (fCached)
+		remove_cached_device_blocks(fDevice, NO_WRITES);
+
+	close(fDevice);
+}
+
+
+/** Opens \a device with the open() \a mode given, and remembers the
+ *	resulting descriptor, which is also returned.
+ */
+
+int
+DeviceOpener::Open(const char *device, int mode)
+{
+	return fDevice = open(device, mode);
+}
+
+
+/** Initializes the block cache for the device with \a numBlocks blocks.
+ *	Returns B_OK on success, and B_ERROR if the cache could not be set up.
+ */
+
+status_t
+DeviceOpener::InitCache(off_t numBlocks)
+{
+	if (init_cache_for_device(fDevice, numBlocks) != B_OK)
+		return B_ERROR;
+
+	fCached = true;
+	return B_OK;
+}
+
+
+/** Removes the cache set up by InitCache(), if there is one; \a mode is
+ *	passed on to remove_cached_device_blocks().
+ */
+
+void
+DeviceOpener::RemoveCache(int mode)
+{
+	if (fCached) {
+		remove_cached_device_blocks(fDevice, mode);
+		fCached = false;
+	}
+}
+
+
+/** Forgets the descriptor, so that the destructor will neither close
+ *	the device nor remove its cache.
+ */
+
+void 
+DeviceOpener::Keep()
+{
+	fDevice = -1;
+}
+
+
+/** Retrieves the size of the device in bytes, and optionally its block
+ *	size. The size is computed from the B_GET_GEOMETRY result; if that
+ *	ioctl fails, fstat() is used instead (with a fixed block size of 512).
+ *	Both output pointers may be NULL.
+ */
+
+status_t
+DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize)
+{
+	device_geometry geometry;
+	if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) >= 0) {
+		if (_size) {
+			*_size = 1LL * geometry.head_count * geometry.cylinder_count
+						* geometry.sectors_per_track * geometry.bytes_per_sector;
+		}
+		if (_blockSize)
+			*_blockSize = geometry.bytes_per_sector;
+
+		return B_OK;
+	}
+
+	// no geometry available - maybe it's just a file
+	struct stat stat;
+	if (fstat(fDevice, &stat) < 0)
+		return B_ERROR;
+
+	if (_size)
+		*_size = stat.st_size;
+	if (_blockSize) {
+		// that shouldn't cause us any problems
+		*_blockSize = 512;
+	}
+
+	return B_OK;
+}
+
+
+//	#pragma mark -
+
+
+/** Performs a number of sanity checks on the super block; returns
+ *	\c true only if all of them pass.
+ */
+
+bool
+disk_super_block::IsValid()
+{
+	// the three magic numbers must be in place
+	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
+		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
+		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3)
+		return false;
+
+	// block and inode size must be identical, and the byte order known
+	if ((int32)block_size != inode_size
+		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN)
+		return false;
+
+	// the block shift must correspond to the block size
+	if ((1UL << BlockShift()) != BlockSize())
+		return false;
+
+	if (AllocationGroups() < 1
+		|| AllocationGroupShift() < 1
+		|| BlocksPerAllocationGroup() < 1
+		|| NumBlocks() < 10)
+		return false;
+
+	// the number of allocation groups must cover all blocks
+	return AllocationGroups() == divide_roundup(NumBlocks(),
+		1L << AllocationGroupShift());
+}
+
+
+/** Fills in a fresh super block for a disk named \a diskName that
+ *	consists of \a numBlocks blocks of \a blockSize bytes each.
+ *	The log, root directory, and indices fields are left zeroed, and
+ *	have to be set by the caller.
+ *	All computations are done in local (host endian) variables; the
+ *	on-disk fields are only written once, converted to BFS endianness.
+ */
+
+void
+disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize)
+{
+	memset(this, 0, sizeof(disk_super_block));
+
+	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
+	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
+	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
+	fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
+	flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
+
+	strlcpy(name, diskName, sizeof(name));
+
+	// the block shift is the binary logarithm of the block size
+	uint32 blockShift = 9;
+	while ((1UL << blockShift) < blockSize)
+		blockShift++;
+
+	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
+	block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
+
+	num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
+	used_blocks = 0;
+
+	// Get the minimum ag_shift (that's determined by the block size)
+
+	int32 blocksPerGroup = 1;
+	int32 groupShift = 13;
+
+	int32 bitsPerBlock = blockSize << 3;
+	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
+
+	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
+		groupShift++;
+	}
+
+	// Many allocation groups help applying allocation policies, but if
+	// they are too small, we will need too many block_runs to cover large
+	// files (see above to get an explanation of the kDesiredAllocationGroups
+	// constant).
+
+	int32 numGroups;
+
+	while (true) {
+		numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup;
+		if (numGroups > kDesiredAllocationGroups) {
+			if (groupShift == 16)
+				break;
+
+			groupShift++;
+			blocksPerGroup *= 2;
+		} else
+			break;
+	}
+
+	num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
+	blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup);
+	ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
+}
+
+
+//	#pragma mark -
+
+
+/** Creates an unmounted volume object for the mount \a id.
+ *	The device and the journal are set to "none", so that calling
+ *	Unmount() on a volume that was never (successfully) mounted does not
+ *	work on uninitialized values.
+ */
+
+Volume::Volume(mount_id id)
+	:
+	fID(id),
+	fDevice(-1),
+	fBlockAllocator(this),
+	fLock("bfs volume"),
+	fJournal(NULL),
+	fRootNode(NULL),
+	fIndicesNode(NULL),
+	fDirtyCachedBlocks(0),
+	fUniqueID(0),
+	fFlags(0)
+{
+}
+
+
+/** Nothing to do here - the resources are released in Unmount(). */
+
+Volume::~Volume()
+{
+}
+
+
+/** Returns whether the volume's copy of the super block passes the
+ *	checks of disk_super_block::IsValid().
+ */
+
+bool
+Volume::IsValidSuperBlock()
+{
+	return fSuperBlock.IsValid();
+}
+
+
+/** Called when something went seriously wrong: prints a message, and
+ *	marks the volume read-only. Depending on the build, the userland or
+ *	the kernel debugger is entered afterwards (the latter only if DEBUG
+ *	is defined).
+ */
+
+void 
+Volume::Panic()
+{
+	FATAL(("we have to panic... switch to read-only mode!\n"));
+	fFlags |= VOLUME_READ_ONLY;
+#ifdef USER
+	debugger("BFS panics!");
+#elif defined(DEBUG)
+	kernel_debugger("BFS panics!");
+#endif
+}
+
+
+/** Mounts the BFS volume found on the device \a deviceName.
+ *	It opens the device (falling back to read-only access if it cannot
+ *	be opened for writing), reads and checks the super block, sets up the
+ *	block cache, the journal, and the block allocator, and finally
+ *	publishes the root node and loads the index directory, if any.
+ *
+ *	\param deviceName path of the device or image file to mount
+ *	\param flags mount flags; B_MOUNT_READ_ONLY is evaluated here
+ *	\return B_OK on success, an error code otherwise.
+ */
+
+status_t
+Volume::Mount(const char *deviceName, uint32 flags)
+{
+	// ToDo: validate the FS in write mode as well!
+#if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
+	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
+	// in big endian mode, we only mount read-only for now
+	flags |= B_MOUNT_READ_ONLY;
+#endif
+
+	// This must be evaluated after the flag may have been forced above,
+	// or else the volume would not be marked read-only in that case.
+	if (flags & B_MOUNT_READ_ONLY)
+		fFlags |= VOLUME_READ_ONLY;
+
+	DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR);
+
+	// if we couldn't open the device, try read-only (don't rely on a specific error code)
+	if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) {
+		opener.Open(deviceName, O_RDONLY);
+		fFlags |= VOLUME_READ_ONLY;
+	}
+
+	fDevice = opener.Device();
+	if (fDevice < B_OK)
+		RETURN_ERROR(fDevice);
+
+	// check if it's a regular file, and if so, disable the cache for the
+	// underlying file system
+	struct stat stat;
+	if (fstat(fDevice, &stat) < 0)
+		RETURN_ERROR(B_ERROR);
+
+#ifndef NO_FILE_UNCACHED_IO
+	if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) {
+		// mount read-only if the cache couldn't be disabled
+#	ifdef DEBUG
+		FATAL(("couldn't disable cache for image file - system may dead-lock!\n"));
+#	else
+		FATAL(("couldn't disable cache for image file!\n"));
+		Panic();
+#	endif
+	}
+#endif
+
+	// read the super block
+	if (Identify(fDevice, &fSuperBlock) != B_OK) {
+		FATAL(("invalid super block!\n"));
+		return B_BAD_VALUE;
+	}
+
+	// initialize short hands to the super block (to save byte swapping)
+	fBlockSize = fSuperBlock.BlockSize();
+	fBlockShift = fSuperBlock.BlockShift();
+	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
+
+	// check if the device size is large enough to hold the file system
+	off_t diskSize;
+	if (opener.GetSize(&diskSize) < B_OK)
+		RETURN_ERROR(B_ERROR);
+	if (diskSize < (NumBlocks() << BlockShift()))
+		RETURN_ERROR(B_BAD_VALUE);
+
+	// set the current log pointers, so that journaling will work correctly
+	fLogStart = fSuperBlock.LogStart();
+	fLogEnd = fSuperBlock.LogEnd();
+
+	if (opener.InitCache(NumBlocks()) != B_OK)
+		return B_ERROR;
+
+	fJournal = new Journal(this);
+	// replaying the log is the first thing we will do on this disk
+	// (a journal that could not be allocated at all is an error, too)
+	if (fJournal == NULL || fJournal->InitCheck() < B_OK
+		|| fBlockAllocator.Initialize() < B_OK) {
+		// ToDo: improve error reporting for a bad journal
+		FATAL(("could not initialize journal/block bitmap allocator!\n"));
+		return B_NO_MEMORY;
+	}
+
+	status_t status = B_OK;
+
+	fRootNode = new Inode(this, ToVnode(Root()));
+	if (fRootNode && fRootNode->InitCheck() == B_OK) {
+		status = new_vnode(fID, ToVnode(Root()), (void *)fRootNode);
+		if (status == B_OK) {
+			// try to get indices root dir
+
+			// question: why doesn't get_vnode() work here??
+			// answer: we have not yet backpropagated the pointer to the
+			// volume in bfs_mount(), so bfs_read_vnode() can't get it.
+			// But it's not needed to do that anyway.
+
+			if (!Indices().IsZero())
+				fIndicesNode = new Inode(this, ToVnode(Indices()));
+
+			if (fIndicesNode == NULL
+				|| fIndicesNode->InitCheck() < B_OK
+				|| !fIndicesNode->IsContainer()) {
+				INFORM(("bfs: volume doesn't have indices!\n"));
+
+				if (fIndicesNode) {
+					// if this is the case, the index root node is gone bad, and
+					// BFS switch to read-only mode
+					fFlags |= VOLUME_READ_ONLY;
+					delete fIndicesNode;
+					fIndicesNode = NULL;
+				}
+			}
+
+			// all went fine
+			opener.Keep();
+			return B_OK;
+		} else
+			FATAL(("could not create root node: new_vnode() failed!\n"));
+
+		delete fRootNode;
+		fRootNode = NULL;
+			// don't leave a dangling pointer behind
+	} else {
+		status = B_BAD_VALUE;
+		FATAL(("could not create root node!\n"));
+	}
+
+	return status;
+}
+
+
+/** Shuts the volume down: deletes the journal and the index directory
+ *	node, removes the device's block cache, and closes the device.
+ *	Always returns B_OK.
+ */
+
+status_t
+Volume::Unmount()
+{
+	// Deleting the journal will also flush the log & all blocks to disk
+	delete fJournal;
+	fJournal = NULL;
+
+	delete fIndicesNode;
+
+	// a read-only volume must not write anything back
+	const int cacheMode = IsReadOnly() ? NO_WRITES : ALLOW_WRITES;
+	remove_cached_device_blocks(fDevice, cacheMode);
+	close(fDevice);
+
+	return B_OK;
+}
+
+
+/** Asks the journal to flush the log and the blocks, and returns its
+ *	result.
+ */
+
+status_t 
+Volume::Sync()
+{
+	return fJournal->FlushLogAndBlocks();
+}
+
+
+/** Checks whether \a run lies within the bounds given by the number of
+ *	allocation groups and the allocation group shift, and has a non-zero
+ *	length. If not, the volume panics, and B_BAD_DATA is returned.
+ */
+
+status_t
+Volume::ValidateBlockRun(block_run run)
+{
+	const unsigned long groupSize = 1UL << AllocationGroupShift();
+	const int32 group = run.AllocationGroup();
+
+	const bool valid = group >= 0 && group <= (int32)AllocationGroups()
+		&& run.Start() <= groupSize
+		&& run.length != 0
+		&& uint32(run.Length() + run.Start()) <= groupSize;
+	if (valid)
+		return B_OK;
+
+	Panic();
+	FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length()));
+	return B_BAD_DATA;
+}
+
+
+/** Converts the block number \a block into a block_run of length 1;
+ *	the upper bits select the allocation group, the lower
+ *	AllocationGroupShift() bits the start within that group.
+ */
+
+block_run
+Volume::ToBlockRun(off_t block) const
+{
+	const off_t startMask = (1LL << AllocationGroupShift()) - 1;
+
+	block_run result;
+	result.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift());
+	result.start = HOST_ENDIAN_TO_BFS_INT16(block & startMask);
+	result.length = HOST_ENDIAN_TO_BFS_INT16(1);
+	return result;
+}
+
+
+/** Creates the index root directory within \a transaction, stores its
+ *	location in the super block, and writes the super block back.
+ */
+
+status_t
+Volume::CreateIndicesRoot(Transaction *transaction)
+{
+	off_t indicesID;
+	status_t result = Inode::Create(transaction, NULL, NULL,
+		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &indicesID,
+		&fIndicesNode);
+	if (result < B_OK)
+		RETURN_ERROR(result);
+
+	fSuperBlock.indices = ToBlockRun(indicesID);
+
+	return WriteSuperBlock();
+}
+
+
+/** Convenience version that passes the block run of \a parent on to the
+ *	block allocator. \a parent is dereferenced unconditionally, so it
+ *	must not be NULL.
+ */
+
+status_t 
+Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run)
+{
+	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run);
+}
+
+
+/** Writes the in-memory super block to offset 512 of the device.
+ *	Returns B_IO_ERROR if it could not be written completely.
+ */
+
+status_t
+Volume::WriteSuperBlock()
+{
+	const bool complete = write_pos(fDevice, 512, &fSuperBlock,
+		sizeof(disk_super_block)) == sizeof(disk_super_block);
+
+	return complete ? B_OK : B_IO_ERROR;
+}
+
+
+/** Passes the change of an attribute on to every query registered with
+ *	this volume. Does nothing if the query lock cannot be acquired.
+ */
+
+void
+Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
+	size_t oldLength, const uint8 *newKey, size_t newLength)
+{
+	if (fQueryLock.Lock() < B_OK)
+		return;
+
+	Query *current = NULL;
+	for (;;) {
+		current = fQueries.Next(current);
+		if (current == NULL)
+			break;
+
+		current->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength);
+	}
+
+	fQueryLock.Unlock();
+}
+
+
+/** Checks if there is a live query whose results depend on the presence
+ *	or value of the specified attribute.
+ *	Don't use it if you already have all the data together to evaluate
+ *	the queries - it wouldn't save you anything in this case.
+ *	The check itself is not implemented yet: the function always
+ *	returns \c true.
+ */
+
+bool 
+Volume::CheckForLiveQuery(const char *attribute)
+{
+	// ToDo: check for a live query that depends on the specified attribute
+	return true;
+}
+
+
+/** Adds \a query to the list of queries of this volume; nothing happens
+ *	if the query lock cannot be acquired.
+ */
+
+void
+Volume::AddQuery(Query *query)
+{
+	if (fQueryLock.Lock() >= B_OK) {
+		fQueries.Add(query);
+		fQueryLock.Unlock();
+	}
+}
+
+
+/** Removes \a query from the list of queries of this volume; nothing
+ *	happens if the query lock cannot be acquired.
+ */
+
+void
+Volume::RemoveQuery(Query *query)
+{
+	if (fQueryLock.Lock() >= B_OK) {
+		fQueries.Remove(query);
+		fQueryLock.Unlock();
+	}
+}
+
+
+//	#pragma mark -
+//	Disk scanning and initialization
+
+
+/** Reads the first 1024 bytes from \a fd, and looks for a valid super
+ *	block at offset 512. Unless BFS_LITTLE_ENDIAN_ONLY is defined,
+ *	offset 0 is tried as well.
+ *	Returns B_IO_ERROR if reading failed, B_BAD_VALUE if no valid super
+ *	block was found; \a superBlock is overwritten in any case once the
+ *	read succeeded.
+ */
+
+status_t
+Volume::Identify(int fd, disk_super_block *superBlock)
+{
+	char buffer[1024];
+	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
+		return B_IO_ERROR;
+
+	// Note: that does work only for x86, for PowerPC, the super block
+	// may be located at offset 0!
+	memcpy(superBlock, buffer + 512, sizeof(disk_super_block));
+	if (superBlock->IsValid())
+		return B_OK;
+
+#ifndef BFS_LITTLE_ENDIAN_ONLY
+	memcpy(superBlock, buffer, sizeof(disk_super_block));
+	if (superBlock->IsValid())
+		return B_OK;
+#endif
+
+	return B_BAD_VALUE;
+}
+
+
+#ifdef USER
+extern "C" void kill_device_vnodes(dev_t id);
+	// This call is only available in the userland fs_shell
+
+/** Initializes the device \a device with an empty BFS volume called
+ *	\a name (userland fs_shell only).
+ *	It creates the super block, the log area, the block bitmap, the root
+ *	directory and - unless VOLUME_NO_INDICES is set in \a flags - the
+ *	"name", "last_modified", and "size" indices.
+ *
+ *	\param device path of the device or image file
+ *	\param name the disk name, must not contain a '/'
+ *	\param blockSize 1024, 2048, 4096, or 8192
+ *	\param flags see volume_initialize_flags
+ *	\return B_OK on success, B_BAD_VALUE for invalid arguments or if the
+ *		device could not be opened, another error code otherwise.
+ */
+
+status_t
+Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags)
+{
+	// although there is no really good reason for it, we won't
+	// accept '/' in disk names (mkbfs does this, too - and since
+	// Tracker names mounted volumes like their name)
+	if (strchr(name, '/') != NULL)
+		return B_BAD_VALUE;
+
+	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192)
+		return B_BAD_VALUE;
+
+	DeviceOpener opener(device, O_RDWR);
+	if (opener.Device() < B_OK)
+		return B_BAD_VALUE;
+
+	fDevice = opener.Device();
+
+	uint32 deviceBlockSize;
+	off_t deviceSize;
+	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
+		return B_ERROR;
+
+	off_t numBlocks = deviceSize / blockSize;
+
+	// create valid super block
+
+	fSuperBlock.Initialize(name, numBlocks, blockSize);
+	
+	// initialize short hands to the super block (to save byte swapping)
+	fBlockSize = fSuperBlock.BlockSize();
+	fBlockShift = fSuperBlock.BlockShift();
+	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
+
+	// since the allocator has not been initialized yet, we
+	// cannot use BlockAllocator::BitmapSize() here
+	fSuperBlock.log_blocks = ToBlockRun(AllocationGroups()
+		* fSuperBlock.BlocksPerAllocationGroup() + 1);
+	fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(2048);
+		// ToDo: set the log size depending on the disk size
+		// (the length is an on-disk field, and therefore has to be
+		// stored in BFS endianness like the rest of the block_run)
+	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log()));
+
+	// set the current log pointers, so that journaling will work correctly
+	fLogStart = fSuperBlock.LogStart();
+	fLogEnd = fSuperBlock.LogEnd();
+
+	if (!IsValidSuperBlock())
+		RETURN_ERROR(B_ERROR);
+
+	if (opener.InitCache(numBlocks) != B_OK)
+		return B_ERROR;
+
+	fJournal = new Journal(this);
+	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
+		RETURN_ERROR(B_ERROR);
+
+	// ready to write data to disk
+
+	Transaction transaction(this, 0);
+
+	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
+		RETURN_ERROR(B_ERROR);
+
+	off_t id;
+	status_t status = Inode::Create(&transaction, NULL, NULL,
+		S_DIRECTORY | 0755, 0, 0, &id, &fRootNode);
+	if (status < B_OK)
+		RETURN_ERROR(status);
+
+	fSuperBlock.root_dir = ToBlockRun(id);
+
+	if ((flags & VOLUME_NO_INDICES) == 0) {
+		// The indices root directory will be created automatically
+		// when the standard indices are created (or any other).
+		Index index(this);
+		status = index.Create(&transaction, "name", B_STRING_TYPE);
+		if (status < B_OK)
+			return status;
+
+		status = index.Create(&transaction, "last_modified", B_INT64_TYPE);
+		if (status < B_OK)
+			return status;
+
+		status = index.Create(&transaction, "size", B_INT64_TYPE);
+		if (status < B_OK)
+			return status;
+	}
+
+	WriteSuperBlock();
+	transaction.Done();
+
+	put_vnode(ID(), fRootNode->ID());
+	if (fIndicesNode != NULL)
+		put_vnode(ID(), fIndicesNode->ID());
+
+	kill_device_vnodes(ID());
+		// This call is only available in the userland fs_shell
+
+	Sync();
+	opener.RemoveCache(ALLOW_WRITES);
+	return B_OK;
+}
+#endif
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/Volume.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/Volume.h
@ -0,0 +1,240 @@
+#ifndef VOLUME_H
+#define VOLUME_H
+/* Volume - BFS super block, mounting, etc.
+**
+** Initial version by Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <KernelExport.h>
+#include <fs_interface.h>
+
+extern "C" {
+	#ifndef _IMPEXP_KERNEL
+	#	define _IMPEXP_KERNEL
+	#endif
+	#include "lock.h"
+	#include "cache.h"
+}
+
+#include "bfs.h"
+#include "BlockAllocator.h"
+#include "BufferPool.h"
+#include "Chain.h"
+
+class Journal;
+class Inode;
+class Query;
+
+// Runtime state flags of a mounted volume (kept in Volume::fFlags)
+enum volume_flags {
+	VOLUME_READ_ONLY	= 0x0001
+};
+
+// Flags accepted by Volume::Initialize()
+enum volume_initialize_flags {
+	VOLUME_NO_INDICES	= 0x0001,
+};
+
+/** Represents a BFS volume: it holds a copy of the super block, the
+ *	device descriptor, the block allocator, the journal, the root and
+ *	index directory nodes, and the list of queries of the volume.
+ */
+
+class Volume {
+	public:
+		Volume(mount_id id);
+		~Volume();
+
+		status_t			Mount(const char *device, uint32 flags);
+		status_t			Unmount();
+		status_t			Initialize(const char *device, const char *name,
+								uint32 blockSize, uint32 flags);
+
+		bool				IsValidSuperBlock();
+		bool				IsReadOnly() const;
+		void				Panic();
+		RecursiveLock		&Lock();
+
+		// super block accessors
+		block_run			Root() const { return fSuperBlock.root_dir; }
+		Inode				*RootNode() const { return fRootNode; }
+		block_run			Indices() const { return fSuperBlock.indices; }
+		Inode				*IndicesNode() const { return fIndicesNode; }
+		block_run			Log() const { return fSuperBlock.log_blocks; }
+		vint32				&LogStart() { return fLogStart; }
+		vint32				&LogEnd() { return fLogEnd; }
+		int					Device() const { return fDevice; }
+
+		mount_id			ID() const { return fID; }
+		const char			*Name() const { return fSuperBlock.name; }
+
+		off_t				NumBlocks() const { return fSuperBlock.NumBlocks(); }
+		off_t				UsedBlocks() const { return fSuperBlock.UsedBlocks(); }
+		off_t				FreeBlocks() const { return NumBlocks() - UsedBlocks(); }
+
+		uint32				BlockSize() const { return fBlockSize; }
+		uint32				BlockShift() const { return fBlockShift; }
+		uint32				InodeSize() const { return fSuperBlock.InodeSize(); }
+		uint32				AllocationGroups() const { return fSuperBlock.AllocationGroups(); }
+		uint32				AllocationGroupShift() const { return fAllocationGroupShift; }
+		disk_super_block	&SuperBlock() { return fSuperBlock; }
+
+		// conversions between block runs, block numbers, offsets, and vnode IDs
+		off_t				ToOffset(block_run run) const { return ToBlock(run) << BlockShift(); }
+		off_t				ToBlock(block_run run) const { return ((((off_t)run.AllocationGroup()) << AllocationGroupShift()) | (off_t)run.Start()); }
+		block_run			ToBlockRun(off_t block) const;
+		status_t			ValidateBlockRun(block_run run);
+
+		off_t				ToVnode(block_run run) const { return ToBlock(run); }
+		off_t				ToVnode(off_t block) const { return block; }
+		off_t				VnodeToBlock(vnode_id id) const { return (off_t)id; }
+
+		status_t			CreateIndicesRoot(Transaction *transaction);
+
+		// block bitmap
+		BlockAllocator		&Allocator();
+		status_t			AllocateForInode(Transaction *transaction, const Inode *parent,
+								mode_t type, block_run &run);
+		status_t			AllocateForInode(Transaction *transaction, const block_run *parent,
+								mode_t type, block_run &run);
+		status_t			Allocate(Transaction *transaction,const Inode *inode,
+								off_t numBlocks, block_run &run, uint16 minimum = 1);
+		status_t			Free(Transaction *transaction, block_run run);
+
+		// cache access
+		status_t			WriteSuperBlock();
+		status_t			WriteBlocks(off_t blockNumber, const uint8 *block, uint32 numBlocks);
+		void				WriteCachedBlocksIfNecessary();
+		status_t			FlushDevice();
+
+		// queries
+		void				UpdateLiveQueries(Inode *inode, const char *attribute, int32 type,
+								const uint8 *oldKey, size_t oldLength,
+								const uint8 *newKey, size_t newLength);
+		bool				CheckForLiveQuery(const char *attribute);
+		void				AddQuery(Query *query);
+		void				RemoveQuery(Query *query);
+
+		status_t			Sync();
+		Journal				*GetJournal(off_t refBlock) const;
+
+		BufferPool			&Pool();
+
+		uint32				GetUniqueID();
+
+		static status_t		Identify(int fd, disk_super_block *superBlock);
+
+	protected:
+		mount_id			fID;
+		int					fDevice;
+		disk_super_block	fSuperBlock;
+
+		// host endian copies of super block values (to save byte swapping)
+		uint32				fBlockSize;
+		uint32				fBlockShift;
+		uint32				fAllocationGroupShift;
+
+		BlockAllocator		fBlockAllocator;
+		RecursiveLock		fLock;
+		Journal				*fJournal;
+		vint32				fLogStart, fLogEnd;
+
+		Inode				*fRootNode;
+		Inode				*fIndicesNode;
+
+		// counts the blocks written via WriteBlocks()
+		vint32				fDirtyCachedBlocks;
+
+		SimpleLock			fQueryLock;
+		Chain<Query>		fQueries;
+
+		int32				fUniqueID;
+		uint32				fFlags;
+			// see volume_flags
+
+		BufferPool			fBufferPool;
+};
+
+
+// inline functions
+
+// Returns whether the VOLUME_READ_ONLY flag is set for this volume
+inline bool
+Volume::IsReadOnly() const
+{
+	return (fFlags & VOLUME_READ_ONLY) != 0;
+}
+
+
+// Gives access to the volume's recursive lock
+inline RecursiveLock &
+Volume::Lock()
+{
+	return fLock;
+}
+
+
+// Gives access to the volume's block allocator
+inline BlockAllocator &
+Volume::Allocator()
+{
+	return fBlockAllocator;
+}
+
+
+// Forwards the request to BlockAllocator::AllocateForInode()
+inline status_t 
+Volume::AllocateForInode(Transaction *transaction, const block_run *parent, mode_t type, block_run &run)
+{
+	return fBlockAllocator.AllocateForInode(transaction, parent, type, run);
+}
+
+
+// Forwards the request to BlockAllocator::Allocate()
+inline status_t 
+Volume::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks, block_run &run, uint16 minimum)
+{
+	return fBlockAllocator.Allocate(transaction, inode, numBlocks, run, minimum);
+}
+
+
+// Forwards the request to BlockAllocator::Free()
+inline status_t 
+Volume::Free(Transaction *transaction, block_run run)
+{
+	return fBlockAllocator.Free(transaction, run);
+}
+
+
+// Writes "numBlocks" blocks starting at "blockNumber" through the block
+// cache, and adds them to the dirty block counter.
+inline status_t
+Volume::WriteBlocks(off_t blockNumber, const uint8 *block, uint32 numBlocks)
+{
+	atomic_add(&fDirtyCachedBlocks, numBlocks);
+
+	// fSuperBlock.block_size is the raw on-disk value; the cache needs
+	// the block size in host endianness, which fBlockSize holds
+	return cached_write(fDevice, blockNumber, block, numBlocks, fBlockSize);
+}
+
+
+// Forces a cache flush once more than 128 blocks have been counted as
+// dirty, and takes 64 off the counter afterwards.
+inline void
+Volume::WriteCachedBlocksIfNecessary()
+{
+	// the specific values are only valid for the current BeOS cache
+	if (fDirtyCachedBlocks <= 128)
+		return;
+
+	force_cache_flush(fDevice, false);
+	atomic_add(&fDirtyCachedBlocks, -64);
+}
+
+
+// Resets the dirty block counter, and flushes the device via
+// flush_device()
+inline status_t 
+Volume::FlushDevice()
+{
+	fDirtyCachedBlocks = 0;
+	return flush_device(fDevice, 0);
+}
+
+
+// Returns the volume's journal; the block argument is ignored
+inline Journal *
+Volume::GetJournal(off_t /*refBlock*/) const
+{
+	return fJournal;
+}
+
+
+// Gives access to the volume's buffer pool
+inline BufferPool &
+Volume::Pool()
+{
+	return fBufferPool;
+}
+
+
+// Increments the ID counter atomically, and returns what atomic_add()
+// reports for it
+inline uint32
+Volume::GetUniqueID()
+{
+	return atomic_add(&fUniqueID, 1);
+}
+
+#endif	/* VOLUME_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs.h
@ -0,0 +1,400 @@
+#ifndef BFS_H
+#define BFS_H
+/* bfs - BFS definitions and helper functions
+**
+** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
+** Parts of this code is based on work previously done by Marcus Overhagen
+**
+** Copyright 2001, pinc Software. All Rights Reserved.
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <SupportDefs.h>
+
+#include "bfs_endian.h"
+
+
+#ifndef B_BEOS_VERSION_DANO
+#	define B_BAD_DATA B_ERROR
+#endif
+
+// ToDo: temporary fix! (missing but public ioctls)
+#define IOCTL_FILE_UNCACHED_IO	10000
+
+// A block_run addresses a number of consecutive blocks within one
+// allocation group. The fields are stored in BFS endianness; use the
+// accessor methods to get them in host endianness.
+struct block_run {
+	int32		allocation_group;
+	uint16		start;
+	uint16		length;
+
+	int32 AllocationGroup() const { return BFS_ENDIAN_TO_HOST_INT32(allocation_group); }
+	uint16 Start() const { return BFS_ENDIAN_TO_HOST_INT16(start); }
+	uint16 Length() const { return BFS_ENDIAN_TO_HOST_INT16(length); }
+
+	inline bool operator==(const block_run &run) const;
+	inline bool operator!=(const block_run &run) const;
+	inline bool IsZero();
+	inline bool MergeableWith(block_run run) const;
+	inline void SetTo(int32 group, uint16 start, uint16 length = 1);
+
+	inline static block_run Run(int32 group, uint16 start, uint16 length = 1);
+		// can't have a constructor because it's used in a union
+} _PACKED;
+
+typedef block_run inode_addr;
+
+// Since the block_run::length field spans 16 bits, the largest number of
+// blocks covered by a block_run is 65535 (as long as we don't want to
+// break compatibility and take a zero length for 65536).
+#define MAX_BLOCK_RUN_LENGTH	65535
+
+//**************************************
+
+
+#define BFS_DISK_NAME_LENGTH	32
+
+// The super block as it is read from and written to the device. The
+// fields are stored in BFS endianness; the accessor methods return them
+// in host endianness.
+struct disk_super_block {
+	char		name[BFS_DISK_NAME_LENGTH];
+	int32		magic1;
+	int32		fs_byte_order;
+	uint32		block_size;
+	uint32		block_shift;
+	off_t		num_blocks;
+	off_t		used_blocks;
+	int32		inode_size;
+	int32		magic2;
+	int32		blocks_per_ag;
+	int32		ag_shift;
+	int32		num_ags;
+	int32		flags;
+	block_run	log_blocks;
+	off_t		log_start;
+	off_t		log_end;
+	int32		magic3;
+	inode_addr	root_dir;
+	inode_addr	indices;
+	int32		pad[8];
+
+	int32 Magic1() const { return BFS_ENDIAN_TO_HOST_INT32(magic1); }
+	int32 Magic2() const { return BFS_ENDIAN_TO_HOST_INT32(magic2); }
+	int32 Magic3() const { return BFS_ENDIAN_TO_HOST_INT32(magic3); }
+	int32 ByteOrder() const { return BFS_ENDIAN_TO_HOST_INT32(fs_byte_order); }
+	uint32 BlockSize() const { return BFS_ENDIAN_TO_HOST_INT32(block_size); }
+	uint32 BlockShift() const { return BFS_ENDIAN_TO_HOST_INT32(block_shift); }
+	off_t NumBlocks() const { return BFS_ENDIAN_TO_HOST_INT64(num_blocks); }
+	off_t UsedBlocks() const { return BFS_ENDIAN_TO_HOST_INT64(used_blocks); }
+	int32 InodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(inode_size); }
+	int32 BlocksPerAllocationGroup() const { return BFS_ENDIAN_TO_HOST_INT32(blocks_per_ag); }
+	int32 AllocationGroups() const { return BFS_ENDIAN_TO_HOST_INT32(num_ags); }
+	int32 AllocationGroupShift() const { return BFS_ENDIAN_TO_HOST_INT32(ag_shift); }
+	int32 Flags() const { return BFS_ENDIAN_TO_HOST_INT32(flags); }
+	off_t LogStart() const { return BFS_ENDIAN_TO_HOST_INT64(log_start); }
+	off_t LogEnd() const { return BFS_ENDIAN_TO_HOST_INT64(log_end); }
+
+	// implemented in Volume.cpp:
+	bool IsValid();
+	void Initialize(const char *name, off_t numBlocks, uint32 blockSize);
+} _PACKED;
+
+#define SUPER_BLOCK_FS_LENDIAN		'BIGE'		/* BIGE */
+
+#define SUPER_BLOCK_MAGIC1			'BFS1'		/* BFS1 */
+#define SUPER_BLOCK_MAGIC2			0xdd121031
+#define SUPER_BLOCK_MAGIC3			0x15b6830e
+
+#define SUPER_BLOCK_DISK_CLEAN		'CLEN'		/* CLEN */
+#define SUPER_BLOCK_DISK_DIRTY		'DIRT'		/* DIRT */
+
+//**************************************
+
+#define NUM_DIRECT_BLOCKS			12
+
+// Describes where the data of an inode is located: NUM_DIRECT_BLOCKS
+// direct block runs, plus an indirect and a double indirect block run.
+// The fields are stored in BFS endianness.
+struct data_stream {
+	block_run	direct[NUM_DIRECT_BLOCKS];
+	off_t		max_direct_range;
+	block_run	indirect;
+	off_t		max_indirect_range;
+	block_run	double_indirect;
+	off_t		max_double_indirect_range;
+	off_t		size;
+
+	off_t MaxDirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_direct_range); }
+	off_t MaxIndirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_indirect_range); }
+	off_t MaxDoubleIndirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_double_indirect_range); }
+	off_t Size() const { return BFS_ENDIAN_TO_HOST_INT64(size); }
+} _PACKED;
+
+// This defines the size of the indirect and double indirect
+// blocks. Note: the code may not work correctly at some places
+// if this value is changed (it's not tested).
+#define NUM_ARRAY_BLOCKS		4
+#define ARRAY_BLOCKS_SHIFT		2
+#define INDIRECT_BLOCKS_SHIFT	(ARRAY_BLOCKS_SHIFT + ARRAY_BLOCKS_SHIFT)
+
+//**************************************
+
+struct bfs_inode;
+
+// Header of an item in the small data section of an inode: the header
+// is followed by the name (name_size bytes), and then the data.
+// The fields are stored in BFS endianness.
+struct small_data {
+	uint32		type;
+	uint16		name_size;
+	uint16		data_size;
+
+#if !__MWERKS__ //-- mwcc doesn't support thingy[0], so we patch Name() instead
+	char		name[0];	// name_size long, followed by data
+#endif
+
+	uint32 Type() const { return BFS_ENDIAN_TO_HOST_INT32(type); }
+	uint16 NameSize() const { return BFS_ENDIAN_TO_HOST_INT16(name_size); }
+	uint16 DataSize() const { return BFS_ENDIAN_TO_HOST_INT16(data_size); }
+
+	inline char		*Name() const;
+	inline uint8	*Data() const;
+	inline uint32	Size() const;
+		// size of the whole item, including header and padding
+	inline small_data *Next() const;
+	inline bool		IsLast(const bfs_inode *inode) const;
+} _PACKED;
+
+// the file name is part of the small_data structure
+#define FILE_NAME_TYPE			'CSTR'
+#define FILE_NAME_NAME			0x13 
+#define FILE_NAME_NAME_LENGTH	1 
+
+
+//**************************************
+
+class Volume;
+
+#define SHORT_SYMLINK_NAME_LENGTH	144 // length incl. terminating '\0'
+
+// The on-disk inode structure. It is followed by the small data
+// section (see SmallDataStart()). The fields are stored in BFS
+// endianness; the accessor methods return them in host endianness.
+struct bfs_inode {
+	int32		magic1;
+	inode_addr	inode_num;
+	int32		uid;
+	int32		gid;
+	int32		mode;				// see sys/stat.h
+	int32		flags;
+	bigtime_t	create_time;
+	bigtime_t	last_modified_time;
+	inode_addr	parent;
+	inode_addr	attributes;
+	uint32		type;				// attribute type
+	
+	int32		inode_size;
+	uint32		etc;				// a pointer to the Inode object during construction
+
+	// either the data stream, or the target of a short symlink
+	union {
+		data_stream		data;
+		char 			short_symlink[SHORT_SYMLINK_NAME_LENGTH];
+	};
+	int32		pad[4];
+
+#if !__MWERKS__
+	small_data	small_data_start[0];
+#endif
+	
+	int32 Magic1() const { return BFS_ENDIAN_TO_HOST_INT32(magic1); }
+	int32 UserID() const { return BFS_ENDIAN_TO_HOST_INT32(uid); }
+	int32 GroupID() const { return BFS_ENDIAN_TO_HOST_INT32(gid); }
+	int32 Mode() const { return BFS_ENDIAN_TO_HOST_INT32(mode); }
+	int32 Flags() const { return BFS_ENDIAN_TO_HOST_INT32(flags); }
+	int32 Type() const { return BFS_ENDIAN_TO_HOST_INT32(type); }
+	int32 InodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(inode_size); }
+	bigtime_t LastModifiedTime() const { return BFS_ENDIAN_TO_HOST_INT64(last_modified_time); }
+	bigtime_t CreateTime() const { return BFS_ENDIAN_TO_HOST_INT64(create_time); }
+
+	inline small_data *SmallDataStart();
+
+	status_t InitCheck(Volume *volume);
+		// defined in Inode.cpp
+} _PACKED;	
+
+#define INODE_MAGIC1			0x3bbe0ad9
+#define INODE_TIME_SHIFT		16
+#define INODE_TIME_MASK			0xffff
+#define INODE_FILE_NAME_LENGTH	256
+
+// Values for bfs_inode::flags.
+// NOTE(review): presumably only the bits covered by
+// INODE_PERMANENT_FLAGS are meant to be stored on disk, while the ones
+// above are runtime-only - confirm against Inode.cpp.
+enum inode_flags {
+	INODE_IN_USE			= 0x00000001,	// always set
+	INODE_ATTR_INODE		= 0x00000004,
+	INODE_LOGGED			= 0x00000008,	// log changes to the data stream
+	INODE_DELETED			= 0x00000010,
+	INODE_NOT_READY			= 0x00000020,	// used during Inode construction
+	INODE_LONG_SYMLINK		= 0x00000040,	// symlink in data stream
+
+	INODE_PERMANENT_FLAGS	= 0x0000ffff,
+
+	INODE_NO_CACHE			= 0x00010000,
+	INODE_WAS_WRITTEN		= 0x00020000,
+	INODE_NO_TRANSACTION	= 0x00040000,
+	INODE_DONT_FREE_SPACE	= 0x00080000,	// only used by the "chkbfs" functionality
+	INODE_CHKBFS_RUNNING	= 0x00200000,
+};
+
+//**************************************
+
+// Per open file state.
+// NOTE(review): last_notification/last_size presumably belong to the
+// size change notification governed by INODE_NOTIFICATION_INTERVAL -
+// confirm against the users of this structure.
+struct file_cookie {
+	bigtime_t last_notification;
+	off_t	last_size;
+	int		open_mode;
+};
+
+// notify every second if the file size has changed
+#define INODE_NOTIFICATION_INTERVAL	1000000LL
+
+//**************************************
+
+
+// Divides "num" by "divisor", rounding the result up (32 bit version)
+inline int32
+divide_roundup(int32 num,int32 divisor)
+{
+	const int32 biased = num + divisor - 1;
+	return biased / divisor;
+}
+
+// Divides "num" by "divisor", rounding the result up (64 bit version)
+inline int64
+divide_roundup(int64 num,int32 divisor)
+{
+	const int64 biased = num + divisor - 1;
+	return biased / divisor;
+}
+
+// Counts how many times "i" can be shifted right by one bit until it is
+// no longer greater than 1 (0 is returned for 0 and 1)
+inline int
+get_shift(uint64 i)
+{
+	int shift = 0;
+	for (; i > 1; i >>= 1)
+		shift++;
+
+	return shift;
+}
+
+// Rounds "data" up to the next multiple of sizeof(off_t)
+inline int32
+round_up(uint32 data)
+{
+	const size_t mask = sizeof(off_t) - 1;
+	return (data + mask) & ~mask;
+}
+
+
+/************************ block_run inline functions ************************/
+//	#pragma mark -
+
+
+// Two runs are equal if all of their (raw) fields are equal
+inline bool
+block_run::operator==(const block_run &run) const
+{
+	if (allocation_group != run.allocation_group)
+		return false;
+	if (start != run.start)
+		return false;
+
+	return length == run.length;
+}
+
+
+// Two runs differ if at least one of their (raw) fields differs
+inline bool
+block_run::operator!=(const block_run &run) const
+{
+	return !(allocation_group == run.allocation_group
+		&& start == run.start
+		&& length == run.length);
+}
+
+
+// Returns true if all fields of the run are zero
+inline bool
+block_run::IsZero()
+{
+	if (allocation_group != 0)
+		return false;
+
+	return start == 0 && length == 0;
+}
+
+
+// Returns true if "run" is in the same allocation group, directly
+// follows this run, and both together do not exceed the maximum length
+inline bool
+block_run::MergeableWith(block_run run) const
+{
+	if (allocation_group != run.allocation_group)
+		return false;
+
+	// "run" has to start where this one ends
+	if (Start() + Length() != run.Start())
+		return false;
+
+	// 65535 is the maximum allowed run size for BFS
+	const uint32 mergedLength = (uint32)Length() + run.Length();
+	return mergedLength <= MAX_BLOCK_RUN_LENGTH;
+}
+
+
+// Sets all fields of the run from the host endian values given; they
+// are converted to BFS endianness before being stored
+inline void
+block_run::SetTo(int32 _group,uint16 _start,uint16 _length)
+{
+	allocation_group = HOST_ENDIAN_TO_BFS_INT32(_group);
+	start = HOST_ENDIAN_TO_BFS_INT16(_start);
+	length = HOST_ENDIAN_TO_BFS_INT16(_length);
+}
+
+
+// Builds a block_run from the host endian values given (this replaces
+// the constructor the structure cannot have)
+inline block_run
+block_run::Run(int32 group, uint16 start, uint16 length)
+{
+	block_run result;
+	result.SetTo(group, start, length);
+	return result;
+}
+
+
+/************************ small_data inline functions ************************/
+//	#pragma mark -
+
+
+// Returns a pointer to the name of the item, which directly follows the
+// data_size field (computed by hand for mwcc, which does not have the
+// "name" member)
+inline char *
+small_data::Name() const
+{
+#if __MWERKS__
+	return (char *)(uint32(&data_size)+uint32(sizeof(data_size)));
+#else
+	return const_cast<char *>(name);
+#endif
+}
+
+
+// Returns a pointer to the data of the item; it starts 3 bytes after
+// the end of the name
+inline uint8 *
+small_data::Data() const
+{
+	uint8 *nameStart = (uint8 *)Name();
+	return nameStart + NameSize() + 3;
+}
+
+
+// Returns the size of the whole item: header, name (plus 3 bytes), and
+// data (plus 1 byte)
+inline uint32
+small_data::Size() const
+{
+	return sizeof(small_data)
+		+ NameSize() + 3
+		+ DataSize() + 1;
+}
+
+
+// Returns the address directly after this item, where the next item
+// would start
+inline small_data *
+small_data::Next() const
+{
+	uint8 *self = (uint8 *)this;
+	return (small_data *)(self + Size());
+}
+
+
+// Returns true if this item marks the end of the small data section of
+// "inode": either there is no space for another header within the
+// inode at this position, or the name size is zero
+inline bool
+small_data::IsLast(const bfs_inode *inode) const
+{
+	// we need to check the location first, because if name_size is already beyond
+	// the block, we would touch invalid memory (although that can't cause wrong
+	// results)
+	if ((uint32)this > (uint32)inode + inode->InodeSize() - sizeof(small_data))
+		return true;
+
+	return name_size == 0;
+}
+
+
+/************************ bfs_inode inline functions ************************/
+//	#pragma mark -
+
+
+// Returns a pointer to the first small_data item, which directly
+// follows the "pad" field (computed by hand for mwcc, which does not
+// have the "small_data_start" member)
+inline small_data *
+bfs_inode::SmallDataStart()
+{
+#if __MWERKS__
+	return (small_data *)(&pad[4] /* last item in pad + sizeof(int32) */);
+#else
+	return small_data_start;
+#endif
+}
+
+
+#endif	/* BFS_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs_control.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs_control.h
@ -0,0 +1,68 @@
+#ifndef BFS_CONTROL_H
+#define BFS_CONTROL_H
+/* bfs_control - additional functionality exported via ioctl()
+**
+** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
+** This file may be used under the terms of the OpenBeOS License.
+*/
+
+
+#include <fs_interface.h>
+
+
+/* ioctl to check the version of BFS used - the parameter is a uint32 *
+ * where the number is stored
+ */
+#define BFS_IOCTL_VERSION			14200
+
+/* ioctls to use the "chkbfs" feature from the outside;
+ * all calls use a struct check_control as their single parameter
+ */
+#define	BFS_IOCTL_START_CHECKING	14201
+#define BFS_IOCTL_STOP_CHECKING		14202
+#define BFS_IOCTL_CHECK_NEXT_NODE	14203
+
+/* Parameter block of the checking ioctls.
+ * All fields except "flags" and "name" must be set to zero before
+ * BFS_IOCTL_START_CHECKING is called.
+ * NOTE(review): "magic" is not listed as an exception although
+ * BFS_IOCTL_CHECK_MAGIC exists - confirm who is expected to set it.
+ */
+struct check_control {
+	uint32		magic;		/* presumably BFS_IOCTL_CHECK_MAGIC - confirm */
+	uint32		flags;		/* see "values for the flags field" */
+	char		name[B_FILE_NAME_LENGTH];
+	vnode_id	inode;
+	uint32		mode;
+	uint32		errors;		/* see "values for the errors field" */
+	struct {
+		uint64	missing;
+		uint64	already_set;
+		uint64	freed;
+	} stats;
+	status_t	status;
+	void		*cookie;
+};
+
+/* values for the flags field (each value is a distinct bit) */
+#define BFS_FIX_BITMAP_ERRORS	1
+#define BFS_REMOVE_WRONG_TYPES	2
+	/* files that shouldn't be part of their parent will be removed
+	 * (e.g. a directory contains an attribute, ...)
+	 * Works only if BFS_FIX_BITMAP_ERRORS is set, too
+	 */
+#define BFS_REMOVE_INVALID		4
+	/* removes nodes that couldn't be opened at all from their parent
+	 * directory.
+	 * Also requires BFS_FIX_BITMAP_ERRORS to be set.
+	 */
+
+/* values for the errors field (each value is a distinct bit) */
+#define BFS_MISSING_BLOCKS		1
+#define BFS_BLOCKS_ALREADY_SET	2
+#define BFS_INVALID_BLOCK_RUN	4
+#define	BFS_COULD_NOT_OPEN		8
+#define BFS_WRONG_TYPE			16
+#define BFS_NAMES_DONT_MATCH	32
+
+/* check control magic value (a multi-character constant) */
+#define BFS_IOCTL_CHECK_MAGIC	'BChk'
+
+#endif	/* BFS_CONTROL_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs_endian.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/bfs_endian.h
@ -0,0 +1,44 @@
+/*
+** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+** Distributed under the terms of the OpenBeOS License.
+*/
+#ifndef BFS_ENDIAN_H
+#define BFS_ENDIAN_H
+
+
+#include <ByteOrder.h>
+
+
+#if !defined(BFS_LITTLE_ENDIAN_ONLY) && !defined(BFS_BIG_ENDIAN_ONLY)
+//	default setting; BFS is now primarily a little endian file system
+#	define BFS_LITTLE_ENDIAN_ONLY
+#endif
+
+
+/* Conversion macros between the byte order selected above ("BFS endian")
+ * and the byte order of the host, for 16, 32, and 64 bit values.
+ * When both orders match, the macros expand to their (parenthesized)
+ * argument, so that expressions like "a ? b : c" can be passed safely;
+ * otherwise they expand to the matching __swap_int*() call.
+ */
+#if (defined(BFS_LITTLE_ENDIAN_ONLY) && B_HOST_IS_LENDIAN) \
+	|| (defined(BFS_BIG_ENDIAN_ONLY) && B_HOST_IS_BENDIAN)
+		/* host is BFS endian */
+#	define BFS_ENDIAN_TO_HOST_INT16(value) (value)
+#	define BFS_ENDIAN_TO_HOST_INT32(value) (value)
+#	define BFS_ENDIAN_TO_HOST_INT64(value) (value)
+#	define HOST_ENDIAN_TO_BFS_INT16(value) (value)
+#	define HOST_ENDIAN_TO_BFS_INT32(value) (value)
+#	define HOST_ENDIAN_TO_BFS_INT64(value) (value)
+#elif (defined(BFS_LITTLE_ENDIAN_ONLY) && B_HOST_IS_BENDIAN) \
+	|| (defined(BFS_BIG_ENDIAN_ONLY) && B_HOST_IS_LENDIAN)
+		/* host is big endian, BFS is little endian or vice versa */
+#	define BFS_ENDIAN_TO_HOST_INT16(value) __swap_int16(value)
+#	define BFS_ENDIAN_TO_HOST_INT32(value) __swap_int32(value)
+#	define BFS_ENDIAN_TO_HOST_INT64(value) __swap_int64(value)
+#	define HOST_ENDIAN_TO_BFS_INT16(value) __swap_int16(value)
+#	define HOST_ENDIAN_TO_BFS_INT32(value) __swap_int32(value)
+#	define HOST_ENDIAN_TO_BFS_INT64(value) __swap_int64(value)
+#else
+	// No macros are defined in this case.
+	// ToDo: maybe build a version that supports both, big & little endian?
+	//		But since that will need some kind of global data (to
+	//		know of what type this file system is), it's probably
+	//		something for the boot loader; anything else would be
+	//		a major pain.
+#endif
+
+#endif	/* BFS_ENDIAN_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/cache.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/cache.h
@ -0,0 +1,50 @@
+/*
+	Copyright 1999-2001, Be Incorporated.   All Rights Reserved.
+	This file may be used under the terms of the Be Sample Code License.
+*/
+
+#ifndef _CACHE_H_
+#define _CACHE_H_
+
+#include <BeBuild.h>
+
+#define ALLOW_WRITES  1
+#define NO_WRITES     0
+/* NOTE(review): presumably the values for remove_cached_device_blocks()'s allow_write argument - confirm */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Prototypes only. NOTE(review): off_t, size_t and uint are used below, but only <BeBuild.h> is included by this header - confirm the includer provides them. */
+extern int   init_block_cache(int max_blocks, int flags);
+extern void  shutdown_block_cache(void);
+
+extern void  force_cache_flush(int dev, int prefer_log_blocks);
+extern int   flush_blocks(int dev, off_t bnum, int nblocks);
+extern int   flush_device(int dev, int warn_locked);
+
+extern int   init_cache_for_device(int fd, off_t max_blocks);
+extern int   remove_cached_device_blocks(int dev, int allow_write);
+/* per-block calls: a block is named by its device and block number */
+extern void *get_block(int dev, off_t bnum, int bsize);
+extern void *get_empty_block(int dev, off_t bnum, int bsize);
+extern int   release_block(int dev, off_t bnum);
+extern int   mark_blocks_dirty(int dev, off_t bnum, int nblocks);
+
+/* multi-block calls: the caller passes the buffer, the block count, and the block size */
+extern int  cached_read(int dev, off_t bnum, void *data, off_t num_blocks, int bsize);
+extern int  cached_write(int dev, off_t bnum, const void *data,
+				off_t num_blocks, int bsize);
+extern int  cached_write_locked(int dev, off_t bnum, const void *data,
+				off_t num_blocks, int bsize);
+extern int  set_blocks_info(int dev, off_t *blocks, int nblocks,
+				void (*func)(off_t bnum, size_t nblocks, void *arg),
+				void *arg);
+/* NOTE(review): unlike the calls above, these two take an fd and a uint block count; write_phys_blocks() also takes a non-const data pointer */
+extern size_t read_phys_blocks (int fd, off_t bnum, void *data, uint num_blocks, int bsize);
+extern size_t write_phys_blocks(int fd, off_t bnum, void *data, uint num_blocks, int bsize);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CACHE_H_ */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/fsproto.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/fsproto.h
@ -0,0 +1,247 @@
+/*
+	Copyright 1999-2001, Be Incorporated.   All Rights Reserved.
+	This file may be used under the terms of the Be Sample Code License.
+*/
+
+#ifndef _FSPROTO_H
+#define _FSPROTO_H
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <iovec.h>
+
+#include <OS.h>
+#include <NodeMonitor.h>
+#include <fs_attr.h>
+#include <fs_info.h>
+#include <BeBuild.h>
+#include <Drivers.h>
+
+typedef dev_t		nspace_id;
+typedef ino_t		vnode_id;
+
+/*
+ * PUBLIC PART OF THE FILE SYSTEM PROTOCOL
+ */
+/* WSTAT_*: one bit each. NOTE(review): presumably the bits of op_wstat's "mask" argument - confirm */
+#define		WSTAT_MODE		0x0001
+#define		WSTAT_UID		0x0002
+#define		WSTAT_GID		0x0004
+#define		WSTAT_SIZE		0x0008
+#define		WSTAT_ATIME		0x0010
+#define		WSTAT_MTIME		0x0020
+#define		WSTAT_CRTIME	0x0040
+/* NOTE(review): presumably a bit of op_wfsstat's "mask" argument - confirm */
+#define		WFSSTAT_NAME	0x0001
+/* SELECT_*: consecutive numbers, not bit flags. NOTE(review): presumably the "event" values of op_select/op_deselect - confirm */
+#define		SELECT_READ			1
+#define		SELECT_WRITE		2
+#define 	SELECT_EXCEPTION	3
+
+// ioctl() commands that were missing from this header and have been added
+#define		IOCTL_FILE_UNCACHED_IO	10000
+#define		IOCTL_CREATE_TIME		10002
+#define		IOCTL_MODIFIED_TIME		10003
+
+
+// B_CUR_FS_API_VERSION is 2 for R5, but 3 on Zeta and Dano, because
+// of the added calls for power management - so it's set to 3 when
+// COMPILE_FOR_ZETA is defined (that's a requirement to let Dano boot
+// from our fs...), and to 2 otherwise
+#ifdef COMPILE_FOR_ZETA
+#	define B_CUR_FS_API_VERSION	3
+#else
+#	define B_CUR_FS_API_VERSION 2
+#endif
+
+struct attr_info;
+struct index_info;
+/* Hook function types. Every hook returns int; all except op_initialize and op_mount take a "void *ns" as first argument. */
+typedef int	op_read_vnode(void *ns, vnode_id vnid, char r, void **node);
+typedef int	op_write_vnode(void *ns, void *node, char r);
+typedef int	op_remove_vnode(void *ns, void *node, char r);
+typedef int	op_secure_vnode(void *ns, void *node);
+typedef int op_wake_vnode(void *ns, void *node);
+typedef int op_suspend_vnode(void *ns, void *node);
+
+typedef int	op_walk(void *ns, void *base, const char *file, char **newpath,
+					vnode_id *vnid);
+
+typedef int	op_access(void *ns, void *node, int mode);
+
+typedef int	op_create(void *ns, void *dir, const char *name,
+					int omode, int perms, vnode_id *vnid, void **cookie);
+typedef int	op_mkdir(void *ns, void *dir, const char *name,	int perms);
+typedef int	op_symlink(void *ns, void *dir, const char *name,
+					const char *path);
+typedef int op_link(void *ns, void *dir, const char *name, void *node);
+
+typedef int	op_rename(void *ns, void *olddir, const char *oldname,
+					void *newdir, const char *newname);
+typedef int	op_unlink(void *ns, void *dir, const char *name);
+typedef int	op_rmdir(void *ns, void *dir, const char *name);
+
+typedef int	op_readlink(void *ns, void *node, char *buf, size_t *bufsize);
+
+typedef int op_opendir(void *ns, void *node, void **cookie);
+typedef int	op_closedir(void *ns, void *node, void *cookie);
+typedef int	op_rewinddir(void *ns, void *node, void *cookie);
+typedef int	op_readdir(void *ns, void *node, void *cookie, long *num,
+					struct dirent *buf, size_t bufsize);
+
+typedef int	op_open(void *ns, void *node, int omode, void **cookie);
+typedef int	op_close(void *ns, void *node, void *cookie);
+typedef int op_free_cookie(void *ns, void *node, void *cookie);
+typedef int op_read(void *ns, void *node, void *cookie, off_t pos, void *buf,
+					size_t *len);
+typedef int op_write(void *ns, void *node, void *cookie, off_t pos,
+					const void *buf, size_t *len);
+typedef int op_readv(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
+					size_t count, size_t *len);
+typedef int op_writev(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
+					size_t count, size_t *len);
+typedef int	op_ioctl(void *ns, void *node, void *cookie, int cmd, void *buf,
+					size_t len);
+typedef	int	op_setflags(void *ns, void *node, void *cookie, int flags);
+
+typedef int	op_rstat(void *ns, void *node, struct stat *);
+typedef int op_wstat(void *ns, void *node, struct stat *, long mask);
+typedef int	op_fsync(void *ns, void *node);
+
+typedef int	op_select(void *ns, void *node, void *cookie, uint8 event,
+				uint32 ref, selectsync *sync);
+typedef int	op_deselect(void *ns, void *node, void *cookie, uint8 event,
+				selectsync *sync);
+/* volume level hooks; op_initialize and op_mount are the two without an "ns" argument */
+typedef int	op_initialize(const char *devname, void *parms, size_t len);
+typedef int	op_mount(nspace_id nsid, const char *devname, ulong flags,
+					void *parms, size_t len, void **data, vnode_id *vnid);
+typedef int	op_unmount(void *ns);
+typedef int	op_sync(void *ns);
+typedef int op_rfsstat(void *ns, struct fs_info *);
+typedef int op_wfsstat(void *ns, struct fs_info *, long mask);
+
+/* hooks named *_attrdir and *_attr; all of them take a node */
+typedef int	op_open_attrdir(void *ns, void *node, void **cookie);
+typedef int	op_close_attrdir(void *ns, void *node, void *cookie);
+typedef int	op_rewind_attrdir(void *ns, void *node, void *cookie);
+typedef int	op_read_attrdir(void *ns, void *node, void *cookie, long *num,
+					struct dirent *buf, size_t bufsize);
+typedef int	op_remove_attr(void *ns, void *node, const char *name);
+typedef	int	op_rename_attr(void *ns, void *node, const char *oldname,
+					const char *newname);
+typedef int	op_stat_attr(void *ns, void *node, const char *name,
+					struct attr_info *buf);
+
+typedef int	op_write_attr(void *ns, void *node, const char *name, int type,
+					const void *buf, size_t *len, off_t pos);
+typedef int	op_read_attr(void *ns, void *node, const char *name, int type,
+					void *buf, size_t *len, off_t pos);
+/* hooks named *_indexdir and *_index; none of them takes a node */
+typedef int	op_open_indexdir(void *ns, void **cookie);
+typedef int	op_close_indexdir(void *ns, void *cookie);
+typedef int	op_rewind_indexdir(void *ns, void *cookie);
+typedef int	op_read_indexdir(void *ns, void *cookie, long *num,
+					struct dirent *buf, size_t bufsize);
+typedef int	op_create_index(void *ns, const char *name, int type, int flags);
+typedef int	op_remove_index(void *ns, const char *name);
+typedef	int	op_rename_index(void *ns, const char *oldname, 
+					const char *newname);
+typedef int	op_stat_index(void *ns, const char *name, struct index_info *buf);
+/* hooks named *_query; none of them takes a node */
+typedef int	op_open_query(void *ns, const char *query, ulong flags,
+					port_id port, long token, void **cookie);
+typedef int	op_close_query(void *ns, void *cookie);
+typedef int	op_read_query(void *ns, void *cookie, long *num,
+					struct dirent *buf, size_t bufsize);
+
+typedef struct vnode_ops {	/* table of pointers to the op_* hook types; NOTE(review): presumably the member order is fixed by the kernel - confirm before reordering */
+	op_read_vnode			(*read_vnode);
+	op_write_vnode			(*write_vnode);
+	op_remove_vnode			(*remove_vnode);
+	op_secure_vnode			(*secure_vnode);
+	op_walk					(*walk);
+	op_access				(*access);
+	op_create				(*create);
+	op_mkdir				(*mkdir);
+	op_symlink				(*symlink);
+	op_link					(*link);
+	op_rename				(*rename);
+	op_unlink				(*unlink);
+	op_rmdir				(*rmdir);
+	op_readlink				(*readlink);
+	op_opendir				(*opendir);
+	op_closedir				(*closedir);
+	op_free_cookie			(*free_dircookie);	/* all five free_*cookie members share the op_free_cookie type */
+	op_rewinddir			(*rewinddir);
+	op_readdir				(*readdir);
+	op_open					(*open);
+	op_close				(*close);
+	op_free_cookie			(*free_cookie);
+	op_read					(*read);
+	op_write				(*write);
+	op_readv				(*readv);
+	op_writev				(*writev);
+	op_ioctl				(*ioctl);
+	op_setflags				(*setflags);
+	op_rstat				(*rstat);
+	op_wstat				(*wstat);
+	op_fsync				(*fsync);
+	op_initialize			(*initialize);
+	op_mount				(*mount);
+	op_unmount				(*unmount);
+	op_sync					(*sync);
+	op_rfsstat				(*rfsstat);
+	op_wfsstat				(*wfsstat);
+	op_select				(*select);
+	op_deselect				(*deselect);
+	op_open_indexdir		(*open_indexdir);
+	op_close_indexdir		(*close_indexdir);
+	op_free_cookie			(*free_indexdircookie);
+	op_rewind_indexdir		(*rewind_indexdir);
+	op_read_indexdir		(*read_indexdir);
+	op_create_index			(*create_index);
+	op_remove_index			(*remove_index);
+	op_rename_index			(*rename_index);
+	op_stat_index			(*stat_index);
+	op_open_attrdir			(*open_attrdir);
+	op_close_attrdir		(*close_attrdir);
+	op_free_cookie			(*free_attrdircookie);
+	op_rewind_attrdir		(*rewind_attrdir);
+	op_read_attrdir			(*read_attrdir);
+	op_write_attr			(*write_attr);
+	op_read_attr			(*read_attr);
+	op_remove_attr			(*remove_attr);
+	op_rename_attr			(*rename_attr);
+	op_stat_attr			(*stat_attr);
+	op_open_query			(*open_query);
+	op_close_query			(*close_query);
+	op_free_cookie			(*free_querycookie);
+	op_read_query			(*read_query);
+	// the last two members exist for Dano compatibility only
+	op_wake_vnode			(*wake_vnode);
+	op_suspend_vnode		(*suspend_vnode);
+} vnode_ops;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* functions marked _IMPEXP_KERNEL; they are only declared in this header */
+extern _IMPEXP_KERNEL int		new_path(const char *path, char **copy);
+extern _IMPEXP_KERNEL void		free_path(char *p);
+
+extern _IMPEXP_KERNEL void		notify_select_event(selectsync *sync, uint32 ref);
+extern _IMPEXP_KERNEL status_t	is_vnode_removed(nspace_id nsid, vnode_id vnid);
+
+// The prototypes missing here can be found in the fs_interface.h file.
+// That part of the VFS is still compatible with BeOS :)
+
+#ifdef __cplusplus
+}
+#endif
+/* symbols marked _EXPORT: the hook table and the API version number */
+extern _EXPORT vnode_ops	fs_entry;
+extern _EXPORT int32		api_version;
+
+#endif	/* _FSPROTO_H */
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/kernel_interface_r5.cpp
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/kernel_interface_r5.cpp
--- a/src/tests/add-ons/kernel/file_systems/bfs/r5/lock.h
+++ b/src/tests/add-ons/kernel/file_systems/bfs/r5/lock.h
@ -0,0 +1,51 @@
+/*
+	Copyright 1999-2001, Be Incorporated.   All Rights Reserved.
+	This file may be used under the terms of the Be Sample Code License.
+*/
+
+#ifndef _LOCK_H
+#define _LOCK_H
+
+#include <BeBuild.h>
+
+#include <OS.h>
+
+#ifdef __cplusplus
+	extern "C" {
+#else
+	typedef struct lock lock;
+	typedef struct mlock mlock;
+#endif
+
+
+struct lock {
+	sem_id		s;	/* semaphore that LOCK() acquires and UNLOCK() releases */
+	long		c;	/* counter that LOCK()/UNLOCK() change with atomic_add() */
+};
+
+struct mlock {
+	sem_id		s;	/* semaphore used by LOCKM()/UNLOCKM() with a caller-given count */
+};
+
+extern _IMPEXP_KERNEL int	new_lock(lock *l, const char *name);
+extern _IMPEXP_KERNEL int	free_lock(lock *l);
+
+#ifdef LOCK
+#undef LOCK
+#endif
+
+/* LOCK(l) subtracts one from l.c with atomic_add() and acquires l.s if
+ * atomic_add() returned a value <= 0; UNLOCK(l) adds one and releases l.s
+ * if atomic_add() returned a value < 0.
+ * "l" is a struct lock (not a pointer). It is parenthesized so that
+ * expressions like "*ptr" work, and it is expanded twice, so it must not
+ * have side effects.
+ * The braces keep a following "else" from attaching to the macro's own
+ * "if". Both macros form a complete statement; a trailing semicolon at
+ * the call site is allowed, but not required.
+ */
+#define	LOCK(l)		{ if (atomic_add(&(l).c, -1) <= 0) acquire_sem((l).s); }
+#define	UNLOCK(l)	{ if (atomic_add(&(l).c, 1) < 0) release_sem((l).s); }
+
+extern _IMPEXP_KERNEL int	new_mlock(mlock *l, long c, const char *name);
+extern _IMPEXP_KERNEL int	free_mlock(mlock *l);
+
+/* LOCKM()/UNLOCKM() acquire/release "cnt" units of the semaphore l.s of a
+ * struct mlock (not a pointer); the macros expand to the call expression,
+ * so its result can be used. The arguments are parenthesized so that
+ * arbitrary expressions can be passed.
+ */
+#define		LOCKM(l,cnt)	acquire_sem_etc((l).s, (cnt), 0, 0)
+#define		UNLOCKM(l,cnt)	release_sem_etc((l).s, (cnt), 0)
+
+
+#ifdef __cplusplus
+  } // extern "C"
+#endif
+
+#endif