Initial revision

git-svn-id: file:///srv/svn/repos/haiku/trunk/current@10020 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2004-11-19 15:23:44 +00:00
parent f9081c794e
commit efce0b7346
33 changed files with 15573 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,491 @@
#ifndef B_PLUS_TREE_H
#define B_PLUS_TREE_H
/* BPlusTree - BFS B+Tree implementation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Roughly based on 'btlib' written by Marcus J. Ranum
**
** Copyright (c) 2001-2004 pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include "bfs.h"
#include "Journal.h"
#include "Chain.h"
#include <string.h>
//****************** on-disk structures ********************
#define BPLUSTREE_NULL -1LL
#define BPLUSTREE_FREE -2LL
// Header of a B+tree as it is stored on disk (packed layout).
// The data members hold the values exactly as stored; the accessor methods
// return each field passed through the matching BFS_ENDIAN_TO_HOST_* macro.
struct bplustree_header {
uint32 magic;
uint32 node_size;
uint32 max_number_of_levels;
uint32 data_type;
off_t root_node_pointer;
off_t free_node_pointer;
off_t maximum_size;
// endian-converted views of the fields above
uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
// true if "link" is BPLUSTREE_NULL, or is positive and not larger than
// MaximumSize() - NodeSize(); defined inline further down in this header
inline bool IsValidLink(off_t link);
} _PACKED;
#define BPLUSTREE_MAGIC 0x69f6c2e8
#define BPLUSTREE_NODE_SIZE 1024
#define BPLUSTREE_MAX_KEY_LENGTH 256
#define BPLUSTREE_MIN_KEY_LENGTH 1
// Key types of a tree; the typed BPlusTree::Insert()/Remove() wrappers in
// this header compare the header's data_type field against these values.
enum bplustree_types {
BPLUSTREE_STRING_TYPE = 0,
BPLUSTREE_INT32_TYPE = 1,
BPLUSTREE_UINT32_TYPE = 2,
BPLUSTREE_INT64_TYPE = 3,
BPLUSTREE_UINT64_TYPE = 4,
BPLUSTREE_FLOAT_TYPE = 5,
BPLUSTREE_DOUBLE_TYPE = 6
};
struct sorted_array;
typedef sorted_array duplicate_array;
// Fixed part of a B+tree node (packed layout). As implemented by the inline
// functions further down in this header, the node's variable part follows
// directly after this structure:
//   - Keys():       the key data, starting at sizeof(bplustree_node)
//   - KeyLengths(): NumKeys() uint16 entries, starting at
//                   round_up(sizeof(bplustree_node) + AllKeyLength())
//   - Values():     off_t entries, starting right after the key lengths
struct bplustree_node {
off_t left_link;
off_t right_link;
off_t overflow_link;
uint16 all_key_count;
uint16 all_key_length;
// endian-converted views of the fields above
off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
// pointers into the variable part of the node (see layout above)
inline uint16 *KeyLengths() const;
inline off_t *Values() const;
inline uint8 *Keys() const;
// number of bytes taken by the fixed part, the keys, key lengths and values
inline int32 Used() const;
uint8 *KeyAt(int32 index, uint16 *keyLength) const;
// a node is a leaf if its overflow link is BPLUSTREE_NULL
inline bool IsLeaf() const;
void Initialize();
// duplicate handling; implemented outside of this header
uint8 CountDuplicates(off_t offset, bool isFragment) const;
off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
int32 FragmentsUsed(uint32 nodeSize);
// FragmentAt() addresses the node as an array of off_t and returns the
// entry at index * (NUM_FRAGMENT_VALUES + 1)
inline duplicate_array *FragmentAt(int8 index);
// DuplicateArray() returns the address of the overflow_link member
inline duplicate_array *DuplicateArray();
// Link encoding used by the helpers below: the upper two bits carry the
// link type, the lowest ten bits the fragment index, and the bits in
// between (mask 0x3ffffffffffffc00) the offset.
static inline uint8 LinkType(off_t link);
static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
static inline bool IsDuplicate(off_t link);
static inline off_t FragmentOffset(off_t link);
static inline uint32 FragmentIndex(off_t link);
#ifdef DEBUG
void CheckIntegrity(uint32 nodeSize);
#endif
} _PACKED;
//#define BPLUSTREE_NODE 0
#define BPLUSTREE_DUPLICATE_NODE 2
#define BPLUSTREE_DUPLICATE_FRAGMENT 3
#define NUM_FRAGMENT_VALUES 7
#define NUM_DUPLICATE_VALUES 125
//**************************************
// Constants for TreeIterator: FORWARD/BACKWARD are the directions passed to
// Traverse() by GetNextEntry()/GetPreviousEntry(), BEGIN is the position
// passed to Goto() by Rewind().
// NOTE(review): BPLUSTREE_FORWARD and BPLUSTREE_END share the value 1, so the
// two groups cannot be told apart by value.
enum bplustree_traversing {
BPLUSTREE_FORWARD = 1,
BPLUSTREE_BACKWARD = -1,
BPLUSTREE_BEGIN = 0,
BPLUSTREE_END = 1
};
//****************** in-memory structures ********************
template<class T> class Stack;
class BPlusTree;
class TreeIterator;
class CachedNode;
class Inode;
// needed for searching (utilizing a stack)
// Element type of the Stack<> handed to BPlusTree::SeekDown().
struct node_and_key {
// presumably the offset of a visited node - confirm against SeekDown()
off_t nodeOffset;
// presumably the index of the key followed within that node - confirm
uint16 keyIndex;
};
//***** Cache handling *****
/**	Gives access to a single node (or the header) of a BPlusTree.
 *	The destructor calls Unset(), so an object going out of scope
 *	gives up whatever it currently refers to.
 */
class CachedNode {
	public:
		/** Creates an object for \a tree that does not refer to a node yet. */
		CachedNode(BPlusTree *tree)
			:
			fTree(tree),
			fNode(NULL),
			fBlock(NULL),
			fBlockNumber(0)
				// was left uninitialized before
		{
		}

		/** Creates an object for \a tree and immediately calls
		 *	SetTo(offset, check) on it; use Node() to see if that worked.
		 */
		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
			:
			fTree(tree),
			fNode(NULL),
			fBlock(NULL),
			fBlockNumber(0)
		{
			SetTo(offset, check);
		}

		~CachedNode()
		{
			Unset();
		}

		bplustree_node *SetTo(off_t offset, bool check = true);
		bplustree_header *SetToHeader();
		void Unset();

		status_t Free(Transaction *transaction, off_t offset);
		status_t Allocate(Transaction *transaction, bplustree_node **node, off_t *offset);
		status_t WriteBack(Transaction *transaction);

		/** Returns the node pointer currently held (NULL after construction
		 *	with the single-argument constructor).
		 */
		bplustree_node *Node() const { return fNode; }

	protected:
		bplustree_node *InternalSetTo(off_t offset);

		BPlusTree		*fTree;
		bplustree_node	*fNode;
		uint8			*fBlock;
		off_t			fBlockNumber;
};
//******** B+tree class *********
class BPlusTree {
public:
BPlusTree(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
BPlusTree(Inode *stream);
BPlusTree();
~BPlusTree();
status_t SetTo(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
status_t SetTo(Inode *stream);
status_t SetStream(Inode *stream);
status_t InitCheck();
status_t Validate();
status_t Remove(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
status_t Insert(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
status_t Remove(Transaction *transaction, const char *key, off_t value);
status_t Insert(Transaction *transaction, const char *key, off_t value);
status_t Insert(Transaction *transaction, int32 key, off_t value);
status_t Insert(Transaction *transaction, uint32 key, off_t value);
status_t Insert(Transaction *transaction, int64 key, off_t value);
status_t Insert(Transaction *transaction, uint64 key, off_t value);
status_t Insert(Transaction *transaction, float key, off_t value);
status_t Insert(Transaction *transaction, double key, off_t value);
status_t Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
status_t Find(const uint8 *key, uint16 keyLength, off_t *value);
static int32 TypeCodeToKeyType(type_code code);
static int32 ModeToKeyType(mode_t mode);
private:
BPlusTree(const BPlusTree &);
BPlusTree &operator=(const BPlusTree &);
// no implementation
int32 CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
status_t FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength,
uint16 *index = NULL, off_t *next = NULL);
status_t SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);
status_t FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached,
off_t *_offset, bplustree_node **_fragment, uint32 *_index);
status_t InsertDuplicate(Transaction *transaction, CachedNode *cached,
bplustree_node *node, uint16 index, off_t value);
void InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength,
off_t value);
status_t SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other,
off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
off_t *_value);
status_t RemoveDuplicate(Transaction *transaction, bplustree_node *node,
CachedNode *cached, uint16 keyIndex, off_t value);
void RemoveKey(bplustree_node *node, uint16 index);
void UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex,
uint16 splitAt, int8 change);
void AddIterator(TreeIterator *iterator);
void RemoveIterator(TreeIterator *iterator);
private:
friend TreeIterator;
friend CachedNode;
Inode *fStream;
bplustree_header *fHeader;
CachedNode fCachedHeader;
int32 fNodeSize;
bool fAllowDuplicates;
status_t fStatus;
SimpleLock fIteratorLock;
Chain<TreeIterator> fIterators;
};
//***** helper classes/functions *****
extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
const void *key2, int keyLength2);
class TreeIterator {
public:
TreeIterator(BPlusTree *tree);
~TreeIterator();
status_t Goto(int8 to);
status_t Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength,
off_t *value, uint16 *duplicate = NULL);
status_t Find(const uint8 *key, uint16 keyLength);
status_t Rewind();
status_t GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
off_t *value, uint16 *duplicate = NULL);
status_t GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
off_t *value, uint16 *duplicate = NULL);
void SkipDuplicates();
#ifdef DEBUG
void Dump();
#endif
private:
BPlusTree *fTree;
off_t fCurrentNodeOffset; // traverse position
int32 fCurrentKey;
off_t fDuplicateNode;
uint16 fDuplicate, fNumDuplicates;
bool fIsFragment;
private:
friend Chain<TreeIterator>;
friend BPlusTree;
void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change);
void Stop();
TreeIterator *fNext;
};
// BPlusTree's inline functions (most of them may not be needed)
/**	Removes a string key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_STRING_TYPE,
 *	otherwise the result of the raw-key Remove() called with strlen(key)
 *	as key length.
 */
inline status_t
BPlusTree::Remove(Transaction *transaction, const char *key, off_t value)
{
	// go through the endian-aware accessor instead of the raw on-disk field
	if (fHeader->DataType() != BPLUSTREE_STRING_TYPE)
		return B_BAD_TYPE;

	return Remove(transaction, (const uint8 *)key, strlen(key), value);
}
/**	Inserts a string key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_STRING_TYPE,
 *	otherwise the result of the raw-key Insert() called with strlen(key)
 *	as key length.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, const char *key, off_t value)
{
	// go through the endian-aware accessor instead of the raw on-disk field
	if (fHeader->DataType() != BPLUSTREE_STRING_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)key, strlen(key), value);
}
/**	Inserts an int32 key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_INT32_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, int32 key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_INT32_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/**	Inserts a uint32 key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_UINT32_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, uint32 key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_UINT32_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/**	Inserts an int64 key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_INT64_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, int64 key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_INT64_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/**	Inserts a uint64 key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_UINT64_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, uint64 key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_UINT64_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/**	Inserts a float key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_FLOAT_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, float key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_FLOAT_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/**	Inserts a double key/value pair.
 *	Returns B_BAD_TYPE unless the tree's key type is BPLUSTREE_DOUBLE_TYPE,
 *	otherwise the result of the raw-key Insert() on the bytes of \a key.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction, double key, off_t value)
{
	// the raw field only equals the enum value if disk and host byte order
	// agree; the accessor converts it
	if (fHeader->DataType() != BPLUSTREE_DOUBLE_TYPE)
		return B_BAD_TYPE;

	return Insert(transaction, (const uint8 *)&key, sizeof(key), value);
}
/************************ TreeIterator inline functions ************************/
// #pragma mark -
/**	Shortcut for Goto(BPLUSTREE_BEGIN); returns whatever Goto() returns. */
inline status_t
TreeIterator::Rewind()
{
	const int8 target = BPLUSTREE_BEGIN;
	return Goto(target);
}
/**	Shortcut for Traverse() in direction BPLUSTREE_FORWARD; all arguments
 *	are passed through unchanged and Traverse()'s result is returned.
 */
inline status_t
TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
	off_t *value, uint16 *duplicate)
{
	const int8 direction = BPLUSTREE_FORWARD;
	return Traverse(direction, key, keyLength, maxLength, value, duplicate);
}
/**	Shortcut for Traverse() in direction BPLUSTREE_BACKWARD; all arguments
 *	are passed through unchanged and Traverse()'s result is returned.
 */
inline status_t
TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
	off_t *value, uint16 *duplicate)
{
	const int8 direction = BPLUSTREE_BACKWARD;
	return Traverse(direction, key, keyLength, maxLength, value, duplicate);
}
/************************ bplustree_header inline functions ************************/
// #pragma mark -
/**	Checks a node link: BPLUSTREE_NULL is always accepted, any other link
 *	must be positive and not larger than MaximumSize() - NodeSize().
 */
inline bool
bplustree_header::IsValidLink(off_t link)
{
	if (link == BPLUSTREE_NULL)
		return true;

	if (link <= 0)
		return false;

	return link <= MaximumSize() - NodeSize();
}
/************************ bplustree_node inline functions ************************/
// #pragma mark -
/**	Returns the start of the key length array, which is located at the
 *	rounded up end of the key data.
 */
inline uint16 *
bplustree_node::KeyLengths() const
{
	char *base = (char *)this;
	return (uint16 *)(base + round_up(sizeof(bplustree_node) + AllKeyLength()));
}
/**	Returns the start of the value array, which directly follows the
 *	NumKeys() entries of the key length array.
 */
inline off_t *
bplustree_node::Values() const
{
	uint16 *lengths = KeyLengths();
	return (off_t *)(lengths + NumKeys());
}
/**	Returns the start of the key data, i.e. the first byte behind the
 *	fixed part of the node.
 */
inline uint8 *
bplustree_node::Keys() const
{
	return (uint8 *)(this + 1);
}
/**	Returns the number of bytes occupied by the fixed part, the (rounded up)
 *	key data, and one key length plus one value per key.
 */
inline int32
bplustree_node::Used() const
{
	int32 used = round_up(sizeof(bplustree_node) + AllKeyLength());
	used += NumKeys() * (sizeof(uint16) + sizeof(off_t));
	return used;
}
inline bool
bplustree_node::IsLeaf() const
{
return OverflowLink() == BPLUSTREE_NULL;
}
/**	Treats the node as an array of off_t and returns the fragment that
 *	starts at entry index * (NUM_FRAGMENT_VALUES + 1).
 */
inline duplicate_array *
bplustree_node::FragmentAt(int8 index)
{
	off_t *entries = (off_t *)this;
	return (duplicate_array *)&entries[index * (NUM_FRAGMENT_VALUES + 1)];
}
/**	Returns the duplicate array of the node, which starts at the
 *	overflow_link member.
 */
inline duplicate_array *
bplustree_node::DuplicateArray()
{
	off_t *start = &overflow_link;
	return (duplicate_array *)start;
}
/**	Extracts the link type, i.e. the uppermost two bits of \a link. */
inline uint8
bplustree_node::LinkType(off_t link)
{
	const uint64 bits = (uint64)link;
	return bits >> 62;
}
/**	Builds a link value out of its three parts:
 *	- \a type goes into the uppermost two bits,
 *	- \a link contributes its offset bits (mask 0x3ffffffffffffc00),
 *	- \a fragmentIndex goes into the lowest ten bits.
 */
inline off_t
bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
{
	// Compose the value unsigned: shifting a signed 64 bit value by 62
	// moves bits into (or past) the sign bit for types 2 and 3, which is
	// undefined behaviour.
	const uint64 typeBits = (uint64)(type & 0x3) << 62;
	const uint64 offsetBits = (uint64)link & 0x3ffffffffffffc00ULL;
	const uint64 indexBits = fragmentIndex & 0x3ff;

	return (off_t)(typeBits | offsetBits | indexBits);
}
/**	Returns true if the type of \a link has any of the bits of
 *	BPLUSTREE_DUPLICATE_NODE or BPLUSTREE_DUPLICATE_FRAGMENT set.
 */
inline bool
bplustree_node::IsDuplicate(off_t link)
{
	const int duplicateBits = BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT;
	return (LinkType(link) & duplicateBits) != 0;
}
/**	Returns the offset part of \a link, i.e. the link without its type
 *	bits (top two) and fragment index bits (lowest ten).
 */
inline off_t
bplustree_node::FragmentOffset(off_t link)
{
	const off_t offsetMask = 0x3ffffffffffffc00LL;
	return link & offsetMask;
}
/**	Returns the fragment index stored in the lowest ten bits of \a link. */
inline uint32
bplustree_node::FragmentIndex(off_t link)
{
	const off_t indexBits = link & 0x3ff;
	return (uint32)indexBits;
}
#endif /* B_PLUS_TREE_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,66 @@
#ifndef BLOCK_ALLOCATOR_H
#define BLOCK_ALLOCATOR_H
/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Lock.h"
class AllocationGroup;
class Transaction;
class Volume;
class Inode;
struct disk_super_block;
struct block_run;
struct check_control;
struct check_cookie;
// Block bitmap handling and allocation policies for a Volume (see the file
// header). Only the interface is declared here; the implementation is not
// part of this header.
class BlockAllocator {
public:
BlockAllocator(Volume *volume);
~BlockAllocator();
status_t Initialize(bool full = true);
status_t InitializeAndClearBitmap(Transaction &transaction);
// allocation and release of block runs
status_t AllocateForInode(Transaction *transaction, const block_run *parent,
mode_t type, block_run &run);
status_t Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks,
block_run &run, uint16 minimum = 1);
status_t Free(Transaction *transaction, block_run run);
status_t AllocateBlocks(Transaction *transaction, int32 group, uint16 start,
uint16 numBlocks, uint16 minimum, block_run &run);
// checking interface, driven by a check_control structure
status_t StartChecking(check_control *control);
status_t StopChecking(check_control *control);
status_t CheckNextNode(check_control *control);
status_t CheckBlockRun(block_run run, const char *type = NULL, check_control *control = NULL, bool allocated = true);
status_t CheckInode(Inode *inode, check_control *control = NULL);
size_t BitmapSize() const;
private:
bool IsValidCheckControl(check_control *control);
bool CheckBitmapIsUsedAt(off_t block) const;
void SetCheckBitmapAt(off_t block);
static status_t initialize(BlockAllocator *);
Volume *fVolume;
Semaphore fLock;
AllocationGroup *fGroups;
int32 fNumGroups;
uint32 fBlocksPerGroup;
// state used by the checking interface above
uint32 *fCheckBitmap;
check_cookie *fCheckCookie;
};
#endif /* BLOCK_ALLOCATOR_H */

View File

@ -0,0 +1,162 @@
/* BufferPool - a buffer pool for uncached file access
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "BufferPool.h"
#include "Debug.h"
#include <util/kernel_cpp.h>
const uint32 kNumBuffers = 8;
/**	Creates the two semaphores of the pool: "buffer lock" (count 1) guards
 *	the free list, "free buffers" (count 0) counts the buffers in it.
 *	In kernel builds both are handed over to B_SYSTEM_TEAM.
 *	Use InitCheck() to find out whether creating them succeeded.
 */
BufferPool::BufferPool()
	:
	fLock(create_sem(1, "buffer lock")),
	fFreeBuffers(create_sem(0, "free buffers")),
	fFirstFree(NULL)
{
#ifndef USER
	set_sem_owner(fLock, B_SYSTEM_TEAM);
	set_sem_owner(fFreeBuffers, B_SYSTEM_TEAM);
#endif
}
/**	Deletes both semaphores and frees every buffer that is still in the
 *	free list.
 */
BufferPool::~BufferPool()
{
	delete_sem(fFreeBuffers);

	// whether or not we get the lock doesn't matter at this point
	acquire_sem(fLock);

	for (void **current = fFirstFree; current != NULL;) {
		// each free buffer holds the pointer to the next one
		void **following = (void **)*current;
		free(current);
		current = following;
	}

	delete_sem(fLock);
}
/**	Returns B_OK if both semaphores could be created, B_ERROR otherwise. */
status_t
BufferPool::InitCheck()
{
	const bool haveSemaphores = fLock >= B_OK && fFreeBuffers >= B_OK;
	if (!haveSemaphores)
		return B_ERROR;

	return B_OK;
}
/**	Allocates kNumBuffers buffers of \a blockSize bytes each and adds them
 *	to the free list.
 *
 *	Returns B_BAD_VALUE if \a blockSize is too small to hold the list link,
 *	B_NO_MEMORY if an allocation failed (nothing is added in this case), or
 *	the error of acquire_sem() if the pool lock could not be obtained.
 */
status_t
BufferPool::RequestBuffers(uint32 blockSize)
{
	// a free buffer stores the pointer to the next free buffer in its
	// first bytes, so it must be large enough for one
	if (blockSize < sizeof(void *)) {
		RETURN_ERROR(B_BAD_VALUE);
	}

	void **buffers[kNumBuffers];

	// allocate and connect buffers

	for (uint32 i = 0; i < kNumBuffers; i++) {
		buffers[i] = (void **)malloc(blockSize);
		if (buffers[i] == NULL) {
			// Free the buffers allocated so far. (The former loop
			// "for (;i-- > 0; i++)" never advanced and freed the same
			// buffer over and over again.)
			while (i-- > 0)
				free(buffers[i]);

			RETURN_ERROR(B_NO_MEMORY);
		}
		if (i > 0)
			*(buffers[i]) = buffers[i - 1];
	}

	// add the buffers to the free buffers queue

	status_t status = acquire_sem(fLock);
	if (status == B_OK) {
		// buffers[0] is the tail of the new chain, the last buffer its head
		*(buffers[0]) = fFirstFree;
		fFirstFree = buffers[kNumBuffers - 1];
		release_sem(fLock);
		release_sem_etc(fFreeBuffers, kNumBuffers, B_DO_NOT_RESCHEDULE);
	} else {
		for (uint32 i = 0; i < kNumBuffers; i++)
			free(buffers[i]);
	}

	RETURN_ERROR(status);
}
/**	Removes kNumBuffers buffers from the free list and frees them.
 *	Waits on the "free buffers" semaphore until that many buffers are
 *	available. Returns the semaphore error if either semaphore could not
 *	be acquired, B_OK otherwise.
 */
status_t
BufferPool::ReleaseBuffers()
{
	status_t status = acquire_sem_etc(fFreeBuffers, kNumBuffers, 0, 0);
	if (status < B_OK)
		return status;

	status = acquire_sem(fLock);
	if (status < B_OK) {
		// The buffers are still in the list - give their count back, as
		// GetBuffer() does on the same error path.
		release_sem_etc(fFreeBuffers, kNumBuffers, B_DO_NOT_RESCHEDULE);
		return status;
	}

	void **buffer = fFirstFree;

	for (uint32 i = 0; i < kNumBuffers && buffer != NULL; i++) {
		// each free buffer holds the pointer to the next one
		void **nextBuffer = (void **)*buffer;
		free(buffer);
		buffer = nextBuffer;
	}
	fFirstFree = buffer;

	release_sem(fLock);
	return B_OK;
}
/**	Takes the first buffer off the free list and stores it in \a _buffer.
 *	Waits on the "free buffers" semaphore first; if the lock cannot be
 *	acquired afterwards, the semaphore is released again and the error is
 *	returned.
 */
status_t
BufferPool::GetBuffer(void **_buffer)
{
	status_t status = acquire_sem(fFreeBuffers);
	if (status < B_OK)
		return status;

	status = acquire_sem(fLock);
	if (status < B_OK) {
		release_sem(fFreeBuffers);
		return status;
	}

	// unlink the head of the list
	void **head = fFirstFree;
	fFirstFree = (void **)*head;

	release_sem(fLock);

	*_buffer = (void *)head;
	return B_OK;
}
/**	Puts \a _buffer back at the head of the free list and releases the
 *	"free buffers" semaphore once. Returns B_BAD_VALUE for a NULL buffer,
 *	or the error of acquire_sem() if the lock could not be obtained.
 */
status_t
BufferPool::PutBuffer(void *_buffer)
{
	if (_buffer == NULL)
		return B_BAD_VALUE;

	void **returned = (void **)_buffer;

	const status_t status = acquire_sem(fLock);
	if (status < B_OK)
		return status;

	// the first bytes of a free buffer hold the link to the next one
	*returned = fFirstFree;
	fFirstFree = returned;

	release_sem(fLock);
	release_sem(fFreeBuffers);

	return B_OK;
}

View File

@ -0,0 +1,31 @@
#ifndef BUFFER_POOL_H
#define BUFFER_POOL_H
/* BufferPool - a buffer pool for uncached file access
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <OS.h>
// A pool of equally sized buffers kept in a singly linked free list.
class BufferPool {
public:
BufferPool();
~BufferPool();
status_t InitCheck();
// add a batch of buffers of "blockSize" bytes to the pool/remove a batch
status_t RequestBuffers(uint32 blockSize);
status_t ReleaseBuffers();
// take a buffer out of the pool/give it back
status_t GetBuffer(void **_buffer);
status_t PutBuffer(void *buffer);
private:
// fLock is acquired around accesses to fFirstFree;
// fFreeBuffers is acquired by GetBuffer() and released by PutBuffer()
sem_id fLock, fFreeBuffers;
// head of the free list
void **fFirstFree;
};
#endif /* BUFFER_POOL_H */

View File

@ -0,0 +1,55 @@
#ifndef CHAIN_H
#define CHAIN_H
/* Chain - a chain implementation; it's used for the callback management
** throughout the code (currently TreeIterator, and AttributeIterator).
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
/** The Link class you want to use with the Chain class needs to have
* an "fNext" member which is accessible from within the Chain class.
*/
/**	Minimal intrusive singly linked list. The elements are chained through
 *	their own "fNext" member; the chain never owns or deletes them.
 */
template<class Link> class Chain {
	public:
		Chain()
			:
			fFirst(NULL)
		{
		}

		/** Puts \a link at the head of the chain. */
		void
		Add(Link *link)
		{
			link->fNext = fFirst;
			fFirst = link;
		}

		/** Takes \a link out of the chain. Does nothing if \a link is NULL
		 *	or not part of the chain (this used to run past the end of the
		 *	list and dereference a NULL pointer).
		 */
		void
		Remove(Link *link)
		{
			// search list for the correct callback to remove
			Link *last = NULL, *entry;
			for (entry = fFirst; entry != NULL && entry != link; entry = entry->fNext)
				last = entry;

			if (entry == NULL)
				return;

			if (last != NULL)
				last->fNext = link->fNext;
			else
				fFirst = link->fNext;
		}

		/** Returns the element following \a last, or the first element if
		 *	\a last is NULL; NULL marks the end of the chain.
		 */
		Link *
		Next(Link *last)
		{
			if (last == NULL)
				return fFirst;

			return last->fNext;
		}

	private:
		Link *fFirst;
};
#endif /* CHAIN_H */

View File

@ -0,0 +1,298 @@
/* Debug - debug stuff
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Some code is based on work previously done by Marcus Overhagen
**
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "BPlusTree.h"
#include "Inode.h"
#include <KernelExport.h>
#include <time.h>
#define Print __out
/**	Converts a 32 bit ID into a string of its four bytes, most significant
 *	byte first; bytes outside of the printable ASCII range are shown as '.'.
 *
 *	The result lives in a static buffer, so it is only valid until the next
 *	call, and the function is not reentrant.
 */
char *
get_tupel(uint32 id)
{
	static unsigned char tupel[5];

	tupel[0] = 0xff & (id >> 24);
	tupel[1] = 0xff & (id >> 16);
	tupel[2] = 0xff & (id >> 8);
	tupel[3] = 0xff & (id);
	tupel[4] = 0;

	for (int i = 0; i < 4; i++) {
		// printable ASCII ends at '~' (0x7e); the former "> 128" test let
		// DEL (0x7f) and 0x80 through
		if (tupel[i] < ' ' || tupel[i] > '~')
			tupel[i] = '.';
	}

	return (char *)tupel;
}
// Prints "prefix" followed by the allocation_group, start, and length fields
// of "run" in parentheses, and a newline.
void
dump_block_run(const char *prefix, block_run &run)
{
Print("%s(%ld, %d, %d)\n", prefix, run.allocation_group, run.start, run.length);
}
// Prints the address of the given Inode object and some of its members
// (pointers as pointers, the 64 bit values in hex).
void
dump_inode(Inode &inode)
{
Print("Inode (%p) {\n", &inode);
Print("\tfVolume = %p\n", inode.fVolume);
Print("\tfBlockNumber = 0x%16Lx\n", inode.fBlockNumber);
Print("\tfNode = %p\n", inode.Node());
Print("\tfTree = %p\n", inode.fTree);
Print("\tfAttributes = %p\n", inode.fAttributes);
Print("\tfOldSize = 0x%16Lx\n", inode.fOldSize);
Print("\tfOldLastModified = 0x%16Lx\n", inode.fOldLastModified);
Print("}\n");
}
// Prints the fields of a disk_super_block. The three magic values are also
// shown as characters (via get_tupel()) and marked "valid"/"INVALID"
// depending on whether they equal the respective SUPER_BLOCK_MAGIC constant.
// NOTE(review): the fields are read directly, without any byte order
// conversion - confirm that this is intended.
void
dump_super_block(disk_super_block *superBlock)
{
Print("disk_super_block:\n");
Print(" name = %s\n", superBlock->name);
Print(" magic1 = %#08lx (%s) %s\n", superBlock->magic1, get_tupel(superBlock->magic1), (superBlock->magic1 == SUPER_BLOCK_MAGIC1 ? "valid" : "INVALID"));
Print(" fs_byte_order = %#08lx (%s)\n", superBlock->fs_byte_order, get_tupel(superBlock->fs_byte_order));
Print(" block_size = %lu\n", superBlock->block_size);
Print(" block_shift = %lu\n", superBlock->block_shift);
Print(" num_blocks = %Lu\n", superBlock->num_blocks);
Print(" used_blocks = %Lu\n", superBlock->used_blocks);
Print(" inode_size = %lu\n", superBlock->inode_size);
Print(" magic2 = %#08lx (%s) %s\n", superBlock->magic2, get_tupel(superBlock->magic2), (superBlock->magic2 == (int)SUPER_BLOCK_MAGIC2 ? "valid" : "INVALID"));
Print(" blocks_per_ag = %lu\n", superBlock->blocks_per_ag);
// the value in parentheses is 1 << ag_shift
Print(" ag_shift = %lu (%ld bytes)\n", superBlock->ag_shift, 1L << superBlock->ag_shift);
Print(" num_ags = %lu\n", superBlock->num_ags);
Print(" flags = %#08lx (%s)\n", superBlock->flags, get_tupel(superBlock->flags));
dump_block_run(" log_blocks = ", superBlock->log_blocks);
Print(" log_start = %Lu\n", superBlock->log_start);
Print(" log_end = %Lu\n", superBlock->log_end);
Print(" magic3 = %#08lx (%s) %s\n", superBlock->magic3, get_tupel(superBlock->magic3), (superBlock->magic3 == SUPER_BLOCK_MAGIC3 ? "valid" : "INVALID"));
dump_block_run(" root_dir = ", superBlock->root_dir);
dump_block_run(" indices = ", superBlock->indices);
}
/**	Prints a data_stream: all direct block runs that are not zero, the
 *	indirect and double indirect runs if they are not zero, the three
 *	maximum ranges, and the size.
 */
void
dump_data_stream(data_stream *stream)
{
	Print("data_stream:\n");

	for (int index = 0; index < NUM_DIRECT_BLOCKS; index++) {
		// unused slots are left out
		if (stream->direct[index].IsZero())
			continue;

		Print(" direct[%02d] = ",index);
		dump_block_run("",stream->direct[index]);
	}
	Print(" max_direct_range = %Lu\n", stream->max_direct_range);

	const bool hasIndirect = !stream->indirect.IsZero();
	if (hasIndirect)
		dump_block_run(" indirect = ", stream->indirect);
	Print(" max_indirect_range = %Lu\n", stream->max_indirect_range);

	const bool hasDoubleIndirect = !stream->double_indirect.IsZero();
	if (hasDoubleIndirect)
		dump_block_run(" double_indirect = ", stream->double_indirect);
	Print(" max_double_indirect_range = %Lu\n", stream->max_double_indirect_range);

	Print(" size = %Lu\n", stream->size);
}
// Prints the fields of an on-disk inode structure, including its data
// stream (via dump_data_stream()) and the four pad words.
// NOTE(review): the fields are read directly, without any byte order
// conversion - confirm that this is intended.
void
dump_inode(bfs_inode *inode)
{
Print("inode:\n");
Print(" magic1 = %08lx (%s) %s\n", inode->magic1,
get_tupel(inode->magic1), (inode->magic1 == INODE_MAGIC1 ? "valid" : "INVALID"));
dump_block_run( " inode_num = ", inode->inode_num);
Print(" uid = %lu\n", inode->uid);
Print(" gid = %lu\n", inode->gid);
Print(" mode = %08lx\n", inode->mode);
Print(" flags = %08lx\n", inode->flags);
// the times are printed raw and shifted right by INODE_TIME_SHIFT
Print(" create_time = %Ld (%Ld)\n", inode->create_time,
inode->create_time >> INODE_TIME_SHIFT);
Print(" last_modified_time = %Ld (%Ld)\n", inode->last_modified_time,
inode->last_modified_time >> INODE_TIME_SHIFT);
dump_block_run( " parent = ", inode->parent);
dump_block_run( " attributes = ", inode->attributes);
Print(" type = %lu\n", inode->type);
Print(" inode_size = %lu\n", inode->inode_size);
Print(" etc = %#08lx\n", inode->etc);
// short_symlink is only printed for symlinks that do not have the
// INODE_LONG_SYMLINK flag set; "-" is printed otherwise
Print(" short_symlink = %s\n",
S_ISLNK(inode->mode) && (inode->flags & INODE_LONG_SYMLINK) == 0 ?
inode->short_symlink : "-");
dump_data_stream(&(inode->data));
Print(" --\n pad[0] = %08lx\n", inode->pad[0]);
Print(" pad[1] = %08lx\n", inode->pad[1]);
Print(" pad[2] = %08lx\n", inode->pad[2]);
Print(" pad[3] = %08lx\n", inode->pad[3]);
}
/**	Prints the fields of a B+tree header. The magic value is also shown as
 *	characters and marked "valid" or "INVALID" depending on whether it
 *	equals BPLUSTREE_MAGIC.
 *
 *	All fields are read through the header's accessor methods, so the values
 *	are shown (and the magic is compared) in host byte order, like in the
 *	rest of the B+tree code.
 */
void
dump_bplustree_header(bplustree_header *header)
{
	const uint32 magic = header->Magic();

	Print("bplustree_header:\n");
	Print(" magic = %#08lx (%s) %s\n", magic,
		get_tupel(magic), (magic == BPLUSTREE_MAGIC ? "valid" : "INVALID"));
	Print(" node_size = %lu\n", header->NodeSize());
	Print(" max_number_of_levels = %lu\n", header->MaxNumberOfLevels());
	Print(" data_type = %lu\n", header->DataType());
	Print(" root_node_pointer = %Ld\n", header->RootNode());
	Print(" free_node_pointer = %Ld\n", header->FreeNode());
	Print(" maximum_size = %Lu\n", header->MaximumSize());
}
#define DUMPED_BLOCK_SIZE 16
/**	Prints \a size bytes of \a buffer as a hex dump with DUMPED_BLOCK_SIZE
 *	bytes per line: first the hex values (with a separator in front of
 *	every fourth byte), then the same bytes as characters. Bytes that are
 *	not printable ASCII are shown as '.'.
 */
void
dump_block(const char *buffer,int size)
{
	const unsigned char *bytes = (const unsigned char *)buffer;

	for (int i = 0; i < size;) {
		int start = i;

		// hex part; positions beyond the end of the buffer are padded
		for (; i < start + DUMPED_BLOCK_SIZE; i++) {
			if (!(i % 4))
				Print(" ");

			if (i >= size)
				Print(" ");
			else
				Print("%02x", bytes[i]);
		}
		Print(" ");

		// character part
		for (i = start; i < start + DUMPED_BLOCK_SIZE; i++) {
			if (i >= size)
				break;

			// Test the byte unsigned and against the actual printable
			// range; "c < 30" on a plain char let the control characters
			// 30, 31, and 127 through.
			unsigned char c = bytes[i];
			if (c < ' ' || c > '~')
				Print(".");
			else
				Print("%c", c);
		}
		Print("\n");
	}
}
/**	Prints a B+tree node.
 *
 *	Without a \a header only the fixed part of the node is printed. With a
 *	header the keys and values are printed as well, interpreted according to
 *	the header's data type; if the key count looks implausible the whole
 *	node is hex dumped instead. If \a volume is given, each value is also
 *	shown as block run.
 *
 *	The node and header fields are read through their accessor methods,
 *	i.e. in host byte order.
 */
void
dump_bplustree_node(bplustree_node *node,bplustree_header *header,Volume *volume)
{
	Print("bplustree_node:\n");
	Print(" left_link = %Ld\n", node->LeftLink());
	Print(" right_link = %Ld\n", node->RightLink());
	Print(" overflow_link = %Ld\n", node->OverflowLink());
	Print(" all_key_count = %u\n", node->NumKeys());
	Print(" all_key_length = %u\n", node->AllKeyLength());

	if (header == NULL)
		return;

	const uint32 nodeSize = header->NodeSize();
	const uint32 dataType = header->DataType();
	const uint16 numKeys = node->NumKeys();

	// sanity check of the key count before it is used to walk the node
	if (numKeys > node->AllKeyLength()
		|| uint32(numKeys * 10) > nodeSize
		|| numKeys == 0) {
		Print("\n");
		dump_block((char *)node, nodeSize/*, sizeof(off_t)*/);
		return;
	}

	Print("\n");
	for (int32 i = 0; i < numKeys; i++) {
		// preset in case KeyAt() bails out without touching it
		uint16 length = 0;
		char buffer[256], *key = (char *)node->KeyAt(i, &length);
		if (key == NULL || length > 255 || length == 0) {
			Print(" %2ld. Invalid length (%u)!!\n", i, length);
			dump_block((char *)node, nodeSize/*, sizeof(off_t)*/);
			break;
		}
		memcpy(buffer, key, length);
		buffer[length] = '\0';

		// The value has to lie completely within the node. Compare the
		// pointers as such - casting them to uint32 cuts them off on
		// 64 bit platforms.
		off_t *value = node->Values() + i;
		const uint8 *nodeStart = (const uint8 *)node;
		const uint8 *valueStart = (const uint8 *)value;
		if (valueStart < nodeStart
			|| valueStart + sizeof(off_t) > nodeStart + nodeSize)
			Print(" %2ld. Invalid Offset!!\n", i);
		else {
			Print(" %2ld. ", i);

			// numeric keys are copied out of the buffer instead of being
			// read through a casted pointer (alignment/aliasing)
			if (dataType == BPLUSTREE_STRING_TYPE)
				Print("\"%s\"", buffer);
			else if (dataType == BPLUSTREE_INT32_TYPE) {
				int32 number;
				memcpy(&number, buffer, sizeof(number));
				Print("int32 = %ld (0x%lx)", number, number);
			} else if (dataType == BPLUSTREE_UINT32_TYPE) {
				uint32 number;
				memcpy(&number, buffer, sizeof(number));
				Print("uint32 = %lu (0x%lx)", number, number);
			} else if (dataType == BPLUSTREE_INT64_TYPE) {
				int64 number;
				memcpy(&number, buffer, sizeof(number));
				Print("int64 = %Ld (0x%Lx)", number, number);
			} else
				Print("???");

			// strip the two link type bits to get the offset
			const off_t link = BFS_ENDIAN_TO_HOST_INT64(*value);
			off_t offset = link & 0x3fffffffffffffffLL;
			Print(" (%d bytes) -> %Ld", length, offset);
			if (volume != NULL) {
				block_run run = volume->ToBlockRun(offset);
				Print(" (%ld, %d)", run.allocation_group, run.start);
			}

			if (bplustree_node::LinkType(link) == BPLUSTREE_DUPLICATE_FRAGMENT)
				Print(" (duplicate fragment %Ld)\n", link & 0x3ff);
			else if (bplustree_node::LinkType(link) == BPLUSTREE_DUPLICATE_NODE)
				Print(" (duplicate node)\n");
			else
				Print("\n");
		}
	}
}
// #pragma mark -
#ifndef USER
//#warn Don't mount more than once... would register twice the debugger commands!
/**	Kernel debugger command "obfsinode": evaluates its first argument as a
 *	pointer to an Inode object and dumps that object.
 */
static int
dbg_inode(int argc, char **argv)
{
	if (argc < 2) {
		kprintf("usage: obfsinode ptr-to-inode\n");
		return 0;
	}

	Inode *inode = (Inode *)parse_expression(argv[1]);
	if (inode == NULL) {
		// an expression evaluating to 0 would make us dereference a NULL
		// pointer from within the kernel debugger
		kprintf("obfsinode: invalid inode pointer\n");
		return 0;
	}

	dump_inode(*inode);
	return B_OK;
}
#endif
// Unregisters the "obfsinode" debugger command again; does nothing if USER
// is defined.
void
remove_debugger_commands()
{
#ifndef USER
remove_debugger_command("obfsinode", dbg_inode);
#endif
}
// Registers dbg_inode() as the debugger command "obfsinode"; does nothing if
// USER is defined.
void
add_debugger_commands()
{
#ifndef USER
add_debugger_command("obfsinode", dbg_inode, "dump an Inode object");
#endif
}

View File

@ -0,0 +1,107 @@
#ifndef DEBUG_H
#define DEBUG_H
/* Debug - debug stuff
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef DEBUG
# include <string.h>
#endif
#ifdef USER
# include <stdio.h>
# define __out printf
#else
# include <null.h>
# define __out dprintf
#endif
// Which debugger should be used when?
// The DEBUGGER() macro actually has no effect if DEBUG is not defined,
// use the DIE() macro if you really want to die.
#ifdef DEBUG
# ifdef USER
# define DEBUGGER(x) debugger x
# else
# define DEBUGGER(x) kernel_debugger x
# endif
#else
# define DEBUGGER(x) ;
#endif
#ifdef USER
# define DIE(x) debugger x
#else
# define DIE(x) kernel_debugger x
#endif
// Short overview over the debug output macros:
// PRINT()
// is for general messages that very unlikely should appear in a release build
// FATAL()
// this is for fatal messages, when something has really gone wrong
// INFORM()
// general information, as disk size, etc.
// REPORT_ERROR(status_t)
// prints out error information
// RETURN_ERROR(status_t)
// calls REPORT_ERROR() and return the value
// D()
// the statements in D() are only included if DEBUG is defined
// NOTE(review): all of these macros expand either to a brace block or to a
// bare ';' instead of the usual do { } while (0). "MACRO(x);" as the only
// statement of an if that has an else branch therefore does not compile
// without surrounding braces.
#ifdef DEBUG
// debug build: every macro produces output, prefixed with "bfs: "
#define PRINT(x) { __out("bfs: "); __out x; }
#define REPORT_ERROR(status) \
__out("bfs: %s:%d: %s\n", __FUNCTION__, __LINE__, strerror(status));
// reports the error only if it is smaller than B_OK, but always returns
#define RETURN_ERROR(err) { status_t _status = err; if (_status < B_OK) REPORT_ERROR(_status); return _status;}
#define FATAL(x) { __out("bfs: "); __out x; }
#define INFORM(x) { __out("bfs: "); __out x; }
// #define FUNCTION() __out("bfs: %s()\n",__FUNCTION__);
#define FUNCTION_START(x) { __out("bfs: %s() ",__FUNCTION__); __out x; }
#define FUNCTION() ;
// #define FUNCTION_START(x) ;
#define D(x) {x;};
#define ASSERT(x) { if (!(x)) DEBUGGER(("bfs: assert failed: " #x "\n")); }
#else
// release build: only FATAL() and INFORM() still produce output;
// RETURN_ERROR() is reduced to a plain return
#define PRINT(x) ;
#define REPORT_ERROR(status) ;
#define RETURN_ERROR(status) return status;
#define FATAL(x) { __out("bfs: "); __out x; }
#define INFORM(x) { __out("bfs: "); __out x; }
#define FUNCTION() ;
#define FUNCTION_START(x) ;
#define D(x) ;
#define ASSERT(x) ;
#endif
#ifdef DEBUG
struct block_run;
struct bplustree_header;
struct bplustree_node;
struct data_stream;
struct bfs_inode;
struct disk_super_block;
class Inode;
class Volume;
// some structure dump functions
extern void dump_block_run(const char *prefix, block_run &run);
extern void dump_inode(Inode &inode);
extern void dump_super_block(disk_super_block *superBlock);
extern void dump_data_stream(data_stream *stream);
extern void dump_inode(bfs_inode *inode);
extern void dump_bplustree_header(bplustree_header *header);
extern void dump_bplustree_node(bplustree_node *node,
bplustree_header *header = NULL, Volume *volume = NULL);
extern void dump_block(const char *buffer, int size);
extern void remove_debugger_commands();
extern void add_debugger_commands();
#endif
#endif /* DEBUG_H */

View File

@ -0,0 +1,372 @@
/* Index - index access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "Index.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include <util/kernel_cpp.h>
#include <TypeConstants.h>
// B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
// don't need the whole file here; the type can't change anyway
#ifndef _MIME_H
# define B_MIME_STRING_TYPE 'MIMS'
#endif
/**	Creates an index object for \a volume that is not set to any index yet;
 *	use SetTo() to choose one.
 */
Index::Index(Volume *volume)
	:
	fVolume(volume),
	fNode(NULL)
{
	// Unset() only resets the name if a node is set, so without this the
	// name would stay uninitialized until the first call to SetTo()
	fName = NULL;
}
/**	Puts back the vnode of the index node, if one is set. */
Index::~Index()
{
	if (fNode != NULL)
		put_vnode(fVolume->ID(), fNode->ID());
}
/**	Puts back the vnode of the current index node and resets node and name.
 *	Does nothing at all if no node is set.
 */
void
Index::Unset()
{
	if (fNode != NULL) {
		put_vnode(fVolume->ID(), fNode->ID());
		fNode = NULL;
		fName = NULL;
	}
}
/** Sets the index to specified one. Returns an error if the index could
* not be found or initialized.
* Note, Index::Update() may be called on the object even if this method
* failed previously. In this case, it will only update live queries for
* the updated attribute.
*/
/**	Looks up the index \a name in the volume's indices directory and sets
 *	this object to it.
 *
 *	\a name is stored by pointer only, and it is stored before anything can
 *	fail, so that the object knows the name even if there is no such index.
 *
 *	Returns B_BAD_VALUE if \a name is NULL or the indices tree cannot be
 *	obtained, B_ENTRY_NOT_FOUND if the volume has no indices node or the
 *	index node cannot be retrieved, the error of BPlusTree::Find() if the
 *	lookup fails, and B_ERROR if the vnode turned out to be NULL.
 */
status_t
Index::SetTo(const char *name)
{
	// remove the old node, if the index is set for the second time
	Unset();

	fName = name;
		// only stores the pointer, so it assumes that it will stay constant
		// in further comparisons (currently only used in Index::Update())

	// strlen() below must not be called with a NULL pointer
	if (name == NULL)
		return B_BAD_VALUE;

	// Note, the name is saved even if the index couldn't be initialized!
	// This is used to optimize Index::Update() in case there is no index

	Inode *indices = fVolume->IndicesNode();
	if (indices == NULL)
		return B_ENTRY_NOT_FOUND;

	BPlusTree *tree;
	if (indices->GetTree(&tree) != B_OK)
		return B_BAD_VALUE;

	vnode_id id;
	status_t status = tree->Find((const uint8 *)name, (uint16)strlen(name), &id);
	if (status != B_OK)
		return status;

	Vnode vnode(fVolume, id);
	if (vnode.Get(&fNode) != B_OK)
		return B_ENTRY_NOT_FOUND;

	if (fNode == NULL) {
		// the message used to name Index::InitCheck() as its origin
		FATAL(("fatal error at Index::SetTo(), get_vnode() returned NULL pointer\n"));
		return B_ERROR;
	}

	vnode.Keep();
	return B_OK;
}
/**	Translates the stat() index type bits of the node's mode into one of the
 *	standard B_*_TYPE codes. Zero is returned if no index is set, or if the
 *	mode does not carry exactly one known index type (the latter can only
 *	happen if the mode field is corrupted or not that of an index).
 */
uint32
Index::Type()
{
	if (fNode == NULL)
		return 0;

	uint32 type = 0;

	switch (fNode->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX |
			S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)) {
		case S_STR_INDEX:
			type = B_STRING_TYPE;
			break;
		case S_INT_INDEX:
			type = B_INT32_TYPE;
			break;
		case S_UINT_INDEX:
			type = B_UINT32_TYPE;
			break;
		case S_LONG_LONG_INDEX:
			type = B_INT64_TYPE;
			break;
		case S_ULONG_LONG_INDEX:
			type = B_UINT64_TYPE;
			break;
		case S_FLOAT_INDEX:
			type = B_FLOAT_TYPE;
			break;
		case S_DOUBLE_INDEX:
			type = B_DOUBLE_TYPE;
			break;

		default:
			FATAL(("index has unknown type!\n"));
			break;
	}

	return type;
}
/**	Returns the fixed size of the keys stored in this index, in bytes.
 *	Zero is returned for string indices (their keys have no fixed size),
 *	if no index is set, and if the index type is unknown.
 */
size_t
Index::KeySize()
{
	if (fNode == NULL)
		return 0;

	switch (fNode->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX |
			S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)) {
		case S_STR_INDEX:
			// string indices don't have a fixed key size
			return 0;

		case S_INT_INDEX:
		case S_UINT_INDEX:
			return sizeof(int32);

		case S_LONG_LONG_INDEX:
		case S_ULONG_LONG_INDEX:
			return sizeof(int64);

		case S_FLOAT_INDEX:
			return sizeof(float);

		case S_DOUBLE_INDEX:
			return sizeof(double);

		default:
			break;
	}

	FATAL(("index has unknown type!\n"));
	return 0;
}
/**	Creates a new index with the given name and type in the indices
 *	directory of the volume; that directory is created first if the volume
 *	doesn't have one yet. Any index this object was set to before is unset,
 *	even if the type turns out to be unsupported.
 *	Returns B_BAD_TYPE for types that cannot be indexed, or whatever
 *	Volume::CreateIndicesRoot() or Inode::Create() report.
 */
status_t
Index::Create(Transaction *transaction, const char *name, uint32 type)
{
	Unset();

	// map the type code to the matching stat() index mode bit
	int32 indexMode;
	switch (type) {
		case B_STRING_TYPE:
		case B_MIME_STRING_TYPE:
			// B_MIME_STRING_TYPE is the only supported non-standard type, but
			// will be handled like a B_STRING_TYPE internally
			indexMode = S_STR_INDEX;
			break;
		case B_INT32_TYPE:
			indexMode = S_INT_INDEX;
			break;
		case B_UINT32_TYPE:
			indexMode = S_UINT_INDEX;
			break;
		case B_INT64_TYPE:
			indexMode = S_LONG_LONG_INDEX;
			break;
		case B_UINT64_TYPE:
			indexMode = S_ULONG_LONG_INDEX;
			break;
		case B_FLOAT_TYPE:
			indexMode = S_FLOAT_INDEX;
			break;
		case B_DOUBLE_TYPE:
			indexMode = S_DOUBLE_INDEX;
			break;

		default:
			return B_BAD_TYPE;
	}

	// do we need to create the index directory first?
	if (fVolume->IndicesNode() == NULL) {
		status_t status = fVolume->CreateIndicesRoot(transaction);
		if (status < B_OK)
			RETURN_ERROR(status);
	}

	// Inode::Create() will keep the inode locked for us
	Inode *indices = fVolume->IndicesNode();
	return Inode::Create(transaction, indices, name,
		S_INDEX_DIR | S_DIRECTORY | indexMode, 0, type, NULL, &fNode);
}
/**	Updates the specified index, the oldKey will be removed from, the newKey
 *	inserted into the tree. Either key may be NULL (but not both), which
 *	turns the update into a plain insertion or removal; a non-NULL key must
 *	have a length greater than zero, or B_BAD_VALUE is returned.
 *	If "type" is 0, the type of the index itself is used for comparing the
 *	keys and for notifying the live queries.
 *	If the method returns B_BAD_INDEX, it means the index couldn't be found -
 *	the most common reason will be that the index doesn't exist.
 *	You may not want to let the whole transaction fail because of that.
 *	An old key that is missing from the index is only reported via FATAL(),
 *	it does not make the update fail.
 */
status_t
Index::Update(Transaction *transaction, const char *name, int32 type, const uint8 *oldKey,
	uint16 oldLength, const uint8 *newKey, uint16 newLength, Inode *inode)
{
	if (name == NULL
		|| (oldKey == NULL && newKey == NULL)
		|| (oldKey != NULL && oldLength == 0)
		|| (newKey != NULL && newLength == 0))
		return B_BAD_VALUE;

	// B_MIME_STRING_TYPE is the only supported non-standard type
	if (type == B_MIME_STRING_TYPE)
		type = B_STRING_TYPE;

	// If the two keys are identical, don't do anything - only compare if the
	// type has been set, until we have a real type code, we can't do much
	// about the comparison here
	if (type != 0 && !compareKeys(type, oldKey, oldLength, newKey, newLength))
		return B_OK;

	// update all live queries about the change, if they have an index or not
	if (type != 0)
		fVolume->UpdateLiveQueries(inode, name, type, oldKey, oldLength, newKey, newLength);

	// Only (re-)initialize the index if it isn't already set to this name.
	// Note, strcmp() is only ever reached with identical pointers, so fName
	// is never dereferenced while it is NULL.
	if (name != fName || strcmp(name, fName) != 0) {
		if (SetTo(name) < B_OK)
			return B_BAD_INDEX;
	}
	if (fNode == NULL)
		return B_BAD_INDEX;

	// now that we have the type, check again for equality
	if (type == 0 && !compareKeys(Type(), oldKey, oldLength, newKey, newLength))
		return B_OK;

	// same for the live query update
	if (type == 0)
		fVolume->UpdateLiveQueries(inode, name, Type(), oldKey, oldLength, newKey, newLength);

	BPlusTree *tree;
	status_t status = Node()->GetTree(&tree);
	if (status < B_OK)
		return status;

	// remove the old key from the tree
	if (oldKey != NULL) {
		status = tree->Remove(transaction, (const uint8 *)oldKey, oldLength, inode->ID());
		if (status == B_ENTRY_NOT_FOUND) {
			// That's not nice, but should be no reason to let the whole thing fail
			FATAL(("Could not find value in index \"%s\"!\n", name));

			// don't let this error escape if there is no new key to insert
			status = B_OK;
		} else if (status < B_OK)
			return status;
	}

	// add the new key to the tree
	if (newKey != NULL)
		status = tree->Insert(transaction, (const uint8 *)newKey, newLength, inode->ID());

	return status;
}
/**	Adds the given name of the inode to the "name" index; this is an
 *	UpdateName() call without an old name.
 */
status_t
Index::InsertName(Transaction *transaction, const char *name, Inode *inode)
{
	const char *noOldName = NULL;
	return UpdateName(transaction, noOldName, name, inode);
}
/**	Removes the given name of the inode from the "name" index; this is an
 *	UpdateName() call without a new name.
 */
status_t
Index::RemoveName(Transaction *transaction, const char *name, Inode *inode)
{
	const char *noNewName = NULL;
	return UpdateName(transaction, name, noNewName, inode);
}
/**	Replaces oldName with newName for the inode in the "name" index.
 *	Either name may be NULL; its key length is passed on as 0 in that case.
 */
status_t
Index::UpdateName(Transaction *transaction, const char *oldName, const char *newName, Inode *inode)
{
	uint16 oldLength = 0;
	if (oldName)
		oldLength = strlen(oldName);

	uint16 newLength = 0;
	if (newName)
		newLength = strlen(newName);

	return Update(transaction, "name", B_STRING_TYPE, (uint8 *)oldName, oldLength,
		(uint8 *)newName, newLength, inode);
}
/**	Inserts the current size of the inode into the "size" index. */
status_t
Index::InsertSize(Transaction *transaction, Inode *inode)
{
	off_t currentSize = inode->Size();
	uint8 *key = (uint8 *)&currentSize;

	return Update(transaction, "size", B_INT64_TYPE, NULL, 0, key, sizeof(int64), inode);
}
/**	Removes the inode from the "size" index. The key that is removed is
 *	Inode::OldSize(), as that's the size that's in the index.
 */
status_t
Index::RemoveSize(Transaction *transaction, Inode *inode)
{
	off_t indexedSize = inode->OldSize();
	uint8 *key = (uint8 *)&indexedSize;

	return Update(transaction, "size", B_INT64_TYPE, key, sizeof(int64), NULL, 0, inode);
}
/**	Moves the inode within the "size" index from its old (indexed) size to
 *	its current size. Only if that succeeded with B_OK, the old size
 *	remembered in the inode is brought up to date.
 */
status_t
Index::UpdateSize(Transaction *transaction, Inode *inode)
{
	off_t indexedSize = inode->OldSize();
	off_t currentSize = inode->Size();

	status_t status = Update(transaction, "size", B_INT64_TYPE, (uint8 *)&indexedSize,
		sizeof(int64), (uint8 *)&currentSize, sizeof(int64), inode);
	if (status != B_OK)
		return status;

	inode->UpdateOldSize();
	return B_OK;
}
/**	Inserts the last modification time of the inode into the
 *	"last_modified" index.
 */
status_t
Index::InsertLastModified(Transaction *transaction, Inode *inode)
{
	off_t currentTime = inode->LastModified();
	uint8 *key = (uint8 *)&currentTime;

	return Update(transaction, "last_modified", B_INT64_TYPE, NULL, 0,
		key, sizeof(int64), inode);
}
/**	Removes the inode from the "last_modified" index. The key that is
 *	removed is Inode::OldLastModified(), the value which is in the index.
 */
status_t
Index::RemoveLastModified(Transaction *transaction, Inode *inode)
{
	off_t indexedTime = inode->OldLastModified();
	uint8 *key = (uint8 *)&indexedTime;

	return Update(transaction, "last_modified", B_INT64_TYPE, key,
		sizeof(int64), NULL, 0, inode);
}
/**	Updates the "last_modified" index for the inode, and stores the new
 *	time in the inode itself.
 *	If "modified" is -1, the current time (shifted by INODE_TIME_SHIFT) is
 *	used. In any case, the bits selected by INODE_TIME_MASK are filled from
 *	Volume::GetUniqueID().
 *	The on-disk field of the inode is set even if updating the index failed;
 *	the old value remembered for the index is only renewed on B_OK.
 */
status_t
Index::UpdateLastModified(Transaction *transaction, Inode *inode, off_t modified)
{
	off_t indexedTime = inode->OldLastModified();

	if (modified == -1)
		modified = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
	modified |= fVolume->GetUniqueID() & INODE_TIME_MASK;

	const uint8 *oldKey = (uint8 *)&indexedTime;
	const uint8 *newKey = (uint8 *)&modified;
	status_t status = Update(transaction, "last_modified", B_INT64_TYPE, oldKey,
		sizeof(int64), newKey, sizeof(int64), inode);

	inode->Node()->last_modified_time = modified;

	if (status == B_OK)
		inode->UpdateOldLastModified();

	return status;
}

View File

@ -0,0 +1,55 @@
#ifndef INDEX_H
#define INDEX_H
/* Index - index access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
class Transaction;
class Volume;
class Inode;
// The Index class gives access to a single index of a volume: it looks up
// the index node by name, reports its key type and size, and keeps the
// index B+tree in sync when a key of an inode changes.
class Index {
public:
Index(Volume *volume);
~Index();
// binds the object to the index with the given name, resp. releases it
status_t SetTo(const char *name);
void Unset();
// the inode of the index itself, NULL if no index is set
Inode *Node() const { return fNode; };
// B_*_TYPE code of the keys, and their fixed size in bytes (0 if variable)
uint32 Type();
size_t KeySize();
status_t Create(Transaction *transaction, const char *name, uint32 type);
// generic update: removes oldKey from and inserts newKey into the index "name"
status_t Update(Transaction *transaction, const char *name, int32 type, const uint8 *oldKey, uint16 oldLength, const uint8 *newKey, uint16 newLength, Inode *inode);
// convenience wrappers around Update() for the "name" index
status_t InsertName(Transaction *transaction,const char *name,Inode *inode);
status_t RemoveName(Transaction *transaction,const char *name,Inode *inode);
status_t UpdateName(Transaction *transaction,const char *oldName,const char *newName,Inode *inode);
// convenience wrappers around Update() for the "size" index
status_t InsertSize(Transaction *transaction, Inode *inode);
status_t RemoveSize(Transaction *transaction, Inode *inode);
status_t UpdateSize(Transaction *transaction, Inode *inode);
// convenience wrappers around Update() for the "last_modified" index;
// a "modified" value of -1 makes UpdateLastModified() use the current time
status_t InsertLastModified(Transaction *transaction, Inode *inode);
status_t RemoveLastModified(Transaction *transaction, Inode *inode);
status_t UpdateLastModified(Transaction *transaction, Inode *inode,off_t modified = -1);
private:
// copying is not allowed
Index(const Index &);
Index &operator=(const Index &);
// no implementation
Volume *fVolume;
// node of the current index; the object holds a vnode reference to it
Inode *fNode;
// name pointer as passed to SetTo() - the string itself is not copied
const char *fName;
};
#endif /* INDEX_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,404 @@
#ifndef INODE_H
#define INODE_H
/* Inode - inode access functions
**
** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef USER
# include "myfs.h"
# include <stdio.h>
#endif
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
#include <lock.h>
#include <cache.h>
#include <string.h>
#include <unistd.h>
#include "Volume.h"
#include "Journal.h"
#include "Lock.h"
#include "Chain.h"
#include "Debug.h"
class BPlusTree;
class TreeIterator;
class AttributeIterator;
class InodeAllocator;
// Shortcuts for the file type bits of an inode's mode field, plus masks
// that combine several of them.
enum inode_type {
S_DIRECTORY = S_IFDIR,
S_FILE = S_IFREG,
S_SYMLINK = S_IFLNK,
// mask of all node types that live in the standard namespace
S_REGULAR = (S_DIRECTORY | S_FILE | S_SYMLINK),
// mask of all index type bits (the key type of an index)
S_INDEX_TYPES = (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX
| S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)
};
// The CachedBlock class is completely implemented as inlines.
// It should be used when accessing single blocks of the block cache, to
// make sure they will be properly released after use (and it's also very
// convenient to use them).
class CachedBlock {
public:
CachedBlock(Volume *volume);
CachedBlock(Volume *volume, off_t block, bool empty = false);
CachedBlock(Volume *volume, block_run run, bool empty = false);
// takes over the block from "cached", which will no longer release it
CachedBlock(CachedBlock *cached);
// releases the block, unless Keep() has been called
~CachedBlock();
// forgets the block without releasing it
inline void Keep();
// releases the block, if any
inline void Unset();
// releases the current block and gets the specified one; with "empty" set,
// get_empty_block() is used instead of get_block().
// Returns the block data, or NULL on failure.
inline uint8 *SetTo(off_t block, bool empty = false);
inline uint8 *SetTo(block_run run, bool empty = false);
// hands the block over to the transaction via Transaction::WriteBlocks()
inline status_t WriteBack(Transaction *transaction);
uint8 *Block() const { return fBlock; }
off_t BlockNumber() const { return fBlockNumber; }
uint32 BlockSize() const { return fVolume->BlockSize(); }
uint32 BlockShift() const { return fVolume->BlockShift(); }
private:
// copying is not allowed
CachedBlock(const CachedBlock &);
CachedBlock &operator=(const CachedBlock &);
// no implementation
protected:
Volume *fVolume;
// number of the block fBlock belongs to; only set by SetTo() and the
// constructors that call it or copy it from another CachedBlock
off_t fBlockNumber;
// data of the current block, NULL if there is none (or it has been kept)
uint8 *fBlock;
};
//--------------------------------------
// The Inode class represents a single inode of the volume. It is a
// CachedBlock, that is, it directly works on the cached disk block that
// contains the bfs_inode structure (see Node()).
class Inode : public CachedBlock {
public:
Inode(Volume *volume, vnode_id id, bool empty = false, uint8 reenter = 0);
Inode(CachedBlock *cached);
~Inode();
// the on-disk structure - this is the cached block itself
bfs_inode *Node() const { return (bfs_inode *)fBlock; }
// the vnode ID is derived from the block number of the inode
vnode_id ID() const { return fVolume->ToVnode(fBlockNumber); }
ReadWriteLock &Lock() { return fLock; }
SimpleLock &SmallDataLock() { return fSmallDataLock; }
mode_t Mode() const { return Node()->Mode(); }
uint32 Type() const { return Node()->Type(); }
int32 Flags() const { return Node()->Flags(); }
bool IsContainer() const { return Mode() & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR); }
// note, that this test will also be true for S_IFBLK (not that it's used in the fs :)
bool IsDirectory() const { return (Mode() & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY; }
bool IsIndex() const { return (Mode() & (S_INDEX_DIR | 0777)) == S_INDEX_DIR; }
// that's a stupid check, but AFAIK the only possible method...
bool IsAttribute() const { return Mode() & S_ATTR; }
// NOTE(review): this only tests a single bit of the file type; where
// S_IFLNK contains the S_IFREG bit, symlinks would pass as well - confirm
// whether the callers rely on that
bool IsFile() const { return Mode() & S_IFREG; }
bool IsRegularNode() const { return (Mode() & (S_ATTR_DIR | S_INDEX_DIR | S_ATTR)) == 0; }
// a regular node in the standard namespace (i.e. not an index or attribute)
bool IsSymLink() const { return S_ISLNK(Mode()); }
bool HasUserAccessableStream() const { return S_ISREG(Mode()); }
// currently only files can be accessed with bfs_read()/bfs_write()
off_t Size() const { return Node()->data.Size(); }
// NOTE(review): returns the raw on-disk field, while UpdateOldLastModified()
// below goes through the LastModifiedTime() accessor - confirm that both
// are meant to deliver the same value
off_t LastModified() const { return Node()->last_modified_time; }
block_run &BlockRun() const { return Node()->inode_num; }
block_run &Parent() const { return Node()->parent; }
block_run &Attributes() const { return Node()->attributes; }
Volume *GetVolume() const { return fVolume; }
status_t InitCheck(bool checkNode = true);
status_t CheckPermissions(int accessMode) const;
// small_data access methods
status_t MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 length);
status_t RemoveSmallData(Transaction *transaction, const char *name);
status_t AddSmallData(Transaction *transaction, const char *name, uint32 type,
const uint8 *data, size_t length, bool force = false);
status_t GetNextSmallData(small_data **_smallData) const;
small_data *FindSmallData(const char *name) const;
const char *Name() const;
status_t GetName(char *buffer) const;
status_t SetName(Transaction *transaction, const char *name);
// high-level attribute methods
status_t ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length);
status_t WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos, const uint8 *buffer, size_t *_length);
status_t RemoveAttribute(Transaction *transaction, const char *name);
// attribute methods
status_t GetAttribute(const char *name, Inode **attribute);
void ReleaseAttribute(Inode *attribute);
status_t CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute);
// for directories only:
status_t GetTree(BPlusTree **);
bool IsEmpty();
// manipulating the data stream
status_t FindBlockRun(off_t pos, block_run &run, off_t &offset);
status_t ReadAt(off_t pos, uint8 *buffer, size_t *length);
status_t WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *length);
status_t FillGapWithZeros(off_t oldSize, off_t newSize);
status_t SetFileSize(Transaction *transaction, off_t size);
status_t Append(Transaction *transaction, off_t bytes);
status_t Trim(Transaction *transaction);
status_t Free(Transaction *transaction);
status_t Sync();
// create/remove inodes
status_t Remove(Transaction *transaction, const char *name, off_t *_id = NULL,
bool isDirectory = false);
static status_t Create(Transaction *transaction, Inode *parent, const char *name,
int32 mode, int omode, uint32 type, off_t *_id = NULL, Inode **_inode = NULL);
// index maintaining helper
// (the "old" values are the keys that are currently stored in the "size"
// and "last_modified" indices, see the members below)
void UpdateOldSize() { fOldSize = Size(); }
void UpdateOldLastModified() { fOldLastModified = Node()->LastModifiedTime(); }
off_t OldSize() { return fOldSize; }
off_t OldLastModified() { return fOldLastModified; }
// file cache
void *FileCache() const { return fCache; }
void SetFileCache(void *cache) { fCache = cache; }
private:
// copying is not allowed
Inode(const Inode &);
Inode &operator=(const Inode &);
// no implementation
friend void dump_inode(Inode &inode);
friend AttributeIterator;
friend InodeAllocator;
void Initialize();
status_t RemoveSmallData(small_data *item, int32 index);
void AddIterator(AttributeIterator *iterator);
void RemoveIterator(AttributeIterator *iterator);
status_t FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run,
off_t size, off_t offset, off_t &max);
status_t FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength,
off_t size, off_t &offset, off_t &max);
status_t AllocateBlockArray(Transaction *transaction, block_run &run);
status_t GrowStream(Transaction *transaction, off_t size);
status_t ShrinkStream(Transaction *transaction, off_t size);
BPlusTree *fTree;
Inode *fAttributes;
ReadWriteLock fLock;
off_t fOldSize; // we need those values to ensure we will remove
off_t fOldLastModified; // the correct keys from the indices
void *fCache;
mutable SimpleLock fSmallDataLock;
Chain<AttributeIterator> fIterators;
};
// The Vnode class provides a convenience layer upon get_vnode(), so that
// you don't have to call put_vnode() anymore, which may make code more
// readable in some cases.
// The reference is put back by the destructor (or an explicit Put()), but
// only if Get() actually acquired one - a failed or missing Get() must not
// result in a put_vnode() call. Call Keep() to take over the reference.
class Vnode {
	public:
		Vnode(Volume *volume, vnode_id id)
			:
			fVolume(volume),
			fID(id),
			fAcquired(false)
		{
		}

		Vnode(Volume *volume, block_run run)
			:
			fVolume(volume),
			fID(volume->ToVnode(run)),
			fAcquired(false)
		{
		}

		~Vnode()
		{
			Put();
		}

		// Acquires the vnode and stores its inode in "inode"; returns the
		// result of get_vnode().
		status_t Get(Inode **inode)
		{
			// should we check inode against NULL here? it should not be necessary
#ifdef UNSAFE_GET_VNODE
			RecursiveLocker locker(fVolume->Lock());
#endif
			status_t status = get_vnode(fVolume->ID(), fID, (void **)inode);
			if (status >= B_OK)
				fAcquired = true;

			return status;
		}

		// Puts the reference acquired by Get(), if there is one and it has
		// not been kept. The object is unusable afterwards.
		void Put()
		{
			if (fVolume != NULL && fAcquired)
				put_vnode(fVolume->ID(), fID);

			fVolume = NULL;
			fAcquired = false;
		}

		// Makes the caller the owner of the reference; neither Put() nor the
		// destructor will release it anymore.
		void Keep()
		{
			fVolume = NULL;
		}

	private:
		Volume		*fVolume;
		vnode_id	fID;
		bool		fAcquired;
			// true once Get() has successfully acquired a reference
};
// Iterates over the attributes of an inode; GetNext() delivers name, name
// length, type, and vnode ID of one attribute per call.
// The members suggest that both the small_data section and the attribute
// directory are traversed - NOTE(review): confirm against the implementation.
class AttributeIterator {
public:
AttributeIterator(Inode *inode);
~AttributeIterator();
status_t Rewind();
status_t GetNext(char *name, size_t *length, uint32 *type, vnode_id *id);
private:
int32 fCurrentSmallData;
Inode *fInode, *fAttributes;
TreeIterator *fIterator;
void *fBuffer;
private:
// the iterators are linked into the Chain<AttributeIterator> of an Inode
// (see Inode::fIterators), which uses fNext for this
friend Chain<AttributeIterator>;
friend Inode;
void Update(uint16 index, int8 change);
AttributeIterator *fNext;
};
//--------------------------------------
// inlines
/**	Creates a CachedBlock that doesn't refer to any block yet; use SetTo()
 *	to get one. Until then, Block() is NULL and BlockNumber() is 0.
 */
inline
CachedBlock::CachedBlock(Volume *volume)
	:
	fVolume(volume),
	fBlockNumber(0),
		// don't leave the number undefined, BlockNumber() may be called
		// before SetTo()
	fBlock(NULL)
{
}
/**	Creates a CachedBlock and directly sets it to the specified block,
 *	see SetTo(). Check Block() against NULL to find out if that worked.
 */
inline
CachedBlock::CachedBlock(Volume *volume, off_t block, bool empty)
	:
	fVolume(volume),
	fBlock(NULL)
{
	// fBlock must already be NULL here, since SetTo() releases the old block
	SetTo(block, empty);
}
/**	Creates a CachedBlock and directly sets it to the block the block_run
 *	translates to (via Volume::ToBlock()), see SetTo().
 */
inline
CachedBlock::CachedBlock(Volume *volume, block_run run, bool empty)
	:
	fVolume(volume),
	fBlock(NULL)
{
	off_t block = volume->ToBlock(run);
	SetTo(block, empty);
}
/**	Takes over volume, block number, and block data from "cached". The
 *	source object is told to Keep() its block, so that only the new object
 *	will release it.
 */
inline
CachedBlock::CachedBlock(CachedBlock *cached)
	:
	fVolume(cached->fVolume),
	fBlockNumber(cached->BlockNumber()),
	fBlock(cached->fBlock)
{
	// ownership of the block has moved to us
	cached->Keep();
}
/**	Releases the block, if the object still refers to one. */
inline
CachedBlock::~CachedBlock()
{
	Unset();
}
/**	Makes the object forget its block without releasing it - whoever calls
 *	this is responsible for the block from now on.
 */
inline void
CachedBlock::Keep()
{
	fBlock = NULL;
}
/**	Releases the current block, if any. Afterwards, the object no longer
 *	refers to a block, so that calling Unset() again (as the destructor
 *	does) will not release the same block a second time.
 */
inline void
CachedBlock::Unset()
{
	if (fBlock != NULL) {
		release_block(fVolume->Device(), fBlockNumber);
		fBlock = NULL;
	}
}
/**	Releases the current block, and gets the specified one from the cache.
 *	If "empty" is true, get_empty_block() is used, get_block() otherwise.
 *	Returns the data of the block, which is whatever the cache returned.
 */
inline uint8 *
CachedBlock::SetTo(off_t block, bool empty)
{
	Unset();
	fBlockNumber = block;

	if (empty)
		fBlock = (uint8 *)get_empty_block(fVolume->Device(), block, BlockSize());
	else
		fBlock = (uint8 *)get_block(fVolume->Device(), block, BlockSize());

	return fBlock;
}
/**	Same as SetTo(off_t, bool), but takes the block as a block_run that is
 *	translated via Volume::ToBlock().
 */
inline uint8 *
CachedBlock::SetTo(block_run run, bool empty)
{
	off_t block = fVolume->ToBlock(run);
	return SetTo(block, empty);
}
/**	Passes the current block on to Transaction::WriteBlocks().
 *	Fails with B_BAD_VALUE if there is no transaction or no block.
 */
inline status_t
CachedBlock::WriteBack(Transaction *transaction)
{
	if (transaction != NULL && fBlock != NULL)
		return transaction->WriteBlocks(fBlockNumber, fBlock);

	RETURN_ERROR(B_BAD_VALUE);
}
/**	Translates the open mode ("omode", the flags given to bfs_open()) into
 *	the access modes needed for it: O_RDONLY requires read access and hence
 *	yields R_OK, O_WRONLY yields W_OK, and everything else needs both.
 */
inline int
oModeToAccess(int omode)
{
	switch (omode & O_RWMASK) {
		case O_RDONLY:
			return R_OK;
		case O_WRONLY:
			return W_OK;

		default:
			return R_OK | W_OK;
	}
}
#endif /* INODE_H */

View File

@ -0,0 +1,78 @@
# Builds the BFS sources as a static library (libbfs), and links the file
# system add-on for BeOS R5 against it.
SubDir OBOS_TOP src tests add-ons kernel file_systems bfs r5 ;
# save original optimization level
oldOPTIM = $(OPTIM) ;
# set some additional defines
{
local defines =
KEEP_WRONG_DIRENT_RECLEN
UNSAFE_GET_VNODE
#BFS_BIG_ENDIAN_ONLY
;
# By default, the R5 API version is used unless you define this
if $(COMPILE_FOR_ZETA) {
defines += COMPILE_FOR_ZETA ;
}
# Enable OpenBFS to be compiled as a full BFS replacement. Will
# report itself as "bfs" instead of "obfs" (only R5 version)
if $(BFS_REPLACEMENT) {
defines += BFS_REPLACEMENT ;
bfsAddOnName = bfs ;
} else {
bfsAddOnName = obfs ;
}
if $(DEBUG) = 0 {
# the gcc on BeOS doesn't compile BFS correctly with -O2 or more
OPTIM = -O1 ;
}
# turn the list of defines into compiler flags for C and C++ sources
defines = [ FDefines $(defines) ] ;
SubDirCcFlags $(defines) -Wall -Wno-multichar ;
SubDirC++Flags $(defines) -Wall -Wno-multichar -fno-rtti ;
}
UsePrivateHeaders [ FDirName kernel ] ; # For kernel_cpp.cpp
# everything but the kernel interface goes into the static library
KernelStaticLibrary libbfs :
BlockAllocator.cpp
BPlusTree.cpp
kernel_cpp.cpp
Debug.cpp
Index.cpp
Inode.cpp
Journal.cpp
Query.cpp
Utility.cpp
Volume.cpp
BufferPool.cpp
;
# the add-on itself, named bfs_r5 or obfs_r5 (see bfsAddOnName above)
R5KernelAddon $(bfsAddOnName)_r5 : kernel file_systems :
kernel_interface_r5.cpp
: libbfs.a
;
# kernel_cpp.cpp does not live in this directory
SEARCH on [ FGristFiles
kernel_cpp.cpp
] = [ FDirName $(OBOS_TOP) src kernel core util ] ;
#-----------------------------------------------------
# "jam install" copies the add-on into the user's file system add-on directory
rule InstallBFS
{
Depends $(<) : $(>) ;
}
actions ignore InstallBFS
{
cp $(>) /boot/home/config/add-ons/kernel/file_systems/
}
# NOTE(review): the source target is hard-coded as "obfs", while the add-on
# built above is called $(bfsAddOnName)_r5 - confirm that this resolves to
# the file that is actually built
InstallBFS install : obfs ;
# restore original optimization level
OPTIM = $(oldOPTIM) ;

View File

@ -0,0 +1,468 @@
/* Journal - transaction and logging
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Journal.h"
#include "Inode.h"
#include "Debug.h"
#include <Drivers.h>
#include <util/kernel_cpp.h>
/**	Creates the journal for the given volume. The size of the log is taken
 *	from the volume's log block_run; a single log entry may grow up to
 *	about a quarter of the log before it is written out.
 */
Journal::Journal(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs journal"),
	fOwner(NULL),
	fArray(volume->BlockSize()),
	fLogSize(volume->Log().length),
	fMaxTransactionSize(fLogSize / 4 - 5),
	fUsed(0),
	fTransactionsInEntry(0),
	fCurrent(NULL),
	fHasChangedBlocks(false),
	fTimestamp(0)
		// the last three were left uninitialized before; fTimestamp is only
		// evaluated in Lock() after fTransactionsInEntry became non-zero,
		// at which point Unlock() has already set it
{
	// the subtraction above is unsigned, and wraps around for very small
	// logs - limit the transaction size in that case
	if (fMaxTransactionSize > fLogSize / 2)
		fMaxTransactionSize = fLogSize / 2 - 5;
}
/**	Writes out the pending log entry and flushes the device before the
 *	journal goes away, see FlushLogAndBlocks(). Its result is ignored.
 */
Journal::~Journal()
{
	FlushLogAndBlocks();
}
/**	Checks whether the log contains entries that have not been flushed yet
 *	(that is, log start and end differ), and replays them if so. Returns
 *	B_OK if the log is empty, or the result of ReplayLog() otherwise.
 */
status_t
Journal::InitCheck()
{
	if (fVolume->LogStart() == fVolume->LogEnd())
		return B_OK;

	// there is something in the log - that should only be the case if the
	// disk is marked dirty, but we replay the log either way
	if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY)
		FATAL(("log_start and log_end differ, but disk is marked clean - trying to replay log...\n"));

	return ReplayLog();
}
/**	Supposed to verify a log entry before it is replayed. For now, it only
 *	prints the number of entries and the first value of the array, and
 *	always succeeds.
 */
status_t
Journal::CheckLogEntry(int32 count, off_t *array)
{
	// ToDo: check log entry integrity (block numbers and entry size)

	PRINT(("Log entry has %ld entries (%Ld)\n", count, array[0]));
	return B_OK;
}
/**	Replays the log entry that starts at the log-relative block *_start:
 *	every block logged in that entry is copied from the log to its real
 *	location on disk.
 *	A log entry starts with one or more array blocks; the first value in
 *	there is the number of logged blocks, followed by their block numbers.
 *	The logged blocks themselves follow the array blocks.
 *	On return, *_start has been advanced by the blocks consumed (it is not
 *	yet wrapped to the size of the log, though).
 */
status_t
Journal::ReplayLogEntry(int32 *_start)
{
	PRINT(("ReplayLogEntry(start = %ld)\n", *_start));

	off_t logOffset = fVolume->ToBlock(fVolume->Log());
	off_t arrayBlock = (*_start % fLogSize) + logOffset;
	int32 blockSize = fVolume->BlockSize();
	int32 count = 1, valuesInBlock = blockSize / sizeof(off_t);
	int32 numArrayBlocks = 0;
	off_t blockNumber = 0;
	bool first = true;

	CachedBlock cached(fVolume);
	while (count > 0) {
		off_t *array = (off_t *)cached.SetTo(arrayBlock);
		if (array == NULL)
			return B_IO_ERROR;

		int32 index = 0;
		if (first) {
			if (array[0] < 1 || array[0] >= fLogSize)
				return B_BAD_DATA;

			count = array[0];
			first = false;

			numArrayBlocks = ((count + 1) * sizeof(off_t) + blockSize - 1) / blockSize;
			blockNumber = (*_start + numArrayBlocks) % fLogSize;
				// first real block in this log entry
			*_start += count;
			index++;
				// the first entry in the first block is the number
				// of blocks in that log entry
		}
		(*_start)++;

		if (CheckLogEntry(count, array + 1) < B_OK)
			return B_BAD_DATA;

		CachedBlock cachedCopy(fVolume);
		for (; index < valuesInBlock && count-- > 0; index++) {
			PRINT(("replay block %Ld in log at %Ld!\n", array[index], blockNumber));

			uint8 *copy = cachedCopy.SetTo(logOffset + blockNumber);
			if (copy == NULL)
				RETURN_ERROR(B_IO_ERROR);

			ssize_t written = write_pos(fVolume->Device(),
				array[index] << fVolume->BlockShift(), copy, blockSize);
			if (written != blockSize)
				RETURN_ERROR(B_IO_ERROR);

			blockNumber = (blockNumber + 1) % fLogSize;
		}

		// The log is a circular buffer that covers the blocks from logOffset
		// up to, but not including, logOffset + fLogSize - so we have to
		// wrap around as soon as we reach the block after its last one.
		arrayBlock++;
		if (arrayBlock >= logOffset + fLogSize)
			arrayBlock = logOffset;
	}
	return B_OK;
}
/**	Replays all entries between the log start and end pointer - this brings
 *	a disk that was not correctly unmounted back into a consistent and
 *	clean state.
 *	It is called by Journal::InitCheck() if both pointers don't match.
 *	When all entries have been replayed, the log start pointer is set to the
 *	log end, the disk is marked clean, and the super block is written back.
 *	Returns B_ERROR if an entry could not be replayed, or if replaying did
 *	not advance the start pointer.
 */
status_t
Journal::ReplayLog()
{
	INFORM(("Replay log, disk was not correctly unmounted...\n"));

	int32 start = fVolume->LogStart();
	int32 lastStart = -1;

	// we're done as soon as the log is completely flushed
	while (start != fVolume->LogEnd()) {
		if (start == lastStart) {
			// strange, flushing the log hasn't changed the log_start pointer
			return B_ERROR;
		}
		lastStart = start;

		status_t status = ReplayLogEntry(&start);
		if (status < B_OK) {
			FATAL(("replaying log entry from %ld failed: %s\n", start, strerror(status)));
			return B_ERROR;
		}

		// the log is a circular buffer
		start %= fLogSize;
	}

	PRINT(("replaying worked fine!\n"));

	fVolume->SuperBlock().log_start = fVolume->LogEnd();
	fVolume->LogStart() = fVolume->LogEnd();
	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_CLEAN;

	return fVolume->WriteSuperBlock();
}
/** This is a callback function that is called by the cache, whenever
* a block is flushed to disk that was updated as part of a transaction.
* This is necessary to keep track of completed transactions, to be
* able to update the log start pointer.
* "arg" is the log_entry that has been registered for the blocks via
* set_blocks_info() in WriteLogEntry(); the entry is freed here once
* all of its blocks have been written.
*/
void
Journal::blockNotify(off_t blockNumber, size_t numBlocks, void *arg)
{
log_entry *logEntry = (log_entry *)arg;
// count down the blocks of this entry that are still only in the cache
// NOTE(review): cached_blocks is unsigned, so subtracting more blocks than
// are left would wrap around instead of reaching zero
logEntry->cached_blocks -= numBlocks;
if (logEntry->cached_blocks > 0) {
// nothing to do yet...
return;
}
Journal *journal = logEntry->journal;
disk_super_block &superBlock = journal->fVolume->SuperBlock();
bool update = false;
// Set log_start pointer if possible...
// (only the oldest entry in the list can move the start of the log)
if (logEntry == journal->fEntries.head) {
if (logEntry->Next() != NULL) {
int32 length = logEntry->next->start - logEntry->start;
superBlock.log_start = (superBlock.log_start + length) % journal->fLogSize;
} else
superBlock.log_start = journal->fVolume->LogEnd();
update = true;
}
journal->fUsed -= logEntry->length;
// NOTE(review): the list is only locked for removing the entry, the head
// and next pointers above are read without holding fEntriesLock
journal->fEntriesLock.Lock();
logEntry->Remove();
journal->fEntriesLock.Unlock();
free(logEntry);
// update the super block, and change the disk's state, if necessary
if (update) {
journal->fVolume->LogStart() = superBlock.log_start;
// an empty log means there is nothing to replay anymore
if (superBlock.log_start == superBlock.log_end)
superBlock.flags = SUPER_BLOCK_DISK_CLEAN;
journal->fVolume->WriteSuperBlock();
}
}
/** Writes the blocks of the current transaction(s) as a single log entry
* into the log: first the block array (the numbers of the logged blocks),
* then the blocks themselves. Afterwards, the entry is registered for
* blockNotify(), and the log end pointer in the super block is advanced.
* Returns B_OK if there was nothing to write, B_BAD_DATA if no space
* could be made in the log (the volume is put into panic mode then),
* B_IO_ERROR if a block could not be retrieved from the cache, or the
* result of writing the super block.
*/
status_t
Journal::WriteLogEntry()
{
fTransactionsInEntry = 0;
fHasChangedBlocks = false;
sorted_array *array = fArray.Array();
if (array == NULL || array->count == 0)
return B_OK;
// Make sure there is enough space in the log.
// If that fails for whatever reason, panic!
force_cache_flush(fVolume->Device(), false);
int32 tries = fLogSize / 2 + 1;
while (TransactionSize() > FreeLogBlocks() && tries-- > 0)
force_cache_flush(fVolume->Device(), true);
// NOTE(review): tries is also zero here if the very last permitted flush
// did free enough space
if (tries <= 0) {
fVolume->Panic();
return B_BAD_DATA;
}
int32 blockShift = fVolume->BlockShift();
// logOffset is a byte offset on the device, logStart and logPosition are
// block numbers relative to the start of the log
off_t logOffset = fVolume->ToBlock(fVolume->Log()) << blockShift;
off_t logStart = fVolume->LogEnd();
off_t logPosition = logStart % fLogSize;
// Write disk block array
// NOTE(review): the results of the write_pos() calls in this method are
// not checked
uint8 *arrayBlock = (uint8 *)array;
for (int32 size = fArray.BlocksUsed(); size-- > 0;) {
write_pos(fVolume->Device(), logOffset + (logPosition << blockShift),
arrayBlock, fVolume->BlockSize());
logPosition = (logPosition + 1) % fLogSize;
arrayBlock += fVolume->BlockSize();
}
// Write logged blocks into the log
CachedBlock cached(fVolume);
for (int32 i = 0;i < array->count;i++) {
// ToDo: combine blocks if possible (using iovecs)!
uint8 *block = cached.SetTo(array->values[i]);
if (block == NULL)
return B_IO_ERROR;
write_pos(fVolume->Device(), logOffset + (logPosition << blockShift),
block, fVolume->BlockSize());
logPosition = (logPosition + 1) % fLogSize;
}
// Remember the entry, so that blockNotify() can advance the log start
// pointer once all of its blocks have been written to their real location.
// NOTE(review): if the allocation fails, the entry is written and the log
// end is advanced below, but nothing keeps track of it
log_entry *logEntry = (log_entry *)malloc(sizeof(log_entry));
if (logEntry != NULL) {
logEntry->start = logStart;
logEntry->length = TransactionSize();
logEntry->cached_blocks = array->count;
logEntry->journal = this;
fEntriesLock.Lock();
fEntries.Add(logEntry);
fEntriesLock.Unlock();
fCurrent = logEntry;
fUsed += logEntry->length;
set_blocks_info(fVolume->Device(), &array->values[0], array->count, blockNotify, logEntry);
}
// If the log goes to the next round (the log is written as a
// circular buffer), all blocks will be flushed out which is
// possible because we don't have any locked blocks at this
// point.
if (logPosition < logStart)
fVolume->FlushDevice();
// We need to flush the drives own cache here to ensure
// disk consistency.
// If that call fails, we can't do anything about it anyway
ioctl(fVolume->Device(), B_FLUSH_DRIVE_CACHE);
fArray.MakeEmpty();
// Update the log end pointer in the super block
fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_DIRTY;
fVolume->SuperBlock().log_end = logPosition;
fVolume->LogEnd() = logPosition;
return fVolume->WriteSuperBlock();
}
/**	Writes the current log entry to disk (if the transaction contains
 *	anything), and flushes the device afterwards. The journal is locked
 *	during this, with the journal itself posing as the owning transaction.
 *	Returns the result of locking if that failed, or else the result of
 *	Volume::FlushDevice() - a failure to write the log entry is only
 *	reported via FATAL().
 */
status_t
Journal::FlushLogAndBlocks()
{
	// the journal object serves as a unique owner token here
	Transaction *self = (Transaction *)this;

	status_t status = Lock(self);
	if (status != B_OK)
		return status;

	if (TransactionSize() != 0) {
		// write the current log entry to disk
		status = WriteLogEntry();
		if (status < B_OK)
			FATAL(("writing current log entry failed: %s\n", strerror(status)));
	}

	status = fVolume->FlushDevice();

	Unlock(self, true);
	return status;
}
/**	Locks the journal for the given transaction. If that transaction
 *	already owns the journal, nothing happens.
 *	If there are batched transactions waiting in the current log entry, and
 *	the last one is older than two seconds, the entry is written to the log
 *	before the new transaction starts.
 *	Returns B_OK on success, or the error of the lock if it could not be
 *	acquired - the journal is not owned by "owner" then.
 */
status_t
Journal::Lock(Transaction *owner)
{
	if (owner == fOwner)
		return B_OK;

	status_t status = fLock.Lock();
	if (status != B_OK) {
		// we must neither touch the journal nor report success if we
		// don't hold the lock
		return status;
	}

	fOwner = owner;

	// if the last transaction is older than 2 secs, start a new one
	if (fTransactionsInEntry != 0 && system_time() - fTimestamp > 2000000L)
		WriteLogEntry();

	return B_OK;
}
/**	Ends the transaction of the given owner and unlocks the journal; the
 *	time is remembered for the batching done in Lock().
 *	Does nothing if "owner" is not the current owner of the journal.
 */
void
Journal::Unlock(Transaction *owner, bool success)
{
	if (owner == fOwner) {
		TransactionDone(success);

		fTimestamp = system_time();
		fOwner = NULL;
		fLock.Unlock();
	}
}
/**	Gives you access to the current transaction, provided that the journal
 *	lock can be acquired without waiting (which, the lock being recursive,
 *	should be the case if the current thread has started the transaction).
 *	Returns NULL if the lock is not available; if it is, the current owner
 *	is returned, which may be NULL as well.
 */
Transaction *
Journal::CurrentTransaction()
{
	Transaction *owner = NULL;

	if (fLock.LockWithTimeout(0) == B_OK) {
		owner = fOwner;
		fLock.Unlock();
	}

	return owner;
}
/**	Called when a transaction ends (see Unlock()).
 *	A failed transaction is aborted if it is the only one in the current
 *	log entry, by releasing all of its blocks. In any other case, the
 *	transaction is either batched together with the ones to follow (while
 *	the entry is smaller than fMaxTransactionSize), or the whole entry is
 *	written to the log.
 */
status_t
Journal::TransactionDone(bool success)
{
	bool canAbort = !success && fTransactionsInEntry == 0;
	if (canAbort) {
		// we can safely abort the transaction: release the lock for all
		// blocks in the array (we don't need to be notified when they are
		// actually written to disk)
		// NOTE(review): the block array itself is not emptied here - confirm
		// that the released blocks are not meant to be dropped from it
		sorted_array *blocks = fArray.Array();
		for (int32 index = 0; blocks != NULL && index < blocks->count; index++)
			release_block(fVolume->Device(), blocks->values[index]);

		return B_OK;
	}

	// the entry got too large to accept more transactions, write it out
	if (TransactionSize() >= fMaxTransactionSize)
		return WriteLogEntry();

	// Up to a maximum size, we will just batch several
	// transactions together to improve speed
	fTransactionsInEntry++;
	fHasChangedBlocks = false;

	return B_OK;
}
/**	Adds "numBlocks" blocks, starting at "blockNumber", to the current
 *	transaction. "buffer" holds the new contents of those blocks, one after
 *	the other; they are written into the block cache.
 *	Returns B_DEVICE_FULL if the transaction would not fit into the log
 *	anymore, or the error of the cache if writing a block failed.
 */
status_t
Journal::LogBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks)
{
	// ToDo: that's for now - we should change the log file size here
	if (TransactionSize() + numBlocks + 1 > fLogSize)
		return B_DEVICE_FULL;

	fHasChangedBlocks = true;
	int32 blockSize = fVolume->BlockSize();

	while (numBlocks-- > 0) {
		status_t status;

		if (fArray.Find(blockNumber) >= 0) {
			// The block is already in the log, so just update its data
			// Note, this is only necessary if this method is called with a buffer
			// different from the cached block buffer - which is unlikely but
			// we'll make sure this way (costs one cache lookup, though).
			status = cached_write(fVolume->Device(), blockNumber, buffer, 1, blockSize);
		} else {
			// Insert the block into the transaction's array, and write the changes
			// back into the locked cache buffer
			fArray.Insert(blockNumber);
			status = cached_write_locked(fVolume->Device(), blockNumber, buffer, 1, blockSize);
		}

		if (status < B_OK)
			return status;

		// advance to the next block
		blockNumber++;
		buffer += blockSize;
	}

	// If necessary, flush the log, so that we have enough space for this transaction
	if (TransactionSize() > FreeLogBlocks())
		force_cache_flush(fVolume->Device(), true);

	return B_OK;
}
// #pragma mark -
/**	Starts the transaction: the journal responsible for "refBlock" is
 *	retrieved from the volume and locked. Starting a transaction that has
 *	already been started does nothing.
 *	Returns B_ERROR if there is no journal, or if it could not be locked -
 *	the transaction is not bound to a journal then.
 */
status_t
Transaction::Start(Volume *volume, off_t refBlock)
{
	// has it already been started?
	if (fJournal != NULL)
		return B_OK;

	fJournal = volume->GetJournal(refBlock);
	if (fJournal == NULL || fJournal->Lock(this) != B_OK) {
		fJournal = NULL;
		return B_ERROR;
	}

	return B_OK;
}

View File

@ -0,0 +1,159 @@
#ifndef JOURNAL_H
#define JOURNAL_H
/* Journal - transaction and logging
**
** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef USER
# include "myfs.h"
# include <stdio.h>
#endif
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
#include <lock.h>
#include <cache.h>
#include "Volume.h"
#include "Chain.h"
#include "Utility.h"
// A single entry kept in the journal's list of log entries (Journal::fEntries).
// It derives from node<log_entry> so that it can be linked into a
// list<log_entry>.
struct log_entry : node<log_entry> {
// NOTE(review): presumably the position and size of this entry within
// the log area - confirm against Journal.cpp
uint16 start;
uint16 length;
// NOTE(review): presumably the number of blocks of this entry that are
// still held in the cache - confirm against Journal::blockNotify()
uint32 cached_blocks;
// the journal this entry is associated with
Journal *journal;
};
// Locking policy in BFS: if you need both, the volume lock and the
// journal lock, you must lock the volume first - or else you will
// end up in a deadlock.
// That is, if you start a transaction, and will need to lock the
// volume while the transaction is in progress (for the unsafe
// get_vnode() call, for example), you must lock the volume before
// starting the transaction.
// The Journal collects the blocks changed by a Transaction (see LogBlocks())
// and keeps track of the log entries of one volume.
// Only the methods defined inline are documented here in detail; everything
// else is implemented in Journal.cpp.
class Journal {
public:
Journal(Volume *);
~Journal();
status_t InitCheck();
// Lock()/Unlock() are called by the Transaction class when a transaction
// is started, resp. when it ends (success is true if it was ended via
// Transaction::Done(), false if the Transaction object was destroyed
// without it).
status_t Lock(Transaction *owner);
void Unlock(Transaction *owner, bool success);
status_t CheckLogEntry(int32 count, off_t *array);
status_t ReplayLogEntry(int32 *start);
status_t ReplayLog();
status_t WriteLogEntry();
// Adds numBlocks blocks starting at blockNumber to the current transaction
status_t LogBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks);
Transaction *CurrentTransaction();
// The size of the current transaction: the number of items in the block
// array plus the number of blocks the array itself uses
uint32 TransactionSize() const { return fArray.CountItems() + fArray.BlocksUsed(); }
status_t FlushLogAndBlocks();
Volume *GetVolume() const { return fVolume; }
// Computes the free space in the log from the volume's log start/end
inline uint32 FreeLogBlocks() const;
private:
friend log_entry;
static void blockNotify(off_t blockNumber, size_t numBlocks, void *arg);
status_t TransactionDone(bool success);
Volume *fVolume;
RecursiveLock fLock;
Transaction *fOwner;
// the block numbers that are part of the current transaction
BlockArray fArray;
uint32 fLogSize, fMaxTransactionSize, fUsed;
int32 fTransactionsInEntry;
SimpleLock fEntriesLock;
list<log_entry> fEntries;
log_entry *fCurrent;
// set by LogBlocks() as soon as a block is added to the transaction
bool fHasChangedBlocks;
bigtime_t fTimestamp;
};
/**	Returns the number of blocks in the log that are currently unused.
 *	The log is treated as a ring: the used part reaches from the volume's
 *	log start to its log end, and may wrap around the end of the log area.
 */
inline uint32
Journal::FreeLogBlocks() const
{
	// the used part wraps around, the free blocks lie between end and start
	if (fVolume->LogStart() > fVolume->LogEnd())
		return fVolume->LogStart() - fVolume->LogEnd();

	// the used part is contiguous, everything else in the log is free
	return fLogSize - fVolume->LogEnd() + fVolume->LogStart();
}
// For now, that's only a dumb class that does more or less nothing
// else than writing the blocks directly to the real location.
// It doesn't yet use logging.
// A Transaction holds the lock of the journal it was started on until it
// is either ended with Done() (success), or destroyed without Done() being
// called (failure). Blocks are written through the journal's LogBlocks().
class Transaction {
	public:
		// Creates a transaction that is not started yet - Start() has to be
		// called before anything can be written.
		Transaction()
			:
			fJournal(NULL)
		{
		}

		// Creates a transaction and tries to start it right away; use
		// IsStarted() to find out if that was successful.
		Transaction(Volume *volume, off_t refBlock)
			:
			fJournal(NULL)
		{
			Start(volume, refBlock);
		}

		// Same as above, but the reference block is given as a block_run
		Transaction(Volume *volume, block_run refRun)
			:
			fJournal(NULL)
		{
			Start(volume, volume->ToBlock(refRun));
		}

		// A transaction that is still running when it is destroyed is
		// reported to the journal as unsuccessful.
		~Transaction()
		{
			if (fJournal != NULL)
				fJournal->Unlock(this, false);
		}

		status_t Start(Volume *volume, off_t refBlock);
		bool IsStarted() const { return fJournal != NULL; }

		// Ends the transaction successfully; does nothing if it is not running
		void Done()
		{
			if (fJournal == NULL)
				return;

			fJournal->Unlock(this, true);
			fJournal = NULL;
		}

		// Hands the blocks over to the journal; returns B_NO_INIT if the
		// transaction has not been started.
		status_t WriteBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks = 1)
		{
			if (fJournal != NULL)
				return fJournal->LogBlocks(blockNumber, buffer, numBlocks);

			return B_NO_INIT;
		}

		// The volume of the journal in use, or NULL if not started
		Volume *GetVolume()
		{
			if (fJournal == NULL)
				return NULL;

			return fJournal->GetVolume();
		}

	private:
		Transaction(const Transaction &);
		Transaction &operator=(const Transaction &);
			// no implementation

		Journal	*fJournal;
};
#endif /* JOURNAL_H */

View File

@ -0,0 +1,529 @@
#ifndef LOCK_H
#define LOCK_H
/* Lock - simple semaphores, read/write lock implementation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Roughly based on a Be sample code written by Nathan Schrenk.
**
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#include <stdio.h>
#include "Utility.h"
#include "Debug.h"
// Configure here if and when real benaphores should be used
#define USE_BENAPHORE
// if defined, benaphores are used for the Semaphore/RecursiveLock classes
#ifdef USER
//# define FAST_LOCK
// the ReadWriteLock class uses a second Semaphore to
// speed up locking - only makes sense if USE_BENAPHORE
// is defined, too.
#endif
// A simple mutual exclusion lock built on top of a kernel semaphore.
// If USE_BENAPHORE is defined, an atomic counter (fCount, starting at 1) is
// put in front of the semaphore (which is then created with a count of 0),
// so that the semaphore only has to be acquired/released when the counter
// shows that another thread holds or waits for the lock.
class Semaphore {
public:
Semaphore(const char *name)
:
#ifdef USE_BENAPHORE
fSemaphore(create_sem(0, name)),
fCount(1)
#else
fSemaphore(create_sem(1, name))
#endif
{
#ifndef USER
// in the kernel, the semaphore is handed over to the system team
set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
#endif
}
~Semaphore()
{
delete_sem(fSemaphore);
}
// Returns the error of create_sem() if the semaphore could not be
// created, B_OK otherwise.
status_t InitCheck()
{
if (fSemaphore < B_OK)
return fSemaphore;
return B_OK;
}
// Acquires the lock. With benaphores, the semaphore is only acquired
// if the counter was already zero or below before it was decremented.
status_t Lock()
{
#ifdef USE_BENAPHORE
if (atomic_add(&fCount, -1) <= 0)
#endif
return acquire_sem(fSemaphore);
#ifdef USE_BENAPHORE
return B_OK;
#endif
}
// Releases the lock. With benaphores, the semaphore is only released
// if the counter was negative before it was incremented.
status_t Unlock()
{
#ifdef USE_BENAPHORE
if (atomic_add(&fCount, 1) < 0)
#endif
return release_sem(fSemaphore);
#ifdef USE_BENAPHORE
return B_OK;
#endif
}
private:
sem_id fSemaphore;
#ifdef USE_BENAPHORE
vint32 fCount;
#endif
};
// a convenience class to lock a Semaphore object
// Scope based helper for the Semaphore class: the lock is acquired in the
// constructor, and released again in the destructor - but only if
// acquiring it succeeded.
class Locker {
	public:
		Locker(Semaphore &lock)
			:
			fLock(lock),
			fStatus(lock.Lock())
		{
			ASSERT(fStatus == B_OK);
		}

		~Locker()
		{
			if (fStatus != B_OK)
				return;

			fLock.Unlock();
		}

		// The result of the Semaphore::Lock() call made in the constructor
		status_t Status() const { return fStatus; }

	private:
		Semaphore	&fLock;
		status_t	fStatus;
};
//**** Recursive Lock
class RecursiveLock {
public:
RecursiveLock(const char *name)
:
#ifdef USE_BENAPHORE
fSemaphore(create_sem(0, name)),
fCount(1),
#else
fSemaphore(create_sem(1, name)),
#endif
fOwner(-1)
{
#ifndef USER
set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
#endif
}
status_t LockWithTimeout(bigtime_t timeout)
{
thread_id thread = find_thread(NULL);
if (thread == fOwner) {
fOwnerCount++;
return B_OK;
}
status_t status;
#ifdef USE_BENAPHORE
if (atomic_add(&fCount, -1) > 0)
status = B_OK;
else
#endif
status = acquire_sem_etc(fSemaphore, 1, B_RELATIVE_TIMEOUT, timeout);
if (status == B_OK) {
fOwner = thread;
fOwnerCount = 1;
}
return status;
}
status_t Lock()
{
return LockWithTimeout(B_INFINITE_TIMEOUT);
}
status_t Unlock()
{
thread_id thread = find_thread(NULL);
if (thread != fOwner) {
#if __MWERKS__ && !USER //--- The R5 PowerPC kernel doesn't have panic()
char blip[255];
sprintf(blip,"RecursiveLock unlocked by %ld, owned by %ld\n", thread, fOwner);
kernel_debugger(blip);
#else
panic("RecursiveLock unlocked by %ld, owned by %ld\n", thread, fOwner);
#endif
}
if (--fOwnerCount == 0) {
fOwner = -1;
#ifdef USE_BENAPHORE
if (atomic_add(&fCount, 1) < 0)
#endif
return release_sem(fSemaphore);
}
return B_OK;
}
private:
sem_id fSemaphore;
#ifdef USE_BENAPHORE
vint32 fCount;
#endif
thread_id fOwner;
int32 fOwnerCount;
};
// a convenience class to lock a RecursiveLock object
// Scope based helper for the RecursiveLock class: the lock is acquired in
// the constructor, and released again in the destructor - but only if
// acquiring it succeeded.
class RecursiveLocker {
	public:
		RecursiveLocker(RecursiveLock &lock)
			:
			fLock(lock),
			fStatus(lock.Lock())
		{
			ASSERT(fStatus == B_OK);
		}

		~RecursiveLocker()
		{
			if (fStatus != B_OK)
				return;

			fLock.Unlock();
		}

		// The result of the RecursiveLock::Lock() call made in the constructor
		status_t Status() const { return fStatus; }

	private:
		RecursiveLock	&fLock;
		status_t		fStatus;
};
//**** Many Reader/Single Writer Lock
// This is a "fast" implementation of a single writer/many reader
// locking scheme. It's fast because it uses the benaphore idea
// to do lazy semaphore locking - in most cases it will only have
// to do some simple integer arithmetic.
// The second semaphore (fWriteLock) is needed to prevent the situation
// that a second writer can acquire the lock when there are still readers
// holding it.
#define MAX_READERS 100000
// Note: this code will break if you actually have 100000 readers
// at once. With the current thread/... limits in BeOS you can't
// touch that value, but it might be possible in the future.
// Also, you can only have about 20000 concurrent writers until
// the semaphore count exceeds the int32 bounds
// Timeouts:
// It may be a good idea to have timeouts for the WriteLocked class,
// in case something went wrong - we'll see if this is necessary,
// but it would be a somewhat poor work-around for a deadlock...
// But the only real problem with timeouts could be for things like
// "chkbfs" - because such a tool may need to lock for some more time
// define if you want to have fast locks as the foundation for the
// ReadWriteLock class - the benefit is that acquire_sem() doesn't
// have to be called when there is no one waiting.
// The disadvantage is the use of 2 real semaphores which is quite
// expensive regarding that BeOS only allows for a total of 64k
// semaphores (since every open BFS inode has such a lock).
#ifdef FAST_LOCK
class ReadWriteLock {
public:
ReadWriteLock(const char *name)
:
fWriteLock(name)
{
Initialize(name);
}
ReadWriteLock()
:
fWriteLock("bfs r/w w-lock")
{
}
~ReadWriteLock()
{
delete_sem(fSemaphore);
}
status_t Initialize(const char *name = "bfs r/w lock")
{
fSemaphore = create_sem(0, name);
fCount = MAX_READERS;
#ifndef USER
set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
#endif
return fSemaphore;
}
status_t InitCheck()
{
if (fSemaphore < B_OK)
return fSemaphore;
return B_OK;
}
status_t Lock()
{
if (atomic_add(&fCount, -1) <= 0)
return acquire_sem(fSemaphore);
return B_OK;
}
void Unlock()
{
if (atomic_add(&fCount, 1) < 0)
release_sem(fSemaphore);
}
status_t LockWrite()
{
if (fWriteLock.Lock() < B_OK)
return B_ERROR;
int32 readers = atomic_add(&fCount, -MAX_READERS);
status_t status = B_OK;
if (readers < MAX_READERS) {
// Acquire sem for all readers currently not using a semaphore.
// But if we are not the only write lock in the queue, just get
// the one for us
status = acquire_sem_etc(fSemaphore, readers <= 0 ? 1 : MAX_READERS - readers, 0, 0);
}
fWriteLock.Unlock();
return status;
}
void UnlockWrite()
{
int32 readers = atomic_add(&fCount, MAX_READERS);
if (readers < 0) {
// release sem for all readers only when we were the only writer
release_sem_etc(fSemaphore, readers <= -MAX_READERS ? 1 : -readers, 0);
}
}
private:
friend class ReadLocked;
friend class WriteLocked;
sem_id fSemaphore;
vint32 fCount;
Semaphore fWriteLock;
};
#else // FAST_LOCK
class ReadWriteLock {
public:
ReadWriteLock(const char *name)
{
Initialize(name);
}
ReadWriteLock()
{
}
~ReadWriteLock()
{
delete_sem(fSemaphore);
}
status_t Initialize(const char *name = "bfs r/w lock")
{
fSemaphore = create_sem(MAX_READERS, name);
#ifndef USER
set_sem_owner(fSemaphore, B_SYSTEM_TEAM);
#endif
return fSemaphore;
}
status_t InitCheck()
{
if (fSemaphore < B_OK)
return fSemaphore;
return B_OK;
}
status_t Lock()
{
return acquire_sem(fSemaphore);
}
void Unlock()
{
release_sem(fSemaphore);
}
status_t LockWrite()
{
return acquire_sem_etc(fSemaphore, MAX_READERS, 0, 0);
}
void UnlockWrite()
{
release_sem_etc(fSemaphore, MAX_READERS, 0);
}
private:
friend class ReadLocked;
friend class WriteLocked;
sem_id fSemaphore;
};
#endif // FAST_LOCK
// Scope based helper that holds a ReadWriteLock for reading: it is locked
// in the constructor, and unlocked in the destructor if locking succeeded.
class ReadLocked {
	public:
		ReadLocked(ReadWriteLock &lock)
			:
			fLock(lock),
			fStatus(lock.Lock())
		{
		}

		~ReadLocked()
		{
			if (fStatus != B_OK)
				return;

			fLock.Unlock();
		}

	private:
		ReadWriteLock	&fLock;
		status_t		fStatus;
};
// Scope based helper that holds a ReadWriteLock for writing: it is locked
// in the constructor, and unlocked in the destructor if locking succeeded.
class WriteLocked {
	public:
		WriteLocked(ReadWriteLock &lock)
			:
			fLock(lock),
			fStatus(lock.LockWrite())
		{
		}

		~WriteLocked()
		{
			if (fStatus != B_OK)
				return;

			fLock.UnlockWrite();
		}

		// Despite its name, this does not return a boolean, but the
		// status of the LockWrite() call: B_OK means the lock is held,
		// everything else is the error that prevented it.
		status_t IsLocked() { return fStatus; }

	private:
		ReadWriteLock	&fLock;
		status_t		fStatus;
};
// A simple locking structure that doesn't use a semaphore - it's useful
// if you have to protect critical parts with a short runtime.
// It also allows to nest several locks for the same thread.
// A lock that gets by without a semaphore: fHolder contains the thread
// that holds the lock (-1 if nobody does), fCount how often that thread
// has locked it. Threads that cannot get the lock poll for it.
class SimpleLock {
	public:
		SimpleLock()
			:
			fHolder(-1),
			fCount(0)
		{
		}

		// Acquires the lock; "time" is the time in microseconds to snooze
		// between two attempts to get it.
		status_t Lock(bigtime_t time = 500)
		{
			const int32 self = find_thread(NULL);

			for (;;) {
				// become the holder if there is none yet
				int32 previous = _atomic_test_and_set(&fHolder, self, -1);
				if (previous == -1 || previous == self)
					break;

				snooze(time);
			}

			// ToDo: the lock cannot fail currently! We may want
			// to change this
			atomic_add(&fCount, 1);
			return B_OK;
		}

		// Releases one nesting level; the lock is given up with the last one
		void Unlock()
		{
			if (atomic_add(&fCount, -1) != 1)
				return;

			_atomic_set(&fHolder, -1);
		}

		// Returns true if the calling thread holds the lock
		bool IsLocked() const
		{
			return find_thread(NULL) == fHolder;
		}

	private:
		vint32	fHolder;
		vint32	fCount;
};
// A convenience class to lock the SimpleLock, note the
// different timing compared to the direct call
// Scope based helper for the SimpleLock class: locks in the constructor,
// unlocks in the destructor. Its default polling interval (1000) differs
// from the one of SimpleLock::Lock() (500).
class SimpleLocker {
	public:
		SimpleLocker(SimpleLock &lock,bigtime_t time = 1000)
			:
			fLock(lock)
		{
			fLock.Lock(time);
		}

		~SimpleLocker()
		{
			fLock.Unlock();
		}

	private:
		SimpleLock	&fLock;
};
#endif /* LOCK_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,77 @@
#ifndef QUERY_H
#define QUERY_H
/* Query - query parsing and evaluation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
#include "Index.h"
#include "Stack.h"
#include "Chain.h"
class Volume;
class Term;
class Equation;
class TreeIterator;
class Query;
// Holds a query expression: it is constructed from the expression string,
// and provides access to the resulting tree of terms via Root().
// The methods are implemented outside of this header.
class Expression {
public:
Expression(char *expr);
~Expression();
status_t InitCheck();
// NOTE(review): presumably the position in the expression string the
// parser has stopped at - confirm against the implementation
const char *Position() const { return fPosition; }
// the top-most term of the expression
Term *Root() const { return fTerm; }
protected:
// the parser; each method gets a pointer to the current position in
// the expression string
Term *ParseOr(char **expr);
Term *ParseAnd(char **expr);
Term *ParseEquation(char **expr);
bool IsOperator(char **expr,char op);
private:
// copying is not allowed
Expression(const Expression &);
Expression &operator=(const Expression &);
// no implementation
char *fPosition;
Term *fTerm;
};
// A query that is run on a volume using a previously parsed Expression.
// The results are retrieved one by one using GetNextEntry().
// The methods are implemented outside of this header.
class Query {
public:
Query(Volume *volume, Expression *expression, uint32 flags);
~Query();
status_t GetNextEntry(struct dirent *,size_t size);
// stores the port/token pair that is used for live queries
void SetLiveMode(port_id port,int32 token);
// to be called when an attribute of an inode has changed from oldKey
// to newKey
void LiveUpdate(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength);
Expression *GetExpression() const { return fExpression; }
private:
Volume *fVolume;
Expression *fExpression;
Equation *fCurrent;
TreeIterator *fIterator;
Index fIndex;
Stack<Equation *> fStack;
uint32 fFlags;
port_id fPort;
int32 fToken;
private:
// the link used by Chain<Query> to put queries into a list
friend Chain<Query>;
Query *fNext;
};
#endif /* QUERY_H */

View File

@ -0,0 +1,58 @@
#ifndef STACK_H
#define STACK_H
/* Stack - a template stack class
**
** Copyright 2001 pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
// A simple stack that stores its items in a malloc()ed array, which is
// enlarged in steps of kArrayIncrement items when needed.
// Since the items are moved using realloc(), T should be a plain type
// (like a pointer).
template<class T> class Stack {
	public:
		Stack()
			:
			fArray(NULL),
			fUsed(0),
			fMax(0)
		{
		}

		~Stack()
		{
			// free(NULL) is a no-op, no check needed
			free(fArray);
		}

		// Puts "value" on top of the stack.
		// Returns B_NO_MEMORY if the array could not be enlarged - the
		// stack is left unchanged in this case - and B_OK otherwise.
		status_t Push(T value)
		{
			if (fUsed >= fMax) {
				// The new capacity must only be taken over once the array
				// really has been enlarged; a later Push() would write
				// beyond the end of the old array otherwise.
				int32 newMax = fMax + kArrayIncrement;
				T *newArray = (T *)realloc(fArray, newMax * sizeof(T));
				if (newArray == NULL)
					return B_NO_MEMORY;

				fArray = newArray;
				fMax = newMax;
			}
			fArray[fUsed++] = value;
			return B_OK;
		}

		// Removes the top-most item from the stack, and stores it in
		// "value". Returns false if the stack is empty.
		bool Pop(T *value)
		{
			if (fUsed == 0)
				return false;

			*value = fArray[--fUsed];
			return true;
		}

	private:
		enum { kArrayIncrement = 16 };

		T		*fArray;
		int32	fUsed;
			// number of items on the stack
		int32	fMax;
			// number of items the array can hold
};
#endif /* STACK_H */

View File

@ -0,0 +1,636 @@
/* Stream - inode stream access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <Inode.h>
// The classes in the namespace "Access" provide different type of access
// to the inode's data stream.
// Uncached accesses the underlying device directly, Cached uses the
// standard cache, while Logged directs write accesses through the log.
//
// The classes interface is similar to the one of the CachedBlock class,
// but adds two other (static) functions for reading/writing several
// blocks at once.
// We don't use a real pure virtual interface as the class base, but we
// provide the same mechanism using templates.
namespace Access {
// Accesses a single block directly on the device, bypassing the block cache.
// The block contents are kept in a buffer taken from the volume's buffer
// pool, which is given back in Unset() (and therefore in the destructor).
class Uncached {
public:
Uncached(Volume *volume);
Uncached(Volume *volume, off_t block, bool empty = false);
Uncached(Volume *volume, block_run run, bool empty = false);
~Uncached();
void Unset();
// Reads the specified block (or clears the buffer if "empty" is true),
// and returns the buffer, or NULL if no buffer could be obtained
uint8 *SetTo(off_t block, bool empty = false);
uint8 *SetTo(block_run run, bool empty = false);
// Writes the buffer back to the device; the transaction is not used
status_t WriteBack(Transaction *transaction);
uint8 *Block() const { return fBlock; }
off_t BlockNumber() const { return fBlockNumber; }
uint32 BlockSize() const { return fVolume->BlockSize(); }
uint32 BlockShift() const { return fVolume->BlockShift(); }
// read/write all blocks of a block_run at once
static status_t Read(Volume *volume, block_run run, uint8 *buffer);
static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
private:
Volume *fVolume;
off_t fBlockNumber;
uint8 *fBlock;
};
// Accesses blocks through the standard block cache; everything that is not
// declared here is inherited from CachedBlock.
class Cached : public CachedBlock {
public:
Cached(Volume *volume);
Cached(Volume *volume, off_t block, bool empty = false);
Cached(Volume *volume, block_run run, bool empty = false);
// Writes the block back via Volume::WriteBlocks(); the transaction only
// has to be non-NULL
status_t WriteBack(Transaction *transaction);
// read/write all blocks of a block_run at once
static status_t Read(Volume *volume, block_run run, uint8 *buffer);
static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
};
// Reads blocks through the block cache, and directs write accesses through
// the transaction (i.e. the log); everything that is not declared here -
// including WriteBack() - is inherited from CachedBlock.
class Logged : public CachedBlock {
public:
Logged(Volume *volume);
Logged(Volume *volume,off_t block, bool empty = false);
Logged(Volume *volume, block_run run, bool empty = false);
// read/write all blocks of a block_run at once
static status_t Read(Volume *volume, block_run run, uint8 *buffer);
static status_t Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer);
};
/** Creates an object that does not refer to any block yet - use SetTo() */
Uncached::Uncached(Volume *volume)
	:
	fVolume(volume),
	fBlock(NULL)
{
}


/** Creates the object, and directly sets it to the specified block */
Uncached::Uncached(Volume *volume,off_t block, bool empty)
	:
	fVolume(volume),
	fBlock(NULL)
{
	SetTo(block, empty);
}


/** Creates the object, and directly sets it to the first block of the run */
Uncached::Uncached(Volume *volume,block_run run,bool empty)
	:
	fVolume(volume),
	fBlock(NULL)
{
	// the block_run version of SetTo() translates the run using fVolume,
	// which is the volume passed in
	SetTo(run, empty);
}
/** Gives the block buffer (if any) back to the volume's buffer pool */
Uncached::~Uncached()
{
Unset();
}
/**	Gives the block buffer back to the volume's buffer pool, if there is one.
 *	The object does not refer to a block anymore afterwards, so that the
 *	buffer cannot be given back a second time by a subsequent Unset() or
 *	by the destructor.
 */
void
Uncached::Unset()
{
	if (fBlock == NULL)
		return;

	fVolume->Pool().PutBuffer((void *)fBlock);
	fBlock = NULL;
}
/**	Lets the object refer to the specified \a block: a buffer is taken from
 *	the volume's buffer pool, and is either cleared (if \a empty is true),
 *	or filled with the contents of the block on disk.
 *	Returns the buffer, or NULL if no buffer could be obtained or the block
 *	could not be read completely - the object does not hold a buffer in
 *	this case.
 */
uint8 *
Uncached::SetTo(off_t block, bool empty)
{
	Unset();
	fBlockNumber = block;

	if (fVolume->Pool().GetBuffer((void **)&fBlock) < B_OK) {
		// make sure we don't keep a pointer to a buffer we no longer own
		fBlock = NULL;
		return NULL;
	}

	if (empty)
		memset(fBlock, 0, BlockSize());
	else if (read_pos(fVolume->Device(), fBlockNumber << BlockShift(), fBlock, BlockSize())
			!= (ssize_t)BlockSize()) {
		// don't hand out a buffer with undefined contents
		fVolume->Pool().PutBuffer((void *)fBlock);
		fBlock = NULL;
		return NULL;
	}

	return fBlock;
}
/**	Same as SetTo(off_t, bool), but the block is specified by the start
 *	of the block_run \a run.
 */
uint8 *
Uncached::SetTo(block_run run, bool empty)
{
	const off_t block = fVolume->ToBlock(run);
	return SetTo(block, empty);
}
/**	Writes the block buffer directly back to the device; the \a transaction
 *	is not used.
 *	Returns B_BAD_VALUE if the object does not refer to a block, the error
 *	of write_pos() if writing failed, B_IO_ERROR if the block could only be
 *	written partially, and B_OK on success - like the WriteBack() methods
 *	of the other access classes, and not the number of bytes written.
 */
status_t
Uncached::WriteBack(Transaction *transaction)
{
	if (fBlock == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	ssize_t bytesWritten = write_pos(fVolume->Device(), fBlockNumber << BlockShift(), fBlock, BlockSize());
	if (bytesWritten < B_OK)
		return (status_t)bytesWritten;
	if ((uint32)bytesWritten != BlockSize())
		return B_IO_ERROR;

	return B_OK;
}
/**	Reads all blocks of the block_run \a run directly from the device into
 *	\a buffer, which must be large enough to hold them.
 *	Returns the error of read_pos() if reading failed, B_IO_ERROR if the
 *	run could only be read partially, and B_OK on success - like the Read()
 *	methods of the other access classes, and not the number of bytes read.
 */
status_t
Uncached::Read(Volume *volume, block_run run, uint8 *buffer)
{
	const size_t length = run.Length() << volume->BlockShift();

	ssize_t bytesRead = read_pos(volume->Device(), volume->ToBlock(run) << volume->BlockShift(), buffer, length);
	if (bytesRead < B_OK)
		return (status_t)bytesRead;
	if ((size_t)bytesRead != length)
		return B_IO_ERROR;

	return B_OK;
}
/**	Writes \a buffer directly to the blocks of the block_run \a run on the
 *	device; the \a transaction is not used.
 *	Returns the error of write_pos() if writing failed, B_IO_ERROR if the
 *	run could only be written partially, and B_OK on success - like the
 *	Write() methods of the other access classes, and not the number of
 *	bytes written.
 */
status_t
Uncached::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
{
	const size_t length = run.Length() << volume->BlockShift();

	ssize_t bytesWritten = write_pos(volume->Device(), volume->ToBlock(run) << volume->BlockShift(), buffer, length);
	if (bytesWritten < B_OK)
		return (status_t)bytesWritten;
	if ((size_t)bytesWritten != length)
		return B_IO_ERROR;

	return B_OK;
}
// #pragma mark -
// The constructors only pass their arguments on to the CachedBlock
// constructor with the same signature.
Cached::Cached(Volume *volume)
: CachedBlock(volume)
{
}
Cached::Cached(Volume *volume,off_t block,bool empty)
: CachedBlock(volume, block, empty)
{
}
Cached::Cached(Volume *volume,block_run run,bool empty)
: CachedBlock(volume, run, empty)
{
}
/**	Writes the block back using Volume::WriteBlocks().
 *	Although the \a transaction is not used for writing, it must not be
 *	NULL; B_BAD_VALUE is returned in this case, as well as when the object
 *	does not refer to a block.
 */
status_t
Cached::WriteBack(Transaction *transaction)
{
	if (transaction == NULL)
		RETURN_ERROR(B_BAD_VALUE);
	if (fBlock == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	return fVolume->WriteBlocks(fBlockNumber, fBlock, 1);
}
/**	Reads all blocks of the block_run \a run into \a buffer using the
 *	block cache.
 */
status_t
Cached::Read(Volume *volume, block_run run, uint8 *buffer)
{
	const off_t firstBlock = volume->ToBlock(run);

	return cached_read(volume->Device(), firstBlock, buffer, run.Length(), volume->BlockSize());
}
/**	Writes \a buffer to the blocks of the block_run \a run using
 *	Volume::WriteBlocks(); the \a transaction is not used.
 */
status_t
Cached::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
{
	const off_t firstBlock = volume->ToBlock(run);

	return volume->WriteBlocks(firstBlock, buffer, run.Length());
}
// #pragma mark -
// The constructors only pass their arguments on to the CachedBlock
// constructor with the same signature.
Logged::Logged(Volume *volume)
: CachedBlock(volume)
{
}
Logged::Logged(Volume *volume, off_t block, bool empty)
: CachedBlock(volume, block, empty)
{
}
Logged::Logged(Volume *volume, block_run run, bool empty)
: CachedBlock(volume, run, empty)
{
}
/**	Reads all blocks of the block_run \a run into \a buffer using the
 *	block cache.
 */
status_t
Logged::Read(Volume *volume, block_run run, uint8 *buffer)
{
	const off_t firstBlock = volume->ToBlock(run);

	return cached_read(volume->Device(), firstBlock, buffer, run.Length(), volume->BlockSize());
}
/**	Writes \a buffer to the blocks of the block_run \a run through the
 *	\a transaction, which must not be NULL.
 */
status_t
Logged::Write(Transaction *transaction, Volume *volume, block_run run, const uint8 *buffer)
{
	const off_t firstBlock = volume->ToBlock(run);

	return transaction->WriteBlocks(firstBlock, buffer, run.Length());
}
}; // namespace Access
// #pragma mark -
// The Stream template class allows to have only one straight-forward
// implementation of the FindBlockRun(), ReadAt(), and WriteAt() methods.
// They will access the disk through the given cache class only, which
// means either uncached, cached, or logged (see above).
// Cache is one of the classes from the Access namespace; it determines how
// the methods below access the disk.
// The class only adds methods to Inode, and no data members.
template<class Cache>
class Stream : public Inode {
private:
// The constructor only exists to make the compiler happy - it
// is never called in the code itself
Stream() : Inode(NULL, -1) {}
public:
// Finds the block_run that contains the file position "pos"
status_t FindBlockRun(off_t pos, block_run &run, off_t &offset);
// Read from/write to the data stream at position "pos"; "length" is the
// size of the buffer on input, and the number of bytes processed on output
status_t ReadAt(off_t pos, uint8 *buffer, size_t *length);
status_t WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *length);
};
/** see Inode::FindBlockRun() for the documentation of this method */
// In short: finds the block_run of the data stream that contains the file
// position "pos", and stores it in "run"; "offset" is set to the file
// position the run starts at. Depending on "pos", the run is looked up in
// the direct, the indirect, or the double indirect range of the stream.
template<class Cache>
status_t
Stream<Cache>::FindBlockRun(off_t pos, block_run &run, off_t &offset)
{
data_stream *data = &Node()->data;
// find matching block run
if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) {
if (data->MaxDoubleIndirectRange() > 0 && pos >= data->MaxIndirectRange()) {
// access to double indirect blocks
Cache cached(fVolume);
// position relative to the start of the double indirect range
off_t start = pos - data->MaxIndirectRange();
// indirectSize: the number of bytes covered by one entry of the
// double indirect array; directSize: the number of bytes covered
// by one of the block_runs such an entry points to
int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + cached.BlockShift()))
* (fVolume->BlockSize() / sizeof(block_run));
int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift();
int32 index = start / indirectSize;
int32 runsPerBlock = cached.BlockSize() / sizeof(block_run);
// first level: get the block that contains the entry "index"
block_run *indirect = (block_run *)cached.SetTo(
fVolume->ToBlock(data->double_indirect) + index / runsPerBlock);
if (indirect == NULL)
RETURN_ERROR(B_ERROR);
//printf("\tstart = %Ld, indirectSize = %ld, directSize = %ld, index = %ld\n",start,indirectSize,directSize,index);
//printf("\tlook for indirect block at %ld,%d\n",indirect[index].allocation_group,indirect[index].start);
// second level: get the block that contains the run "current"
int32 current = (start % indirectSize) / directSize;
indirect = (block_run *)cached.SetTo(
fVolume->ToBlock(indirect[index % runsPerBlock]) + current / runsPerBlock);
if (indirect == NULL)
RETURN_ERROR(B_ERROR);
run = indirect[current % runsPerBlock];
offset = data->MaxIndirectRange() + (index * indirectSize) + (current * directSize);
//printf("\tfCurrent = %ld, fRunFileOffset = %Ld, fRunBlockEnd = %Ld, fRun = %ld,%d\n",fCurrent,fRunFileOffset,fRunBlockEnd,fRun.allocation_group,fRun.start);
// the run is validated by the return statement at the end of the method
} else {
// access to indirect blocks
int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
// the indirect range starts where the direct range ends
off_t runBlockEnd = data->MaxDirectRange();
Cache cached(fVolume);
off_t block = fVolume->ToBlock(data->indirect);
// walk through all blocks of the indirect block_run, and add up the
// sizes of the runs stored in there until "pos" is reached
for (int32 i = 0; i < data->indirect.Length(); i++) {
block_run *indirect = (block_run *)cached.SetTo(block + i);
if (indirect == NULL)
RETURN_ERROR(B_IO_ERROR);
int32 current = -1;
while (++current < runsPerBlock) {
// a zero run ends the search within the current block
if (indirect[current].IsZero())
break;
runBlockEnd += indirect[current].Length() << cached.BlockShift();
if (runBlockEnd > pos) {
run = indirect[current];
offset = runBlockEnd - (run.Length() << cached.BlockShift());
//printf("reading from indirect block: %ld,%d\n",fRun.allocation_group,fRun.start);
//printf("### indirect-run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset);
return fVolume->ValidateBlockRun(run);
}
}
}
// no run contains "pos"
RETURN_ERROR(B_ERROR);
}
} else {
// access from direct blocks
off_t runBlockEnd = 0LL;
int32 current = -1;
// add up the sizes of the direct runs until "pos" is reached
while (++current < NUM_DIRECT_BLOCKS) {
if (data->direct[current].IsZero())
break;
runBlockEnd += data->direct[current].Length() << fVolume->BlockShift();
if (runBlockEnd > pos) {
run = data->direct[current];
offset = runBlockEnd - (run.Length() << fVolume->BlockShift());
//printf("### run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset);
return fVolume->ValidateBlockRun(run);
}
}
//PRINT(("FindBlockRun() failed in direct range: size = %Ld, pos = %Ld\n",data->size,pos));
return B_ENTRY_NOT_FOUND;
}
// only the double indirect case gets here
return fVolume->ValidateBlockRun(run);
}
// Reads up to *_length bytes from the data stream at position "pos" into
// "buffer". On return, *_length contains the number of bytes that were
// actually read - also if the method fails.
// Returns B_BAD_VALUE if "pos" is negative, or if a block run could not be
// found or read. Reading at or beyond the end of the stream is not an
// error, but reads 0 bytes.
template<class Cache>
status_t
Stream<Cache>::ReadAt(off_t pos, uint8 *buffer, size_t *_length)
{
size_t length = *_length;
// set/check boundaries for pos/length
if (pos < 0)
return B_BAD_VALUE;
if (pos >= Node()->data.Size() || length == 0) {
*_length = 0;
return B_NO_ERROR;
}
// don't read beyond the end of the stream
if (pos + length > Node()->data.Size())
length = Node()->data.Size() - pos;
block_run run;
off_t offset;
if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = 0;
RETURN_ERROR(B_BAD_VALUE);
}
uint32 bytesRead = 0;
uint32 blockSize = fVolume->BlockSize();
uint32 blockShift = fVolume->BlockShift();
uint8 *block;
// the first block_run we read might not be aligned to the block_size boundary
// (read partial block at the beginning)
// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
if (pos % blockSize != 0) {
// let the run start at the block that contains "pos"
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
Cache cached(fVolume,run);
if ((block = cached.Block()) == NULL) {
*_length = 0;
RETURN_ERROR(B_BAD_VALUE);
}
// copy from "pos" to the end of that block - or less, if not that
// much was requested
bytesRead = blockSize - (pos % blockSize);
if (length < bytesRead)
bytesRead = length;
memcpy(buffer, block + (pos % blockSize), bytesRead);
pos += bytesRead;
length -= bytesRead;
if (length == 0) {
*_length = bytesRead;
return B_OK;
}
// "pos" is block aligned now, look up the run it belongs to
if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = bytesRead;
RETURN_ERROR(B_BAD_VALUE);
}
}
// the first block_run is already filled in at this point
// read the following complete blocks using cached_read(),
// the last partial block is read using the generic Cache class
bool partial = false;
while (length > 0) {
// offset is the offset to the current pos in the block_run
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
if (uint32(run.Length() << blockShift) > length) {
// the rest of the run is larger than what is left to read
if (length < blockSize) {
// less than a block is left: read the block via the Cache
// class, and copy only the part requested
Cache cached(fVolume, run);
if ((block = cached.Block()) == NULL) {
*_length = bytesRead;
RETURN_ERROR(B_BAD_VALUE);
}
memcpy(buffer + bytesRead, block, length);
bytesRead += length;
break;
}
// only read the complete blocks for now; the remaining partial
// block is handled in the next iteration
run.length = HOST_ENDIAN_TO_BFS_INT16(length >> blockShift);
partial = true;
}
if (Cache::Read(fVolume, run, buffer + bytesRead) < B_OK) {
*_length = bytesRead;
RETURN_ERROR(B_BAD_VALUE);
}
int32 bytes = run.Length() << blockShift;
#ifdef DEBUG
if ((uint32)bytes > length)
DEBUGGER(("bytes greater than length"));
#endif
length -= bytes;
bytesRead += bytes;
if (length == 0)
break;
pos += bytes;
if (partial) {
// if the last block was read only partially, point block_run
// to the remaining part
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + run.Length());
run.length = HOST_ENDIAN_TO_BFS_INT16(1);
offset = pos;
} else if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = bytesRead;
RETURN_ERROR(B_BAD_VALUE);
}
}
*_length = bytesRead;
return B_OK;
}
// Writes *_length bytes from "buffer" to the data stream at position "pos";
// the stream is enlarged first if the write goes beyond its current size.
// On return, *_length contains the number of bytes that were actually
// written - also if the method fails after it has started writing.
// Returns B_BAD_VALUE if "pos" is negative, if an uncached file would have
// to be resized, or if a block run could not be found or written, and the
// error of SetFileSize() if the stream could not be enlarged.
// NOTE(review): "transaction" is dereferenced without a NULL check whenever
// the stream has to grow (unless INODE_NO_TRANSACTION is set) or the inode
// is logged - callers presumably always pass a valid object; confirm.
template<class Cache>
status_t
Stream<Cache>::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length)
{
size_t length = *_length;
// set/check boundaries for pos/length
if (pos < 0)
return B_BAD_VALUE;
if (pos + length > Size()) {
off_t oldSize = Size();
// uncached files can't be resized (Inode::SetFileSize() also
// doesn't allow this, but this way we don't have to start a
// transaction to find out).
if (Flags() & INODE_NO_CACHE)
return B_BAD_VALUE;
// the transaction doesn't have to be started already
// ToDo: what's that INODE_NO_TRANSACTION flag good for again?
// NOTE(review): the result of Start() is not checked here
if ((Flags() & INODE_NO_TRANSACTION) == 0
&& !transaction->IsStarted())
transaction->Start(fVolume, BlockNumber());
// let's grow the data stream to the size needed
status_t status = SetFileSize(transaction, pos + length);
if (status < B_OK) {
*_length = 0;
RETURN_ERROR(status);
}
// If the position of the write was beyond the file size, we
// have to fill the gap between that position and the old file
// size with zeros.
// NOTE(review): if FillGapWithZeros() returns a status, it is ignored here
FillGapWithZeros(oldSize, pos);
}
// If we don't want to write anything, we can now return (we may
// just have changed the file size using the position parameter)
if (length == 0)
return B_OK;
block_run run;
off_t offset;
if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = 0;
RETURN_ERROR(B_BAD_VALUE);
}
// logged streams need a running transaction
bool logStream = (Flags() & INODE_LOGGED) == INODE_LOGGED;
if (logStream
&& !transaction->IsStarted())
transaction->Start(fVolume, BlockNumber());
uint32 bytesWritten = 0;
uint32 blockSize = fVolume->BlockSize();
uint32 blockShift = fVolume->BlockShift();
uint8 *block;
// the first block_run we write might not be aligned to the block_size boundary
// (write partial block at the beginning)
// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
if (pos % blockSize != 0) {
// let the run start at the block that contains "pos"
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
// the block is read first, since only a part of it is overwritten
Cache cached(fVolume, run);
if ((block = cached.Block()) == NULL) {
*_length = 0;
RETURN_ERROR(B_BAD_VALUE);
}
// write from "pos" to the end of that block - or less, if not that
// much was requested
bytesWritten = blockSize - (pos % blockSize);
if (length < bytesWritten)
bytesWritten = length;
memcpy(block + (pos % blockSize),buffer,bytesWritten);
// NOTE(review): the result of WriteBack() is not checked
cached.WriteBack(transaction);
pos += bytesWritten;
length -= bytesWritten;
if (length == 0) {
*_length = bytesWritten;
return B_OK;
}
// "pos" is block aligned now, look up the run it belongs to
if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = bytesWritten;
RETURN_ERROR(B_BAD_VALUE);
}
}
// the first block_run is already filled in at this point
// write the following complete blocks using Volume::WriteBlocks(),
// the last partial block is written using the generic Cache class
bool partial = false;
while (length > 0) {
// offset is the offset to the current pos in the block_run
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
if (uint32(run.Length() << blockShift) > length) {
// the rest of the run is larger than what is left to write
if (length < blockSize) {
// less than a block is left: read the block via the Cache
// class, and only replace the beginning of it
Cache cached(fVolume,run);
if ((block = cached.Block()) == NULL) {
*_length = bytesWritten;
RETURN_ERROR(B_BAD_VALUE);
}
memcpy(block, buffer + bytesWritten, length);
// NOTE(review): the result of WriteBack() is not checked
cached.WriteBack(transaction);
bytesWritten += length;
break;
}
// only write the complete blocks for now; the remaining partial
// block is handled in the next iteration
run.length = HOST_ENDIAN_TO_BFS_INT16(length >> blockShift);
partial = true;
}
if (Cache::Write(transaction, fVolume, run, buffer + bytesWritten) < B_OK) {
*_length = bytesWritten;
RETURN_ERROR(B_BAD_VALUE);
}
int32 bytes = run.Length() << blockShift;
length -= bytes;
bytesWritten += bytes;
if (length == 0)
break;
pos += bytes;
if (partial) {
// if the last block was written only partially, point block_run
// to the remaining part
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + run.Length());
run.length = HOST_ENDIAN_TO_BFS_INT16(1);
offset = pos;
} else if (FindBlockRun(pos, run, offset) < B_OK) {
*_length = bytesWritten;
RETURN_ERROR(B_BAD_VALUE);
}
}
*_length = bytesWritten;
return B_OK;
}

View File

@ -0,0 +1,84 @@
BFS - ToDo, May 26th, 2003
-----
BlockAllocator
- the BlockAllocator is only slightly optimized and probably slow
- the first free and the largest range are currently not correctly maintained (only efficiency suffers - it does work correctly)
- the allocation policies will have to stand against some real world tests
- the access to the block bitmap is currently managed using a global lock (doesn't matter as long as transactions are serialized)
DataStream
- only files are trimmed back (in bfs_close()), but every inode has a preallocated stream...
- Inode::GrowStream(): merging of block_runs doesn't work between range/block boundaries
- check the array block size in BFS for different block sizes
Queries
- There shouldn't be any cases where you can speed up a query by reordering the query expression - test it
- Check permissions of the parent directories
- Add protection against crashing applications which had a query open - at least the original BeOS kernel does not free the cookie (which throws some memory away *and* prevents unmounting the disk), but that won't be needed for OpenBeOS
- the query set for "!=" and last_modified/size is not the same as for "="; last_modified/size don't contain directories
- check if the query has to be checked for a live update
- accept hex values 0x...
Journal
- Check if there are any standard and often-happening cases for a transaction to fail, and if so, start the transaction only when necessary
- if the system crashes between bfs_unlink() and bfs_remove_vnode(), the inode can be removed from the tree, but its memory is still allocated - this can happen if the inode is still in use by someone (and that's what the "chkbfs" utility is for, mainly).
- add delayed index updating (+ delete actions to solve the issue above)
- multiple log files, parallel transactions? (note that parallel transactions would require more locking to be done)
- variable sized log file
- as long as we have a fixed-sized log file, it should be possible to reserve space for a transaction to be able to decide if batching it is possible
BPlusTree
- BPlusTree::Remove() could trigger CachedNode::Free() to go through the free nodes list and free all pages at the end of the data stream
- BPlusTree::Remove() could let the tree shrink (simple kind of reorganization)
- updating the TreeIterators doesn't work yet for duplicates (which may be a problem if a duplicate node will go away after a remove)
- BPlusTree::RemoveDuplicate() could merge the contents of duplicate node with only a few entries to save some space (right now, only empty nodes are freed)
Inode
- exchange Inode::OldLastModified() with Inode::NewLastModified(), and don't change the last_modified field directly in Inode::WriteAt() for consistency in case of a crash
- the size is only updated in bfs_close() - but if the system crashes before, the entry in the size index doesn't match the one in the inode anymore - it would be better to let the data.size not reflect the real file size in this case (since the max_xxx_range entries are always correct)
- Inode::FillGapWithZeros() is currently disabled; apart from being slow, it really shouldn't be executed while a transaction is running, because that stops all other threads from doing anything (which can take a long time for a 100 MB file)
- need better locking mechanism in combination with B+trees etc.!
Indices
- consider Index::UpdateLastModified() writing back the updated inode
- clean up Index::Update() and the live query update (both seem to be a bit confusing right now)
- investigate adding an index cache to improve performance
Attributes
- Inode::WriteAttribute() doesn't check if the attribute data may fit into the small_data region if there already is that attribute as an attribute file
- for indices, we could get the old data from there when doing a query update
Volume
kernel_interface
- missing functions, maybe they are not really needed: bfs_rename_attr(), bfs_rename_index(), bfs_initialize(), bfs_link()
- bfs_rename() currently doesn't respect any permissions
general stuff
- There are also some comments with a leading "ToDo:" directly in the code which may not be mentioned here.
- implement mkbfs (try to do it in OpenBeOS style directly - only write the super block from user space)
-----
Axel Dörfler
axeld@pinc-software.de

View File

@ -0,0 +1,158 @@
/* Utility - some helper classes
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Utility.h"
#include "Debug.h"
#include <util/kernel_cpp.h>
#include <stdlib.h>
#include <string.h>
/**	Binary search for \a value in the sorted values[] array.
 *
 *	\param value the value to look for
 *	\param index set to the position of the match if one is found; otherwise
 *		to the last position that was probed (Insert() relies on this to
 *		derive the insertion point), or to 0 if the array is empty
 *	\return \c true if \a value is contained in the array
 */
bool
sorted_array::FindInternal(off_t value, int32 &index) const
{
	int32 min = 0, max = count - 1;

	// make sure the caller gets a defined position for an empty array, too
	index = 0;

	while (min <= max) {
		index = min + (max - min) / 2;

		// compare directly instead of subtracting the values: the difference
		// of two off_t values may overflow
		if (values[index] < value)
			min = index + 1;
		else if (values[index] > value)
			max = index - 1;
		else
			return true;
	}
	return false;
}
/**	Inserts \a value at its sorted position; entries that are equal to
 *	\a value stay in front of the new one in the linear case.
 *	The caller has to make sure there is room for one more entry.
 */
void
sorted_array::Insert(off_t value)
{
	int32 position;

	if (count <= 8) {
		// small arrays are scanned linearly for the first larger entry
		position = 0;
		while (position < count && values[position] <= value)
			position++;
	} else if (!FindInternal(value, position) && values[position] <= value) {
		// the binary search stopped at an entry that has to stay in
		// front of the new value
		position++;
	}

	// open a gap at the insertion point and store the new value there
	memmove(&values[position + 1], &values[position],
		(count - position) * sizeof(off_t));
	values[position] = value;
	count++;
}
/**	Removes \a value from the array.
 *
 *	\return \c true if the value was found and removed, \c false if it
 *		is not part of the array
 */
bool
sorted_array::Remove(off_t value)
{
	int32 index = Find(value);
	if (index == -1)
		return false;

	// Only the entries behind the removed one have to be moved, that is
	// "count - index - 1" of them - moving one more would read past the
	// last valid entry of the array.
	memmove(&values[index], &values[index + 1],
		(count - index - 1) * sizeof(off_t));
	count--;

	return true;
}
// #pragma mark -
/**	Creates an empty array that will grow in steps of \a blockSize bytes.
 *	No memory is allocated before the first Insert().
 */
BlockArray::BlockArray(int32 blockSize)
	:
	fArray(NULL),
	fBlockSize(blockSize),
	fSize(0),
	fMaxBlocks(0)
		// was left uninitialized before; it is only computed in Insert()
{
}
/**	Releases the array memory (if any was allocated). */
BlockArray::~BlockArray()
{
	// free() accepts NULL, so no extra check is needed
	free(fArray);
}
/**	Returns the index of \a value in the array, or -1 if the value is
 *	not contained (or nothing has been inserted yet).
 */
int32
BlockArray::Find(off_t value)
{
	return fArray != NULL ? fArray->Find(value) : -1;
}
/**	Inserts \a value into the array, growing the allocation by another
 *	fBlockSize bytes if there is no room left.
 *
 *	\return B_OK on success, B_NO_MEMORY if the array could not be grown
 *		(the array is left unchanged in that case)
 */
status_t
BlockArray::Insert(off_t value)
{
	const bool firstAllocation = fArray == NULL;

	if (firstAllocation || fArray->count + 1 > fMaxBlocks) {
		sorted_array *grown
			= (sorted_array *)realloc(fArray, fSize + fBlockSize);
		if (grown == NULL)
			return B_NO_MEMORY;

		// a freshly allocated array has no entries yet
		if (firstAllocation)
			grown->count = 0;

		fArray = grown;
		fSize += fBlockSize;
		// one off_t sized slot is taken by sorted_array::count
		fMaxBlocks = fSize / sizeof(off_t) - 1;
	}

	fArray->Insert(value);
	return B_OK;
}
/**	Removes \a value from the array.
 *
 *	\return B_OK if the value was removed, B_ENTRY_NOT_FOUND if it is not
 *		part of the array
 */
status_t
BlockArray::Remove(off_t value)
{
	if (fArray != NULL && fArray->Remove(value))
		return B_OK;

	return B_ENTRY_NOT_FOUND;
}
/**	Removes all entries from the array; the allocated memory is kept. */
void
BlockArray::MakeEmpty()
{
	// the array is only allocated by the first Insert() - if that did not
	// happen yet, there is nothing to empty
	if (fArray == NULL)
		return;

	fArray->count = 0;
}
// #pragma mark -
/**	Copies at most \a length - 1 characters from \a source to \a dest and
 *	always null-terminates \a dest, unless \a length is 0 (in which case
 *	\a dest is not touched at all).
 *
 *	\return the length of \a source, so that a result >= \a length tells
 *		the caller that the string has been truncated
 */
extern "C" size_t
strlcpy(char *dest, char const *source, size_t length)
{
	size_t copied = 0;

	if (length > 0) {
		// leave room for the terminating null byte
		const size_t limit = length - 1;
		while (copied < limit && source[copied] != '\0') {
			dest[copied] = source[copied];
			copied++;
		}
		dest[copied] = '\0';
	}

	// add whatever part of the source did not fit
	return copied + strlen(source + copied);
}

View File

@ -0,0 +1,201 @@
#ifndef UTILITY_H
#define UTILITY_H
/* Utility - some helper classes
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
// Simple array, used for the duplicate handling in the B+Tree,
// and for the log entries.
// A count followed directly by that many values; Insert() keeps the
// values in ascending order. The structure does not manage its own
// memory - whoever provides the buffer has to make sure there is room
// for the entries (see BlockArray::Insert()).
struct sorted_array {
public:
// number of valid entries in values[]
off_t count;
// trailing array of variable length; its declared size only depends on
// the compiler in use
#if __MWERKS__
off_t values[1];
#else
off_t values[0];
#endif
// returns the index of "value", or -1 if it is not in the array
inline int32 Find(off_t value) const;
// inserts "value" at its sorted position (the buffer must be large enough)
void Insert(off_t value);
// removes "value", returns false if it was not in the array
bool Remove(off_t value);
private:
// binary search; "index" is the match or the last probed position
bool FindInternal(off_t value,int32 &index) const;
};
/**	Returns the index of \a value in the array, or -1 if the array does
 *	not contain it.
 */
inline int32
sorted_array::Find(off_t value) const
{
	int32 position;
	if (!FindInternal(value, position))
		return -1;

	return position;
}
// The BlockArray reserves memory in multiples of "blockSize" and
// maintains the array size for new entries.
// This is used for the in-memory log entries before they
// are written to disk.
// Growable wrapper around a heap allocated sorted_array; the memory is
// allocated lazily by the first Insert() and grows by "blockSize" bytes
// whenever the array is full.
class BlockArray {
public:
BlockArray(int32 blockSize);
~BlockArray();
// returns the index of "value", or -1 if it is not in the array
int32 Find(off_t value);
// B_OK, or B_NO_MEMORY if the array could not be grown
status_t Insert(off_t value);
// B_OK, or B_ENTRY_NOT_FOUND if "value" is not in the array
status_t Remove(off_t value);
void MakeEmpty();
// number of values currently stored
int32 CountItems() const { return fArray != NULL ? fArray->count : 0; }
// number of "blockSize" sized blocks needed to hold the count field
// plus all values (rounded up)
int32 BlocksUsed() const { return fArray != NULL ? ((fArray->count + 1) * sizeof(off_t) + fBlockSize - 1) / fBlockSize : 0; }
// direct access to the underlying array, NULL before the first Insert()
sorted_array *Array() const { return fArray; }
// allocated size in bytes
int32 Size() const { return fSize; }
private:
sorted_array *fArray;
// growth step in bytes
int32 fBlockSize;
// number of bytes currently allocated for fArray
int32 fSize;
// number of values that fit into the current allocation (despite its
// name, this does not count blocks - see BlockArray::Insert())
int32 fMaxBlocks;
};
// Doubly linked list
// Link part of an entry of the doubly linked list below. Both ends of
// the chain are expected to be terminated by a node whose own link is
// NULL, that's why Remove() doesn't need to check its neighbours.
template<class Node> struct node {
	Node *next,*prev;

	// Unlinks this node from its neighbours; its own links are kept.
	void
	Remove()
	{
		prev->next = next;
		next->prev = prev;
	}

	// Returns the following node, or NULL if this is the last real
	// entry - the successor of the last entry is the terminating node,
	// which has no successor itself.
	Node *
	Next()
	{
		const bool hasSuccessor = next != NULL && next->next != NULL;
		return hasSuccessor ? next : NULL;
	}
};
// List header for node<> based entries. Add() appends an entry at the end.
// NOTE(review): the constructor casts the addresses of the header fields
// to Node pointers, so the header itself seems to serve as the terminating
// nodes of the chain. This presumably requires that the next/prev links
// are the first members of Node and that head/tail/last keep this exact
// order - confirm before changing either layout.
template<class Node> struct list {
Node *head,*tail,*last;
list()
{
// empty list: head refers to the address of "tail", "tail" stays NULL,
// and "last" refers to the address of "head"
head = (Node *)&tail;
tail = NULL;
last = (Node *)&head;
}
void
Add(Node *entry)
{
// the new entry becomes the last one: its successor is the address of
// "tail", its predecessor the previous last entry
entry->next = (Node *)&tail;
entry->prev = last;
last->next = entry;
last = entry;
}
};
// Some atomic operations that are somehow missing in BeOS:
//
// _atomic_test_and_set(value, newValue, testAgainst)
// sets "value" to "newValue", if "value" is equal to "testAgainst"
// _atomic_set(value, newValue)
// sets "value" to "newValue"
#if _NO_INLINE_ASM
// Note that these atomic versions *don't* work as expected!
// They are only used for single processor user space tests
// (and don't even work correctly there)
// Plain C++ fallback: reads, compares, and writes in separate steps.
// Returns the value that was found in "value".
inline int32
_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
{
int32 oldValue = *value;
if (oldValue == testAgainst)
*value = newValue;
return oldValue;
}
// Plain C++ fallback: simple assignment.
inline void
_atomic_set(volatile int32 *value, int32 newValue)
{
*value = newValue;
}
#elif __INTEL__
// x86 version: "testAgainst" is passed in eax, "newValue" in ecx, and the
// address in edx; the result is taken from eax.
// NOTE(review): the statement declares no clobbers (neither "memory" nor
// "cc") - confirm that the compilers in use don't need them here.
inline int32
_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
{
int32 oldValue;
asm volatile("lock; cmpxchg %%ecx, (%%edx)"
: "=a" (oldValue) : "a" (testAgainst), "c" (newValue), "d" (value));
return oldValue;
}
// x86 version: exchanges eax ("newValue") with the memory at edx.
// NOTE(review): eax is only declared as an input operand although xchg
// also writes to it, and there is no "memory" clobber - confirm.
inline void
_atomic_set(volatile int32 *value, int32 newValue)
{
asm volatile("lock; xchg %%eax, (%%edx)"
: : "a" (newValue), "d" (value));
}
#elif __POWERPC__ && __MWERKS__ /* GCC has different assembler syntax */
// PowerPC (Metrowerks syntax) version, built around a lwarx/stwcx. loop.
// NOTE(review): this variant is declared to return int32, while the other
// variants of _atomic_set() return void; it also copies r5 to r3 before
// returning although the function only has two parameters - confirm what
// the intended return value is.
inline asm int32
_atomic_set(volatile int32 *value, int32)
{
loop:
dcbf r0, r3;
lwarx r0, 0, r3;
stwcx. r4, 0, r3;
bc 5, 2, loop
mr r3,r5;
isync;
blr;
}
// PowerPC (Metrowerks syntax) version: loads the current value into r0,
// compares it with r5, and only stores r4 if both are equal; r0 is copied
// to r3 before returning on both paths.
// NOTE(review): presumably r3/r4/r5 carry value/newValue/testAgainst as
// per the calling convention - confirm.
inline asm int32
_atomic_test_and_set(volatile int32 *value, int32 newValue, int32 testAgainst)
{
loop:
dcbf r0, r3;
lwarx r0, 0, r3;
cmpw r5, r0;
bne no_dice;
stwcx. r4, 0, r3;
bc 5, 2, loop
mr r3,r0;
isync;
blr;
no_dice:
stwcx. r0, 0, r3;
mr r3,r0;
isync;
blr;
}
#else
# error The macros _atomic_set(), and _atomic_test_and_set() are not defined for the target processor
#endif
extern "C" size_t strlcpy(char *dest, char const *source, size_t length);
#endif /* UTILITY_H */

View File

@ -0,0 +1,653 @@
/* Volume - BFS super block, mounting, etc.
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "Volume.h"
#include "Journal.h"
#include "Inode.h"
#include "Query.h"
#include <util/kernel_cpp.h>
#include <KernelExport.h>
#include <Drivers.h>
#include <fs_volume.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
static const int32 kDesiredAllocationGroups = 56;
// This is the number of allocation groups that will be tried
// to be given for newly initialized disks.
// That's only relevant for smaller disks, though, since any
// of today's disk sizes already reach the maximum length
// of an allocation group (65536 blocks).
// It seems to create appropriate numbers for smaller disks
// with this setting, though (i.e. you can create a 400 MB
// file on a 1 GB disk without the need for double indirect
// blocks).
// Helper that owns the device file descriptor while a volume is mounted
// or initialized: unless Keep() has been called, the destructor closes
// the device again (and drops its block cache, if InitCache() succeeded).
class DeviceOpener {
public:
DeviceOpener(const char *device, int mode);
~DeviceOpener();
// (re-)opens the device, returns the descriptor or a negative value
int Open(const char *device, int mode);
// sets up the block cache for the device
status_t InitCache(off_t numBlocks);
// removes the block cache again, "mode" is passed on to
// remove_cached_device_blocks()
void RemoveCache(int mode);
// makes the destructor leave the device alone
void Keep();
int Device() const { return fDevice; }
// device size in bytes and its block size; both arguments may be NULL
status_t GetSize(off_t *_size, uint32 *_blockSize = NULL);
private:
// descriptor as returned by open(), negative if not open (or kept)
int fDevice;
// true as long as the block cache set up by InitCache() exists
bool fCached;
};
// Opens "device" with the given open() mode. A failure is not reported
// here; callers have to check if Device() is negative.
DeviceOpener::DeviceOpener(const char *device, int mode)
:
fCached(false)
{
// Open() initializes fDevice
Open(device, mode);
}
/**	Closes the device again, unless it could not be opened or Keep() has
 *	been called. A block cache set up by InitCache() is removed without
 *	writing anything back.
 */
DeviceOpener::~DeviceOpener()
{
	if (fDevice < B_OK)
		return;

	// The cache has to be removed while the descriptor is still open (this
	// is also the order Volume::Unmount() uses); after close(), the
	// descriptor doesn't refer to the device anymore.
	if (fCached)
		remove_cached_device_blocks(fDevice, NO_WRITES);

	close(fDevice);
}
// Opens "device" with the given open() mode and remembers the result.
// Returns the new descriptor, or the negative value open() returned.
// A previously stored descriptor is overwritten without being closed.
int
DeviceOpener::Open(const char *device, int mode)
{
fDevice = open(device, mode);
return fDevice;
}
/**	Sets up the block cache for the device with room for \a numBlocks.
 *
 *	\return B_OK on success, B_ERROR if the cache could not be initialized
 */
status_t
DeviceOpener::InitCache(off_t numBlocks)
{
	if (init_cache_for_device(fDevice, numBlocks) != B_OK)
		return B_ERROR;

	// remember that the cache has to be removed again
	fCached = true;
	return B_OK;
}
/**	Removes the block cache of the device if InitCache() has set one up;
 *	\a mode is passed on to remove_cached_device_blocks().
 */
void
DeviceOpener::RemoveCache(int mode)
{
	if (fCached) {
		remove_cached_device_blocks(fDevice, mode);
		fCached = false;
	}
}
// Gives up ownership of the device: the destructor will neither close it
// nor remove its cache afterwards. Note that Device() returns -1 from now
// on, so the descriptor has to be retrieved before.
void
DeviceOpener::Keep()
{
fDevice = -1;
}
/**	Retrieves the size of the device in bytes as well as its block size.
 *	The values are computed from the B_GET_GEOMETRY ioctl; if that fails,
 *	the device is treated as a plain file: fstat() delivers the size, and
 *	the block size is assumed to be 512 bytes.
 *
 *	\param _size set to the size in bytes, may be NULL
 *	\param _blockSize set to the block size in bytes, may be NULL
 *	\return B_OK on success, B_ERROR if both methods failed
 */
status_t
DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize)
{
	off_t size;
	uint32 blockSize;

	device_geometry geometry;
	if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) >= 0) {
		size = 1LL * geometry.head_count * geometry.cylinder_count
			* geometry.sectors_per_track * geometry.bytes_per_sector;
		blockSize = geometry.bytes_per_sector;
	} else {
		// maybe it's just a file
		struct stat info;
		if (fstat(fDevice, &info) < 0)
			return B_ERROR;

		size = info.st_size;
		// that shouldn't cause us any problems
		blockSize = 512;
	}

	if (_size)
		*_size = size;
	if (_blockSize)
		*_blockSize = blockSize;

	return B_OK;
}
// #pragma mark -
bool
disk_super_block::IsValid()
{
if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
|| (int32)block_size != inode_size
|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
|| (1UL << BlockShift()) != BlockSize()
|| AllocationGroups() < 1
|| AllocationGroupShift() < 1
|| BlocksPerAllocationGroup() < 1
|| NumBlocks() < 10
|| AllocationGroups() != divide_roundup(NumBlocks(),
1L << AllocationGroupShift()))
return false;
return true;
}
void
disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize)
{
memset(this, 0, sizeof(disk_super_block));
magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
fs_byte_order = SUPER_BLOCK_FS_LENDIAN;
flags = SUPER_BLOCK_DISK_CLEAN;
strlcpy(name, diskName, sizeof(name));
block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
for (block_shift = 9; (1UL << block_shift) < blockSize; block_shift++);
num_blocks = numBlocks;
used_blocks = 0;
// Get the minimum ag_shift (that's determined by the block size)
blocks_per_ag = 1;
ag_shift = 13;
int32 bitsPerBlock = blockSize << 3;
off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
ag_shift++;
}
// Many allocation groups help applying allocation policies, but if
// they are too small, we will need to many block_runs to cover large
// files (see above to get an explanation of the kDesiredAllocationGroups
// constant).
while (true) {
num_ags = (bitmapBlocks + blocks_per_ag - 1) / blocks_per_ag;
if (num_ags > kDesiredAllocationGroups) {
if (ag_shift == 16)
break;
ag_shift++;
blocks_per_ag *= 2;
} else
break;
}
}
// #pragma mark -
/**	Creates an unmounted volume object for the mount ID \a id; the device
 *	is opened by Mount() or Initialize().
 */
Volume::Volume(mount_id id)
	:
	fID(id),
	fDevice(-1),
		// Unmount() closes the device unconditionally
	fBlockAllocator(this),
	fLock("bfs volume"),
	fJournal(NULL),
		// Unmount() deletes the journal, even if Mount() failed before
		// it could create one
	fRootNode(NULL),
	fIndicesNode(NULL),
	fDirtyCachedBlocks(0),
	fUniqueID(0),
	fFlags(0)
{
}
// Nothing to do here - the journal, the indices node, the cache, and the
// device are released by Unmount().
Volume::~Volume()
{
}
// Returns whether the in-memory copy of the super block passes the
// checks of disk_super_block::IsValid().
bool
Volume::IsValidSuperBlock()
{
return fSuperBlock.IsValid();
}
// Called when a serious inconsistency has been detected: the volume is
// switched to read-only mode, and, depending on the build, the debugger
// is entered.
void
Volume::Panic()
{
FATAL(("we have to panic... switch to read-only mode!\n"));
fFlags |= VOLUME_READ_ONLY;
// userland builds always enter the debugger, kernel builds only if
// DEBUG is defined
#ifdef USER
debugger("BFS panics!");
#elif defined(DEBUG)
kernel_debugger("BFS panics!");
#endif
}
/**	Mounts the BFS volume found on the device (or image file)
 *	\a deviceName.
 *
 *	The device is opened (falling back to read-only access if it can't be
 *	opened for writing), the super block is read and validated, the block
 *	cache, journal and block allocator are initialized, and finally the
 *	root and indices directories are loaded.
 *
 *	\param deviceName path of the device to mount
 *	\param flags mount flags; B_MOUNT_READ_ONLY is the only one that is
 *		evaluated here
 *	\return B_OK on success. On failure, the device is closed again when
 *		this function returns.
 */
status_t
Volume::Mount(const char *deviceName, uint32 flags)
{
	// ToDo: validate the FS in write mode as well!
#if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
	// in big endian mode, we only mount read-only for now
	flags |= B_MOUNT_READ_ONLY;
#endif

	// This has to be done after the flags have been adjusted above, or
	// else a read-only mount that is enforced by the endian configuration
	// would not be reflected in the volume flags.
	if (flags & B_MOUNT_READ_ONLY)
		fFlags |= VOLUME_READ_ONLY;

	DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR);

	// if we couldn't open the device, try read-only (don't rely on a specific error code)
	if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) {
		opener.Open(deviceName, O_RDONLY);
		fFlags |= VOLUME_READ_ONLY;
	}

	fDevice = opener.Device();
	if (fDevice < B_OK)
		RETURN_ERROR(fDevice);

	// check if it's a regular file, and if so, disable the cache for the
	// underlying file system
	struct stat stat;
	if (fstat(fDevice, &stat) < 0)
		RETURN_ERROR(B_ERROR);

#ifndef NO_FILE_UNCACHED_IO
	if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) {
		// the cache couldn't be disabled; unless this is a debug build,
		// Panic() switches the volume to read-only mode
#	ifdef DEBUG
		FATAL(("couldn't disable cache for image file - system may dead-lock!\n"));
#	else
		FATAL(("couldn't disable cache for image file!\n"));
		Panic();
#	endif
	}
#endif

	// read the super block
	if (Identify(fDevice, &fSuperBlock) != B_OK) {
		FATAL(("invalid super block!\n"));
		return B_BAD_VALUE;
	}

	// initialize short hands to the super block (to save byte swapping)
	fBlockSize = fSuperBlock.BlockSize();
	fBlockShift = fSuperBlock.BlockShift();
	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();

	// check if the device size is large enough to hold the file system
	off_t diskSize;
	if (opener.GetSize(&diskSize) < B_OK)
		RETURN_ERROR(B_ERROR);
	if (diskSize < (NumBlocks() << BlockShift()))
		RETURN_ERROR(B_BAD_VALUE);

	// set the current log pointers, so that journaling will work correctly
	fLogStart = fSuperBlock.LogStart();
	fLogEnd = fSuperBlock.LogEnd();

	if (opener.InitCache(NumBlocks()) != B_OK)
		return B_ERROR;

	fJournal = new Journal(this);
	// replaying the log is the first thing we will do on this disk
	// (a journal that could not be allocated at all is an error as well)
	if (fJournal == NULL
		|| fJournal->InitCheck() < B_OK
		|| fBlockAllocator.Initialize() < B_OK) {
		// ToDo: improve error reporting for a bad journal
		FATAL(("could not initialize journal/block bitmap allocator!\n"));
		return B_NO_MEMORY;
	}

	status_t status = B_OK;

	fRootNode = new Inode(this, ToVnode(Root()));
	if (fRootNode && fRootNode->InitCheck() == B_OK) {
		status = new_vnode(fID, ToVnode(Root()), (void *)fRootNode);
		if (status == B_OK) {
			// try to get indices root dir

			// question: why doesn't get_vnode() work here??
			// answer: we have not yet backpropagated the pointer to the
			// volume in bfs_mount(), so bfs_read_vnode() can't get it.
			// But it's not needed to do that anyway.

			if (!Indices().IsZero())
				fIndicesNode = new Inode(this, ToVnode(Indices()));

			if (fIndicesNode == NULL
				|| fIndicesNode->InitCheck() < B_OK
				|| !fIndicesNode->IsContainer()) {
				INFORM(("bfs: volume doesn't have indices!\n"));

				if (fIndicesNode) {
					// if this is the case, the index root node is gone bad, and
					// BFS switches to read-only mode
					fFlags |= VOLUME_READ_ONLY;
					delete fIndicesNode;
					fIndicesNode = NULL;
				}
			}

			// all went fine
			opener.Keep();
			return B_OK;
		} else
			FATAL(("could not create root node: new_vnode() failed!\n"));

		delete fRootNode;
		// don't leave a dangling pointer behind
		fRootNode = NULL;
	} else {
		status = B_BAD_VALUE;
		FATAL(("could not create root node!\n"));
	}

	return status;
}
// Shuts down the volume: deletes the journal and the indices node,
// removes the block cache (allowing write-backs unless the volume is
// read-only), and closes the device. Always returns B_OK.
// NOTE(review): fRootNode is not deleted here - presumably it is released
// through the vnode layer; confirm.
status_t
Volume::Unmount()
{
// This will also flush the log & all blocks to disk
delete fJournal;
fJournal = NULL;
delete fIndicesNode;
// the cache has to be removed before the device is closed
remove_cached_device_blocks(fDevice, IsReadOnly() ? NO_WRITES : ALLOW_WRITES);
close(fDevice);
return B_OK;
}
// Flushes the log and all blocks to disk via the journal.
// There is no NULL check, so this must only be called while the volume
// has a journal.
status_t
Volume::Sync()
{
return fJournal->FlushLogAndBlocks();
}
/**	Checks if \a run can be a valid block_run on this volume: the
 *	allocation group has to exist, and the run must not be empty or
 *	exceed the size of an allocation group.
 *	If the run is invalid, the volume panics (and thus switches to
 *	read-only mode).
 *
 *	\return B_OK if the run looks valid, B_BAD_DATA if not
 */
status_t
Volume::ValidateBlockRun(block_run run)
{
	// NOTE(review): a group index that equals AllocationGroups() is
	// accepted by this check - confirm that this is intended.
	const bool badGroup = run.AllocationGroup() < 0
		|| run.AllocationGroup() > (int32)AllocationGroups();
	const bool badStart = run.Start() > (1UL << AllocationGroupShift());
	const bool badLength = run.length == 0
		|| uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift());

	if (!badGroup && !badStart && !badLength)
		return B_OK;

	Panic();
	FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length()));
	return B_BAD_DATA;
}
/**	Converts the absolute block number \a block into a block_run with a
 *	length of 1: the upper part of the number selects the allocation
 *	group, the lower AllocationGroupShift() bits the start within it.
 */
block_run
Volume::ToBlockRun(off_t block) const
{
	const uint32 groupShift = AllocationGroupShift();
	// mask covering the block offset within an allocation group
	const off_t startMask = (1LL << groupShift) - 1;

	block_run run;
	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
	run.start = HOST_ENDIAN_TO_BFS_INT16(block & startMask);
	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> groupShift);

	return run;
}
/**	Creates the root directory of the indices as part of \a transaction,
 *	stores its location in the super block, and writes the super block
 *	back to disk.
 *
 *	\return the error of Inode::Create() if that failed, otherwise the
 *		result of WriteSuperBlock()
 */
status_t
Volume::CreateIndicesRoot(Transaction *transaction)
{
	off_t indicesID;
	status_t created = Inode::Create(transaction, NULL, NULL,
		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0,
		&indicesID, &fIndicesNode);
	if (created < B_OK)
		RETURN_ERROR(created);

	// publish the new directory in the super block
	fSuperBlock.indices = ToBlockRun(indicesID);
	return WriteSuperBlock();
}
// Allocates a block_run for a new inode of the given type; the block_run
// of "parent" is passed on to the block allocator.
// "parent" is dereferenced unconditionally and therefore must not be NULL.
status_t
Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run)
{
return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run);
}
/**	Writes the in-memory super block to byte offset 512 of the device.
 *
 *	\return B_OK if the whole super block was written, B_IO_ERROR if not
 */
status_t
Volume::WriteSuperBlock()
{
	const bool complete = write_pos(fDevice, 512, &fSuperBlock,
		sizeof(disk_super_block)) == sizeof(disk_super_block);

	return complete ? B_OK : B_IO_ERROR;
}
/**	Notifies all registered queries of a changed attribute/index value of
 *	\a inode by calling Query::LiveUpdate() on each of them with the old
 *	and new key. Does nothing if the query lock cannot be acquired.
 */
void
Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
	size_t oldLength, const uint8 *newKey, size_t newLength)
{
	if (fQueryLock.Lock() < B_OK)
		return;

	// walk the chain of queries until Next() reports its end
	Query *current = NULL;
	for (;;) {
		current = fQueries.Next(current);
		if (current == NULL)
			break;

		current->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength);
	}

	fQueryLock.Unlock();
}
/** Checks if there is a live query whose results depend on the presence
* or value of the specified attribute.
* Don't use it if you already have all the data together to evaluate
* the queries - it wouldn't save you anything in this case.
* The check is not implemented yet: this currently returns true for
* every attribute.
*/
bool
Volume::CheckForLiveQuery(const char *attribute)
{
// ToDo: check for a live query that depends on the specified attribute
return true;
}
/**	Adds \a query to the list of queries that receive live updates.
 *	The query is silently not added if the query lock can't be acquired.
 */
void
Volume::AddQuery(Query *query)
{
	if (fQueryLock.Lock() >= B_OK) {
		fQueries.Add(query);
		fQueryLock.Unlock();
	}
}
/**	Removes \a query from the list of queries that receive live updates.
 *	The query is silently left in the list if the query lock can't be
 *	acquired.
 */
void
Volume::RemoveQuery(Query *query)
{
	if (fQueryLock.Lock() >= B_OK) {
		fQueries.Remove(query);
		fQueryLock.Unlock();
	}
}
// #pragma mark -
// Disk scanning and initialization
/**	Reads the first 1024 bytes from the file descriptor \a fd and looks
 *	for a valid BFS super block at offset 512 and - unless
 *	BFS_LITTLE_ENDIAN_ONLY is defined - at offset 0.
 *
 *	\param fd the device to read from
 *	\param superBlock receives the super block contents; it is overwritten
 *		even if no valid super block could be found
 *	\return B_OK if a valid super block was found, B_IO_ERROR if the device
 *		could not be read, B_BAD_VALUE if there is no valid super block
 */
status_t
Volume::Identify(int fd, disk_super_block *superBlock)
{
	char buffer[1024];
	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
		return B_IO_ERROR;

	// Note: that does work only for x86, for PowerPC, the super block
	// may be located at offset 0!
	memcpy(superBlock, buffer + 512, sizeof(disk_super_block));
	if (superBlock->IsValid())
		return B_OK;

#ifndef BFS_LITTLE_ENDIAN_ONLY
	// second chance: the super block may start at the very beginning
	memcpy(superBlock, buffer, sizeof(disk_super_block));
	if (superBlock->IsValid())
		return B_OK;
#endif

	return B_BAD_VALUE;
}
#ifdef USER
extern "C" void kill_device_vnodes(dev_t id);
// This call is only available in the userland fs_shell
// Creates a new, empty BFS file system on "device" (userland fs_shell
// only): a fresh super block, the log area, the block bitmap, the root
// directory, and - unless VOLUME_NO_INDICES is set in "flags" - the
// standard indices "name", "last_modified", and "size".
// "name" must not contain a '/', "blockSize" has to be 1024, 2048, 4096,
// or 8192. Since the DeviceOpener is never told to Keep() the device, it
// is closed again when this function returns.
status_t
Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags)
{
// although there is no really good reason for it, we won't
// accept '/' in disk names (mkbfs does this, too - and since
// Tracker names mounted volumes like their name)
if (strchr(name, '/') != NULL)
return B_BAD_VALUE;
if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192)
return B_BAD_VALUE;
DeviceOpener opener(device, O_RDWR);
if (opener.Device() < B_OK)
return B_BAD_VALUE;
fDevice = opener.Device();
// deviceBlockSize is retrieved, but not used afterwards
uint32 deviceBlockSize;
off_t deviceSize;
if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
return B_ERROR;
off_t numBlocks = deviceSize / blockSize;
// create valid super block
fSuperBlock.Initialize(name, numBlocks, blockSize);
// initialize short hands to the super block (to save byte swapping)
fBlockSize = fSuperBlock.BlockSize();
fBlockShift = fSuperBlock.BlockShift();
fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
// since the allocator has not been initialized yet, we
// cannot use BlockAllocator::BitmapSize() here
fSuperBlock.log_blocks = ToBlockRun(AllocationGroups()
* fSuperBlock.BlocksPerAllocationGroup() + 1);
// NOTE(review): the length is stored without HOST_ENDIAN_TO_BFS_INT16(),
// unlike what ToBlockRun() does for the same field - confirm
fSuperBlock.log_blocks.length = 2048;
// ToDo: set the log size depending on the disk size
fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log()));
// set the current log pointers, so that journaling will work correctly
fLogStart = fSuperBlock.LogStart();
fLogEnd = fSuperBlock.LogEnd();
if (!IsValidSuperBlock())
RETURN_ERROR(B_ERROR);
if (opener.InitCache(numBlocks) != B_OK)
return B_ERROR;
fJournal = new Journal(this);
if (fJournal == NULL || fJournal->InitCheck() < B_OK)
RETURN_ERROR(B_ERROR);
// ready to write data to disk
Transaction transaction(this, 0);
if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
RETURN_ERROR(B_ERROR);
// create the root directory, and remember its location in the super block
off_t id;
status_t status = Inode::Create(&transaction, NULL, NULL,
S_DIRECTORY | 0755, 0, 0, &id, &fRootNode);
if (status < B_OK)
RETURN_ERROR(status);
fSuperBlock.root_dir = ToBlockRun(id);
if ((flags & VOLUME_NO_INDICES) == 0) {
// The indices root directory will be created automatically
// when the standard indices are created (or any other).
Index index(this);
status = index.Create(&transaction, "name", B_STRING_TYPE);
if (status < B_OK)
return status;
status = index.Create(&transaction, "last_modified", B_INT64_TYPE);
if (status < B_OK)
return status;
status = index.Create(&transaction, "size", B_INT64_TYPE);
if (status < B_OK)
return status;
}
// NOTE(review): the result of WriteSuperBlock() is ignored here
WriteSuperBlock();
transaction.Done();
// release the references to the nodes created above
put_vnode(ID(), fRootNode->ID());
if (fIndicesNode != NULL)
put_vnode(ID(), fIndicesNode->ID());
kill_device_vnodes(ID());
// This call is only available in the userland fs_shell
Sync();
opener.RemoveCache(ALLOW_WRITES);
return B_OK;
}
#endif

View File

@ -0,0 +1,240 @@
#ifndef VOLUME_H
#define VOLUME_H
/* Volume - BFS super block, mounting, etc.
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#include <fs_interface.h>
extern "C" {
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
#include "lock.h"
#include "cache.h"
}
#include "bfs.h"
#include "BlockAllocator.h"
#include "BufferPool.h"
#include "Chain.h"
class Journal;
class Inode;
class Query;
// Bits stored in Volume::fFlags
enum volume_flags {
// the volume must not be written to, see Volume::IsReadOnly()
VOLUME_READ_ONLY = 0x0001
};
// Flags accepted by Volume::Initialize()
enum volume_initialize_flags {
// don't create the standard indices (name, last_modified, size)
VOLUME_NO_INDICES = 0x0001,
};
// Represents a BFS volume: it holds the in-memory copy of the super
// block, owns the block allocator and the journal, and keeps track of
// the root/indices directories as well as the running live queries.
class Volume {
public:
Volume(mount_id id);
~Volume();
status_t Mount(const char *device, uint32 flags);
status_t Unmount();
// only implemented for the userland fs_shell (USER builds)
status_t Initialize(const char *device, const char *name,
uint32 blockSize, uint32 flags);
bool IsValidSuperBlock();
bool IsReadOnly() const;
// switches the volume to read-only mode
void Panic();
RecursiveLock &Lock();
// super block accessors
block_run Root() const { return fSuperBlock.root_dir; }
Inode *RootNode() const { return fRootNode; }
block_run Indices() const { return fSuperBlock.indices; }
Inode *IndicesNode() const { return fIndicesNode; }
block_run Log() const { return fSuperBlock.log_blocks; }
vint32 &LogStart() { return fLogStart; }
vint32 &LogEnd() { return fLogEnd; }
int Device() const { return fDevice; }
mount_id ID() const { return fID; }
const char *Name() const { return fSuperBlock.name; }
off_t NumBlocks() const { return fSuperBlock.NumBlocks(); }
off_t UsedBlocks() const { return fSuperBlock.UsedBlocks(); }
off_t FreeBlocks() const { return NumBlocks() - UsedBlocks(); }
// the following values are cached in host endian format by
// Mount()/Initialize()
uint32 BlockSize() const { return fBlockSize; }
uint32 BlockShift() const { return fBlockShift; }
uint32 InodeSize() const { return fSuperBlock.InodeSize(); }
uint32 AllocationGroups() const { return fSuperBlock.AllocationGroups(); }
uint32 AllocationGroupShift() const { return fAllocationGroupShift; }
disk_super_block &SuperBlock() { return fSuperBlock; }
// conversions between block_runs, block numbers, byte offsets, and vnode
// IDs; only the first block of a run is taken into account
off_t ToOffset(block_run run) const { return ToBlock(run) << BlockShift(); }
off_t ToBlock(block_run run) const { return ((((off_t)run.AllocationGroup()) << AllocationGroupShift()) | (off_t)run.Start()); }
block_run ToBlockRun(off_t block) const;
status_t ValidateBlockRun(block_run run);
// the vnode ID of an inode is its block number
off_t ToVnode(block_run run) const { return ToBlock(run); }
off_t ToVnode(off_t block) const { return block; }
off_t VnodeToBlock(vnode_id id) const { return (off_t)id; }
status_t CreateIndicesRoot(Transaction *transaction);
// block bitmap
BlockAllocator &Allocator();
status_t AllocateForInode(Transaction *transaction, const Inode *parent,
mode_t type, block_run &run);
status_t AllocateForInode(Transaction *transaction, const block_run *parent,
mode_t type, block_run &run);
status_t Allocate(Transaction *transaction,const Inode *inode,
off_t numBlocks, block_run &run, uint16 minimum = 1);
status_t Free(Transaction *transaction, block_run run);
// cache access
status_t WriteSuperBlock();
status_t WriteBlocks(off_t blockNumber, const uint8 *block, uint32 numBlocks);
void WriteCachedBlocksIfNecessary();
status_t FlushDevice();
// queries
void UpdateLiveQueries(Inode *inode, const char *attribute, int32 type,
const uint8 *oldKey, size_t oldLength,
const uint8 *newKey, size_t newLength);
bool CheckForLiveQuery(const char *attribute);
void AddQuery(Query *query);
void RemoveQuery(Query *query);
status_t Sync();
// there is only one journal per volume, refBlock is ignored
Journal *GetJournal(off_t refBlock) const;
BufferPool &Pool();
uint32 GetUniqueID();
// reads and validates the super block of the device "fd"
static status_t Identify(int fd, disk_super_block *superBlock);
protected:
mount_id fID;
// file descriptor of the mounted device
int fDevice;
// in-memory copy of the on-disk super block
disk_super_block fSuperBlock;
// host endian short hands to the respective super block fields
uint32 fBlockSize;
uint32 fBlockShift;
uint32 fAllocationGroupShift;
BlockAllocator fBlockAllocator;
RecursiveLock fLock;
Journal *fJournal;
vint32 fLogStart, fLogEnd;
Inode *fRootNode;
Inode *fIndicesNode;
// number of blocks written via WriteBlocks() since the last flush
vint32 fDirtyCachedBlocks;
// protects fQueries
SimpleLock fQueryLock;
Chain<Query> fQueries;
// counter backing GetUniqueID()
int32 fUniqueID;
// see volume_flags
uint32 fFlags;
BufferPool fBufferPool;
};
// inline functions
/**	Returns whether the volume is in read-only mode, that is if the
 *	VOLUME_READ_ONLY flag is set.
 */
inline bool
Volume::IsReadOnly() const
{
	return (fFlags & VOLUME_READ_ONLY) != 0;
}
// Returns the volume's recursive lock.
inline RecursiveLock &
Volume::Lock()
{
return fLock;
}
// Returns the block allocator of this volume.
inline BlockAllocator &
Volume::Allocator()
{
return fBlockAllocator;
}
// Forwards to BlockAllocator::AllocateForInode(): allocates a block_run
// for a new inode of the given type, "parent" is the block_run of its
// parent.
inline status_t
Volume::AllocateForInode(Transaction *transaction, const block_run *parent, mode_t type, block_run &run)
{
return fBlockAllocator.AllocateForInode(transaction, parent, type, run);
}
// Forwards to BlockAllocator::Allocate(); all arguments are passed on
// unchanged.
inline status_t
Volume::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks, block_run &run, uint16 minimum)
{
return fBlockAllocator.Allocate(transaction, inode, numBlocks, run, minimum);
}
// Forwards to BlockAllocator::Free() to release "run" as part of
// "transaction".
inline status_t
Volume::Free(Transaction *transaction, block_run run)
{
return fBlockAllocator.Free(transaction, run);
}
/**	Writes \a numBlocks blocks starting at \a blockNumber through the
 *	block cache, and accounts for them in the dirty block counter that
 *	WriteCachedBlocksIfNecessary() evaluates.
 *
 *	\return the result of cached_write()
 */
inline status_t
Volume::WriteBlocks(off_t blockNumber, const uint8 *block, uint32 numBlocks)
{
	atomic_add(&fDirtyCachedBlocks, numBlocks);

	// Use the host endian short hand of the block size: the super block
	// field is kept in the on-disk byte order, and must not be used without
	// conversion.
	return cached_write(fDevice, blockNumber, block, numBlocks, fBlockSize);
}
/**	Triggers a cache flush once more than 128 blocks have been written
 *	via WriteBlocks(), and lowers the dirty block counter by 64 afterwards.
 */
inline void
Volume::WriteCachedBlocksIfNecessary()
{
	// the specific values are only valid for the current BeOS cache
	if (fDirtyCachedBlocks <= 128)
		return;

	force_cache_flush(fDevice, false);
	atomic_add(&fDirtyCachedBlocks, -64);
}
// Resets the dirty block counter and flushes the device via
// flush_device(), whose result is returned.
inline status_t
Volume::FlushDevice()
{
fDirtyCachedBlocks = 0;
return flush_device(fDevice, 0);
}
// Returns the journal of this volume; the block argument is currently
// ignored.
inline Journal *
Volume::GetJournal(off_t /*refBlock*/) const
{
return fJournal;
}
// Returns the buffer pool of this volume.
inline BufferPool &
Volume::Pool()
{
return fBufferPool;
}
// Returns the result of atomically adding 1 to the volume's ID counter.
// NOTE(review): presumably atomic_add() returns the previous value, so
// that the first ID handed out is 0 - confirm.
inline uint32
Volume::GetUniqueID()
{
return atomic_add(&fUniqueID, 1);
}
#endif /* VOLUME_H */

View File

@ -0,0 +1,400 @@
#ifndef BFS_H
#define BFS_H
/* bfs - BFS definitions and helper functions
**
** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
** Parts of this code is based on work previously done by Marcus Overhagen
**
** Copyright 2001, pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
#include "bfs_endian.h"
#ifndef B_BEOS_VERSION_DANO
# define B_BAD_DATA B_ERROR
#endif
// ToDo: temporary fix! (missing but public ioctls)
#define IOCTL_FILE_UNCACHED_IO 10000
// Addresses a run of consecutive blocks within one allocation group.
// The fields are stored in the byte order of the file system, the
// accessor methods convert them to host endian format.
struct block_run {
// index of the allocation group the run is located in
int32 allocation_group;
// first block of the run, relative to the start of the allocation group
uint16 start;
// number of blocks in the run
uint16 length;
int32 AllocationGroup() const { return BFS_ENDIAN_TO_HOST_INT32(allocation_group); }
uint16 Start() const { return BFS_ENDIAN_TO_HOST_INT16(start); }
uint16 Length() const { return BFS_ENDIAN_TO_HOST_INT16(length); }
inline bool operator==(const block_run &run) const;
inline bool operator!=(const block_run &run) const;
inline bool IsZero();
inline bool MergeableWith(block_run run) const;
inline void SetTo(int32 group, uint16 start, uint16 length = 1);
inline static block_run Run(int32 group, uint16 start, uint16 length = 1);
// can't have a constructor because it's used in a union
} _PACKED;
typedef block_run inode_addr;
// Since the block_run::length field spans 16 bits, the largest number of
// blocks covered by a block_run is 65535 (as long as we don't want to
// break compatibility and take a zero length for 65536).
#define MAX_BLOCK_RUN_LENGTH 65535
//**************************************
#define BFS_DISK_NAME_LENGTH 32
// The super block as it is stored on disk. The fields must not be
// reordered or resized; they are kept in the byte order of the file
// system, use the accessor methods to get host endian values.
struct disk_super_block {
// volume name
char name[BFS_DISK_NAME_LENGTH];
// expected to be SUPER_BLOCK_MAGIC1
int32 magic1;
// expected to be SUPER_BLOCK_FS_LENDIAN
int32 fs_byte_order;
// block size in bytes, and the matching shift (1 << block_shift)
uint32 block_size;
uint32 block_shift;
off_t num_blocks;
off_t used_blocks;
// has to be equal to block_size, see IsValid()
int32 inode_size;
// expected to be SUPER_BLOCK_MAGIC2
int32 magic2;
// allocation group geometry
int32 blocks_per_ag;
int32 ag_shift;
int32 num_ags;
// SUPER_BLOCK_DISK_CLEAN or SUPER_BLOCK_DISK_DIRTY
int32 flags;
// location of the log area, and the current start/end within it
block_run log_blocks;
off_t log_start;
off_t log_end;
// expected to be SUPER_BLOCK_MAGIC3
int32 magic3;
// the root directory and the root directory of the indices
inode_addr root_dir;
inode_addr indices;
int32 pad[8];
int32 Magic1() const { return BFS_ENDIAN_TO_HOST_INT32(magic1); }
int32 Magic2() const { return BFS_ENDIAN_TO_HOST_INT32(magic2); }
int32 Magic3() const { return BFS_ENDIAN_TO_HOST_INT32(magic3); }
int32 ByteOrder() const { return BFS_ENDIAN_TO_HOST_INT32(fs_byte_order); }
uint32 BlockSize() const { return BFS_ENDIAN_TO_HOST_INT32(block_size); }
uint32 BlockShift() const { return BFS_ENDIAN_TO_HOST_INT32(block_shift); }
off_t NumBlocks() const { return BFS_ENDIAN_TO_HOST_INT64(num_blocks); }
off_t UsedBlocks() const { return BFS_ENDIAN_TO_HOST_INT64(used_blocks); }
int32 InodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(inode_size); }
int32 BlocksPerAllocationGroup() const { return BFS_ENDIAN_TO_HOST_INT32(blocks_per_ag); }
int32 AllocationGroups() const { return BFS_ENDIAN_TO_HOST_INT32(num_ags); }
int32 AllocationGroupShift() const { return BFS_ENDIAN_TO_HOST_INT32(ag_shift); }
int32 Flags() const { return BFS_ENDIAN_TO_HOST_INT32(flags); }
off_t LogStart() const { return BFS_ENDIAN_TO_HOST_INT64(log_start); }
off_t LogEnd() const { return BFS_ENDIAN_TO_HOST_INT64(log_end); }
// implemented in Volume.cpp:
bool IsValid();
void Initialize(const char *name, off_t numBlocks, uint32 blockSize);
} _PACKED;
#define SUPER_BLOCK_FS_LENDIAN 'BIGE' /* BIGE */
#define SUPER_BLOCK_MAGIC1 'BFS1' /* BFS1 */
#define SUPER_BLOCK_MAGIC2 0xdd121031
#define SUPER_BLOCK_MAGIC3 0x15b6830e
#define SUPER_BLOCK_DISK_CLEAN 'CLEN' /* CLEN */
#define SUPER_BLOCK_DISK_DIRTY 'DIRT' /* DIRT */
//**************************************
#define NUM_DIRECT_BLOCKS 12
// Describes where the data of an inode is located: NUM_DIRECT_BLOCKS
// direct block runs, one indirect, and one double indirect block run,
// each accompanied by a "max range" value, plus the total size.
// The structure is packed and its members are kept in BFS byte order.
struct data_stream {
block_run direct[NUM_DIRECT_BLOCKS];
off_t max_direct_range;
block_run indirect;
off_t max_indirect_range;
block_run double_indirect;
off_t max_double_indirect_range;
off_t size;
// host-endian accessors for the 64 bit members
// NOTE(review): the ranges presumably are the highest byte offsets
// covered by the respective level - confirm against Inode.cpp
off_t MaxDirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_direct_range); }
off_t MaxIndirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_indirect_range); }
off_t MaxDoubleIndirectRange() const { return BFS_ENDIAN_TO_HOST_INT64(max_double_indirect_range); }
off_t Size() const { return BFS_ENDIAN_TO_HOST_INT64(size); }
} _PACKED;
// This defines the size of the indirect and double indirect
// blocks. Note: the code may not work correctly at some places
// if this value is changed (it's not tested).
#define NUM_ARRAY_BLOCKS 4
#define ARRAY_BLOCKS_SHIFT 2
#define INDIRECT_BLOCKS_SHIFT (ARRAY_BLOCKS_SHIFT + ARRAY_BLOCKS_SHIFT)
//**************************************
struct bfs_inode;
// Header of a single item in the small_data section of an inode.
// The header is directly followed by the name (name_size bytes) and the
// data. The structure is packed and its members are kept in BFS byte
// order.
struct small_data {
uint32 type;
uint16 name_size;
uint16 data_size;
#if !__MWERKS__ //-- mwcc doesn't support thingy[0], so we patch Name() instead
char name[0]; // name_size long, followed by data
#endif
// host-endian accessors for the header members
uint32 Type() const { return BFS_ENDIAN_TO_HOST_INT32(type); }
uint16 NameSize() const { return BFS_ENDIAN_TO_HOST_INT16(name_size); }
uint16 DataSize() const { return BFS_ENDIAN_TO_HOST_INT16(data_size); }
// helpers to locate the name, the data, and the following item;
// they are defined inline further down in this header
inline char *Name() const;
inline uint8 *Data() const;
inline uint32 Size() const;
inline small_data *Next() const;
inline bool IsLast(const bfs_inode *inode) const;
} _PACKED;
// the file name is part of the small_data structure
#define FILE_NAME_TYPE 'CSTR'
#define FILE_NAME_NAME 0x13
#define FILE_NAME_NAME_LENGTH 1
//**************************************
class Volume;
#define SHORT_SYMLINK_NAME_LENGTH 144 // length incl. terminating '\0'
// The inode structure of BFS. It is packed and its members are kept in
// BFS byte order; use the accessor methods to read them in host byte
// order. The small_data section starts directly behind the "pad" member.
struct bfs_inode {
int32 magic1;
inode_addr inode_num;
int32 uid;
int32 gid;
int32 mode; // see sys/stat.h
int32 flags;
bigtime_t create_time;
bigtime_t last_modified_time;
inode_addr parent;
inode_addr attributes;
uint32 type; // attribute type
int32 inode_size;
uint32 etc; // a pointer to the Inode object during construction
// either the data stream, or the target of a short symlink
union {
data_stream data;
char short_symlink[SHORT_SYMLINK_NAME_LENGTH];
};
int32 pad[4];
#if !__MWERKS__
small_data small_data_start[0];
#endif
// host-endian accessors
// NOTE(review): Type() returns int32 although the "type" member is
// declared as uint32
int32 Magic1() const { return BFS_ENDIAN_TO_HOST_INT32(magic1); }
int32 UserID() const { return BFS_ENDIAN_TO_HOST_INT32(uid); }
int32 GroupID() const { return BFS_ENDIAN_TO_HOST_INT32(gid); }
int32 Mode() const { return BFS_ENDIAN_TO_HOST_INT32(mode); }
int32 Flags() const { return BFS_ENDIAN_TO_HOST_INT32(flags); }
int32 Type() const { return BFS_ENDIAN_TO_HOST_INT32(type); }
int32 InodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(inode_size); }
bigtime_t LastModifiedTime() const { return BFS_ENDIAN_TO_HOST_INT64(last_modified_time); }
bigtime_t CreateTime() const { return BFS_ENDIAN_TO_HOST_INT64(create_time); }
// returns the first small_data item; defined inline at the end of
// this header
inline small_data *SmallDataStart();
status_t InitCheck(Volume *volume);
// defined in Inode.cpp
} _PACKED;
#define INODE_MAGIC1 0x3bbe0ad9
#define INODE_TIME_SHIFT 16
#define INODE_TIME_MASK 0xffff
#define INODE_FILE_NAME_LENGTH 256
// Bit values for the bfs_inode::flags member.
// INODE_PERMANENT_FLAGS is a mask that covers the lower 16 bits; all
// flags defined after it have values outside of that mask.
// NOTE(review): presumably only the flags within the mask are written
// to disk - confirm against Inode.cpp
enum inode_flags {
INODE_IN_USE = 0x00000001, // always set
INODE_ATTR_INODE = 0x00000004,
INODE_LOGGED = 0x00000008, // log changes to the data stream
INODE_DELETED = 0x00000010,
INODE_NOT_READY = 0x00000020, // used during Inode construction
INODE_LONG_SYMLINK = 0x00000040, // symlink in data stream
INODE_PERMANENT_FLAGS = 0x0000ffff,
INODE_NO_CACHE = 0x00010000,
INODE_WAS_WRITTEN = 0x00020000,
INODE_NO_TRANSACTION = 0x00040000,
INODE_DONT_FREE_SPACE = 0x00080000, // only used by the "chkbfs" functionality
INODE_CHKBFS_RUNNING = 0x00200000,
};
//**************************************
// State that is kept for an open file.
// NOTE(review): last_notification and last_size are presumably used
// together with INODE_NOTIFICATION_INTERVAL to limit the rate of size
// change notifications - confirm against the users of this structure
struct file_cookie {
bigtime_t last_notification;
off_t last_size;
int open_mode;
};
// notify every second if the file size has changed
#define INODE_NOTIFICATION_INTERVAL 1000000LL
//**************************************
/**	Divides \a num by \a divisor, and rounds the result up to the next
 *	integer by adding "divisor - 1" to the dividend first.
 */
inline int32
divide_roundup(int32 num,int32 divisor)
{
	const int32 biased = num + divisor - 1;
	return biased / divisor;
}
/**	64 bit variant: divides \a num by \a divisor, and rounds the result
 *	up to the next integer by adding "divisor - 1" to the dividend first.
 */
inline int64
divide_roundup(int64 num,int32 divisor)
{
	const int64 biased = num + divisor - 1;
	return biased / divisor;
}
/**	Counts how often \a i can be shifted right by one bit until it is
 *	no longer greater than 1. For a power of two this is its exponent;
 *	0 and 1 both yield 0.
 */
inline int
get_shift(uint64 i)
{
	int shift = 0;

	for (; i > 1; i >>= 1)
		shift++;

	return shift;
}
/**	Rounds \a data up to the next multiple of sizeof(off_t); values that
 *	already are a multiple are returned unchanged.
 */
inline int32
round_up(uint32 data)
{
	// sizeof(off_t) is a power of two, so "size - 1" selects the bits
	// that have to be cleared
	const size_t mask = sizeof(off_t) - 1;

	return (data + mask) & ~mask;
}
/************************ block_run inline functions ************************/
// #pragma mark -
/**	Two runs are equal if all three of their members match. The raw
 *	members are compared, no byte order conversion is needed for that.
 */
inline bool
block_run::operator==(const block_run &run) const
{
	if (allocation_group != run.allocation_group)
		return false;
	if (start != run.start)
		return false;

	return length == run.length;
}
/**	Two runs differ as soon as one of their three members differs. */
inline bool
block_run::operator!=(const block_run &run) const
{
	const bool allEqual = allocation_group == run.allocation_group
		&& start == run.start
		&& length == run.length;

	return !allEqual;
}
/**	Returns true if the allocation group, the start, and the length of
 *	this run are all zero.
 */
inline bool
block_run::IsZero()
{
	if (allocation_group != 0)
		return false;
	if (start != 0)
		return false;

	return length == 0;
}
/**	Checks if \a run could be appended to this run: both have to be in
 *	the same allocation group, \a run has to start where this run ends,
 *	and the combined length must not exceed MAX_BLOCK_RUN_LENGTH.
 */
inline bool
block_run::MergeableWith(block_run run) const
{
	if (allocation_group != run.allocation_group)
		return false;

	// the other run has to begin directly behind this one
	if (Start() + Length() != run.Start())
		return false;

	// 65535 is the maximum allowed run size for BFS
	return (uint32)Length() + run.Length() <= MAX_BLOCK_RUN_LENGTH;
}
/**	Stores the given host-endian values in this run, converting each of
 *	them to BFS byte order.
 */
inline void
block_run::SetTo(int32 _group,uint16 _start,uint16 _length)
{
	length = HOST_ENDIAN_TO_BFS_INT16(_length);
	start = HOST_ENDIAN_TO_BFS_INT16(_start);
	allocation_group = HOST_ENDIAN_TO_BFS_INT32(_group);
}
/**	Builds a block_run from host-endian values; this replaces the
 *	constructor the structure cannot have.
 */
inline block_run
block_run::Run(int32 group, uint16 start, uint16 length)
{
	block_run run;
	// SetTo() performs the conversion to BFS byte order for all members
	run.SetTo(group, start, length);
	return run;
}
/************************ small_data inline functions ************************/
// #pragma mark -
/**	Returns a pointer to the name of this item, which directly follows
 *	the data_size member.
 */
inline char *
small_data::Name() const
{
#if !__MWERKS__
	return const_cast<char *>(name);
#else
	// mwcc has no "name" member, so the address is computed manually
	return (char *)(uint32(&data_size)+uint32(sizeof(data_size)));
#endif
}
/**	Returns a pointer to the data of this item. The data starts 3 bytes
 *	behind the end of the name.
 */
inline uint8 *
small_data::Data() const
{
	uint8 *nameStart = (uint8 *)Name();

	// NOTE(review): the 3 bytes presumably are the terminator/padding
	// that follows the name on disk - confirm against the BFS layout
	return nameStart + NameSize() + 3;
}
/**	Returns the number of bytes this item occupies: the header, the
 *	name plus 3 bytes, and the data plus 1 byte.
 */
inline uint32
small_data::Size() const
{
	uint32 size = sizeof(small_data);
	size += NameSize() + 3;
	size += DataSize() + 1;

	return size;
}
/**	Returns the item that follows this one, i.e. the address Size()
 *	bytes behind the start of this item. No bounds check is done here.
 */
inline small_data *
small_data::Next() const
{
	uint8 *self = (uint8 *)this;
	return (small_data *)(self + Size());
}
inline bool
small_data::IsLast(const bfs_inode *inode) const
{
// we need to check the location first, because if name_size is already beyond
// the block, we would touch invalid memory (although that can't cause wrong
// results)
return (uint32)this > (uint32)inode + inode->InodeSize() - sizeof(small_data) || name_size == 0;
}
/************************ bfs_inode inline functions ************************/
// #pragma mark -
/**	Returns the first item of the small_data section, which starts
 *	directly behind the "pad" member of the inode.
 */
inline small_data *
bfs_inode::SmallDataStart()
{
#if !__MWERKS__
	return small_data_start;
#else
	// mwcc has no "small_data_start" member, so the address is computed
	return (small_data *)(&pad[4] /* last item in pad + sizeof(int32) */);
#endif
}
#endif /* BFS_H */

View File

@ -0,0 +1,68 @@
#ifndef BFS_CONTROL_H
#define BFS_CONTROL_H
/* bfs_control - additional functionality exported via ioctl()
**
** Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <fs_interface.h>
/* ioctl to check the version of BFS used - parameter is a uint32 *
* where the number is stored
*/
#define BFS_IOCTL_VERSION 14200
/* ioctls to use the "chkbfs" feature from the outside;
 * all calls use a struct check_control as single parameter
 */
#define BFS_IOCTL_START_CHECKING 14201
#define BFS_IOCTL_STOP_CHECKING 14202
#define BFS_IOCTL_CHECK_NEXT_NODE 14203
/* all fields except "flags", and "name" must be set to zero before
* BFS_IOCTL_START_CHECKING is called
*/
/* The single parameter of the BFS_IOCTL_*_CHECKING and
 * BFS_IOCTL_CHECK_NEXT_NODE ioctls.
 * "flags" takes the BFS_FIX_BITMAP_ERRORS/BFS_REMOVE_* values, "errors"
 * the BFS_MISSING_BLOCKS... values, and "magic" is compared against
 * BFS_IOCTL_CHECK_MAGIC (all defined in this header).
 * NOTE(review): which of the remaining fields are filled in by the file
 * system for each node is not visible here - confirm against the
 * chkbfs implementation.
 */
struct check_control {
uint32 magic;
uint32 flags;
char name[B_FILE_NAME_LENGTH];
vnode_id inode;
uint32 mode;
uint32 errors;
/* counters; presumably in blocks - confirm */
struct {
uint64 missing;
uint64 already_set;
uint64 freed;
} stats;
status_t status;
void *cookie;
};
/* values for the flags field */
#define BFS_FIX_BITMAP_ERRORS 1
#define BFS_REMOVE_WRONG_TYPES 2
/* files that shouldn't be part of its parent will be removed
 * (e.g. a directory contains an attribute, ...)
 * Works only if BFS_FIX_BITMAP_ERRORS is set, too
 */
#define BFS_REMOVE_INVALID 4
/* removes nodes that couldn't be opened at all from its parent
 * directory.
 * Also requires BFS_FIX_BITMAP_ERRORS to be set.
 */
/* values for the errors field */
#define BFS_MISSING_BLOCKS 1
#define BFS_BLOCKS_ALREADY_SET 2
#define BFS_INVALID_BLOCK_RUN 4
#define BFS_COULD_NOT_OPEN 8
#define BFS_WRONG_TYPE 16
#define BFS_NAMES_DONT_MATCH 32
/* check control magic value */
#define BFS_IOCTL_CHECK_MAGIC 'BChk'
#endif /* BFS_CONTROL_H */

View File

@ -0,0 +1,44 @@
/*
** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
** Distributed under the terms of the OpenBeOS License.
*/
#ifndef BFS_ENDIAN_H
#define BFS_ENDIAN_H
#include <ByteOrder.h>
#if !defined(BFS_LITTLE_ENDIAN_ONLY) && !defined(BFS_BIG_ENDIAN_ONLY)
// default setting; BFS is now primarily a little endian file system
#	define BFS_LITTLE_ENDIAN_ONLY
#endif

// Conversion macros between the byte order used on disk ("BFS endian")
// and the byte order of the host. Which set is chosen depends on the
// BFS_*_ENDIAN_ONLY setting and on B_HOST_IS_LENDIAN/B_HOST_IS_BENDIAN.
// All macros expand to a single parenthesized or function-call
// expression, so that an expression can safely be passed as argument
// and the result can be used within a larger expression.

#if (defined(BFS_LITTLE_ENDIAN_ONLY) && B_HOST_IS_LENDIAN) \
	|| (defined(BFS_BIG_ENDIAN_ONLY) && B_HOST_IS_BENDIAN)
	/* host is BFS endian */
#	define BFS_ENDIAN_TO_HOST_INT16(value) (value)
#	define BFS_ENDIAN_TO_HOST_INT32(value) (value)
#	define BFS_ENDIAN_TO_HOST_INT64(value) (value)
#	define HOST_ENDIAN_TO_BFS_INT16(value) (value)
#	define HOST_ENDIAN_TO_BFS_INT32(value) (value)
#	define HOST_ENDIAN_TO_BFS_INT64(value) (value)
#elif (defined(BFS_LITTLE_ENDIAN_ONLY) && B_HOST_IS_BENDIAN) \
	|| (defined(BFS_BIG_ENDIAN_ONLY) && B_HOST_IS_LENDIAN)
	/* host is big endian, BFS is little endian or vice versa */
#	define BFS_ENDIAN_TO_HOST_INT16(value) __swap_int16(value)
#	define BFS_ENDIAN_TO_HOST_INT32(value) __swap_int32(value)
#	define BFS_ENDIAN_TO_HOST_INT64(value) __swap_int64(value)
#	define HOST_ENDIAN_TO_BFS_INT16(value) __swap_int16(value)
#	define HOST_ENDIAN_TO_BFS_INT32(value) __swap_int32(value)
#	define HOST_ENDIAN_TO_BFS_INT64(value) __swap_int64(value)
#else
	// ToDo: maybe build a version that supports both, big & little endian?
	//	But since that will need some kind of global data (to
	//	know of what type this file system is), it's probably
	//	something for the boot loader; anything else would be
	//	a major pain.
	// Note: no conversion macros are defined in this case.
#endif
#endif /* BFS_ENDIAN_H */

View File

@ -0,0 +1,50 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _CACHE_H_
#define _CACHE_H_
#include <BeBuild.h>
#define ALLOW_WRITES 1
#define NO_WRITES 0
#ifdef __cplusplus
extern "C" {
#endif
/* Prototypes of the block cache; only the declarations are visible
 * here, the functions are implemented elsewhere.
 * NOTE(review): "dev"/"fd" presumably is the file descriptor of the
 * device, "bnum" a block number, and "bsize" the block size in bytes -
 * confirm against the cache implementation.
 */
extern int init_block_cache(int max_blocks, int flags);
extern void shutdown_block_cache(void);
extern void force_cache_flush(int dev, int prefer_log_blocks);
extern int flush_blocks(int dev, off_t bnum, int nblocks);
extern int flush_device(int dev, int warn_locked);
extern int init_cache_for_device(int fd, off_t max_blocks);
/* allow_write presumably takes ALLOW_WRITES or NO_WRITES - confirm */
extern int remove_cached_device_blocks(int dev, int allow_write);
extern void *get_block(int dev, off_t bnum, int bsize);
extern void *get_empty_block(int dev, off_t bnum, int bsize);
extern int release_block(int dev, off_t bnum);
extern int mark_blocks_dirty(int dev, off_t bnum, int nblocks);
extern int cached_read(int dev, off_t bnum, void *data, off_t num_blocks, int bsize);
extern int cached_write(int dev, off_t bnum, const void *data,
off_t num_blocks, int bsize);
extern int cached_write_locked(int dev, off_t bnum, const void *data,
off_t num_blocks, int bsize);
/* registers a callback together with an argument that is passed to it */
extern int set_blocks_info(int dev, off_t *blocks, int nblocks,
void (*func)(off_t bnum, size_t nblocks, void *arg),
void *arg);
extern size_t read_phys_blocks (int fd, off_t bnum, void *data, uint num_blocks, int bsize);
extern size_t write_phys_blocks(int fd, off_t bnum, void *data, uint num_blocks, int bsize);
#ifdef __cplusplus
}
#endif
#endif /* _CACHE_H_ */

View File

@ -0,0 +1,247 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _FSPROTO_H
#define _FSPROTO_H
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <unistd.h>
#include <iovec.h>
#include <OS.h>
#include <NodeMonitor.h>
#include <fs_attr.h>
#include <fs_info.h>
#include <BeBuild.h>
#include <Drivers.h>
typedef dev_t nspace_id;
typedef ino_t vnode_id;
/*
* PUBLIC PART OF THE FILE SYSTEM PROTOCOL
*/
#define WSTAT_MODE 0x0001
#define WSTAT_UID 0x0002
#define WSTAT_GID 0x0004
#define WSTAT_SIZE 0x0008
#define WSTAT_ATIME 0x0010
#define WSTAT_MTIME 0x0020
#define WSTAT_CRTIME 0x0040
#define WFSSTAT_NAME 0x0001
#define SELECT_READ 1
#define SELECT_WRITE 2
#define SELECT_EXCEPTION 3
// missing ioctl() call added
#define IOCTL_FILE_UNCACHED_IO 10000
#define IOCTL_CREATE_TIME 10002
#define IOCTL_MODIFIED_TIME 10003
// B_CUR_FS_API_VERSION is 2 for R5, but 3 on Zeta and Dano, because
// of the added calls for power management - so it's set to 3 when
// COMPILE_FOR_ZETA is defined, because that's a requirement to let
// Dano boot from our fs...
#ifdef COMPILE_FOR_ZETA
# define B_CUR_FS_API_VERSION 3
#else
# define B_CUR_FS_API_VERSION 2
#endif
struct attr_info;
struct index_info;
/* Function types of the file system hooks. Each type is used for the
 * member of the same name (without the "op_" prefix) in struct
 * vnode_ops; op_free_cookie is shared by all free_*cookie members.
 * NOTE(review): "ns" presumably is the per-volume data returned by the
 * mount hook, "node" the private node returned by the read_vnode hook,
 * and "cookie" the per-open state created by the respective open hook -
 * confirm against the BeOS file system API documentation.
 */

/* vnode life cycle */
typedef int op_read_vnode(void *ns, vnode_id vnid, char r, void **node);
typedef int op_write_vnode(void *ns, void *node, char r);
typedef int op_remove_vnode(void *ns, void *node, char r);
typedef int op_secure_vnode(void *ns, void *node);
typedef int op_wake_vnode(void *ns, void *node);
typedef int op_suspend_vnode(void *ns, void *node);
/* directory entries */
typedef int op_walk(void *ns, void *base, const char *file, char **newpath,
vnode_id *vnid);
typedef int op_access(void *ns, void *node, int mode);
typedef int op_create(void *ns, void *dir, const char *name,
int omode, int perms, vnode_id *vnid, void **cookie);
typedef int op_mkdir(void *ns, void *dir, const char *name, int perms);
typedef int op_symlink(void *ns, void *dir, const char *name,
const char *path);
typedef int op_link(void *ns, void *dir, const char *name, void *node);
typedef int op_rename(void *ns, void *olddir, const char *oldname,
void *newdir, const char *newname);
typedef int op_unlink(void *ns, void *dir, const char *name);
typedef int op_rmdir(void *ns, void *dir, const char *name);
typedef int op_readlink(void *ns, void *node, char *buf, size_t *bufsize);
/* directory iteration */
typedef int op_opendir(void *ns, void *node, void **cookie);
typedef int op_closedir(void *ns, void *node, void *cookie);
typedef int op_rewinddir(void *ns, void *node, void *cookie);
typedef int op_readdir(void *ns, void *node, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
/* file access */
typedef int op_open(void *ns, void *node, int omode, void **cookie);
typedef int op_close(void *ns, void *node, void *cookie);
typedef int op_free_cookie(void *ns, void *node, void *cookie);
typedef int op_read(void *ns, void *node, void *cookie, off_t pos, void *buf,
size_t *len);
typedef int op_write(void *ns, void *node, void *cookie, off_t pos,
const void *buf, size_t *len);
typedef int op_readv(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
size_t count, size_t *len);
typedef int op_writev(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
size_t count, size_t *len);
typedef int op_ioctl(void *ns, void *node, void *cookie, int cmd, void *buf,
size_t len);
typedef int op_setflags(void *ns, void *node, void *cookie, int flags);
/* the "mask" of op_wstat presumably takes the WSTAT_* values - confirm */
typedef int op_rstat(void *ns, void *node, struct stat *);
typedef int op_wstat(void *ns, void *node, struct stat *, long mask);
typedef int op_fsync(void *ns, void *node);
typedef int op_select(void *ns, void *node, void *cookie, uint8 event,
uint32 ref, selectsync *sync);
typedef int op_deselect(void *ns, void *node, void *cookie, uint8 event,
selectsync *sync);
/* volume level hooks */
typedef int op_initialize(const char *devname, void *parms, size_t len);
typedef int op_mount(nspace_id nsid, const char *devname, ulong flags,
void *parms, size_t len, void **data, vnode_id *vnid);
typedef int op_unmount(void *ns);
typedef int op_sync(void *ns);
typedef int op_rfsstat(void *ns, struct fs_info *);
typedef int op_wfsstat(void *ns, struct fs_info *, long mask);
/* attributes */
typedef int op_open_attrdir(void *ns, void *node, void **cookie);
typedef int op_close_attrdir(void *ns, void *node, void *cookie);
typedef int op_rewind_attrdir(void *ns, void *node, void *cookie);
typedef int op_read_attrdir(void *ns, void *node, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
typedef int op_remove_attr(void *ns, void *node, const char *name);
typedef int op_rename_attr(void *ns, void *node, const char *oldname,
const char *newname);
typedef int op_stat_attr(void *ns, void *node, const char *name,
struct attr_info *buf);
typedef int op_write_attr(void *ns, void *node, const char *name, int type,
const void *buf, size_t *len, off_t pos);
typedef int op_read_attr(void *ns, void *node, const char *name, int type,
void *buf, size_t *len, off_t pos);
/* indices */
typedef int op_open_indexdir(void *ns, void **cookie);
typedef int op_close_indexdir(void *ns, void *cookie);
typedef int op_rewind_indexdir(void *ns, void *cookie);
typedef int op_read_indexdir(void *ns, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
typedef int op_create_index(void *ns, const char *name, int type, int flags);
typedef int op_remove_index(void *ns, const char *name);
typedef int op_rename_index(void *ns, const char *oldname,
const char *newname);
typedef int op_stat_index(void *ns, const char *name, struct index_info *buf);
/* queries */
typedef int op_open_query(void *ns, const char *query, ulong flags,
port_id port, long token, void **cookie);
typedef int op_close_query(void *ns, void *cookie);
typedef int op_read_query(void *ns, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
/* The table of hook functions a file system exports (see the
 * "fs_entry" declaration at the end of this header). Every member is a
 * pointer to a function of the matching op_* type.
 * The order of the members must not be changed.
 * NOTE(review): the table is presumably read by position by the
 * kernel - confirm against the BeOS file system API documentation.
 */
typedef struct vnode_ops {
/* vnode life cycle */
op_read_vnode (*read_vnode);
op_write_vnode (*write_vnode);
op_remove_vnode (*remove_vnode);
op_secure_vnode (*secure_vnode);
/* directory entries */
op_walk (*walk);
op_access (*access);
op_create (*create);
op_mkdir (*mkdir);
op_symlink (*symlink);
op_link (*link);
op_rename (*rename);
op_unlink (*unlink);
op_rmdir (*rmdir);
op_readlink (*readlink);
/* directory iteration */
op_opendir (*opendir);
op_closedir (*closedir);
op_free_cookie (*free_dircookie);
op_rewinddir (*rewinddir);
op_readdir (*readdir);
/* file access */
op_open (*open);
op_close (*close);
op_free_cookie (*free_cookie);
op_read (*read);
op_write (*write);
op_readv (*readv);
op_writev (*writev);
op_ioctl (*ioctl);
op_setflags (*setflags);
op_rstat (*rstat);
op_wstat (*wstat);
op_fsync (*fsync);
/* volume level hooks */
op_initialize (*initialize);
op_mount (*mount);
op_unmount (*unmount);
op_sync (*sync);
op_rfsstat (*rfsstat);
op_wfsstat (*wfsstat);
op_select (*select);
op_deselect (*deselect);
/* indices */
op_open_indexdir (*open_indexdir);
op_close_indexdir (*close_indexdir);
op_free_cookie (*free_indexdircookie);
op_rewind_indexdir (*rewind_indexdir);
op_read_indexdir (*read_indexdir);
op_create_index (*create_index);
op_remove_index (*remove_index);
op_rename_index (*rename_index);
op_stat_index (*stat_index);
/* attributes */
op_open_attrdir (*open_attrdir);
op_close_attrdir (*close_attrdir);
op_free_cookie (*free_attrdircookie);
op_rewind_attrdir (*rewind_attrdir);
op_read_attrdir (*read_attrdir);
op_write_attr (*write_attr);
op_read_attr (*read_attr);
op_remove_attr (*remove_attr);
op_rename_attr (*rename_attr);
op_stat_attr (*stat_attr);
/* queries */
op_open_query (*open_query);
op_close_query (*close_query);
op_free_cookie (*free_querycookie);
op_read_query (*read_query);
// for Dano compatibility only
op_wake_vnode (*wake_vnode);
op_suspend_vnode (*suspend_vnode);
} vnode_ops;
#ifdef __cplusplus
extern "C" {
#endif
/* Helper functions imported from the kernel (_IMPEXP_KERNEL).
 * NOTE(review): new_path() presumably hands out a copy of "path" that
 * has to be released with free_path() - confirm against the kernel
 * documentation.
 */
extern _IMPEXP_KERNEL int new_path(const char *path, char **copy);
extern _IMPEXP_KERNEL void free_path(char *p);
extern _IMPEXP_KERNEL void notify_select_event(selectsync *sync, uint32 ref);
extern _IMPEXP_KERNEL status_t is_vnode_removed(nspace_id nsid, vnode_id vnid);
// The missing prototypes can be found in the fs_interface.h file.
// That part of the VFS is still compatible with BeOS :)
#ifdef __cplusplus
}
#endif
/* The two symbols a file system add-on exports: its hook table, and
 * the API version it was built for (see B_CUR_FS_API_VERSION above).
 */
extern _EXPORT vnode_ops fs_entry;
extern _EXPORT int32 api_version;
#endif /* _FSPROTO_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,51 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _LOCK_H
#define _LOCK_H
#include <BeBuild.h>
#include <OS.h>
#ifdef __cplusplus
extern "C" {
#else
typedef struct lock lock;
typedef struct mlock mlock;
#endif
/* A lock that consists of a semaphore and a counter. The LOCK() and
 * UNLOCK() macros below change the counter "c" via atomic_add(), and
 * only acquire/release the semaphore "s" depending on its previous
 * value.
 */
struct lock {
sem_id s;
long c;
};
/* A lock that only consists of a semaphore; the LOCKM() and UNLOCKM()
 * macros below acquire/release it with a caller supplied count.
 */
struct mlock {
sem_id s;
};
/* Creation and removal of a struct lock; imported from the kernel.
 * NOTE(review): "name" presumably is the name given to the underlying
 * semaphore - confirm against the kernel documentation.
 */
extern _IMPEXP_KERNEL int new_lock(lock *l, const char *name);
extern _IMPEXP_KERNEL int free_lock(lock *l);
#ifdef LOCK
#undef LOCK
#endif

/* LOCK() decrements the counter of a struct lock, and acquires its
 * semaphore if the value reported by atomic_add() was not positive.
 * UNLOCK() increments the counter again, and releases the semaphore
 * if the reported value was negative.
 * "l" is the lock object itself (not a pointer to it); it is
 * parenthesized in the expansion so that an expression like "*lockPtr"
 * can be passed, too.
 * Note: both macros expand to a complete "if" statement including the
 * trailing semicolon - this form is kept for existing callers, but it
 * means they must not be used as the unbraced body of an if/else.
 */
#define LOCK(l)		if (atomic_add(&(l).c, -1) <= 0) acquire_sem((l).s);
#define UNLOCK(l)	if (atomic_add(&(l).c, 1) < 0) release_sem((l).s);

extern _IMPEXP_KERNEL int new_mlock(mlock *l, long c, const char *name);
extern _IMPEXP_KERNEL int free_mlock(mlock *l);

/* LOCKM()/UNLOCKM() acquire/release the semaphore of a struct mlock
 * "cnt" times at once. Both expand to a single expression that yields
 * the result of acquire_sem_etc()/release_sem_etc().
 */
#define LOCKM(l,cnt)	acquire_sem_etc((l).s, (cnt), 0, 0)
#define UNLOCKM(l,cnt)	release_sem_etc((l).s, (cnt), 0)
#ifdef __cplusplus
} // extern "C"
#endif
#endif