Initial revision

git-svn-id: file:///srv/svn/repos/haiku/trunk/current@639 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2002-08-07 23:14:13 +00:00
parent 0daa3f2af3
commit c42ee13401
30 changed files with 12342 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,436 @@
#ifndef B_PLUS_TREE_H
#define B_PLUS_TREE_H
/* BPlusTree - BFS B+Tree implementation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Roughly based on 'btlib' written by Marcus J. Ranum
**
** Copyright (c) 2001-2002 pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include "bfs.h"
#include "Journal.h"
#include "Chain.h"
//****************** on-disk structures ********************
#define BPLUSTREE_NULL -1LL
#define BPLUSTREE_FREE -2LL
// On-disk header of a B+tree. It belongs to the "on-disk structures"
// section of this file, so the order and size of the fields must not change.
struct bplustree_header {
// BPLUSTREE_MAGIC for a valid tree
uint32 magic;
// size of one node in bytes; IsValidLink() subtracts it from maximum_size
uint32 node_size;
uint32 max_number_of_levels;
// one of the bplustree_types values; the typed BPlusTree::Insert()
// overloads compare against it
uint32 data_type;
off_t root_node_pointer;
off_t free_node_pointer;
// upper bound IsValidLink() uses to range-check a link
off_t maximum_size;
// Returns true if "link" is BPLUSTREE_NULL, or a positive value that is not
// larger than maximum_size - node_size.
inline bool IsValidLink(off_t link);
};
#define BPLUSTREE_MAGIC 0x69f6c2e8
#define BPLUSTREE_NODE_SIZE 1024
#define BPLUSTREE_MAX_KEY_LENGTH 256
#define BPLUSTREE_MIN_KEY_LENGTH 1
// Key types a tree can hold. bplustree_header::data_type is compared against
// these values by the typed BPlusTree::Insert() overloads.
enum bplustree_types {
BPLUSTREE_STRING_TYPE = 0,
BPLUSTREE_INT32_TYPE = 1,
BPLUSTREE_UINT32_TYPE = 2,
BPLUSTREE_INT64_TYPE = 3,
BPLUSTREE_UINT64_TYPE = 4,
BPLUSTREE_FLOAT_TYPE = 5,
BPLUSTREE_DOUBLE_TYPE = 6
};
struct sorted_array;
typedef sorted_array duplicate_array;
// On-disk B+tree node. It belongs to the "on-disk structures" section of
// this file, so the order and size of the fields must not change.
// As computed by the inline accessors in this header, the fixed fields are
// followed by the key data (Keys()), then - at the round_up()'ed end of the
// key data - one uint16 per key (KeyLengths()), then one off_t per key
// (Values()).
struct bplustree_node {
off_t left_link;
off_t right_link;
// BPLUSTREE_NULL marks a leaf node, see IsLeaf()
off_t overflow_link;
uint16 all_key_count;
// total size of the key data in bytes
uint16 all_key_length;
inline uint16 *KeyLengths() const;
inline off_t *Values() const;
inline uint8 *Keys() const;
// number of bytes occupied by the header, keys, key lengths, and values
inline int32 Used() const;
uint8 *KeyAt(int32 index,uint16 *keyLength) const;
inline bool IsLeaf() const;
void Initialize();
uint8 CountDuplicates(off_t offset,bool isFragment) const;
off_t DuplicateAt(off_t offset,bool isFragment,int8 index) const;
int32 FragmentsUsed(uint32 nodeSize);
// fragments are (NUM_FRAGMENT_VALUES + 1) off_t values apart, counted from
// the start of the node
inline duplicate_array *FragmentAt(int8 index);
// the duplicate array starts at the overflow_link field
inline duplicate_array *DuplicateArray();
// Link helpers: the upper two bits of a link hold its type, the lowest ten
// bits a fragment index, and the bits in between the offset.
static inline uint8 LinkType(off_t link);
static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
static inline bool IsDuplicate(off_t link);
static inline off_t FragmentOffset(off_t link);
static inline uint32 FragmentIndex(off_t link);
};
//#define BPLUSTREE_NODE 0
#define BPLUSTREE_DUPLICATE_NODE 2
#define BPLUSTREE_DUPLICATE_FRAGMENT 3
#define NUM_FRAGMENT_VALUES 7
#define NUM_DUPLICATE_VALUES 125
//**************************************
// Constants for the TreeIterator: BPLUSTREE_FORWARD/BPLUSTREE_BACKWARD are
// passed to Traverse() as direction, BPLUSTREE_BEGIN is passed to Goto() by
// Rewind() (BPLUSTREE_END is presumably its counterpart - its use is not
// visible in this header).
// Note that BPLUSTREE_FORWARD and BPLUSTREE_END share the value 1.
enum bplustree_traversing {
BPLUSTREE_FORWARD = 1,
BPLUSTREE_BACKWARD = -1,
BPLUSTREE_BEGIN = 0,
BPLUSTREE_END = 1
};
//****************** in-memory structures ********************
template<class T> class Stack;
class BPlusTree;
class TreeIterator;
class CachedNode;
class Inode;
// needed for searching (utilizing a stack)
// Element type of the Stack that BPlusTree::SeekDown() fills: a node offset
// together with a key index in that node.
struct node_and_key {
off_t nodeOffset;
uint16 keyIndex;
};
//***** Cache handling *****
// Gives access to a node (or the header) of a BPlusTree. The destructor
// calls Unset(), so an object on the stack releases whatever SetTo()
// acquired when it goes out of scope.
class CachedNode {
public:
// creates an unset object; call SetTo()/SetToHeader() before using Node()
CachedNode(BPlusTree *tree)
:
fTree(tree),
fNode(NULL),
fBlock(NULL)
{
}
// creates the object and immediately calls SetTo(offset,check); use
// Node() to find out if that worked
CachedNode(BPlusTree *tree,off_t offset,bool check = true)
:
fTree(tree),
fNode(NULL),
fBlock(NULL)
{
SetTo(offset,check);
}
~CachedNode()
{
Unset();
}
bplustree_node *SetTo(off_t offset,bool check = true);
bplustree_header *SetToHeader();
void Unset();
status_t Free(Transaction *transaction, off_t offset);
status_t Allocate(Transaction *transaction,bplustree_node **node,off_t *offset);
status_t WriteBack(Transaction *transaction);
// the node set by the last SetTo() call, NULL after construction without
// an offset
bplustree_node *Node() const { return fNode; }
protected:
bplustree_node *InternalSetTo(off_t offset);
BPlusTree *fTree;
bplustree_node *fNode;
uint8 *fBlock;
// NOTE(review): not initialized by either constructor
off_t fBlockNumber;
};
//******** B+tree class *********
/**	The B+tree itself; it operates on the data of the Inode ("stream") it is
 *	attached to. Every modifying call takes the Transaction it runs in.
 *	TreeIterator and CachedNode are friends, since they need access to the
 *	private parts of the tree.
 */
class BPlusTree {
	public:
		// creates a tree for "stream" as part of "transaction"
		BPlusTree(Transaction *transaction,Inode *stream,int32 nodeSize = BPLUSTREE_NODE_SIZE);
		// attaches to the tree in "stream"
		BPlusTree(Inode *stream);
		BPlusTree();
		~BPlusTree();

		status_t	SetTo(Transaction *transaction,Inode *stream,int32 nodeSize = BPLUSTREE_NODE_SIZE);
		status_t	SetTo(Inode *stream);
		status_t	SetStream(Inode *stream);

		status_t	InitCheck();
		status_t	Validate();

		status_t	Remove(Transaction *transaction,const uint8 *key, uint16 keyLength, off_t value);

		status_t	Insert(Transaction *transaction,const uint8 *key, uint16 keyLength, off_t value);
		// The typed variants below return B_BAD_TYPE if the key type does
		// not match the tree's data_type, and call the raw version otherwise.
		status_t	Insert(Transaction *transaction,const char *key, off_t value);
		status_t	Insert(Transaction *transaction,int32 key, off_t value);
		status_t	Insert(Transaction *transaction,uint32 key, off_t value);
		status_t	Insert(Transaction *transaction,int64 key, off_t value);
		status_t	Insert(Transaction *transaction,uint64 key, off_t value);
		status_t	Insert(Transaction *transaction,float key, off_t value);
		status_t	Insert(Transaction *transaction,double key, off_t value);

		status_t	Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);

		static int32 TypeCodeToKeyType(type_code code);
		static int32 ModeToKeyType(mode_t mode);

	private:
		int32		CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
		status_t	FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength, uint16 *index = NULL, off_t *next = NULL);
		status_t	SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);

		status_t	FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached, off_t *_offset, bplustree_node **_fragment,uint32 *_index);
		status_t	InsertDuplicate(Transaction *transaction,CachedNode *cached,bplustree_node *node,uint16 index,off_t value);
		void		InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength, off_t value);
		status_t	SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other, off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength, off_t *_value);

		status_t	RemoveDuplicate(Transaction *transaction,bplustree_node *node,CachedNode *cached,uint16 keyIndex, off_t value);
		void		RemoveKey(bplustree_node *node, uint16 index);

		void		UpdateIterators(off_t offset,off_t nextOffset,uint16 keyIndex,uint16 splitAt,int8 change);
		void		AddIterator(TreeIterator *iterator);
		void		RemoveIterator(TreeIterator *iterator);

	private:
		// friend declarations need the class-key before C++11
		friend class TreeIterator;
		friend class CachedNode;

		Inode		*fStream;
		bplustree_header *fHeader;
		CachedNode	fCachedHeader;
		int32		fNodeSize;
		bool		fAllowDuplicates;
		status_t	fStatus;
		SimpleLock	fIteratorLock;
		// the iterators attached to this tree, see AddIterator()
		Chain<TreeIterator> fIterators;
};
//***** helper classes/functions *****
extern int32 compareKeys(type_code type,const void *key1, int keyLength1, const void *key2, int keyLength2);
class TreeIterator {
public:
TreeIterator(BPlusTree *tree);
~TreeIterator();
status_t Goto(int8 to);
status_t Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength, off_t *value,uint16 *duplicate = NULL);
status_t Find(const uint8 *key, uint16 keyLength);
status_t Rewind();
status_t GetNextEntry(void *key,uint16 *keyLength,uint16 maxLength,off_t *value,uint16 *duplicate = NULL);
status_t GetPreviousEntry(void *key,uint16 *keyLength,uint16 maxLength,off_t *value,uint16 *duplicate = NULL);
void SkipDuplicates();
#ifdef DEBUG
void Dump();
#endif
private:
BPlusTree *fTree;
off_t fCurrentNodeOffset; // traverse position
int32 fCurrentKey;
off_t fDuplicateNode;
uint16 fDuplicate, fNumDuplicates;
bool fIsFragment;
private:
friend Chain<TreeIterator>;
friend BPlusTree;
void Update(off_t offset,off_t nextOffset,uint16 keyIndex,uint16 splitAt,int8 change);
void Stop();
TreeIterator *fNext;
};
// BPlusTree's inline functions (most of them may not be needed)
/**	Inserts a null-terminated string key; its length is taken with strlen().
 *	Returns B_BAD_TYPE if the tree does not hold string keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,const char *key,off_t value)
{
	if (fHeader->data_type == BPLUSTREE_STRING_TYPE)
		return Insert(transaction,(uint8 *)key, strlen(key), value);

	return B_BAD_TYPE;
}
/**	Inserts an int32 key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold int32 keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,int32 key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_INT32_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/**	Inserts a uint32 key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold uint32 keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,uint32 key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_UINT32_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/**	Inserts an int64 key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold int64 keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,int64 key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_INT64_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/**	Inserts a uint64 key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold uint64 keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,uint64 key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_UINT64_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/**	Inserts a float key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold float keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,float key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_FLOAT_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/**	Inserts a double key, passing its in-memory bytes to the raw Insert().
 *	Returns B_BAD_TYPE if the tree does not hold double keys.
 */
inline status_t
BPlusTree::Insert(Transaction *transaction,double key, off_t value)
{
	if (fHeader->data_type == BPLUSTREE_DOUBLE_TYPE)
		return Insert(transaction,(uint8 *)&key, sizeof(key), value);

	return B_BAD_TYPE;
}
/************************ TreeIterator inline functions ************************/
// #pragma mark -
/**	Moves the iterator to BPLUSTREE_BEGIN, and returns what Goto() returned. */
inline status_t
TreeIterator::Rewind()
{
	const int8 target = BPLUSTREE_BEGIN;
	return Goto(target);
}
/**	Shortcut for Traverse() in the direction BPLUSTREE_FORWARD; all
 *	arguments and the result are passed through unchanged.
 */
inline status_t
TreeIterator::GetNextEntry(void *key,uint16 *keyLength,uint16 maxLength,off_t *value,uint16 *duplicate)
{
	const int8 direction = BPLUSTREE_FORWARD;
	return Traverse(direction,key,keyLength,maxLength,value,duplicate);
}
/**	Shortcut for Traverse() in the direction BPLUSTREE_BACKWARD; all
 *	arguments and the result are passed through unchanged.
 */
inline status_t
TreeIterator::GetPreviousEntry(void *key,uint16 *keyLength,uint16 maxLength,off_t *value,uint16 *duplicate)
{
	const int8 direction = BPLUSTREE_BACKWARD;
	return Traverse(direction,key,keyLength,maxLength,value,duplicate);
}
/************************ bplustree_header inline functions ************************/
// #pragma mark -
/**	A link is valid if it is BPLUSTREE_NULL, or if it is positive and
 *	not larger than maximum_size - node_size.
 */
inline bool
bplustree_header::IsValidLink(off_t link)
{
	if (link == BPLUSTREE_NULL)
		return true;
	if (link <= 0)
		return false;

	return link <= maximum_size - node_size;
}
/************************ bplustree_node inline functions ************************/
// #pragma mark -
/**	Returns the key length array; it starts at the round_up()'ed end of
 *	the key data that follows the fixed node fields.
 */
inline uint16 *
bplustree_node::KeyLengths() const
{
	char *base = (char *)this;
	return (uint16 *)(base + round_up(sizeof(bplustree_node) + all_key_length));
}
/**	Returns the value array; it directly follows the all_key_count
 *	entries of the key length array.
 */
inline off_t *
bplustree_node::Values() const
{
	uint16 *lengths = KeyLengths();
	return (off_t *)(lengths + all_key_count);
}
/**	Returns the key data, which starts right behind the fixed node fields. */
inline uint8 *
bplustree_node::Keys() const
{
	uint8 *base = (uint8 *)this;
	return base + sizeof(bplustree_node);
}
/**	Returns the number of bytes in use: the round_up()'ed size of the
 *	fixed fields plus key data, and one uint16 and one off_t per key.
 */
inline int32
bplustree_node::Used() const
{
	const size_t perKey = sizeof(uint16) + sizeof(off_t);
	return round_up(sizeof(bplustree_node) + all_key_length) + all_key_count * perKey;
}
/**	A node is a leaf if it has no overflow link. */
inline bool
bplustree_node::IsLeaf() const
{
	if (overflow_link != BPLUSTREE_NULL)
		return false;

	return true;
}
/**	Returns the fragment with the given index; fragments are
 *	(NUM_FRAGMENT_VALUES + 1) off_t values apart, counted from the
 *	beginning of the node.
 */
inline duplicate_array *
bplustree_node::FragmentAt(int8 index)
{
	off_t *base = (off_t *)this;
	return (duplicate_array *)(base + index * (NUM_FRAGMENT_VALUES + 1));
}
/**	Returns the duplicate array of this node; it starts at the
 *	overflow_link field.
 */
inline duplicate_array *
bplustree_node::DuplicateArray()
{
	off_t *first = &overflow_link;
	return (duplicate_array *)first;
}
/**	Returns the type of a link, which is stored in its two uppermost bits. */
inline uint8
bplustree_node::LinkType(off_t link)
{
	uint64 bits = *(uint64 *)&link;
	return bits >> 62;
}
/**	Builds a link out of its parts: the type goes into the two uppermost
 *	bits, the fragment index into the lowest ten bits, and the offset
 *	("link", with those bits masked out) in between.
 *	The type is shifted as an unsigned value, since shifting the types
 *	2 and 3 into the sign bit of a signed 64 bit value is undefined.
 */
inline off_t
bplustree_node::MakeLink(uint8 type,off_t link,uint32 fragmentIndex)
{
	uint64 typeBits = (uint64)type << 62;
	return (off_t)(typeBits | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff));
}
/**	Returns true if the type of the link has any of the bits of
 *	BPLUSTREE_DUPLICATE_NODE or BPLUSTREE_DUPLICATE_FRAGMENT set.
 */
inline bool
bplustree_node::IsDuplicate(off_t link)
{
	const int mask = BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT;
	return (LinkType(link) & mask) != 0;
}
/**	Returns the offset part of a link, that is the link without its
 *	type bits and without its fragment index.
 */
inline off_t
bplustree_node::FragmentOffset(off_t link)
{
	const off_t offsetMask = 0x3ffffffffffffc00LL;
	return link & offsetMask;
}
/**	Returns the fragment index, stored in the lowest ten bits of a link. */
inline uint32
bplustree_node::FragmentIndex(off_t link)
{
	const off_t indexMask = 0x3ff;
	return (uint32)(link & indexMask);
}
#endif /* B_PLUS_TREE_H */

View File

@ -0,0 +1,599 @@
/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "cpp.h"
#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#ifdef USER
# define spawn_kernel_thread spawn_thread
#endif
// Things the BlockAllocator should do:
// - find a range of blocks of a certain size nearby a specific position
// - allocate a range of blocks of only roughly known size for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
// files, symlinks, etc. (type sensitive allocation policies)
// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operate on that in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has a 800kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead we are reading in every block when it's used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.
// The current implementation is very basic and will be heavily optimized
// in the future.
// Furthermore, the allocation policies used here (when they will be in place)
// should have some real world tests.
// One block of the block bitmap of an allocation group. The positions
// passed to Allocate(), Free(), and IsUsed() are bit numbers; Allocate()
// and Free() reduce their start position modulo NumBlockBits().
class AllocationBlock : public CachedBlock {
public:
AllocationBlock(Volume *volume);
// Sets/clears "numBlocks" bits beginning with "start"; the default value
// 0xffff stands for "all bits from start to the end of this block".
void Allocate(uint16 start,uint16 numBlocks = 0xffff);
void Free(uint16 start,uint16 numBlocks = 0xffff);
inline bool IsUsed(uint16 block);
// "block" is the index of the bitmap block within the group
status_t SetTo(AllocationGroup &group,uint16 block);
// number of bits of the block set by the last SetTo() call
int32 NumBlockBits() const { return fNumBits; }
private:
int32 fNumBits;
};
// In-memory information about one allocation group; the members are public
// and directly maintained by the BlockAllocator.
class AllocationGroup {
public:
AllocationGroup();
// accounts a free range found when scanning the bitmap
void AddFreeRange(int32 start,int32 blocks);
bool IsFull() const { return fFreeBits == 0; }
// number of bits (blocks) in this group
int32 fNumBits;
// block number of the group's first bitmap block
int32 fStart;
// first free bit, size of the largest free range seen by AddFreeRange(),
// and where that range starts; -1 until AddFreeRange() has been called
int32 fFirstFree,fLargest,fLargestFirst;
// number of free bits
int32 fFreeBits;
};
// Only passes the volume on to CachedBlock; fNumBits stays unset until
// SetTo() is called.
AllocationBlock::AllocationBlock(Volume *volume)
: CachedBlock(volume)
{
}
/**	Makes this object refer to the bitmap block with the index "block"
 *	of the specified group, and computes how many bits of it are valid.
 *	Returns B_OK if CachedBlock::SetTo() delivered the block, B_ERROR
 *	if not.
 */
status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	const int32 bitsPerBlock = fVolume->BlockSize() << 3;
	fNumBits = bitsPerBlock;

	// The last group may have fewer bits in its last bitmap block; all
	// blocks in front of that one are always completely used, so the
	// shorter size must not be applied to them.
	const int32 tailBits = group.fNumBits % bitsPerBlock;
	if (tailBits != 0 && block >= group.fNumBits / bitsPerBlock)
		fNumBits = tailBits;

	return CachedBlock::SetTo(group.fStart + block) != NULL ? B_OK : B_ERROR;
}
/**	Returns whether or not the bit "block" is set in this bitmap block.
 *	Bits outside of the block are reported as used, so that they are
 *	never handed out.
 */
bool
AllocationBlock::IsUsed(uint16 block)
{
	// valid bits are 0 to fNumBits - 1
	if (block >= fNumBits)
		return true;

	// the bitmap is accessed in 32 bit words
	return ((uint32 *)fBlock)[block >> 5] & (1UL << (block % 32));
}
/**	Sets "numBlocks" bits in this bitmap block, beginning with the bit
 *	"start" (taken modulo the number of bits in the block).
 *	If "numBlocks" is 0xffff, all bits from "start" to the end of the
 *	block are set. A range exceeding the block is reported via FATAL()
 *	and cut off at the end of the block.
 */
void
AllocationBlock::Allocate(uint16 start,uint16 numBlocks)
{
	start = start % fNumBits;

	if (numBlocks == 0xffff) {
		// allocate all blocks after "start"
		numBlocks = fNumBits - start;
	} else if (start + numBlocks > fNumBits) {
		FATAL(("should allocate more blocks than there are in a block!\n"));
		numBlocks = fNumBits - start;
	}

	// the bitmap is modified one 32 bit word at a time
	uint32 *bitmap = (uint32 *)fBlock;
	int32 word = start >> 5;
	int32 bit = start % 32;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (;bit < 32 && numBlocks > 0;bit++) {
			mask |= 1UL << bit;
			numBlocks--;
		}
		bitmap[word++] |= mask;

		// all following words are filled from their first bit
		bit = 0;
	}
}
/**	Clears "numBlocks" bits in this bitmap block, beginning with the bit
 *	"start" (taken modulo the number of bits in the block).
 *	If "numBlocks" is 0xffff, all bits from "start" to the end of the
 *	block are cleared. A range exceeding the block is reported via
 *	FATAL() and cut off at the end of the block.
 */
void
AllocationBlock::Free(uint16 start,uint16 numBlocks)
{
	start = start % fNumBits;

	if (numBlocks == 0xffff) {
		// free all blocks after "start"
		numBlocks = fNumBits - start;
	} else if (start + numBlocks > fNumBits) {
		FATAL(("should free more blocks than there are in a block!\n"));
		numBlocks = fNumBits - start;
	}

	// walk through the 32 bit words of the bitmap that are affected
	uint32 *words = (uint32 *)fBlock + (start >> 5);
	int32 firstBit = start % 32;

	for (;numBlocks > 0;words++) {
		uint32 mask = 0;
		int32 bit = firstBit;
		while (bit < 32 && numBlocks) {
			mask |= 1UL << bit;
			bit++;
			numBlocks--;
		}
		*words &= ~mask;

		// only the first word can start in the middle
		firstBit = 0;
	}
}
// #pragma mark -
/**	Creates an empty group: no bits, no free bits (so IsFull() is true),
 *	and no free ranges known yet.
 *	fNumBits and fStart are initialized as well, since the bitmap scan in
 *	BlockAllocator::initialize() stops at the first read error and would
 *	otherwise leave them undefined for the remaining groups.
 */
AllocationGroup::AllocationGroup()
	:
	fNumBits(0),
	fStart(0),
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}
/**	Accounts a range of "blocks" free bits that begins at "start":
 *	the first range ever added becomes fFirstFree, the largest one
 *	is remembered in fLargest/fLargestFirst, and the size is added to
 *	the number of free bits.
 */
void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	D(if (blocks > 512)
		PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	fFreeBits += blocks;

	if (blocks > fLargest) {
		fLargestFirst = start;
		fLargest = blocks;
	}

	if (fFirstFree == -1)
		fFirstFree = start;
}
// #pragma mark -
/**	Creates the allocator for the volume; the groups are not set up
 *	before Initialize() is called.
 *	The group counters start at zero, so that the object is in a defined
 *	(empty) state even if Initialize() is never called or bails out early.
 */
BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fGroups(NULL),
	fNumGroups(0),
	fBlocksPerGroup(0)
{
}
// Frees the group array allocated in Initialize(); fGroups is NULL if
// Initialize() has never been called, which delete[] accepts.
BlockAllocator::~BlockAllocator()
{
delete[] fGroups;
}
/**	Allocates the allocation group array, and starts a low priority
 *	thread that scans the block bitmap (see initialize()). If that
 *	thread cannot be spawned, the scan is done directly in the calling
 *	thread instead.
 *	Returns B_ERROR if the lock is unusable, B_NO_MEMORY if the groups
 *	could not be allocated, and the result of resume_thread() or of the
 *	direct initialize() call otherwise.
 */
status_t
BlockAllocator::Initialize()
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().blocks_per_ag;

	fGroups = new AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	thread_id scanner = spawn_kernel_thread((thread_func)BlockAllocator::initialize,"bfs block allocator",B_LOW_PRIORITY,(void *)this);
	if (scanner >= B_OK)
		return resume_thread(scanner);

	// no thread available, so we have to do the work ourselves
	return initialize(this);
}
/**	Scans the block bitmap of the volume, and fills in the allocation
 *	groups (size, start of their bitmap, free ranges). It runs with the
 *	allocator's lock held.
 *	If the number of used blocks found differs from what the super block
 *	says, the super block value is corrected in memory.
 *	Returns B_NO_MEMORY if the read buffer could not be allocated, and
 *	B_OK otherwise.
 *	NOTE(review): a failing cached_read() only ends the scan; B_OK is
 *	still returned, and the used blocks count is then computed from the
 *	groups read so far.
 */
status_t
BlockAllocator::initialize(BlockAllocator *allocator)
{
	Locker lock(allocator->fLock);

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 numBits = 8 * blocks * volume->BlockSize();
	off_t freeBlocks = 0;

	// the buffer holds the complete bitmap of one allocation group
	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup *groups = allocator->fGroups;
	// the first bitmap block read is block 1
	off_t offset = 1;
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0;i < numGroups;i++) {
		if (cached_read(volume->Device(),offset,buffer,blocks,volume->BlockSize()) < B_OK)
			break;

		// the last allocation group may contain less blocks than the others
		groups[i].fNumBits = i == numGroups - 1 ? allocator->fVolume->NumBlocks() - i * numBits : numBits;
		groups[i].fStart = offset;

		// finds all free ranges in this allocation group
		int32 start = 0,range = 0;
		int32 size = groups[i].fNumBits,bit = 0;

		// every word of the buffer holds 32 bits of the bitmap
		int32 words = (size + 31) >> 5;

		for (int32 k = 0;k < words;k++) {
			for (int32 j = 0;j < 32 && bit < size;j++,bit++) {
				if (buffer[k] & (1UL << j)) {
					// a used block ends the current free range, if any
					if (range > 0) {
						groups[i].AddFreeRange(start,range);
						range = 0;
					}
				} else if (range++ == 0)
					start = bit;
			}
		}
		// the group may end with a free range
		if (range)
			groups[i].AddFreeRange(start,range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk is in a dirty state at mount time, it's
		// normal that the values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n",volume->UsedBlocks(),usedBlocks));
		volume->SuperBlock().used_blocks = usedBlocks;
	}

	return B_OK;
}
/**	Allocates between "minimum" and "maximum" blocks, starting the search
 *	at the position "start" in the allocation group "group".
 *	All groups are scanned twice: the first pass looks for "maximum"
 *	blocks in a row, the second one is satisfied with "minimum" blocks.
 *	On success, "run" describes the allocated blocks, the bitmap block is
 *	written back as part of the transaction, and the used blocks counter
 *	of the super block is updated in memory.
 *	Returns B_DEVICE_FULL if no suitable range was found, and B_ERROR if
 *	a bitmap block could not be accessed or written.
 *
 *	NOTE(review): only ranges that cross at most one bitmap block
 *	boundary can be marked; a longer range is rejected with B_ERROR.
 */
status_t
BlockAllocator::AllocateBlocks(Transaction *transaction,int32 group,uint16 start,uint16 maximum,uint16 minimum, block_run &run)
{
	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// Number of bits in a full bitmap block. All positions within a group
	// are computed with it; AllocationBlock::NumBlockBits() may be
	// smaller for the last group and must therefore not be used for that.
	const uint32 bitsPerBlock = fVolume->BlockSize() << 3;

	// the first scan through all allocation groups will look for the
	// wanted maximum of blocks, the second scan will just look to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;

	for (int32 i = 0;i < fNumGroups * 2;i++,group++,start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].fNumBits || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// if the minimum is the same as the maximum, it's not necessary to
			// search for in the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// The wanted maximum is smaller than the largest free block in the group
		// or already smaller than the minimum
		// ToDo: disabled because it's currently not maintained after the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// there may be more than one block per allocation group - and
		// we iterate through it to find a place for the allocation.
		// (one allocation can't exceed one allocation group)

		uint32 block = start / bitsPerBlock;
		int32 range = 0, rangeStart = 0,rangeBlock = 0;

		for (;block < fBlocksPerGroup;block++) {
			if (cached.SetTo(fGroups[group],block) < B_OK)
				RETURN_ERROR(B_ERROR);

			// find a block large enough to hold the allocation
			for (int32 bit = start % bitsPerBlock;bit < cached.NumBlockBits();bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start new range
						rangeStart = block * bitsPerBlock + bit;
						rangeBlock = block;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a block larger than the required minimum (second pass)
					break;
				} else {
					// end of a range
					range = 0;
				}
			}

			// if we found a suitable block, mark the blocks as in use, and write
			// the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				// The code below can only mark a range that lies in the
				// current block, or in the current and the previous one;
				// bail out before anything is changed if it is longer.
				if (block != (uint32)rangeBlock && block - 1 != (uint32)rangeBlock)
					RETURN_ERROR(B_ERROR);

				// Update the allocation group info
				// Note, the fFirstFree block doesn't have to be really free
				if (rangeStart == fGroups[group].fFirstFree)
					fGroups[group].fFirstFree = rangeStart + numBlocks;
				fGroups[group].fFreeBits -= numBlocks;

				if (block != (uint32)rangeBlock) {
					// allocate the part that's in the current block - this
					// can be anything from nothing (the range ended with
					// the previous block) to the whole block
					int32 inCurrent = rangeStart + numBlocks - (int32)(block * bitsPerBlock);
					if (inCurrent > 0) {
						cached.Allocate(0,inCurrent);
						if (cached.WriteBack(transaction) < B_OK)
							RETURN_ERROR(B_ERROR);
					}

					// set the blocks in the previous block, from the start
					// of the range up to the end of that block
					if (cached.SetTo(fGroups[group],block - 1) < B_OK)
						RETURN_ERROR(B_ERROR);

					cached.Allocate(rangeStart % bitsPerBlock);
				} else {
					// just allocate the bits in the current block
					cached.Allocate(rangeStart % bitsPerBlock,numBlocks);
				}

				run.allocation_group = group;
				run.start = rangeStart;
				run.length = numBlocks;

				fVolume->SuperBlock().used_blocks += numBlocks;
					// We are not writing back the disk's super block - it's
					// either done by the journaling code, or when the disk
					// is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				return cached.WriteBack(transaction);
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}
/**	Allocates the single block of a new inode.
 *	The allocation group it starts looking in depends on the type: a
 *	plain directory (neither index nor attribute directory) is placed 8
 *	groups after its parent, everything else in the group of the parent.
 *	Returns whatever AllocateBlocks() returns.
 */
status_t
BlockAllocator::AllocateForInode(Transaction *transaction,const block_run *parent, mode_t type, block_run &run)
{
	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how good they work

	// files are going in the same allocation group as its parent, sub-directories
	// will be inserted 8 allocation groups after the one of the parent

	// the group is kept in 32 bits, as AllocateBlocks() expects it (it also
	// takes care of wrapping it around the number of groups)
	int32 group = parent->allocation_group;
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction,group,0,1,1,run);
}
/**	Allocates up to "numBlocks" (but at least "minimum") blocks for the
 *	data of the specified inode.
 *	Where the search starts depends on the inode: directly after its
 *	last direct run if it already has data, after the inode itself for an
 *	empty directory, and in the following allocation group for an empty
 *	file.
 *	Returns B_ERROR if "numBlocks" is not positive, and the result of
 *	AllocateBlocks() otherwise.
 */
status_t
BlockAllocator::Allocate(Transaction *transaction,const Inode *inode, off_t numBlocks, block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than it is in one allocation group
	if (numBlocks > fGroups[0].fNumBits)
		numBlocks = fGroups[0].fNumBits;

	// AllocateBlocks() takes the number of blocks as a 16 bit value; cut
	// the request off here, or it would be silently truncated (65536
	// blocks would become a request for 0 blocks)
	if (numBlocks > 0xffff)
		numBlocks = 0xffff;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)

	// the group is kept in 32 bits, as AllocateBlocks() expects it
	int32 group = inode->BlockRun().allocation_group;
	uint16 start = 0;

	// are there already allocated blocks? (then just allocate near the last)
	if (inode->Size() > 0) {
		data_stream *data = &inode->Node()->data;
		// we currently don't care for when the data stream is
		// already grown into the indirect ranges
		if (data->max_double_indirect_range == 0
			&& data->max_indirect_range == 0) {
			// find the last direct run in use
			int32 last = 0;
			for (;last < NUM_DIRECT_BLOCKS - 1;last++)
				if (data->direct[last + 1].IsZero())
					break;

			group = data->direct[last].allocation_group;
			start = data->direct[last].start + data->direct[last].length;
		}
	} else if (inode->IsDirectory()) {
		// directory data will go in the same allocation group as the inode is in
		// but after the inode data
		start = inode->BlockRun().start;
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().allocation_group + 1;
	}

	return AllocateBlocks(transaction,group,start,numBlocks,minimum,run);
}
/**	Frees the blocks of the specified run: the bits are cleared in the
 *	block bitmap (written back as part of the transaction), and the
 *	group info as well as the used blocks counter of the super block are
 *	updated in memory.
 *	Returns B_BAD_VALUE if the run is invalid or touches the reserved
 *	area at the beginning of the volume, B_BAD_DATA if (in debug builds)
 *	the run is not completely allocated, and B_IO_ERROR if a bitmap block
 *	could not be accessed or written.
 */
status_t
BlockAllocator::Free(Transaction *transaction,block_run &run)
{
	Locker lock(fLock);

	int32 group = run.allocation_group;
	uint16 start = run.start;
	uint16 length = run.length;

	// doesn't use Volume::IsValidBlockRun() here because it can check better
	// against the group size (the last group may have a different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].fNumBits
		|| start + length > fGroups[group].fNumBits
		|| length == 0) {
		FATAL(("someone tried to free an invalid block_run (%ld, %u, %u)\n",group,start,length));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the drive
	if (group == 0 && start < fVolume->Log().start + fVolume->Log().length) {
		FATAL(("someone tried to free a reserved block_run (%ld, %u, %u)\n",group,start,length));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	AllocationBlock cached(fVolume);

	const uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.start / bitsPerBlock;

	// the group info works with positions relative to the group
	if (fGroups[group].fFirstFree > start)
		fGroups[group].fFirstFree = start;
	fGroups[group].fFreeBits += length;

	// From here on, "start" is the position within the current bitmap
	// block - the length computation below would go wrong with the group
	// relative position for every block but the first one of a group.
	start = start % bitsPerBlock;

	for (;block < fBlocksPerGroup;block++) {
		if (cached.SetTo(fGroups[group],block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		// only free what lies within the current bitmap block
		uint16 freeLength = length;
		if (start + length > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start,freeLength);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= freeLength;
		if (length <= 0)
			break;

		// the rest of the run starts at the beginning of the next block
		start = 0;
	}

	fVolume->SuperBlock().used_blocks -= run.length;
	return B_OK;
}
#ifdef DEBUG
#include "BPlusTree.h"
/**	Debug helper: checks if all blocks of the run are marked as used in
 *	the block bitmap. If one of them is not, a message is printed,
 *	Volume::Panic() is called, and B_BAD_DATA is returned.
 *	Returns B_IO_ERROR if a bitmap block could not be accessed, and B_OK
 *	if the run is completely allocated.
 */
status_t
BlockAllocator::CheckBlockRun(block_run run)
{
	const uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.start / bitsPerBlock;
	// "start" is the current position within the bitmap block, "pos"
	// counts the blocks of the run that have been checked
	uint32 start = run.start % bitsPerBlock;
	uint32 pos = 0;

	AllocationBlock cached(fVolume);

	// stop as soon as the whole run has been checked
	for (;block < fBlocksPerGroup && pos < run.length;block++) {
		if (cached.SetTo(fGroups[run.allocation_group],block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		for (;pos < run.length && start < cached.NumBlockBits();start++,pos++) {
			if (!cached.IsUsed(start)) {
				PRINT(("block_run(%ld,%u,%u) is only partially allocated!\n",run.allocation_group,run.start,run.length));
				fVolume->Panic();
				return B_BAD_DATA;
			}
		}

		// the run continues at the first bit of the next bitmap block
		start = 0;
	}
	return B_OK;
}
/**	Debug helper: runs CheckBlockRun() on the block run of the inode
 *	itself, and on each of its direct data runs up to the first empty
 *	one. Returns the first error encountered, or B_OK.
 */
status_t
BlockAllocator::CheckInode(Inode *inode)
{
	status_t status = CheckBlockRun(inode->BlockRun());
	if (status < B_OK)
		return status;

	// only checks the direct range for now...
	data_stream *data = &inode->Node()->data;
	int32 index = 0;

	while (index < NUM_DIRECT_BLOCKS && !data->direct[index].IsZero()) {
		status = CheckBlockRun(data->direct[index]);
		if (status < B_OK)
			return status;

		index++;
	}

	return B_OK;
}
/**	Debug helper: walks through all entries of the directory "inode", and
 *	runs CheckInode() on each of them. Entries whose inode cannot be
 *	acquired are reported and skipped.
 *	Returns B_BAD_VALUE if "inode" is NULL or not a directory, the error
 *	of Inode::GetTree() or of the first failing CheckInode(), and B_OK
 *	otherwise.
 */
status_t
BlockAllocator::Check(Inode *inode)
{
	if (inode == NULL || !inode->IsDirectory())
		return B_BAD_VALUE;

	BPlusTree *tree;
	status_t status = inode->GetTree(&tree);
	if (status < B_OK)
		return status;

	TreeIterator iterator(tree);
	char key[BPLUSTREE_MAX_KEY_LENGTH];
	uint16 length;
	off_t offset;

	for (;;) {
		if (iterator.GetNextEntry(key,&length,BPLUSTREE_MAX_KEY_LENGTH,&offset) != B_OK)
			break;

		Vnode vnode(fVolume,offset);
		Inode *entry;
		if (vnode.Get(&entry) < B_OK) {
			FATAL(("could not get inode in tree at: %Ld\n",offset));
			continue;
		}

		block_run run = entry->BlockRun();
		// NOTE(review): "key" is printed with %s - this relies on
		// GetNextEntry() terminating it, which can't be seen from here
		PRINT(("check allocations of inode \"%s\" (%ld,%u,%u)\n",key,run.allocation_group,run.start,run.length));

		status = CheckInode(entry);
		if (status < B_OK)
			return status;
	}

	return B_OK;
}
#endif /* DEBUG */

View File

@ -0,0 +1,49 @@
#ifndef BLOCK_ALLOCATOR_H
#define BLOCK_ALLOCATOR_H
/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <Lock.h>
class AllocationGroup;
class Transaction;
class Volume;
class Inode;
struct disk_super_block;
struct block_run;
// Manages the block bitmap of a volume: it keeps some information about
// every allocation group in memory, and allocates and frees block runs.
class BlockAllocator {
public:
BlockAllocator(Volume *volume);
~BlockAllocator();
// allocates the group array and starts the scan of the block bitmap
status_t Initialize();
// allocates the block for a new inode of the given type near "parent"
status_t AllocateForInode(Transaction *transaction,const block_run *parent,mode_t type,block_run &run);
// allocates up to "numBlocks", but at least "minimum" blocks for the data
// of "inode"
status_t Allocate(Transaction *transaction,const Inode *inode,off_t numBlocks,block_run &run,uint16 minimum = 1);
status_t Free(Transaction *transaction,block_run &run);
// the worker behind the two methods above; "numBlocks" is the wanted
// maximum (the definition calls this parameter "maximum")
status_t AllocateBlocks(Transaction *transaction,int32 group, uint16 start, uint16 numBlocks, uint16 minimum, block_run &run);
#ifdef DEBUG
// consistency checks against the block bitmap
status_t CheckBlockRun(block_run run);
status_t CheckInode(Inode *inode);
status_t Check(Inode *inode);
#endif
private:
// scans the block bitmap; used as thread function by Initialize()
static status_t initialize(BlockAllocator *);
Volume *fVolume;
Benaphore fLock;
AllocationGroup *fGroups;
int32 fNumGroups,fBlocksPerGroup;
};
#endif /* BLOCK_ALLOCATOR_H */

View File

@ -0,0 +1,55 @@
#ifndef CHAIN_H
#define CHAIN_H
/* Chain - a chain implementation; it's used for the callback management
** throughout the code (currently TreeIterator, and AttributeIterator).
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
/** The Link class you want to use with the Chain class needs to have
* a "fNext" member which is accessible from within the Chain class.
*/
/**	A minimal singly linked list. The links are not owned by the chain;
 *	it only connects them using their "fNext" member.
 */
template<class Link> class Chain {
	public:
		Chain()
			:
			fFirst(NULL)
		{
		}

		/**	Puts the link in front of all others. */
		void Add(Link *link)
		{
			link->fNext = fFirst;
			fFirst = link;
		}

		/**	Takes the link out of the chain. Nothing happens if the link
		 *	is NULL, or not part of the chain.
		 */
		void Remove(Link *link)
		{
			if (link == NULL)
				return;

			// search list for the correct callback to remove
			Link *last = NULL,*entry;
			for (entry = fFirst;entry != NULL && entry != link;entry = entry->fNext)
				last = entry;

			// the end of the chain was reached without finding the link
			if (entry == NULL)
				return;

			if (last)
				last->fNext = link->fNext;
			else
				fFirst = link->fNext;
		}

		/**	Returns the link that follows "last", or the first link of
		 *	the chain if "last" is NULL; use it to iterate over the chain.
		 */
		Link *Next(Link *last)
		{
			if (last == NULL)
				return fFirst;

			return last->fNext;
		}

	private:
		Link	*fFirst;
};
#endif /* CHAIN_H */

View File

@ -0,0 +1,241 @@
/* Debug - debug stuff
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Some code is based on work previously done by Marcus Overhagen
**
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "BPlusTree.h"
#include <KernelExport.h>
#include <time.h>
#define Print __out
/**	Converts a 32 bit ID into a string of its 4 bytes, most significant
 *	byte first; every byte that is not a printable ASCII character is
 *	replaced by a dot.
 *	The string is kept in a static buffer, so the result is only valid
 *	until the next call, and the function is not reentrant.
 */
char *
get_tupel(uint32 id)
{
	static unsigned char tupel[5];

	tupel[0] = 0xff & (id >> 24);
	tupel[1] = 0xff & (id >> 16);
	tupel[2] = 0xff & (id >> 8);
	tupel[3] = 0xff & (id);
	tupel[4] = 0;

	for (int16 i = 0;i < 4;i++) {
		// printable ASCII ranges from ' ' to '~'; DEL (127) and
		// everything above is not printable
		if (tupel[i] < ' ' || tupel[i] > '~')
			tupel[i] = '.';
	}

	return (char *)tupel;
}
// Prints "prefix", followed by the allocation group, start, and length of
// the block run in parentheses.
void
dump_block_run(const char *prefix,block_run &run)
{
Print("%s(%ld, %d, %d)\n",prefix,run.allocation_group,run.start,run.length);
}
/**	Prints all fields of the super block. The magic values are also shown
 *	as characters, and compared to the values they should have.
 */
void
dump_super_block(disk_super_block *superBlock)
{
	Print("disk_super_block:\n");
	Print(" name = %s\n",superBlock->name);
	Print(" magic1 = %#08lx (%s) %s\n",superBlock->magic1, get_tupel(superBlock->magic1), (superBlock->magic1 == SUPER_BLOCK_MAGIC1 ? "valid" : "INVALID"));
	Print(" fs_byte_order = %#08lx (%s)\n",superBlock->fs_byte_order, get_tupel(superBlock->fs_byte_order));
	Print(" block_size = %lu\n",superBlock->block_size);
	Print(" block_shift = %lu\n",superBlock->block_shift);
	Print(" num_blocks = %Lu\n",superBlock->num_blocks);
	Print(" used_blocks = %Lu\n",superBlock->used_blocks);
	Print(" inode_size = %lu\n",superBlock->inode_size);
	Print(" magic2 = %#08lx (%s) %s\n",superBlock->magic2, get_tupel(superBlock->magic2), (superBlock->magic2 == (int)SUPER_BLOCK_MAGIC2 ? "valid" : "INVALID"));
	Print(" blocks_per_ag = %lu\n",superBlock->blocks_per_ag);
	// the size of the group is a 64 bit value (1LL << ...), and therefore
	// has to be printed with %Ld like the other 64 bit values
	Print(" ag_shift = %lu (%Ld bytes)\n",superBlock->ag_shift, 1LL << superBlock->ag_shift);
	Print(" num_ags = %lu\n",superBlock->num_ags);
	Print(" flags = %#08lx (%s)\n",superBlock->flags, get_tupel(superBlock->flags));
	dump_block_run(" log_blocks = ",superBlock->log_blocks);
	Print(" log_start = %Lu\n",superBlock->log_start);
	Print(" log_end = %Lu\n",superBlock->log_end);
	Print(" magic3 = %#08lx (%s) %s\n",superBlock->magic3, get_tupel(superBlock->magic3), (superBlock->magic3 == SUPER_BLOCK_MAGIC3 ? "valid" : "INVALID"));
	dump_block_run(" root_dir = ",superBlock->root_dir);
	dump_block_run(" indices = ",superBlock->indices);
}
/**	Prints the data stream: all direct runs in use, the indirect and
 *	double indirect runs (if set), the maximum range of each of the three
 *	levels, and the size of the stream.
 */
void
dump_data_stream(data_stream *stream)
{
	Print("data_stream:\n");

	for (int index = 0; index < NUM_DIRECT_BLOCKS; index++) {
		// unused runs are left out
		if (stream->direct[index].IsZero())
			continue;

		Print(" direct[%02d] = ",index);
		dump_block_run("",stream->direct[index]);
	}
	Print(" max_direct_range = %Lu\n",stream->max_direct_range);

	if (!stream->indirect.IsZero()) {
		dump_block_run(" indirect = ",stream->indirect);
	}
	Print(" max_indirect_range = %Lu\n",stream->max_indirect_range);

	if (!stream->double_indirect.IsZero()) {
		dump_block_run(" double_indirect = ",stream->double_indirect);
	}
	Print(" max_double_indirect_range = %Lu\n",stream->max_double_indirect_range);

	Print(" size = %Lu\n",stream->size);
}
// Prints all fields of the inode, including its data stream (via
// dump_data_stream()) and the four pad values.
void
dump_inode(bfs_inode *inode)
{
Print("inode:\n");
Print(" magic1 = %08lx (%s) %s\n",inode->magic1,
get_tupel(inode->magic1), (inode->magic1 == INODE_MAGIC1 ? "valid" : "INVALID"));
dump_block_run( " inode_num = ",inode->inode_num);
Print(" uid = %lu\n",inode->uid);
Print(" gid = %lu\n",inode->gid);
Print(" mode = %08lx\n",inode->mode);
Print(" flags = %08lx\n",inode->flags);
// the times are printed raw, and shifted right by INODE_TIME_SHIFT
Print(" create_time = %Ld (%Ld)\n",inode->create_time,inode->create_time >> INODE_TIME_SHIFT);
Print(" last_modified_time = %Ld (%Ld)\n",inode->last_modified_time,inode->last_modified_time >> INODE_TIME_SHIFT);
dump_block_run( " parent = ",inode->parent);
dump_block_run( " attributes = ",inode->attributes);
Print(" type = %lu\n",inode->type);
Print(" inode_size = %lu\n",inode->inode_size);
Print(" etc = %#08lx\n",inode->etc);
// the link is only printed for symlinks that don't have the
// INODE_LONG_SYMLINK flag set, "-" otherwise
Print(" short_symlink = %s\n",
S_ISLNK(inode->mode) && (inode->flags & INODE_LONG_SYMLINK) == 0? inode->short_symlink : "-");
dump_data_stream(&(inode->data));
Print(" --\n pad[0] = %08lx\n",inode->pad[0]);
Print(" pad[1] = %08lx\n",inode->pad[1]);
Print(" pad[2] = %08lx\n",inode->pad[2]);
Print(" pad[3] = %08lx\n",inode->pad[3]);
}
// Prints all fields of the B+tree header; the magic is also shown as
// characters, and compared to BPLUSTREE_MAGIC.
void
dump_bplustree_header(bplustree_header *header)
{
Print("bplustree_header:\n");
Print(" magic = %#08lx (%s) %s\n",header->magic,
get_tupel(header->magic), (header->magic == BPLUSTREE_MAGIC ? "valid" : "INVALID"));
Print(" node_size = %lu\n",header->node_size);
Print(" max_number_of_levels = %lu\n",header->max_number_of_levels);
Print(" data_type = %lu\n",header->data_type);
Print(" root_node_pointer = %Ld\n",header->root_node_pointer);
Print(" free_node_pointer = %Ld\n",header->free_node_pointer);
Print(" maximum_size = %Lu\n",header->maximum_size);
}
// number of bytes shown per output line of dump_block()
#define DUMPED_BLOCK_SIZE 16

/**	Prints a hex dump of \a size bytes starting at \a buffer, with
 *	DUMPED_BLOCK_SIZE bytes per line. Each line consists of the hex
 *	values (grouped in units of 4 bytes), followed by the same bytes
 *	as characters; bytes that are not printable ASCII are shown as ".".
 */
void
dump_block(const char *buffer,int size)
{
	for (int start = 0;start < size;start += DUMPED_BLOCK_SIZE) {
		// hex column - always DUMPED_BLOCK_SIZE entries wide, the part
		// beyond the end of the buffer is filled up with blanks
		for (int i = start;i < start + DUMPED_BLOCK_SIZE;i++) {
			if (!(i % 4))
				Print(" ");
			if (i >= size)
				Print(" ");
			else
				Print("%02x",*(unsigned char *)(buffer+i));
		}
		Print(" ");

		// character column - stops at the end of the buffer
		for (int i = start;i < start + DUMPED_BLOCK_SIZE && i < size;i++) {
			// Look at the byte as unsigned, so that the result doesn't
			// depend on the signedness of "char"; only 0x20 to 0x7e
			// are printable, everything else is replaced by a dot
			unsigned char c = *(const unsigned char *)(buffer+i);
			if (c < 0x20 || c > 0x7e)
				Print(".");
			else
				Print("%c",c);
		}
		Print("\n");
	}
}
/**	Prints the header fields of the given B+tree node and, if \a header
 *	is given, all of its keys and values.
 *	If the node looks implausible (key count larger than key length,
 *	more keys than could fit into a node, or no keys at all), or if a
 *	key has an invalid length, a raw dump of the node is printed instead.
 *	If \a volume is given, every value is also printed as the block run
 *	returned by Volume::ToBlockRun().
 */
void
dump_bplustree_node(bplustree_node *node,bplustree_header *header,Volume *volume)
{
	Print("bplustree_node:\n");
	Print(" left_link = %Ld\n",node->left_link);
	Print(" right_link = %Ld\n",node->right_link);
	Print(" overflow_link = %Ld\n",node->overflow_link);
	Print(" all_key_count = %u\n",node->all_key_count);
	Print(" all_key_length = %u\n",node->all_key_length);

	// without the header, we know neither node size nor key type
	if (header == NULL)
		return;

	if (node->all_key_count > node->all_key_length
		|| uint32(node->all_key_count * 10) > (uint32)header->node_size
		|| node->all_key_count == 0) {
		Print("\n");
		dump_block((char *)node,header->node_size/*,sizeof(off_t)*/);
		return;
	}

	Print("\n");

	// Compare real pointers instead of casting them to uint32, which
	// would cut off the upper half of the address on 64 bit targets
	const uint8 *nodeStart = (const uint8 *)node;
	const uint8 *nodeEnd = nodeStart + header->node_size;

	for (int32 i = 0;i < node->all_key_count;i++) {
		uint16 length;
		char buffer[256],*key = (char *)node->KeyAt(i,&length);
		if (length > 255 || length == 0) {
			Print(" %2ld. Invalid length (%u)!!\n",i,length);
			dump_block((char *)node,header->node_size/*,sizeof(off_t)*/);
			break;
		}

		// copy the key, so that it can be null terminated for printing
		memcpy(buffer,key,length);
		buffer[length] = '\0';

		off_t *value = node->Values() + i;
		if ((const uint8 *)value < nodeStart || (const uint8 *)value > nodeEnd)
			Print(" %2ld. Invalid Offset!!\n",i);
		else {
			Print(" %2ld. ",i);
			if (header->data_type == BPLUSTREE_STRING_TYPE)
				Print("\"%s\"",buffer);
			else if (header->data_type == BPLUSTREE_INT32_TYPE)
				Print("int32 = %ld (0x%lx)",*(int32 *)&buffer,*(int32 *)&buffer);
			else if (header->data_type == BPLUSTREE_UINT32_TYPE)
				Print("uint32 = %lu (0x%lx)",*(uint32 *)&buffer,*(uint32 *)&buffer);
			else if (header->data_type == BPLUSTREE_INT64_TYPE)
				Print("int64 = %Ld (0x%Lx)",*(int64 *)&buffer,*(int64 *)&buffer);
			else
				Print("???");

			// the two topmost bits are masked out to get the offset,
			// they are evaluated by bplustree_node::LinkType() below
			off_t offset = *value & 0x3fffffffffffffffLL;
			Print(" (%d bytes) -> %Ld",length,offset);
			if (volume != NULL)
			{
				block_run run = volume->ToBlockRun(offset);
				Print(" (%ld, %d)",run.allocation_group,run.start);
			}
			if (bplustree_node::LinkType(*value) == BPLUSTREE_DUPLICATE_FRAGMENT)
				Print(" (duplicate fragment %Ld)\n",*value & 0x3ff);
			else if (bplustree_node::LinkType(*value) == BPLUSTREE_DUPLICATE_NODE)
				Print(" (duplicate node)\n");
			else
				Print("\n");
		}
	}
}

View File

@ -0,0 +1,74 @@
#ifndef DEBUG_H
#define DEBUG_H
/* Debug - debug stuff
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef USER
# include <stdio.h>
# define __out printf
#else
# include <null.h>
# define __out dprintf
#endif
// Short overview over the debug output macros:
// PRINT()
// is for general messages that should not normally appear in a release build
// FATAL()
// this is for fatal messages, when something has really gone wrong
// INFORM()
// general information, as disk size, etc.
// REPORT_ERROR(status_t)
// prints out error information
// RETURN_ERROR(status_t)
// calls REPORT_ERROR() and returns the value
// D()
// the statements in D() are only included if DEBUG is defined
#ifdef DEBUG
// Debug build: all macros produce output via __out, prefixed with "bfs: ".
// PRINT(), FATAL(), INFORM(), and FUNCTION_START() expect their argument
// list in an extra pair of parentheses, e.g. PRINT(("value = %d\n", value)),
// since "x" is appended directly to __out.
#define PRINT(x) { __out("bfs: "); __out x; }
// prints the current function, the line number, and strerror() of the status
#define REPORT_ERROR(status) __out("bfs: %s:%ld: %s\n",__FUNCTION__,__LINE__,strerror(status));
// evaluates "err" once, reports it if it is smaller than B_OK, and returns it
#define RETURN_ERROR(err) { status_t _status = err; if (_status < B_OK) REPORT_ERROR(_status); return _status;}
#define FATAL(x) { __out("bfs: "); __out x; }
#define INFORM(x) { __out("bfs: "); __out x; }
// #define FUNCTION() __out("bfs: %s()\n",__FUNCTION__);
// prints the name of the current function, followed by the given message
#define FUNCTION_START(x) { __out("bfs: %s() ",__FUNCTION__); __out x; }
// FUNCTION() is currently disabled (see the commented out variant above)
#define FUNCTION() ;
// #define FUNCTION_START(x) ;
// the statements in "x" are compiled in
#define D(x) {x;};
#else
// Non-debug build: only FATAL() and INFORM() still produce output,
// RETURN_ERROR() just returns the status, everything else expands to
// an empty statement.
#define PRINT(x) ;
#define REPORT_ERROR(status) ;
#define RETURN_ERROR(status) return status;
#define FATAL(x) { __out("bfs: "); __out x; }
#define INFORM(x) { __out("bfs: "); __out x; }
#define FUNCTION() ;
#define FUNCTION_START(x) ;
#define D(x) ;
#endif
// The dump functions are only declared if DEBUG is defined
#ifdef DEBUG
// forward declarations, so that this header doesn't have to include
// the headers defining these types
struct block_run;
struct bplustree_header;
struct bplustree_node;
struct data_stream;
struct bfs_inode;
struct disk_super_block;
class Volume;
// some structure dump functions
extern void dump_block_run(const char *prefix, block_run &run);
extern void dump_super_block(disk_super_block *superBlock);
extern void dump_data_stream(data_stream *stream);
extern void dump_inode(bfs_inode *inode);
extern void dump_bplustree_header(bplustree_header *header);
// "header" and "volume" are optional and default to NULL
extern void dump_bplustree_node(bplustree_node *node,bplustree_header *header = NULL,Volume *volume = NULL);
// dumps "size" bytes starting at "buffer"
extern void dump_block(const char *buffer, int size);
#endif
#endif /* DEBUG_H */

View File

@ -0,0 +1,335 @@
/* Index - index access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "cpp.h"
#include "Index.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include <TypeConstants.h>
/**	Creates an Index object for the given volume that is not yet bound
 *	to any index; use SetTo() or Create() to attach one.
 */
Index::Index(Volume *volume)
	:
	fVolume(volume),
	fNode(NULL),
	fName(NULL)
		// has to be initialized, since Update() compares the requested
		// name against it before anything else has set it
{
}
/**	Puts the vnode of the index back, if there is one.
 */
Index::~Index()
{
	if (fNode != NULL)
		put_vnode(fVolume->ID(),fNode->ID());
}
void
Index::Unset()
{
if (fNode == NULL)
return;
put_vnode(fVolume->ID(),fNode->ID());
fNode = NULL;
}
/**	Binds the object to the index with the specified name.
 *	The name is looked up in the B+tree of the volume's indices node,
 *	and the vnode found there is acquired via get_vnode().
 *
 *	Returns B_BAD_VALUE if \a name is NULL or the indices tree is not
 *	available, B_ENTRY_NOT_FOUND if there is no indices node or the
 *	vnode could not be acquired, the error of BPlusTree::Find() if the
 *	name is not in the tree, and B_ERROR if get_vnode() succeeded but
 *	delivered a NULL pointer.
 *	On failure, the object is left unset.
 */
status_t
Index::SetTo(const char *name)
{
	// remove the old node, if the index is set for the second time
	Unset();

	// the old name is no longer valid either - it will only be set
	// again if the new index could be found
	fName = NULL;

	if (name == NULL)
		return B_BAD_VALUE;

	Inode *indices = fVolume->IndicesNode();
	if (indices == NULL)
		return B_ENTRY_NOT_FOUND;

	BPlusTree *tree;
	if (indices->GetTree(&tree) != B_OK)
		return B_BAD_VALUE;

	vnode_id id;
	status_t status = tree->Find((uint8 *)name,(uint16)strlen(name),&id);
	if (status != B_OK)
		return status;

	if (get_vnode(fVolume->ID(),id,(void **)&fNode) != B_OK) {
		// make sure we don't keep whatever the failed call left behind
		fNode = NULL;
		return B_ENTRY_NOT_FOUND;
	}

	if (fNode == NULL) {
		FATAL(("fatal error at Index::SetTo(), get_vnode() returned NULL pointer\n"));
		put_vnode(fVolume->ID(),id);
		return B_ERROR;
	}

	// only stores the pointer, so it assumes that it will stay constant
	// in further comparisons (currently only used in Index::Update())
	fName = name;

	return B_OK;
}
/**	Returns the type constant (B_INT32_TYPE, B_STRING_TYPE, ...) that
 *	corresponds to the index type bits in the mode of the index node.
 *	Returns 0 if the object is not set, or if the bits don't match
 *	exactly one of the known index types.
 */
uint32
Index::Type()
{
	if (fNode == NULL)
		return 0;

	uint32 type = 0;

	switch (fNode->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX |
			S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)) {
		case S_INT_INDEX:
			type = B_INT32_TYPE;
			break;
		case S_UINT_INDEX:
			type = B_UINT32_TYPE;
			break;
		case S_LONG_LONG_INDEX:
			type = B_INT64_TYPE;
			break;
		case S_ULONG_LONG_INDEX:
			type = B_UINT64_TYPE;
			break;
		case S_FLOAT_INDEX:
			type = B_FLOAT_TYPE;
			break;
		case S_DOUBLE_INDEX:
			type = B_DOUBLE_TYPE;
			break;
		case S_STR_INDEX:
			type = B_STRING_TYPE;
			break;
		default:
			FATAL(("index has unknown type!\n"));
			return 0;
	}

	return type;
}
/**	Returns the size of the keys of this index in bytes, depending on
 *	the index type bits in the mode of the index node.
 *	Returns 0 if the object is not set, for string indices, and for
 *	unknown index types.
 */
size_t
Index::KeySize()
{
	if (fNode == NULL)
		return 0;

	int32 mode = fNode->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX |
		S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX);

	switch (mode) {
		case S_STR_INDEX:
			// string indices don't have a fixed key size
			return 0;

		case S_INT_INDEX:
		case S_UINT_INDEX:
			return sizeof(int32);

		case S_LONG_LONG_INDEX:
		case S_ULONG_LONG_INDEX:
			return sizeof(int64);

		case S_FLOAT_INDEX:
			return sizeof(float);

		case S_DOUBLE_INDEX:
			return sizeof(double);

		default:
			break;
	}

	FATAL(("index has unknown type!\n"));
	return 0;
}
/**	Creates a new index with the specified name and type in the indices
 *	directory of the volume (which is created first, if it doesn't exist
 *	yet), and binds this object to it.
 *
 *	Returns B_BAD_TYPE if \a type is not one of the supported type
 *	constants; otherwise the result of creating the indices root, of
 *	Inode::Create(), or of acquiring the new vnode.
 */
status_t
Index::Create(Transaction *transaction,const char *name,uint32 type)
{
	Unset();

	// whatever name was cached before, it doesn't belong to the index
	// we are about to create
	fName = NULL;

	int32 mode = 0;
	switch (type) {
		case B_INT32_TYPE:
			mode = S_INT_INDEX;
			break;
		case B_UINT32_TYPE:
			mode = S_UINT_INDEX;
			break;
		case B_INT64_TYPE:
			mode = S_LONG_LONG_INDEX;
			break;
		case B_UINT64_TYPE:
			mode = S_ULONG_LONG_INDEX;
			break;
		case B_FLOAT_TYPE:
			mode = S_FLOAT_INDEX;
			break;
		case B_DOUBLE_TYPE:
			mode = S_DOUBLE_INDEX;
			break;
		case B_STRING_TYPE:
			mode = S_STR_INDEX;
			break;
		default:
			return B_BAD_TYPE;
	}

	status_t status;

	// do we need to create the index directory first?
	if (fVolume->IndicesNode() == NULL) {
		if ((status = fVolume->CreateIndicesRoot(transaction)) < B_OK) {
			RETURN_ERROR(status);
		}
	}

	vnode_id id;
	status = Inode::Create(transaction,fVolume->IndicesNode(),name,S_INDEX_DIR | S_DIRECTORY | mode,0,type,&id);
	if (status != B_OK)
		return status;

	// since Inode::Create() lets the created inode open if "id" is specified,
	// we don't need to call Vnode::Keep() here
	Vnode vnode(fVolume,id);
	status = vnode.Get(&fNode);
	if (status == B_OK) {
		// remember the name, so that Update() knows which index is set
		fName = name;
	} else
		fNode = NULL;

	return status;
}
/**	Updates the specified index, the oldKey will be removed from, the newKey
 *	inserted into the tree.
 *	Either key may be NULL (but not both), in which case the respective
 *	step is skipped; a key that is given must have a length greater than
 *	zero. If \a type is 0, the type of the index itself is used to
 *	compare the keys.
 *	Live queries are notified about the change in any case, whether the
 *	index exists or not.
 *	If the method returns B_BAD_INDEX, it means the index couldn't be found -
 *	the most common reason will be that the index doesn't exist.
 *	You may not want to let the whole transaction fail because of that.
 */
status_t
Index::Update(Transaction *transaction,const char *name,int32 type,const uint8 *oldKey,uint16 oldLength,const uint8 *newKey,uint16 newLength,Inode *inode)
{
	if (name == NULL
		|| (oldKey == NULL && newKey == NULL)
		|| (oldKey != NULL && oldLength == 0)
		|| (newKey != NULL && newLength == 0))
		return B_BAD_VALUE;

	// if the two keys are identical, don't do anything
	if (type != 0 && !compareKeys(type,oldKey,oldLength,newKey,newLength))
		return B_OK;

	// update all live queries about the change, if they have an index or not
	fVolume->UpdateLiveQueries(inode,name,type,oldKey,oldLength,newKey,newLength);

	// The name is compared by pointer only (see SetTo()); also make sure
	// that there actually is a node before relying on the cached name
	if ((fNode == NULL || name != fName) && SetTo(name) < B_OK)
		return B_BAD_INDEX;

	// now that we have the type, check again for equality
	if (type == 0 && !compareKeys(Type(),oldKey,oldLength,newKey,newLength))
		return B_OK;

	BPlusTree *tree;
	status_t status = Node()->GetTree(&tree);
	if (status < B_OK)
		return status;

	// remove the old key from the tree
	if (oldKey != NULL) {
		status = tree->Remove(transaction,(const uint8 *)oldKey,oldLength,inode->ID());
		if (status == B_ENTRY_NOT_FOUND) {
			// That's not nice, but should be no reason to let the whole thing fail
			FATAL(("Could not find value in index \"%s\"!\n",name));

			// don't report the missing key as an error to the caller
			// in case there is no new key to insert
			status = B_OK;
		} else if (status < B_OK)
			return status;
	}

	// add the new key to the tree
	if (newKey != NULL)
		status = tree->Insert(transaction,(const uint8 *)newKey,newLength,inode->ID());

	return status;
}
/**	Adds the given name to the "name" index; implemented as
 *	UpdateName() without an old name.
 */
status_t
Index::InsertName(Transaction *transaction,const char *name,Inode *inode)
{
	const char *noOldName = NULL;
	return UpdateName(transaction,noOldName,name,inode);
}
/**	Removes the given name from the "name" index; implemented as
 *	UpdateName() without a new name.
 */
status_t
Index::RemoveName(Transaction *transaction,const char *name,Inode *inode)
{
	const char *noNewName = NULL;
	return UpdateName(transaction,name,noNewName,inode);
}
/**	Replaces \a oldName with \a newName in the "name" index. Either of
 *	the names may be NULL, in which case its length is passed as 0.
 */
status_t
Index::UpdateName(Transaction *transaction,const char *oldName, const char *newName,Inode *inode)
{
	uint16 oldLength = 0;
	if (oldName)
		oldLength = strlen(oldName);

	uint16 newLength = 0;
	if (newName)
		newLength = strlen(newName);

	return Update(transaction,"name",B_STRING_TYPE,(uint8 *)oldName,oldLength,
		(uint8 *)newName,newLength,inode);
}
/**	Inserts the current size of the inode into the "size" index.
 */
status_t
Index::InsertSize(Transaction *transaction, Inode *inode)
{
	off_t currentSize = inode->Size();

	return Update(transaction,"size",B_INT64_TYPE,NULL,0,
		(uint8 *)&currentSize,sizeof(int64),inode);
}
/**	Removes the inode from the "size" index, using the size that was
 *	last put into the index.
 */
status_t
Index::RemoveSize(Transaction *transaction, Inode *inode)
{
	// Inode::OldSize() is the size that's in the index
	off_t indexedSize = inode->OldSize();

	return Update(transaction,"size",B_INT64_TYPE,
		(uint8 *)&indexedSize,sizeof(int64),NULL,0,inode);
}
/**	Replaces the old size of the inode in the "size" index with its
 *	current size. Only if that succeeded, the inode's notion of its
 *	old size is updated as well.
 */
status_t
Index::UpdateSize(Transaction *transaction,Inode *inode)
{
	off_t indexedSize = inode->OldSize();
	off_t currentSize = inode->Size();

	status_t status = Update(transaction,"size",B_INT64_TYPE,
		(uint8 *)&indexedSize,sizeof(int64),
		(uint8 *)&currentSize,sizeof(int64),inode);
	if (status != B_OK)
		return status;

	inode->UpdateOldSize();
	return status;
}
/**	Inserts the last_modified_time of the inode into the
 *	"last_modified" index.
 */
status_t
Index::InsertLastModified(Transaction *transaction, Inode *inode)
{
	off_t currentTime = inode->Node()->last_modified_time;

	return Update(transaction,"last_modified",B_INT64_TYPE,NULL,0,
		(uint8 *)&currentTime,sizeof(int64),inode);
}
/**	Removes the inode from the "last_modified" index, using the time
 *	that was last put into the index.
 */
status_t
Index::RemoveLastModified(Transaction *transaction, Inode *inode)
{
	// Inode::OldLastModified() is the value which is in the index
	off_t indexedTime = inode->OldLastModified();

	return Update(transaction,"last_modified",B_INT64_TYPE,
		(uint8 *)&indexedTime,sizeof(int64),NULL,0,inode);
}
/**	Replaces the old modification time of the inode in the
 *	"last_modified" index with \a modified. If \a modified is -1, the
 *	current time (shifted by INODE_TIME_SHIFT) is used instead.
 *	The bits selected by INODE_TIME_MASK of the volume's next unique ID
 *	are or'ed into the time value.
 *	The inode's last_modified_time is set in any case, its notion of
 *	the indexed value only if the index could be updated.
 */
status_t
Index::UpdateLastModified(Transaction *transaction, Inode *inode, off_t modified)
{
	off_t indexedTime = inode->OldLastModified();

	if (modified == -1)
		modified = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
	modified |= fVolume->GetUniqueID() & INODE_TIME_MASK;

	status_t status = Update(transaction,"last_modified",B_INT64_TYPE,
		(uint8 *)&indexedTime,sizeof(int64),
		(uint8 *)&modified,sizeof(int64),inode);

	inode->Node()->last_modified_time = modified;

	if (status != B_OK)
		return status;

	inode->UpdateOldLastModified();
	return status;
}

View File

@ -0,0 +1,51 @@
#ifndef INDEX_H
#define INDEX_H
/* Index - index access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
class Transaction;
class Volume;
class Inode;
// The Index class gives access to a single index of a volume. Besides
// creating and looking up indices, it offers helpers to maintain the
// "name", "size", and "last_modified" indices for an inode.
class Index {
public:
Index(Volume *volume);
~Index();
// binds the object to the index with the given name, resp. releases it again
status_t SetTo(const char *name);
void Unset();
// the inode of the index, NULL if the object is not set
Inode *Node() const { return fNode; };
// type constant and key size of the index, both are 0 if not set
uint32 Type();
size_t KeySize();
status_t Create(Transaction *transaction, const char *name, uint32 type);
// removes "oldKey" from, and inserts "newKey" into the index "name"
status_t Update(Transaction *transaction, const char *name, int32 type, const uint8 *oldKey, uint16 oldLength, const uint8 *newKey, uint16 newLength, Inode *inode);
// maintenance of the "name" index
status_t InsertName(Transaction *transaction,const char *name,Inode *inode);
status_t RemoveName(Transaction *transaction,const char *name,Inode *inode);
status_t UpdateName(Transaction *transaction,const char *oldName,const char *newName,Inode *inode);
// maintenance of the "size" index
status_t InsertSize(Transaction *transaction, Inode *inode);
status_t RemoveSize(Transaction *transaction, Inode *inode);
status_t UpdateSize(Transaction *transaction, Inode *inode);
// maintenance of the "last_modified" index; if "modified" is -1,
// UpdateLastModified() uses the current time
status_t InsertLastModified(Transaction *transaction, Inode *inode);
status_t RemoveLastModified(Transaction *transaction, Inode *inode);
status_t UpdateLastModified(Transaction *transaction, Inode *inode,off_t modified = -1);
private:
Volume *fVolume;
Inode *fNode;
// name of the index that is currently set; only the pointer is stored
const char *fName;
};
#endif /* INDEX_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,309 @@
#ifndef INODE_H
#define INODE_H
/* Inode - inode access functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef USER
# include "myfs.h"
# include <stdio.h>
#endif
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
extern "C" {
#include <lock.h>
#include <cache.h>
}
#include <string.h>
#include "Volume.h"
#include "Journal.h"
#include "Lock.h"
#include "Chain.h"
#include "Debug.h"
class BPlusTree;
class TreeIterator;
class AttributeIterator;
// Names for the basic inode types; they map directly to the
// respective S_IF* file type constants.
enum inode_type {
S_DIRECTORY = S_IFDIR,
S_FILE = S_IFREG,
S_SYMLINK = S_IFLNK
};
// The CachedBlock class is completely implemented as inlines.
// It should be used when caching single blocks to make sure they
// will be properly released after use (and it's also very
// convenient to use them).
class CachedBlock {
	public:
		// creates an object without a block; use SetTo() to get one
		CachedBlock(Volume *volume)
			:
			fVolume(volume),
			fBlockNumber(0),
			fBlock(NULL)
		{
		}

		// gets the specified block right away - Block() will return
		// NULL if that failed
		CachedBlock(Volume *volume,off_t block,bool empty = false)
			:
			fVolume(volume),
			fBlockNumber(0),
			fBlock(NULL)
		{
			SetTo(block,empty);
		}

		CachedBlock(Volume *volume,block_run run,bool empty = false)
			:
			fVolume(volume),
			fBlockNumber(0),
			fBlock(NULL)
		{
			SetTo(volume->ToBlock(run),empty);
		}

		~CachedBlock()
		{
			Unset();
		}

		// Releases the current block, if any. The pointer is reset as
		// well, so that the block cannot be released a second time by
		// another Unset() or the destructor, and Block() doesn't return
		// a pointer to a block we no longer hold.
		void Unset()
		{
			if (fBlock != NULL) {
				release_block(fVolume->Device(),fBlockNumber);
				fBlock = NULL;
			}
		}

		// Releases the current block, and gets the specified one; if
		// "empty" is true, get_empty_block() is used instead of
		// get_block(). Returns the block data (which is whatever the
		// cache call returned).
		uint8 *SetTo(off_t block,bool empty = false)
		{
			Unset();
			fBlockNumber = block;
			return fBlock = empty ? (uint8 *)get_empty_block(fVolume->Device(),block,fVolume->BlockSize())
				: (uint8 *)get_block(fVolume->Device(),block,fVolume->BlockSize());
		}

		uint8 *SetTo(block_run run,bool empty = false)
		{
			return SetTo(fVolume->ToBlock(run),empty);
		}

		// writes the block back as part of the given transaction;
		// fails with B_BAD_VALUE if there is no transaction or no block
		status_t WriteBack(Transaction *transaction)
		{
			if (transaction == NULL || fBlock == NULL)
				RETURN_ERROR(B_BAD_VALUE);

			return transaction->WriteBlocks(fBlockNumber,fBlock);
		}

		uint8 *Block() const { return fBlock; }
		off_t BlockNumber() const { return fBlockNumber; }

	protected:
		Volume	*fVolume;
		off_t	fBlockNumber;
		uint8	*fBlock;
};
// The Inode class represents a single inode of the volume; the on-disk
// bfs_inode structure is accessed through the block held by the
// CachedBlock base class.
class Inode : public CachedBlock {
	public:
		Inode(Volume *volume,vnode_id id,bool empty = false,uint8 reenter = 0);
		~Inode();

		bfs_inode *Node() const { return (bfs_inode *)fBlock; }
		vnode_id ID() const { return fVolume->ToVnode(fBlockNumber); }

		ReadWriteLock &Lock() { return fLock; }
		SimpleLock &SmallDataLock() { return fSmallDataLock; }

		mode_t Mode() const { return Node()->mode; }
		int32 Flags() const { return Node()->flags; }
		bool IsDirectory() const { return Mode() & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR); }
			// note, that this test will also be true for S_IFBLK (not that it's used in the fs :)
		bool IsIndex() const { return (Mode() & (S_INDEX_DIR | 0777)) == S_INDEX_DIR; }
			// that's a stupid check, but AFAIK the only possible method...

		bool IsSymLink() const { return S_ISLNK(Mode()); }
		bool HasUserAccessableStream() const { return S_ISREG(Mode()); }
			// currently only files can be accessed with bfs_read()/bfs_write()

		off_t Size() const { return Node()->data.size; }

		block_run &BlockRun() const { return Node()->inode_num; }
		block_run &Parent() const { return Node()->parent; }
		block_run &Attributes() const { return Node()->attributes; }
		Volume *GetVolume() const { return fVolume; }

		status_t InitCheck();

		status_t CheckPermissions(int accessMode) const;

		// small_data access methods
		status_t MakeSpaceForSmallData(Transaction *transaction,const char *name, int32 length);
		status_t RemoveSmallData(Transaction *transaction,const char *name);
		status_t AddSmallData(Transaction *transaction,const char *name,uint32 type,const uint8 *data,size_t length,bool force = false);
		status_t GetNextSmallData(small_data **smallData) const;
		small_data *FindSmallData(const char *name) const;
		const char *Name() const;
		status_t SetName(Transaction *transaction,const char *name);

		// high-level attribute methods
		status_t ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length);
		status_t WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos, const uint8 *buffer, size_t *_length);
		status_t RemoveAttribute(Transaction *transaction, const char *name);

		// attribute methods
		status_t GetAttribute(const char *name,Inode **attribute);
		void ReleaseAttribute(Inode *attribute);
		status_t CreateAttribute(Transaction *transaction,const char *name,uint32 type,Inode **attribute);

		// for directories only:
		status_t GetTree(BPlusTree **);
		bool IsEmpty();

		// manipulating the data stream
		status_t FindBlockRun(off_t pos,block_run &run,off_t &offset);

		status_t ReadAt(off_t pos,uint8 *buffer,size_t *length);
		status_t WriteAt(Transaction *transaction,off_t pos,const uint8 *buffer,size_t *length);
		status_t FillGapWithZeros(off_t oldSize,off_t newSize);

		status_t SetFileSize(Transaction *transaction,off_t size);
		status_t Append(Transaction *transaction,off_t bytes);
		status_t Trim(Transaction *transaction);

		status_t Sync();

		// create/remove inodes
		status_t Remove(Transaction *transaction,const char *name,off_t *_id = NULL,bool isDirectory = false);
		static status_t Create(Transaction *transaction,Inode *parent,const char *name,int32 mode,int omode,uint32 type,off_t *_id = NULL,Inode **_inode = NULL);

		// index maintaining helper
		void UpdateOldSize() { fOldSize = Size(); }
		void UpdateOldLastModified() { fOldLastModified = Node()->last_modified_time; }
		off_t OldSize() { return fOldSize; }
		off_t OldLastModified() { return fOldLastModified; }

	private:
		// a friend declaration of a class needs the class-key to be
		// valid in standard C++ prior to C++11
		friend class AttributeIterator;

		status_t RemoveSmallData(small_data *item,int32 index);

		void AddIterator(AttributeIterator *iterator);
		void RemoveIterator(AttributeIterator *iterator);

		status_t FreeStaticStreamArray(Transaction *transaction,int32 level,block_run run,off_t size,off_t offset,off_t &max);
		status_t FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength, off_t size, off_t &offset, off_t &max);
		status_t GrowStream(Transaction *transaction,off_t size);
		status_t ShrinkStream(Transaction *transaction,off_t size);

		BPlusTree		*fTree;
		Inode			*fAttributes;
		ReadWriteLock	fLock;
		off_t			fOldSize;			// we need those values to ensure we will remove
		off_t			fOldLastModified;	// the correct keys from the indices

		mutable SimpleLock	fSmallDataLock;
		Chain<AttributeIterator> fIterators;
};
// The Vnode class is a small helper around get_vnode()/put_vnode():
// the vnode is put back automatically when the object is destroyed,
// unless Keep() has been called before - which can make code more
// readable in some cases.
class Vnode {
	public:
		Vnode(Volume *volume,vnode_id id)
			: fVolume(volume), fID(id)
		{
		}

		Vnode(Volume *volume,block_run run)
			: fVolume(volume), fID(volume->ToVnode(run))
		{
		}

		~Vnode()
		{
			Put();
		}

		// retrieves the inode via get_vnode()
		status_t Get(Inode **inode)
		{
			// should we check inode against NULL here? it should not be necessary
			return get_vnode(fVolume->ID(),fID,(void **)inode);
		}

		// puts the vnode back, and makes sure this happens only once
		void Put()
		{
			if (fVolume != NULL)
				put_vnode(fVolume->ID(),fID);

			fVolume = NULL;
		}

		// prevents the vnode from being put back by this object
		void Keep()
		{
			fVolume = NULL;
		}

	private:
		Volume		*fVolume;
		vnode_id	fID;
};
class AttributeIterator {
public:
AttributeIterator(Inode *inode);
~AttributeIterator();
status_t Rewind();
status_t GetNext(char *name,size_t *length,uint32 *type,vnode_id *id);
private:
int32 fCurrentSmallData;
Inode *fInode, *fAttributes;
TreeIterator *fIterator;
void *fBuffer;
private:
friend Chain<AttributeIterator>;
friend Inode;
void Update(uint16 index,int8 change);
AttributeIterator *fNext;
};
/**	Translates the open mode passed to bfs_open() ("omode") into the
 *	access mode needed to open the file that way: O_RDONLY requires
 *	read access (R_OK), O_WRONLY write access (W_OK), and everything
 *	else both of them.
 */
inline int oModeToAccess(int omode)
{
	// only the read/write part of the open mode is of interest
	switch (omode & O_RWMASK) {
		case O_RDONLY:
			return R_OK;
		case O_WRONLY:
			return W_OK;
		default:
			return R_OK | W_OK;
	}
}
#endif /* INODE_H */

View File

@ -0,0 +1,23 @@
# Jamfile for the BFS file system add-on.
SubDir OBOS_TOP src add-ons kernel file_systems bfs ;
# All C and C++ sources in this directory are compiled with debug
# information (-g) and with the DEBUG macro defined.
{
local debug = -g ;
local defines = [ FDefines DEBUG ] ;
SubDirCcFlags $(defines) $(debug) ;
SubDirC++Flags $(defines) $(debug) ;
}
# The add-on "obfs" is built from the following sources.
R5KernelAddon obfs : [ FDirName kernel file_systems bfs ] :
BlockAllocator.cpp
BPlusTree.cpp
cpp.cpp
Debug.cpp
Index.cpp
Inode.cpp
Journal.cpp
kernel_interface.cpp
Query.cpp
Utility.cpp
Volume.cpp
;

View File

@ -0,0 +1,433 @@
/* Journal - transaction and logging
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Journal.h"
#include "Inode.h"
#include "Debug.h"
#include "cpp.h"
/**	Creates the journal for the given volume. The size of the log is
 *	taken from the volume's log block run; the maximum size of a
 *	transaction is derived from it.
 */
Journal::Journal(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs journal"),
	fOwner(NULL),
	fOwningThread(-1),
	fArray(volume->BlockSize()),
	fLogSize(volume->Log().length),
	fMaxTransactionSize(fLogSize / 4 - 5),
	fUsed(0),
	fTransactionsInEntry(0),
	// the following members were left uninitialized before
	fCurrent(NULL),
	fHasChangedBlocks(false),
	fTimestamp(0)
{
	// fLogSize is unsigned, so "fLogSize / 4 - 5" wraps around to a
	// huge value for very small logs - this check catches that case
	if (fMaxTransactionSize > fLogSize / 2)
		fMaxTransactionSize = fLogSize / 2 - 5;
}
// Writes the pending log entry (if any) and flushes the device via
// FlushLogAndBlocks(); its result is ignored here.
Journal::~Journal()
{
FlushLogAndBlocks();
}
/**	Checks whether the log has to be replayed, and does so if needed:
 *	if the log start and end pointers of the volume differ, the result
 *	of ReplayLog() is returned, otherwise B_OK.
 */
status_t
Journal::InitCheck()
{
	// nothing to do if the log is empty
	if (fVolume->LogStart() == fVolume->LogEnd())
		return B_OK;

	if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY)
		FATAL(("log_start and log_end differ, but disk is marked clean - trying to replay log...\n"));

	return ReplayLog();
}
/**	Is supposed to check the integrity of a log entry - currently, it
 *	only prints the number of entries in debug builds, and always
 *	returns B_OK.
 */
status_t
Journal::CheckLogEntry(int32 count,off_t *array)
{
	// ToDo: check log entry integrity (block numbers and entry size)

	// the format string must not contain more conversions than there
	// are arguments
	PRINT(("Log entry has %ld entries\n",count));
	return B_OK;
}
/**	Replays the log entry that starts at the log position \a _start.
 *	A log entry consists of one or more array blocks - containing the
 *	number of logged blocks, followed by their block numbers - and the
 *	copies of the logged blocks themselves. Every copy is written back
 *	to the block number stored in the array.
 *	\a _start is advanced by the number of blocks the entry occupies
 *	(it is not wrapped around to the log size here).
 *
 *	Returns B_IO_ERROR if a block could not be read or written, and
 *	B_BAD_DATA if the entry has an invalid block count.
 */
status_t
Journal::ReplayLogEntry(int32 *_start)
{
	PRINT(("ReplayLogEntry(start = %ld)\n",*_start));

	off_t logOffset = fVolume->ToBlock(fVolume->Log());
	off_t arrayBlock = (*_start % fLogSize) + fVolume->ToBlock(fVolume->Log());
	int32 blockSize = fVolume->BlockSize();
	int32 count = 1,valuesInBlock = blockSize / sizeof(off_t);
	// both are set when the first array block is processed
	int32 numArrayBlocks = 0;
	off_t blockNumber = 0;
	bool first = true;

	CachedBlock cached(fVolume);
	while (count > 0) {
		off_t *array = (off_t *)cached.SetTo(arrayBlock);
		if (array == NULL)
			return B_IO_ERROR;

		int32 index = 0;
		if (first) {
			count = array[0];
			if (count < 1 || count >= fLogSize)
				return B_BAD_DATA;

			first = false;

			numArrayBlocks = ((count + 1) * sizeof(off_t) + blockSize - 1) / blockSize;
			blockNumber = (*_start + numArrayBlocks) % fLogSize;
				// first real block in this log entry
			*_start += count;
			index++;
				// the first entry in the first block is the number
				// of blocks in that log entry
		}
		(*_start)++;

		if (CheckLogEntry(count,array + 1) < B_OK)
			return B_BAD_DATA;

		CachedBlock cachedCopy(fVolume);
		for (;index < valuesInBlock && count-- > 0;index++) {
			PRINT(("replay block %Ld in log at %Ld!\n",array[index],blockNumber));

			uint8 *copy = cachedCopy.SetTo(logOffset + blockNumber);
			if (copy == NULL)
				RETURN_ERROR(B_IO_ERROR);

			ssize_t written = write_pos(fVolume->Device(),array[index] << fVolume->BlockShift(),copy,blockSize);
			if (written != blockSize)
				RETURN_ERROR(B_IO_ERROR);

			blockNumber = (blockNumber + 1) % fLogSize;
		}

		// The log covers the blocks "logOffset" to "logOffset + fLogSize - 1",
		// so the next array block has to wrap around as soon as it reaches
		// "logOffset + fLogSize", not only after having passed it
		arrayBlock++;
		if (arrayBlock >= fVolume->ToBlock(fVolume->Log()) + fLogSize)
			arrayBlock = fVolume->ToBlock(fVolume->Log());
	}
	return B_OK;
}
/**	Replays all entries between the log start and the log end pointer
 *	of the volume, which is meant to bring a disk that was not correctly
 *	unmounted back into a consistent and clean state.
 *	Afterwards, the log start pointer is set to the log end pointer, the
 *	disk is marked clean, and the super block is written back.
 *	This method is called by Journal::InitCheck() if the log start
 *	and end pointer don't match.
 */
status_t
Journal::ReplayLog()
{
	INFORM(("Replay log, disk was not correctly unmounted...\n"));

	int32 start = fVolume->LogStart();
	int32 lastStart = -1;

	// go on until the log is completely flushed
	while (start != fVolume->LogEnd()) {
		if (start == lastStart) {
			// strange, flushing the log hasn't changed the log_start pointer
			return B_ERROR;
		}
		lastStart = start;

		status_t status = ReplayLogEntry(&start);
		if (status < B_OK) {
			FATAL(("replaying log entry from %u failed: %s\n",start,strerror(status)));
			return B_ERROR;
		}

		// the log is a circular buffer
		start = start % fLogSize;
	}

	PRINT(("replaying worked fine!\n"));

	fVolume->SuperBlock().log_start = fVolume->LogEnd();
	fVolume->LogStart() = fVolume->LogEnd();
	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_CLEAN;

	return fVolume->WriteSuperBlock();
}
/** This is a callback function that is called by the cache, whenever
* a block is flushed to disk that was updated as part of a transaction.
* This is necessary to keep track of completed transactions, to be
* able to update the log start pointer.
* "arg" is the log_entry that was registered together with this
* callback in WriteLogEntry(); the entry is removed and freed here
* once all of its blocks have been reported.
*/
void
Journal::blockNotify(off_t blockNumber,size_t numBlocks,void *arg)
{
log_entry *logEntry = (log_entry *)arg;
// count down the blocks of this entry that still wait for being flushed
// NOTE(review): cached_blocks is unsigned, it would wrap around if more
// blocks were reported than the entry contains - confirm this can't happen
logEntry->cached_blocks -= numBlocks;
if (logEntry->cached_blocks > 0) {
// nothing to do yet...
return;
}
Journal *journal = logEntry->journal;
disk_super_block &superBlock = journal->fVolume->SuperBlock();
bool update = false;
// Set log_start pointer if possible...
// (only if this is the first entry in the list; it is then moved to
// the start of the next entry, or to the log end if there is none)
if (logEntry == journal->fEntries.head) {
if (logEntry->Next() != NULL) {
int32 length = logEntry->next->start - logEntry->start;
superBlock.log_start = (superBlock.log_start + length) % journal->fLogSize;
} else
superBlock.log_start = journal->fVolume->LogEnd();
update = true;
}
journal->fUsed -= logEntry->length;
// the entry is removed from the list while holding the entries lock
journal->fEntriesLock.Lock();
logEntry->Remove();
journal->fEntriesLock.Unlock();
free(logEntry);
// update the super block, and change the disk's state, if necessary
if (update) {
journal->fVolume->LogStart() = superBlock.log_start;
if (superBlock.log_start == superBlock.log_end)
superBlock.flags = SUPER_BLOCK_DISK_CLEAN;
journal->fVolume->WriteSuperBlock();
}
}
/**	Writes the blocks of the current transaction(s) as one entry to the
 *	log: first the array with the block numbers, then the contents of
 *	the blocks themselves. Afterwards, the blocks are registered with
 *	the cache, so that blockNotify() is called when they are flushed,
 *	and the log end pointer in the super block is updated.
 *
 *	Returns B_OK on success (also if there was nothing to write),
 *	B_BAD_DATA if there is not enough space left in the log (the volume
 *	is put into panic mode in this case), and B_IO_ERROR if one of the
 *	logged blocks could not be retrieved.
 */
status_t
Journal::WriteLogEntry()
{
	fTransactionsInEntry = 0;
	fHasChangedBlocks = false;

	sorted_array *array = fArray.Array();
	if (array == NULL || array->count == 0)
		return B_OK;

	// Make sure there is enough space in the log.
	// If that fails for whatever reason, panic!
	force_cache_flush(fVolume->Device(),false);
	int32 tries = fLogSize / 2 + 1;
	while (TransactionSize() > FreeLogBlocks() && tries-- > 0)
		force_cache_flush(fVolume->Device(),true);

	// Test for the actual condition instead of the retry counter - the
	// counter alone can't tell if the very last flush has freed enough
	// space or not
	if (TransactionSize() > FreeLogBlocks()) {
		fVolume->Panic();
		return B_BAD_DATA;
	}

	int32 blockShift = fVolume->BlockShift();
	off_t logOffset = fVolume->ToBlock(fVolume->Log()) << blockShift;
	off_t logStart = fVolume->LogEnd();
	off_t logPosition = logStart % fLogSize;

	// Write disk block array
	uint8 *arrayBlock = (uint8 *)array;
	for (int32 size = fArray.BlocksUsed();size-- > 0;) {
		write_pos(fVolume->Device(),logOffset + (logPosition << blockShift),arrayBlock,fVolume->BlockSize());

		logPosition = (logPosition + 1) % fLogSize;
		arrayBlock += fVolume->BlockSize();
	}

	// Write logged blocks into the log
	CachedBlock cached(fVolume);
	for (int32 i = 0;i < array->count;i++) {
		uint8 *block = cached.SetTo(array->values[i]);
		if (block == NULL)
			return B_IO_ERROR;

		write_pos(fVolume->Device(),logOffset + (logPosition << blockShift),block,fVolume->BlockSize());
		logPosition = (logPosition + 1) % fLogSize;
	}

	// Remember the entry, so that blockNotify() can move the log start
	// pointer when all of its blocks have been flushed
	log_entry *logEntry = (log_entry *)malloc(sizeof(log_entry));
	if (logEntry != NULL) {
		logEntry->start = logStart;
		logEntry->length = TransactionSize();
		logEntry->cached_blocks = array->count;
		logEntry->journal = this;

		fEntriesLock.Lock();
		fEntries.Add(logEntry);
		fEntriesLock.Unlock();

		fCurrent = logEntry;
		fUsed += logEntry->length;

		set_blocks_info(fVolume->Device(),&array->values[0],array->count,blockNotify,logEntry);
	}

	// If the log goes to the next round (the log is written as a
	// circular buffer), all blocks will be flushed out which is
	// possible because we don't have any locked blocks at this
	// point.
	if (logPosition < logStart)
		fVolume->FlushDevice();

	// We need to flush the drives own cache here to ensure
	// disk consistency.
	// If that call fails, we can't do anything about it anyway
	ioctl(fVolume->Device(),B_FLUSH_DRIVE_CACHE);

	fArray.MakeEmpty();

	// Update the log end pointer in the super block
	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_DIRTY;
	fVolume->SuperBlock().log_end = logPosition;
	fVolume->LogEnd() = logPosition;

	fVolume->WriteSuperBlock();

	// the method has to return a defined status, the callers evaluate it
	return B_OK;
}
/**	Writes the current log entry to disk (if there is any), and flushes
 *	the device afterwards. The journal is locked while doing so.
 *	Returns the error of Lock() if the journal could not be locked,
 *	otherwise the result of Volume::FlushDevice().
 */
status_t
Journal::FlushLogAndBlocks()
{
	// the journal itself is used as the owner of the lock here
	status_t status = Lock((Transaction *)this);
	if (status != B_OK)
		return status;

	// write the current log entry to disk
	if (TransactionSize() != 0) {
		status = WriteLogEntry();
		if (status < B_OK) {
			// "status" is a number, it has to be converted for "%s"
			FATAL(("writing current log entry failed: %s\n",strerror(status)));
		}
	}

	status = fVolume->FlushDevice();

	Unlock((Transaction *)this,true);
	return status;
}
/**	Locks the journal for the given owner. If the owner already holds
 *	the journal, B_OK is returned right away.
 *	After the lock could be acquired, a pending log entry is written
 *	if the last transaction is older than 2 seconds.
 *	Returns the error of the underlying lock if it could not be acquired.
 */
status_t
Journal::Lock(Transaction *owner)
{
	if (owner == fOwner)
		return B_OK;

	// Don't pretend to own the journal if the lock could not be
	// acquired - the caller would work on it unprotected otherwise
	status_t status = fLock.Lock();
	if (status != B_OK)
		return status;

	fOwner = owner;
	fOwningThread = find_thread(NULL);

	// if the last transaction is older than 2 secs, start a new one
	if (fTransactionsInEntry != 0 && system_time() - fTimestamp > 2000000L)
		WriteLogEntry();

	return B_OK;
}
/**	Finishes the transaction of the given owner, and unlocks the
 *	journal. Does nothing if \a owner doesn't hold the journal.
 */
void
Journal::Unlock(Transaction *owner,bool success)
{
	if (owner == fOwner) {
		TransactionDone(success);

		// remember when the last transaction ended, see Lock()
		fTimestamp = system_time();

		fOwner = NULL;
		fOwningThread = -1;
		fLock.Unlock();
	}
}
/**	Is called when a transaction ends. As long as the pending log entry
 *	is smaller than the maximum transaction size, the transaction is
 *	only counted; otherwise, the log entry is written to disk.
 *	Aborting a failed transaction is not implemented yet.
 */
status_t
Journal::TransactionDone(bool success)
{
	if (!success && fTransactionsInEntry == 0) {
		// we can safely abort the transaction
		// ToDo: abort the transaction
		PRINT(("should abort transaction...\n"));
	}

	// the entry has reached its maximum size, write it out
	if (TransactionSize() >= fMaxTransactionSize)
		return WriteLogEntry();

	// Up to a maximum size, we will just batch several
	// transactions together to improve speed
	fTransactionsInEntry++;
	fHasChangedBlocks = false;

	return B_OK;
}
/**	Adds \a numBlocks blocks, starting at \a blockNumber, to the current
 *	transaction; \a buffer holds their new contents. Blocks that are
 *	already part of the transaction are skipped.
 *	Returns B_DEVICE_FULL if the transaction would no longer fit into
 *	the log, or the error of cached_write_locked().
 */
status_t
Journal::LogBlocks(off_t blockNumber,const uint8 *buffer,size_t numBlocks)
{
	// ToDo: that's for now - we should change the log file size here
	if (TransactionSize() + numBlocks + 1 > fLogSize)
		return B_DEVICE_FULL;

	fHasChangedBlocks = true;
	int32 blockSize = fVolume->BlockSize();

	for (size_t i = 0;i < numBlocks;i++) {
		off_t current = blockNumber + i;
		const uint8 *data = buffer + i * blockSize;

		if (fArray.Find(current) < 0) {
			// Insert the block into the transaction's array, and write the changes
			// back into the locked cache buffer
			fArray.Insert(current);

			status_t status = cached_write_locked(fVolume->Device(),current,data,1,blockSize);
			if (status < B_OK)
				return status;
		}
	}

	// If necessary, flush the log, so that we have enough space for this transaction
	if (TransactionSize() > FreeLogBlocks())
		force_cache_flush(fVolume->Device(),true);

	return B_OK;
}
// #pragma mark -
/**	Starts the transaction: retrieves the journal from the volume and
 *	locks it. Returns B_OK if that worked, or if the transaction has
 *	already been started; B_ERROR otherwise.
 */
status_t
Transaction::Start(Volume *volume,off_t refBlock)
{
	// has it already been started?
	if (fJournal != NULL)
		return B_OK;

	fJournal = volume->GetJournal(refBlock);
	if (fJournal == NULL || fJournal->Lock(this) != B_OK) {
		// without a locked journal, the transaction is not started
		fJournal = NULL;
		return B_ERROR;
	}

	return B_OK;
}

View File

@ -0,0 +1,152 @@
#ifndef JOURNAL_H
#define JOURNAL_H
/* Journal - transaction and logging
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
#ifdef USER
# include "myfs.h"
# include <stdio.h>
#endif
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
extern "C" {
#include <lock.h>
#include <cache.h>
}
#include "Volume.h"
#include "Chain.h"
#include "Utility.h"
// An in-memory record of one log entry; entries are chained via the
// node<> base and kept in Journal::fEntries.
// NOTE(review): the meaning of the fields is not visible in this part of
// the source - "start"/"length" presumably describe the entry's position
// and size within the log, and "cached_blocks" the number of its blocks
// still held in the cache; confirm against Journal::WriteLogEntry().
struct log_entry : node<log_entry> {
uint16 start;
uint16 length;
uint32 cached_blocks;
// the journal this entry belongs to
Journal *journal;
};
// The Journal collects the blocks changed by transactions and writes them
// out as log entries. Only one Transaction can own the journal at a time
// (see Lock()/Unlock()); several consecutive transactions may be batched
// into a single log entry (see TransactionDone()).
class Journal {
public:
Journal(Volume *);
~Journal();
status_t InitCheck();
// Lock() makes "owner" the current transaction, Unlock() finishes it;
// Unlock() ignores callers that are not the current owner.
status_t Lock(Transaction *owner);
void Unlock(Transaction *owner,bool success);
status_t CheckLogEntry(int32 count, off_t *array);
status_t ReplayLogEntry(int32 *start);
status_t ReplayLog();
status_t WriteLogEntry();
// adds the given blocks to the current transaction
status_t LogBlocks(off_t blockNumber,const uint8 *buffer, size_t numBlocks);
thread_id CurrentThread() const { return fOwningThread; }
Transaction *CurrentTransaction() const { return fOwner; }
// number of logged blocks plus the blocks needed to store the block array
uint32 TransactionSize() const { return fArray.CountItems() + fArray.BlocksUsed(); }
status_t FlushLogAndBlocks();
Volume *GetVolume() const { return fVolume; }
inline int32 FreeLogBlocks() const;
private:
friend log_entry;
static void blockNotify(off_t blockNumber, size_t numBlocks, void *arg);
status_t TransactionDone(bool success);
Volume *fVolume;
// held from Lock() until Unlock()
Benaphore fLock;
// the transaction (and its thread) currently owning the journal;
// reset to NULL/-1 in Unlock()
Transaction *fOwner;
thread_id fOwningThread;
// block numbers that are part of the current log entry
BlockArray fArray;
uint32 fLogSize,fMaxTransactionSize,fUsed;
// number of transactions batched in the current log entry
int32 fTransactionsInEntry;
SimpleLock fEntriesLock;
list<log_entry> fEntries;
log_entry *fCurrent;
// set in LogBlocks(), cleared when a transaction is batched
bool fHasChangedBlocks;
// time of the last Unlock(); Lock() writes the log entry if the last
// transaction is older than 2 seconds
bigtime_t fTimestamp;
};
// Computes the free space of the log from the volume's log start/end
// pointers: if the start is not beyond the end, it is the log size minus
// the distance between them, otherwise the distance from end to start.
inline int32
Journal::FreeLogBlocks() const
{
	if (fVolume->LogStart() <= fVolume->LogEnd())
		return fLogSize - fVolume->LogEnd() + fVolume->LogStart();

	return fVolume->LogStart() - fVolume->LogEnd();
}
// For now, that's only a dumb class that does more or less nothing
// else than writing the blocks directly to the real location.
// It doesn't yet use logging.
class Transaction {
public:
Transaction(Volume *volume,off_t refBlock)
:
fJournal(NULL)
{
Start(volume,refBlock);
}
Transaction(Volume *volume,block_run refRun)
:
fJournal(NULL)
{
Start(volume,volume->ToBlock(refRun));
}
Transaction()
:
fJournal(NULL)
{
}
~Transaction()
{
if (fJournal)
fJournal->Unlock(this,false);
}
status_t Start(Volume *volume,off_t refBlock);
void Done()
{
if (fJournal != NULL)
fJournal->Unlock(this,true);
fJournal = NULL;
}
status_t WriteBlocks(off_t blockNumber,const uint8 *buffer,size_t numBlocks = 1)
{
if (fJournal == NULL)
return B_NO_INIT;
return fJournal->LogBlocks(blockNumber,buffer,numBlocks);
//status_t status = cached_write/*_locked*/(fVolume->Device(),blockNumber,buffer,numBlocks,fVolume->BlockSize());
//return status;
}
Volume *GetVolume() { return fJournal != NULL ? fJournal->GetVolume() : NULL; }
protected:
Journal *fJournal;
};
#endif /* JOURNAL_H */

View File

@ -0,0 +1,337 @@
#ifndef LOCK_H
#define LOCK_H
/* Lock - benaphores, read/write lock implementation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Roughly based on a Be sample code written by Nathan Schrenk.
**
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
class Benaphore {
public:
Benaphore(const char *name = "bfs benaphore")
:
fSemaphore(create_sem(0, name)),
fCount(1)
{
}
~Benaphore()
{
delete_sem(fSemaphore);
}
status_t InitCheck()
{
if (fSemaphore < B_OK)
return fSemaphore;
return B_OK;
}
status_t Lock()
{
if (atomic_add(&fCount, -1) <= 0)
return acquire_sem(fSemaphore);
return B_OK;
}
void Unlock()
{
if (atomic_add(&fCount, 1) < 0)
release_sem(fSemaphore);
}
private:
sem_id fSemaphore;
vint32 fCount;
};
// Scope guard for a Benaphore: locks it on construction and, if that
// succeeded, unlocks it again on destruction.
class Locker {
	public:
		Locker(Benaphore &lock)
			: fLock(lock), fStatus(lock.Lock())
		{
		}

		~Locker()
		{
			// only give back what we actually got
			if (fStatus == B_OK)
				fLock.Unlock();
		}

	private:
		Benaphore &fLock;
		status_t fStatus;
};
//**** Many Reader/Single Writer Lock
// This is a "fast" implementation of a single writer/many reader
// locking scheme. It's fast because it uses the benaphore idea
// to do lazy semaphore locking - in most cases it will only have
// to do some simple integer arithmetic.
// The second semaphore (fWriteLock) is needed to prevent the situation
// that a second writer can acquire the lock when there are still readers
// holding it.
#define MAX_READERS 100000
// Note: this code will break if you actually have 100000 readers
// at once. With the current thread/... limits in BeOS you can't
// touch that value, but it might be possible in the future.
// Also, you can only have about 20000 concurrent writers until
// the semaphore count exceeds the int32 bounds
// Timeouts:
// It may be a good idea to have timeouts for the WriteLocked class,
// in case something went wrong - we'll see if this is necessary,
// but it would be a somewhat poor work-around for a deadlock...
// But the only real problem with timeouts could be for things like
// "chkbfs" - because such a tool may need to lock for some more time
// Define FAST_LOCK if you want to have fast locks as the foundation for
// the ReadWriteLock class - the benefit is that acquire_sem() doesn't
// have to be called when there is no one waiting.
// The disadvantage is the use of 2 real semaphores, which is quite
// expensive considering that BeOS only allows for a total of 64k
// semaphores.
//#define FAST_LOCK
#ifdef FAST_LOCK
// Many reader/single writer lock based on the benaphore idea (only compiled
// if FAST_LOCK is defined). fCount starts at MAX_READERS; every reader
// takes 1 from it, a writer takes MAX_READERS at once. The semaphore is
// only used when the counter shows that someone has to wait.
class ReadWriteLock {
public:
ReadWriteLock(const char *name = "bfs r/w lock")
:
fSemaphore(create_sem(0, name)),
fCount(MAX_READERS),
fWriteLock()
{
}
~ReadWriteLock()
{
delete_sem(fSemaphore);
}
// returns the error from create_sem(), if any
status_t InitCheck()
{
if (fSemaphore < B_OK)
return fSemaphore;
return B_OK;
}
// acquires the lock for reading; waits on the semaphore only if the
// counter was already used up before (i.e. a writer is involved)
status_t Lock()
{
if (atomic_add(&fCount, -1) <= 0)
return acquire_sem(fSemaphore);
return B_OK;
}
// releases a read lock; wakes up a waiter if the counter was negative
void Unlock()
{
if (atomic_add(&fCount, 1) < 0)
release_sem(fSemaphore);
}
// acquires the lock for writing; fWriteLock serializes the writers while
// they are claiming the counter
status_t LockWrite()
{
if (fWriteLock.Lock() < B_OK)
return B_ERROR;
// "readers" is the counter value before our claim - anything below
// MAX_READERS means that the lock is currently in use
int32 readers = atomic_add(&fCount, -MAX_READERS);
status_t status = B_OK;
if (readers < MAX_READERS) {
// Acquire sem for all readers currently not using a semaphore.
// But if we are not the only write lock in the queue, just get
// the one for us
status = acquire_sem_etc(fSemaphore,readers <= 0 ? 1 : MAX_READERS - readers,0,0);
}
fWriteLock.Unlock();
return status;
}
// releases the write lock by giving MAX_READERS back to the counter
void UnlockWrite()
{
int32 readers = atomic_add(&fCount,MAX_READERS);
if (readers < 0) {
// release sem for all readers only when we were the only writer
release_sem_etc(fSemaphore,readers <= -MAX_READERS ? 1 : -readers,0);
}
}
private:
friend class ReadLocked;
friend class WriteLocked;
sem_id fSemaphore;
vint32 fCount;
Benaphore fWriteLock;
};
#else // FAST_LOCK
// Many reader/single writer lock built on a single semaphore that is
// created with a count of MAX_READERS: a reader acquires one unit of it,
// a writer acquires all MAX_READERS units at once.
class ReadWriteLock {
	public:
		ReadWriteLock(const char *name = "bfs r/w lock")
			: fSemaphore(create_sem(MAX_READERS, name))
		{
		}

		~ReadWriteLock()
		{
			delete_sem(fSemaphore);
		}

		// returns the error from create_sem(), if any
		status_t InitCheck()
		{
			if (fSemaphore >= B_OK)
				return B_OK;

			return fSemaphore;
		}

		// read lock
		status_t Lock()
		{
			return acquire_sem(fSemaphore);
		}

		void Unlock()
		{
			release_sem(fSemaphore);
		}

		// write lock
		status_t LockWrite()
		{
			return acquire_sem_etc(fSemaphore, MAX_READERS, 0, 0);
		}

		void UnlockWrite()
		{
			release_sem_etc(fSemaphore, MAX_READERS, 0);
		}

	private:
		friend class ReadLocked;
		friend class WriteLocked;

		sem_id fSemaphore;
};
#endif // FAST_LOCK
// Scope guard that holds a read lock on a ReadWriteLock for its lifetime;
// the lock is only released if it could be acquired.
class ReadLocked {
	public:
		ReadLocked(ReadWriteLock &lock)
			: fLock(lock), fStatus(lock.Lock())
		{
		}

		~ReadLocked()
		{
			if (fStatus == B_OK)
				fLock.Unlock();
		}

	private:
		ReadWriteLock &fLock;
		status_t fStatus;
};
// Scope guard that holds the write lock of a ReadWriteLock for its
// lifetime; the lock is only released if it could be acquired.
class WriteLocked {
	public:
		WriteLocked(ReadWriteLock &lock)
			: fLock(lock), fStatus(lock.LockWrite())
		{
		}

		~WriteLocked()
		{
			if (fStatus == B_OK)
				fLock.UnlockWrite();
		}

		// the status of the locking attempt, B_OK if the lock is held
		status_t IsLocked()
		{
			return fStatus;
		}

	private:
		ReadWriteLock &fLock;
		status_t fStatus;
};
// A lock without a semaphore, meant for critical sections with a short
// runtime. Every Lock() draws a number from fLock and polls (sleeping
// "time" microseconds in between) until fUnlock has reached that number;
// Unlock() advances fUnlock by one.
class SimpleLock {
	public:
		SimpleLock()
			: fLock(0), fUnlock(0)
		{
		}

		status_t Lock(bigtime_t time = 500)
		{
			const int32 ticket = atomic_add(&fLock, 1);

			while (fUnlock != ticket)
				snooze(time);

			// ToDo: the lock cannot fail currently! We may want
			// to change this
			return B_OK;
		}

		void Unlock()
		{
			atomic_add(&fUnlock, 1);
		}

	private:
		vint32 fLock;
		vint32 fUnlock;
};
// Scope guard for a SimpleLock. Note that its default polling interval
// (1000) differs from the default of SimpleLock::Lock() (500).
class SimpleLocker {
	public:
		SimpleLocker(SimpleLock &lock, bigtime_t time = 1000)
			: fLock(lock)
		{
			fLock.Lock(time);
		}

		~SimpleLocker()
		{
			fLock.Unlock();
		}

	private:
		SimpleLock &fLock;
};
#endif /* LOCK_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
#ifndef QUERY_H
#define QUERY_H
/* Query - query parsing and evaluation
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
#include "Index.h"
#include "Stack.h"
#include "Chain.h"
class Volume;
class Term;
class Equation;
class TreeIterator;
class Query;
// Holds a query expression in its parsed form: Root() is the top-most term,
// Position() returns fPosition.
// NOTE(review): fPosition presumably points to where parsing stopped (e.g.
// the location of a syntax error) - confirm against the implementation.
class Expression {
public:
Expression(char *expr);
~Expression();
status_t InitCheck();
const char *Position() const { return fPosition; }
Term *Root() const { return fTerm; }
protected:
// parser helpers, one per level of the expression grammar; they take a
// pointer to the current position in the expression string
Term *ParseOr(char **expr);
Term *ParseAnd(char **expr);
Term *ParseEquation(char **expr);
bool IsOperator(char **expr,char op);
private:
char *fPosition;
Term *fTerm;
};
// A running query on a volume, evaluating an Expression. Queries are kept
// in a Chain<Query> (see fNext), which Volume uses to deliver live updates
// via LiveUpdate().
class Query {
public:
Query(Volume *volume,Expression *expression);
~Query();
// fills in the next matching entry; "size" is the size of the dirent buffer
status_t GetNextEntry(struct dirent *,size_t size);
// sets the port/token pair used for live query notifications
void SetLiveMode(port_id port,int32 token);
// called when "attribute" of "inode" changes from the old to the new key
void LiveUpdate(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength);
Expression *GetExpression() const { return fExpression; }
private:
Volume *fVolume;
Expression *fExpression;
Equation *fCurrent;
TreeIterator *fIterator;
Index fIndex;
Stack<Equation *> fStack;
port_id fPort;
int32 fToken;
private:
// link to the next query, maintained by Chain<Query>
friend Chain<Query>;
Query *fNext;
};
#endif /* QUERY_H */

View File

@ -0,0 +1,58 @@
#ifndef STACK_H
#define STACK_H
/* Stack - a template stack class
**
** Copyright 2001 pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
// A simple growing stack for values of type T. The storage is managed with
// realloc()/free(), so T must be a type that can be moved around in memory
// as plain bytes (e.g. pointers or integers).
template<class T> class Stack {
	public:
		Stack()
			:
			fArray(NULL),
			fUsed(0),
			fMax(0)
		{
		}

		~Stack()
		{
			free(fArray);
		}

		// Pushes "value" on top of the stack, growing the array in steps
		// of 16 entries. Returns B_NO_MEMORY if the array could not be
		// enlarged; the stack is left untouched in this case.
		status_t Push(T value)
		{
			if (fUsed >= fMax) {
				// Only commit the new capacity after the reallocation has
				// succeeded - otherwise a later Push() would assume space
				// that was never allocated and write beyond the array.
				int32 newMax = fMax + 16;
				T *newArray = (T *)realloc(fArray, newMax * sizeof(T));
				if (newArray == NULL)
					return B_NO_MEMORY;

				fArray = newArray;
				fMax = newMax;
			}
			fArray[fUsed++] = value;
			return B_OK;
		}

		// Removes the top-most value and stores it in "value"; returns
		// false (leaving "value" alone) if the stack is empty.
		bool Pop(T *value)
		{
			if (fUsed == 0)
				return false;

			*value = fArray[--fUsed];
			return true;
		}

	private:
		T *fArray;
		int32 fUsed;
		int32 fMax;
};
#endif /* STACK_H */

View File

@ -0,0 +1,74 @@
BFS - ToDo, June 5th, 2002
-----
BlockAllocator
- the BlockAllocator is only slightly optimized and probably slow
- the first free and the largest range are currently not correctly maintained (only efficiency suffers - it does work correctly)
- the allocation policies will have to stand against some real world tests
- the access to the block bitmap is currently managed using a global lock
DataStream
- growing/shrinking the stream size is not implemented for the double indirect range
- only files are trimmed back (in bfs_close()), but every inode has a preallocated stream...
- merging of block_runs doesn't work between range/block boundaries
Queries
- There shouldn't be any cases where you can speed up a query by reordering the query expression - test it
- Check permissions of the parent directories
- Add protection against crashing applications which had a query open - at least the original BeOS kernel does not free the cookie (which throws some memory away *and* prevents unmounting the disk)
Journal
- Check if there are any standard and often-happening cases for a transaction to fail, and if so, start the transaction only when necessary
- if the system crashes between bfs_unlink() and bfs_remove_vnode(), the inode can be removed from the tree, but its memory is still allocated - this can happen if the inode is still in use by someone (and that's what the "chkbfs" utility is for, mainly).
- add delayed index updating (+ delete actions to solve the issue above)
- multiple log files, parallel transactions?
- variable sized log file
- as long as we have a fixed-sized log file, it should be possible to reserve space for a transaction to be able to decide if batching it is possible
BPlusTree
- BPlusTree::Remove() could trigger CachedNode::Free() to go through the free nodes list and free all pages at the end of the data stream
- updating the TreeIterators doesn't work yet for duplicates (which may be a problem if a duplicate node will go away after a remove)
- BPlusTree::RemoveDuplicate() could spread the contents of a duplicate node with only a few entries to save some space (right now, only empty nodes are freed)
Inode
- sometimes the inode's last modified time seems to be wrong, and is therefore not found in the b+tree (assuming that the b+tree is working correctly, which I do)
- Inode::FillGapWithZeros() is currently disabled; apart from being slow, it really shouldn't be executed while a transaction is running, because that stops all other threads from doing anything (which can be a long time for a 100 MB file)
Indices
Attributes
- bfs_write_attr() doesn't check if the attribute data may fit into the small_data region if there already is that attribute as an attribute file
Volume
kernel_interface
- missing functions, maybe they are not all needed (but most of them are): bfs_rename_attr(), bfs_rename_index(), bfs_initialize(), bfs_setflags(), bfs_link()
- bfs_rename() currently doesn't respect any permissions
general stuff
- There are also some comments with a leading "ToDo:" directly in the code which may not be mentioned here.
-----
Axel Dörfler
axeld@pinc-software.de

View File

@ -0,0 +1,138 @@
/* Utility - some helper classes
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Utility.h"
#include "Debug.h"
#include "cpp.h"
#include <stdlib.h>
#include <string.h>
// Binary search for "value" in the sorted array.
// Returns true and sets "index" to the position of the value if it was
// found. If not, false is returned and "index" is the last position that
// has been examined (0 for an empty array) - Insert() derives the insertion
// point from it.
bool
sorted_array::FindInternal(off_t value, int32 &index) const
{
	// make sure "index" is always defined, even if the loop never runs
	index = 0;

	int32 min = 0, max = count - 1;
	while (min <= max) {
		index = min + (max - min) / 2;

		// compare directly instead of looking at the sign of a difference,
		// which could overflow for values that are far apart
		if (values[index] < value)
			min = index + 1;
		else if (values[index] > value)
			max = index - 1;
		else
			return true;
	}
	return false;
}
// Inserts "value" at its sorted position. The caller has to make sure
// that there is room for one more value behind the array.
void
sorted_array::Insert(off_t value)
{
	int32 position;

	if (count <= 8) {
		// for small arrays, a linear scan finds the insertion point:
		// the first entry that is larger than the new value
		position = 0;
		while (position < count && values[position] <= value)
			position++;
	} else if (!FindInternal(value, position) && values[position] <= value) {
		// the binary search ended on an entry that is not larger than
		// the new value, so that one has to go after it
		position++;
	}

	// make room for the new value, and put it in
	memmove(&values[position + 1], &values[position], (count - position) * sizeof(off_t));
	values[position] = value;
	count++;
}
// Removes "value" from the array. Returns false if the array doesn't
// contain the value, true if it has been removed.
bool
sorted_array::Remove(off_t value)
{
	int32 index = Find(value);
	if (index == -1)
		return false;

	// Close the gap: only the (count - index - 1) entries following the
	// removed one have to be moved - moving one more would read beyond the
	// last valid entry of the array.
	memmove(&values[index], &values[index + 1], (count - index - 1) * sizeof(off_t));
	count--;

	return true;
}
// #pragma mark -
// Creates an empty BlockArray that will grow in steps of "blockSize" bytes.
// Nothing is allocated before the first Insert().
BlockArray::BlockArray(int32 blockSize)
	:
	// the initializers follow the declaration order of the members
	fArray(NULL),
	fBlockSize(blockSize),
	fSize(0),
	fMaxBlocks(0)
{
}
// Frees the array; free() accepts a NULL pointer, so an array that was
// never allocated needs no special treatment.
BlockArray::~BlockArray()
{
	free(fArray);
}
// Returns the index of "value" in the array, or -1 if it is not part
// of it (or if there is no array yet).
int32
BlockArray::Find(off_t value)
{
	if (fArray != NULL)
		return fArray->Find(value);

	return -1;
}
// Inserts "value" into the sorted array, enlarging the array by another
// fBlockSize bytes if it is full or doesn't exist yet.
// Returns B_NO_MEMORY if the array could not be enlarged.
status_t
BlockArray::Insert(off_t value)
{
	const bool needsSpace = fArray == NULL || fArray->count + 1 > fMaxBlocks;

	if (needsSpace) {
		sorted_array *grown = (sorted_array *)realloc(fArray, fSize + fBlockSize);
		if (grown == NULL)
			return B_NO_MEMORY;

		// a freshly allocated array starts out empty
		if (fArray == NULL)
			grown->count = 0;

		fArray = grown;
		fSize += fBlockSize;

		// one off_t sized slot is taken by the "count" field
		fMaxBlocks = fSize / sizeof(off_t) - 1;
	}

	fArray->Insert(value);
	return B_OK;
}
// Removes "value" from the array; returns B_ENTRY_NOT_FOUND if it is not
// part of the array (or if there is no array yet).
status_t
BlockArray::Remove(off_t value)
{
	if (fArray != NULL && fArray->Remove(value))
		return B_OK;

	return B_ENTRY_NOT_FOUND;
}
void
BlockArray::MakeEmpty()
{
fArray->count = 0;
}

View File

@ -0,0 +1,110 @@
#ifndef UTILITY_H
#define UTILITY_H
/* Utility - some helper classes
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
// Simple array, used for the duplicate handling in the B+Tree,
// and for the log entries.
// It consists of the number of values, directly followed by the values
// themselves in ascending order. The structure does not manage any memory:
// whoever provides it has to make sure that there is enough room behind
// it before calling Insert().
struct sorted_array {
public:
// number of valid entries in "values"
off_t count;
// the values, sorted in ascending order
off_t values[0];
// returns the index of "value", or -1 if it's not in the array
inline int32 Find(off_t value) const;
void Insert(off_t value);
// returns false if "value" is not in the array
bool Remove(off_t value);
private:
// binary search; returns whether or not the value was found, "index" is
// the position that was examined last
bool FindInternal(off_t value,int32 &index) const;
};
// Returns the index of "value" in the array, or -1 if the array does
// not contain it.
inline int32
sorted_array::Find(off_t value) const
{
	int32 index;
	if (FindInternal(value, index))
		return index;

	return -1;
}
// The BlockArray reserves a multiple of "blockSize" bytes and
// maintains the array size for new entries.
// This is used for the in-memory log entries before they
// are written to disk.
class BlockArray {
public:
BlockArray(int32 blockSize);
~BlockArray();
// returns the index of "value", or -1 if it's not in the array
int32 Find(off_t value);
// returns B_NO_MEMORY if the array could not be enlarged
status_t Insert(off_t value);
// returns B_ENTRY_NOT_FOUND if "value" is not in the array
status_t Remove(off_t value);
void MakeEmpty();
// number of values in the array
int32 CountItems() const { return fArray != NULL ? fArray->count : 0; }
// number of "blockSize" sized blocks needed to hold the count field and
// all values, rounded up
int32 BlocksUsed() const { return fArray != NULL ? ((fArray->count + 1) * sizeof(off_t) + fBlockSize - 1) / fBlockSize : 0; }
sorted_array *Array() const { return fArray; }
// number of bytes currently allocated for the array
int32 Size() const { return fSize; }
private:
// NULL until the first Insert()
sorted_array *fArray;
int32 fBlockSize;
int32 fSize;
// number of values that fit into the current allocation
int32 fMaxBlocks;
};
// Doubly linked list
// Base for the entries of a list<Node> (below). The list uses its own
// head/tail/last fields as pseudo nodes at both ends, which is why "next"
// and "prev" have to be the first two members, in this order.
template<class Node> struct node {
Node *next,*prev;
// Unlinks this node from its list; this works for the first and last
// entry as well, as their neighbours are the list's pseudo nodes.
void
Remove()
{
prev->next = next;
next->prev = prev;
}
// Returns the next real entry, or NULL if this is the last one: the
// "next" of the pseudo node at the end is the list's "tail" field,
// which is always NULL.
Node *
Next()
{
if (next && next->next != NULL)
return next;
return NULL;
}
};
// Head of a doubly linked list of node<Node> based entries.
// The three pointers are laid out so that they double as two overlapping
// pseudo nodes: (head, tail) is the pseudo node before the first entry
// (next = head, prev = tail = NULL), and (tail, last) the one after the
// last entry (next = tail = NULL, prev = last).
// NOTE(review): this relies on Node starting with the "next" and "prev"
// pointers of node<Node>, and on the order of the fields here - don't
// reorder them.
template<class Node> struct list {
Node *head,*tail,*last;
// creates an empty list: the two pseudo nodes point at each other
list()
{
head = (Node *)&tail;
tail = NULL;
last = (Node *)&head;
}
// appends "entry" to the end of the list
void
Add(Node *entry)
{
entry->next = (Node *)&tail;
entry->prev = last;
last->next = entry;
last = entry;
}
};
#endif /* UTILITY_H */

View File

@ -0,0 +1,304 @@
/* Volume - BFS super block, mounting, etc.
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "Debug.h"
#include "cpp.h"
#include "Volume.h"
#include "Journal.h"
#include "Inode.h"
#include "Query.h"
#include <KernelExport.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
// Creates the volume object for the file system with the given ID. Nothing
// is read from disk before Mount() is called.
// All members that are normally set by Mount() get a defined value here, so
// that a volume that was never (or not successfully) mounted doesn't carry
// indeterminate pointers or file descriptors around.
Volume::Volume(nspace_id id)
	:
	fID(id),
	fDevice(-1),
	fBlockAllocator(this),
	fLock("bfs volume"),
	fJournal(NULL),
	fLogStart(0),
	fLogEnd(0),
	fRootNode(NULL),
	fIndicesNode(NULL),
	fDirtyCachedBlocks(0),
	fUniqueID(0),
	fFlags(0)
{
}
// Nothing to do here - the resources acquired by Mount() are released
// by Unmount().
Volume::~Volume()
{
}
bool
Volume::IsValidSuperBlock()
{
if (fSuperBlock.magic1 != (int32)SUPER_BLOCK_MAGIC1
|| fSuperBlock.magic2 != (int32)SUPER_BLOCK_MAGIC2
|| fSuperBlock.magic3 != (int32)SUPER_BLOCK_MAGIC3
|| (int32)fSuperBlock.block_size != fSuperBlock.inode_size
|| fSuperBlock.fs_byte_order != SUPER_BLOCK_FS_LENDIAN
|| (1UL << fSuperBlock.block_shift) != fSuperBlock.block_size
|| fSuperBlock.num_ags < 1
|| fSuperBlock.ag_shift < 1
|| fSuperBlock.blocks_per_ag < 1
|| fSuperBlock.num_blocks < 10
|| fSuperBlock.num_ags != divide_roundup(fSuperBlock.num_blocks,1L << fSuperBlock.ag_shift))
return false;
return true;
}
// Called when something went seriously wrong: reports the problem, and
// switches the volume to read-only mode. If compiled with USER defined,
// the debugger is entered as well.
void
Volume::Panic()
{
FATAL(("we have to panic... switch to read-only mode!\n"));
fFlags |= VOLUME_READ_ONLY;
#ifdef USER
debugger("BFS panics!");
#endif
}
// Mounts the volume located on the device (or image file) "deviceName".
// The device is opened (falling back to read-only if it can't be opened
// for writing), the super block is read and checked, and then the block
// cache, the journal, the block allocator, the root node, and the indices
// node are initialized, in this order.
// Returns B_OK on success. On failure, the device is closed again and an
// error code is returned.
status_t
Volume::Mount(const char *deviceName,uint32 flags)
{
	if (flags & B_MOUNT_READ_ONLY)
		fFlags |= VOLUME_READ_ONLY;

	fDevice = open(deviceName,flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR);

	// if we couldn't open the device, try read-only (don't rely on a specific error code)
	if (fDevice < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) {
		fDevice = open(deviceName,O_RDONLY);
		fFlags |= VOLUME_READ_ONLY;
	}

	if (fDevice < B_OK)
		RETURN_ERROR(fDevice);

	// check if it's a regular file, and if so, disable the cache for the
	// underlaying file system
	struct stat stat;
	if (fstat(fDevice,&stat) < 0) {
		// don't leak the file descriptor
		close(fDevice);
		RETURN_ERROR(B_ERROR);
	}

//#ifndef USER
	if (stat.st_mode & S_FILE && ioctl(fDevice,IOCTL_FILE_UNCACHED_IO,NULL) < 0) {
		// mount read-only if the cache couldn't be disabled
#	ifdef DEBUG
		FATAL(("couldn't disable cache for image file - system may dead-lock!\n"));
#	else
		FATAL(("couldn't disable cache for image file!\n"));
		Panic();
#	endif
	}
//#endif

	// read the super block
	char buffer[1024];
	if (read_pos(fDevice,0,buffer,sizeof(buffer)) != sizeof(buffer)) {
		// don't leak the file descriptor
		close(fDevice);
		return B_IO_ERROR;
	}

	status_t status = B_OK;

	// Note: that does work only for x86, for PowerPC, the super block
	// is located at offset 0!
	memcpy(&fSuperBlock,buffer + 512,sizeof(disk_super_block));

	if (IsValidSuperBlock()) {
		// set the current log pointers, so that journaling will work correctly
		fLogStart = fSuperBlock.log_start;
		fLogEnd = fSuperBlock.log_end;

		if (init_cache_for_device(fDevice, NumBlocks()) == B_OK) {
			fJournal = new Journal(this);
			// replaying the log is the first thing we will do on this disk
			if (fJournal && fJournal->InitCheck() == B_OK
				&& fBlockAllocator.Initialize() == B_OK) {
				fRootNode = new Inode(this,ToVnode(Root()));

				if (fRootNode && fRootNode->InitCheck() == B_OK) {
					if (new_vnode(fID,ToVnode(Root()),(void *)fRootNode) == B_OK) {
						// try to get indices root dir

						// question: why doesn't get_vnode() work here??
						// answer: we have not yet backpropagated the pointer to the
						// volume in bfs_mount(), so bfs_read_vnode() can't get it.
						// But it's not needed to do that anyway.

						fIndicesNode = new Inode(this,ToVnode(Indices()));
						if (fIndicesNode == NULL
							|| fIndicesNode->InitCheck() < B_OK
							|| !fIndicesNode->IsDirectory()) {
							INFORM(("bfs: volume doesn't have indices!\n"));

							if (fIndicesNode) {
								// if this is the case, the index root node is gone bad, and
								// BFS switch to read-only mode
								fFlags |= VOLUME_READ_ONLY;

								// the broken node is of no use anymore - free it
								// instead of just dropping the pointer
								delete fIndicesNode;
								fIndicesNode = NULL;
							}
						}

						// all went fine
						return B_OK;
					} else
						status = B_NO_MEMORY;
				} else
					status = B_BAD_VALUE;

				FATAL(("could not create root node: new_vnode() failed!\n"));
			} else {
				// ToDo: improve error reporting for a bad journal
				status = B_NO_MEMORY;
				FATAL(("could not initialize journal/block bitmap allocator!\n"));
			}

			// NOTE(review): fJournal and fRootNode are not freed on these
			// error paths; whether deleting them here is safe depends on
			// their destructors, which are not visible from this file.
			remove_cached_device_blocks(fDevice,NO_WRITES);
		} else {
			FATAL(("could not initialize cache!\n"));
			status = B_IO_ERROR;
		}
	} else {
		// only complain about the super block if it really is the culprit
		FATAL(("invalid super block!\n"));
		status = B_BAD_VALUE;
	}

	close(fDevice);

	return status;
}
// Unmounts the volume: deletes the journal and the indices node, removes
// the volume's blocks from the cache (allowing them to be written back),
// and closes the device. Always returns B_OK.
status_t
Volume::Unmount()
{
// This will also flush the log & all blocks to disk
delete fJournal;
fJournal = NULL;
delete fIndicesNode;
// the journal has to be gone before the cache is torn down
remove_cached_device_blocks(fDevice,ALLOW_WRITES);
close(fDevice);
return B_OK;
}
// Writes all pending changes to disk by flushing the journal's log and
// blocks; returns the status of that operation.
// NOTE(review): fJournal is dereferenced unchecked - this must only be
// called on a successfully mounted volume.
status_t
Volume::Sync()
{
return fJournal->FlushLogAndBlocks();
}
// Checks whether "run" can be a valid block run on this volume: its
// allocation group must exist, it must not be empty, and it must lie
// completely within its allocation group.
// If the run is invalid, the volume panics (i.e. is switched to read-only
// mode), and B_BAD_DATA is returned; B_OK otherwise.
status_t
Volume::IsValidBlockRun(block_run run)
{
	// the number of blocks in a single allocation group
	const off_t blocksPerGroup = 1LL << AllocationGroupShift();

	// The allocation groups are numbered from 0 to AllocationGroups() - 1
	// (see ToBlockRun()/ToBlock()), so a group equal to the number of groups
	// is already out of range.
	if (run.allocation_group < 0
		|| run.allocation_group >= (int32)AllocationGroups()
		|| run.start > blocksPerGroup
		|| run.length == 0
		|| (uint32)run.length + run.start > blocksPerGroup) {
		Panic();
		FATAL(("*** invalid run(%ld,%d,%d)\n",run.allocation_group,run.start,run.length));
		return B_BAD_DATA;
	}
	return B_OK;
}
// Converts the absolute block number "block" into a block_run with a
// length of 1. This is the inverse of ToBlock(): the upper bits (above the
// allocation group shift) select the allocation group, the lower bits are
// the block's offset within that group.
block_run
Volume::ToBlockRun(off_t block) const
{
	block_run run;
	run.allocation_group = block >> fSuperBlock.ag_shift;

	// keep only the bits below the allocation group shift - the mask must
	// not be inverted, or the group bits would end up in "start"
	run.start = block & ((1LL << fSuperBlock.ag_shift) - 1);
	run.length = 1;
	return run;
}
// Creates the root directory for the indices, stores its location in the
// super block (which is written back to disk), and gets the new inode
// into fIndicesNode.
// Returns the error of Inode::Create() or Vnode::Get() on failure.
// NOTE(review): the result of WriteSuperBlock() is not checked here.
status_t
Volume::CreateIndicesRoot(Transaction *transaction)
{
off_t id;
status_t status = Inode::Create(transaction,NULL,NULL,
S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700,0,0,&id);
if (status < B_OK)
RETURN_ERROR(status);
fSuperBlock.indices = ToBlockRun(id);
WriteSuperBlock();
// The Vnode destructor will unlock the inode, but it has already been
// locked by the Inode::Create() call.
Vnode vnode(this,id);
return vnode.Get(&fIndicesNode);
}
// Allocates the block run for a new inode of the given "type"; the block
// run of "parent" is passed on to the block allocator.
// NOTE(review): "parent" is dereferenced unchecked, it must not be NULL.
status_t
Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run)
{
return fBlockAllocator.AllocateForInode(transaction,&parent->BlockRun(),type,run);
}
// Writes the in-memory super block back to the device, at the same offset
// (512) Mount() reads it from.
// Returns B_IO_ERROR if it could not be written completely.
status_t
Volume::WriteSuperBlock()
{
	const bool complete = write_pos(fDevice,512,&fSuperBlock,sizeof(disk_super_block))
		== sizeof(disk_super_block);
	if (complete)
		return B_OK;

	return B_IO_ERROR;
}
// Notifies all queries registered with this volume that "attribute" of
// "inode" has changed from the old key to the new key. The query list is
// locked while the queries are called.
void
Volume::UpdateLiveQueries(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength)
{
	if (fQueryLock.Lock() < B_OK)
		return;

	for (Query *current = fQueries.Next(NULL); current != NULL; current = fQueries.Next(current)) {
		current->LiveUpdate(inode,attribute,type,oldKey,oldLength,newKey,newLength);
	}

	fQueryLock.Unlock();
}
// Registers "query" with the volume, so that UpdateLiveQueries() will
// call it. Nothing happens if the query list can't be locked.
void
Volume::AddQuery(Query *query)
{
	if (fQueryLock.Lock() >= B_OK) {
		fQueries.Add(query);
		fQueryLock.Unlock();
	}
}
// Removes "query" from the volume's list of queries. Nothing happens if
// the query list can't be locked.
void
Volume::RemoveQuery(Query *query)
{
	if (fQueryLock.Lock() >= B_OK) {
		fQueries.Remove(query);
		fQueryLock.Unlock();
	}
}

View File

@ -0,0 +1,176 @@
#ifndef VOLUME_H
#define VOLUME_H
/* Volume - BFS super block, mounting, etc.
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <KernelExport.h>
extern "C" {
#ifndef _IMPEXP_KERNEL
# define _IMPEXP_KERNEL
#endif
#include "fsproto.h"
#include "lock.h"
#include "cache.h"
}
#include "bfs.h"
#include "BlockAllocator.h"
#include "Chain.h"
class Journal;
class Inode;
class Query;
// Bits for Volume::fFlags
enum volume_flags {
// the volume must not be written to; set on read-only mounts, and
// by Volume::Panic()
VOLUME_READ_ONLY = 0x0001
};
// Represents a mounted BFS volume: it holds the super block, the device,
// the block allocator, the journal, and the root and indices inodes, and
// provides the conversions between block runs, block numbers, and vnode IDs.
class Volume {
public:
Volume(nspace_id id);
~Volume();
status_t Mount(const char *device,uint32 flags);
status_t Unmount();
bool IsValidSuperBlock();
bool IsReadOnly() const { return fFlags & VOLUME_READ_ONLY; }
// switches the volume to read-only mode after a fatal error
void Panic();
Benaphore &Lock() { return fLock; }
// accessors for the super block contents, and the inodes created by Mount()
block_run Root() const { return fSuperBlock.root_dir; }
Inode *RootNode() const { return fRootNode; }
block_run Indices() const { return fSuperBlock.indices; }
Inode *IndicesNode() const { return fIndicesNode; }
block_run Log() const { return fSuperBlock.log_blocks; }
// the current log pointers; returned by reference so that they can be
// updated by the caller
vint32 &LogStart() { return fLogStart; }
vint32 &LogEnd() { return fLogEnd; }
int Device() const { return fDevice; }
nspace_id ID() const { return fID; }
const char *Name() const { return fSuperBlock.name; }
off_t NumBlocks() const { return fSuperBlock.num_blocks; }
off_t UsedBlocks() const { return fSuperBlock.used_blocks; }
off_t FreeBlocks() const { return fSuperBlock.num_blocks - fSuperBlock.used_blocks; }
uint32 BlockSize() const { return fSuperBlock.block_size; }
uint32 BlockShift() const { return fSuperBlock.block_shift; }
uint32 InodeSize() const { return fSuperBlock.inode_size; }
uint32 AllocationGroups() const { return fSuperBlock.num_ags; }
uint32 AllocationGroupShift() const { return fSuperBlock.ag_shift; }
disk_super_block &SuperBlock() { return fSuperBlock; }
// conversions: a block number is the allocation group shifted by
// ag_shift, combined with the start within the group; an offset is the
// block number shifted by block_shift; vnode IDs are block numbers
off_t ToOffset(block_run run) const { return ToBlock(run) << fSuperBlock.block_shift; }
off_t ToBlock(block_run run) const { return ((((off_t)run.allocation_group) << fSuperBlock.ag_shift) | (off_t)run.start); }
block_run ToBlockRun(off_t block) const;
status_t IsValidBlockRun(block_run run);
off_t ToVnode(block_run run) const { return ToBlock(run); }
off_t ToVnode(off_t block) const { return block; }
off_t VnodeToBlock(vnode_id id) const { return (off_t)id; }
status_t CreateIndicesRoot(Transaction *transaction);
// block allocation, forwarded to the block allocator
status_t AllocateForInode(Transaction *transaction,const Inode *parent,mode_t type,block_run &run);
status_t AllocateForInode(Transaction *transaction,const block_run *parent,mode_t type,block_run &run);
status_t Allocate(Transaction *transaction,const Inode *inode,off_t numBlocks,block_run &run,uint16 minimum = 1);
status_t Free(Transaction *transaction,block_run &run);
#ifdef DEBUG
BlockAllocator &Allocator() { return fBlockAllocator; }
#endif
status_t Sync();
// there is only one journal per volume for now, the reference block
// is ignored
Journal *GetJournal(off_t /*refBlock*/) const { return fJournal; }
status_t WriteSuperBlock();
status_t WriteBlocks(off_t blockNumber,const uint8 *block,uint32 numBlocks);
void WriteCachedBlocksIfNecessary();
status_t FlushDevice();
// live query support
void UpdateLiveQueries(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength);
void AddQuery(Query *query);
void RemoveQuery(Query *query);
// returns the current ID, and increments it for the next caller
uint32 GetUniqueID() { return atomic_add(&fUniqueID,1); }
protected:
nspace_id fID;
int fDevice;
disk_super_block fSuperBlock;
BlockAllocator fBlockAllocator;
Benaphore fLock;
Journal *fJournal;
vint32 fLogStart,fLogEnd;
Inode *fRootNode;
Inode *fIndicesNode;
// counts the blocks written via WriteBlocks(), see
// WriteCachedBlocksIfNecessary()
vint32 fDirtyCachedBlocks;
// protects fQueries
SimpleLock fQueryLock;
Chain<Query> fQueries;
int32 fUniqueID;
// see volume_flags
uint32 fFlags;
};
// inline functions
// Allocates the block run for a new inode of the given "type", with the
// block run of its parent given directly; forwarded to the block allocator.
inline status_t
Volume::AllocateForInode(Transaction *transaction, const block_run *parent, mode_t type, block_run &run)
{
return fBlockAllocator.AllocateForInode(transaction,parent,type,run);
}
// Allocates blocks for "inode", and returns them in "run"; forwarded to
// the block allocator.
// NOTE(review): "numBlocks" and "minimum" are presumably the desired and
// the smallest acceptable size of the run - confirm in BlockAllocator.
inline status_t
Volume::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks, block_run &run, uint16 minimum)
{
return fBlockAllocator.Allocate(transaction,inode,numBlocks,run,minimum);
}
// Frees the blocks of "run"; forwarded to the block allocator.
inline status_t
Volume::Free(Transaction *transaction, block_run &run)
{
return fBlockAllocator.Free(transaction,run);
}
// Writes "numBlocks" blocks starting at "blockNumber" through the block
// cache, and keeps track of the number of blocks written this way
// (see WriteCachedBlocksIfNecessary()).
inline status_t
Volume::WriteBlocks(off_t blockNumber, const uint8 *block, uint32 numBlocks)
{
// the counter is raised even if the write below fails
atomic_add(&fDirtyCachedBlocks,numBlocks);
return cached_write(fDevice,blockNumber,block,numBlocks,fSuperBlock.block_size);
}
// Triggers a flush of the block cache once more than 128 blocks have been
// written via WriteBlocks(), and takes 64 off the counter afterwards.
inline void
Volume::WriteCachedBlocksIfNecessary()
{
	// the specific values are only valid for the current BeOS cache
	if (fDirtyCachedBlocks <= 128)
		return;

	force_cache_flush(fDevice,false);
	atomic_add(&fDirtyCachedBlocks,-64);
}
// Flushes all cached blocks of the device to disk, and resets the counter
// of dirty blocks; returns the status of flush_device().
inline status_t
Volume::FlushDevice()
{
// the counter is reset before the outcome of the flush is known
fDirtyCachedBlocks = 0;
return flush_device(fDevice,0);
}
#endif /* VOLUME_H */

View File

@ -0,0 +1,298 @@
#ifndef BFS_H
#define BFS_H
/* bfs - BFS definitions and helper functions
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** Parts of this code is based on work previously done by Marcus Overhagen
**
** Copyright 2001 pinc Software. All Rights Reserved.
** This file may be used under the terms of the OpenBeOS License.
*/
#include <SupportDefs.h>
#ifndef B_BAD_DATA
# define B_BAD_DATA B_ERROR
#endif
// Addresses a run of consecutive blocks within one allocation group.
// This structure is part of the on-disk format (it is embedded in the
// super block and the inodes), so its layout must not be changed.
struct block_run
{
// the allocation group the run is located in
int32 allocation_group;
// the first block of the run, relative to the start of its group
// (see Volume::ToBlock())
uint16 start;
// the number of blocks in the run
uint16 length;
inline bool operator==(const block_run &run) const;
inline bool operator!=(const block_run &run) const;
// true if all three fields are zero
inline bool IsZero();
inline void SetTo(int32 group,uint16 start,uint16 length = 1);
// convenience function to create a block_run in an expression
inline static block_run Run(int32 group,uint16 start,uint16 length = 1);
};
typedef block_run inode_addr;
//**************************************
#define BFS_DISK_NAME_LENGTH 32
// The super block as it is stored on disk; Volume::Mount() reads it from
// byte offset 512 of the device, and Volume::IsValidSuperBlock() checks
// its contents. The layout must not be changed.
struct disk_super_block
{
char name[BFS_DISK_NAME_LENGTH];
// must be SUPER_BLOCK_MAGIC1
int32 magic1;
// must be SUPER_BLOCK_FS_LENDIAN
int32 fs_byte_order;
// must be identical to inode_size, and to 1 << block_shift
uint32 block_size;
uint32 block_shift;
off_t num_blocks;
off_t used_blocks;
int32 inode_size;
// must be SUPER_BLOCK_MAGIC2
int32 magic2;
int32 blocks_per_ag;
// 1 << ag_shift is the number of blocks of an allocation group
int32 ag_shift;
// must be num_blocks divided by the blocks per allocation group,
// rounded up
int32 num_ags;
// NOTE(review): presumably SUPER_BLOCK_DISK_CLEAN or
// SUPER_BLOCK_DISK_DIRTY - confirm
int32 flags;
// location of the log, and the current log pointers
block_run log_blocks;
off_t log_start;
off_t log_end;
// must be SUPER_BLOCK_MAGIC3
int32 magic3;
// the root directory, and the root directory of the indices
inode_addr root_dir;
inode_addr indices;
int32 pad[8];
};
#define SUPER_BLOCK_FS_LENDIAN 'BIGE' /* BIGE */
#define SUPER_BLOCK_MAGIC1 'BFS1' /* BFS1 */
#define SUPER_BLOCK_MAGIC2 0xdd121031
#define SUPER_BLOCK_MAGIC3 0x15b6830e
#define SUPER_BLOCK_DISK_CLEAN 'CLEN' /* CLEN */
#define SUPER_BLOCK_DISK_DIRTY 'DIRT' /* DIRT */
//**************************************
#define NUM_DIRECT_BLOCKS 12
// Describes where the data of an inode is stored on disk: in up to
// NUM_DIRECT_BLOCKS direct block runs, an indirect, and a double indirect
// block run. Part of the on-disk inode, the layout must not be changed.
// NOTE(review): the max_*_range fields presumably contain the highest
// stream offset covered by the respective level - confirm in the
// data stream code of Inode.
struct data_stream
{
block_run direct[NUM_DIRECT_BLOCKS];
off_t max_direct_range;
block_run indirect;
off_t max_indirect_range;
block_run double_indirect;
off_t max_double_indirect_range;
// the size of the stream
off_t size;
};
//**************************************
struct bfs_inode;
// Header of an item in the small data section of an inode (see
// bfs_inode::small_data_start); it is directly followed by the name and
// the data of the item. The layout must not be changed.
struct small_data
{
uint32 type;
// the sizes of the name and the data following this header
uint16 name_size;
uint16 data_size;
char name[0]; // name_size long, followed by data
inline char *Name();
inline uint8 *Data();
inline uint32 Size();
inline small_data *Next();
inline bool IsLast(bfs_inode *inode);
};
// the file name is part of the small_data structure
#define FILE_NAME_TYPE 'CSTR'
#define FILE_NAME_NAME 0x13
#define FILE_NAME_NAME_LENGTH 1
//**************************************
#define SHORT_SYMLINK_NAME_LENGTH 144 // length incl. terminating '\0'
// The inode as it is stored on disk; the small data section follows
// directly after the fixed part. The layout must not be changed.
struct bfs_inode
{
// NOTE(review): presumably has to be INODE_MAGIC1 - confirm
int32 magic1;
// the location of the inode itself
inode_addr inode_num;
int32 uid;
int32 gid;
int32 mode; // see sys/stat.h
// see inode_flags
int32 flags;
bigtime_t create_time;
bigtime_t last_modified_time;
inode_addr parent;
inode_addr attributes;
uint32 type; // attribute type
int32 inode_size;
uint32 etc; // for in-memory structures (unused in OpenBeOS' fs)
// either the data stream, or the target of a short symlink
union {
data_stream data;
char short_symlink[SHORT_SYMLINK_NAME_LENGTH];
};
int32 pad[4];
// start of the small data section
small_data small_data_start[0];
};
// Constants that accompany bfs_inode.
// NOTE(review): INODE_TIME_SHIFT/INODE_TIME_MASK presumably describe how the
// bigtime_t time fields are packed (upper bits vs. low 16 bits) — confirm
// against the code that reads and writes create_time/last_modified_time.
#define INODE_MAGIC1 0x3bbe0ad9
#define INODE_TIME_SHIFT 16
#define INODE_TIME_MASK 0xffff
#define INODE_FILE_NAME_LENGTH 256
// Bit flags for an inode. INODE_PERMANENT_FLAGS is a mask that covers the
// low 16 bits; the three flags listed after it lie above that mask.
// NOTE(review): the flags above the mask are presumably kept in memory only
// and never written to disk — confirm.
enum inode_flags
{
INODE_IN_USE = 0x00000001, // always set
INODE_ATTR_INODE = 0x00000004,
INODE_LOGGED = 0x00000008, // log changes to the data stream
INODE_DELETED = 0x00000010,
INODE_EMPTY = 0x00000020,
INODE_LONG_SYMLINK = 0x00000040, // symlink in data stream
INODE_PERMANENT_FLAGS = 0x0000ffff,
INODE_NO_CACHE = 0x00010000,
INODE_WAS_WRITTEN = 0x00020000,
INODE_NO_TRANSACTION = 0x00040000,
};
//**************************************
// Per-open-file state: the time of the last notification, the size the file
// had at that point, and the mode the file was opened with.
// NOTE(review): presumably one instance is allocated per open() and handed
// back to the kernel as the cookie — confirm in the open hook.
struct file_cookie {
bigtime_t last_notification;
off_t last_size;
int open_mode;
};
// notify every second if the file size has changed
// (the value is one million, i.e. one second if bigtime_t counts microseconds)
#define INODE_NOTIFICATION_INTERVAL 1000000LL
//**************************************
/**	Divides \a num by \a divisor; for positive arguments the quotient is
 *	rounded up to the next integer instead of being truncated.
 */
inline int32
divide_roundup(int32 num,int32 divisor)
{
	// biasing the dividend by (divisor - 1) turns the truncating division
	// into one that rounds up
	const int32 biased = num + divisor - 1;
	return biased / divisor;
}
/**	64 bit variant: divides \a num by \a divisor; for positive arguments
 *	the quotient is rounded up to the next integer.
 */
inline int64
divide_roundup(int64 num,int32 divisor)
{
	// the sum is computed in 64 bits because "num" is an int64
	const int64 biased = num + divisor - 1;
	return biased / divisor;
}
/**	Counts how often \a i can be shifted right by one bit before it drops
 *	to 1 or below, i.e. the position of its highest set bit.
 *	Returns 0 for both 0 and 1.
 */
inline int
get_shift(uint64 i)
{
	int shift = 0;

	for (; i > 1; i >>= 1)
		shift++;

	return shift;
}
/**	Rounds \a data up to the next multiple of sizeof(off_t).
 *	The bit trick relies on sizeof(off_t) being a power of two.
 */
inline int32
round_up(uint32 data)
{
	// add "alignment - 1", then clear the low bits again
	return (data + (sizeof(off_t) - 1)) & ~(sizeof(off_t) - 1);
}
/************************ block_run inline functions ************************/
// #pragma mark -
/**	Two runs are equal if allocation group, start, and length all match. */
inline bool
block_run::operator==(const block_run &run) const
{
	if (allocation_group != run.allocation_group)
		return false;
	if (start != run.start)
		return false;

	return length == run.length;
}
/**	Two runs differ as soon as one of allocation group, start, or length
 *	does not match.
 */
inline bool
block_run::operator!=(const block_run &run) const
{
	if (allocation_group != run.allocation_group)
		return true;
	if (start != run.start)
		return true;

	return length != run.length;
}
/**	Returns true only if allocation group, start, and length are all zero. */
inline bool
block_run::IsZero()
{
	if (allocation_group != 0 || start != 0)
		return false;

	return length == 0;
}
/**	Overwrites all three fields of this run with the given values. */
inline void
block_run::SetTo(int32 _group,uint16 _start,uint16 _length)
{
	length = _length;
	start = _start;
	allocation_group = _group;
}
/**	Builds a block_run from its three components and returns it by value. */
inline block_run
block_run::Run(int32 group, uint16 start, uint16 length)
{
	block_run result;

	result.length = length;
	result.start = start;
	result.allocation_group = group;

	return result;
}
/************************ small_data inline functions ************************/
// #pragma mark -
/**	Returns a pointer to the first byte of the name, which directly follows
 *	the fixed part of the structure.
 */
inline char *
small_data::Name()
{
	return &name[0];
}
/**	Returns a pointer to the data: it starts name_size + 3 bytes after the
 *	beginning of the name.
 */
inline uint8 *
small_data::Data()
{
	uint8 *nameStart = (uint8 *)name;
	return nameStart + (name_size + 3);
}
/**	Returns the number of bytes this item occupies: the fixed header,
 *	name_size + 3 bytes for the name part and data_size + 1 bytes for
 *	the data part.
 */
inline uint32
small_data::Size()
{
	uint32 size = sizeof(small_data);

	size += name_size + 3;
	size += data_size + 1;

	return size;
}
/**	Returns the item that starts Size() bytes after the beginning of this
 *	one. No bounds checking is done here; see IsLast().
 */
inline small_data *
small_data::Next()
{
	uint8 *self = (uint8 *)this;
	return (small_data *)(self + Size());
}
/**	Tells whether this item marks the end of the small_data list of \a inode.
 *	That is the case if the item starts beyond the last position at which a
 *	small_data header could still fit into the inode (as given by
 *	inode->inode_size), or if its name_size is zero.
 *
 *	The addresses are compared as byte pointers. The previous version cast
 *	them to uint32, which cuts off the upper half of the address on targets
 *	where pointers are wider than 32 bits.
 */
inline bool
small_data::IsLast(bfs_inode *inode)
{
	// last address at which a complete small_data header still fits
	const uint8 *lastPossible = (const uint8 *)inode + inode->inode_size
		- sizeof(small_data);

	// The location has to be checked first: if this item already lies beyond
	// the inode, reading name_size would touch memory outside of it.
	return (const uint8 *)this > lastPossible || name_size == 0;
}
#endif /* BFS_H */

View File

@ -0,0 +1,108 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _CACHE_H_
#define _CACHE_H_
#include <BeBuild.h>
/* One entry of the hash table; entries are singly linked via "next".
 * NOTE(review): presumably "next" chains the entries that share a bucket and
 * "hash_val" caches the hash computed from dev/bnum - confirm in the
 * implementation. */
typedef struct hash_ent {
int dev;
off_t bnum;
off_t hash_val;
void *data;
struct hash_ent *next;
} hash_ent;
/* Hash table made of an array of hash_ent pointers. "mask" is always
 * max - 1 (see below), which suggests that "max" is kept a power of two so
 * that the mask can be used to pick a bucket - TODO confirm. */
typedef struct hash_table {
hash_ent **table;
int max;
int mask; /* == max - 1 */
int num_elements;
} hash_table;
/* NOTE(review): presumably the initial value of hash_table::max - confirm */
#define HT_DEFAULT_MAX 128
/* Describes one block held in the cache. Entries are kept in a doubly linked
 * list ordered by usage (see "next"/"prev"); "flags" takes the CE_* values
 * defined below. "func" and "arg" have the same types as the callback
 * parameters of set_blocks_info().
 * NOTE(review): presumably "func" is invoked with "arg" once the block has
 * been written back - confirm in the implementation. */
typedef struct cache_ent {
int dev;
off_t block_num;
int bsize;
volatile int flags;
void *data;
void *clone; /* copy of data by set_block_info() */
int lock;
void (*func)(off_t bnum, size_t num_blocks, void *arg);
off_t logged_bnum;
void *arg;
struct cache_ent *next, /* points toward mru end of list */
*prev; /* points toward lru end of list */
} cache_ent;
/* values for cache_ent::flags */
#define CE_NORMAL 0x0000 /* a nice clean pristine page */
#define CE_DIRTY 0x0002 /* needs to be written to disk */
#define CE_BUSY 0x0004 /* this block has i/o happening, don't touch it */
/* Both ends of a list of cache entries: "lru" is its tail and "mru" its
 * head (the names suggest least/most recently used ordering). */
typedef struct cache_ent_list {
cache_ent *lru; /* tail of the list */
cache_ent *mru; /* head of the list */
} cache_ent_list;
/* State of the block cache: a lock, the current and maximum number of
 * blocks, the hash table, and two entry lists ("normal" and "locked").
 * NOTE(review): presumably "lock" guards all other fields - confirm. */
typedef struct block_cache {
struct lock lock;
int flags;
int cur_blocks;
int max_blocks;
hash_table ht;
cache_ent_list normal, /* list of "normal" blocks (clean & dirty) */
locked; /* list of clean and locked blocks */
} block_cache;
#if 0 /* XXXdbg -- need to deal with write through caches */
#define DC_WRITE_THROUGH 0x0001 /* cache is write-through (for floppies) */
#endif
/* NOTE(review): presumably the values for the "allow_write" parameter of
 * remove_cached_device_blocks() - confirm */
#define ALLOW_WRITES 1
#define NO_WRITES 0
/* Block cache functions imported from the kernel (_IMPEXP_KERNEL). Only the
 * prototypes are visible here; what each function does has to be taken from
 * its implementation.
 * In all of them "dev"/"fd" identifies the device, "bnum" is a block number
 * and "bsize" a block size. */

/* setup and teardown of the cache as a whole */
extern _IMPEXP_KERNEL int init_block_cache(int max_blocks, int flags);
extern _IMPEXP_KERNEL void shutdown_block_cache(void);
/* flushing */
extern _IMPEXP_KERNEL void force_cache_flush(int dev, int prefer_log_blocks);
extern _IMPEXP_KERNEL int flush_blocks(int dev, off_t bnum, int nblocks);
extern _IMPEXP_KERNEL int flush_device(int dev, int warn_locked);
/* per-device setup and teardown */
extern _IMPEXP_KERNEL int init_cache_for_device(int fd, off_t max_blocks);
extern _IMPEXP_KERNEL int remove_cached_device_blocks(int dev, int allow_write);
/* access to single blocks; NOTE(review): presumably every get_block() or
 * get_empty_block() has to be paired with a release_block() - confirm */
extern _IMPEXP_KERNEL void *get_block(int dev, off_t bnum, int bsize);
extern _IMPEXP_KERNEL void *get_empty_block(int dev, off_t bnum, int bsize);
extern _IMPEXP_KERNEL int release_block(int dev, off_t bnum);
extern _IMPEXP_KERNEL int mark_blocks_dirty(int dev, off_t bnum, int nblocks);
/* copying block ranges from/to a caller supplied buffer */
extern _IMPEXP_KERNEL int cached_read(int dev, off_t bnum, void *data, off_t num_blocks, int bsize);
extern _IMPEXP_KERNEL int cached_write(int dev, off_t bnum, const void *data,
off_t num_blocks, int bsize);
extern _IMPEXP_KERNEL int cached_write_locked(int dev, off_t bnum, const void *data,
off_t num_blocks, int bsize);
/* registers a callback ("func" with "arg") for the given list of blocks */
extern _IMPEXP_KERNEL int set_blocks_info(int dev, off_t *blocks, int nblocks,
void (*func)(off_t bnum, size_t nblocks, void *arg),
void *arg);
/* NOTE(review): presumably these bypass the cache and access the device
 * directly - confirm */
extern _IMPEXP_KERNEL size_t read_phys_blocks (int fd, off_t bnum, void *data, uint num_blocks, int bsize);
extern _IMPEXP_KERNEL size_t write_phys_blocks(int fd, off_t bnum, void *data, uint num_blocks, int bsize);
#endif /* _CACHE_H_ */

View File

@ -0,0 +1,17 @@
/* cpp - C++ in the kernel
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include "cpp.h"
// the one instance of nothrow_t that the "new" macro in cpp.h passes to the
// nothrow versions of operator new
nothrow_t _dontthrow;
// Defined with C linkage and an empty body: it silently returns.
// NOTE(review): presumably the compiler's runtime calls this when a pure
// virtual function is invoked — confirm for the toolchain in use.
extern "C" void __pure_virtual()
{
//printf("pure virtual function call");
}

View File

@ -0,0 +1,52 @@
#ifndef CPP_H
#define CPP_H
/* cpp - C++ in the kernel
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/
#include <new>
#include <stdlib.h>
// Oh no! C++ in the kernel! Are you nuts?
//
// - no exceptions
// - (almost) no virtuals (well, the Query code now uses them)
// - it's basically only the C++ syntax, and type checking
// - since one tends to encapsulate everything in classes, it has a slightly
// higher memory overhead
// - nicer code
// - easier to maintain
// Non-throwing operator new: hands out memory from malloc() and returns
// whatever malloc() returned, so the result is NULL if the allocation failed.
inline void *operator new(size_t size, const nothrow_t&) throw()
{
	void *memory = malloc(size);
	return memory;
}
// Non-throwing array version of operator new: also backed by malloc(), and
// returns NULL if malloc() does.
inline void *operator new[](size_t size, const nothrow_t&) throw()
{
	void *memory = malloc(size);
	return memory;
}
// Counterpart of the malloc() based operator new above: releases the memory
// with free().
inline void operator delete(void *ptr)
{
free(ptr);
}
// Counterpart of the malloc() based operator new[] above: releases the
// memory with free().
inline void operator delete[](void *ptr)
{
free(ptr);
}
// now we're using virtuals
// (__pure_virtual() and _dontthrow are both defined in cpp.cpp)
extern "C" void __pure_virtual();
extern nothrow_t _dontthrow;
// Rewrites every "new" that follows this header into "new (_dontthrow)", so
// that the nothrow versions of operator new defined above get selected.
#define new new (_dontthrow)
#endif /* CPP_H */

View File

@ -0,0 +1,249 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _FSPROTO_H
#define _FSPROTO_H
#include <sys/dirent.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <unistd.h>
#include <iovec.h>
#include <OS.h>
#include <fs_attr.h>
#include <fs_info.h>
#include <BeBuild.h>
#include <Drivers.h>
/* identifier types used throughout the protocol: nspace_id is a dev_t,
 * vnode_id an ino_t */
typedef dev_t nspace_id;
typedef ino_t vnode_id;
/*
* PUBLIC PART OF THE FILE SYSTEM PROTOCOL
*/
/* bits for the "mask" parameter of op_wstat; each one names a stat field */
#define WSTAT_MODE 0x0001
#define WSTAT_UID 0x0002
#define WSTAT_GID 0x0004
#define WSTAT_SIZE 0x0008
#define WSTAT_ATIME 0x0010
#define WSTAT_MTIME 0x0020
#define WSTAT_CRTIME 0x0040
/* bit for the "mask" parameter of op_wfsstat */
#define WFSSTAT_NAME 0x0001
/* NOTE(review): presumably the opcodes passed as "op" to notify_listener()
 * and send_notification() - confirm */
#define B_ENTRY_CREATED 1
#define B_ENTRY_REMOVED 2
#define B_ENTRY_MOVED 3
#define B_STAT_CHANGED 4
#define B_ATTR_CHANGED 5
#define B_DEVICE_MOUNTED 6
#define B_DEVICE_UNMOUNTED 7
/* node watching flags; the B_WATCH_* values are single bits */
#define B_STOP_WATCHING 0x0000
#define B_WATCH_NAME 0x0001
#define B_WATCH_STAT 0x0002
#define B_WATCH_ATTR 0x0004
#define B_WATCH_DIRECTORY 0x0008
/* NOTE(review): presumably the values of the "event" parameter of
 * op_select/op_deselect - confirm */
#define SELECT_READ 1
#define SELECT_WRITE 2
#define SELECT_EXCEPTION 3
// missing ioctl() call added
#define IOCTL_FILE_UNCACHED_IO 10000
/* NOTE(review): presumably the value a file system has to export in
 * "api_version" - confirm */
#define B_CUR_FS_API_VERSION 2
/* only used through pointers in this header, hence just declared */
struct attr_info;
struct index_info;
/* Function types of the file system hooks; struct vnode_ops below stores one
 * pointer per hook. All hooks return an int.
 * In the hooks that have them, "ns" and "node" are untyped pointers, and
 * "cookie" is handed out through a "void **" by the open style hooks and
 * passed back as a "void *" to the others.
 * NOTE(review): presumably "ns" is the volume data returned by op_mount in
 * "data", "node" the data returned by op_read_vnode, and the int result a
 * status code - confirm with the implementation. */

/* vnode life cycle */
typedef int op_read_vnode(void *ns, vnode_id vnid, char r, void **node);
typedef int op_write_vnode(void *ns, void *node, char r);
typedef int op_remove_vnode(void *ns, void *node, char r);
typedef int op_secure_vnode(void *ns, void *node);
/* name space operations */
typedef int op_walk(void *ns, void *base, const char *file, char **newpath,
vnode_id *vnid);
typedef int op_access(void *ns, void *node, int mode);
typedef int op_create(void *ns, void *dir, const char *name,
int omode, int perms, vnode_id *vnid, void **cookie);
typedef int op_mkdir(void *ns, void *dir, const char *name, int perms);
typedef int op_symlink(void *ns, void *dir, const char *name,
const char *path);
typedef int op_link(void *ns, void *dir, const char *name, void *node);
typedef int op_rename(void *ns, void *olddir, const char *oldname,
void *newdir, const char *newname);
typedef int op_unlink(void *ns, void *dir, const char *name);
typedef int op_rmdir(void *ns, void *dir, const char *name);
typedef int op_readlink(void *ns, void *node, char *buf, size_t *bufsize);
/* directories */
typedef int op_opendir(void *ns, void *node, void **cookie);
typedef int op_closedir(void *ns, void *node, void *cookie);
typedef int op_rewinddir(void *ns, void *node, void *cookie);
typedef int op_readdir(void *ns, void *node, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
/* files */
typedef int op_open(void *ns, void *node, int omode, void **cookie);
typedef int op_close(void *ns, void *node, void *cookie);
typedef int op_free_cookie(void *ns, void *node, void *cookie);
typedef int op_read(void *ns, void *node, void *cookie, off_t pos, void *buf,
size_t *len);
typedef int op_write(void *ns, void *node, void *cookie, off_t pos,
const void *buf, size_t *len);
typedef int op_readv(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
size_t count, size_t *len);
typedef int op_writev(void *ns, void *node, void *cookie, off_t pos, const iovec *vec,
size_t count, size_t *len);
typedef int op_ioctl(void *ns, void *node, void *cookie, int cmd, void *buf,
size_t len);
typedef int op_setflags(void *ns, void *node, void *cookie, int flags);
typedef int op_rstat(void *ns, void *node, struct stat *);
typedef int op_wstat(void *ns, void *node, struct stat *, long mask);
typedef int op_fsync(void *ns, void *node);
typedef int op_select(void *ns, void *node, void *cookie, uint8 event,
uint32 ref, selectsync *sync);
typedef int op_deselect(void *ns, void *node, void *cookie, uint8 event,
selectsync *sync);
/* volume wide operations */
typedef int op_initialize(const char *devname, void *parms, size_t len);
typedef int op_mount(nspace_id nsid, const char *devname, ulong flags,
void *parms, size_t len, void **data, vnode_id *vnid);
typedef int op_unmount(void *ns);
typedef int op_sync(void *ns);
typedef int op_rfsstat(void *ns, struct fs_info *);
typedef int op_wfsstat(void *ns, struct fs_info *, long mask);
/* attributes */
typedef int op_open_attrdir(void *ns, void *node, void **cookie);
typedef int op_close_attrdir(void *ns, void *node, void *cookie);
typedef int op_rewind_attrdir(void *ns, void *node, void *cookie);
typedef int op_read_attrdir(void *ns, void *node, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
typedef int op_remove_attr(void *ns, void *node, const char *name);
typedef int op_rename_attr(void *ns, void *node, const char *oldname,
const char *newname);
typedef int op_stat_attr(void *ns, void *node, const char *name,
struct attr_info *buf);
typedef int op_write_attr(void *ns, void *node, const char *name, int type,
const void *buf, size_t *len, off_t pos);
typedef int op_read_attr(void *ns, void *node, const char *name, int type,
void *buf, size_t *len, off_t pos);
/* indices */
typedef int op_open_indexdir(void *ns, void **cookie);
typedef int op_close_indexdir(void *ns, void *cookie);
typedef int op_rewind_indexdir(void *ns, void *cookie);
typedef int op_read_indexdir(void *ns, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
typedef int op_create_index(void *ns, const char *name, int type, int flags);
typedef int op_remove_index(void *ns, const char *name);
typedef int op_rename_index(void *ns, const char *oldname,
const char *newname);
typedef int op_stat_index(void *ns, const char *name, struct index_info *buf);
/* queries */
typedef int op_open_query(void *ns, const char *query, ulong flags,
port_id port, long token, void **cookie);
typedef int op_close_query(void *ns, void *cookie);
typedef int op_read_query(void *ns, void *cookie, long *num,
struct dirent *buf, size_t bufsize);
/* Table of hook function pointers, one member per file system operation.
 * A file system exports an instance of it under the name "fs_entry" (see the
 * declaration near the end of this header).
 * Note that the five free_*cookie members (free_dircookie, free_cookie,
 * free_indexdircookie, free_attrdircookie, free_querycookie) all share the
 * op_free_cookie function type.
 * The member order is part of the binary interface and must not be changed.
 * NOTE(review): presumably hooks a file system does not implement are left
 * NULL - confirm. */
typedef struct vnode_ops {
op_read_vnode (*read_vnode);
op_write_vnode (*write_vnode);
op_remove_vnode (*remove_vnode);
op_secure_vnode (*secure_vnode);
op_walk (*walk);
op_access (*access);
op_create (*create);
op_mkdir (*mkdir);
op_symlink (*symlink);
op_link (*link);
op_rename (*rename);
op_unlink (*unlink);
op_rmdir (*rmdir);
op_readlink (*readlink);
op_opendir (*opendir);
op_closedir (*closedir);
op_free_cookie (*free_dircookie);
op_rewinddir (*rewinddir);
op_readdir (*readdir);
op_open (*open);
op_close (*close);
op_free_cookie (*free_cookie);
op_read (*read);
op_write (*write);
op_readv (*readv);
op_writev (*writev);
op_ioctl (*ioctl);
op_setflags (*setflags);
op_rstat (*rstat);
op_wstat (*wstat);
op_fsync (*fsync);
op_initialize (*initialize);
op_mount (*mount);
op_unmount (*unmount);
op_sync (*sync);
op_rfsstat (*rfsstat);
op_wfsstat (*wfsstat);
op_select (*select);
op_deselect (*deselect);
op_open_indexdir (*open_indexdir);
op_close_indexdir (*close_indexdir);
op_free_cookie (*free_indexdircookie);
op_rewind_indexdir (*rewind_indexdir);
op_read_indexdir (*read_indexdir);
op_create_index (*create_index);
op_remove_index (*remove_index);
op_rename_index (*rename_index);
op_stat_index (*stat_index);
op_open_attrdir (*open_attrdir);
op_close_attrdir (*close_attrdir);
op_free_cookie (*free_attrdircookie);
op_rewind_attrdir (*rewind_attrdir);
op_read_attrdir (*read_attrdir);
op_write_attr (*write_attr);
op_read_attr (*read_attr);
op_remove_attr (*remove_attr);
op_rename_attr (*rename_attr);
op_stat_attr (*stat_attr);
op_open_query (*open_query);
op_close_query (*close_query);
op_free_cookie (*free_querycookie);
op_read_query (*read_query);
} vnode_ops;
/* Services imported from the kernel (_IMPEXP_KERNEL). Only the prototypes
 * are visible here; the exact semantics have to be taken from the kernel. */

/* path helpers; NOTE(review): presumably a copy made by new_path() has to be
 * released with free_path() - confirm */
extern _IMPEXP_KERNEL int new_path(const char *path, char **copy);
extern _IMPEXP_KERNEL void free_path(char *p);
/* notifications; "op" presumably takes the B_ENTRY_CREATED ... constants
 * defined above - confirm */
extern _IMPEXP_KERNEL int notify_listener(int op, nspace_id nsid,
vnode_id vnida, vnode_id vnidb,
vnode_id vnidc, const char *name);
extern _IMPEXP_KERNEL void notify_select_event(selectsync *sync, uint32 ref);
extern _IMPEXP_KERNEL int send_notification(port_id port, long token,
ulong what, long op, nspace_id nsida,
nspace_id nsidb, vnode_id vnida,
vnode_id vnidb, vnode_id vnidc,
const char *name);
/* vnode management, addressed by volume (nsid) and vnode (vnid) id;
 * NOTE(review): presumably every get_vnode() has to be balanced by a
 * put_vnode() - confirm */
extern _IMPEXP_KERNEL int get_vnode(nspace_id nsid, vnode_id vnid, void **data);
extern _IMPEXP_KERNEL int put_vnode(nspace_id nsid, vnode_id vnid);
extern _IMPEXP_KERNEL int new_vnode(nspace_id nsid, vnode_id vnid, void *data);
extern _IMPEXP_KERNEL int remove_vnode(nspace_id nsid, vnode_id vnid);
extern _IMPEXP_KERNEL int unremove_vnode(nspace_id nsid, vnode_id vnid);
extern _IMPEXP_KERNEL int is_vnode_removed(nspace_id nsid, vnode_id vnid);
/* symbols the file system itself has to define and export (_EXPORT): its
 * hook table and its API version (presumably B_CUR_FS_API_VERSION) */
extern _EXPORT vnode_ops fs_entry;
extern _EXPORT int32 api_version;
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,47 @@
/*
Copyright 1999-2001, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
*/
#ifndef _LOCK_H
#define _LOCK_H
#include <BeBuild.h>
#include <OS.h>
/* C++ builds get C linkage for everything below (closed at the end of this
 * header). C builds get typedefs instead, so that "lock" and "mlock" can be
 * used without the "struct" keyword, as the prototypes below do. */
#ifdef __cplusplus
extern "C" {
#else
typedef struct lock lock;
typedef struct mlock mlock;
#endif
/* Lock made of a semaphore and a counter; the LOCK()/UNLOCK() macros below
 * change "c" with atomic_add() and only touch the semaphore "s" depending on
 * the value atomic_add() returns. */
struct lock {
sem_id s;
long c;
};
/* Lock that consists of a semaphore only; LOCKM()/UNLOCKM() below acquire
 * and release it with an explicit count. */
struct mlock {
sem_id s;
};
/* creation and destruction of a "lock" */
extern _IMPEXP_KERNEL int new_lock(lock *l, const char *name);
extern _IMPEXP_KERNEL int free_lock(lock *l);
/* LOCK(l) adds -1 to l.c and acquires l.s if atomic_add() returned a value
 * <= 0; UNLOCK(l) adds 1 and releases l.s if atomic_add() returned a value
 * < 0. "l" is the lock object itself, not a pointer to it.
 *
 * The macro argument is parenthesized so that expressions such as *ptr or
 * array[i] work as "l" (without the parentheses LOCK(*ptr) would expand to
 * &*ptr.c).
 *
 * NOTE: both macros expand to a complete "if" statement including the
 * terminating semicolon; this shape is kept for existing callers. Do not use
 * them as the unbraced body of an if/else - wrap them in braces there. */
#define LOCK(l) if (atomic_add(&(l).c, -1) <= 0) acquire_sem((l).s);
#define UNLOCK(l) if (atomic_add(&(l).c, 1) < 0) release_sem((l).s);
/* creation and destruction of an "mlock"; "c" is a count.
 * NOTE(review): presumably the initial count of the semaphore - confirm */
extern _IMPEXP_KERNEL int new_mlock(mlock *l, long c, const char *name);
extern _IMPEXP_KERNEL int free_mlock(mlock *l);
/* acquire/release the semaphore of the mlock "l" (the object itself, not a
 * pointer) "cnt" times at once; these are expressions that yield the result
 * of acquire_sem_etc()/release_sem_etc() */
#define LOCKM(l,cnt) acquire_sem_etc((l).s, (cnt), 0, 0)
#define UNLOCKM(l,cnt) release_sem_etc((l).s, (cnt), 0)
#ifdef __cplusplus
} // extern "C"
#endif
#endif