* Changed the inode write locking to be held as long as the transaction is
  running - this fixes several cases where someone could see outdated data
  when a transaction had to be reverted (the time between unlocking the inode
  and actually reverting the blocks). For that, Inodes can now be put into a
  singly linked list.
* Added a TODO in Inode::WriteAt() which explains why it cannot use the above
  method: seems that our VFS/VM locking model isn't really that good.
* Fixed a possible deadlock in Attribute::_Truncate() where the inode write
  lock was held before starting the transaction.
* Added an InodeReadLocker convenience class that should be used instead
  of ReadLocker - Inode::Lock() still exists only because of the needs of
  bfs_io().
* Moved the bfs_io() callback hooks out of the exported module API region,
  and removed their bfs_ prefix.
* Added a Volume::IsInitializing() method that should be used rather than
  checking if Volume::ID() is >= 0.
* Removed the MultiWriteLocker again, as it's pretty much superfluous now.
* Moved openModeToAccess() to the Utility.h header.
* Minor cleanup.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@26715 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2008-08-01 12:28:28 +00:00
parent ebaab6d5f4
commit 2e3477e3d8
11 changed files with 474 additions and 387 deletions

View File

@ -58,9 +58,10 @@ Attribute::CheckAccess(const char *name, int openMode)
// Opening the name attribute using this function is not allowed,
// also using the reserved indices name, last_modified, and size
// shouldn't be allowed.
// ToDo: we might think about allowing to update those values, but
// TODO: we might think about allowing to update those values, but
// really change their corresponding values in the bfs_inode structure
if (name[0] == FILE_NAME_NAME && name[1] == '\0'
// TODO: reenable this check -- some WonderBrush locale files used them
/* || !strcmp(name, "name")
|| !strcmp(name, "last_modified")
|| !strcmp(name, "size")*/)
@ -223,9 +224,9 @@ Attribute::_Truncate()
}
if (fAttribute != NULL) {
WriteLocker locker(fAttribute->Lock());
Transaction transaction(fAttribute->GetVolume(),
fAttribute->BlockNumber());
fAttribute->WriteLockInTransaction(transaction);
status_t status = fAttribute->SetFileSize(transaction, 0);
if (status >= B_OK)

View File

@ -241,7 +241,7 @@ CachedNode::Free(Transaction &transaction, off_t offset)
if (fTree == NULL || fTree->fStream == NULL || offset == BPLUSTREE_NULL)
RETURN_ERROR(B_BAD_VALUE);
// ToDo: scan the free nodes list and remove all nodes at the end
// TODO: scan the free nodes list and remove all nodes at the end
// of the tree - perhaps that shouldn't be done everytime that
// function is called, perhaps it should be done when the directory
// inode is closed or based on some calculation or whatever...
@ -1258,7 +1258,7 @@ BPlusTree::Insert(Transaction &transaction, const uint8 *key, uint16 keyLength,
panic("tried to insert invalid value %Ld!\n", value);
#endif
ASSERT_WRITE_LOCKED_RW_LOCK(&fStream->Lock());
ASSERT_WRITE_LOCKED_INODE(fStream);
Stack<node_and_key> stack;
if (_SeekDown(stack, key, keyLength) != B_OK)
@ -1649,7 +1649,7 @@ BPlusTree::Remove(Transaction &transaction, const uint8 *key, uint16 keyLength,
|| keyLength > BPLUSTREE_MAX_KEY_LENGTH)
RETURN_ERROR(B_BAD_VALUE);
ASSERT_WRITE_LOCKED_RW_LOCK(&fStream->Lock());
ASSERT_WRITE_LOCKED_INODE(fStream);
Stack<node_and_key> stack;
if (_SeekDown(stack, key, keyLength) != B_OK)
@ -1766,7 +1766,7 @@ BPlusTree::Replace(Transaction &transaction, const uint8 *key,
if (fAllowDuplicates)
RETURN_ERROR(B_BAD_TYPE);
ASSERT_WRITE_LOCKED_RW_LOCK(&fStream->Lock());
ASSERT_WRITE_LOCKED_INODE(fStream);
off_t nodeOffset = fHeader->RootNode();
CachedNode cached(this);
@ -1819,7 +1819,7 @@ BPlusTree::Find(const uint8 *key, uint16 keyLength, off_t *_value)
if (fAllowDuplicates)
RETURN_ERROR(B_BAD_TYPE);
ASSERT_READ_LOCKED_RW_LOCK(&fStream->Lock());
ASSERT_READ_LOCKED_INODE(fStream);
off_t nodeOffset = fHeader->RootNode();
CachedNode cached(this);
@ -1883,7 +1883,7 @@ TreeIterator::Goto(int8 to)
RETURN_ERROR(B_BAD_VALUE);
// lock access to stream
ReadLocker locker(fTree->fStream->Lock());
InodeReadLocker locker(fTree->fStream);
off_t nodeOffset = fTree->fHeader->RootNode();
CachedNode cached(fTree);
@ -1953,7 +1953,7 @@ TreeIterator::Traverse(int8 direction, void *key, uint16 *keyLength,
return B_ENTRY_NOT_FOUND;
// lock access to stream
ReadLocker locker(fTree->fStream->Lock());
InodeReadLocker locker(fTree->fStream);
CachedNode cached(fTree);
const bplustree_node *node;
@ -2088,7 +2088,7 @@ TreeIterator::Find(const uint8 *key, uint16 keyLength)
RETURN_ERROR(B_BAD_VALUE);
// lock access to stream
ReadLocker locker(fTree->fStream->Lock());
InodeReadLocker locker(fTree->fStream);
off_t nodeOffset = fTree->fHeader->RootNode();

View File

@ -75,7 +75,7 @@ Index::SetTo(const char *name)
if (indices == NULL)
return B_ENTRY_NOT_FOUND;
ReadLocker locker(indices->Lock());
InodeReadLocker locker(indices);
BPlusTree *tree;
if (indices->GetTree(&tree) != B_OK)
@ -201,7 +201,6 @@ Index::Create(Transaction &transaction, const char *name, uint32 type)
return B_BAD_TYPE;
}
// do we need to create the index directory first?
if (fVolume->IndicesNode() == NULL) {
status_t status = fVolume->CreateIndicesRoot(transaction);
@ -270,7 +269,7 @@ Index::Update(Transaction &transaction, const char *name, int32 type,
// remove the old key from the tree
WriteLocker locker(Node()->Lock());
Node()->WriteLockInTransaction(transaction);
if (oldKey != NULL) {
status = tree->Remove(transaction, (const uint8 *)oldKey, oldLength,

View File

@ -169,7 +169,6 @@ InodeAllocator::~InodeAllocator()
fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
// this unblocks any pending bfs_read_vnode() calls
fInode->Free(*fTransaction);
rw_lock_write_unlock(&fInode->Lock());
remove_vnode(volume->FSVolume(), fInode->ID());
} else
volume->Free(*fTransaction, fRun);
@ -199,7 +198,7 @@ InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run,
if (fInode == NULL)
RETURN_ERROR(B_NO_MEMORY);
if (volume->ID() >= 0) {
if (!volume->IsInitializing()) {
status = new_vnode(volume->FSVolume(), fInode->ID(), fInode,
vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps);
if (status < B_OK) {
@ -209,7 +208,7 @@ InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run,
}
}
rw_lock_write_lock(&fInode->Lock());
fInode->WriteLockInTransaction(*fTransaction);
*_inode = fInode;
return B_OK;
}
@ -261,8 +260,6 @@ InodeAllocator::Keep(fs_vnode_ops *vnodeOps, uint32 publishFlags)
TRANSACTION_ABORTED, &_TransactionListener, fInode);
}
rw_lock_write_unlock(&fInode->Lock());
fTransaction = NULL;
fInode = NULL;
@ -305,7 +302,7 @@ bfs_inode::InitCheck(Volume *volume)
if (Flags() & INODE_DELETED)
return B_NOT_ALLOWED;
// ToDo: Add some tests to check the integrity of the other stuff here,
// TODO: Add some tests to check the integrity of the other stuff here,
// especially for the data_stream!
return B_OK;
@ -1059,7 +1056,8 @@ Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type,
}
if (attribute != NULL) {
if (rw_lock_write_lock(&attribute->Lock()) == B_OK) {
// TODO: we need to lock the inode in the transaction, see WriteAt()!
if (rw_lock_write_lock(&attribute->fLock) == B_OK) {
// Save the old attribute data (if this fails, oldLength will
// reflect it)
if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) {
@ -1075,14 +1073,14 @@ Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type,
if (status == B_OK) {
// it does - remove its file
rw_lock_write_unlock(&attribute->Lock());
rw_lock_write_unlock(&attribute->fLock);
status = _RemoveAttribute(transaction, name, false, NULL);
} else {
// The attribute type might have been changed - we need to
// adopt the new one
attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type);
status = attribute->WriteBack(transaction);
rw_lock_write_unlock(&attribute->Lock());
rw_lock_write_unlock(&attribute->fLock);
if (status == B_OK) {
status = attribute->WriteAt(transaction, pos, buffer,
@ -1167,7 +1165,7 @@ Inode::GetAttribute(const char *name, Inode **_attribute)
BPlusTree *tree;
status_t status = attributes->GetTree(&tree);
if (status == B_OK) {
ReadLocker locker(attributes->Lock());
InodeReadLocker locker(attributes);
ino_t id;
status = tree->Find((uint8 *)name, (uint16)strlen(name), &id);
@ -1382,7 +1380,7 @@ Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length)
if (pos < 0)
return B_BAD_VALUE;
ReadLocker locker(Lock());
InodeReadLocker locker(this);
if (pos >= Size() || length == 0) {
*_length = 0;
@ -1399,13 +1397,11 @@ status_t
Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer,
size_t *_length)
{
WriteLocker locker(Lock());
if (!locker.IsLocked())
RETURN_ERROR(B_ERROR);
InodeReadLocker locker(this);
// update the last modification time in memory, it will be written
// back to the inode, and the index when the file is closed
// ToDo: should update the internal last modified time only at this point!
// TODO: should update the internal last modified time only at this point!
Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL)
<< INODE_TIME_SHIFT);
@ -1427,7 +1423,12 @@ Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer,
if (changeSize && !transaction.IsStarted())
transaction.Start(fVolume, BlockNumber());
locker.Lock();
// TODO: we actually need to call WriteLockInTransaction() here, but we
// cannot do this with the current locking model (ie. file cache functions
// are not to be called with the inode lock held).
// But this cannot work anyway, since we hold the lock when calling
// file_cache_set_size(), too... (possible deadlock)
rw_lock_write_lock(&fLock);
if (pos + length > Size()) {
// let's grow the data stream to the size needed
@ -1453,7 +1454,7 @@ Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer,
if (length == 0)
return B_OK;
locker.Unlock();
rw_lock_write_unlock(&fLock);
return file_cache_write(FileCache(), NULL, pos, buffer, _length);
}
@ -2117,7 +2118,7 @@ Inode::Sync()
if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
return B_OK;
ReadLocker locker(Lock());
InodeReadLocker locker(this);
data_stream *data = &Node().data;
status_t status = B_OK;
@ -2212,7 +2213,7 @@ Inode::Remove(Transaction &transaction, const char *name, ino_t *_id,
if (GetTree(&tree) != B_OK)
RETURN_ERROR(B_BAD_VALUE);
WriteLocker locker(Lock());
WriteLockInTransaction(transaction);
// does the file even exist?
off_t id;
@ -2231,6 +2232,7 @@ Inode::Remove(Transaction &transaction, const char *name, ino_t *_id,
}
T(Remove(inode, name));
inode->WriteLockInTransaction(transaction);
// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
// bit is set for indices in BFS, not for attribute directories) - but you
@ -2318,14 +2320,16 @@ Inode::Create(Transaction &transaction, Inode *parent, const char *name,
RETURN_ERROR(B_BAD_VALUE);
}
WriteLocker locker(parent != NULL ? &parent->Lock() : NULL);
if (parent != NULL) {
// the parent directory is locked during the whole inode creation
parent->WriteLockInTransaction(transaction);
}
if (parent != NULL && parent->IsDirectory()) {
if (parent != NULL && !volume->IsInitializing() && parent->IsContainer()) {
// don't create anything in removed directories
bool removed;
if (get_vnode_removed(volume->FSVolume(), parent->ID(), &removed)
!= B_OK || removed) {
== B_OK && removed) {
RETURN_ERROR(B_ENTRY_NOT_FOUND);
}
}
@ -2362,7 +2366,7 @@ Inode::Create(Transaction &transaction, Inode *parent, const char *name,
return status;
// truncate the existing file
WriteLocker _(inode->Lock());
inode->WriteLockInTransaction(transaction);
status_t status = inode->SetFileSize(transaction, 0);
if (status >= B_OK)

View File

@ -8,11 +8,11 @@
#include "system_dependencies.h"
#include "Volume.h"
#include "Journal.h"
#include "CachedBlock.h"
#include "Chain.h"
#include "Debug.h"
#include "CachedBlock.h"
#include "Journal.h"
#include "Volume.h"
class BPlusTree;
@ -20,229 +20,292 @@ class TreeIterator;
class AttributeIterator;
class Index;
class InodeAllocator;
class InodeReadLocker;
class NodeGetter;
class Transaction;
enum inode_type {
S_DIRECTORY = S_IFDIR,
S_FILE = S_IFREG,
S_SYMLINK = S_IFLNK,
S_INDEX_TYPES = (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX
| S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)
S_INDEX_TYPES = (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX
| S_LONG_LONG_INDEX | S_ULONG_LONG_INDEX
| S_FLOAT_INDEX | S_DOUBLE_INDEX)
};
class Inode {
public:
Inode(Volume *volume, ino_t id);
Inode(Volume *volume, Transaction &transaction, ino_t id,
mode_t mode, block_run &run);
//Inode(CachedBlock *cached);
~Inode();
//bfs_inode *Node() const { return (bfs_inode *)fBlock; }
ino_t ID() const { return fID; }
off_t BlockNumber() const { return fVolume->VnodeToBlock(fID); }
class Inode : public SinglyLinkedListLinkImpl<Inode> {
public:
Inode(Volume* volume, ino_t id);
Inode(Volume* volume, Transaction& transaction,
ino_t id, mode_t mode, block_run& run);
~Inode();
rw_lock &Lock() { return fLock; }
recursive_lock &SmallDataLock() { return fSmallDataLock; }
status_t WriteBack(Transaction &transaction);
ino_t ID() const { return fID; }
off_t BlockNumber() const
{ return fVolume->VnodeToBlock(fID); }
bool IsContainer() const
{ return S_ISDIR(Mode()); }
bool IsDirectory() const
{ return (Mode() & (S_INDEX_DIR | S_ATTR_DIR | S_IFDIR))
== S_IFDIR; }
bool IsIndex() const
{ return (Mode() & (S_INDEX_DIR | 0777)) == S_INDEX_DIR; }
// that's a stupid check, but AFAIK the only possible method...
rw_lock& Lock() { return fLock; }
ReadLocker ReadLock() { return ReadLocker(fLock); }
void WriteLockInTransaction(Transaction& transaction)
{ transaction.AddInode(this); }
bool IsAttributeDirectory() const
{ return (Mode() & S_ATTR_DIR) != 0; }
bool IsAttribute() const
{ return (Mode() & S_ATTR) != 0; }
bool IsFile() const
{ return (Mode() & (S_IFMT | S_ATTR)) == S_FILE; }
bool IsRegularNode() const
{ return (Mode() & (S_ATTR_DIR | S_INDEX_DIR | S_ATTR)) == 0; }
// a regular node in the standard namespace
// (i.e. not an index or attribute)
bool IsSymLink() const { return S_ISLNK(Mode()); }
bool HasUserAccessableStream() const { return IsFile(); }
// currently only files can be accessed with bfs_read()/bfs_write()
recursive_lock& SmallDataLock() { return fSmallDataLock; }
status_t WriteBack(Transaction& transaction);
bool IsDeleted() const { return (Flags() & INODE_DELETED) != 0; }
bool IsContainer() const
{ return S_ISDIR(Mode()); }
bool IsDirectory() const
{ return is_directory(Mode()); }
bool IsIndex() const
{ return is_index(Mode()); }
mode_t Mode() const { return fNode.Mode(); }
uint32 Type() const { return fNode.Type(); }
int32 Flags() const { return fNode.Flags(); }
bool IsAttributeDirectory() const
{ return (Mode() & S_ATTR_DIR) != 0; }
bool IsAttribute() const
{ return (Mode() & S_ATTR) != 0; }
bool IsFile() const
{ return (Mode()
& (S_IFMT | S_ATTR)) == S_FILE; }
bool IsRegularNode() const
{ return (Mode()
& (S_ATTR_DIR | S_INDEX_DIR | S_ATTR))
== 0; }
// a regular node in the standard namespace
// (i.e. not an index or attribute)
bool IsSymLink() const { return S_ISLNK(Mode()); }
bool HasUserAccessableStream() const { return IsFile(); }
// currently only files can be accessed with
// bfs_read()/bfs_write()
off_t Size() const { return fNode.data.Size(); }
off_t LastModified() const { return fNode.last_modified_time; }
bool IsDeleted() const
{ return (Flags() & INODE_DELETED) != 0; }
const block_run &BlockRun() const { return fNode.inode_num; }
block_run &Parent() { return fNode.parent; }
block_run &Attributes() { return fNode.attributes; }
mode_t Mode() const { return fNode.Mode(); }
uint32 Type() const { return fNode.Type(); }
int32 Flags() const { return fNode.Flags(); }
Volume *GetVolume() const { return fVolume; }
off_t Size() const { return fNode.data.Size(); }
off_t LastModified() const
{ return fNode.last_modified_time; }
status_t InitCheck(bool checkNode = true);
const block_run& BlockRun() const
{ return fNode.inode_num; }
block_run& Parent() { return fNode.parent; }
block_run& Attributes() { return fNode.attributes; }
status_t CheckPermissions(int accessMode) const;
Volume* GetVolume() const { return fVolume; }
// small_data access methods
small_data *FindSmallData(const bfs_inode *node,
const char *name) const;
const char *Name(const bfs_inode *node) const;
status_t GetName(char *buffer,
size_t bufferSize = B_FILE_NAME_LENGTH) const;
status_t SetName(Transaction &transaction, const char *name);
status_t InitCheck(bool checkNode = true);
// high-level attribute methods
status_t ReadAttribute(const char *name, int32 type, off_t pos,
uint8 *buffer, size_t *_length);
status_t WriteAttribute(Transaction &transaction, const char *name,
int32 type, off_t pos, const uint8 *buffer, size_t *_length);
status_t RemoveAttribute(Transaction &transaction, const char *name);
status_t CheckPermissions(int accessMode) const;
// attribute methods
status_t GetAttribute(const char *name, Inode **attribute);
void ReleaseAttribute(Inode *attribute);
status_t CreateAttribute(Transaction &transaction, const char *name,
uint32 type, Inode **attribute);
// small_data access methods
small_data* FindSmallData(const bfs_inode* node,
const char* name) const;
const char* Name(const bfs_inode* node) const;
status_t GetName(char* buffer,
size_t bufferSize = B_FILE_NAME_LENGTH) const;
status_t SetName(Transaction& transaction, const char* name);
// for directories only:
status_t GetTree(BPlusTree **_tree);
bool IsEmpty();
// high-level attribute methods
status_t ReadAttribute(const char* name, int32 type,
off_t pos, uint8* buffer, size_t* _length);
status_t WriteAttribute(Transaction& transaction,
const char* name, int32 type, off_t pos,
const uint8* buffer, size_t* _length);
status_t RemoveAttribute(Transaction& transaction,
const char* name);
// manipulating the data stream
status_t FindBlockRun(off_t pos, block_run &run, off_t &offset);
// attribute methods
status_t GetAttribute(const char* name, Inode** attribute);
void ReleaseAttribute(Inode* attribute);
status_t CreateAttribute(Transaction& transaction,
const char* name, uint32 type,
Inode** attribute);
status_t ReadAt(off_t pos, uint8 *buffer, size_t *length);
status_t WriteAt(Transaction &transaction, off_t pos,
const uint8 *buffer, size_t *length);
status_t FillGapWithZeros(off_t oldSize, off_t newSize);
// for directories only:
status_t GetTree(BPlusTree** _tree);
bool IsEmpty();
status_t SetFileSize(Transaction &transaction, off_t size);
status_t Append(Transaction &transaction, off_t bytes);
status_t TrimPreallocation(Transaction &transaction);
bool NeedsTrimming();
// manipulating the data stream
status_t FindBlockRun(off_t pos, block_run& run,
off_t& offset);
status_t Free(Transaction &transaction);
status_t Sync();
status_t ReadAt(off_t pos, uint8* buffer, size_t* length);
status_t WriteAt(Transaction& transaction, off_t pos,
const uint8* buffer, size_t* length);
status_t FillGapWithZeros(off_t oldSize, off_t newSize);
bfs_inode &Node() { return fNode; }
status_t SetFileSize(Transaction& transaction, off_t size);
status_t Append(Transaction& transaction, off_t bytes);
status_t TrimPreallocation(Transaction& transaction);
bool NeedsTrimming();
// create/remove inodes
status_t Remove(Transaction &transaction, const char *name,
ino_t *_id = NULL, bool isDirectory = false);
static status_t Create(Transaction &transaction, Inode *parent,
const char *name, int32 mode, int openMode, uint32 type,
bool *_created = NULL, ino_t *_id = NULL, Inode **_inode = NULL,
fs_vnode_ops *vnodeOps = NULL, uint32 publishFlags = 0);
status_t Free(Transaction& transaction);
status_t Sync();
// index maintaining helper
void UpdateOldSize()
{ fOldSize = Size(); }
void UpdateOldLastModified()
{ fOldLastModified = Node().LastModifiedTime(); }
off_t OldSize()
{ return fOldSize; }
off_t OldLastModified()
{ return fOldLastModified; }
bfs_inode& Node() { return fNode; }
// file cache
void *FileCache() const { return fCache; }
void SetFileCache(void *cache) { fCache = cache; }
void *Map() const { return fMap; }
void SetMap(void *map) { fMap = map; }
// create/remove inodes
status_t Remove(Transaction& transaction, const char* name,
ino_t* _id = NULL, bool isDirectory = false);
static status_t Create(Transaction& transaction, Inode* parent,
const char* name, int32 mode, int openMode,
uint32 type, bool* _created = NULL,
ino_t* _id = NULL, Inode** _inode = NULL,
fs_vnode_ops* vnodeOps = NULL,
uint32 publishFlags = 0);
private:
Inode(const Inode &);
Inode &operator=(const Inode &);
// no implementation
// index maintaining helper
void UpdateOldSize() { fOldSize = Size(); }
void UpdateOldLastModified()
{ fOldLastModified
= Node().LastModifiedTime(); }
off_t OldSize() { return fOldSize; }
off_t OldLastModified() { return fOldLastModified; }
friend class AttributeIterator;
friend class InodeAllocator;
// file cache
void* FileCache() const { return fCache; }
void SetFileCache(void* cache) { fCache = cache; }
void* Map() const { return fMap; }
void SetMap(void* map) { fMap = map; }
// small_data access methods
status_t _MakeSpaceForSmallData(Transaction &transaction,
bfs_inode *node, const char *name, int32 length);
status_t _RemoveSmallData(Transaction &transaction, NodeGetter &node,
const char *name);
status_t _AddSmallData(Transaction &transaction, NodeGetter &node,
const char *name, uint32 type, const uint8 *data, size_t length,
bool force = false);
status_t _GetNextSmallData(bfs_inode *node,
small_data **_smallData) const;
status_t _RemoveSmallData(bfs_inode *node, small_data *item,
int32 index);
status_t _RemoveAttribute(Transaction &transaction, const char *name,
bool hasIndex, Index *index);
#if _KERNEL_MODE && KDEBUG
void AssertReadLocked()
{ ASSERT_READ_LOCKED_RW_LOCK(&fLock); }
void AssertWriteLocked()
{ ASSERT_WRITE_LOCKED_RW_LOCK(&fLock); }
#endif
void _AddIterator(AttributeIterator *iterator);
void _RemoveIterator(AttributeIterator *iterator);
private:
Inode(const Inode& other);
Inode& operator=(const Inode& other);
// no implementation
status_t _FreeStaticStreamArray(Transaction &transaction, int32 level,
block_run run, off_t size, off_t offset, off_t &max);
status_t _FreeStreamArray(Transaction &transaction, block_run *array,
uint32 arrayLength, off_t size, off_t &offset, off_t &max);
status_t _AllocateBlockArray(Transaction &transaction, block_run &run);
status_t _GrowStream(Transaction &transaction, off_t size);
status_t _ShrinkStream(Transaction &transaction, off_t size);
friend class AttributeIterator;
friend class InodeAllocator;
friend class InodeReadLocker;
friend class Transaction;
private:
rw_lock fLock;
Volume *fVolume;
ino_t fID;
BPlusTree *fTree;
Inode *fAttributes;
void *fCache;
void *fMap;
bfs_inode fNode;
// small_data access methods
status_t _MakeSpaceForSmallData(Transaction& transaction,
bfs_inode* node, const char* name,
int32 length);
status_t _RemoveSmallData(Transaction& transaction,
NodeGetter& node, const char* name);
status_t _AddSmallData(Transaction& transaction,
NodeGetter& node, const char* name, uint32 type,
const uint8* data, size_t length,
bool force = false);
status_t _GetNextSmallData(bfs_inode* node,
small_data** _smallData) const;
status_t _RemoveSmallData(bfs_inode* node, small_data* item,
int32 index);
status_t _RemoveAttribute(Transaction& transaction,
const char* name, bool hasIndex, Index* index);
off_t fOldSize;
off_t fOldLastModified;
// we need those values to ensure we will remove
// the correct keys from the indices
void _AddIterator(AttributeIterator* iterator);
void _RemoveIterator(AttributeIterator* iterator);
mutable recursive_lock fSmallDataLock;
Chain<AttributeIterator> fIterators;
status_t _FreeStaticStreamArray(Transaction& transaction,
int32 level, block_run run, off_t size,
off_t offset, off_t& max);
status_t _FreeStreamArray(Transaction& transaction,
block_run* array, uint32 arrayLength,
off_t size, off_t& offset, off_t& max);
status_t _AllocateBlockArray(Transaction& transaction,
block_run& run);
status_t _GrowStream(Transaction& transaction, off_t size);
status_t _ShrinkStream(Transaction& transaction, off_t size);
private:
rw_lock fLock;
Volume* fVolume;
ino_t fID;
BPlusTree* fTree;
Inode* fAttributes;
void* fCache;
void* fMap;
bfs_inode fNode;
off_t fOldSize;
off_t fOldLastModified;
// we need those values to ensure we will remove
// the correct keys from the indices
mutable recursive_lock fSmallDataLock;
Chain<AttributeIterator> fIterators;
};
#if _KERNEL_MODE && KDEBUG
# define ASSERT_READ_LOCKED_INODE(inode) inode->AssertReadLocked()
# define ASSERT_WRITE_LOCKED_INODE(inode) inode->AssertWriteLocked()
#else
# define ASSERT_READ_LOCKED_INODE(inode)
# define ASSERT_WRITE_LOCKED_INODE(inode)
#endif
/*!	Scoped read lock for an Inode.
	The constructor read-locks the inode's rw_lock; the destructor releases
	it again unless Unlock() has already done so.
*/
class InodeReadLocker {
public:
	InodeReadLocker(Inode* inode)
		:
		fLock(&inode->fLock)
	{
		rw_lock_read_lock(fLock);
	}

	~InodeReadLocker()
	{
		if (fLock != NULL)
			rw_lock_read_unlock(fLock);
	}

	//!	Releases the read lock early; any further call is a no-op.
	void Unlock()
	{
		if (fLock != NULL) {
			rw_lock_read_unlock(fLock);
			fLock = NULL;
		}
	}

private:
	InodeReadLocker(const InodeReadLocker& other);
	InodeReadLocker& operator=(const InodeReadLocker& other);
		// no implementation - a copy would share fLock and release the
		// same read lock a second time

private:
	rw_lock* fLock;
};
class NodeGetter : public CachedBlock {
public:
NodeGetter(Volume *volume)
: CachedBlock(volume)
{
}
public:
NodeGetter(Volume* volume)
: CachedBlock(volume)
{
}
NodeGetter(Volume *volume, const Inode *inode)
: CachedBlock(volume)
{
SetTo(volume->VnodeToBlock(inode->ID()));
}
NodeGetter(Volume* volume, const Inode* inode)
: CachedBlock(volume)
{
SetTo(volume->VnodeToBlock(inode->ID()));
}
NodeGetter(Volume *volume, Transaction &transaction,
const Inode *inode, bool empty = false)
: CachedBlock(volume)
{
SetToWritable(transaction, volume->VnodeToBlock(inode->ID()), empty);
}
NodeGetter(Volume* volume, Transaction& transaction,
const Inode* inode, bool empty = false)
: CachedBlock(volume)
{
SetToWritable(transaction, volume->VnodeToBlock(inode->ID()), empty);
}
~NodeGetter()
{
}
~NodeGetter()
{
}
const bfs_inode *
SetToNode(const Inode *inode)
{
return (const bfs_inode *)SetTo(fVolume->VnodeToBlock(inode->ID()));
}
const bfs_inode* SetToNode(const Inode* inode)
{
return (const bfs_inode*)SetTo(fVolume->VnodeToBlock(inode->ID()));
}
const bfs_inode *Node() const { return (const bfs_inode *)Block(); }
bfs_inode *WritableNode() const { return (bfs_inode *)Block(); }
const bfs_inode* Node() const { return (const bfs_inode*)Block(); }
bfs_inode* WritableNode() const { return (bfs_inode*)Block(); }
};
@ -251,114 +314,98 @@ class NodeGetter : public CachedBlock {
// readable in some cases
class Vnode {
public:
Vnode(Volume* volume, ino_t id)
:
fInode(NULL)
{
SetTo(volume, id);
}
public:
Vnode(Volume* volume, ino_t id)
:
fInode(NULL)
{
SetTo(volume, id);
}
Vnode(Volume* volume, block_run run)
:
fInode(NULL)
{
SetTo(volume, run);
}
Vnode(Volume* volume, block_run run)
:
fInode(NULL)
{
SetTo(volume, run);
}
Vnode()
:
fStatus(B_NO_INIT),
fInode(NULL)
{
}
Vnode()
:
fStatus(B_NO_INIT),
fInode(NULL)
{
}
~Vnode()
{
Unset();
}
~Vnode()
{
Unset();
}
status_t InitCheck()
{
return fStatus;
}
status_t InitCheck()
{
return fStatus;
}
void Unset()
{
if (fInode != NULL) {
put_vnode(fInode->GetVolume()->FSVolume(), fInode->ID());
fInode = NULL;
fStatus = B_NO_INIT;
}
}
status_t SetTo(Volume* volume, ino_t id)
{
Unset();
return fStatus = get_vnode(volume->FSVolume(), id, (void**)&fInode);
}
status_t SetTo(Volume* volume, block_run run)
{
return SetTo(volume, volume->ToVnode(run));
}
status_t Get(Inode** _inode)
{
*_inode = fInode;
return fStatus;
}
void Keep()
{
void Unset()
{
if (fInode != NULL) {
put_vnode(fInode->GetVolume()->FSVolume(), fInode->ID());
fInode = NULL;
fStatus = B_NO_INIT;
}
}
private:
status_t fStatus;
Inode* fInode;
status_t SetTo(Volume* volume, ino_t id)
{
Unset();
return fStatus = get_vnode(volume->FSVolume(), id, (void**)&fInode);
}
status_t SetTo(Volume* volume, block_run run)
{
return SetTo(volume, volume->ToVnode(run));
}
status_t Get(Inode** _inode)
{
*_inode = fInode;
return fStatus;
}
void Keep()
{
fInode = NULL;
}
private:
status_t fStatus;
Inode* fInode;
};
class AttributeIterator {
public:
AttributeIterator(Inode *inode);
~AttributeIterator();
public:
AttributeIterator(Inode* inode);
~AttributeIterator();
status_t Rewind();
status_t GetNext(char *name, size_t *length, uint32 *type, ino_t *id);
status_t Rewind();
status_t GetNext(char* name, size_t* length, uint32* type,
ino_t* id);
private:
friend class Chain<AttributeIterator>;
friend class Inode;
private:
friend class Chain<AttributeIterator>;
friend class Inode;
void Update(uint16 index, int8 change);
AttributeIterator *fNext;
void Update(uint16 index, int8 change);
private:
int32 fCurrentSmallData;
Inode *fInode, *fAttributes;
TreeIterator *fIterator;
void *fBuffer;
private:
AttributeIterator* fNext;
int32 fCurrentSmallData;
Inode* fInode;
Inode* fAttributes;
TreeIterator* fIterator;
void* fBuffer;
};
/*!
Converts the open mode, the open flags given to bfs_open(), into
access modes, e.g. since O_RDONLY requires read access to the
file, it will be converted to R_OK.
*/
inline int
openModeToAccess(int openMode)
{
openMode &= O_RWMASK;
if (openMode == O_RDONLY)
return R_OK;
else if (openMode == O_WRONLY)
return W_OK;
return R_OK | W_OK;
}
#endif /* INODE_H */
#endif // INODE_H

View File

@ -7,8 +7,9 @@
#include "Journal.h"
#include "Inode.h"
#include "Debug.h"
#include "Inode.h"
struct run_array {
@ -668,7 +669,7 @@ Journal::_TransactionIdle(int32 transactionID, int32 event, void *_journal)
status_t
Journal::_WriteTransactionToLog()
{
// ToDo: in case of a failure, we need a backup plan like writing all
// TODO: in case of a failure, we need a backup plan like writing all
// changed blocks back to disk immediately (hello disk corruption!)
bool detached = false;
@ -746,7 +747,7 @@ Journal::_WriteTransactionToLog()
iovec *vecs = (iovec *)malloc(sizeof(iovec) * maxVecs);
if (vecs == NULL) {
// ToDo: write back log entries directly?
// TODO: write back log entries directly?
return B_NO_MEMORY;
}
@ -919,7 +920,7 @@ Journal::Lock(Transaction *owner)
fOwner = owner;
// ToDo: we need a way to find out how big the current transaction is;
// TODO: we need a way to find out how big the current transaction is;
// we need to be able to either detach the latest sub transaction on
// demand, as well as having some kind of fall back plan in case the
// sub transaction itself grows bigger than the log.
@ -950,7 +951,7 @@ Journal::Unlock(Transaction *owner, bool success)
{
if (recursive_lock_get_recursion(&fLock) == 1) {
// we only end the transaction if we would really unlock it
// ToDo: what about failing transactions that do not unlock?
// TODO: what about failing transactions that do not unlock?
_TransactionDone(success);
fTimestamp = system_time();
@ -1065,3 +1066,35 @@ Transaction::Start(Volume *volume, off_t refBlock)
return B_ERROR;
}
/*!	Write-locks \a inode on behalf of this transaction and remembers it in
	fLockedInodes, so that the lock is only released by _UnlockInodes().
	An additional vnode reference is acquired for as long as the inode is
	part of the list.
	Adding an inode that is already in the list is a no-op, and nothing is
	locked at all while the volume is still initializing.
*/
void
Transaction::AddInode(Inode* inode)
{
	// This has to be checked before anything else: the transaction keeps no
	// volume pointer of its own, so GetVolume() is presumably resolved
	// through fJournal and must not be called on a transaction that has not
	// been started.
	if (fJournal == NULL)
		panic("Transaction is not running!");

	if (GetVolume()->IsInitializing())
		return;

	// Each inode is locked only once per transaction
	InodeList::Iterator iterator = fLockedInodes.GetIterator();
	while (iterator.HasNext()) {
		if (iterator.Next() == inode) {
			//dprintf(" inode %Ld already in transaction\n", inode->ID());
			return;
		}
	}

	// The reference and the lock are both given up in _UnlockInodes()
	acquire_vnode(GetVolume()->FSVolume(), inode->ID());
	rw_lock_write_lock(&inode->fLock);
	fLockedInodes.Add(inode);
}
void
Transaction::_UnlockInodes()
{
while (Inode* inode = fLockedInodes.RemoveHead()) {
rw_lock_write_unlock(&inode->fLock);
put_vnode(GetVolume()->FSVolume(), inode->ID());
}
}

View File

@ -18,8 +18,10 @@
struct run_array;
class Inode;
class LogEntry;
typedef DoublyLinkedList<LogEntry> LogEntryList;
typedef SinglyLinkedList<Inode> InodeList;
// Locking policy in BFS: if you need both, the volume lock and the
@ -118,23 +120,25 @@ class Transaction {
~Transaction()
{
if (fJournal)
if (fJournal != NULL) {
fJournal->Unlock(this, false);
_UnlockInodes();
}
}
status_t Start(Volume *volume, off_t refBlock);
bool IsStarted() const { return fJournal != NULL; }
void
Done()
void Done()
{
if (fJournal != NULL)
if (fJournal != NULL) {
fJournal->Unlock(this, true);
_UnlockInodes();
}
fJournal = NULL;
}
bool
HasParent()
bool HasParent()
{
if (fJournal != NULL)
return fJournal->CurrentTransaction() == this;
@ -142,8 +146,7 @@ class Transaction {
return false;
}
status_t
WriteBlocks(off_t blockNumber, const uint8 *buffer,
status_t WriteBlocks(off_t blockNumber, const uint8 *buffer,
size_t numBlocks = 1)
{
if (fJournal == NULL)
@ -172,12 +175,17 @@ class Transaction {
int32 ID() const
{ return fJournal->TransactionID(); }
void AddInode(Inode* inode);
private:
Transaction(const Transaction &);
Transaction &operator=(const Transaction &);
// no implementation
Journal *fJournal;
void _UnlockInodes();
Journal* fJournal;
InodeList fLockedInodes;
};
#ifdef BFS_DEBUGGER_COMMANDS

View File

@ -58,4 +58,21 @@ is_directory(int mode)
return (mode & (S_INDEX_DIR | S_ATTR_DIR | S_IFDIR)) == S_IFDIR;
}
#endif /* UTILITY_H */
/*!	Translates the open mode (the open flags passed to bfs_open()) into the
	access mode that is needed to open a file that way.
	Only the bits covered by O_RWMASK are looked at: O_RDONLY maps to R_OK,
	O_WRONLY maps to W_OK, and everything else maps to R_OK | W_OK.
*/
inline int
openModeToAccess(int openMode)
{
	switch (openMode & O_RWMASK) {
		case O_RDONLY:
			return R_OK;
		case O_WRONLY:
			return W_OK;
		default:
			return R_OK | W_OK;
	}
}
#endif // UTILITY_H

View File

@ -34,6 +34,8 @@ class Volume {
status_t Initialize(int fd, const char *name,
uint32 blockSize, uint32 flags);
bool IsInitializing() const { return fVolume == NULL; }
bool IsValidSuperBlock();
bool IsReadOnly() const;
void Panic();

View File

@ -24,30 +24,6 @@ struct identify_cookie {
disk_super_block super_block;
};
/*!	Helper that sets up write lockers for two inodes at once.
	The inode with the larger ID is always handed to the outer locker, so
	the order does not depend on the order of the constructor arguments.
	If both arguments are the same inode, only the outer locker is set.
	NOTE(review): presumably SetTo(lock, false) acquires the lock ("not
	already locked") -- confirm against the WriteLocker implementation.
*/
class MultiWriteLocker {
public:
	MultiWriteLocker(Inode* inodeA, Inode* inodeB)
	{
		// pick the lock order by inode ID
		Inode* outer = inodeA;
		Inode* inner = inodeB;
		if (inodeA->ID() < inodeB->ID()) {
			outer = inodeB;
			inner = inodeA;
		}

		fOuterLocker.SetTo(outer->Lock(), false);
		if (outer != inner)
			fInnerLocker.SetTo(inner->Lock(), false);
	}

	~MultiWriteLocker()
	{
	}

private:
	WriteLocker fOuterLocker;
	WriteLocker fInnerLocker;
};
extern void fill_stat_buffer(Inode *inode, struct stat &stat);
@ -78,6 +54,28 @@ fill_stat_buffer(Inode *inode, struct stat &stat)
}
/*!	bfs_io() callback hook: translates the range given by \a offset and
	\a size into file_io_vecs using the file map of the inode that is
	passed in as \a cookie. The \a request argument is not used.
*/
static status_t
iterative_io_get_vecs_hook(void* cookie, io_request *request, off_t offset,
	size_t size, struct file_io_vec *vecs, size_t *_count)
{
	return file_map_translate(static_cast<Inode*>(cookie)->Map(), offset,
		size, vecs, _count);
}
/*!	bfs_io() callback hook: releases the read lock of the inode passed in
	as \a cookie (bfs_io() read locks the inode before it starts the I/O).
	All other arguments are ignored; always returns B_OK.
*/
static status_t
iterative_io_finished_hook(void *cookie, io_request *request, status_t status,
	bool partialTransfer, size_t bytesTransferred)
{
	rw_lock_read_unlock(&static_cast<Inode*>(cookie)->Lock());
	return B_OK;
}
// #pragma mark - Scanning
@ -299,7 +297,7 @@ bfs_put_vnode(fs_volume *_volume, fs_vnode *_node, bool reenter)
if (inode->TrimPreallocation(transaction) == B_OK)
transaction.Done();
else if (transaction.HasParent()) {
// ToDo: for now, we don't let sub-transactions fail
// TODO: for now, we don't let sub-transactions fail
transaction.Done();
}
}
@ -337,7 +335,7 @@ bfs_remove_vnode(fs_volume *_volume, fs_vnode *_node, bool reenter)
delete inode;
} else if (transaction.HasParent()) {
// ToDo: for now, we don't let sub-transactions fail
// TODO: for now, we don't let sub-transactions fail
transaction.Done();
}
@ -363,7 +361,7 @@ bfs_read_pages(fs_volume *_volume, fs_vnode *_node, void *_cookie,
if (inode->FileCache() == NULL)
RETURN_ERROR(B_BAD_VALUE);
rw_lock_read_lock(&inode->Lock());
InodeReadLocker _(inode);
uint32 vecIndex = 0;
size_t vecOffset = 0;
@ -391,8 +389,6 @@ bfs_read_pages(fs_volume *_volume, fs_vnode *_node, void *_cookie,
bytesLeft -= bytes;
}
rw_lock_read_unlock(&inode->Lock());
return status;
}
@ -410,7 +406,7 @@ bfs_write_pages(fs_volume *_volume, fs_vnode *_node, void *_cookie,
if (inode->FileCache() == NULL)
RETURN_ERROR(B_BAD_VALUE);
rw_lock_read_lock(&inode->Lock());
InodeReadLocker _(inode);
uint32 vecIndex = 0;
size_t vecOffset = 0;
@ -438,33 +434,10 @@ bfs_write_pages(fs_volume *_volume, fs_vnode *_node, void *_cookie,
bytesLeft -= bytes;
}
rw_lock_read_unlock(&inode->Lock());
return status;
}
/*!	Callback for the iterative I/O started in bfs_io(): translates the range
	given by \a offset and \a size into file_io_vecs using the file map of
	the inode that is passed in as \a cookie. \a request is not used.
*/
static status_t
bfs_iterative_io_get_vecs(void* cookie, io_request *request, off_t offset,
	size_t size, struct file_io_vec *vecs, size_t *_count)
{
	return file_map_translate(static_cast<Inode*>(cookie)->Map(), offset,
		size, vecs, _count);
}
/*!	Callback for the iterative I/O started in bfs_io(): releases the read
	lock of the inode passed in as \a cookie (bfs_io() read locks the inode
	before it starts the I/O). All other arguments are ignored; always
	returns B_OK.
*/
static status_t
bfs_iterative_io_finished(void *cookie, io_request *request, status_t status,
	bool partialTransfer, size_t bytesTransferred)
{
	rw_lock_read_unlock(&static_cast<Inode*>(cookie)->Lock());
	return B_OK;
}
static status_t
bfs_io(fs_volume *_volume, fs_vnode *_node, void *_cookie, io_request *request)
{
@ -481,7 +454,7 @@ bfs_io(fs_volume *_volume, fs_vnode *_node, void *_cookie, io_request *request)
rw_lock_read_lock(&inode->Lock());
return do_iterative_fd_io(volume->Device(), request,
bfs_iterative_io_get_vecs, bfs_iterative_io_finished, inode);
iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
}
@ -546,6 +519,8 @@ bfs_lookup(fs_volume *_volume, fs_vnode *_directory, const char *file,
Volume *volume = (Volume *)_volume->private_volume;
Inode *directory = (Inode *)_directory->private_node;
InodeReadLocker locker(directory);
// check access permissions
status_t status = directory->CheckPermissions(X_OK);
if (status < B_OK)
@ -555,14 +530,14 @@ bfs_lookup(fs_volume *_volume, fs_vnode *_directory, const char *file,
if (directory->GetTree(&tree) != B_OK)
RETURN_ERROR(B_BAD_VALUE);
ReadLocker locker(directory->Lock());
status = tree->Find((uint8 *)file, (uint16)strlen(file), _vnodeID);
if (status < B_OK) {
//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
return status;
}
locker.Unlock();
Inode *inode;
status = get_vnode(volume->FSVolume(), *_vnodeID, (void **)&inode);
if (status != B_OK) {
@ -729,10 +704,7 @@ bfs_write_stat(fs_volume *_volume, fs_vnode *_node, const struct stat *stat,
RETURN_ERROR(status);
Transaction transaction(volume, inode->BlockNumber());
WriteLocker locker(inode->Lock());
if (!locker.IsLocked())
RETURN_ERROR(B_ERROR);
inode->WriteLockInTransaction(transaction);
bfs_inode &node = inode->Node();
@ -972,7 +944,10 @@ bfs_rename(fs_volume *_volume, fs_vnode *_oldDir, const char *oldName,
return B_OK;
Transaction transaction(volume, oldDirectory->BlockNumber());
MultiWriteLocker locker(oldDirectory, newDirectory);
oldDirectory->WriteLockInTransaction(transaction);
if (oldDirectory != newDirectory)
newDirectory->WriteLockInTransaction(transaction);
// are we allowed to do what we've been told?
status_t status = oldDirectory->CheckPermissions(W_OK);
@ -1069,7 +1044,7 @@ bfs_rename(fs_volume *_volume, fs_vnode *_oldDir, const char *oldName,
if (status < B_OK)
return status;
WriteLocker _(inode->Lock());
inode->WriteLockInTransaction(transaction);
// update the name only when they differ
bool nameUpdated = false;
@ -1165,7 +1140,7 @@ bfs_open(fs_volume *_volume, fs_vnode *_node, int openMode, void **_cookie)
return B_IS_A_DIRECTORY;
Transaction transaction(volume, inode->BlockNumber());
WriteLocker locker(inode->Lock());
inode->WriteLockInTransaction(transaction);
status_t status = inode->SetFileSize(transaction, 0);
if (status >= B_OK)
@ -1232,7 +1207,7 @@ bfs_write(fs_volume *_volume, fs_vnode *_node, void *_cookie, off_t pos,
if (status == B_OK) {
transaction.Done();
ReadLocker locker(inode->Lock());
InodeReadLocker locker(inode);
// periodically notify if the file size has changed
// TODO: should we better test for a change in the last_modified time only?
@ -1271,7 +1246,7 @@ bfs_free_cookie(fs_volume *_volume, fs_vnode *_node, void *_cookie)
bool needsTrimming = false;
if (!volume->IsReadOnly()) {
ReadLocker locker(inode->Lock());
InodeReadLocker locker(inode);
needsTrimming = inode->NeedsTrimming();
if ((cookie->open_mode & O_RWMASK) != 0
@ -1287,7 +1262,7 @@ bfs_free_cookie(fs_volume *_volume, fs_vnode *_node, void *_cookie)
status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
if (status == B_OK) {
WriteLocker locker(inode->Lock());
inode->WriteLockInTransaction(transaction);
// trim the preallocated blocks and update the size,
// and last_modified indices if needed

View File

@ -17,6 +17,7 @@
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/kernel_cpp.h>
#include <util/SinglyLinkedList.h>
#include <util/Stack.h>
#include <ByteOrder.h>