* Added a File class for representing regular files. We use a simple block
  tree for the data management. Reading/writing (using file cache and file
  map) is implemented, but not exactly well tested yet.
* Renamed SymLink::{Read,Write}() to {Read,Write}SymLink().
* Implemented FS hooks write_stat(), create(), read(), write(), io().
* Added O_TRUNC support to open() hook.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@37507 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Ingo Weinhold 2010-07-14 00:12:52 +00:00
parent 4a6cd3b3f9
commit 87c30de789
8 changed files with 1208 additions and 40 deletions

View File

@ -0,0 +1,647 @@
/*
* Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#include "File.h"
#include <string.h>
#include <algorithm>
#include <new>
#include <AutoDeleter.h>
#include "Block.h"
#include "BlockAllocator.h"
#include "DebugSupport.h"
#include "Volume.h"
// The block tree's root entries live in the same disk block as the node data,
// directly after the node header.
static const size_t kFileRootBlockOffset = sizeof(checksumfs_node);
static const size_t kFileRootBlockSize = B_PAGE_SIZE
	- kFileRootBlockOffset;

// maximum number of 8-byte block references in the root block
static const uint32 kFileRootBlockMaxCount = kFileRootBlockSize / 8;
// maximum number of 8-byte block references in a full (non-root) tree block
static const uint32 kFileBlockMaxCount = B_PAGE_SIZE / 8;
// log2(kFileBlockMaxCount) -- checked by the STATIC_ASSERT in the constructor
static const uint32 kFileBlockShift = 9;
// enough tree levels to address any 64 bit block count
static const uint32 kFileMaxTreeDepth = (64 + kFileBlockShift - 1)
	/ kFileBlockShift + 1;

// rounds value up to the next multiple of B_PAGE_SIZE
#define BLOCK_ROUND_UP(value)	(((value) + B_PAGE_SIZE - 1) / B_PAGE_SIZE \
									* B_PAGE_SIZE)
// Per-level bookkeeping used while traversing or modifying the block tree.
struct File::LevelInfo {
	uint64	addressableShift;	// 1 << addressableShift is the number of
								// descendent data blocks a child block (and
								// its descendents) can address
	uint32	childCount;			// number of child blocks of the last block of
								// this level
	Block	block;				// currently loaded block of this level
	uint64*	blockData;			// pointer to the block's entry array (for the
								// root level offset by kFileRootBlockOffset)
	int32	index;				// current child index while iterating
};
/*!	Creates a File object for an existing on-disk node (\a nodeData).
	The file cache and file map are created later in InitForVFS().
*/
File::File(Volume* volume, uint64 blockIndex, const checksumfs_node& nodeData)
	:
	Node(volume, blockIndex, nodeData),
	fFileCache(NULL),
	fFileMap(NULL)
		// Fixed: fFileMap was left uninitialized by this constructor (unlike
		// the other one), so the destructor's fFileMap != NULL check could
		// call file_map_delete() on a garbage pointer.
{
	STATIC_ASSERT(kFileBlockMaxCount == (uint32)1 << kFileBlockShift);
}
/*!	Creates a new file node with the given \a mode. The file cache and file
	map are created later in InitForVFS().
*/
File::File(Volume* volume, uint64 blockIndex, mode_t mode)
	:
	Node(volume, blockIndex, mode),
	fFileCache(NULL),
	fFileMap(NULL)
{
}
File::~File()
{
	// delete the VFS helper objects, if they have been created (InitForVFS()
	// may never have been called, or may have failed half-way)
	if (fFileCache != NULL)
		file_cache_delete(fFileCache);
	if (fFileMap != NULL)
		file_map_delete(fFileMap);
}
/*!	Creates the file map and the file cache for this node. Must succeed
	before the file can be used for VFS-level I/O.
	\return \c B_OK on success, \c B_NO_MEMORY if either object could not be
		created.
*/
status_t
File::InitForVFS()
{
	const dev_t volumeID = GetVolume()->ID();
	const uint64 nodeIndex = BlockIndex();

	// the file map comes first ...
	fFileMap = file_map_create(volumeID, nodeIndex, Size());
	if (fFileMap == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	// ... followed by the file cache
	fFileCache = file_cache_create(volumeID, nodeIndex, Size());
	if (fFileCache == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	return B_OK;
}
/*!	Invoked when the node is about to be deleted. Frees all of the file's
	data and tree blocks by resizing to 0 (without zero-filling).
*/
status_t
File::DeletingNode(Transaction& transaction)
{
	return Resize(0, false, transaction);
}
/*!	Resizes the file to \a newSize, growing or shrinking the block tree as
	needed. When growing and \a fillWithZeroes is \c true, the new file range
	is explicitly zero-filled via the file cache.
	The caller is responsible for holding the required node lock and for
	\a transaction.
*/
status_t
File::Resize(uint64 newSize, bool fillWithZeroes, Transaction& transaction)
{
	uint64 size = Size();
	if (newSize == size)
		return B_OK;

	FUNCTION("%" B_PRIu64 " -> %" B_PRIu64 "\n", size, newSize);

	// the tree only changes when the number of full blocks changes
	uint64 blockCount = BLOCK_ROUND_UP(size) / B_PAGE_SIZE;
	uint64 newBlockCount = BLOCK_ROUND_UP(newSize) / B_PAGE_SIZE;

	if (newBlockCount != blockCount) {
		status_t error;
		if (newBlockCount < blockCount)
			error = _ShrinkTree(blockCount, newBlockCount, transaction);
		else
			error = _GrowTree(blockCount, newBlockCount, transaction);

		if (error != B_OK)
			RETURN_ERROR(error);
	}

	SetSize(newSize);

	if (newSize > size && fillWithZeroes) {
		// zero out the newly added file range
		status_t error = _WriteZeroes(size, newSize - size);
		if (error != B_OK)
			RETURN_ERROR(error);
	}

	// keep the file cache and file map in sync with the new size
	file_cache_set_size(fFileCache, newSize);
	file_map_set_size(fFileMap, newSize);

	return B_OK;
}
/*!	Reads up to \a size bytes at file position \a pos into \a buffer.
	The request is clamped against the current file size; reading at or
	beyond the end of the file succeeds with 0 bytes read.
	\a _bytesRead is set to the number of bytes actually read on success.
*/
status_t
File::Read(off_t pos, void* buffer, size_t size, size_t& _bytesRead)
{
	if (pos < 0)
		return B_BAD_VALUE;

	if (size == 0) {
		_bytesRead = 0;
		return B_OK;
	}

	// determine how much we can actually read, with the node read-locked
	size_t toRead = size;
	{
		NodeReadLocker locker(this);

		uint64 fileSize = Size();
		if ((uint64)pos >= fileSize) {
			_bytesRead = 0;
			return B_OK;
		}

		if (fileSize - pos < toRead)
			toRead = fileSize - pos;
	}

	// do the actual read through the file cache (unlocked)
	size_t bytesRead = toRead;
	status_t error = file_cache_read(fFileCache, NULL, pos, buffer,
		&bytesRead);
	if (error != B_OK)
		RETURN_ERROR(error);

	_bytesRead = bytesRead;
	return B_OK;
}
/*!	Writes \a size bytes from \a buffer at file position \a pos, growing the
	file first, if necessary. A negative \a pos means append (write at the
	current end of the file).
	\a _bytesWritten is set to the number of bytes written on success.
*/
status_t
File::Write(off_t pos, const void* buffer, size_t size, size_t& _bytesWritten)
{
	if (size == 0) {
		_bytesWritten = 0;
		return B_OK;
	}

	NodeWriteLocker locker(this);

	uint64 fileSize = Size();
	if (pos < 0)
		pos = fileSize;
		// append mode

	uint64 newFileSize = (uint64)pos + size;
	if (newFileSize > fileSize) {
		// we have to resize the file
		Transaction transaction(GetVolume());
		status_t error = transaction.Start();
		if (error != B_OK)
			RETURN_ERROR(error);

		// attach the node to the transaction (write locks it, too)
		error = transaction.AddNode(this,
			TRANSACTION_NODE_ALREADY_LOCKED | TRANSACTION_KEEP_NODE_LOCKED);
		if (error != B_OK)
			RETURN_ERROR(error);

		// resize
		error = Resize((uint64)pos + size, false, transaction);
		if (error != B_OK)
			RETURN_ERROR(error);

		SetSize(newFileSize);
			// NOTE(review): Resize() already sets this size -- looks
			// redundant, though harmless.

		// commit the transaction
		error = transaction.Commit();
		if (error != B_OK)
			RETURN_ERROR(error);
	}

	// now the file has the right size -- do the write
	locker.Unlock();

	if (fileSize < (uint64)pos) {
		// fill the gap between old file end and write position with zeroes
		// (the return value is ignored -- best effort)
		_WriteZeroes(fileSize, pos - fileSize);
	}

	size_t bytesWritten = size;
	status_t error = file_cache_write(fFileCache, NULL, pos, buffer,
		&bytesWritten);
	if (error != B_OK)
		RETURN_ERROR(error);

	// update the file times
	Transaction transaction(GetVolume());
	if (transaction.Start() == B_OK && transaction.AddNode(this) == B_OK) {
		// note: we don't fail, if we only couldn't update the times
		Touched(NODE_MODIFIED);
		transaction.Commit();
	}

	_bytesWritten = bytesWritten;
	return B_OK;
}
void
File::RevertNodeData(const checksumfs_node& nodeData)
{
Node::RevertNodeData(nodeData);
// in case the file size was reverted, reset file cache and map
uint64 size = Size();
file_cache_set_size(fFileCache, size);
file_map_set_size(fFileMap, size);
}
/*!	Determines the on-disk runs backing the file range
	[\a offset, \a offset + \a size) and fills in up to \a count elements of
	\a vecs (device offsets in bytes). The range is extended to full block
	boundaries (restricted to the file size), which is fine with the caller
	(the file map) and avoids partial block I/O. Adjacent blocks are merged
	into a single vector. \a _count is set to the number of vectors used.
*/
status_t
File::GetFileVecs(uint64 offset, size_t size, file_io_vec* vecs, size_t count,
	size_t& _count)
{
	// Round size to block size, but restrict to file size. This semantics is
	// fine with the caller (the file map) and it will help avoiding partial
	// block I/O.
	uint32 inBlockOffset = offset % B_PAGE_SIZE;
	offset -= inBlockOffset;
	size = BLOCK_ROUND_UP(size + inBlockOffset);

	uint64 fileSize = BLOCK_ROUND_UP(Size());
	if (offset >= fileSize) {
		_count = 0;
		return B_OK;
	}

	if (offset + size > fileSize)
		size = fileSize - offset;

	uint64 blockCount = fileSize / B_PAGE_SIZE;

	// get the level infos
	int32 depth;
	LevelInfo* infos = _GetLevelInfos(blockCount, depth);
	if (infos == NULL)
		RETURN_ERROR(B_NO_MEMORY);
	ArrayDeleter<LevelInfo> infosDeleter(infos);

	// Prepare for the iteration: descend the tree to the first data block of
	// the range, noting the child index at each level.
	uint64 firstBlock = offset / B_PAGE_SIZE;
	uint64 blockIndex = BlockIndex();
	for (int32 i = 0; i < depth; i++) {
		LevelInfo& info = infos[i];
		if (!info.block.GetReadable(GetVolume(), blockIndex))
			RETURN_ERROR(B_ERROR);

		if (i == 0) {
			// the root level entries start after the node header
			info.blockData = (uint64*)((uint8*)info.block.Data()
				+ kFileRootBlockOffset);
		} else
			info.blockData = (uint64*)info.block.Data();

		info.index = firstBlock >> info.addressableShift;
		firstBlock -= (uint64)info.index << info.addressableShift;
		blockIndex = info.blockData[info.index];
	}

	// and iterate
	int32 level = depth - 1;
	uint64 neededBlockCount = size / B_PAGE_SIZE;
	size_t countAdded = 0;
	while (true) {
		LevelInfo& info = infos[level];
		if (info.index == (int32)kFileBlockMaxCount) {
			// end of block -- back track to next greater branch
			level--;
			infos[level].index++;
			continue;
		}

		blockIndex = info.blockData[info.index];

		if (level < depth - 1) {
			// descend to next level
			level++;

			if (!infos[level].block.GetReadable(GetVolume(), blockIndex))
				RETURN_ERROR(B_ERROR);

			infos[level].blockData = (uint64*)infos[level].block.Data();
			infos[level].index = 0;
			continue;
		}

		// add the block
		uint64 blockOffset = blockIndex * B_PAGE_SIZE;
		if (countAdded > 0
			&& blockOffset
				== (uint64)vecs[countAdded - 1].offset
					+ vecs[countAdded - 1].length) {
			// the block continues where the previous block ends -- just extend
			// the vector
			vecs[countAdded - 1].length += B_PAGE_SIZE;
		} else {
			// we need a new block
			if (countAdded == count)
				break;

			vecs[countAdded].offset = blockOffset;
			vecs[countAdded].length = B_PAGE_SIZE;
			countAdded++;
		}

		info.index++;
			// Fixed: the leaf level index was never advanced, so the loop
			// kept re-reading the same child entry forever (after the first
			// merge the offsets no longer matched, producing bogus duplicate
			// vectors until count was exhausted).

		if (--neededBlockCount == 0)
			break;
	}

	_count = countAdded;
	return B_OK;
}
/*!	Returns the number of tree levels needed to address \a blockCount data
	blocks. The minimum depth is 1 (the root block alone).
*/
/*static*/ uint32
File::_DepthForBlockCount(uint64 blockCount)
{
	uint32 depth = 1;
	// each additional level multiplies the addressable blocks by the
	// per-block fan-out
	for (uint64 addressable = kFileRootBlockMaxCount;
			blockCount > addressable; addressable *= kFileBlockMaxCount) {
		depth++;
	}

	return depth;
}
/*!	Initializes \a addressableShift and \a childCount for each of the
	\a levelCount levels of a tree referencing \a blockCount data blocks.
	infos[0] is the root level; deeper levels follow.
*/
/*static*/ void
File::_UpdateLevelInfos(LevelInfo* infos, int32 levelCount, uint64 blockCount)
{
	uint64 addressableShift = 0;
	for (int32 i = levelCount - 1; i >= 0; i--) {
		infos[i].addressableShift = addressableShift;
		infos[i].childCount = blockCount % kFileBlockMaxCount;
			// NOTE(review): this yields 0 when blockCount is a non-zero
			// multiple of kFileBlockMaxCount, i.e. when the level's last
			// block is actually full -- confirm the callers expect that
			// (_ShrinkTree treats childCount == 0 as "block empty").
		addressableShift += kFileBlockShift;
		// number of blocks the next higher level must reference
		blockCount = (blockCount + kFileBlockMaxCount - 1) / kFileBlockMaxCount;
	}
}
/*!	Allocates and initializes a LevelInfo array for a tree referencing
	\a blockCount data blocks. On success \a _levelCount is set to the tree
	depth and the array (to be deleted with delete[]) is returned; on
	allocation failure \c NULL is returned.
*/
/*static*/ File::LevelInfo*
File::_GetLevelInfos(uint64 blockCount, int32& _levelCount)
{
	// TODO: We need to allocate differently, if requested by the page writer!
	LevelInfo* infos = new(std::nothrow) LevelInfo[kFileMaxTreeDepth];
	if (infos == NULL)
		return NULL;

	_levelCount = _DepthForBlockCount(blockCount);
	_UpdateLevelInfos(infos, _levelCount, blockCount);

	return infos;
}
/*!	Shrinks the block tree from \a blockCount to \a newBlockCount data
	blocks, freeing the no longer needed data and node blocks and culling
	tree levels that have become superfluous. Expects
	\a newBlockCount < \a blockCount.
*/
status_t
File::_ShrinkTree(uint64 blockCount, uint64 newBlockCount,
	Transaction& transaction)
{
	FUNCTION("blockCount: %" B_PRIu64 " -> %" B_PRIu64 "\n", blockCount,
		newBlockCount);

	int32 depth;
	LevelInfo* infos = _GetLevelInfos(blockCount, depth);
	if (infos == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<LevelInfo> infosDeleter(infos);

	// load the root block
	if (!infos[0].block.GetWritable(GetVolume(), BlockIndex(), transaction))
		RETURN_ERROR(B_ERROR);
	infos[0].blockData = (uint64*)((uint8*)infos[0].block.Data()
		+ kFileRootBlockOffset);

	int32 level = 0;

	// Remove blocks: walk down the right-most branch of the tree, freeing
	// data blocks (and emptied node blocks) until only newBlockCount data
	// blocks remain.
	bool removeBlock = false;
	while (true) {
		PRINT("  level %" B_PRId32 ", child count: %" B_PRIu32 "\n", level,
			infos[level].childCount);

		// If the block is empty, remove it.
		if (infos[level].childCount == 0) {
			if (level == 0)
				break;

			// prepare for the next iteration
			infos[level].childCount = kFileBlockMaxCount;

			removeBlock = true;
			level--;
			continue;
		}

		// block not empty -- we might already be done
		if (blockCount == newBlockCount)
			break;

		uint64 blockIndex = infos[level].blockData[infos[level].childCount - 1];

		// unless we're in the last level or shall remove, descend
		if (level < depth - 1 && !removeBlock) {
			LevelInfo& info = infos[++level];
			if (!info.block.GetWritable(GetVolume(), blockIndex, transaction))
				RETURN_ERROR(B_ERROR);
			info.blockData = (uint64*)info.block.Data();
			continue;
		}

		// remove the block
		LevelInfo& info = infos[level];
		PRINT("  freeing block: %" B_PRId64 "\n", blockIndex);

		// clear the entry (not strictly necessary)
		info.blockData[info.childCount - 1] = 0;

		// free the block
		status_t error = GetVolume()->GetBlockAllocator()->Free(blockIndex, 1,
			transaction);
		if (error != B_OK)
			RETURN_ERROR(error);

		// only the last level contains data blocks
		if (level == depth - 1)
			blockCount--;

		infos[level].childCount--;

		removeBlock = false;
	}

	// We got rid of all unnecessary data blocks and empty node blocks. We might
	// need to cull the lower levels of the tree, now.
	int32 newDepth = _DepthForBlockCount(newBlockCount);
	if (newDepth == depth)
		return B_OK;

	for (int32 i = 1; i <= depth - newDepth; i++) {
		uint64 blockIndex = infos[0].blockData[0];
		PRINT("  removing block %" B_PRIu64 " at level %" B_PRIi32 "\n",
			blockIndex, i);
		Block block;
		if (!block.GetReadable(GetVolume(), blockIndex))
			RETURN_ERROR(B_ERROR);

		// copy to the root block
		// Fixed: copy from the block just loaded (block.Data()), not from
		// infos[i].block, which still refers to whatever block happened to
		// remain loaded for that level during the shrink loop above.
		const uint64* blockData = (const uint64*)block.Data();
		memcpy(infos[0].blockData, blockData, infos[i].childCount * 8);

		// free the block
		block.Put();
		status_t error = GetVolume()->GetBlockAllocator()->Free(blockIndex, 1,
			transaction);
		if (error != B_OK)
			RETURN_ERROR(error);
	}

	return B_OK;
}
/*!	Grows the block tree from \a blockCount to \a newBlockCount data blocks,
	inserting new tree levels as needed and allocating the new node and data
	blocks. Expects \a blockCount < \a newBlockCount.
*/
status_t
File::_GrowTree(uint64 blockCount, uint64 newBlockCount,
	Transaction& transaction)
{
	FUNCTION("blockCount: %" B_PRIu64 " -> %" B_PRIu64 "\n", blockCount,
		newBlockCount);

	int32 depth;
	LevelInfo* infos = _GetLevelInfos(blockCount, depth);
	if (infos == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<LevelInfo> infosDeleter(infos);

	int32 newDepth = _DepthForBlockCount(newBlockCount);

	Block& rootBlock = infos[0].block;
	if (!rootBlock.GetWritable(GetVolume(), BlockIndex(), transaction))
		RETURN_ERROR(B_ERROR);

	infos[0].blockData = (uint64*)((uint8*)rootBlock.Data()
		+ kFileRootBlockOffset);

	// add new levels, if necessary
	if (depth < newDepth) {
		uint32 childCount = infos[0].childCount;

		// update the level infos
		_UpdateLevelInfos(infos, newDepth, blockCount);

		// Allocate a block per new level: each new block takes over the
		// current children of the root block and becomes the root block's
		// only child.
		// Fixed: the former inner "while (depth < newDepth)" loop never
		// changed depth, so it could not terminate; the for loop alone
		// already iterates exactly once per new level.
		for (int32 i = newDepth - depth - 1; i >= 0; i--) {
			// allocate a new block
			AllocatedBlock allocatedBlock(GetVolume()->GetBlockAllocator(),
				transaction);
			status_t error = allocatedBlock.Allocate(BlockIndex());
			if (error != B_OK)
				RETURN_ERROR(error);

			Block newBlock;
			if (!newBlock.GetZero(GetVolume(), allocatedBlock.Index(),
					transaction)) {
				RETURN_ERROR(B_ERROR);
			}
			allocatedBlock.Detach();

			PRINT("  inserting block %" B_PRIu64 " at level %" B_PRIi32
				"\n", newBlock.Index(), i + 1);

			// copy the root block
			memcpy(newBlock.Data(), infos[0].blockData, childCount * 8);

			// set the block in the root block
			infos[0].blockData[0] = newBlock.Index();
			childCount = 1;
		}
	}

	depth = newDepth;

	// prepare the iteration: load the right-most branch of the tree
	int32 level = depth - 1;
	for (int32 i = 0; i < level; i++) {
		// get the block for the next level
		LevelInfo& info = infos[i];
		if (!infos[i + 1].block.GetWritable(GetVolume(),
				info.blockData[info.childCount - 1], transaction)) {
			RETURN_ERROR(B_ERROR);
		}
		infos[i + 1].blockData = (uint64*)infos[i + 1].block.Data();
	}

	// add the new blocks
	while (blockCount < newBlockCount) {
		PRINT("  level %" B_PRId32 ", child count: %" B_PRIu32 "\n", level,
			infos[level].childCount);

		if (infos[level].childCount >= (int32)kFileBlockMaxCount) {
			// block is full -- back track
			// NOTE(review): this backtracks only a single level; it appears
			// to rely on the parent never being full at this point (a full
			// root triggers a depth increase above) -- confirm.
			level--;
		}

		// allocate and insert block
		AllocatedBlock allocatedBlock(GetVolume()->GetBlockAllocator(),
			transaction);
		status_t error = allocatedBlock.Allocate(BlockIndex());
		if (error != B_OK)
			RETURN_ERROR(error);

		uint64 blockIndex = allocatedBlock.Index();
		infos[level].blockData[infos[level].childCount++] = blockIndex;
		PRINT("  allocated block: %" B_PRId64 "\n", blockIndex);

		if (level < depth - 1) {
			// descend to the next level
			level++;
			infos[level].childCount = 0;

			if (!infos[level].block.GetZero(GetVolume(), blockIndex,
					transaction)) {
				RETURN_ERROR(B_ERROR);
			}
			infos[level].blockData = (uint64*)infos[level].block.Data();
		} else {
			// That's a data block -- make the block cache forget it, so it
			// doesn't conflict with the file cache.
			block_cache_discard(GetVolume()->BlockCache(), blockIndex, 1);
			blockCount++;
		}

		allocatedBlock.Detach();
	}

	return B_OK;
}
/*!	Writes \a size zero bytes to the file cache, starting at \a offset.
	Passing a NULL buffer to file_cache_write() means "write zeroes"; the
	range is processed in chunks, since a single call is limited to size_t.
*/
status_t
File::_WriteZeroes(uint64 offset, uint64 size)
{
	uint64 pos = offset;
	uint64 remaining = size;

	while (remaining > 0) {
		size_t chunk = std::min(remaining, (uint64)SIZE_MAX);
		status_t error = file_cache_write(fFileCache, NULL, pos, NULL,
			&chunk);
		if (error != B_OK)
			RETURN_ERROR(error);

		pos += chunk;
		remaining -= chunk;
	}

	return B_OK;
}

View File

@ -0,0 +1,66 @@
/*
* Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#ifndef FILE_H
#define FILE_H
#include "Node.h"
struct file_io_vec;
/*!	Node subclass representing a regular file. The file data are managed in
	a simple block tree; reading and writing go through the kernel file
	cache and file map, which are created in InitForVFS().
*/
class File : public Node {
public:
								// for a node loaded from disk
								File(Volume* volume, uint64 blockIndex,
									const checksumfs_node& nodeData);
								// for a newly created node
								File(Volume* volume, uint64 blockIndex,
									mode_t mode);
	virtual						~File();

								// creates the file cache and file map
	virtual	status_t			InitForVFS();
								// frees all data blocks (resize to 0)
	virtual	status_t			DeletingNode(Transaction& transaction);

	virtual	status_t			Resize(uint64 newSize, bool fillWithZeroes,
									Transaction& transaction);
	virtual	status_t			Read(off_t pos, void* buffer, size_t size,
									size_t& _bytesRead);
	virtual	status_t			Write(off_t pos, const void* buffer,
									size_t size, size_t& _bytesWritten);

								// also resets file cache/map sizes
	virtual	void				RevertNodeData(const checksumfs_node& nodeData);

								// translates a file range to device vectors
			status_t			GetFileVecs(uint64 offset, size_t size,
									file_io_vec* vecs, size_t count,
									size_t& _count);

			void*				FileMap() const	{ return fFileMap; }

private:
			struct LevelInfo;

private:
	static	uint32				_DepthForBlockCount(uint64 blockCount);
	static	void				_UpdateLevelInfos(LevelInfo* infos,
									int32 levelCount, uint64 blockCount);
	static	LevelInfo*			_GetLevelInfos(uint64 blockCount,
									int32& _levelCount);

			status_t			_ShrinkTree(uint64 blockCount,
									uint64 newBlockCount,
									Transaction& transaction);
			status_t			_GrowTree(uint64 blockCount,
									uint64 newBlockCount,
									Transaction& transaction);

			status_t			_WriteZeroes(uint64 offset, uint64 size);

private:
			void*				fFileCache;	// opaque file_cache_create() handle
			void*				fFileMap;	// opaque file_map_create() handle
};
#endif // FILE_H

View File

@ -20,6 +20,7 @@ HAIKU_CHECKSUM_FS_SOURCES =
BlockAllocator.cpp
checksumfs.cpp
Directory.cpp
File.cpp
Node.cpp
SuperBlock.cpp
SymLink.cpp

View File

@ -37,7 +37,7 @@ SymLink::~SymLink()
status_t
SymLink::Read(char* buffer, size_t toRead, size_t& _bytesRead)
SymLink::ReadSymLink(char* buffer, size_t toRead, size_t& _bytesRead)
{
uint64 size = Size();
if (size > kMaxSymLinkSize)
@ -65,7 +65,8 @@ SymLink::Read(char* buffer, size_t toRead, size_t& _bytesRead)
status_t
SymLink::Write(const char* buffer, size_t toWrite, Transaction& transaction)
SymLink::WriteSymLink(const char* buffer, size_t toWrite,
Transaction& transaction)
{
uint64 size = Size();
if (size > kMaxSymLinkSize)

View File

@ -17,9 +17,9 @@ public:
mode_t mode);
virtual ~SymLink();
status_t Read(char* buffer, size_t toRead,
status_t ReadSymLink(char* buffer, size_t toRead,
size_t& _bytesRead);
status_t Write(const char* buffer, size_t toWrite,
status_t WriteSymLink(const char* buffer, size_t toWrite,
Transaction& transaction);
};

View File

@ -23,6 +23,7 @@
#include "checksumfs_private.h"
#include "DebugSupport.h"
#include "Directory.h"
#include "File.h"
#include "SuperBlock.h"
#include "SymLink.h"
@ -228,6 +229,13 @@ Volume::GetInfo(fs_info& info)
}
/*!	Announces \a node as a new vnode to the VFS (new_vnode()), without
	publishing it yet.
*/
status_t
Volume::NewNode(Node* node)
{
	return new_vnode(fFSVolume, node->BlockIndex(), node, &gCheckSumFSVnodeOps);
}
status_t
Volume::PublishNode(Node* node, uint32 flags)
{
@ -277,6 +285,9 @@ Volume::ReadNode(uint64 blockIndex, Node*& _node)
case S_IFDIR:
node = new(std::nothrow) Directory(this, blockIndex, *nodeData);
break;
case S_IFREG:
node = new(std::nothrow) File(this, blockIndex, *nodeData);
break;
case S_IFLNK:
node = new(std::nothrow) SymLink(this, blockIndex, *nodeData);
break;
@ -307,7 +318,7 @@ Volume::CreateDirectory(mode_t mode, Transaction& transaction,
// create the directory
Directory* directory = new(std::nothrow) Directory(this,
allocatedBlock.Index(), (mode & ~(mode_t)S_IFMT) | S_IFDIR);
allocatedBlock.Index(), (mode & S_IUMSK) | S_IFDIR);
if (directory == NULL)
return B_NO_MEMORY;
@ -325,6 +336,35 @@ Volume::CreateDirectory(mode_t mode, Transaction& transaction,
}
/*!	Creates a new regular file node with the given \a mode permissions,
	backed by a freshly allocated block, and attaches it to \a transaction
	(with TRANSACTION_DELETE_NODE, so it is cleaned up if the transaction is
	aborted). On success the new node is returned via \a _file.
*/
status_t
Volume::CreateFile(mode_t mode, Transaction& transaction, File*& _file)
{
	// allocate a free block
	AllocatedBlock allocatedBlock(fBlockAllocator, transaction);

	status_t error = allocatedBlock.Allocate();
	if (error != B_OK)
		return error;

	// create the file
	File* file = new(std::nothrow) File(this, allocatedBlock.Index(),
		(mode & S_IUMSK) | S_IFREG);
	if (file == NULL)
		return B_NO_MEMORY;

	// attach the file to the transaction
	error = transaction.AddNode(file, TRANSACTION_DELETE_NODE);
	if (error != B_OK) {
		delete file;
		return error;
	}

	allocatedBlock.Detach();
		// the block is now owned by the file node

	_file = file;

	return B_OK;
}
status_t
Volume::CreateSymLink(mode_t mode, Transaction& transaction, SymLink*& _symLink)
{
@ -336,11 +376,11 @@ Volume::CreateSymLink(mode_t mode, Transaction& transaction, SymLink*& _symLink)
// create the symlink
SymLink* symLink = new(std::nothrow) SymLink(this, allocatedBlock.Index(),
(mode & ~(mode_t)S_IFMT) | S_IFLNK);
(mode & S_IUMSK) | S_IFLNK);
if (symLink == NULL)
return B_NO_MEMORY;
// attach the directory to the transaction
// attach the symlink to the transaction
error = transaction.AddNode(symLink, TRANSACTION_DELETE_NODE);
if (error != B_OK) {
delete symLink;
@ -360,6 +400,12 @@ Volume::DeleteNode(Node* node)
Transaction transaction(this);
status_t error = transaction.Start();
if (error == B_OK) {
error = node->DeletingNode(transaction);
if (error != B_OK) {
ERROR("Preparing deletion of failed for node at %" B_PRIu64 "\n",
node->BlockIndex());
}
error = fBlockAllocator->Free(node->BlockIndex(), 1, transaction);
if (error == B_OK) {
error = transaction.Commit();

View File

@ -15,6 +15,7 @@
class BlockAllocator;
class Directory;
class File;
class Node;
class SymLink;
class Transaction;
@ -35,6 +36,7 @@ public:
void GetInfo(fs_info& info);
status_t NewNode(Node* node);
status_t PublishNode(Node* node, uint32 flags);
status_t GetNode(uint64 blockIndex, Node*& _node);
status_t PutNode(Node* node);
@ -45,6 +47,8 @@ public:
status_t CreateDirectory(mode_t mode,
Transaction& transaction,
Directory*& _directory);
status_t CreateFile(mode_t mode,
Transaction& transaction, File*& _file);
status_t CreateSymLink(mode_t mode,
Transaction& transaction,
SymLink*& _symLink);
@ -54,6 +58,7 @@ public:
inline void TransactionFinished();
inline dev_t ID() const { return fFSVolume->id; }
inline int FD() const { return fFD; }
inline bool IsReadOnly() const;
inline uint64 TotalBlocks() const { return fTotalBlocks; }
inline void* BlockCache() const { return fBlockCache; }

View File

@ -12,6 +12,8 @@
#include <new>
#include <fs_interface.h>
#include <io_requests.h>
#include <NodeMonitor.h>
#include <AutoDeleter.h>
#include <AutoLocker.h>
@ -22,6 +24,7 @@
#include "checksumfs_private.h"
#include "DebugSupport.h"
#include "Directory.h"
#include "File.h"
#include "SuperBlock.h"
#include "SymLink.h"
#include "Transaction.h"
@ -41,6 +44,13 @@ set_timespec(timespec& time, uint64 nanos)
}
/*!	Converts the given timespec to a nanosecond count. */
static uint64
timespec_to_nsecs(const timespec& time)
{
	const uint64 nsecsPerSecond = 1000000000;
	return (uint64)time.tv_sec * nsecsPerSecond + (uint64)time.tv_nsec;
}
struct PutNode {
inline void operator()(Node* node)
{
@ -103,7 +113,7 @@ check_access(Node* node, uint32 accessFlags)
}
status_t
static status_t
remove_entry(fs_volume* fsVolume, fs_vnode* parent, const char* name,
bool removeDirectory)
{
@ -356,6 +366,12 @@ checksumfs_get_vnode(fs_volume* fsVolume, ino_t id, fs_vnode* vnode,
if (error != B_OK)
return error;
error = node->InitForVFS();
if (error != B_OK) {
delete node;
return error;
}
vnode->private_node = node;
vnode->ops = &gCheckSumFSVnodeOps;
*_type = node->Mode();
@ -425,6 +441,69 @@ checksumfs_remove_vnode(fs_volume* fsVolume, fs_vnode* vnode, bool reenter)
}
// #pragma mark - asynchronous I/O
/*!	do_iterative_fd_io() callback: translates the file range
	[\a offset, \a offset + \a size) to device vectors via the file map.
	\a cookie is the File* passed to do_iterative_fd_io().
*/
static status_t
iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
	size_t size, file_io_vec* vecs, size_t* _count)
{
	File* file = (File*)cookie;
	RETURN_ERROR(file_map_translate(file->FileMap(), offset, size, vecs, _count,
		B_PAGE_SIZE));
}
/*!	do_iterative_fd_io() callback, invoked when the request has been
	processed: releases the read lock acquired in checksumfs_io(). The
	request's status/transfer information is deliberately ignored here.
*/
static status_t
iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
	bool partialTransfer, size_t bytesTransferred)
{
	File* file = (File*)cookie;
	file->ReadUnlock();
	return B_OK;
}
/*!	Implements the io() FS hook: performs (asynchronous) I/O on a regular
	file by translating the request via the file map and forwarding it to
	the underlying device. Only File nodes are supported.
*/
static status_t
checksumfs_io(fs_volume* fsVolume, fs_vnode* vnode, void* cookie,
	io_request* request)
{
	Volume* volume = (Volume*)fsVolume->private_volume;
	File* file = dynamic_cast<File*>((Node*)vnode->private_node);
	if (file == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	if (io_request_is_write(request) && volume->IsReadOnly())
		RETURN_ERROR(B_READ_ONLY_DEVICE);

	// Read-lock the file -- we'll unlock it in the finished hook.
	// NOTE(review): if do_iterative_fd_io() can fail without invoking the
	// finished hook, the read lock would leak -- confirm its contract.
	file->ReadLock();

	RETURN_ERROR(do_iterative_fd_io(volume->FD(), request,
		iterative_io_get_vecs_hook, iterative_io_finished_hook, file));
}
// #pragma mark - cache file access
/*!	Implements the get_file_map() FS hook: fills in \a vecs with the device
	runs backing the file range [\a offset, \a offset + \a size). Only File
	nodes are supported; the work is done by File::GetFileVecs().
*/
static status_t
checksumfs_get_file_map(fs_volume* fsVolume, fs_vnode* vnode, off_t offset,
	size_t size, struct file_io_vec* vecs, size_t* _count)
{
	if (offset < 0)
		RETURN_ERROR(B_BAD_VALUE);

	File* file = dynamic_cast<File*>((Node*)vnode->private_node);
	if (file == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	RETURN_ERROR(file->GetFileVecs(offset, size, vecs, *_count, *_count));
}
// #pragma mark - common operations
@ -440,7 +519,7 @@ checksumfs_read_symlink(fs_volume* fsVolume, fs_vnode* vnode, char* buffer,
if (error != B_OK)
return error;
return symLink->Read(buffer, *_bufferSize, *_bufferSize);
return symLink->ReadSymLink(buffer, *_bufferSize, *_bufferSize);
}
@ -461,14 +540,9 @@ checksumfs_create_symlink(fs_volume* fsVolume, fs_vnode* parent,
if (error != B_OK)
return error;
// start a transaction
// start a transaction and add the directory (write locks it, too)
Transaction transaction(volume);
error = transaction.Start();
if (error != B_OK)
return error;
// attach the directory to the transaction (write locks it, too)
error = transaction.AddNode(directory);
error = transaction.StartAndAddNode(directory);
if (error != B_OK)
return error;
@ -479,7 +553,7 @@ checksumfs_create_symlink(fs_volume* fsVolume, fs_vnode* parent,
return error;
// write it
error = newSymLink->Write(path, strlen(path), transaction);
error = newSymLink->WriteSymLink(path, strlen(path), transaction);
if (error != B_OK)
return error;
@ -531,6 +605,100 @@ checksumfs_read_stat(fs_volume* fsVolume, fs_vnode* vnode, struct stat* st)
}
/*!	Implements the write_stat() FS hook: applies the stat fields selected by
	\a statMask to the node, enforcing the usual permission rules (size
	changes require write access, UID changes require root, etc.), and
	updates the node's times accordingly.
*/
static status_t
checksumfs_write_stat(fs_volume* fsVolume, fs_vnode* vnode,
	const struct stat* st, uint32 statMask)
{
	Volume* volume = (Volume*)fsVolume->private_volume;
	Node* node = (Node*)vnode->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	// start a transaction and add the node to it (write locks the node, too)
	Transaction transaction(volume);
	status_t error = transaction.StartAndAddNode(node);
		// Fixed: the node was never actually added to the transaction,
		// although the comment (and the node locking requirements) say it
		// must be.
	if (error != B_OK)
		return error;

	uid_t uid = geteuid();
	bool isOwnerOrRoot = uid == 0 || uid == node->UID();
	bool hasWriteAccess = check_access(node, W_OK) == B_OK;

	bool updateModified = false;
	bool updateChanged = false;

	if ((statMask & B_STAT_SIZE) != 0 && (uint64)st->st_size != node->Size()) {
		// resizing requires write access
		if (!hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		error = node->Resize(st->st_size, true, transaction);
		if (error != B_OK)
			RETURN_ERROR(error);

		updateModified = updateChanged = true;
	}

	if ((statMask & B_STAT_UID) != 0 && st->st_uid != node->UID()) {
		// only root can do that
		if (uid != 0)
			RETURN_ERROR(B_NOT_ALLOWED);

		node->SetUID(st->st_uid);
		updateChanged = true;
	}

	if ((statMask & B_STAT_GID) != 0 && st->st_gid != node->GID()) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);

		node->SetGID(st->st_gid);
		updateChanged = true;
	}

	// TODO: B_STAT_MODE is not handled yet.

	if ((statMask & B_STAT_CREATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		node->SetCreationTime(timespec_to_nsecs(st->st_crtim));
		updateChanged = true;
	}

	if ((statMask & B_STAT_MODIFICATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		node->SetModificationTime(timespec_to_nsecs(st->st_mtim));
		updateModified = false;
		updateChanged = true;
	}

	if ((statMask & B_STAT_CHANGE_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		node->SetChangeTime(timespec_to_nsecs(st->st_ctim));
			// Fixed: this branch set the *modification* time from st_mtim
			// (copy/paste from the branch above); it must set the change
			// time from st_ctim.
		updateModified = false;
		updateChanged = false;
	}

	// update access/change/modification time
	if (updateModified)
		node->Touched(NODE_MODIFIED);
	else if (updateChanged)
		node->Touched(NODE_STAT_CHANGED);
	else
		node->Touched(NODE_ACCESSED);

	// commit the transaction
	return transaction.Commit();
}
// #pragma mark - file operations
@ -545,16 +713,17 @@ struct FileCookie {
};
/*! Opens the node according to the given open mode (if the permissions allow
that) and creates a file cookie.
The caller must either pass a \a transaction, which is already started and
has the node added to it, or not have a lock to any node at all (this
function will do the locking in this case).
*/
static status_t
checksumfs_open(fs_volume* fsVolume, fs_vnode* vnode, int openMode,
void** _cookie)
open_file(Volume* volume, Node* node, int openMode, Transaction* transaction,
FileCookie*& _cookie)
{
Volume* volume = (Volume*)fsVolume->private_volume;
Node* node = (Node*)vnode->private_node;
NodeReadLocker nodeLocker(node);
// check the open mode and permissions
// translate the open mode to required permissions
uint32 accessFlags = 0;
switch (openMode & O_RWMASK) {
case O_RDONLY:
@ -568,20 +737,209 @@ checksumfs_open(fs_volume* fsVolume, fs_vnode* vnode, int openMode,
break;
}
// We need to at least read-lock the node. If O_TRUNC is specified, we even
// need a write lock and a transaction. If the caller has supplied a
// transaction, it is already started and the node is locked.
NodeReadLocker nodeReadLocker;
Transaction localTransaction(volume);
if ((openMode & O_TRUNC) != 0) {
accessFlags |= W_OK;
if (transaction == NULL) {
transaction = &localTransaction;
status_t error = localTransaction.StartAndAddNode(node);
if (error != B_OK)
RETURN_ERROR(error);
}
} else if (transaction == NULL)
nodeReadLocker.SetTo(node, false);
// check permissions
if ((accessFlags & W_OK) != 0) {
if (S_ISDIR(node->Mode()))
return B_IS_A_DIRECTORY;
if (volume->IsReadOnly())
return B_READ_ONLY_DEVICE;
if (S_ISDIR(node->Mode()))
return B_IS_A_DIRECTORY;
}
if ((openMode & O_DIRECTORY) != 0 && !S_ISDIR(node->Mode()))
return B_NOT_A_DIRECTORY;
status_t error = check_access(node, accessFlags);
if (error != B_OK)
return error;
// TODO: Support O_NOCACHE.
FileCookie* cookie = new(std::nothrow) FileCookie(openMode);
if (cookie == NULL)
return B_NO_MEMORY;
ObjectDeleter<FileCookie> cookieDeleter(cookie);
// truncate the file, if requested
if ((openMode & O_TRUNC) != 0) {
error = node->Resize(0, false, *transaction);
if (error != B_OK)
return error;
node->Touched(NODE_MODIFIED);
if (transaction == &localTransaction) {
error = transaction->Commit();
if (error != B_OK)
return error;
}
}
_cookie = cookieDeleter.Detach();
return B_OK;
}
/*!	Implements the create() FS hook: opens the entry \a name in the parent
	directory, creating a new file first, if it doesn't exist yet (honoring
	\c O_EXCL). On success a file cookie and the vnode ID are returned.
*/
static status_t
checksumfs_create(fs_volume* fsVolume, fs_vnode* parent, const char* name,
	int openMode, int perms, void** _cookie, ino_t* _newVnodeID)
{
	Volume* volume = (Volume*)fsVolume->private_volume;
	Directory* directory
		= dynamic_cast<Directory*>((Node*)parent->private_node);
	if (directory == NULL)
		return B_NOT_A_DIRECTORY;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	Node* childNode;
	NodePutter childNodePutter;
	childNode = NULL;

	// look up the entry
	NodeWriteLocker directoryLocker(directory);
		// We only need a read lock for the lookup, but later we'll need a
		// write lock, if we have to create the file. So this is simpler.
	uint64 blockIndex;
	status_t error = directory->LookupEntry(name, blockIndex);
	if (error == B_OK) {
		// the entry already exists
		if ((openMode & O_EXCL) != 0)
			return B_FILE_EXISTS;

		// get the entry's node
		error = volume->GetNode(blockIndex, childNode);
		if (error != B_OK)
			RETURN_ERROR(error);
		childNodePutter.SetTo(childNode);
			// NOTE(review): the reference from GetNode() is released when
			// childNodePutter goes out of scope, although the vnode ID is
			// returned to the VFS -- confirm the VFS acquires its own
			// reference in this case.

		directoryLocker.Unlock();

		// open the existing node (handles permissions and O_TRUNC)
		FileCookie* cookie;
		error = open_file(volume, childNode, openMode, NULL, cookie);
		if (error != B_OK)
			RETURN_ERROR(error);

		*_cookie = cookie;
		*_newVnodeID = childNode->BlockIndex();
		return B_OK;
	}

	if (error != B_ENTRY_NOT_FOUND)
		RETURN_ERROR(error);

	// The entry doesn't exist yet. We have to create a new file.
	// TODO: Don't do that in an unlinked directory!

	// check the directory write permission
	error = check_access(directory, W_OK);
	if (error != B_OK)
		return error;

	// start a transaction and attach the directory
	Transaction transaction(volume);
	error = transaction.StartAndAddNode(directory,
		TRANSACTION_NODE_ALREADY_LOCKED);
	if (error != B_OK)
		RETURN_ERROR(error);
	directoryLocker.Detach();
		// write lock transferred to the transaction

	// create a file
	File* newFile;
	error = volume->CreateFile(perms, transaction, newFile);
	if (error != B_OK)
		return error;

	// insert the new file
	error = directory->InsertEntry(name, newFile->BlockIndex(), transaction);
	if (error != B_OK)
		return error;

	// open the file (O_TRUNC is pointless for a brand-new empty file)
	FileCookie* cookie;
	error = open_file(volume, newFile, openMode & ~O_TRUNC, &transaction,
		cookie);
	if (error != B_OK)
		RETURN_ERROR(error);
	ObjectDeleter<FileCookie> cookieDeleter(cookie);

	// update stat data
	newFile->SetHardLinks(1);

	directory->Touched(NODE_MODIFIED);

	// announce the new vnode, but don't publish it yet
	error = volume->NewNode(newFile);
	if (error != B_OK)
		RETURN_ERROR(error);

	// create the file cache
	error = newFile->InitForVFS();
	if (error != B_OK) {
		volume->RemoveNode(newFile);
		return error;
	}

	// Don't delete the File object when the transaction is committed, since
	// it's going to be published.
	transaction.KeepNode(newFile);

	// commit the transaction
	error = transaction.Commit();
	if (error != B_OK) {
		volume->RemoveNode(newFile);
		delete newFile;
		RETURN_ERROR(error);
	}

	// everything is on disk -- publish the vnode, now
	error = volume->PublishNode(newFile, 0);
	if (error != B_OK) {
		// TODO: The file creation succeeded, but the caller will get an error.
		// We could try to delete the file again.
		RETURN_ERROR(error);
	}

	*_cookie = cookieDeleter.Detach();
	*_newVnodeID = newFile->BlockIndex();

	return B_OK;
}
static status_t
checksumfs_open(fs_volume* fsVolume, fs_vnode* vnode, int openMode,
void** _cookie)
{
Volume* volume = (Volume*)fsVolume->private_volume;
Node* node = (Node*)vnode->private_node;
FileCookie* cookie;
status_t error = open_file(volume, node, openMode, NULL, cookie);
if (error != B_OK)
RETURN_ERROR(error);
*_cookie = cookie;
return B_OK;
@ -604,6 +962,55 @@ checksumfs_free_cookie(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie)
}
/*!	Implements the read() FS hook: reads from the node at position \a pos,
	provided the cookie was opened for reading. On success \a _length is set
	to the number of bytes actually read.
*/
static status_t
checksumfs_read(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie, off_t pos,
	void* buffer, size_t* _length)
{
	FileCookie* cookie = (FileCookie*)_cookie;
	Node* node = (Node*)vnode->private_node;

	// the cookie must have been opened for reading
	int rwMode = cookie->openMode & O_RWMASK;
	if (rwMode != O_RDONLY && rwMode != O_RDWR)
		RETURN_ERROR(EBADF);

	return node->Read(pos, buffer, *_length, *_length);
}
/*!	Implements the write() FS hook: writes to the node at position \a pos,
	provided the cookie was opened for writing. In \c O_APPEND mode the
	position is ignored and the data are appended at the end of the file.
	On success \a _length is set to the number of bytes actually written.
*/
static status_t
checksumfs_write(fs_volume* fsVolume, fs_vnode* vnode, void* _cookie, off_t pos,
	const void* buffer, size_t* _length)
{
	FileCookie* cookie = (FileCookie*)_cookie;
	Node* node = (Node*)vnode->private_node;

	// the cookie must have been opened for writing
	int rwMode = cookie->openMode & O_RWMASK;
	if (rwMode != O_WRONLY && rwMode != O_RDWR)
		RETURN_ERROR(EBADF);

	if (pos < 0)
		RETURN_ERROR(B_BAD_VALUE);

	if ((cookie->openMode & O_APPEND) != 0) {
		pos = -1;
			// special value handled by Write()
	}

	RETURN_ERROR(node->Write(pos, buffer, *_length, *_length));

	// TODO: Modification time update!
}
// #pragma mark - directory operations
@ -708,14 +1115,9 @@ checksumfs_create_dir(fs_volume* fsVolume, fs_vnode* parent, const char* name,
if (error != B_OK)
return error;
// start a transaction
// start a transaction and attach the directory (write locks it, too)
Transaction transaction(volume);
error = transaction.Start();
if (error != B_OK)
return error;
// attach the directory to the transaction (write locks it, too)
error = transaction.AddNode(directory);
error = transaction.StartAndAddNode(directory);
if (error != B_OK)
return error;
@ -934,11 +1336,11 @@ fs_vnode_ops gCheckSumFSVnodeOps = {
NULL, // write_pages
/* asynchronous I/O */
NULL, // checksumfs_io,
checksumfs_io,
NULL, // checksumfs_cancel_io,
/* cache file access */
NULL, // checksumfs_get_file_map,
checksumfs_get_file_map,
/* common operations */
NULL, // checksumfs_ioctl,
@ -956,15 +1358,15 @@ fs_vnode_ops gCheckSumFSVnodeOps = {
NULL, // checksumfs_access,
checksumfs_read_stat,
NULL, // checksumfs_write_stat,
checksumfs_write_stat,
/* file operations */
NULL, // checksumfs_create,
checksumfs_create,
checksumfs_open,
checksumfs_close,
checksumfs_free_cookie,
NULL, // checksumfs_read,
NULL, // checksumfs_write,
checksumfs_read,
checksumfs_write,
/* directory operations */
checksumfs_create_dir,