
code, the super block log data could have been wrong. Moved the doubly linked list code to the kernel's util/DoublyLinkedList.h. Also removed Journal::fCurrent, as it's not really used. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@13367 a95241bf-73f2-0310-859d-f6bbb57e9c96
528 lines
13 KiB
C++
528 lines
13 KiB
C++
/* Journal - transaction and logging
|
|
*
|
|
* Copyright 2001-2005, Axel Dörfler, axeld@pinc-software.de.
|
|
* This file may be used under the terms of the MIT License.
|
|
*/
|
|
|
|
|
|
#include "Journal.h"
|
|
#include "Inode.h"
|
|
#include "Debug.h"
|
|
|
|
#include <Drivers.h>
|
|
#include <util/kernel_cpp.h>
|
|
#include <errno.h>
|
|
|
|
|
|
/**	Bookkeeping record for one entry that has been written to the log.
 *	Records are kept in Journal::fEntries and are passed to the block
 *	cache as the argument of Journal::blockNotify().
 */
struct log_entry : public DoublyLinkedListLinkImpl<log_entry> {
	// log position at which this entry begins (the log end pointer
	// at the time the entry was written)
	uint16		start;

	// size of the entry, as reported by Journal::TransactionSize()
	uint16		length;

	// NOTE(review): never read in the code visible here
	uint32		cached_blocks;

	// the journal this entry belongs to; blockNotify() gets its
	// Journal object from here
	Journal		*journal;
};
|
|
|
|
|
|
/**	Sets up the journal for the given \a volume.
 *	The log size is taken from the volume's log block run, and the
 *	maximum transaction size is derived from it.
 */
Journal::Journal(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs journal"),
	fOwner(NULL),
	fArray(volume->BlockSize()),
	fLogSize(volume->Log().length),
	fMaxTransactionSize(fLogSize / 4 - 5),
	fUsed(0),
	fTransactionsInEntry(0)
{
	// NOTE(review): for very small logs "fLogSize / 4 - 5" can wrap around
	// if the member is unsigned - presumably this check is meant to catch
	// that case; confirm against the declaration in Journal.h
	if (fMaxTransactionSize > fLogSize / 2)
		fMaxTransactionSize = fLogSize / 2 - 5;

	// No transaction is running yet. FlushLogAndBlocks() - which is also
	// called by the destructor - compares fTransactionID against -1, so it
	// must not be left uninitialized when no transaction was ever started.
	fTransactionID = -1;
	fHasChangedBlocks = false;
	fTimestamp = 0;
}
|
|
|
|
|
|
/**	Writes out whatever is still pending via FlushLogAndBlocks() before
 *	the journal goes away. The result of that call is not evaluated.
 */
Journal::~Journal()
{
	FlushLogAndBlocks();
}
|
|
|
|
|
|
/**	Checks whether the log has to be replayed, and does so if needed.
 *	Returns B_OK if the log start and end pointers of the volume match,
 *	and the result of ReplayLog() otherwise.
 */
status_t
Journal::InitCheck()
{
	// matching log pointers mean that there is nothing to replay
	if (fVolume->LogStart() == fVolume->LogEnd())
		return B_OK;

	// the log is not empty - the disk should have been marked dirty, then
	if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY) {
		FATAL(("log_start and log_end differ, but disk is marked clean - trying to replay log...\n"));
	}

	return ReplayLog();
}
|
|
|
|
|
|
/**	Supposed to verify the integrity of a log entry; for now, it only
 *	prints the number of entries and the first value of \a array, and
 *	always reports success.
 */
status_t
Journal::CheckLogEntry(int32 count, const off_t *array)
{
	// ToDo: check log entry integrity (block numbers and entry size)

	PRINT(("Log entry has %ld entries (%Ld)\n", count, array[0]));

	return B_OK;
}
|
|
|
|
|
|
/** Replays an entry in the log.
|
|
* \a _start points to the entry in the log, and will be bumped to the next
|
|
* one if replaying succeeded.
|
|
*/
|
|
|
|
status_t
|
|
Journal::ReplayLogEntry(int32 *_start)
|
|
{
|
|
PRINT(("ReplayLogEntry(start = %ld)\n", *_start));
|
|
|
|
off_t logOffset = fVolume->ToBlock(fVolume->Log());
|
|
off_t arrayBlock = (*_start % fLogSize) + fVolume->ToBlock(fVolume->Log());
|
|
int32 blockSize = fVolume->BlockSize();
|
|
int32 count = 1, valuesInBlock = blockSize / sizeof(off_t);
|
|
int32 numArrayBlocks;
|
|
off_t blockNumber = 0;
|
|
bool first = true;
|
|
|
|
CachedBlock cached(fVolume);
|
|
|
|
while (count > 0) {
|
|
const off_t *array = (const off_t *)cached.SetTo(arrayBlock);
|
|
if (array == NULL)
|
|
return B_IO_ERROR;
|
|
|
|
int32 index = 0;
|
|
if (first) {
|
|
if (array[0] < 1 || array[0] >= fLogSize)
|
|
return B_BAD_DATA;
|
|
|
|
count = array[0];
|
|
first = false;
|
|
|
|
numArrayBlocks = ((count + 1) * sizeof(off_t) + blockSize - 1) / blockSize;
|
|
blockNumber = (*_start + numArrayBlocks) % fLogSize;
|
|
// first real block in this log entry
|
|
*_start += count;
|
|
index++;
|
|
// the first entry in the first block is the number
|
|
// of blocks in that log entry
|
|
}
|
|
(*_start)++;
|
|
|
|
if (CheckLogEntry(count, array + 1) < B_OK)
|
|
return B_BAD_DATA;
|
|
|
|
CachedBlock cachedCopy(fVolume);
|
|
for (; index < valuesInBlock && count-- > 0; index++) {
|
|
PRINT(("replay block %Ld in log at %Ld!\n", array[index], blockNumber));
|
|
|
|
const uint8 *copy = cachedCopy.SetTo(logOffset + blockNumber);
|
|
if (copy == NULL)
|
|
RETURN_ERROR(B_IO_ERROR);
|
|
|
|
ssize_t written = write_pos(fVolume->Device(),
|
|
array[index] << fVolume->BlockShift(), copy, blockSize);
|
|
if (written != blockSize)
|
|
RETURN_ERROR(B_IO_ERROR);
|
|
|
|
blockNumber = (blockNumber + 1) % fLogSize;
|
|
}
|
|
arrayBlock++;
|
|
if (arrayBlock > fVolume->ToBlock(fVolume->Log()) + fLogSize)
|
|
arrayBlock = fVolume->ToBlock(fVolume->Log());
|
|
}
|
|
return B_OK;
|
|
}
|
|
|
|
|
|
/** Replays all log entries - this will put the disk into a
|
|
* consistent and clean state, if it was not correctly unmounted
|
|
* before.
|
|
* This method is called by Journal::InitCheck() if the log start
|
|
* and end pointer don't match.
|
|
*/
|
|
|
|
status_t
|
|
Journal::ReplayLog()
|
|
{
|
|
INFORM(("Replay log, disk was not correctly unmounted...\n"));
|
|
|
|
int32 start = fVolume->LogStart();
|
|
int32 lastStart = -1;
|
|
while (true) {
|
|
// stop if the log is completely flushed
|
|
if (start == fVolume->LogEnd())
|
|
break;
|
|
|
|
if (start == lastStart) {
|
|
// strange, flushing the log hasn't changed the log_start pointer
|
|
return B_ERROR;
|
|
}
|
|
lastStart = start;
|
|
|
|
status_t status = ReplayLogEntry(&start);
|
|
if (status < B_OK) {
|
|
FATAL(("replaying log entry from %ld failed: %s\n", start, strerror(status)));
|
|
return B_ERROR;
|
|
}
|
|
start = start % fLogSize;
|
|
}
|
|
|
|
PRINT(("replaying worked fine!\n"));
|
|
fVolume->SuperBlock().log_start = fVolume->LogEnd();
|
|
fVolume->LogStart() = fVolume->LogEnd();
|
|
fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_CLEAN;
|
|
|
|
return fVolume->WriteSuperBlock();
|
|
}
|
|
|
|
|
|
/** This is a callback function that is called by the cache, whenever
|
|
* a block is flushed to disk that was updated as part of a transaction.
|
|
* This is necessary to keep track of completed transactions, to be
|
|
* able to update the log start pointer.
|
|
*/
|
|
|
|
void
|
|
Journal::blockNotify(int32 transactionID, void *arg)
|
|
{
|
|
log_entry *logEntry = (log_entry *)arg;
|
|
|
|
PRINT(("Log entry %p has been finished, transaction ID = %ld\n", logEntry, transactionID));
|
|
|
|
Journal *journal = logEntry->journal;
|
|
disk_super_block &superBlock = journal->fVolume->SuperBlock();
|
|
bool update = false;
|
|
|
|
// Set log_start pointer if possible...
|
|
|
|
if (logEntry == journal->fEntries.First()) {
|
|
log_entry *next = journal->fEntries.GetNext(logEntry);
|
|
if (next != NULL) {
|
|
int32 length = next->start - logEntry->start;
|
|
superBlock.log_start = (superBlock.log_start + length) % journal->fLogSize;
|
|
} else
|
|
superBlock.log_start = journal->fVolume->LogEnd();
|
|
|
|
update = true;
|
|
}
|
|
|
|
journal->fEntriesLock.Lock();
|
|
journal->fUsed -= logEntry->length;
|
|
journal->fEntries.Remove(logEntry);
|
|
journal->fEntriesLock.Unlock();
|
|
|
|
free(logEntry);
|
|
|
|
// update the super block, and change the disk's state, if necessary
|
|
|
|
if (update) {
|
|
journal->fVolume->LogStart() = superBlock.log_start;
|
|
|
|
if (superBlock.log_start == superBlock.log_end)
|
|
superBlock.flags = SUPER_BLOCK_DISK_CLEAN;
|
|
|
|
status_t status = journal->fVolume->WriteSuperBlock();
|
|
if (status != B_OK) {
|
|
FATAL(("blockNotify: could not write back super block: %s\n",
|
|
strerror(status)));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**	Writes the blocks of the current transaction as one entry into the log.
 *	The entry consists of the block array (fArray) followed by a copy of
 *	every block that is part of the transaction. Afterwards, a log_entry
 *	is added to fEntries, the log end pointer in the super block is
 *	updated, and the transaction is ended in the block cache, with
 *	blockNotify() registered as its callback.
 *
 *	Returns B_OK if the transaction did not contain any blocks,
 *	B_NO_MEMORY if the log_entry could not be allocated, and the result
 *	of writing the super block otherwise.
 *	Failed writes into the log area are reported via FATAL(), but do not
 *	abort the operation.
 */
status_t
Journal::WriteLogEntry()
{
	fTransactionsInEntry = 0;
	fHasChangedBlocks = false;

	// insert all changed blocks into the log array
	uint32 cookie = 0;
	{
		off_t blockNumber;
		while (cache_next_block_in_transaction(fVolume->BlockCache(), fTransactionID, &cookie,
				&blockNumber, NULL, NULL) == B_OK) {
			fArray.Insert(blockNumber);
		}
	}

	sorted_array *array = fArray.Array();
	if (array == NULL || array->count == 0) {
		// nothing has changed during this transaction
		cache_end_transaction(fVolume->BlockCache(), fTransactionID, NULL, NULL);
		return B_OK;
	}

	// Make sure there is enough space in the log.
	// If that fails for whatever reason, panic!
	// ToDo:
/*	force_cache_flush(fVolume->Device(), false);
	int32 tries = fLogSize / 2 + 1;
	while (TransactionSize() > FreeLogBlocks() && tries-- > 0)
		force_cache_flush(fVolume->Device(), true);

	if (tries <= 0) {
		fVolume->Panic();
		return B_BAD_DATA;
	}
*/
	int32 blockShift = fVolume->BlockShift();
	const ssize_t blockSize = fVolume->BlockSize();
	off_t logOffset = fVolume->ToBlock(fVolume->Log()) << blockShift;
		// byte offset of the log area on the device
	off_t logStart = fVolume->LogEnd();
	off_t logPosition = logStart % fLogSize;

	// Write disk block array

	uint8 *arrayBlock = (uint8 *)array;

	// ToDo: the single writes should be combined!
	for (int32 size = fArray.BlocksUsed(); size-- > 0;) {
		ssize_t written = write_pos(fVolume->Device(),
			logOffset + (logPosition << blockShift), arrayBlock, blockSize);
		if (written != blockSize) {
			// NOTE(review): the entry is still completed, as there is no
			// way to back out at this point - only report the failure
			FATAL(("Could not write log array block at log position %Ld\n", logPosition));
		}

		logPosition = (logPosition + 1) % fLogSize;
		arrayBlock += blockSize;
	}

	// Write logged blocks into the log

	for (int32 i = 0; i < array->count; i++) {
		const uint8 *block = (const uint8 *)block_cache_get(fVolume->BlockCache(), array->values[i]);
		if (block == NULL) {
			// NOTE(review): the log position is not advanced in this case,
			// although the block is part of the array that was written
			// above - the following blocks end up one position early
			FATAL(("Could not get block %Ld\n", array->values[i]));
			continue;
		}

		// ToDo: combine blocks whenever possible (using iovecs)!
		ssize_t written = write_pos(fVolume->Device(),
			logOffset + (logPosition << blockShift), block, blockSize);
		if (written != blockSize) {
			FATAL(("Could not write block %Ld to log position %Ld\n", array->values[i],
				logPosition));
		}

		block_cache_put(fVolume->BlockCache(), array->values[i]);
		logPosition = (logPosition + 1) % fLogSize;
	}

	// create and add log entry

	log_entry *logEntry = (log_entry *)malloc(sizeof(log_entry));
	if (logEntry == NULL) {
		DIE(("Could not create next log entry (out of memory)\n"));
		return B_NO_MEMORY;
	}

	logEntry->start = logStart;
	logEntry->length = TransactionSize();
	logEntry->journal = this;

	fEntriesLock.Lock();
	fEntries.Add(logEntry);
	fUsed += logEntry->length;
	fEntriesLock.Unlock();

	// Update the log end pointer in the super block

	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_DIRTY;
	fVolume->SuperBlock().log_end = logPosition;
	fVolume->LogEnd() = logPosition;

	status_t status = fVolume->WriteSuperBlock();

	// We need to flush the drives own cache here to ensure
	// disk consistency.
	// If that call fails, we can't do anything about it anyway
	ioctl(fVolume->Device(), B_FLUSH_DRIVE_CACHE);

	// at this point, we can finally end the transaction - we're in
	// a guaranteed valid state
	cache_end_transaction(fVolume->BlockCache(), fTransactionID, blockNotify, logEntry);
	fArray.MakeEmpty();

	// If the log goes to the next round (the log is written as a
	// circular buffer), all blocks will be flushed out which is
	// possible because we don't have any locked blocks at this
	// point.
	if (logPosition < logStart)
		fVolume->FlushDevice();

	return status;
}
|
|
|
|
|
|
/**	Writes the current log entry (if there is one), and flushes the
 *	device afterwards.
 *	Does nothing and returns B_OK when it is called while the journal
 *	lock is held more than once, that is, from inside a transaction.
 *	Otherwise, the result of Volume::FlushDevice() is returned; a failure
 *	to write the log entry is only reported via FATAL().
 */
status_t
Journal::FlushLogAndBlocks()
{
	const status_t lockStatus = fLock.Lock();
	if (lockStatus != B_OK)
		return lockStatus;

	status_t result = B_OK;

	// Only flush if we're not called from inside a transaction (whoa!)
	if (fLock.OwnerCount() <= 1) {
		// write the current log entry to disk

		if (fTransactionID != -1 && TransactionSize() != 0) {
			const status_t written = WriteLogEntry();
			if (written < B_OK) {
				FATAL(("writing current log entry failed: %s\n", strerror(written)));
			}
		}

		result = fVolume->FlushDevice();
	}

	fLock.Unlock();
	return result;
}
|
|
|
|
|
|
/**	Acquires the journal lock for \a owner, and starts a new transaction
 *	in the block cache if the lock was not already held (nested calls
 *	just reuse the running transaction).
 *	Returns the error of the lock or of cache_start_transaction() on
 *	failure; in the latter case, the lock is released again, and the
 *	journal is left without owner and without transaction.
 */
status_t
Journal::Lock(Transaction *owner)
{
	status_t status = fLock.Lock();
	if (status != B_OK)
		return status;

/*	ToDo:
	// if the last transaction is older than 2 secs, start a new one
	if (fTransactionsInEntry != 0 && system_time() - fTimestamp > 2000000L)
		WriteLogEntry();
*/

	if (fLock.OwnerCount() > 1) {
		// we'll just use the current transaction again
		return B_OK;
	}

	int32 transactionID = cache_start_transaction(fVolume->BlockCache());
	if (transactionID < B_OK) {
		// Don't leave an error code behind in fTransactionID - other
		// methods only recognize -1 as "no transaction". The owner is
		// not set at all, since the lock is given up again.
		fTransactionID = -1;
		fLock.Unlock();
		return transactionID;
	}

	fOwner = owner;
	fTransactionID = transactionID;

	return B_OK;
}
|
|
|
|
|
|
void
|
|
Journal::Unlock(Transaction *owner, bool success)
|
|
{
|
|
if (fLock.OwnerCount() == 1) {
|
|
// we only end the transaction if we would really unlock it
|
|
// ToDo: what about failing transactions that do not unlock?
|
|
TransactionDone(success);
|
|
|
|
fTransactionID = -1;
|
|
fTimestamp = system_time();
|
|
fOwner = NULL;
|
|
}
|
|
|
|
fLock.Unlock();
|
|
}
|
|
|
|
|
|
/**	Finishes the current transaction.
 *	A successful transaction is written to the log via WriteLogEntry(),
 *	whose result is returned. A failed transaction is aborted in the
 *	block cache, the block array is emptied, and B_OK is returned.
 */
status_t
Journal::TransactionDone(bool success)
{
	if (success) {
		// ToDo:
/*
		if (!success && fTransactionsInEntry == 0) {
			// we can safely abort the transaction
			sorted_array *array = fArray.Array();
			if (array != NULL) {
				// release the lock for all blocks in the array (we don't need
				// to be notified when they are actually written to disk)
				for (int32 i = 0; i < array->count; i++)
					release_block(fVolume->Device(), array->values[i]);
			}

			return B_OK;
		}

		// Up to a maximum size, we will just batch several
		// transactions together to improve speed
		if (TransactionSize() < fMaxTransactionSize) {
			fTransactionsInEntry++;
			fHasChangedBlocks = false;

			return B_OK;
		}
*/

		return WriteLogEntry();
	}

	// the transaction failed: forget its blocks, and let the cache
	// abort it
	fArray.MakeEmpty();
	cache_abort_transaction(fVolume->BlockCache(), fTransactionID);
	return B_OK;
}
|
|
|
|
|
|
/**	Adds \a numBlocks blocks, starting at \a blockNumber, to the block
 *	array of the current transaction.
 *	The method calls panic() first, and the parts that would copy the
 *	data from \a buffer into the cache are disabled (see the ToDo
 *	sections).
 *	Returns B_DEVICE_FULL if the transaction would not fit into the log
 *	anymore, B_OK otherwise.
 */
status_t
Journal::LogBlocks(off_t blockNumber, const uint8 *buffer, size_t numBlocks)
{
	panic("LogBlocks() called!\n");

	// ToDo: that's for now - we should change the log file size here
	if (TransactionSize() + numBlocks + 1 > fLogSize)
		return B_DEVICE_FULL;

	fHasChangedBlocks = true;
	const int32 blockSize = fVolume->BlockSize();

	while (numBlocks-- > 0) {
		if (fArray.Find(blockNumber) < 0) {
			// Insert the block into the transaction's array, and write the changes
			// back into the locked cache buffer
			fArray.Insert(blockNumber);

			// ToDo:
/*			status_t status = cached_write_locked(fVolume->Device(), blockNumber, buffer, 1, blockSize);
			if (status < B_OK)
				return status;
*/
		} else {
			// The block is already in the log, so just update its data
			// Note, this is only necessary if this method is called with a buffer
			// different from the cached block buffer - which is unlikely but
			// we'll make sure this way (costs one cache lookup, though).
			// ToDo:
/*			status_t status = cached_write(fVolume->Device(), blockNumber, buffer, 1, blockSize);
			if (status < B_OK)
				return status;
*/
		}

		// on to the next block and its data
		blockNumber++;
		buffer += blockSize;
	}

	// ToDo:
	// If necessary, flush the log, so that we have enough space for this transaction
/*	if (TransactionSize() > FreeLogBlocks())
		force_cache_flush(fVolume->Device(), true);
*/
	return B_OK;
}
|
|
|
|
|
|
// #pragma mark -
|
|
|
|
|
|
/**	Starts the transaction by locking the journal that \a volume
 *	returns for \a refBlock.
 *	Returns B_OK if the transaction is (or already was) started, and
 *	B_ERROR if there is no journal, or if it could not be locked - the
 *	transaction is not bound to a journal in this case.
 */
status_t
Transaction::Start(Volume *volume, off_t refBlock)
{
	// has it already been started?
	if (fJournal != NULL)
		return B_OK;

	fJournal = volume->GetJournal(refBlock);
	if (fJournal == NULL)
		return B_ERROR;

	if (fJournal->Lock(this) != B_OK) {
		// without the lock, we must not keep the journal
		fJournal = NULL;
		return B_ERROR;
	}

	return B_OK;
}
|
|
|