diff --git a/src/system/kernel/device_manager/dma_resources.cpp b/src/system/kernel/device_manager/dma_resources.cpp
index 80d1a4aaa5..1037c54522 100644
--- a/src/system/kernel/device_manager/dma_resources.cpp
+++ b/src/system/kernel/device_manager/dma_resources.cpp
@@ -12,6 +12,14 @@
 #include "io_requests.h"
 
 
+#define TRACE_DMA_RESOURCE
+#ifdef TRACE_DMA_RESOURCE
+#	define TRACE(x...) dprintf(x)
+#else	// disabled: TRACE() must stay a single statement that takes its ';'
+#	define TRACE(x...) do { } while (false)
+#endif
+
+
 const size_t kMaxBounceBufferSize = 4 * B_PAGE_SIZE;
 
 
@@ -58,6 +66,18 @@ DMABuffer::SetToBounceBuffer(size_t length)
 }
 
 
+bool
+DMABuffer::UsesBounceBufferAt(uint32 index)
+{
+	// vecs beyond the end of the buffer cannot use the bounce buffer
+	if (index >= fVecCount)
+		return false;
+	const addr_t base = (addr_t)fVecs[index].iov_base;
+	const addr_t bounceEnd = fPhysicalBounceBuffer + fBounceBufferSize;
+	return base >= fPhysicalBounceBuffer && base < bounceEnd;
+}
+
+
 //	#pragma mark -
 
 
@@ -95,10 +115,12 @@ DMAResource::Init(const dma_restrictions& restrictions, size_t blockSize,
 		fRestrictions.max_segment_size = ~(size_t)0;
 
 	if (_NeedsBoundsBuffers()) {
-// TODO: Enforce that the bounce buffer size won't cross boundaries.
-		fBounceBufferSize = fRestrictions.max_segment_size;
+		fBounceBufferSize = fRestrictions.max_segment_size
+			* min_c(fRestrictions.max_segment_count, 4);
 		if (fBounceBufferSize > kMaxBounceBufferSize)
-			fBounceBufferSize = max_c(kMaxBounceBufferSize, fBlockSize);
+			fBounceBufferSize = kMaxBounceBufferSize;
+		TRACE("DMAResource::Init(): chose bounce buffer size %lu\n",
+			fBounceBufferSize);
 	}
 
 	fScratchVecs = (iovec*)malloc(
@@ -183,6 +205,106 @@ DMAResource::_RestrictBoundaryAndSegmentSize(addr_t base, addr_t& length)
 }
 
 
+/*!	Removes the last \a toCut bytes from \a buffer, dropping vecs that are
+	cut completely. Whatever is cut from vecs located in the bounce buffer is
+	given back via \a physicalBounceBuffer and \a bounceLeft.
+*/
+void
+DMAResource::_CutBuffer(DMABuffer& buffer, addr_t& physicalBounceBuffer,
+	size_t& bounceLeft, size_t toCut)
+{
+	int32 remainingVecs = buffer.VecCount();
+
+	for (int32 i = remainingVecs - 1; toCut > 0 && i >= 0; i--) {
+		iovec& vec = buffer.VecAt(i);
+
+		// either the whole vec has to go, or just its tail
+		const bool dropVec = vec.iov_len <= toCut;
+		const size_t cut = dropVec ? vec.iov_len : toCut;
+
+		if (buffer.UsesBounceBufferAt(i)) {
+			bounceLeft += cut;
+			physicalBounceBuffer -= cut;
+		}
+
+		if (dropVec)
+			remainingVecs--;
+		else
+			vec.iov_len -= cut;
+		toCut -= cut;
+	}
+
+	buffer.SetVecCount(remainingVecs);
+}
+
+
+/*!	Adds \a length bytes from the bounce buffer to the DMABuffer \a buffer.
+	Takes care of boundary, and segment restrictions. \a length must be aligned.
+	If \a fixedLength is requested, this function will fail if it cannot
+	satisfy the request. Whatever has been added until then is cut back again,
+	so that a failed request leaves \a buffer, \a physicalBounceBuffer, and
+	\a bounceLeft as they were on entry.
+
+	\return 0 if nothing could be added to the DMA buffer.
+	\return >0 the number of bytes added to the buffer.
+*/
+size_t
+DMAResource::_AddBounceBuffer(DMABuffer& buffer, addr_t& physicalBounceBuffer,
+	size_t& bounceLeft, size_t length, bool fixedLength)
+{
+	if (bounceLeft < length) {
+		if (fixedLength)
+			return 0;
+		length = bounceLeft;
+	}
+
+	size_t bounceUsed = 0;
+	uint32 vecCount = buffer.VecCount();
+	if (vecCount > 0) {
+		// see if we can join the bounce buffer with the previously last vec
+		iovec& vec = buffer.VecAt(vecCount - 1);
+		addr_t vecBase = (addr_t)vec.iov_base;
+		size_t vecLength = vec.iov_len;
+
+		if (vecBase + vecLength == physicalBounceBuffer) {
+			vecLength += length;
+			_RestrictBoundaryAndSegmentSize(vecBase, vecLength);
+
+			size_t lengthDiff = vecLength - vec.iov_len;
+			length -= lengthDiff;
+			physicalBounceBuffer += lengthDiff;
+			bounceLeft -= lengthDiff;
+			bounceUsed += lengthDiff;
+			vec.iov_len = vecLength;
+		}
+	}
+
+	while (length > 0) {
+		// We need to add another bounce vec
+		if (vecCount == fRestrictions.max_segment_count) {
+			if (!fixedLength)
+				return bounceUsed;
+			// All or nothing: give back what has been added so far.
+			_CutBuffer(buffer, physicalBounceBuffer, bounceLeft, bounceUsed);
+			return 0;
+		}
+
+		addr_t vecLength = length;
+		_RestrictBoundaryAndSegmentSize(physicalBounceBuffer, vecLength);
+
+		buffer.AddVec((void*)physicalBounceBuffer, vecLength);
+		vecCount++;
+
+		physicalBounceBuffer += vecLength;
+		bounceLeft -= vecLength;
+		bounceUsed += vecLength;
+		length -= vecLength;
+	}
+
+	return bounceUsed;
+}
+
+
 status_t
 DMAResource::TranslateNext(IORequest* request, IOOperation* operation)
 {
@@ -208,21 +330,22 @@ DMAResource::TranslateNext(IORequest* request, IOOperation* operation)
 	iovec* vecs = NULL;
 	uint32 segmentCount = 0;
 
-	bool partialBegin = (offset & (fBlockSize - 1)) != 0;
-dprintf("  offset %Ld, block size %lu -> %s\n", offset, fBlockSize, partialBegin ? "partial" : "whole");
+	size_t partialBegin = offset & (fBlockSize - 1);
+	TRACE("  offset %Ld, block size %lu -> partial: %lu\n", offset, fBlockSize,
+		partialBegin);
 
 	if (buffer->IsVirtual()) {
 		// Unless we need the bounce buffer anyway, we have to translate the
 		// virtual addresses to physical addresses, so we can check the DMA
 		// restrictions.
-dprintf("  IS VIRTUAL\n");
+		TRACE("  buffer is virtual\n");
+		// TODO: !partialOperation || totalLength >= fBlockSize
+		// TODO: Maybe enforce fBounceBufferSize >= 2 * fBlockSize.
 		if (true) {
-// TODO: !partialOperation || totalLength >= fBlockSize
-// TODO: Maybe enforce fBounceBufferSize >= 2 * fBlockSize.
 			size_t transferLeft = totalLength;
 			vecs = fScratchVecs;
 
-dprintf("  CREATE PHYSICAL MAP %ld\n", buffer->VecCount());
+			TRACE("  create physical map (for %ld vecs)\n", buffer->VecCount());
 			for (uint32 i = vecIndex; i < buffer->VecCount(); i++) {
 				iovec& vec = buffer->VecAt(i);
 				addr_t base = (addr_t)vec.iov_base + vecOffset;
@@ -230,7 +353,6 @@ dprintf("  CREATE PHYSICAL MAP %ld\n", buffer->VecCount());
 				vecOffset = 0;
 				if (size > transferLeft)
 					size = transferLeft;
-dprintf("  size = %lu\n", size);
 
 				while (size > 0 && segmentCount
 						< fRestrictions.max_segment_count) {
@@ -262,33 +384,44 @@ dprintf("  size = %lu\n", size);
 			fRestrictions.max_segment_count);
 	}
 
-dprintf("  physical count %lu\n", segmentCount);
-for (uint32 i = 0; i < segmentCount; i++) {
-	dprintf("    [%lu] %p, %lu\n", i, vecs[i].iov_base, vecs[i].iov_len);
-}
+#ifdef TRACE_DMA_RESOURCE
+	TRACE("  physical count %lu\n", segmentCount);
+	for (uint32 i = 0; i < segmentCount; i++) {
+		TRACE("    [%lu] %p, %lu\n", i, vecs[i].iov_base, vecs[i].iov_len);
+	}
+#endif
+
 	// check alignment, boundaries, etc. and set vecs in DMA buffer
 
 	size_t dmaLength = 0;
 	addr_t physicalBounceBuffer = dmaBuffer->PhysicalBounceBuffer();
 	size_t bounceLeft = fBounceBufferSize;
+	size_t transferLeft = totalLength;
 
 	// If the offset isn't block-aligned, use the bounce buffer to bridge the
 	// gap to the start of the vec.
-	if (partialBegin) {
-		off_t diff = offset & (fBlockSize - 1);
-		addr_t base = physicalBounceBuffer;
-		size_t length = (diff + fRestrictions.alignment - 1)
-			& ~(fRestrictions.alignment - 1);
+	if (partialBegin > 0) {
+		size_t length;
+		if (request->IsWrite()) {
+			// we always need to read in a whole block for the partial write
+			length = fBlockSize;
+		} else {
+			length = (partialBegin + fRestrictions.alignment - 1)
+				& ~(fRestrictions.alignment - 1);
+		}
 
-		physicalBounceBuffer += length;
-		bounceLeft -= length;
+		if (_AddBounceBuffer(*dmaBuffer, physicalBounceBuffer, bounceLeft,
+				length, true) == 0) {
+			TRACE("  adding partial begin failed, length %lu!\n", length);
+			return B_BAD_VALUE;
+		}
 
-		dmaBuffer->AddVec((void*)base, length);
 		dmaLength += length;
 
-		vecOffset += length - diff;
-		offset -= diff;
-dprintf("  partial begin, using bounce buffer: offset: %lld, length: %lu\n", offset, length);
+		vecOffset += length - partialBegin;
+		offset -= partialBegin;
+		TRACE("  partial begin, using bounce buffer: offset: %lld, length: "
+			"%lu\n", offset, length);
 	}
 
 	for (uint32 i = vecIndex; i < segmentCount;) {
@@ -304,99 +437,110 @@ dprintf("  partial begin, using bounce buffer: offset: %lld, length: %lu\n", off
 
 		addr_t base = (addr_t)vec.iov_base + vecOffset;
 		size_t length = vec.iov_len - vecOffset;
+		if (length > transferLeft)
+			length = transferLeft;
 
 		// Cut the vec according to transfer size, segment size, and boundary.
 
-		if (dmaLength + length > fRestrictions.max_transfer_size)
-{
+		if (dmaLength + length > fRestrictions.max_transfer_size) {
 			length = fRestrictions.max_transfer_size - dmaLength;
-dprintf("  vec %lu: restricting length to %lu due to transfer size limit\n", i, length);
-}
+			TRACE("  vec %lu: restricting length to %lu due to transfer size "
+				"limit\n", i, length);
+		}
 		_RestrictBoundaryAndSegmentSize(base, length);
 
-		size_t useBounceBuffer = 0;
+		size_t useBounceBufferSize = 0;
 
 		// Check low address: use bounce buffer for range to low address.
 		// Check alignment: if not aligned, use bounce buffer for complete vec.
-		if (base < fRestrictions.low_address)
-{
-			useBounceBuffer = fRestrictions.low_address - base;
-dprintf("  vec %lu: below low address, using bounce buffer: %lu\n", i, useBounceBuffer);
-}
-		else if (base & (fRestrictions.alignment - 1))
-{
-			useBounceBuffer = length;
-dprintf("  vec %lu: misalignment, using bounce buffer: %lu\n", i, useBounceBuffer);
-}
+		if (base < fRestrictions.low_address) {
+			useBounceBufferSize = fRestrictions.low_address - base;
+			TRACE("  vec %lu: below low address, using bounce buffer: %lu\n", i,
+				useBounceBufferSize);
+		} else if (base & (fRestrictions.alignment - 1)) {
+			useBounceBufferSize = length;
+			TRACE("  vec %lu: misalignment, using bounce buffer: %lu\n", i,
+				useBounceBufferSize);
+		}
 
-// TODO: Enforce high address restriction!
+		// Enforce high address restriction
+		if (base > fRestrictions.high_address)
+			useBounceBufferSize = length;
+		else if (base + length > fRestrictions.high_address)
+			length = fRestrictions.high_address - base;
+
+		// Align length as well
+		if (useBounceBufferSize == 0)
+			length &= ~(fRestrictions.alignment - 1);
 
 		// If length is 0, use bounce buffer for complete vec.
 		if (length == 0) {
 			length = vec.iov_len - vecOffset;
-			useBounceBuffer = length;
-dprintf("  vec %lu: 0 length, using bounce buffer: %lu\n", i, useBounceBuffer);
+			useBounceBufferSize = length;
+			TRACE("  vec %lu: 0 length, using bounce buffer: %lu\n", i,
+				useBounceBufferSize);
 		}
 
-		if (useBounceBuffer > 0) {
-			if (bounceLeft == 0) {
-dprintf("  vec %lu: out of bounce buffer space\n", i);
+		if (useBounceBufferSize > 0) {
+			// alignment could still be wrong (we round up here)
+			useBounceBufferSize = (useBounceBufferSize
+				+ fRestrictions.alignment - 1) & ~(fRestrictions.alignment - 1);
+
+			length = _AddBounceBuffer(*dmaBuffer, physicalBounceBuffer,
+				bounceLeft, useBounceBufferSize, false);
+			if (length == 0) {
+				TRACE("  vec %lu: out of bounce buffer space\n", i);
 				// We don't have any bounce buffer space left, we need to move
 				// this request to the next I/O operation.
 				break;
 			}
-
-			base = physicalBounceBuffer;
-
-			if (useBounceBuffer > length)
-				useBounceBuffer = length;
-			if (useBounceBuffer > bounceLeft)
-				useBounceBuffer = bounceLeft;
-			length = useBounceBuffer;
+			TRACE("  vec %lu: final bounce length: %lu\n", i, length);
+		} else {
+			TRACE("  vec %lu: final length restriction: %lu\n", i, length);
+			dmaBuffer->AddVec((void*)base, length);
 		}
 
-		// check boundary and max segment size.
-		_RestrictBoundaryAndSegmentSize(base, length);
-dprintf("  vec %lu: final length restriction: %lu\n", i, length);
-
-		if (useBounceBuffer) {
-			// alignment could still be wrong
-			if (useBounceBuffer & (fRestrictions.alignment - 1)) {
-				useBounceBuffer
-					= (useBounceBuffer + fRestrictions.alignment - 1)
-						& ~(fRestrictions.alignment - 1);
-				if (dmaLength + useBounceBuffer
-						> fRestrictions.max_transfer_size) {
-					useBounceBuffer = (fRestrictions.max_transfer_size
-						- dmaLength) & ~(fRestrictions.alignment - 1);
-				}
-			}
-
-			physicalBounceBuffer += useBounceBuffer;
-			bounceLeft -= useBounceBuffer;
-		}
-
-		vecOffset += length;
-
-		// TODO: we might be able to join the vec with its preceding vec
-		// (but then we'd need to take the segment size into account again)
-		dmaBuffer->AddVec((void*)base, length);
 		dmaLength += length;
+		vecOffset += length;
+		transferLeft -= length;
 	}
 
-	// If total length not block aligned, use bounce buffer for padding.
-	if ((dmaLength & (fBlockSize - 1)) != 0) {
-dprintf("  dmaLength not block aligned: %lu\n", dmaLength);
-		size_t length = (dmaLength + fBlockSize - 1) & ~(fBlockSize - 1);
+	// If we're writing partially, we always need to have a block sized bounce
+	// buffer (or else we would overwrite memory to be written on the read in
+	// the first phase).
+	if (request->IsWrite() && (dmaLength & (fBlockSize - 1)) != 0) {
+		size_t diff = dmaLength  & (fBlockSize - 1);
+		TRACE("  partial end write: %lu, diff %lu\n", dmaLength, diff);
+
+		_CutBuffer(*dmaBuffer, physicalBounceBuffer, bounceLeft, diff);
+		dmaLength -= diff;
+
+		if (_AddBounceBuffer(*dmaBuffer, physicalBounceBuffer,
+				bounceLeft, fBlockSize, true) == 0) {
+			// If we cannot write anything, we can't process the request at all
+			TRACE("  adding bounce buffer failed!!!\n");
+			if (dmaLength == 0)
+				return B_BAD_VALUE;
+		} else
+			dmaLength += fBlockSize;
+	}
+
+	// If total length not block aligned, use bounce buffer for padding (read
+	// case only).
+	while ((dmaLength & (fBlockSize - 1)) != 0) {
+		TRACE("  dmaLength not block aligned: %lu\n", dmaLength);
+			size_t length = (dmaLength + fBlockSize - 1) & ~(fBlockSize - 1);
 
 		// If total length > max transfer size, segment count > max segment
 		// count, truncate.
+		// TODO: sometimes we can replace the last vec with the bounce buffer
+		// to let it match the restrictions.
 		if (length > fRestrictions.max_transfer_size
 			|| dmaBuffer->VecCount() == fRestrictions.max_segment_count
 			|| bounceLeft < length - dmaLength) {
 			// cut the part of dma length
-dprintf("  can't align length due to max transfer size, segment count "
-"restrictions, or lacking bounce buffer space\n");
+			TRACE("  can't align length due to max transfer size, segment "
+				"count restrictions, or lacking bounce buffer space\n");
 			size_t toCut = dmaLength
 				& (max_c(fBlockSize, fRestrictions.alignment) - 1);
 			dmaLength -= toCut;
@@ -410,36 +554,33 @@ dprintf("  can't align length due to max transfer size, segment count "
 					& ~(max_c(fBlockSize, fRestrictions.alignment) - 1);
 				_RestrictBoundaryAndSegmentSize(base, dmaLength);
 				dmaBuffer->AddVec((void*)base, dmaLength);
-			} else {
-				int32 dmaVecCount = dmaBuffer->VecCount();
-				for (int32 i = dmaVecCount - 1; toCut > 0 && i >= 0; i--) {
-					iovec& vec = dmaBuffer->VecAt(i);
-					size_t length = vec.iov_len;
-					if (length <= toCut) {
-						dmaVecCount--;
-						toCut -= length;
-					} else {
-						vec.iov_len -= toCut;
-						break;
-					}
-				}
 
-				dmaBuffer->SetVecCount(dmaVecCount);
+				physicalBounceBuffer = base + dmaLength;
+				bounceLeft = fBounceBufferSize - dmaLength;
+			} else {
+				_CutBuffer(*dmaBuffer, physicalBounceBuffer, bounceLeft, toCut);
 			}
 		} else {
-dprintf("  adding %lu bytes final bounce buffer\n", length - dmaLength);
-			dmaBuffer->AddVec((void*)physicalBounceBuffer, length - dmaLength);
-			dmaLength = length;
+			TRACE("  adding %lu bytes final bounce buffer\n",
+				length - dmaLength);
+			length -= dmaLength;
+			length = _AddBounceBuffer(*dmaBuffer, physicalBounceBuffer,
+				bounceLeft, length, true);
+			if (length == 0)
+				panic("don't do this to me!");
+			dmaLength += length;
 		}
 	}
 
+	off_t requestEnd = request->Offset() + request->Length();
+
 	operation->SetBuffer(dmaBuffer);
+	operation->SetBlockSize(fBlockSize);
 	operation->SetOriginalRange(originalOffset,
-		min_c(offset + dmaLength, request->Offset() + request->Length())
-			- originalOffset);
+		min_c(offset + dmaLength, requestEnd) - originalOffset);
 	operation->SetRange(offset, dmaLength);
-	operation->SetPartial(partialBegin,
-		offset + dmaLength > request->Offset() + request->Length());
+	operation->SetPartial(partialBegin != 0, offset + dmaLength > requestEnd);
+	operation->SetUsesBounceBuffer(bounceLeft < fBounceBufferSize);
 
 	status_t error = operation->SetRequest(request);
 	if (error != B_OK)
diff --git a/src/system/kernel/device_manager/dma_resources.h b/src/system/kernel/device_manager/dma_resources.h
index 50792e246f..d42e8800d1 100644
--- a/src/system/kernel/device_manager/dma_resources.h
+++ b/src/system/kernel/device_manager/dma_resources.h
@@ -47,11 +47,8 @@ public:
 			size_t				BounceBufferSize() const
 									{ return fBounceBufferSize; }
 
+			bool				UsesBounceBufferAt(uint32 index);
 			void				SetToBounceBuffer(size_t length);
-			bool				UsesBounceBuffer() const
-									{ return fVecCount >= 1
-										&& (addr_t)fVecs[0].iov_base
-											== fPhysicalBounceBuffer; }
 
 private:
 			void*				fBounceBuffer;
@@ -87,6 +84,13 @@ private:
 			bool				_NeedsBoundsBuffers() const;
 			void				_RestrictBoundaryAndSegmentSize(addr_t base,
 									addr_t& length);
+			void				_CutBuffer(DMABuffer& buffer,
+									addr_t& physicalBounceBuffer,
+									size_t& bounceLeft, size_t toCut);
+			size_t				_AddBounceBuffer(DMABuffer& buffer,
+									addr_t& physicalBounceBuffer,
+									size_t& bounceLeft, size_t length,
+									bool fixedLength);
 
 			mutex				fLock;
 			dma_restrictions	fRestrictions;
diff --git a/src/system/kernel/device_manager/io_requests.cpp b/src/system/kernel/device_manager/io_requests.cpp
index ac1fdae3f9..6e3db22ed5 100644
--- a/src/system/kernel/device_manager/io_requests.cpp
+++ b/src/system/kernel/device_manager/io_requests.cpp
@@ -14,6 +14,14 @@
 #include "dma_resources.h"
 
 
+#define TRACE_IO_REQUEST
+#ifdef TRACE_IO_REQUEST
+#	define TRACE(x...) dprintf(x)
+#else	// disabled: TRACE() must stay a single statement that takes its ';'
+#	define TRACE(x...) do { } while (false)
+#endif
+
+
 // partial I/O operation phases
 enum {
 	PHASE_READ_BEGIN	= 0,
@@ -105,12 +113,15 @@ IOBuffer::UnlockMemory(bool isWrite)
 bool
 IOOperation::Finish()
 {
-dprintf("IOOperation::Finish()\n");
+	TRACE("IOOperation::Finish()\n");
 	if (fStatus == B_OK) {
 		if (fParent->IsWrite()) {
-dprintf("  is write\n");
+			TRACE("  is write\n");
 			if (fPhase == PHASE_READ_BEGIN) {
-dprintf("  phase read begin\n");
+				TRACE("  phase read begin\n");
+				// repair phase adjusted vec
+				fDMABuffer->VecAt(fSavedVecIndex).iov_len = fSavedVecLength;
+
 				// partial write: copy partial begin to bounce buffer
 				bool skipReadEndPhase;
 				status_t error = _CopyPartialBegin(true, skipReadEndPhase);
@@ -119,6 +130,7 @@ dprintf("  phase read begin\n");
 					// Get ready for next phase...
 					fPhase = HasPartialEnd() && !skipReadEndPhase
 						? PHASE_READ_END : PHASE_DO_ALL;
+					_PrepareVecs();
 					SetStatus(1);
 						// TODO: Is there a race condition, if the request is
 						// aborted at the same time?
@@ -127,7 +139,13 @@ dprintf("  phase read begin\n");
 
 				SetStatus(error);
 			} else if (fPhase == PHASE_READ_END) {
-dprintf("  phase read end\n");
+				TRACE("  phase read end\n");
+				// repair phase adjusted vec
+				iovec& vec = fDMABuffer->VecAt(fSavedVecIndex);
+				vec.iov_base = (uint8*)vec.iov_base
+					+ vec.iov_len - fSavedVecLength;
+				vec.iov_len = fSavedVecLength;
+
 				// partial write: copy partial end to bounce buffer
 				status_t error = _CopyPartialEnd(true);
 				if (error == B_OK) {
@@ -146,7 +164,7 @@ dprintf("  phase read end\n");
 	}
 
 	if (fParent->IsRead() && UsesBounceBuffer()) {
-dprintf("  read with bounce buffer\n");
+		TRACE("  read with bounce buffer\n");
 		// copy the bounce buffer segments to the final location
 		uint8* bounceBuffer = (uint8*)fDMABuffer->BounceBuffer();
 		addr_t bounceBufferStart = fDMABuffer->PhysicalBounceBuffer();
@@ -155,32 +173,42 @@ dprintf("  read with bounce buffer\n");
 
 		const iovec* vecs = fDMABuffer->Vecs();
 		uint32 vecCount = fDMABuffer->VecCount();
-		uint32 i = 0;
-
-		off_t offset = Offset();
 
 		status_t error = B_OK;
-		bool partialBlockOnly = false;
-		if (HasPartialBegin()) {
-			error = _CopyPartialBegin(false, partialBlockOnly);
-			offset += vecs[0].iov_len;
-			i++;
-		}
 
-		if (error == B_OK && HasPartialEnd() && !partialBlockOnly) {
-			error = _CopyPartialEnd(false);
-			vecCount--;
-		}
+		off_t offset = fOffset;
+		off_t startOffset = fOriginalOffset;
+		off_t endOffset = fOriginalOffset + fOriginalLength;
 
-		for (; error == B_OK && i < vecCount; i++) {
+		for (uint32 i = 0; error == B_OK && i < vecCount; i++) {
 			const iovec& vec = vecs[i];
 			addr_t base = (addr_t)vec.iov_base;
+			size_t length = vec.iov_len;
+
+			if (offset < startOffset) {
+				if (offset + length <= startOffset) {
+					offset += length;
+					continue;
+				}
+
+				size_t diff = startOffset - offset;
+				base += diff;
+				length -= diff;
+			}
+
+			if (offset + length > endOffset) {
+				if (offset >= endOffset)
+					break;
+
+				length = endOffset - offset;
+			}
+
 			if (base >= bounceBufferStart && base < bounceBufferEnd) {
 				error = fParent->CopyData(
-					bounceBuffer + (base - bounceBufferStart), offset,
-					vec.iov_len);
+					bounceBuffer + (base - bounceBufferStart), offset, length);
 			}
-			offset += vec.iov_len;
+
+			offset += length;
 		}
 
 		if (error != B_OK)
@@ -208,15 +236,10 @@ IOOperation::SetRequest(IORequest* request)
 	// set initial phase
 	fPhase = PHASE_DO_ALL;
 	if (fParent->IsWrite()) {
-		if (HasPartialBegin())
-			fPhase = PHASE_READ_BEGIN;
-		else if (HasPartialEnd())
-			fPhase = PHASE_READ_END;
-
 		// Copy data to bounce buffer segments, save the partial begin/end vec,
 		// which will be copied after their respective read phase.
 		if (UsesBounceBuffer()) {
-dprintf("  write with bounce buffer\n");
+			TRACE("  write with bounce buffer\n");
 			uint8* bounceBuffer = (uint8*)fDMABuffer->BounceBuffer();
 			addr_t bounceBufferStart = fDMABuffer->PhysicalBounceBuffer();
 			addr_t bounceBufferEnd = bounceBufferStart
@@ -224,30 +247,67 @@ dprintf("  write with bounce buffer\n");
 
 			const iovec* vecs = fDMABuffer->Vecs();
 			uint32 vecCount = fDMABuffer->VecCount();
+			size_t vecOffset = 0;
 			uint32 i = 0;
 
-			off_t offset = Offset();
+			off_t offset = fOffset;
+			off_t endOffset = fOffset + fLength;
 
 			if (HasPartialBegin()) {
-				offset += vecs[0].iov_len;
-				i++;
+				// skip first block
+				size_t toSkip = fBlockSize;
+				while (toSkip > 0) {
+					if (vecs[i].iov_len <= toSkip) {
+						toSkip -= vecs[i].iov_len;
+						i++;
+					} else {
+						vecOffset = toSkip;
+						break;
+					}
+				}
+
+				offset += fBlockSize;
 			}
 
-			if (HasPartialEnd())
-				vecCount--;
+			if (HasPartialEnd()) {
+				// skip last block
+				size_t toSkip = fBlockSize;
+				while (toSkip > 0) {
+					if (vecs[vecCount - 1].iov_len <= toSkip) {
+						toSkip -= vecs[vecCount - 1].iov_len;
+						vecCount--;
+					} else
+						break;
+				}
+
+				endOffset -= fBlockSize;
+			}
 
 			for (; i < vecCount; i++) {
 				const iovec& vec = vecs[i];
-				addr_t base = (addr_t)vec.iov_base;
+				addr_t base = (addr_t)vec.iov_base + vecOffset;
+				size_t length = vec.iov_len - vecOffset;
+				vecOffset = 0;
+
 				if (base >= bounceBufferStart && base < bounceBufferEnd) {
+					if (offset + length > endOffset)
+						length = endOffset - offset;
 					status_t error = fParent->CopyData(offset,
-						bounceBuffer + (base - bounceBufferStart), vec.iov_len);
+						bounceBuffer + (base - bounceBufferStart), length);
 					if (error != B_OK)
 						return error;
 				}
-				offset += vec.iov_len;
+
+				offset += length;
 			}
 		}
+
+		if (HasPartialBegin())
+			fPhase = PHASE_READ_BEGIN;
+		else if (HasPartialEnd())
+			fPhase = PHASE_READ_END;
+
+		_PrepareVecs();
 	}
 
 	fStatus = 1;
@@ -275,12 +335,26 @@ IOOperation::SetRange(off_t offset, size_t length)
 }
 
 
+off_t
+IOOperation::Offset() const
+{
+	return fPhase != PHASE_READ_END ? fOffset : fOffset + fLength - fBlockSize;
+}
+
+
+size_t
+IOOperation::Length() const
+{
+	return fPhase != PHASE_DO_ALL ? fBlockSize : fLength;
+}
+
+
 iovec*
 IOOperation::Vecs() const
 {
 	switch (fPhase) {
 		case PHASE_READ_END:
-			return fDMABuffer->Vecs() + (fDMABuffer->VecCount() - 1);
+			return fDMABuffer->Vecs() + fSavedVecIndex;
 		case PHASE_READ_BEGIN:
 		case PHASE_DO_ALL:
 		default:
@@ -294,8 +368,9 @@ IOOperation::VecCount() const
 {
 	switch (fPhase) {
 		case PHASE_READ_BEGIN:
+			return fSavedVecIndex + 1;
 		case PHASE_READ_END:
-			return 1;
+			return fDMABuffer->VecCount() - fSavedVecIndex;
 		case PHASE_DO_ALL:
 		default:
 			return fDMABuffer->VecCount();
@@ -306,6 +381,7 @@ IOOperation::VecCount() const
 void
 IOOperation::SetPartial(bool partialBegin, bool partialEnd)
 {
+	TRACE("partial begin %d, end %d\n", partialBegin, partialEnd);
 	fPartialBegin = partialBegin;
 	fPartialEnd = partialEnd;
 }
@@ -314,7 +390,7 @@ IOOperation::SetPartial(bool partialBegin, bool partialEnd)
 bool
 IOOperation::IsWrite() const
 {
-	return fParent->IsWrite() && fPhase != PHASE_DO_ALL;
+	return fParent->IsWrite() && fPhase == PHASE_DO_ALL;
 }
 
 
@@ -325,24 +401,64 @@ IOOperation::IsRead() const
 }
 
 
-status_t
-IOOperation::_CopyPartialBegin(bool isWrite, bool& partialBlockOnly)
+void
+IOOperation::_PrepareVecs()
 {
-	size_t relativeOffset = OriginalOffset() - Offset();
-	size_t length = fDMABuffer->VecAt(0).iov_len;
+	// Restricts the vecs to the single block that is read in during a
+	// partial write's read phase; Finish() restores the adjusted vec.
+	if (fPhase != PHASE_READ_BEGIN && fPhase != PHASE_READ_END)
+		return;
+
+	iovec* vecs = fDMABuffer->Vecs();
+	size_t blockLeft = fBlockSize;
+
+	if (fPhase == PHASE_READ_BEGIN) {
+		// walk forward to the vec the first block ends in, and cut its tail
+		const uint32 count = fDMABuffer->VecCount();
+		uint32 i = 0;
+		while (i < count && vecs[i].iov_len < blockLeft)
+			blockLeft -= vecs[i++].iov_len;
+		if (i < count) {
+			fSavedVecIndex = i;
+			fSavedVecLength = vecs[i].iov_len;
+			vecs[i].iov_len = blockLeft;
+		}
+	} else {
+		// walk backward to the vec the last block starts in, and cut its head
+		int32 i = (int32)fDMABuffer->VecCount() - 1;
+		while (i >= 0 && vecs[i].iov_len < blockLeft)
+			blockLeft -= vecs[i--].iov_len;
+		if (i >= 0) {
+			fSavedVecIndex = i;
+			fSavedVecLength = vecs[i].iov_len;
+			vecs[i].iov_base = (uint8*)vecs[i].iov_base
+				+ vecs[i].iov_len - blockLeft;
+			vecs[i].iov_len = blockLeft;
+		}
+	}
+}
 
-	partialBlockOnly = relativeOffset + OriginalLength() <= length;
-	if (partialBlockOnly)
-		length = relativeOffset + OriginalLength();
+
+status_t
+IOOperation::_CopyPartialBegin(bool isWrite, bool& singleBlockOnly)
+{
+	size_t relativeOffset = OriginalOffset() - fOffset;
+	size_t length = fBlockSize - relativeOffset;
+
+	singleBlockOnly = length >= OriginalLength();
+	if (singleBlockOnly)
+		length = OriginalLength();
+
+	TRACE("_CopyPartialBegin(%s, single only %d)\n",
+		isWrite ? "write" : "read", singleBlockOnly);
 
 	if (isWrite) {
 		return fParent->CopyData(OriginalOffset(),
-			(uint8*)fDMABuffer->BounceBuffer() + relativeOffset,
-			length - relativeOffset);
+			(uint8*)fDMABuffer->BounceBuffer() + relativeOffset, length);
 	} else {
 		return fParent->CopyData(
 			(uint8*)fDMABuffer->BounceBuffer() + relativeOffset,
-			OriginalOffset(), length - relativeOffset);
+			OriginalOffset(), length);
 	}
 }
 
@@ -350,20 +466,20 @@ IOOperation::_CopyPartialBegin(bool isWrite, bool& partialBlockOnly)
 status_t
 IOOperation::_CopyPartialEnd(bool isWrite)
 {
+	TRACE("_CopyPartialEnd(%s)\n", isWrite ? "write" : "read");
+
 	const iovec& lastVec = fDMABuffer->VecAt(fDMABuffer->VecCount() - 1);
-	off_t lastVecPos = Offset() + Length() - lastVec.iov_len;
-	if (isWrite) {
-		return fParent->CopyData(lastVecPos,
-			(uint8*)fDMABuffer->BounceBuffer()
-				+ ((addr_t)lastVec.iov_base
-					- fDMABuffer->PhysicalBounceBuffer()),
-			OriginalOffset() + OriginalLength() - lastVecPos);
-	} else {
-		return fParent->CopyData((uint8*)fDMABuffer->BounceBuffer()
-				+ ((addr_t)lastVec.iov_base
-					- fDMABuffer->PhysicalBounceBuffer()),
-			lastVecPos, OriginalOffset() + OriginalLength() - lastVecPos);
-	}
+	off_t lastVecPos = fOffset + fLength - fBlockSize;
+	uint8* base = (uint8*)fDMABuffer->BounceBuffer() + ((addr_t)lastVec.iov_base
+		+ lastVec.iov_len - fBlockSize - fDMABuffer->PhysicalBounceBuffer());
+		// NOTE: this won't work if we don't use the bounce buffer contiguously
+		// (because of boundary alignments).
+	size_t length = OriginalOffset() + OriginalLength() - lastVecPos;
+
+	if (isWrite)
+		return fParent->CopyData(lastVecPos, base, length);
+
+	return fParent->CopyData(base, lastVecPos, length);
 }
 
 
@@ -435,8 +551,8 @@ IORequest::ChunkFinished(IORequestChunk* chunk, status_t status)
 void
 IORequest::Advance(size_t bySize)
 {
-dprintf("IORequest::Advance(%lu): remaining: %lu -> %lu\n", bySize,
-fRemainingBytes, fRemainingBytes - bySize);
+	TRACE("IORequest::Advance(%lu): remaining: %lu -> %lu\n", bySize,
+		fRemainingBytes, fRemainingBytes - bySize);
 	fRemainingBytes -= bySize;
 
 	iovec* vecs = fBuffer->Vecs();
@@ -540,7 +656,8 @@ IORequest::_CopyData(void* _buffer, off_t offset, size_t size, bool copyIn)
 IORequest::_CopySimple(void* bounceBuffer, void* external, size_t size,
 	bool copyIn)
 {
-dprintf("  IORequest::_CopySimple(%p, %p, %lu, %d)\n", bounceBuffer, external, size, copyIn);
+	TRACE("  IORequest::_CopySimple(%p, %p, %lu, %d)\n", bounceBuffer, external,
+		size, copyIn);
 	if (copyIn)
 		memcpy(bounceBuffer, external, size);
 	else
diff --git a/src/system/kernel/device_manager/io_requests.h b/src/system/kernel/device_manager/io_requests.h
index eb73add701..97c5239668 100644
--- a/src/system/kernel/device_manager/io_requests.h
+++ b/src/system/kernel/device_manager/io_requests.h
@@ -105,8 +105,9 @@ public:
 									// also sets range
 			void				SetRange(off_t offset, size_t length);
 
-			off_t				Offset() const	{ return fOffset; }
-			size_t				Length() const	{ return fLength; }
+// TODO: Fix Offset() and Length() for partial write phases!
+			off_t				Offset() const;
+			size_t				Length() const;
 			off_t				OriginalOffset() const
 									{ return fOriginalOffset; }
 			size_t				OriginalLength() const
@@ -123,26 +124,36 @@ public:
 			bool				IsWrite() const;
 			bool				IsRead() const;
 
+			void				SetBlockSize(size_t blockSize)
+									{ fBlockSize  = blockSize; }
+
 			bool				UsesBounceBuffer() const
-									{ return fDMABuffer->UsesBounceBuffer(); }
+									{ return fUsesBounceBuffer; }
+			void				SetUsesBounceBuffer(bool uses)
+									{ fUsesBounceBuffer = uses; }
 
 			DMABuffer*			Buffer() const { return fDMABuffer; }
 			void				SetBuffer(DMABuffer* buffer)
 									{ fDMABuffer = buffer; }
 
 protected:
+			void				_PrepareVecs();
 			status_t			_CopyPartialBegin(bool isWrite,
 									bool& partialBlockOnly);
 			status_t			_CopyPartialEnd(bool isWrite);
 
 			DMABuffer*			fDMABuffer;
 			off_t				fOffset;
-			size_t				fLength;
 			off_t				fOriginalOffset;
+			size_t				fLength;
 			size_t				fOriginalLength;
-			uint32				fPhase;
+			size_t				fBlockSize;
+			uint16				fSavedVecIndex;
+			uint16				fSavedVecLength;
+			uint8				fPhase;
 			bool				fPartialBegin;
 			bool				fPartialEnd;
+			bool				fUsesBounceBuffer;
 };
 
 typedef IOOperation io_operation;
diff --git a/src/tests/system/kernel/device_manager/dma_resource_test.cpp b/src/tests/system/kernel/device_manager/dma_resource_test.cpp
index 5967221d1e..374650f1b0 100644
--- a/src/tests/system/kernel/device_manager/dma_resource_test.cpp
+++ b/src/tests/system/kernel/device_manager/dma_resource_test.cpp
@@ -9,6 +9,8 @@
 
 #include <device_manager.h>
 
+#include <vm.h>
+
 #include "dma_resources.h"
 #include "io_requests.h"
 
@@ -16,18 +18,36 @@
 #define DMA_TEST_BLOCK_SIZE	512
 
 
-struct device_manager_info* sDeviceManager;
+class TestSuite;
 
-static area_id sArea;
-static size_t sAreaSize;
-static void* sAreaAddress;
-static DMAResource* sDMAResource;
+class TestSuiteContext {
+public:
+							TestSuiteContext();
+							~TestSuiteContext();
 
+			status_t		Init(size_t size);
+
+			addr_t			DataBase() const { return fDataBase; }
+			addr_t			PhysicalDataBase() const
+								{ return fPhysicalDataBase; }
+
+			addr_t			CompareBase() const { return fCompareBase; }
+
+			size_t			Size() const { return fSize; }
+
+private:
+			area_id			fDataArea;
+			addr_t			fDataBase;
+			addr_t			fPhysicalDataBase;
+			area_id			fCompareArea;
+			addr_t			fCompareBase;
+			size_t			fSize;
+};
 
 class Test : public DoublyLinkedListLinkImpl<Test> {
 public:
-							Test(off_t offset, uint8* base, uint8* physicalBase,
-								size_t length, bool isWrite, uint32 flags);
+							Test(TestSuite& suite, off_t offset, size_t length,
+								bool isWrite, uint32 flags);
 
 			Test&			AddSource(addr_t base, size_t length);
 			Test&			NextResult(off_t offset, bool partialBegin,
@@ -38,11 +58,17 @@ public:
 			void			Run(DMAResource& resource);
 
 private:
+			addr_t			_SourceToVirtual(addr_t source);
+			addr_t			_SourceToCompare(addr_t source);
+			void			_Prepare();
+			void			_CheckCompare();
+			void			_CheckWrite();
+			void			_CheckResults();
+			status_t		_DoIO(IOOperation& operation);
 			void			_Panic(const char* message,...);
 
+			TestSuite&		fSuite;
 			off_t			fOffset;
-			uint8*			fBase;
-			uint8*			fPhysicalBase;
 			size_t			fLength;
 			bool			fIsWrite;
 			uint32			fFlags;
@@ -70,11 +96,10 @@ typedef DoublyLinkedList<Test> TestList;
 
 class TestSuite {
 public:
-	TestSuite(const char* name, const dma_restrictions& restrictions,
-			size_t blockSize, uint8* base, uint8* physicalBase)
+	TestSuite(TestSuiteContext& context, const char* name,
+			const dma_restrictions& restrictions, size_t blockSize)
 		:
-		fBase(base),
-		fPhysicalBase(physicalBase)
+		fContext(context)
 	{
 		dprintf("----- Run \"%s\" tests ---------------------------\n", name);
 		dprintf("  DMA restrictions: address %#lx - %#lx, align %lu, boundary "
@@ -99,8 +124,8 @@ public:
 
 	Test& AddTest(off_t offset, size_t length, bool isWrite, uint32 flags)
 	{
-		Test* test = new(std::nothrow) Test(offset, fBase, fPhysicalBase,
-			length, isWrite, flags);
+		Test* test = new(std::nothrow) Test(*this, offset, length, isWrite,
+			flags);
 		fTests.Add(test);
 
 		return *test;
@@ -116,20 +141,79 @@ public:
 		}
 	}
 
+	addr_t DataBase() const { return fContext.DataBase(); }
+	addr_t PhysicalDataBase() const { return fContext.PhysicalDataBase(); }
+	addr_t CompareBase() const { return fContext.CompareBase(); }
+	size_t Size() const { return fContext.Size(); }
+
 private:
+	TestSuiteContext& fContext;
 	DMAResource		fDMAResource;
 	uint8*			fBase;
 	uint8*			fPhysicalBase;
+	size_t			fSize;
 	TestList		fTests;
 };
 
 
-Test::Test(off_t offset, uint8* base, uint8* physicalBase, size_t length,
-		bool isWrite, uint32 flags)
+struct device_manager_info* sDeviceManager;
+
+static area_id sArea;
+static size_t sAreaSize;
+static void* sAreaAddress;
+static DMAResource* sDMAResource;
+
+
+TestSuiteContext::TestSuiteContext()
 	:
+	fDataArea(-1),
+	fCompareArea(-1),
+	fSize(0)
+{
+}
+
+
+TestSuiteContext::~TestSuiteContext()
+{
+	delete_area(fDataArea);
+	delete_area(fCompareArea);
+}
+
+
+status_t
+TestSuiteContext::Init(size_t size)
+{
+	const uint32 kProtection = B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
+	fDataArea = create_area("data buffer", (void**)&fDataBase,
+		B_ANY_KERNEL_ADDRESS, size, B_CONTIGUOUS, kProtection);
+	if (fDataArea < B_OK)
+		return fDataArea;
+
+	// only the first physical run is queried; its start becomes the base
+	physical_entry firstRun;
+	get_memory_map((void*)fDataBase, size, &firstRun, 1);
+	fPhysicalDataBase = (addr_t)firstRun.address;
+	dprintf("DMA Test area %p, physical %p\n", (void*)fDataBase,
+		firstRun.address);
+
+	fCompareArea = create_area("compare buffer", (void**)&fCompareBase,
+		B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK, kProtection);
+	if (fCompareArea < B_OK)
+		return fCompareArea;
+
+	fSize = size;
+	return B_OK;
+}
+
+
+//	#pragma mark -
+
+
+Test::Test(TestSuite& suite, off_t offset, size_t length, bool isWrite,
+		uint32 flags)
+	:
+	fSuite(suite),
 	fOffset(offset),
-	fBase(base),
-	fPhysicalBase(physicalBase),
 	fLength(length),
 	fIsWrite(isWrite),
 	fFlags(flags),
@@ -144,7 +228,7 @@ Test::AddSource(addr_t address, size_t length)
 {
 	fSourceVecs[fSourceCount].iov_base
 		= (void*)(((fFlags & B_PHYSICAL_IO_REQUEST) == 0
-			? fBase : fPhysicalBase) + address);
+			? fSuite.DataBase() : fSuite.PhysicalDataBase()) + address);
 	fSourceVecs[fSourceCount].iov_len = length;
 	fSourceCount++;
 
@@ -179,9 +263,220 @@ Test::AddTarget(addr_t base, size_t length, bool usesBounceBuffer)
 }
 
 
+addr_t
+Test::_SourceToVirtual(addr_t source)
+{
+	// physical sources are rebased onto the virtual mapping of the data area
+	bool isPhysical = (fFlags & B_PHYSICAL_IO_REQUEST) != 0;
+	return isPhysical
+		? source - fSuite.PhysicalDataBase() + fSuite.DataBase() : source;
+}
+
+
+addr_t
+Test::_SourceToCompare(addr_t source)
+{
+	// keeps the source's offset into the data area, relative to CompareBase()
+	addr_t sourceBase = (fFlags & B_PHYSICAL_IO_REQUEST) != 0
+		? fSuite.PhysicalDataBase() : fSuite.DataBase();
+	return source - sourceBase + fSuite.CompareBase();
+}
+
+
+void
+Test::_Prepare()
+{
+	// Sets up the fake disk, the data buffer, and the compare buffer.
+
+	// prepare disk
+
+	uint8* disk = (uint8*)sAreaAddress;
+	for (size_t i = 0; i < sAreaSize; i++) {
+		disk[i] = i % 26 + 'a';
+	}
+
+	// prepare data
+
+	memset((void*)fSuite.DataBase(), 0xcc, fSuite.Size());
+
+	if (fIsWrite) {
+		off_t offset = fOffset;
+		size_t length = fLength;
+
+		for (uint32 i = 0; i < fSourceCount; i++) {
+			uint8* data = (uint8*)_SourceToVirtual(
+				(addr_t)fSourceVecs[i].iov_base);
+			size_t vecLength = min_c(fSourceVecs[i].iov_len, length);
+
+			for (uint32 j = 0; j < vecLength; j++) {
+				data[j] = (offset + j) % 10 + '0';
+			}
+			offset += vecLength;
+			length -= vecLength;
+		}
+	}
+
+	// prepare compare data
+
+	memset((void*)fSuite.CompareBase(), 0xcc, fSuite.Size());
+
+	if (fIsWrite) {
+		// copy data from source
+		size_t length = fLength;
+
+		for (uint32 i = 0; i < fSourceCount; i++) {
+			uint8* compare = (uint8*)_SourceToCompare(
+				(addr_t)fSourceVecs[i].iov_base);
+			size_t vecLength = min_c(fSourceVecs[i].iov_len, length);
+
+			memcpy(compare,
+				(void*)_SourceToVirtual((addr_t)fSourceVecs[i].iov_base),
+				vecLength);
+			length -= vecLength;
+		}
+	} else {
+		// copy data from drive
+		off_t offset = fOffset;
+		size_t length = fLength;
+
+		for (uint32 i = 0; i < fSourceCount; i++) {
+			uint8* compare = (uint8*)_SourceToCompare(
+				(addr_t)fSourceVecs[i].iov_base);
+			size_t vecLength = min_c(fSourceVecs[i].iov_len, length);
+
+			memcpy(compare, disk + offset, vecLength);
+			offset += vecLength;
+			length -= vecLength;
+		}
+	}
+
+	if (fIsWrite)
+		_CheckCompare();
+}
+
+
+void
+Test::_CheckCompare()
+{
+	// Panics at the first byte where data and compare buffers disagree.
+	uint8* data = (uint8*)fSuite.DataBase();
+	uint8* compare = (uint8*)fSuite.CompareBase();
+	for (size_t i = 0; i < fSuite.Size(); i++) {
+		if (data[i] == compare[i])
+			continue;
+		dprintf("offset %lu differs, %s:\n", i, fIsWrite ? "write" : "read");
+		size_t start = i & ~(size_t)63;
+		size_t dumpLength = min_c(64, fSuite.Size() - start);
+		dump_block((char*)&data[start], dumpLength, "  ");
+		dprintf("should be:\n");
+		dump_block((char*)&compare[start], dumpLength, "  ");
+		_Panic("Data %s differs", fIsWrite ? "write" : "read");
+		return;
+	}
+}
+
+
+void
+Test::_CheckWrite()
+{
+	// Verifies a completed write: nothing outside of the written range may
+	// have changed on disk, and the range itself must hold the source data.
+	_CheckCompare();
+
+	// check if we overwrote parts we shouldn't have
+	uint8* disk = (uint8*)sAreaAddress;
+	for (size_t i = 0; i < sAreaSize; i++) {
+		if (i >= fOffset && i < fOffset + fLength)
+			continue;
+
+		if (disk[i] != i % 26 + 'a') {
+			dprintf("disk[i] %c, expected %c, i %lu, fLength + fOffset %Ld\n",
+				disk[i], (char)(i % 26 + 'a'), i, fLength + fOffset);
+			dprintf("offset %lu differs, touched innocent data:\n", i);
+			// dump the 64 byte line around it, bounded by the disk's size
+			size_t start = i & ~(size_t)63;
+			dump_block((char*)&disk[start], min_c(64, sAreaSize - start), "  ");
+			_Panic("Data %s differs", fIsWrite ? "write" : "read");
+			return;
+		}
+	}
+
+	// check if the data we wanted to have on disk ended up there
+	off_t offset = fOffset;
+	size_t length = fLength;
+	for (uint32 i = 0; i < fSourceCount; i++) {
+		uint8* data = (uint8*)_SourceToVirtual(
+			(addr_t)fSourceVecs[i].iov_base);
+		size_t vecLength = min_c(fSourceVecs[i].iov_len, length);
+		for (uint32 j = 0; j < vecLength; j++) {
+			if (disk[offset + j] == data[j])
+				continue;
+			dprintf("offset %lu differs, found on disk:\n", j);
+			// dump the same 64 byte line of the vec from disk and source
+			uint32 start = j & ~63;
+			size_t dumpLength = min_c(64, vecLength - start);
+			dump_block((char*)&disk[offset + start], dumpLength, "  ");
+			dprintf("should be:\n");
+			dump_block((char*)&data[start], dumpLength, "  ");
+			_Panic("Data write differs");
+			return;
+		}
+
+		offset += vecLength;
+		length -= vecLength;
+	}
+}
+
+
+void
+Test::_CheckResults()
+{
+	if (!fIsWrite)
+		_CheckCompare();
+	else
+		_CheckWrite();
+}
+
+
+status_t
+Test::_DoIO(IOOperation& operation)
+{
+	// Plays the device: copies between the fake disk and the vecs' pages.
+	uint8* disk = (uint8*)sAreaAddress;
+	off_t offset = operation.Offset();
+	for (uint32 i = 0; i < operation.VecCount(); i++) {
+		const iovec& vec = operation.Vecs()[i];
+		addr_t base = (addr_t)vec.iov_base;
+		size_t length = vec.iov_len;
+		size_t pageOffset = base % B_PAGE_SIZE;
+		while (length > 0) {
+			size_t toCopy = min_c(length, B_PAGE_SIZE - pageOffset);
+			uint8* virtualAddress;
+			status_t status = vm_get_physical_page(base - pageOffset,
+				(addr_t*)&virtualAddress, PHYSICAL_PAGE_NO_WAIT);
+			if (status != B_OK)
+				return status;
+			if (operation.IsWrite())
+				memcpy(disk + offset, virtualAddress + pageOffset, toCopy);
+			else
+				memcpy(virtualAddress + pageOffset, disk + offset, toCopy);
+			vm_put_physical_page((addr_t)virtualAddress);
+			// the next chunk, if any, starts at the following page boundary
+			base += toCopy;
+			length -= toCopy;
+			offset += toCopy;
+			pageOffset = 0;
+		}
+	}
+	return B_OK;
+}
+
+
 void
 Test::Run(DMAResource& resource)
 {
+	_Prepare();
+
 	IORequest request;
 	status_t status = request.Init(fOffset, fSourceVecs, fSourceCount,
 		fLength, fIsWrite, fFlags);
@@ -210,10 +505,11 @@ Test::Run(DMAResource& resource)
 		dprintf("  DMABuffer %p, %lu vecs, bounce buffer: %p (%p) %s\n", buffer,
 			buffer->VecCount(), buffer->BounceBuffer(),
 			(void*)buffer->PhysicalBounceBuffer(),
-			buffer->UsesBounceBuffer() ? "used" : "unused");
+			operation.UsesBounceBuffer() ? "used" : "unused");
 		for (uint32 i = 0; i < buffer->VecCount(); i++) {
-			dprintf("    [%lu] base %p, length %lu\n", i,
-				buffer->VecAt(i).iov_base, buffer->VecAt(i).iov_len);
+			dprintf("    [%lu] base %p, length %lu%s\n", i,
+				buffer->VecAt(i).iov_base, buffer->VecAt(i).iov_len,
+				buffer->UsesBounceBufferAt(i) ? ", bounce" : "");
 		}
 
 		dprintf("  remaining bytes: %lu\n", request.RemainingBytes());
@@ -236,7 +532,7 @@ Test::Run(DMAResource& resource)
 				address = (void*)(target.address
 					+ (addr_t)buffer->PhysicalBounceBuffer());
 			} else
-				address = (void*)(target.address + fPhysicalBase);
+				address = (void*)(target.address + fSuite.PhysicalDataBase());
 
 			if (address != vec.iov_base) {
 				_Panic("[%lu] address differs: %p, should be %p", i,
@@ -244,6 +540,7 @@ Test::Run(DMAResource& resource)
 			}
 		}
 
+		_DoIO(operation);
 		operation.SetStatus(B_OK);
 		bool finished = operation.Finish();
 		bool isPartial = result.partial_begin || result.partial_end;
@@ -252,6 +549,7 @@ Test::Run(DMAResource& resource)
 
 		if (!finished) {
 			dprintf("  operation not done yet!\n");
+			_DoIO(operation);
 			operation.SetStatus(B_OK);
 
 			isPartial = result.partial_begin && result.partial_end;
@@ -261,6 +559,7 @@ Test::Run(DMAResource& resource)
 
 			if (!finished) {
 				dprintf("  operation not done yet!\n");
+				_DoIO(operation);
 				operation.SetStatus(B_OK);
 
 				if (!operation.Finish())
@@ -270,6 +569,8 @@ Test::Run(DMAResource& resource)
 
 		resultIndex++;
 	}
+
+	_CheckResults();
 }
 
 
@@ -285,7 +586,8 @@ Test::_Panic(const char* message,...)
 
 	dprintf("test failed\n");
 	dprintf("  offset:  %lld\n", fOffset);
-	dprintf("  base:    %p (physical: %p)\n", fBase, fPhysicalBase);
+	dprintf("  base:    %p (physical: %p)\n", (void*)fSuite.DataBase(),
+		(void*)fSuite.PhysicalDataBase());
 	dprintf("  length:  %lu\n", fLength);
 	dprintf("  write:   %d\n", fIsWrite);
 	dprintf("  flags:   %#lx\n", fFlags);
@@ -313,7 +615,7 @@ Test::_Panic(const char* message,...)
 
 
 static void
-run_tests_no_restrictions(uint8* address, uint8* physicalAddress, size_t size)
+run_tests_no_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
 		0x0,	// low
@@ -326,38 +628,64 @@ run_tests_no_restrictions(uint8* address, uint8* physicalAddress, size_t size)
 		0		// flags
 	};
 
-	TestSuite suite("no restrictions", restrictions, 512, address,
-		physicalAddress);
+	TestSuite suite(context, "no restrictions", restrictions, 512);
 
 	suite.AddTest(0, 1024, false, B_USER_IO_REQUEST)
 		.AddSource(0, 1024)
 		.NextResult(0, false, false)
 			.AddTarget(0, 1024, false);
-	suite.AddTest(23, 1024, true, B_USER_IO_REQUEST)
+
+	// read partial begin/end
+	suite.AddTest(23, 1024, false, B_USER_IO_REQUEST)
 		.AddSource(0, 1024)
 		.NextResult(0, true, true)
 			.AddTarget(0, 23, true)
 			.AddTarget(0, 1024, false)
-			.AddTarget(23, 512 - 23, true)
-			;
-	suite.AddTest(0, 1028, true, B_USER_IO_REQUEST)
+			.AddTarget(23, 512 - 23, true);
+
+	// read less than a block
+	suite.AddTest(23, 30, false, B_USER_IO_REQUEST)
+		.AddSource(0, 1024)
+		.NextResult(0, true, true)
+			.AddTarget(0, 23, true)
+			.AddTarget(0, 30, false)
+			.AddTarget(23, 512 - 53, true);
+
+	// write begin/end
+	suite.AddTest(23, 1024, true, B_USER_IO_REQUEST)
+		.AddSource(0, 1024)
+		.NextResult(0, true, true)
+			.AddTarget(0, 512, true)
+			.AddTarget(489, 512, false)
+			.AddTarget(512, 512, true);
+
+	// read partial end, length < iovec length
+	suite.AddTest(0, 1028, false, B_USER_IO_REQUEST)
 		.AddSource(0, 512)
-		.AddSource(1024, 516)
+		.AddSource(1024, 1024)
 		.NextResult(0, false, true)
 			.AddTarget(0, 512, false)
 			.AddTarget(1024, 516, false)
 			.AddTarget(0, 508, true);
 
+	// write partial end, length < iovec length
+	suite.AddTest(0, 1028, true, B_USER_IO_REQUEST)
+		.AddSource(0, 512)
+		.AddSource(1024, 1024)
+		.NextResult(0, false, true)
+			.AddTarget(0, 512, false)
+			.AddTarget(1024, 512, false)
+			.AddTarget(0, 512, true);
+
 	suite.Run();
 }
 
 
 static void
-run_tests_address_restrictions(uint8* address, uint8* physicalAddress,
-	size_t size)
+run_tests_address_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
-		(addr_t)physicalAddress + 512,	// low
+		context.PhysicalDataBase() + 512,	// low
 		0,		// high
 		0,		// alignment
 		0,		// boundary
@@ -367,7 +695,7 @@ run_tests_address_restrictions(uint8* address, uint8* physicalAddress,
 		0		// flags
 	};
 
-	TestSuite suite("address", restrictions, 512, address, physicalAddress);
+	TestSuite suite(context, "address", restrictions, 512);
 
 	suite.AddTest(0, 1024, false, B_USER_IO_REQUEST)
 		.AddSource(0, 1024)
@@ -380,8 +708,7 @@ run_tests_address_restrictions(uint8* address, uint8* physicalAddress,
 
 
 static void
-run_tests_alignment_restrictions(uint8* address, uint8* physicalAddress,
-	size_t size)
+run_tests_alignment_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
 		0x0,	// low
@@ -394,7 +721,7 @@ run_tests_alignment_restrictions(uint8* address, uint8* physicalAddress,
 		0		// flags
 	};
 
-	TestSuite suite("alignment", restrictions, 512, address, physicalAddress);
+	TestSuite suite(context, "alignment", restrictions, 512);
 
 	suite.AddTest(0, 1024, false, B_PHYSICAL_IO_REQUEST)
 		.AddSource(16, 1024)
@@ -406,8 +733,7 @@ run_tests_alignment_restrictions(uint8* address, uint8* physicalAddress,
 
 
 static void
-run_tests_boundary_restrictions(uint8* address, uint8* physicalAddress,
-	size_t size)
+run_tests_boundary_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
 		0x0,	// low
@@ -420,7 +746,7 @@ run_tests_boundary_restrictions(uint8* address, uint8* physicalAddress,
 		0		// flags
 	};
 
-	TestSuite suite("boundary", restrictions, 512, address, physicalAddress);
+	TestSuite suite(context, "boundary", restrictions, 512);
 
 	suite.AddTest(0, 2000, false, B_USER_IO_REQUEST)
 		.AddSource(0, 2048)
@@ -434,8 +760,7 @@ run_tests_boundary_restrictions(uint8* address, uint8* physicalAddress,
 
 
 static void
-run_tests_segment_restrictions(uint8* address, uint8* physicalAddress,
-	size_t size)
+run_tests_segment_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
 		0x0,	// low
@@ -448,41 +773,144 @@ run_tests_segment_restrictions(uint8* address, uint8* physicalAddress,
 		0		// flags
 	};
 
-	TestSuite suite("segment", restrictions, 512, address, physicalAddress);
+	TestSuite suite(context, "segment", restrictions, 512);
 
-#if 0
-	suite.AddTest(0, 1024, false, B_USER_IO_REQUEST)
-		.AddSource(0, 1024)
-		.NextResult(0, false)
-			.AddTarget(0, 1024, false);
-#endif
+	suite.AddTest(0, 4096, false, B_USER_IO_REQUEST)
+		.AddSource(0, 4096)
+		.NextResult(0, false, false)
+			.AddTarget(0, 1024, false)
+			.AddTarget(1024, 1024, false)
+			.AddTarget(2048, 1024, false)
+			.AddTarget(3072, 1024, false);
 
 	suite.Run();
 }
 
 
 static void
-run_tests_mean_restrictions(uint8* address, uint8* physicalAddress, size_t size)
+run_tests_transfer_restrictions(TestSuiteContext& context)
 {
 	const dma_restrictions restrictions = {
-		(addr_t)physicalAddress + 1024,	// low
+		0x0,	// low
 		0x0,	// high
-		32,		// alignment
-		512,	// boundary
-		2048,	// max transfer
-		2,		// max segment count
-		1024,	// max segment size
+		0,		// alignment
+		0,		// boundary
+		1024,	// max transfer
+		0,		// max segment count
+		0,		// max segment size
 		0		// flags
 	};
 
-	TestSuite suite("mean", restrictions, 512, address, physicalAddress);
+	TestSuite suite(context, "transfer", restrictions, 512);
+
+	suite.AddTest(0, 4000, false, B_USER_IO_REQUEST)
+		.AddSource(0, 4096)
+		.NextResult(0, false, false)
+			.AddTarget(0, 1024, false)
+		.NextResult(0, false, false)
+			.AddTarget(1024, 1024, false)
+		.NextResult(0, false, false)
+			.AddTarget(2048, 1024, false)
+		.NextResult(0, false, false)
+			.AddTarget(3072, 1024 - 96, false)
+			.AddTarget(0, 96, true);
+
+	suite.Run();
+}
+
+
+static void
+run_tests_interesting_restrictions(TestSuiteContext& context)
+{
+	dma_restrictions restrictions = {
+		0x0,	// low
+		0x0,	// high
+		32,		// alignment
+		512,	// boundary
+		0,		// max transfer
+		0,		// max segment count
+		0,		// max segment size
+		0		// flags
+	};
+
+	TestSuite suite(context, "interesting", restrictions, 512);
+
+	// read with partial begin/end
+	suite.AddTest(32, 1000, false, B_USER_IO_REQUEST)
+		.AddSource(0, 1024)
+		.NextResult(0, true, true)
+			.AddTarget(0, 32, true)
+			.AddTarget(0, 512, false)
+			.AddTarget(512, 480, false)
+			.AddTarget(32, 480, true)
+			.AddTarget(512, 32, true);
+
+	// write with partial begin/end
+	suite.AddTest(32, 1000, true, B_USER_IO_REQUEST)
+		.AddSource(0, 1024)
+		.NextResult(0, true, true)
+			.AddTarget(0, 512, true)
+			.AddTarget(480, 32, false)
+			.AddTarget(512, 480, false)
+			.AddTarget(512, 512, true);
+
+	suite.Run();
+
+	restrictions = (dma_restrictions){
+		0x0,	// low
+		0x0,	// high
+		32,		// alignment
+		512,	// boundary
+		0,		// max transfer
+		4,		// max segment count
+		0,		// max segment size
+		0		// flags
+	};
+
+	TestSuite suite2(context, "interesting2", restrictions, 512);
+
+	suite2.AddTest(32, 1000, false, B_USER_IO_REQUEST)
+		.AddSource(0, 1024)
+		.NextResult(0, true, false)
+			.AddTarget(0, 32, true)
+			.AddTarget(0, 512, false)
+			.AddTarget(512, 480, false)
+		.NextResult(0, false, true)
+			.AddTarget(0, 512, true);
+
+	suite2.Run();
+}
+
+
+static void
+run_tests_mean_restrictions(TestSuiteContext& context)
+{
+	const dma_restrictions restrictions = {
+		context.PhysicalDataBase() + 1024,	// low
+		0x0,	// high
+		32,		// alignment
+		1024,	// boundary
+		0,		// max transfer
+		2,		// max segment count
+		512,	// max segment size
+		0		// flags
+	};
+
+	TestSuite suite(context, "mean", restrictions, 512);
 
-#if 0
 	suite.AddTest(0, 1024, false, B_USER_IO_REQUEST)
 		.AddSource(0, 1024)
-		.NextResult(0, false)
-			.AddTarget(0, 1024, false);
-#endif
+		.NextResult(0, false, false)
+			.AddTarget(0, 512, true)
+			.AddTarget(512, 512, true);
+
+	suite.AddTest(0, 1024, false, B_USER_IO_REQUEST)
+		.AddSource(1024 + 32, 1024)
+		.NextResult(0, false, false)
+			.AddTarget(1024 + 32, 512, false)
+		.NextResult(0, false, false)
+			.AddTarget(1568, 480, false)
+			.AddTarget(1568 + 480, 32, false);
 
 	suite.Run();
 }
@@ -491,28 +919,21 @@ run_tests_mean_restrictions(uint8* address, uint8* physicalAddress, size_t size)
 static void
 run_test()
 {
-	size_t size = 1 * 1024 * 1024;
-	uint8* address;
-	area_id area = create_area("dma source", (void**)&address,
-		B_ANY_KERNEL_ADDRESS, size, B_CONTIGUOUS,
-		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
-	if (area < B_OK)
+	TestSuiteContext context;
+	status_t status = context.Init(4 * B_PAGE_SIZE);
+	if (status != B_OK)
 		return;
 
-	physical_entry entry;
-	get_memory_map(address, size, &entry, 1);
+	run_tests_no_restrictions(context);
+	run_tests_address_restrictions(context);
+	run_tests_alignment_restrictions(context);
+	run_tests_boundary_restrictions(context);
+	run_tests_segment_restrictions(context);
+	run_tests_transfer_restrictions(context);
+	run_tests_interesting_restrictions(context);
+	run_tests_mean_restrictions(context);
 
-	dprintf("DMA Test area %p, physical %p\n", address, entry.address);
-
-	run_tests_no_restrictions(address, (uint8*)entry.address, size);
-	run_tests_address_restrictions(address, (uint8*)entry.address, size);
-	run_tests_alignment_restrictions(address, (uint8*)entry.address, size);
-	run_tests_boundary_restrictions(address, (uint8*)entry.address, size);
-	run_tests_segment_restrictions(address, (uint8*)entry.address, size);
-	run_tests_mean_restrictions(address, (uint8*)entry.address, size);
-
-	delete_area(area);
-	panic("done.");
+	panic("All tests passed!");
 }
 
 
@@ -554,6 +975,7 @@ dma_test_init_driver(device_node *node, void **_driverCookie)
 		return sArea;
 
 	*_driverCookie = node;
+
 	run_test();
 	return B_OK;
 }