The Y check can be moved out of the loop of course, why didn't I see this before?! It is about 4% faster than before. If anyone sees a way to make it even faster, please shoot! I can watch movies fullscreen on a 2 GHz Core 2 Duo in VESA with bilinear scaling, but it would be nice to use less CPU... :-)

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@26667 a95241bf-73f2-0310-859d-f6bbb57e9c96
2008-07-28 21:30:26 +00:00 · 2008-07-28 21:30:26 +00:00 · e196501316
commit e196501316
parent 2011fa73d0
1 changed files with 9 additions and 5 deletions
--- a/src/servers/app/drawing/Painter/Painter.cpp
+++ b/src/servers/app/drawing/Painter/Painter.cpp
@@ -1713,9 +1713,9 @@ Painter::_DrawBitmapBilinearCopy32(agg::rendering_buffer& srcBuffer,
 			// buffer handle for destination to be incremented per pixel
 			register uint8* d = dst;

-			for (int32 x = xIndexL; x <= xIndexR; x++) {
-				const uint8* s = src + xWeights[x].index;
-				if (wTop == 255) {
+			if (wTop == 255) {
+				for (int32 x = xIndexL; x <= xIndexR; x++) {
+					const uint8* s = src + xWeights[x].index;
 					// This case is important to prevent out
 					// of bounds access at bottom edge of the source
 					// bitmap. If the scale is low and integer, it will
@@ -1733,7 +1733,11 @@ Painter::_DrawBitmapBilinearCopy32(agg::rendering_buffer& srcBuffer,
 						d[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
 						d[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
 					}
-				} else {
+					d += 4;
+				}
+			} else {
+				for (int32 x = xIndexL; x <= xIndexR; x++) {
+					const uint8* s = src + xWeights[x].index;
 					if (xWeights[x].weight == 255) {
 						// Prevent out of bounds access on the right edge
 						// or simply speed up.
@@ -1761,8 +1765,8 @@ Painter::_DrawBitmapBilinearCopy32(agg::rendering_buffer& srcBuffer,
 						d[1] = t1 >> 16;
 						d[2] = t2 >> 16;
 					}
+					d += 4;
 				}
-				d += 4;
 			}
 			dst += dstBPR;
 		}