applied DMA mode speed optimizations 'step 1.5': incorporated all top-level acceleration functions inside the engine's code so minimal nested calls are made. Confirmed further speedup of about 15% on fast CPUs...

git-svn-id: file:///srv/svn/repos/haiku/trunk/current@11427 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Rudolf Cornelissen 2005-02-21 13:00:14 +00:00
parent e605d52661
commit 1554e5cb4c
7 changed files with 230 additions and 257 deletions

View File

@ -3,26 +3,30 @@
This file may be used under the terms of the Be Sample Code License.
Other authors:
Rudolf Cornelissen 9/2003-1/2005.
Rudolf Cornelissen 9/2003-2/2005.
*/
/*
note:
attempting DMA on NV40 and higher because without it I can't get it going ATM.
Later on this can become a nv.settings switch, and maybe later we can even
forget about non-DMA completely (depends on 3D acceleration attempts).
moved DMA acceleration 'top-level' routines to be integrated in the engine:
it is costly to call the engine for every single function within a loop!!
(BeRoMeter 1.2.6 benchmarked: P4 3.2Ghz increased 15%, ...)
Leaving PIO acceleration as it is for now, for the purpose of benchmarking :-)
note also:
attempting DMA on NV40 and higher because without it I can't get them going ATM.
Maybe later we can forget about PIO mode acceleration totally (depends on 3D
acceleration attempts).
*/
#define MODULE_BIT 0x40000000
#include "acc_std.h"
void SCREEN_TO_SCREEN_BLIT(engine_token *et, blit_params *list, uint32 count)
void SCREEN_TO_SCREEN_BLIT_PIO(engine_token *et, blit_params *list, uint32 count)
{
int i;
if(!si->settings.dma_acc)
{
/* init acc engine for blit function */
nv_acc_setup_blit();
@ -42,30 +46,8 @@ void SCREEN_TO_SCREEN_BLIT(engine_token *et, blit_params *list, uint32 count)
i++;
}
}
else
{
/* init acc engine for blit function */
nv_acc_setup_blit_dma();
/* do each blit */
i=0;
while (count--)
{
nv_acc_blit_dma
(
list[i].src_left,
list[i].src_top,
list[i].dest_left,
list[i].dest_top,
list[i].width,
list[i].height
);
i++;
}
}
}
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT(engine_token *et, scaled_blit_params *list, uint32 count)
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_PIO(engine_token *et, scaled_blit_params *list, uint32 count)
{
int i;
@ -88,7 +70,7 @@ void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT(engine_token *et, scaled_blit_params
}
}
void SCREEN_TO_SCREEN_TRANSPARENT_BLIT(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count)
void SCREEN_TO_SCREEN_TRANSPARENT_BLIT_PIO(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count)
{
int i;
@ -110,12 +92,10 @@ void SCREEN_TO_SCREEN_TRANSPARENT_BLIT(engine_token *et, uint32 transparent_colo
}
}
void FILL_RECTANGLE(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count)
void FILL_RECTANGLE_PIO(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count)
{
int i;
if(!si->settings.dma_acc)
{
/* init acc engine for fill function */
nv_acc_setup_rectangle(colorIndex);
@ -133,33 +113,11 @@ void FILL_RECTANGLE(engine_token *et, uint32 colorIndex, fill_rect_params *list,
i++;
}
}
else
{
/* init acc engine for fill function */
nv_acc_setup_rectangle_dma(colorIndex);
/* draw each rectangle */
i=0;
while (count--)
{
nv_acc_rectangle_dma
(
list[i].left,
(list[i].right)+1,
list[i].top,
(list[i].bottom-list[i].top)+1
);
i++;
}
}
}
void INVERT_RECTANGLE(engine_token *et, fill_rect_params *list, uint32 count)
void INVERT_RECTANGLE_PIO(engine_token *et, fill_rect_params *list, uint32 count)
{
int i;
if(!si->settings.dma_acc)
{
/* init acc engine for invert function */
nv_acc_setup_rect_invert();
@ -177,33 +135,11 @@ void INVERT_RECTANGLE(engine_token *et, fill_rect_params *list, uint32 count)
i++;
}
}
else
{
/* init acc engine for invert function */
nv_acc_setup_rect_invert_dma();
/* invert each rectangle */
i=0;
while (count--)
{
nv_acc_rectangle_invert_dma
(
list[i].left,
(list[i].right)+1,
list[i].top,
(list[i].bottom-list[i].top)+1
);
i++;
}
}
}
void FILL_SPAN(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count)
void FILL_SPAN_PIO(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count)
{
int i;
if(!si->settings.dma_acc)
{
/* init acc engine for fill function */
nv_acc_setup_rectangle(colorIndex);
@ -221,23 +157,3 @@ void FILL_SPAN(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count)
i+=3;
}
}
else
{
/* init acc engine for fill function */
nv_acc_setup_rectangle_dma(colorIndex);
/* draw each span */
i=0;
while (count--)
{
nv_acc_rectangle_dma
(
list[i+1],
list[i+2]+1,
list[i],
1
);
i+=3;
}
}
}

View File

@ -4,7 +4,7 @@
other authors:
Mark Watson
Rudolf Cornelissen 3/2004-1/2005
Rudolf Cornelissen 3/2004-2/2005
*/
/*
@ -27,7 +27,7 @@ uint32 ACCELERANT_ENGINE_COUNT(void)
return 1;
}
status_t ACQUIRE_ENGINE(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et)
status_t ACQUIRE_ENGINE_PIO(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et)
{
/* acquire the shared benaphore */
AQUIRE_BEN(si->engine.lock)
@ -35,9 +35,21 @@ status_t ACQUIRE_ENGINE(uint32 capabilities, uint32 max_wait, sync_token *st, en
if (st) SYNC_TO_TOKEN(st);
/* make sure all needed engine cmd's are mapped to the FIFO */
if (!si->settings.dma_acc)
nv_acc_assert_fifo();
else
/* return an engine token */
*et = &nv_engine_token;
return B_OK;
}
status_t ACQUIRE_ENGINE_DMA(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et)
{
/* acquire the shared benaphore */
AQUIRE_BEN(si->engine.lock)
/* sync if required */
if (st) SYNC_TO_TOKEN(st);
/* make sure all needed engine cmd's are mapped to the FIFO */
nv_acc_assert_fifo_dma();
/* return an engine token */

View File

@ -27,7 +27,11 @@ These definitions are out of pure lazyness.
#define CHKO(x) case B_##x: \
if (check_overlay_capability(B_##x) == B_OK) return (void *)x; else return (void *)0
#define CHKA(x) case B_##x: \
if (check_acc_capability(B_##x) == B_OK) return (void *)x; else return (void *)0
if (check_acc_capability(B_##x) == B_OK) \
{if(!si->settings.dma_acc) return (void *)x##_PIO; else return (void *)x##_DMA;} \
else return (void *)0
/* CHKS(x): switch case for hook B_x that returns x_PIO when
 * si->settings.dma_acc is zero and x_DMA otherwise. Unlike CHKA, no
 * check_acc_capability() test is done before handing out the hook. */
#define CHKS(x) case B_##x: \
if(!si->settings.dma_acc) return (void *)x##_PIO; else return (void *)x##_DMA
#define HOOK(x) case B_##x: return (void *)x
#define ZERO(x) case B_##x: return (void *)0
#define HRDC(x) case B_##x: return si->settings.hardcursor? (void *)x: (void *)0; // apsed
@ -76,7 +80,7 @@ void * get_accelerant_hook(uint32 feature, void *data)
/* synchronization */
HOOK(ACCELERANT_ENGINE_COUNT);
HOOK(ACQUIRE_ENGINE);
CHKS(ACQUIRE_ENGINE);
HOOK(RELEASE_ENGINE);
HOOK(WAIT_ENGINE_IDLE);
HOOK(GET_SYNC_TOKEN);

View File

@ -1,6 +1,8 @@
/*
Copyright 1999, Be Incorporated. All Rights Reserved.
This file may be used under the terms of the Be Sample Code License.
Modified by Rudolf Cornelissen 2/2005.
*/
#if !defined(GENERIC_H)
@ -39,18 +41,20 @@ void MOVE_CURSOR(uint16 x, uint16 y);
void SHOW_CURSOR(bool is_visible);
uint32 ACCELERANT_ENGINE_COUNT(void);
status_t ACQUIRE_ENGINE(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et);
status_t ACQUIRE_ENGINE_PIO(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et);
status_t ACQUIRE_ENGINE_DMA(uint32 capabilities, uint32 max_wait, sync_token *st, engine_token **et);
status_t RELEASE_ENGINE(engine_token *et, sync_token *st);
void WAIT_ENGINE_IDLE(void);
status_t GET_SYNC_TOKEN(engine_token *et, sync_token *st);
status_t SYNC_TO_TOKEN(sync_token *st);
void SCREEN_TO_SCREEN_BLIT(engine_token *et, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_TRANSPARENT_BLIT(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT(engine_token *et, scaled_blit_params *list, uint32 count);
void FILL_RECTANGLE(engine_token *et, uint32 color, fill_rect_params *list, uint32 count);
void INVERT_RECTANGLE(engine_token *et, fill_rect_params *list, uint32 count);
void FILL_SPAN(engine_token *et, uint32 color, uint16 *list, uint32 count);
/* PIO acceleration */
void SCREEN_TO_SCREEN_BLIT_PIO(engine_token *et, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_TRANSPARENT_BLIT_PIO(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_PIO(engine_token *et, scaled_blit_params *list, uint32 count);
void FILL_RECTANGLE_PIO(engine_token *et, uint32 color, fill_rect_params *list, uint32 count);
void INVERT_RECTANGLE_PIO(engine_token *et, fill_rect_params *list, uint32 count);
void FILL_SPAN_PIO(engine_token *et, uint32 color, uint16 *list, uint32 count);
/* video_overlay */
uint32 OVERLAY_COUNT(const display_mode *dm);

View File

@ -1043,103 +1043,141 @@ void nv_acc_assert_fifo_dma(void)
nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
/* Bitmap: */
nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
}
/* Make sure our pattern is loaded: */
//fixme: can be removed here if a 3D add-on isn't going to modify it..
/* wait for room in fifo for pattern cmd if needed. */
if (nv_acc_fifofree_dma(7) != B_OK) return;
/* now setup pattern (writing 7 32bit words) */
nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */
/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
nv_start_dma();
}
}
/*
note:
moved acceleration 'top-level' routines to be integrated in the engine:
it is costly to call the engine for every single function within a loop!!
(BeRoMeter 1.2.6 benchmarked: P4 3.2Ghz increased 15%, ...)
*/
/* screen to screen blit - i.e. move windows around and scroll within them. */
status_t nv_acc_setup_blit_dma()
void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count)
{
uint32 i = 0;
/*** init acc engine for blit function ***/
/* ROP registers (Raster OPeration):
* wait for room in fifo for ROP cmd if needed. */
if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(2) != B_OK) return;
/* now setup ROP (writing 2 32bit words) for GXcopy */
nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */
return B_OK;
}
status_t nv_acc_blit_dma(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h)
{
/*** do each blit ***/
/* Note:
* blit-copy direction is determined inside nvidia hardware: no setup needed */
while (count--)
{
/* instruct engine what to blit:
* wait for room in fifo for blit cmd if needed. */
if (nv_acc_fifofree_dma(4) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(4) != B_OK) return;
/* now setup blit (writing 4 32bit words) */
nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((ys << 16) | xs); /* SourceOrg */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((yd << 16) | xd); /* DestOrg */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = (((h + 1) << 16) | (w + 1)); /* HeightWidth */
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
(((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
(((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */
/* tell the engine to fetch the commands in the DMA buffer that were not
* executed before. At this time the setup done by nv_acc_setup_blit_dma() is
* also executed on the first call of nv_acc_blit_dma(). */
* executed before. */
nv_start_dma();
return B_OK;
i++;
}
}
/* rectangle fill - i.e. workspace and window background color */
/* span fill - i.e. (selected) menuitem background color (Dano) */
status_t nv_acc_setup_rectangle_dma(uint32 color)
void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count)
{
uint32 i = 0;
/*** init acc engine for fill function ***/
/* ROP registers (Raster OPeration):
* wait for room in fifo for ROP and bitmap cmd if needed. */
if (nv_acc_fifofree_dma(4) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(4) != B_OK) return;
/* now setup ROP (writing 2 32bit words) for GXcopy */
nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */
/* now setup fill color (writing 2 32bit words) */
nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = color; /* Color1A */
si->engine.dma.cmdbuffer[si->engine.dma.current++] = colorIndex; /* Color1A */
return B_OK;
}
status_t nv_acc_rectangle_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
/*** draw each rectangle ***/
while (count--)
{
/* instruct engine what to fill:
* wait for room in fifo for bitmap cmd if needed. */
if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(3) != B_OK) return;
/* now setup fill (writing 3 32bit words) */
nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2);
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
(((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
(((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
(((((list[i].right)+1) - (list[i].left)) << 16) |
(((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
/* tell the engine to fetch the commands in the DMA buffer that were not
* executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is
* also executed on the first call of nv_acc_rectangle_dma(). */
* executed before. */
nv_start_dma();
return B_OK;
i++;
}
}
/* span fill - i.e. (selected) menuitem background color (Dano) */
/*
	Fills 'count' one-pixel-high spans with 'colorIndex' through the DMA
	command buffer.

	et         - engine token; not used by this routine.
	colorIndex - value written to the Color1A register.
	list       - 'count' consecutive uint16 triplets: { y, x_start, x_end }.
	             x_end is inclusive, so the width sent to the engine is
	             (x_end + 1) - x_start; the height is always 1.
	count      - number of triplets in 'list'.

	Returns without drawing (the remaining spans) as soon as
	nv_acc_fifofree_dma() does not report B_OK.
*/
void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count)
{
	const uint16 *span = list;

	/*** init acc engine for fill function ***/
	/* ROP registers (Raster OPeration):
	 * wait for room in fifo for ROP and bitmap cmd if needed. */
	if (nv_acc_fifofree_dma(4) != B_OK) return;
	/* now setup ROP (writing 2 32bit words) for GXcopy */
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */
	/* now setup fill color (writing 2 32bit words) */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	si->engine.dma.cmdbuffer[si->engine.dma.current++] = colorIndex; /* Color1A */

	/*** draw each span ***/
	for (; count != 0; count--, span += 3)
	{
		/* Widen to uint32 before shifting: a uint16 operand is promoted to
		 * (signed) int, and shifting a value >= 0x8000 - or a negative width
		 * difference - left by 16 is undefined behaviour. The resulting bit
		 * pattern is the same as before for all valid coordinates. */
		const uint32 y = span[0];
		const uint32 x_start = span[1];
		const uint32 x_end = span[2];

		/* instruct engine what to fill:
		 * wait for room in fifo for bitmap cmd if needed. */
		if (nv_acc_fifofree_dma(3) != B_OK) return;
		/* now setup fill (writing 3 32bit words) */
		nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2);
		/* y comes from a uint16, so it already fits the low 16 bits */
		si->engine.dma.cmdbuffer[si->engine.dma.current++] =
			((x_start << 16) | y); /* Unclipped Rect 0 LeftTop */
		si->engine.dma.cmdbuffer[si->engine.dma.current++] =
			((((x_end + 1) - x_start) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}
}
/* rectangle invert - i.e. text cursor and text selection */
status_t nv_acc_setup_rect_invert_dma()
void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count)
{
uint32 i = 0;
/*** init acc engine for invert function ***/
/* ROP registers (Raster OPeration):
* wait for room in fifo for ROP and bitmap cmd if needed. */
if (nv_acc_fifofree_dma(4) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(4) != B_OK) return;
/* now setup ROP (writing 2 32bit words) for GXinvert */
nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x55; /* SetRop5 */
@ -1147,25 +1185,24 @@ status_t nv_acc_setup_rect_invert_dma()
nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* Color1A */
return B_OK;
}
status_t nv_acc_rectangle_invert_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
/*** invert each rectangle ***/
while (count--)
{
/* instruct engine what to fill:
* wait for room in fifo for bitmap cmd if needed. */
if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR;
if (nv_acc_fifofree_dma(3) != B_OK) return;
/* now setup fill (writing 3 32bit words) */
nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2);
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
(((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
si->engine.dma.cmdbuffer[si->engine.dma.current++] =
(((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
(((((list[i].right)+1) - (list[i].left)) << 16) |
(((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
/* tell the engine to fetch the commands in the DMA buffer that were not
* executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is
* also executed on the first call of nv_acc_rectangle_dma(). */
* executed before. */
nv_start_dma();
return B_OK;
i++;
}
}

View File

@ -90,7 +90,7 @@ status_t nv_general_powerup()
{
status_t status;
LOG(1,("POWERUP: Haiku nVidia Accelerant 0.38 running.\n"));
LOG(1,("POWERUP: Haiku nVidia Accelerant 0.39 running.\n"));
/* preset no laptop */
si->ps.laptop = false;

View File

@ -114,15 +114,15 @@ status_t nv_acc_video_blit(uint16 xs,uint16 ys,uint16 ws, uint16 hs,
uint16 xd,uint16 yd,uint16 wd,uint16 hd);
status_t nv_acc_wait_idle(void);
/* DMA versions */
status_t nv_acc_wait_idle_dma(void);
status_t nv_acc_init_dma(void);
void nv_acc_assert_fifo_dma(void);
status_t nv_acc_setup_blit_dma(void);
status_t nv_acc_blit_dma(uint16,uint16,uint16, uint16,uint16,uint16 );
status_t nv_acc_setup_rectangle_dma(uint32 color);
status_t nv_acc_rectangle_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl);
status_t nv_acc_setup_rect_invert_dma(void);
status_t nv_acc_rectangle_invert_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl);
status_t nv_acc_wait_idle_dma(void);
void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_TRANSPARENT_BLIT_DMA(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count);
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count);
void FILL_RECTANGLE_DMA(engine_token *et, uint32 color, fill_rect_params *list, uint32 count);
void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count);
void FILL_SPAN_DMA(engine_token *et, uint32 color, uint16 *list, uint32 count);
/* backend scaler functions */
status_t check_overlay_capability(uint32 feature);