From 7e4b227ec1bfa1a85f187c9d052b75bd22f2f135 Mon Sep 17 00:00:00 2001 From: "Sergey Semyonov (Serge)" Date: Wed, 2 Jul 2008 12:41:34 +0000 Subject: [PATCH] use 3D texture engine for alpha blending git-svn-id: svn://kolibrios.org@813 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/system/drivers/ati2d/accel_2d.h | 7 +- programs/system/drivers/ati2d/accel_2d.inc | 42 +- programs/system/drivers/ati2d/accel_3d.inc | 611 ++++++++++++++++++++- programs/system/drivers/ati2d/ati2d.c | 9 +- programs/system/drivers/ati2d/ati2d.h | 104 ++++ programs/system/drivers/ati2d/common.h | 4 + programs/system/drivers/ati2d/r500.inc | 89 ++- 7 files changed, 839 insertions(+), 27 deletions(-) diff --git a/programs/system/drivers/ati2d/accel_2d.h b/programs/system/drivers/ati2d/accel_2d.h index 9a8ed4814..11c99d806 100644 --- a/programs/system/drivers/ati2d/accel_2d.h +++ b/programs/system/drivers/ati2d/accel_2d.h @@ -3,10 +3,9 @@ #define DRAW_RECT 2 #define LINE_2P 3 #define BLIT 4 - +#define COMPIZ 5 typedef unsigned int color_t; -typedef unsigned int u32_t; typedef struct { @@ -60,6 +59,10 @@ int Line2P(line2p_t *draw); int Blit(blit_t *blit); + +int RadeonComposite( blit_t *blit); + + # define RADEON_GMC_SRC_PITCH_OFFSET_CNTL (1 << 0) # define RADEON_GMC_DST_PITCH_OFFSET_CNTL (1 << 1) # define RADEON_GMC_BRUSH_SOLID_COLOR (13 << 4) diff --git a/programs/system/drivers/ati2d/accel_2d.inc b/programs/system/drivers/ati2d/accel_2d.inc index 6a79022d7..5d98f0d52 100644 --- a/programs/system/drivers/ati2d/accel_2d.inc +++ b/programs/system/drivers/ati2d/accel_2d.inc @@ -1,7 +1,6 @@ #define BRUSH_MONO (0<<4) -#define R300_PIO int DrawRect(draw_t* draw) { @@ -28,7 +27,7 @@ int DrawRect(draw_t* draw) ifl = safe_cli(); -#ifdef R300_PIO +#if R300_PIO R5xxFIFOWait(7); @@ -88,7 +87,7 @@ int FillRect(fill_t *fill) ifl = safe_cli(); -#ifdef R300_PIO +#if R300_PIO #else @@ -140,7 +139,7 @@ int Blit(blit_t *blit) ifl = safe_cli(); -#ifdef R300_PIO +#if R300_PIO #else @@ -189,22 +188,8 @@ int 
Line2P(line2p_t *draw) ifl = safe_cli(); - BEGIN_RING(); - OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_POLYLINE, 4)); - OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_SOLID_COLOR | - RADEON_GMC_DST_32BPP | - RADEON_GMC_SRC_DATATYPE_COLOR | - (1 << 28)+(1 << 30) | R5XX_ROP3_P); +#if R300_PIO - OUT_RING(rhd.dst_pitch_offset); - OUT_RING(draw->color); - OUT_RING((y0<<16)|x0); - OUT_RING((y1<<16)|x1); - COMMIT_RING(); - - -/* R5xxFIFOWait(7); OUTREG(R5XX_DP_GUI_MASTER_CNTL, rhd.gui_control | R5XX_ROP3_P | @@ -219,7 +204,24 @@ int Line2P(line2p_t *draw) OUTREG(R5XX_DST_LINE_START,(y0<<16)|x0); OUTREG(R5XX_DST_LINE_END,(y1<<16)|x1); -*/ + +#else + + BEGIN_RING(); + OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_POLYLINE, 4)); + OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + RADEON_GMC_DST_32BPP | + RADEON_GMC_SRC_DATATYPE_COLOR | + (1 << 28)+(1 << 30) | R5XX_ROP3_P); + + OUT_RING(rhd.dst_pitch_offset); + OUT_RING(draw->color); + OUT_RING((y0<<16)|x0); + OUT_RING((y1<<16)|x1); + COMMIT_RING(); + +#endif safe_sti(ifl); }; diff --git a/programs/system/drivers/ati2d/accel_3d.inc b/programs/system/drivers/ati2d/accel_3d.inc index c58f976d7..439e3f4cb 100644 --- a/programs/system/drivers/ati2d/accel_3d.inc +++ b/programs/system/drivers/ati2d/accel_3d.inc @@ -7,8 +7,50 @@ #define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) -#define IS_R300_3D 0 -#define IS_R500_3D 1 +#define RADEON_SWITCH_TO_3D() \ +do { \ + u32_t wait_until = 0; \ + BEGIN_ACCEL(1); \ + wait_until |= RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN; \ + OUT_ACCEL_REG(RADEON_WAIT_UNTIL, wait_until); \ + FINISH_ACCEL(); \ +} while (0); + + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + u32_t blend_cntl; +}; + +static struct blendinfo RadeonBlendOp[] = { + /* 0 - Clear */ + {0, 0, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_ZERO}, + /* 1 - Src */ + {0, 0, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO}, + /* 2 - Dst */ + {0, 0, RADEON_SRC_BLEND_GL_ZERO | 
RADEON_DST_BLEND_GL_ONE}, + /* 3 - Over */ + {0, 1, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* 4 - OverReverse */ + {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE}, + /* 5 - In */ + {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO}, + /* 6 - InReverse */ + {0, 1, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_SRC_ALPHA}, + /* 7 - Out */ + {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO}, + /* 8 - OutReverse */ + {0, 1, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* 9 - Atop */ + {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* 10- AtopReverse */ + {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA}, + /* 11 - Xor */ + {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* 12 - Add */ + {0, 0, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ONE}, +}; static void Init3DEngine(RHDPtr rhdPtr) @@ -21,7 +63,8 @@ static void Init3DEngine(RHDPtr rhdPtr) // info->texW[0] = info->texH[0] = info->texW[1] = info->texH[1] = 1; - if (IS_R300_3D || IS_R500_3D) { + if (IS_R300_3D || IS_R500_3D) + { BEGIN_ACCEL(3); OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); @@ -29,6 +72,16 @@ static void Init3DEngine(RHDPtr rhdPtr) OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); FINISH_ACCEL(); + if ( IS_R500_3D) + { + u32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT); + + rhdPtr->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; + RADEONOUTPLL(R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); + } + + dbgprintf("Pipes count %d\n", rhdPtr->num_gb_pipes ); + gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16); switch(rhdPtr->num_gb_pipes) { @@ -628,5 +681,557 @@ static void Init3DEngine(RHDPtr rhdPtr) } +static Bool R300TextureSetup(int w, int h, int unit) +{ + 
//RINFO_FROM_SCREEN(pPix->drawable.pScreen); + u32_t txfilter, txformat0, txformat1, txoffset, txpitch; + // int w = pPict->pDrawable->width; + // int h = pPict->pDrawable->height; + int i, pixel_shift; + + //ACCEL_PREAMBLE(); + + //TRACE; + + //txpitch = exaGetPixmapPitch(pPix); + txpitch = rhd.displayWidth * 4; + + // txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + txoffset = rhd.FbIntAddress + rhd.FbScanoutStart; + + if ((txoffset & 0x1f) != 0) + dbgprintf("Bad texture offset 0x%x\n", (int)txoffset); + if ((txpitch & 0x1f) != 0) + dbgprintf("Bad texture pitch 0x%x\n", (int)txpitch); + + /* TXPITCH = pixels (texels) per line - 1 */ + pixel_shift = 32 >> 4; + txpitch >>= pixel_shift; + txpitch -= 1; + + // if (RADEONPixmapIsColortiled(pPix)) + // txoffset |= R300_MACRO_TILE; + + // for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++) + // { + // if (R300TexFormats[i].fmt == pPict->format) + // break; + // } + + //txformat1 = R300TexFormats[i].card_fmt; + txformat1 = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + + txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | + (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT)); + + if (IS_R500_3D && ((w - 1) & 0x800)) + txpitch |= R500_TXWIDTH_11; + + if (IS_R500_3D && ((h - 1) & 0x800)) + txpitch |= R500_TXHEIGHT_11; + + /* Use TXPITCH instead of TXWIDTH for address computations: we could + * omit this if there is no padding, but there is no apparent advantage + * in doing so. 
+ */ + txformat0 |= R300_TXPITCH_EN; + + // info->texW[unit] = w; + // info->texH[unit] = h; + + // if (pPict->repeat && !(unit == 0 && need_src_tile_x)) + // txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP); + // else + txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL); + + // if (pPict->repeat && !(unit == 0 && need_src_tile_y)) + // txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP); + // else + txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL); + + txfilter |= (unit << R300_TX_ID_SHIFT); + +// switch (pPict->filter) { +// case PictFilterNearest: + txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST); +// break; +// case PictFilterBilinear: +// txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR); +// break; +// default: +// RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); +// } + + { + u32_t *ring, write; + + BEGIN_ACCEL(7); + OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter); + OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0); + OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0); + OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1); + OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch); + OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset); +// if (!pPict->repeat) + OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0); + FINISH_ACCEL(); + } +// if (pPict->transform != 0) { +// is_transform[unit] = TRUE; +// transform[unit] = pPict->transform; +// } else { +// is_transform[unit] = FALSE; +// } + + return TRUE; +} + +static u32_t RADEONGetBlendCntl(int op, u32_t dst_format) +{ + u32_t sblend, dblend; + + sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK; + dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. 
+ */ + if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) { + if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA) + sblend = RADEON_SRC_BLEND_GL_ONE; + else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA) + sblend = RADEON_SRC_BLEND_GL_ZERO; + } + + return sblend | dblend; +} + +static Bool R300PrepareComposite(int op, int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + // RINFO_FROM_SCREEN(pDst->drawable.pScreen); + u32_t dst_format, dst_offset, dst_pitch; + u32_t txenable, colorpitch; + u32_t blendcntl; + int pixel_shift; + u32_t *ring, write; + + //ACCEL_PREAMBLE(); + + //TRACE; + + //if (!info->XInited3D) + // RADEONInit3DEngine(pScrn); + + //if (!R300GetDestFormat(pDstPicture, &dst_format)) + // return FALSE; + dst_format = R300_COLORFORMAT_ARGB8888; + + pixel_shift = 32 >> 4; + + //dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + dst_offset = rhd.FbIntAddress + rhd.FbScanoutStart; + + //dst_pitch = exaGetPixmapPitch(pDst); + dst_pitch = rhd.displayWidth * 4; + colorpitch = dst_pitch >> pixel_shift; + + // if (RADEONPixmapIsColortiled(pDst)) + // colorpitch |= R300_COLORTILE; + + colorpitch |= dst_format; + + if ((dst_offset & 0x0f) != 0) + dbgprintf("Bad destination offset 0x%x\n", (int)dst_offset); + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + dbgprintf("Bad destination pitch 0x%x\n", (int)dst_pitch); + + // if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE)) + // return FALSE; + + if (!R300TextureSetup(w, h, 0)) + return FALSE; + + txenable = R300_TEX_0_ENABLE; + + // RADEON_SWITCH_TO_3D(); + + /* setup the VAP */ + BEGIN_ACCEL(7); + + /* These registers define the number, type, and location of data submitted + * to the PVS unit of GA input (when PVS is disabled) + * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is + * enabled. This memory provides the inputs to the vertex shader program + * and ordering is not important. 
When PVS/TCL is disabled, this field maps + * directly to the GA input memory and the order is significant. In + * PVS_BYPASS mode the order is as follows: + * Position + * Point Size + * Color 0-3 + * Textures 0-7 + * Fog + */ + + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | + (0 << R300_SKIP_DWORDS_0_SHIFT) | + (0 << R300_DST_VEC_LOC_0_SHIFT) | + R300_SIGNED_0 | + (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | + (0 << R300_SKIP_DWORDS_1_SHIFT) | + (6 << R300_DST_VEC_LOC_1_SHIFT) | + R300_LAST_VEC_1 | + R300_SIGNED_1)); + + /* load the vertex shader + * We pre-load vertex programs in RADEONInit3DEngine(): + * - exa no mask + * - exa mask + * - Xv + * Here we select the offset of the vertex program we want to use + */ + OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, + ((3 << R300_PVS_FIRST_INST_SHIFT) | + (4 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (4 << R300_PVS_LAST_INST_SHIFT))); + OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, + (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); + + /* Position and one or two sets of 2 texture coordinates */ + OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); + OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); + + OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0); + OUT_ACCEL_REG(R300_TX_ENABLE, txenable); + FINISH_ACCEL(); + + /* setup pixel shader */ + if (IS_R300_3D) { + } else { + u32_t output_fmt; + u32_t src_color, src_alpha; + u32_t mask_color, mask_alpha; + + if (PICT_FORMAT_RGB(PICT_a8r8g8b8) == 0) + src_color = (R500_ALU_RGB_R_SWIZ_A_0 | + R500_ALU_RGB_G_SWIZ_A_0 | + R500_ALU_RGB_B_SWIZ_A_0); + else + src_color = (R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B); + + if (PICT_FORMAT_A(PICT_a8r8g8b8) == 0) + src_alpha = R500_ALPHA_SWIZ_A_1; + else + src_alpha = R500_ALPHA_SWIZ_A_A; + + mask_color = (R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1); + mask_alpha = R500_ALPHA_SWIZ_B_1; + + /* shader 
output swizzling */ + output_fmt = (R300_OUT_FMT_C4_8 | + R300_OUT_FMT_C0_SEL_BLUE | + R300_OUT_FMT_C1_SEL_GREEN | + R300_OUT_FMT_C2_SEL_RED | + R300_OUT_FMT_C3_SEL_ALPHA); + + BEGIN_ACCEL(6); + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(1))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(1))); + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + + OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); + FINISH_ACCEL(); + + BEGIN_ACCEL(13); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0); + /* tex inst for src texture */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* ALU inst */ + /* *_OMASK* - output component write mask */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + 
R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + /* ALU inst + * temp addresses for texture inputs + * RGB_ADDR0 is src tex (temp 0) + * RGB_ADDR1 is mask tex (temp 1) + */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(1) | + R500_RGB_ADDR2(0))); + /* ALU inst + * temp addresses for texture inputs + * ALPHA_ADDR0 is src tex (temp 0) + * ALPHA_ADDR1 is mask tex (temp 1) + */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(1) | + R500_ALPHA_ADDR2(0))); + + /* R500_ALU_RGB_TARGET - RGB render target */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + src_color | + R500_ALU_RGB_SEL_B_SRC1 | + mask_color | + R500_ALU_RGB_TARGET(0))); + + /* R500_ALPHA_RGB_TARGET - alpha render target */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(0) | + R500_ALPHA_SEL_A_SRC0 | + src_alpha | + R500_ALPHA_SEL_B_SRC1 | + mask_alpha | + R500_ALPHA_TARGET(0))); + + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(0) | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0)); + FINISH_ACCEL(); + } + + BEGIN_ACCEL(3); + + OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset); + OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch); + + blendcntl = RADEONGetBlendCntl(op, PICT_a8r8g8b8); + OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE); + + FINISH_ACCEL(); + + return TRUE; +} + + + + +#define VTX_COUNT 4 + +static __inline__ u32_t F_TO_DW(float val) +{ + union { + float f; + u32_t l; + } tmp; + tmp.f = val; + return tmp.l; +} + +#define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) + +#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ +do { \ + OUT_RING_F(_dstX); \ + OUT_RING_F(_dstY); \ + OUT_RING_F(_srcX); \ + OUT_RING_F(_srcY); \ +} while (0) + + + +static void RadeonCompositeTile(int srcX, int srcY, + int dstX, int 
dstY, + int w, int h) +{ + // RINFO_FROM_SCREEN(pDst->drawable.pScreen); + int vtx_count; + xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; + xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; + + u32 *ring, write; + + + // ACCEL_PREAMBLE(); + + // ENTER_DRAW(0); + + /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", + srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ + + srcTopLeft.x = IntToxFixed(srcX); + srcTopLeft.y = IntToxFixed(srcY); + srcTopRight.x = IntToxFixed(srcX + w); + srcTopRight.y = IntToxFixed(srcY); + srcBottomLeft.x = IntToxFixed(srcX); + srcBottomLeft.y = IntToxFixed(srcY + h); + srcBottomRight.x = IntToxFixed(srcX + w); + srcBottomRight.y = IntToxFixed(srcY + h); + +/* + if (is_transform[0]) { + transformPoint(transform[0], &srcTopLeft); + transformPoint(transform[0], &srcTopRight); + transformPoint(transform[0], &srcBottomLeft); + transformPoint(transform[0], &srcBottomRight); + } + if (is_transform[1]) { + transformPoint(transform[1], &maskTopLeft); + transformPoint(transform[1], &maskTopRight); + transformPoint(transform[1], &maskBottomLeft); + transformPoint(transform[1], &maskBottomRight); + } +*/ + vtx_count = VTX_COUNT; + + BEGIN_ACCEL(1); + OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); + FINISH_ACCEL(); + + BEGIN_RING(4 * vtx_count + 4); + + OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, + 4 * vtx_count)); + OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN | + RADEON_CP_VC_CNTL_PRIM_WALK_RING | + (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); + + + VTX_OUT((float)dstX, (float)dstY, + xFixedToFloat(srcTopLeft.x) / w, // info->texW[0], + xFixedToFloat(srcTopLeft.y) / h); // info->texH[0]); + + VTX_OUT((float)dstX, (float)(dstY + h), + xFixedToFloat(srcBottomLeft.x) / w, // info->texW[0], + xFixedToFloat(srcBottomLeft.y) / h); // info->texH[0]); + + VTX_OUT((float)(dstX + w), (float)(dstY + h), + xFixedToFloat(srcBottomRight.x) / w, // info->texW[0], + xFixedToFloat(srcBottomRight.y) / h); // 
info->texH[0]); + + VTX_OUT((float)(dstX + w), (float)dstY, + xFixedToFloat(srcTopRight.x) / w, // info->texW[0], + xFixedToFloat(srcTopRight.y) / h); // info->texH[0]); + + /* flushing is pipelined, free/finish is not */ + OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); + + // OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + + COMMIT_RING(); + + // LEAVE_DRAW(0); +} +#undef VTX_OUT +#undef VTX_OUT_MASK + + +int RadeonComposite( blit_t *blit) +{ + int tileSrcY, tileMaskY, tileDstY; + int remainingHeight; + + R300PrepareComposite(3, blit->src_x, blit->src_y, + blit->dst_x, blit->dst_y, + blit->w, blit->h); + +// if (!need_src_tile_x && !need_src_tile_y) { + RadeonCompositeTile( blit->src_x, blit->src_y, + blit->dst_x, blit->dst_y, + blit->w, blit->h); + return 0; + // } + + /* Tiling logic borrowed from exaFillRegionTiled */ + +#if 0 + modulus(srcY, src_tile_height, tileSrcY); + tileMaskY = maskY; + tileDstY = dstY; + + remainingHeight = height; + while (remainingHeight > 0) { + int remainingWidth = width; + int tileSrcX, tileMaskX, tileDstX; + int h = src_tile_height - tileSrcY; + + if (h > remainingHeight) + h = remainingHeight; + remainingHeight -= h; + + modulus(srcX, src_tile_width, tileSrcX); + tileMaskX = maskX; + tileDstX = dstX; + + while (remainingWidth > 0) { + int w = src_tile_width - tileSrcX; + if (w > remainingWidth) + w = remainingWidth; + remainingWidth -= w; + + FUNC_NAME(RadeonCompositeTile)(pDst, + tileSrcX, tileSrcY, + tileMaskX, tileMaskY, + tileDstX, tileDstY, + w, h); + + tileSrcX = 0; + tileMaskX += w; + tileDstX += w; + } + tileSrcY = 0; + tileMaskY += h; + tileDstY += h; + } +#endif +} + diff --git a/programs/system/drivers/ati2d/ati2d.c b/programs/system/drivers/ati2d/ati2d.c index a2a5a42f6..6a1e3b3e9 100644 --- a/programs/system/drivers/ati2d/ati2d.c +++ b/programs/system/drivers/ati2d/ati2d.c @@ -65,7 +65,8 @@ u32 __stdcall drvEntry(int action) // old_create = HwCursorCreate; R5xx2DInit(); -// 
Init3DEngine(&rhd); + rhd.has_tcl = 1; + Init3DEngine(&rhd); //init_r500(); @@ -155,6 +156,12 @@ int _stdcall srv_2d(ioctl_t *io) return Blit((blit_t*)inp); break; + case COMPIZ: + if(io->inp_size==6) + return RadeonComposite((blit_t*)inp); + break; + + default: return ERR_PARAM; }; diff --git a/programs/system/drivers/ati2d/ati2d.h b/programs/system/drivers/ati2d/ati2d.h index b45f86351..1f69e0a29 100644 --- a/programs/system/drivers/ati2d/ati2d.h +++ b/programs/system/drivers/ati2d/ati2d.h @@ -1,6 +1,12 @@ + #include "pci.h" #include "rhd_regs.h" +#define IS_R300_3D 0 +#define IS_R500_3D 1 + +#define R300_PIO 0 + enum RHD_CHIPSETS { RHD_UNKNOWN = 0, RHD_R300, @@ -174,6 +180,15 @@ typedef struct { const char * name; /* token name */ } SymTabRec, *SymTabPtr; + + +extern inline void +OUTREG8(CARD16 offset, u8 value) +{ + *(volatile CARD8 *)((CARD8 *)(rhd.MMIOBase + offset)) = value; +} + + extern inline CARD32 INREG(CARD16 offset) { return *(volatile CARD32 *)((CARD8*)(rhd.MMIOBase + offset)); @@ -266,3 +281,92 @@ void __stdcall r500_SetCursor(cursor_t *cursor, int x, int y); void __stdcall r500_CursorRestore(int x, int y); void R5xx2DInit(); + + +typedef struct { + u32_t x ; + u32_t y ; +} xPointFixed; + +typedef u32_t xFixed_16_16; + +typedef xFixed_16_16 xFixed; + +#define XFIXED_BITS 16 + +#define xFixedToInt(f) (int) ((f) >> XFIXED_BITS) +#define IntToxFixed(i) ((xFixed) (i) << XFIXED_BITS) + +#define xFixedToFloat(f) (((float) (f)) / 65536) + +#define PICT_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \ + ((type) << 16) | \ + ((a) << 12) | \ + ((r) << 8) | \ + ((g) << 4) | \ + ((b))) + +#define PICT_FORMAT_A(f) (((f) >> 12) & 0x0f) +#define PICT_FORMAT_RGB(f) (((f) ) & 0xfff) + +#define PICT_TYPE_OTHER 0 +#define PICT_TYPE_A 1 +#define PICT_TYPE_ARGB 2 +#define PICT_TYPE_ABGR 3 +#define PICT_TYPE_COLOR 4 +#define PICT_TYPE_GRAY 5 + +typedef enum _PictFormatShort { + PICT_a8r8g8b8 = PICT_FORMAT(32,PICT_TYPE_ARGB,8,8,8,8), + PICT_x8r8g8b8 = 
PICT_FORMAT(32,PICT_TYPE_ARGB,0,8,8,8), + PICT_a8b8g8r8 = PICT_FORMAT(32,PICT_TYPE_ABGR,8,8,8,8), + PICT_x8b8g8r8 = PICT_FORMAT(32,PICT_TYPE_ABGR,0,8,8,8), + +/* 24bpp formats */ + PICT_r8g8b8 = PICT_FORMAT(24,PICT_TYPE_ARGB,0,8,8,8), + PICT_b8g8r8 = PICT_FORMAT(24,PICT_TYPE_ABGR,0,8,8,8), + +/* 16bpp formats */ + PICT_r5g6b5 = PICT_FORMAT(16,PICT_TYPE_ARGB,0,5,6,5), + PICT_b5g6r5 = PICT_FORMAT(16,PICT_TYPE_ABGR,0,5,6,5), + + PICT_a1r5g5b5 = PICT_FORMAT(16,PICT_TYPE_ARGB,1,5,5,5), + PICT_x1r5g5b5 = PICT_FORMAT(16,PICT_TYPE_ARGB,0,5,5,5), + PICT_a1b5g5r5 = PICT_FORMAT(16,PICT_TYPE_ABGR,1,5,5,5), + PICT_x1b5g5r5 = PICT_FORMAT(16,PICT_TYPE_ABGR,0,5,5,5), + PICT_a4r4g4b4 = PICT_FORMAT(16,PICT_TYPE_ARGB,4,4,4,4), + PICT_x4r4g4b4 = PICT_FORMAT(16,PICT_TYPE_ARGB,0,4,4,4), + PICT_a4b4g4r4 = PICT_FORMAT(16,PICT_TYPE_ABGR,4,4,4,4), + PICT_x4b4g4r4 = PICT_FORMAT(16,PICT_TYPE_ABGR,0,4,4,4), + +/* 8bpp formats */ + PICT_a8 = PICT_FORMAT(8,PICT_TYPE_A,8,0,0,0), + PICT_r3g3b2 = PICT_FORMAT(8,PICT_TYPE_ARGB,0,3,3,2), + PICT_b2g3r3 = PICT_FORMAT(8,PICT_TYPE_ABGR,0,3,3,2), + PICT_a2r2g2b2 = PICT_FORMAT(8,PICT_TYPE_ARGB,2,2,2,2), + PICT_a2b2g2r2 = PICT_FORMAT(8,PICT_TYPE_ABGR,2,2,2,2), + + PICT_c8 = PICT_FORMAT(8,PICT_TYPE_COLOR,0,0,0,0), + PICT_g8 = PICT_FORMAT(8,PICT_TYPE_GRAY,0,0,0,0), + + PICT_x4a4 = PICT_FORMAT(8,PICT_TYPE_A,4,0,0,0), + + PICT_x4c4 = PICT_FORMAT(8,PICT_TYPE_COLOR,0,0,0,0), + PICT_x4g4 = PICT_FORMAT(8,PICT_TYPE_GRAY,0,0,0,0), + +/* 4bpp formats */ + PICT_a4 = PICT_FORMAT(4,PICT_TYPE_A,4,0,0,0), + PICT_r1g2b1 = PICT_FORMAT(4,PICT_TYPE_ARGB,0,1,2,1), + PICT_b1g2r1 = PICT_FORMAT(4,PICT_TYPE_ABGR,0,1,2,1), + PICT_a1r1g1b1 = PICT_FORMAT(4,PICT_TYPE_ARGB,1,1,1,1), + PICT_a1b1g1r1 = PICT_FORMAT(4,PICT_TYPE_ABGR,1,1,1,1), + + PICT_c4 = PICT_FORMAT(4,PICT_TYPE_COLOR,0,0,0,0), + PICT_g4 = PICT_FORMAT(4,PICT_TYPE_GRAY,0,0,0,0), + +/* 1bpp formats */ + PICT_a1 = PICT_FORMAT(1,PICT_TYPE_A,1,0,0,0), + + PICT_g1 = PICT_FORMAT(1,PICT_TYPE_GRAY,0,0,0,0), +} PictFormatShort; + 
diff --git a/programs/system/drivers/ati2d/common.h b/programs/system/drivers/ati2d/common.h index cde04cb4c..a598417fb 100644 --- a/programs/system/drivers/ati2d/common.h +++ b/programs/system/drivers/ati2d/common.h @@ -16,6 +16,10 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; +typedef unsigned char u8_t; +typedef unsigned short u16_t; +typedef unsigned int u32_t; + typedef unsigned int memType; typedef unsigned int size_t; diff --git a/programs/system/drivers/ati2d/r500.inc b/programs/system/drivers/ati2d/r500.inc index 8441a39b7..8751af0b9 100644 --- a/programs/system/drivers/ati2d/r500.inc +++ b/programs/system/drivers/ati2d/r500.inc @@ -1,5 +1,5 @@ -#define R300_TEST +//#define R300_TEST #include "r5xx_regs.h" @@ -54,6 +54,70 @@ #include "microcode.h" +#define RADEON_CLOCK_CNTL_DATA 0x000c + +#define RADEON_CLOCK_CNTL_INDEX 0x0008 +# define RADEON_PLL_WR_EN (1 << 7) +# define RADEON_PLL_DIV_SEL (3 << 8) +# define RADEON_PLL2_DIV_SEL_MASK ~(3 << 8) + +#define RADEON_MCLK_CNTL 0x0012 /* PLL */ +# define RADEON_FORCEON_MCLKA (1 << 16) +# define RADEON_FORCEON_MCLKB (1 << 17) +# define RADEON_FORCEON_YCLKA (1 << 18) +# define RADEON_FORCEON_YCLKB (1 << 19) +# define RADEON_FORCEON_MC (1 << 20) +# define RADEON_FORCEON_AIC (1 << 21) +# define R300_DISABLE_MC_MCLKA (1 << 21) +# define R300_DISABLE_MC_MCLKB (1 << 21) + + +void RADEONPllErrataAfterData() +{ + + /* This function is required to workaround a hardware bug in some (all?) + * revisions of the R300. This workaround should be called after every + * CLOCK_CNTL_INDEX register access. If not, register reads afterward + * may not be correct. 
+ */ + if (rhd.ChipSet <= RHD_RV380) + { + u32_t save, tmp; + + save = INREG(RADEON_CLOCK_CNTL_INDEX); + tmp = save & ~(0x3f | RADEON_PLL_WR_EN); + OUTREG(RADEON_CLOCK_CNTL_INDEX, tmp); + tmp = INREG(RADEON_CLOCK_CNTL_DATA); + OUTREG(RADEON_CLOCK_CNTL_INDEX, save); + } +} + + +/* Read PLL register */ +u32_t RADEONINPLL(int addr) +{ + u32_t data; + + OUTREG8(RADEON_CLOCK_CNTL_INDEX, addr & 0x3f); + //RADEONPllErrataAfterIndex(); + data = INREG(RADEON_CLOCK_CNTL_DATA); + RADEONPllErrataAfterData(); + + return data; +}; + +/* Write PLL information */ +void RADEONOUTPLL(int addr, u32_t data) +{ + OUTREG8(RADEON_CLOCK_CNTL_INDEX, (((addr) & 0x3f) | + RADEON_PLL_WR_EN)); +// RADEONPllErrataAfterIndex(info); + OUTREG(RADEON_CLOCK_CNTL_DATA, data); + RADEONPllErrataAfterData(); +} + + + static Bool R5xxFIFOWaitLocal(CARD32 required) //R100-R500 { @@ -120,6 +184,8 @@ static void R5xx2DReset() { CARD32 save, tmp; + u32_t clock_cntl_index; + u32_t mclk_cntl; /* The following RBBM_SOFT_RESET sequence can help un-wedge * an R300 after the command processor got stuck. */ @@ -143,6 +209,21 @@ R5xx2DReset() R5xx2DFlush(); +#if 0 + clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX); + RADEONPllErrataAfterIndex(info); + + mclk_cntl = RADEONINPLL(RADEON_MCLK_CNTL); + + RADEONOUTPLL(RADEON_MCLK_CNTL, (mclk_cntl | + RADEON_FORCEON_MCLKA | + RADEON_FORCEON_MCLKB | + RADEON_FORCEON_YCLKA | + RADEON_FORCEON_YCLKB | + RADEON_FORCEON_MC | + RADEON_FORCEON_AIC)); +#endif + /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some * unexpected behaviour on some machines. Here we use * R5XX_HOST_PATH_CNTL to reset it. */ @@ -162,6 +243,12 @@ R5xx2DReset() OUTREG(R5XX_HOST_PATH_CNTL, save | R5XX_HDP_SOFT_RESET); INREG(R5XX_HOST_PATH_CNTL); OUTREG(R5XX_HOST_PATH_CNTL, save); + +#if 0 + OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index); + RADEONPllErrataAfterIndex(info); + RADEONOUTPLL(RADEON_MCLK_CNTL, mclk_cntl); +#endif } void