qemu/hw/display/ati_2d.c

245 lines
10 KiB
C
Raw Normal View History

/*
* QEMU ATI SVGA emulation
* 2D engine functions
*
* Copyright (c) 2019 BALATON Zoltan
*
* This work is licensed under the GNU GPL license version 2 or later.
*/
#include "qemu/osdep.h"
#include "ati_int.h"
#include "ati_regs.h"
#include "qemu/log.h"
#include "ui/pixel_ops.h"
#include "ui/console.h"
/*
* NOTE:
* This is 2D _acceleration_ and supposed to be fast. Therefore, don't try to
* reinvent the wheel (unlikely to get better with a naive implementation than
* existing libraries) and avoid (poorly) reimplementing gfx primitives.
* That is unnecessary and would become a performance problem. Instead, try to
* map to and reuse existing optimised facilities (e.g. pixman) wherever
* possible.
*/
static int ati_bpp_from_datatype(ATIVGAState *s)
{
switch (s->regs.dp_datatype & 0xf) {
case 2:
return 8;
case 3:
case 4:
return 16;
case 5:
return 24;
case 6:
return 32;
default:
qemu_log_mask(LOG_UNIMP, "Unknown dst datatype %d\n",
s->regs.dp_datatype & 0xf);
return 0;
}
}
#define DEFAULT_CNTL (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL)
void ati_2d_blt(ATIVGAState *s)
{
/* FIXME it is probably more complex than this and may need to be */
/* rewritten but for now as a start just to get some output: */
DisplaySurface *ds = qemu_console_surface(s->vga.con);
DPRINTF("%p %u ds: %p %d %d rop: %x\n", s->vga.vram_ptr,
s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds),
surface_bits_per_pixel(ds),
(s->regs.dp_mix & GMC_ROP3_MASK) >> 16);
unsigned dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
unsigned dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
int bpp = ati_bpp_from_datatype(s);
if (!bpp) {
qemu_log_mask(LOG_GUEST_ERROR, "Invalid bpp\n");
return;
}
int dst_stride = DEFAULT_CNTL ? s->regs.dst_pitch : s->regs.default_pitch;
if (!dst_stride) {
qemu_log_mask(LOG_GUEST_ERROR, "Zero dest pitch\n");
return;
}
uint8_t *dst_bits = s->vga.vram_ptr + (DEFAULT_CNTL ?
s->regs.dst_offset : s->regs.default_offset);
if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
dst_bits += s->regs.crtc_offset & 0x07ffffff;
dst_stride *= bpp;
}
uint8_t *end = s->vga.vram_ptr + s->vga.vram_size;
if (dst_x > 0x3fff || dst_y > 0x3fff || dst_bits >= end
|| dst_bits + dst_x
+ (dst_y + s->regs.dst_height) * dst_stride >= end) {
qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n");
return;
}
DPRINTF("%d %d %d, %d %d %d, (%d,%d) -> (%d,%d) %dx%d %c %c\n",
s->regs.src_offset, s->regs.dst_offset, s->regs.default_offset,
s->regs.src_pitch, s->regs.dst_pitch, s->regs.default_pitch,
hw/display/ati_2d: Fix buffer overflow in ati_2d_blt (CVE-2021-3638) When building QEMU with DEBUG_ATI defined then running with '-device ati-vga,romfile="" -d unimp,guest_errors -trace ati\*' we get: ati_mm_write 4 0x16c0 DP_CNTL <- 0x1 ati_mm_write 4 0x146c DP_GUI_MASTER_CNTL <- 0x2 ati_mm_write 4 0x16c8 DP_MIX <- 0xff0000 ati_mm_write 4 0x16c4 DP_DATATYPE <- 0x2 ati_mm_write 4 0x224 CRTC_OFFSET <- 0x0 ati_mm_write 4 0x142c DST_PITCH_OFFSET <- 0xfe00000 ati_mm_write 4 0x1420 DST_Y <- 0x3fff ati_mm_write 4 0x1410 DST_HEIGHT <- 0x3fff ati_mm_write 4 0x1588 DST_WIDTH_X <- 0x3fff3fff ati_2d_blt: vram:0x7fff5fa00000 addr:0 ds:0x7fff61273800 stride:2560 bpp:32 rop:0xff ati_2d_blt: 0 0 0, 0 127 0, (0,0) -> (16383,16383) 16383x16383 > ^ ati_2d_blt: pixman_fill(dst:0x7fff5fa00000, stride:254, bpp:8, x:16383, y:16383, w:16383, h:16383, xor:0xff000000) Thread 3 "qemu-system-i38" received signal SIGSEGV, Segmentation fault. (gdb) bt #0 0x00007ffff7f62ce0 in sse2_fill.lto_priv () at /lib64/libpixman-1.so.0 #1 0x00007ffff7f09278 in pixman_fill () at /lib64/libpixman-1.so.0 #2 0x0000555557b5a9af in ati_2d_blt (s=0x631000028800) at hw/display/ati_2d.c:196 #3 0x0000555557b4b5a2 in ati_mm_write (opaque=0x631000028800, addr=5512, data=1073692671, size=4) at hw/display/ati.c:843 #4 0x0000555558b90ec4 in memory_region_write_accessor (mr=0x631000039cc0, addr=5512, ..., size=4, ...) at softmmu/memory.c:492 Commit 584acf34cb0 ("ati-vga: Fix reverse bit blts") introduced the local dst_x and dst_y which adjust the (x, y) coordinates depending on the direction in the SRCCOPY ROP3 operation, but forgot to address the same issue for the PATCOPY, BLACKNESS and WHITENESS operations, which also call pixman_fill(). Fix that now by using the adjusted coordinates in the pixman_fill call, and update the related debug printf(). Reported-by: Qiang Liu <qiangliu@zju.edu.cn> Fixes: 584acf34cb0 ("ati-vga: Fix reverse bit blts") Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Tested-by: Mauro Matteo Cascella <mcascell@redhat.com> Message-Id: <20210906153103.1661195-1-philmd@redhat.com> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2021-09-06 18:31:03 +03:00
s->regs.src_x, s->regs.src_y, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height,
(s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? '>' : '<'),
(s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? 'v' : '^'));
switch (s->regs.dp_mix & GMC_ROP3_MASK) {
case ROP3_SRCCOPY:
{
bool fallback = false;
unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height);
int src_stride = DEFAULT_CNTL ?
s->regs.src_pitch : s->regs.default_pitch;
if (!src_stride) {
qemu_log_mask(LOG_GUEST_ERROR, "Zero source pitch\n");
return;
}
uint8_t *src_bits = s->vga.vram_ptr + (DEFAULT_CNTL ?
s->regs.src_offset : s->regs.default_offset);
if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
src_bits += s->regs.crtc_offset & 0x07ffffff;
src_stride *= bpp;
}
if (src_x > 0x3fff || src_y > 0x3fff || src_bits >= end
|| src_bits + src_x
+ (src_y + s->regs.dst_height) * src_stride >= end) {
qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n");
return;
}
src_stride /= sizeof(uint32_t);
dst_stride /= sizeof(uint32_t);
DPRINTF("pixman_blt(%p, %p, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d)\n",
src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
src_x, src_y, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height);
#ifdef CONFIG_PIXMAN
if ((s->use_pixman & BIT(1)) &&
s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
src_stride, dst_stride, bpp, bpp,
src_x, src_y, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height);
} else if (s->use_pixman & BIT(1)) {
/* FIXME: We only really need a temporary if src and dst overlap */
int llb = s->regs.dst_width * (bpp / 8);
int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
s->regs.dst_height);
fallback = !pixman_blt((uint32_t *)src_bits, tmp,
src_stride, tmp_stride, bpp, bpp,
src_x, src_y, 0, 0,
s->regs.dst_width, s->regs.dst_height);
if (!fallback) {
fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
tmp_stride, dst_stride, bpp, bpp,
0, 0, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height);
}
g_free(tmp);
} else
#endif
{
fallback = true;
}
if (fallback) {
unsigned int y, i, j, bypp = bpp / 8;
unsigned int src_pitch = src_stride * sizeof(uint32_t);
unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
for (y = 0; y < s->regs.dst_height; y++) {
i = dst_x * bypp;
j = src_x * bypp;
if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
i += (dst_y + y) * dst_pitch;
j += (src_y + y) * src_pitch;
} else {
i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
}
memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
}
}
if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr +
s->regs.dst_offset +
dst_y * surface_stride(ds),
s->regs.dst_height * surface_stride(ds));
}
s->regs.dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
dst_x + s->regs.dst_width : dst_x);
s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
dst_y + s->regs.dst_height : dst_y);
break;
}
case ROP3_PATCOPY:
case ROP3_BLACKNESS:
case ROP3_WHITENESS:
{
uint32_t filler = 0;
switch (s->regs.dp_mix & GMC_ROP3_MASK) {
case ROP3_PATCOPY:
filler = s->regs.dp_brush_frgd_clr;
break;
case ROP3_BLACKNESS:
filler = 0xffUL << 24 | rgb_to_pixel32(s->vga.palette[0],
s->vga.palette[1], s->vga.palette[2]);
break;
case ROP3_WHITENESS:
filler = 0xffUL << 24 | rgb_to_pixel32(s->vga.palette[3],
s->vga.palette[4], s->vga.palette[5]);
break;
}
dst_stride /= sizeof(uint32_t);
DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
dst_bits, dst_stride, bpp, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height, filler);
#ifdef CONFIG_PIXMAN
if (!(s->use_pixman & BIT(0)) ||
!pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
s->regs.dst_width, s->regs.dst_height, filler))
#endif
{
/* fallback when pixman failed or we don't want to call it */
unsigned int x, y, i, bypp = bpp / 8;
unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
for (y = 0; y < s->regs.dst_height; y++) {
i = dst_x * bypp + (dst_y + y) * dst_pitch;
for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
stn_he_p(&dst_bits[i], bypp, filler);
}
}
}
if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr +
s->regs.dst_offset +
dst_y * surface_stride(ds),
s->regs.dst_height * surface_stride(ds));
}
s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
dst_y + s->regs.dst_height : dst_y);
break;
}
default:
qemu_log_mask(LOG_UNIMP, "Unimplemented ati_2d blt op %x\n",
(s->regs.dp_mix & GMC_ROP3_MASK) >> 16);
}
}