From 937de9a981a86be1e002cff58f02272610708fbb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Dec 2014 14:38:48 +0100 Subject: [PATCH 01/16] vga: use common endian swap macros The constant-expression bswap is provided by const_le32(), and GET_PLANE() can also be implemented using cpu_to_le32(). Remove the custom macros in vga.c. Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 65 +++++++++++++----------------------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index 886a4020e5..a8b5830a30 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -90,58 +90,27 @@ const uint8_t gr_mask[16] = { 0x00, /* 0x0f */ }; -#define cbswap_32(__x) \ -((uint32_t)( \ - (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \ - (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \ - (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \ - (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )) - -#if HOST_BIG_ENDIAN -#define PAT(x) cbswap_32(x) -#else -#define PAT(x) (x) -#endif - -#if HOST_BIG_ENDIAN -#define BIG 1 -#else -#define BIG 0 -#endif - -#if HOST_BIG_ENDIAN -#define GET_PLANE(data, p) (((data) >> (24 - (p) * 8)) & 0xff) -#else -#define GET_PLANE(data, p) (((data) >> ((p) * 8)) & 0xff) -#endif +#define GET_PLANE(data, p) ((cpu_to_le32(data) >> ((p) * 8)) & 0xff) static const uint32_t mask16[16] = { - PAT(0x00000000), - PAT(0x000000ff), - PAT(0x0000ff00), - PAT(0x0000ffff), - PAT(0x00ff0000), - PAT(0x00ff00ff), - PAT(0x00ffff00), - PAT(0x00ffffff), - PAT(0xff000000), - PAT(0xff0000ff), - PAT(0xff00ff00), - PAT(0xff00ffff), - PAT(0xffff0000), - PAT(0xffff00ff), - PAT(0xffffff00), - PAT(0xffffffff), + const_le32(0x00000000), + const_le32(0x000000ff), + const_le32(0x0000ff00), + const_le32(0x0000ffff), + const_le32(0x00ff0000), + const_le32(0x00ff00ff), + const_le32(0x00ffff00), + const_le32(0x00ffffff), + const_le32(0xff000000), + const_le32(0xff0000ff), + const_le32(0xff00ff00), + const_le32(0xff00ffff), + const_le32(0xffff0000), + const_le32(0xffff00ff), + const_le32(0xffffff00), + const_le32(0xffffffff), }; -#undef PAT - -#if HOST_BIG_ENDIAN -#define PAT(x) (x) -#else -#define PAT(x) cbswap_32(x) -#endif - static uint32_t expand4[256]; static uint16_t expand2[256]; static uint8_t expand4to8[16]; From f9b925fd41337027e959baeb23d714b5214cd5ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 9 Jan 2015 10:47:33 +0100 Subject: [PATCH 02/16] vga: introduce VGADisplayParams The next patches will introduce more parameters that cause a full refresh. Instead of adding arguments to get_offsets and lines to update_basic_params, do everything through a struct. Signed-off-by: Paolo Bonzini --- hw/display/cirrus_vga.c | 24 +++++------- hw/display/vga.c | 82 +++++++++++++++++------------------------ hw/display/vga_int.h | 15 ++++---- 3 files changed, 52 insertions(+), 69 deletions(-) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c index 5dd5136a0c..e637e5a816 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -798,9 +798,9 @@ static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s) if (blit_is_unsafe(s, false)) return 0; - return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr, - s->cirrus_blt_srcaddr - s->vga.start_addr, - s->cirrus_blt_width, s->cirrus_blt_height); + return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.params.start_addr, + s->cirrus_blt_srcaddr - s->vga.params.start_addr, + s->cirrus_blt_width, s->cirrus_blt_height); } /*************************************** @@ -1101,30 +1101,26 @@ static void cirrus_write_bitblt(CirrusVGAState * s, unsigned reg_value) * ***************************************/ -static void cirrus_get_offsets(VGACommonState *s1, - uint32_t *pline_offset, - uint32_t *pstart_addr, - uint32_t *pline_compare) +static void cirrus_get_params(VGACommonState *s1, + VGADisplayParams *params) { CirrusVGAState * s = container_of(s1, CirrusVGAState, vga); - uint32_t start_addr, line_offset, line_compare; + uint32_t line_offset; line_offset = s->vga.cr[0x13] | ((s->vga.cr[0x1b] & 0x10) << 4); line_offset <<= 3; - *pline_offset = line_offset; + params->line_offset = line_offset; - start_addr = (s->vga.cr[0x0c] << 8) + params->start_addr = (s->vga.cr[0x0c] << 8) | s->vga.cr[0x0d] | ((s->vga.cr[0x1b] & 0x01) << 16) | ((s->vga.cr[0x1b] & 0x0c) << 15) | ((s->vga.cr[0x1d] & 0x80) << 12); - *pstart_addr = start_addr; - line_compare = s->vga.cr[0x18] | + params->line_compare = s->vga.cr[0x18] | ((s->vga.cr[0x07] & 0x10) << 4) | ((s->vga.cr[0x09] & 0x40) << 3); - *pline_compare = line_compare; } static uint32_t cirrus_get_bpp16_depth(CirrusVGAState * s) @@ -2925,7 +2921,7 @@ void cirrus_init_common(CirrusVGAState *s, Object *owner, s->linear_mmio_mask = s->real_vram_size - 256; s->vga.get_bpp = cirrus_get_bpp; - s->vga.get_offsets = cirrus_get_offsets; + s->vga.get_params = cirrus_get_params; s->vga.get_resolution = cirrus_get_resolution; s->vga.cursor_invalidate = cirrus_cursor_invalidate; s->vga.cursor_draw_line = cirrus_cursor_draw_line; diff --git a/hw/display/vga.c b/hw/display/vga.c index a8b5830a30..fea26f9123 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -1042,52 +1042,40 @@ static int update_palette256(VGACommonState *s) return full_update; } -static void vga_get_offsets(VGACommonState *s, - uint32_t *pline_offset, - uint32_t *pstart_addr, - uint32_t *pline_compare) +static void vga_get_params(VGACommonState *s, + VGADisplayParams *params) { - uint32_t start_addr, line_offset, line_compare; - if (vbe_enabled(s)) { - line_offset = s->vbe_line_offset; - start_addr = s->vbe_start_addr; - line_compare = 65535; + params->line_offset = s->vbe_line_offset; + params->start_addr = s->vbe_start_addr; + params->line_compare = 65535; } else { /* compute line_offset in bytes */ - line_offset = s->cr[VGA_CRTC_OFFSET]; - line_offset <<= 3; + params->line_offset = s->cr[VGA_CRTC_OFFSET] << 3; /* starting address */ - start_addr = s->cr[VGA_CRTC_START_LO] | + params->start_addr = s->cr[VGA_CRTC_START_LO] | (s->cr[VGA_CRTC_START_HI] << 8); /* line compare */ - line_compare = s->cr[VGA_CRTC_LINE_COMPARE] | + params->line_compare = s->cr[VGA_CRTC_LINE_COMPARE] | ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4) | ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3); } - *pline_offset = line_offset; - *pstart_addr = start_addr; - *pline_compare = line_compare; } /* update start_addr and line_offset. Return TRUE if modified */ static int update_basic_params(VGACommonState *s) { int full_update; - uint32_t start_addr, line_offset, line_compare; + VGADisplayParams current; full_update = 0; - s->get_offsets(s, &line_offset, &start_addr, &line_compare); + s->get_params(s, ¤t); - if (line_offset != s->line_offset || - start_addr != s->start_addr || - line_compare != s->line_compare) { - s->line_offset = line_offset; - s->start_addr = start_addr; - s->line_compare = line_compare; + if (memcmp(¤t, &s->params, sizeof(current))) { + s->params = current; full_update = 1; } return full_update; @@ -1188,7 +1176,7 @@ static void vga_draw_text(VGACommonState *s, int full_update) } full_update |= update_basic_params(s); - line_offset = s->line_offset; + line_offset = s->params.line_offset; vga_get_text_resolution(s, &width, &height, &cw, &cheight); if ((height * width) <= 1) { @@ -1227,7 +1215,7 @@ static void vga_draw_text(VGACommonState *s, int full_update) } cursor_offset = ((s->cr[VGA_CRTC_CURSOR_HI] << 8) | - s->cr[VGA_CRTC_CURSOR_LO]) - s->start_addr; + s->cr[VGA_CRTC_CURSOR_LO]) - s->params.start_addr; if (cursor_offset != s->cursor_offset || s->cr[VGA_CRTC_CURSOR_START] != s->cursor_start || s->cr[VGA_CRTC_CURSOR_END] != s->cursor_end) { @@ -1241,7 +1229,7 @@ static void vga_draw_text(VGACommonState *s, int full_update) s->cursor_start = s->cr[VGA_CRTC_CURSOR_START]; s->cursor_end = s->cr[VGA_CRTC_CURSOR_END]; } - cursor_ptr = s->vram_ptr + (s->start_addr + cursor_offset) * 4; + cursor_ptr = s->vram_ptr + (s->params.start_addr + cursor_offset) * 4; if (now >= s->cursor_blink_time) { s->cursor_blink_time = now + VGA_TEXT_CURSOR_PERIOD_MS / 2; s->cursor_visible_phase = !s->cursor_visible_phase; @@ -1251,7 +1239,7 @@ static void vga_draw_text(VGACommonState *s, int full_update) linesize = surface_stride(surface); ch_attr_ptr = s->last_ch_attr; line = 0; - offset = s->start_addr * 4; + offset = s->params.start_addr * 4; for(cy = 0; cy < height; cy++) { d1 = dest; src = s->vram_ptr + offset; @@ -1331,7 +1319,7 @@ static void vga_draw_text(VGACommonState *s, int full_update) dest += linesize * cheight; line1 = line + cheight; offset += line_offset; - if (line < s->line_compare && line1 >= s->line_compare) { + if (line < s->params.line_compare && line1 >= s->params.line_compare) { offset = 0; } line = line1; @@ -1461,10 +1449,10 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) disp_width = width; depth = s->get_bpp(s); - region_start = (s->start_addr * 4); - region_end = region_start + (ram_addr_t)s->line_offset * height; + region_start = (s->params.start_addr * 4); + region_end = region_start + (ram_addr_t)s->params.line_offset * height; region_end += width * depth / 8; /* scanline length */ - region_end -= s->line_offset; + region_end -= s->params.line_offset; if (region_end > s->vbe_size || depth == 0 || depth == 15) { /* * We land here on: @@ -1529,7 +1517,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) share_surface = false; } - if (s->line_offset != s->last_line_offset || + if (s->params.line_offset != s->last_line_offset || disp_width != s->last_width || height != s->last_height || s->last_depth != depth || @@ -1540,12 +1528,12 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) s->last_scr_height = height; s->last_width = disp_width; s->last_height = height; - s->last_line_offset = s->line_offset; + s->last_line_offset = s->params.line_offset; s->last_depth = depth; s->last_byteswap = byteswap; full_update = 1; } - if (surface_data(surface) != s->vram_ptr + (s->start_addr * 4) + if (surface_data(surface) != s->vram_ptr + (s->params.start_addr * 4) && is_buffer_shared(surface)) { /* base address changed (page flip) -> shared display surfaces * must be updated with the new base address */ @@ -1555,8 +1543,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) if (full_update) { if (share_surface) { surface = qemu_create_displaysurface_from(disp_width, - height, format, s->line_offset, - s->vram_ptr + (s->start_addr * 4)); + height, format, s->params.line_offset, + s->vram_ptr + (s->params.start_addr * 4)); dpy_gfx_replace_surface(s->con, surface); } else { qemu_console_resize(s->con, disp_width, height); @@ -1620,9 +1608,9 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) #if 0 printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n", width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE], - s->line_compare, sr(s, VGA_SEQ_CLOCK_MODE)); + s->params.line_compare, sr(s, VGA_SEQ_CLOCK_MODE)); #endif - addr1 = (s->start_addr * 4); + addr1 = (s->params.start_addr * 4); bwidth = DIV_ROUND_UP(width * bits, 8); y_start = -1; d = surface_data(surface); @@ -1630,7 +1618,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) y1 = 0; if (!full_update) { - if (s->line_compare < height) { + if (s->params.line_compare < height) { /* split screen mode */ region_start = 0; } @@ -1686,14 +1674,14 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) if (!multi_run) { mask = (s->cr[VGA_CRTC_MODE] & 3) ^ 3; if ((y1 & mask) == mask) - addr1 += s->line_offset; + addr1 += s->params.line_offset; y1++; multi_run = multi_scan; } else { multi_run--; } /* line compare acts on the displayed lines */ - if (y == s->line_compare) + if (y == s->params.line_compare) addr1 = 0; d += linesize; } @@ -1810,9 +1798,7 @@ void vga_common_reset(VGACommonState *s) s->graphic_mode = -1; /* force full update */ s->shift_control = 0; s->double_scan = 0; - s->line_offset = 0; - s->line_compare = 0; - s->start_addr = 0; + memset(&s->params, '\0', sizeof(s->params)); s->plane_updated = 0; s->last_cw = 0; s->last_ch = 0; @@ -1934,7 +1920,7 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) /* Update "hardware" cursor */ cursor_offset = ((s->cr[VGA_CRTC_CURSOR_HI] << 8) | - s->cr[VGA_CRTC_CURSOR_LO]) - s->start_addr; + s->cr[VGA_CRTC_CURSOR_LO]) - s->params.start_addr; if (cursor_offset != s->cursor_offset || s->cr[VGA_CRTC_CURSOR_START] != s->cursor_start || s->cr[VGA_CRTC_CURSOR_END] != s->cursor_end || full_update) { @@ -1950,7 +1936,7 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) s->cursor_end = s->cr[VGA_CRTC_CURSOR_END]; } - src = (uint32_t *) s->vram_ptr + s->start_addr; + src = (uint32_t *) s->vram_ptr + s->params.start_addr; dst = chardata; if (full_update) { @@ -2195,7 +2181,7 @@ bool vga_common_init(VGACommonState *s, Object *obj, Error **errp) xen_register_framebuffer(&s->vram); s->vram_ptr = memory_region_get_ram_ptr(&s->vram); s->get_bpp = vga_get_bpp; - s->get_offsets = vga_get_offsets; + s->get_params = vga_get_params; s->get_resolution = vga_get_resolution; s->hw_ops = &vga_ops; switch (vga_retrace_method) { diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h index 7cf0d11201..6be61e0428 100644 --- a/hw/display/vga_int.h +++ b/hw/display/vga_int.h @@ -56,6 +56,12 @@ struct VGACommonState; typedef uint8_t (* vga_retrace_fn)(struct VGACommonState *s); typedef void (* vga_update_retrace_info_fn)(struct VGACommonState *s); +typedef struct VGADisplayParams { + uint32_t line_offset; + uint32_t start_addr; + uint32_t line_compare; +} VGADisplayParams; + typedef struct VGACommonState { MemoryRegion *legacy_address_space; uint8_t *vram_ptr; @@ -90,10 +96,7 @@ typedef struct VGACommonState { uint8_t palette[768]; int32_t bank_offset; int (*get_bpp)(struct VGACommonState *s); - void (*get_offsets)(struct VGACommonState *s, - uint32_t *pline_offset, - uint32_t *pstart_addr, - uint32_t *pline_compare); + void (*get_params)(struct VGACommonState *s, VGADisplayParams *params); void (*get_resolution)(struct VGACommonState *s, int *pwidth, int *pheight); @@ -111,9 +114,7 @@ typedef struct VGACommonState { int graphic_mode; uint8_t shift_control; uint8_t double_scan; - uint32_t line_offset; - uint32_t line_compare; - uint32_t start_addr; + VGADisplayParams params; uint32_t plane_updated; uint32_t last_line_offset; uint8_t last_cw, last_ch; From 9b53b95a1c3b46e5a54734a46f37790460c9265e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Dec 2014 14:43:42 +0100 Subject: [PATCH 03/16] vga: mask addresses in non-VESA modes to 256k This allows setting the start address to a high value, and reading the bottom of the screen from the beginning of VRAM. Commander Keen 4 ("Goodbye, Galaxy!") relies on this behavior. Signed-off-by: Paolo Bonzini --- hw/display/vga-helpers.h | 9 +++++---- hw/display/vga.c | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h index 10e9cfd40a..83b9a15604 100644 --- a/hw/display/vga-helpers.h +++ b/hw/display/vga-helpers.h @@ -108,7 +108,7 @@ static void vga_draw_line2(VGACommonState *vga, uint8_t *d, plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; width >>= 3; for(x = 0; x < width; x++) { - data = vga_read_dword_le(vga, addr); + data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); data &= plane_mask; v = expand2[GET_PLANE(data, 0)]; v |= expand2[GET_PLANE(data, 2)] << 2; @@ -144,7 +144,7 @@ static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d, plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; width >>= 3; for(x = 0; x < width; x++) { - data = vga_read_dword_le(vga, addr); + data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); data &= plane_mask; v = expand2[GET_PLANE(data, 0)]; v |= expand2[GET_PLANE(data, 2)] << 2; @@ -177,7 +177,7 @@ static void vga_draw_line4(VGACommonState *vga, uint8_t *d, plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; width >>= 3; for(x = 0; x < width; x++) { - data = vga_read_dword_le(vga, addr); + data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); data &= plane_mask; v = expand4[GET_PLANE(data, 0)]; v |= expand4[GET_PLANE(data, 1)] << 1; @@ -209,7 +209,7 @@ static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d, plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; width >>= 3; for(x = 0; x < width; x++) { - data = vga_read_dword_le(vga, addr); + data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); data &= plane_mask; v = expand4[GET_PLANE(data, 0)]; v |= expand4[GET_PLANE(data, 1)] << 1; @@ -242,6 +242,7 @@ static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d, palette = vga->last_palette; width >>= 3; for(x = 0; x < width; x++) { + addr &= VGA_VRAM_SIZE - 1; PUT_PIXEL2(d, 0, palette[vga_read_byte(vga, addr + 0)]); PUT_PIXEL2(d, 1, palette[vga_read_byte(vga, addr + 1)]); PUT_PIXEL2(d, 2, palette[vga_read_byte(vga, addr + 2)]); diff --git a/hw/display/vga.c b/hw/display/vga.c index fea26f9123..5bf4d14f34 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -47,6 +47,9 @@ bool have_vga = true; /* 16 state changes per vertical frame @60 Hz */ #define VGA_TEXT_CURSOR_PERIOD_MS (1000 * 2 * 16 / 60) +/* Address mask for non-VESA modes. */ +#define VGA_VRAM_SIZE (256 * KiB) + /* * Video Graphics Array (VGA) * From 973a724eb006f674301a0c45f34b3c08dee0fe49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Dec 2014 14:48:14 +0100 Subject: [PATCH 04/16] vga: implement horizontal pel panning in graphics modes This implements smooth scrolling, as used for example by Commander Keen and Second Reality. Unfortunately, this is not enough to avoid tearing in Commander Keen, because sometimes the wrong start address is used for a frame. On real EGA, the panning register is sampled on every line, while the display start is latched for the next frame at the start of the vertical retrace. On real VGA, the panning register is also latched, but at the end of the vertical retrace. It looks like Keen exploits this by only waiting for horizontal retrace when setting the display start, but implementing it breaks the 256-color Keen games... Signed-off-by: Paolo Bonzini --- hw/display/cirrus_vga.c | 4 ++ hw/display/vga-helpers.h | 100 ++++++++++++++++++++++++++++----------- hw/display/vga.c | 36 ++++++++++++-- hw/display/vga_int.h | 3 ++ 4 files changed, 111 insertions(+), 32 deletions(-) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c index e637e5a816..150883a971 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -43,6 +43,7 @@ #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "ui/pixel_ops.h" +#include "vga_regs.h" #include "cirrus_vga_internal.h" #include "qom/object.h" #include "ui/console.h" @@ -1121,6 +1122,9 @@ static void cirrus_get_params(VGACommonState *s1, params->line_compare = s->vga.cr[0x18] | ((s->vga.cr[0x07] & 0x10) << 4) | ((s->vga.cr[0x09] & 0x40) << 3); + + params->hpel = s->vga.ar[VGA_ATC_PEL]; + params->hpel_split = s->vga.ar[VGA_ATC_MODE] & 0x20; } static uint32_t cirrus_get_bpp16_depth(CirrusVGAState * s) diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h index 83b9a15604..29933562c4 100644 --- a/hw/display/vga-helpers.h +++ b/hw/display/vga-helpers.h @@ -98,14 +98,19 @@ static void vga_draw_glyph9(uint8_t *d, int linesize, /* * 4 color mode */ -static void vga_draw_line2(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line2(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t plane_mask, *palette, data, v; int x; palette = vga->last_palette; plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; + hpel &= 7; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); @@ -126,6 +131,7 @@ static void vga_draw_line2(VGACommonState *vga, uint8_t *d, d += 32; addr += 4; } + return hpel ? vga->panning_buf + 4 * hpel : NULL; } #define PUT_PIXEL2(d, n, v) \ @@ -134,14 +140,19 @@ static void vga_draw_line2(VGACommonState *vga, uint8_t *d, /* * 4 color mode, dup2 horizontal */ -static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line2d2(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t plane_mask, *palette, data, v; int x; palette = vga->last_palette; plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; + hpel &= 7; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); @@ -162,19 +173,25 @@ static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d, d += 64; addr += 4; } + return hpel ? vga->panning_buf + 8 * hpel : NULL; } /* * 16 color mode */ -static void vga_draw_line4(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line4(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t plane_mask, data, v, *palette; int x; palette = vga->last_palette; plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; + hpel &= 7; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); @@ -194,19 +211,25 @@ static void vga_draw_line4(VGACommonState *vga, uint8_t *d, d += 32; addr += 4; } + return hpel ? vga->panning_buf + 4 * hpel : NULL; } /* * 16 color mode, dup2 horizontal */ -static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line4d2(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t plane_mask, data, v, *palette; int x; palette = vga->last_palette; plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf]; + hpel &= 7; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1)); @@ -226,6 +249,7 @@ static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d, d += 64; addr += 4; } + return hpel ? vga->panning_buf + 8 * hpel : NULL; } /* @@ -233,13 +257,18 @@ static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d, * * XXX: add plane_mask support (never used in standard VGA modes) */ -static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line8d2(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t *palette; int x; palette = vga->last_palette; + hpel = (hpel >> 1) & 3; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { addr &= VGA_VRAM_SIZE - 1; @@ -250,6 +279,7 @@ static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d, d += 32; addr += 4; } + return hpel ? vga->panning_buf + 8 * hpel : NULL; } /* @@ -257,13 +287,18 @@ static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d, * * XXX: add plane_mask support (never used in standard VGA modes) */ -static void vga_draw_line8(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line8(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { uint32_t *palette; int x; palette = vga->last_palette; + hpel = (hpel >> 1) & 3; + if (hpel) { + width += 8; + d = vga->panning_buf; + } width >>= 3; for(x = 0; x < width; x++) { ((uint32_t *)d)[0] = palette[vga_read_byte(vga, addr + 0)]; @@ -277,13 +312,14 @@ static void vga_draw_line8(VGACommonState *vga, uint8_t *d, d += 32; addr += 8; } + return hpel ? vga->panning_buf + 4 * hpel : NULL; } /* * 15 bit color */ -static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line15_le(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t v, r, g, b; @@ -298,10 +334,11 @@ static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d, addr += 2; d += 4; } while (--w != 0); + return NULL; } -static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line15_be(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t v, r, g, b; @@ -316,13 +353,14 @@ static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d, addr += 2; d += 4; } while (--w != 0); + return NULL; } /* * 16 bit color */ -static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line16_le(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t v, r, g, b; @@ -337,10 +375,11 @@ static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d, addr += 2; d += 4; } while (--w != 0); + return NULL; } -static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line16_be(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t v, r, g, b; @@ -355,13 +394,14 @@ static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d, addr += 2; d += 4; } while (--w != 0); + return NULL; } /* * 24 bit color */ -static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line24_le(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t r, g, b; @@ -375,10 +415,11 @@ static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d, addr += 3; d += 4; } while (--w != 0); + return NULL; } -static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line24_be(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t r, g, b; @@ -392,13 +433,14 @@ static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d, addr += 3; d += 4; } while (--w != 0); + return NULL; } /* * 32 bit color */ -static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line32_le(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t r, g, b; @@ -412,10 +454,11 @@ static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d, addr += 4; d += 4; } while (--w != 0); + return NULL; } -static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d, - uint32_t addr, int width) +static void *vga_draw_line32_be(VGACommonState *vga, uint8_t *d, + uint32_t addr, int width, int hpel) { int w; uint32_t r, g, b; @@ -429,4 +472,5 @@ static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d, addr += 4; d += 4; } while (--w != 0); + return NULL; } diff --git a/hw/display/vga.c b/hw/display/vga.c index 5bf4d14f34..a39f802ab7 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -50,6 +50,13 @@ bool have_vga = true; /* Address mask for non-VESA modes. */ #define VGA_VRAM_SIZE (256 * KiB) +/* This value corresponds to a shift of zero pixels + * in 9-dot text mode. In other modes, bit 3 is undefined; + * we just ignore it, so that 8 corresponds to zero pixels + * in all modes. + */ +#define VGA_HPEL_NEUTRAL 8 + /* * Video Graphics Array (VGA) * @@ -984,8 +991,8 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) } } -typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d, - uint32_t srcaddr, int width); +typedef void *vga_draw_line_func(VGACommonState *s1, uint8_t *d, + uint32_t srcaddr, int width, int hpel); #include "vga-access.h" #include "vga-helpers.h" @@ -1052,6 +1059,8 @@ static void vga_get_params(VGACommonState *s, params->line_offset = s->vbe_line_offset; params->start_addr = s->vbe_start_addr; params->line_compare = 65535; + params->hpel = VGA_HPEL_NEUTRAL; + params->hpel_split = false; } else { /* compute line_offset in bytes */ params->line_offset = s->cr[VGA_CRTC_OFFSET] << 3; @@ -1064,6 +1073,9 @@ static void vga_get_params(VGACommonState *s, params->line_compare = s->cr[VGA_CRTC_LINE_COMPARE] | ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4) | ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3); + + params->hpel = s->ar[VGA_ATC_PEL]; + params->hpel_split = s->ar[VGA_ATC_MODE] & 0x20; } } @@ -1435,6 +1447,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) ram_addr_t page0, page1, region_start, region_end; DirtyBitmapSnapshot *snap = NULL; int disp_width, multi_scan, multi_run; + int hpel; uint8_t *d; uint32_t v, addr1, addr; vga_draw_line_func *vga_draw_line = NULL; @@ -1534,6 +1547,9 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) s->last_line_offset = s->params.line_offset; s->last_depth = depth; s->last_byteswap = byteswap; + /* 16 extra pixels are needed for double-width planar modes. */ + s->panning_buf = g_realloc(s->panning_buf, + (disp_width + 16) * sizeof(uint32_t)); full_update = 1; } if (surface_data(surface) != s->vram_ptr + (s->params.start_addr * 4) @@ -1613,8 +1629,12 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE], s->params.line_compare, sr(s, VGA_SEQ_CLOCK_MODE)); #endif + hpel = bits <= 8 ? s->params.hpel : 0; addr1 = (s->params.start_addr * 4); bwidth = DIV_ROUND_UP(width * bits, 8); + if (hpel) { + bwidth += 4; + } y_start = -1; d = surface_data(surface); linesize = surface_stride(surface); @@ -1662,7 +1682,11 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) if (y_start < 0) y_start = y; if (!(is_buffer_shared(surface))) { - vga_draw_line(s, d, addr, width); + uint8_t *p; + p = vga_draw_line(s, d, addr, width, hpel); + if (p) { + memcpy(d, p, disp_width * sizeof(uint32_t)); + } if (s->cursor_draw_line) s->cursor_draw_line(s, d, y); } @@ -1684,8 +1708,12 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) multi_run--; } /* line compare acts on the displayed lines */ - if (y == s->params.line_compare) + if (y == s->params.line_compare) { + if (s->params.hpel_split) { + hpel = VGA_HPEL_NEUTRAL; + } addr1 = 0; + } d += linesize; } if (y_start >= 0) { diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h index 6be61e0428..876a1d3697 100644 --- a/hw/display/vga_int.h +++ b/hw/display/vga_int.h @@ -60,6 +60,8 @@ typedef struct VGADisplayParams { uint32_t line_offset; uint32_t start_addr; uint32_t line_compare; + uint8_t hpel; + bool hpel_split; } VGADisplayParams; typedef struct VGACommonState { @@ -111,6 +113,7 @@ typedef struct VGACommonState { /* display refresh support */ QemuConsole *con; uint32_t font_offsets[2]; + uint8_t *panning_buf; int graphic_mode; uint8_t shift_control; uint8_t double_scan; From 4d6c310502a4d825ae27a3bf8905e70447d74671 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Dec 2014 14:46:59 +0100 Subject: [PATCH 05/16] vga: optimize horizontal pel panning in 256-color modes Do not go through the panning buffer unless the address wraps in the middle of the line. Signed-off-by: Paolo Bonzini --- hw/display/vga-helpers.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h index 29933562c4..2029b61791 100644 --- a/hw/display/vga-helpers.h +++ b/hw/display/vga-helpers.h @@ -265,6 +265,18 @@ static void *vga_draw_line8d2(VGACommonState *vga, uint8_t *d, palette = vga->last_palette; hpel = (hpel >> 1) & 3; + + /* For 256 color modes, we can adjust the source address and write directly + * to the destination, even if horizontal pel panning is active. However, + * the loop below assumes that the address does not wrap in the middle of a + * plane. If that happens... + */ + if (addr + (width >> 3) * 4 < VGA_VRAM_SIZE) { + addr += hpel * 4; + hpel = 0; + } + + /* ... use the panning buffer as in planar modes. */ if (hpel) { width += 8; d = vga->panning_buf; From ae9d71a003c8cfd2f035c8f768341a40bcef8ed4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Dec 2014 22:41:24 +0100 Subject: [PATCH 06/16] vga: reindent memory access code The next patch will reuse latched memory access in text modes. Start with a patch that moves the latched access code out of the "if". Best reviewed with "git diff -b". Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 211 ++++++++++++++++++++++++----------------------- 1 file changed, 110 insertions(+), 101 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index a39f802ab7..f89409f8f2 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -815,37 +815,41 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr) } if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) { - /* chain 4 mode : simplest access */ + /* chain 4 mode : simplest access (but it should use the same + * algorithms as below; see e.g. vga_mem_writeb's plane mask check). + */ assert(addr < s->vram_size); - ret = s->vram_ptr[addr]; - } else if (s->gr[VGA_GFX_MODE] & 0x10) { + return s->vram_ptr[addr]; + } + + if (s->gr[VGA_GFX_MODE] & 0x10) { /* odd/even mode (aka text mode mapping) */ plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1); addr = ((addr & ~1) << 1) | plane; if (addr >= s->vram_size) { return 0xff; } - ret = s->vram_ptr[addr]; - } else { - /* standard VGA latched access */ - if (addr * sizeof(uint32_t) >= s->vram_size) { - return 0xff; - } - s->latch = ((uint32_t *)s->vram_ptr)[addr]; - - if (!(s->gr[VGA_GFX_MODE] & 0x08)) { - /* read mode 0 */ - plane = s->gr[VGA_GFX_PLANE_READ]; - ret = GET_PLANE(s->latch, plane); - } else { - /* read mode 1 */ - ret = (s->latch ^ mask16[s->gr[VGA_GFX_COMPARE_VALUE]]) & - mask16[s->gr[VGA_GFX_COMPARE_MASK]]; - ret |= ret >> 16; - ret |= ret >> 8; - ret = (~ret) & 0xff; - } + return s->vram_ptr[addr]; } + + /* standard VGA latched access */ + plane = s->gr[VGA_GFX_PLANE_READ]; + if (addr * sizeof(uint32_t) >= s->vram_size) { + return 0xff; + } + s->latch = ((uint32_t *)s->vram_ptr)[addr]; + if (!(s->gr[VGA_GFX_MODE] & 0x08)) { + /* read mode 0 */ + ret = GET_PLANE(s->latch, plane); + } else { + /* read mode 1 */ + ret = (s->latch ^ mask16[s->gr[VGA_GFX_COMPARE_VALUE]]) & + mask16[s->gr[VGA_GFX_COMPARE_MASK]]; + ret |= ret >> 16; + ret |= ret >> 8; + ret = (~ret) & 0xff; + } + return ret; } @@ -895,7 +899,10 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) s->plane_updated |= mask; /* only used to detect font change */ memory_region_set_dirty(&s->vram, addr, 1); } - } else if (s->gr[VGA_GFX_MODE] & 0x10) { + return; + } + + if (s->gr[VGA_GFX_MODE] & 0x10) { /* odd/even mode (aka text mode mapping) */ plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1); mask = (1 << plane); @@ -911,84 +918,86 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) s->plane_updated |= mask; /* only used to detect font change */ memory_region_set_dirty(&s->vram, addr, 1); } - } else { - /* standard VGA latched access */ - write_mode = s->gr[VGA_GFX_MODE] & 3; - switch(write_mode) { - default: - case 0: - /* rotate */ - b = s->gr[VGA_GFX_DATA_ROTATE] & 7; - val = ((val >> b) | (val << (8 - b))) & 0xff; - val |= val << 8; - val |= val << 16; - - /* apply set/reset mask */ - set_mask = mask16[s->gr[VGA_GFX_SR_ENABLE]]; - val = (val & ~set_mask) | - (mask16[s->gr[VGA_GFX_SR_VALUE]] & set_mask); - bit_mask = s->gr[VGA_GFX_BIT_MASK]; - break; - case 1: - val = s->latch; - goto do_write; - case 2: - val = mask16[val & 0x0f]; - bit_mask = s->gr[VGA_GFX_BIT_MASK]; - break; - case 3: - /* rotate */ - b = s->gr[VGA_GFX_DATA_ROTATE] & 7; - val = (val >> b) | (val << (8 - b)); - - bit_mask = s->gr[VGA_GFX_BIT_MASK] & val; - val = mask16[s->gr[VGA_GFX_SR_VALUE]]; - break; - } - - /* apply logical operation */ - func_select = s->gr[VGA_GFX_DATA_ROTATE] >> 3; - switch(func_select) { - case 0: - default: - /* nothing to do */ - break; - case 1: - /* and */ - val &= s->latch; - break; - case 2: - /* or */ - val |= s->latch; - break; - case 3: - /* xor */ - val ^= s->latch; - break; - } - - /* apply bit mask */ - bit_mask |= bit_mask << 8; - bit_mask |= bit_mask << 16; - val = (val & bit_mask) | (s->latch & ~bit_mask); - - do_write: - /* mask data according to sr[2] */ - mask = sr(s, VGA_SEQ_PLANE_WRITE); - s->plane_updated |= mask; /* only used to detect font change */ - write_mask = mask16[mask]; - if (addr * sizeof(uint32_t) >= s->vram_size) { - return; - } - ((uint32_t *)s->vram_ptr)[addr] = - (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) | - (val & write_mask); -#ifdef DEBUG_VGA_MEM - printf("vga: latch: [0x" HWADDR_FMT_plx "] mask=0x%08x val=0x%08x\n", - addr * 4, write_mask, val); -#endif - memory_region_set_dirty(&s->vram, addr << 2, sizeof(uint32_t)); + return; } + + mask = sr(s, VGA_SEQ_PLANE_WRITE); + + /* standard VGA latched access */ + write_mode = s->gr[VGA_GFX_MODE] & 3; + switch(write_mode) { + default: + case 0: + /* rotate */ + b = s->gr[VGA_GFX_DATA_ROTATE] & 7; + val = ((val >> b) | (val << (8 - b))) & 0xff; + val |= val << 8; + val |= val << 16; + + /* apply set/reset mask */ + set_mask = mask16[s->gr[VGA_GFX_SR_ENABLE]]; + val = (val & ~set_mask) | + (mask16[s->gr[VGA_GFX_SR_VALUE]] & set_mask); + bit_mask = s->gr[VGA_GFX_BIT_MASK]; + break; + case 1: + val = s->latch; + goto do_write; + case 2: + val = mask16[val & 0x0f]; + bit_mask = s->gr[VGA_GFX_BIT_MASK]; + break; + case 3: + /* rotate */ + b = s->gr[VGA_GFX_DATA_ROTATE] & 7; + val = (val >> b) | (val << (8 - b)); + + bit_mask = s->gr[VGA_GFX_BIT_MASK] & val; + val = mask16[s->gr[VGA_GFX_SR_VALUE]]; + break; + } + + /* apply logical operation */ + func_select = s->gr[VGA_GFX_DATA_ROTATE] >> 3; + switch(func_select) { + case 0: + default: + /* nothing to do */ + break; + case 1: + /* and */ + val &= s->latch; + break; + case 2: + /* or */ + val |= s->latch; + break; + case 3: + /* xor */ + val ^= s->latch; + break; + } + + /* apply bit mask */ + bit_mask |= bit_mask << 8; + bit_mask |= bit_mask << 16; + val = (val & bit_mask) | (s->latch & ~bit_mask); + +do_write: + /* mask data according to sr[2] */ + s->plane_updated |= mask; /* only used to detect font change */ + write_mask = mask16[mask]; + if (addr * sizeof(uint32_t) >= s->vram_size) { + return; + } + ((uint32_t *)s->vram_ptr)[addr] = + (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) | + (val & write_mask); +#ifdef DEBUG_VGA_MEM + printf("vga: latch: [0x" HWADDR_FMT_plx "] mask=0x%08x val=0x%08x\n", + addr * 4, write_mask, val); +#endif + memory_region_set_dirty(&s->vram, addr << 2, sizeof(uint32_t)); } typedef void *vga_draw_line_func(VGACommonState *s1, uint8_t *d, From 3f83435042af241ea8773d04ef3726ce4146bfca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Dec 2014 22:42:06 +0100 Subject: [PATCH 07/16] vga: use latches in odd/even mode too Jazz Jackrabbit uses odd/even mode with 256-color graphics. This is probably so that it can do very fast blitting with a decent resolution (two pixels, compared to four pixels for "regular" mode X). Accesses still use all planes (reads go to the latches and the game uses read mode 1 so that the CPU always gets 0xFF; writes use the plane mask register because the game sets bit 2 of the sequencer's memory mode register). For this to work, QEMU needs to use the code for latched memory accesses in odd/even mode. The only difference between odd/even mode and "regular" planar mode is how the plane is computed in read mode 0, and how the planes are masked if the aforementioned bit 2 is reset. It is almost enough to fix the game. You also need to honor byte/word mode selection, which is done in the next patch. Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index f89409f8f2..d1ef716642 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -825,15 +825,12 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr) if (s->gr[VGA_GFX_MODE] & 0x10) { /* odd/even mode (aka text mode mapping) */ plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1); - addr = ((addr & ~1) << 1) | plane; - if (addr >= s->vram_size) { - return 0xff; - } - return s->vram_ptr[addr]; + addr >>= 1; + } else { + /* standard VGA latched access */ + plane = s->gr[VGA_GFX_PLANE_READ]; } - /* standard VGA latched access */ - plane = s->gr[VGA_GFX_PLANE_READ]; if (addr * sizeof(uint32_t) >= s->vram_size) { return 0xff; } @@ -886,11 +883,12 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) break; } + mask = sr(s, VGA_SEQ_PLANE_WRITE); if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) { /* chain 4 mode : simplest access */ plane = addr & 3; - mask = (1 << plane); - if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) { + mask &= (1 << plane); + if (mask) { assert(addr < s->vram_size); s->vram_ptr[addr] = val; #ifdef DEBUG_VGA_MEM @@ -902,26 +900,14 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) return; } - if (s->gr[VGA_GFX_MODE] & 0x10) { - /* odd/even mode (aka text mode mapping) */ - plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1); - mask = (1 << plane); - if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) { - addr = ((addr & ~1) << 1) | plane; - if (addr >= s->vram_size) { - return; - } - s->vram_ptr[addr] = val; -#ifdef DEBUG_VGA_MEM - printf("vga: odd/even: [0x" HWADDR_FMT_plx "]\n", addr); -#endif - s->plane_updated |= mask; /* only used to detect font change */ - memory_region_set_dirty(&s->vram, addr, 1); - } - return; + if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0) { + mask &= (addr & 1) ? 0x0a : 0x05; } - mask = sr(s, VGA_SEQ_PLANE_WRITE); + if (s->gr[VGA_GFX_MODE] & 0x10) { + /* odd/even mode (aka text mode mapping) */ + addr >>= 1; + } /* standard VGA latched access */ write_mode = s->gr[VGA_GFX_MODE] & 3; From 435262605fa2737af73b7057f525f8fe221c1006 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Dec 2014 14:56:24 +0100 Subject: [PATCH 08/16] vga: sort-of implement word and double-word access modes Jazz Jackrabbit has a very unusual VGA setup, where it uses odd/even mode with 256-color graphics. Probably, it wants to use fast VRAM-to-VRAM copies without having to store 4 copies of the sprites as needed in mode X, one for each mod-4 alignment; odd/even mode simplifies the code a lot if it's okay to place on a 160-pixels horizontal grid. At the same time, because it wants to use double buffering (a la "mode X") it uses byte mode, not word mode as is the case in text modes. In order to implement the combination of odd/even mode (plane number comes from bit 0 of the address) and byte mode (use all bytes of VRAM, whereas word mode only uses bytes 0, 2, 4,... on each of the four planes), we need to separate the effect on the plane number from the effect on the address. Implementing the modes properly is a mess in QEMU, because it would change the layout of VRAM and break migration. As an approximation, shift right when the CPU accesses memory instead of shifting left when the CRT controller reads it. A hack is needed in order to write font data properly (see comment in the code), but it works well enough for the game. Because doubleword and chain4 modes are now independent, chain4 does not assert anymore that the address is in range. Instead it just returns all ones and discards writes, like other modes. Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 89 +++++++++++++++++++++++++++++++------------ hw/display/vga_regs.h | 4 ++ 2 files changed, 69 insertions(+), 24 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index d1ef716642..bc5b83421b 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -815,25 +815,40 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr) } if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) { - /* chain 4 mode : simplest access (but it should use the same - * algorithms as below; see e.g. vga_mem_writeb's plane mask check). - */ - assert(addr < s->vram_size); - return s->vram_ptr[addr]; - } - - if (s->gr[VGA_GFX_MODE] & 0x10) { + /* chain4 mode */ + plane = addr & 3; + addr &= ~3; + } else if (s->gr[VGA_GFX_MODE] & VGA_GR05_HOST_ODD_EVEN) { /* odd/even mode (aka text mode mapping) */ plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1); - addr >>= 1; } else { /* standard VGA latched access */ plane = s->gr[VGA_GFX_PLANE_READ]; } + if (s->gr[VGA_GFX_MISC] & VGA_GR06_CHAIN_ODD_EVEN) { + addr &= ~1; + } + + /* Doubleword/word mode. See comment in vga_mem_writeb */ + if (s->cr[VGA_CRTC_UNDERLINE] & VGA_CR14_DW) { + addr >>= 2; + } else if ((s->gr[VGA_GFX_MODE] & VGA_GR05_HOST_ODD_EVEN) && + (s->cr[VGA_CRTC_MODE] & VGA_CR17_WORD_BYTE) == 0) { + addr >>= 1; + } + if (addr * sizeof(uint32_t) >= s->vram_size) { return 0xff; } + + if (s->sr[VGA_SEQ_MEMORY_MODE] & VGA_SR04_CHN_4M) { + /* chain 4 mode: simplified access (but it should use the same + * algorithms as below, see e.g. vga_mem_writeb's plane mask check). + */ + return s->vram_ptr[(addr << 2) | plane]; + } + s->latch = ((uint32_t *)s->vram_ptr)[addr]; if (!(s->gr[VGA_GFX_MODE] & 0x08)) { /* read mode 0 */ @@ -853,8 +868,9 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr) /* called for accesses between 0xa0000 and 0xc0000 */ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) { - int memory_map_mode, plane, write_mode, b, func_select, mask; + int memory_map_mode, write_mode, b, func_select, mask; uint32_t write_mask, bit_mask, set_mask; + int plane = 0; #ifdef DEBUG_VGA_MEM printf("vga: [0x" HWADDR_FMT_plx "] = 0x%02x\n", addr, val); @@ -888,9 +904,46 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) /* chain 4 mode : simplest access */ plane = addr & 3; mask &= (1 << plane); + addr &= ~3; + } else { + if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0) { + mask &= (addr & 1) ? 0x0a : 0x05; + } + if (s->gr[VGA_GFX_MISC] & VGA_GR06_CHAIN_ODD_EVEN) { + addr &= ~1; + } + } + + /* Doubleword/word mode. These should be honored when displaying, + * not when reading/writing to memory! For example, chain4 modes + * use double-word mode and, on real hardware, would fetch bytes + * 0,1,2,3, 16,17,18,19, 32,33,34,35, etc. Text modes use word + * mode and, on real hardware, would fetch bytes 0,1, 8,9, etc. + * + * QEMU instead shifted addresses on memory accesses because it + * allows more optimizations (e.g. chain4_alias) and simplifies + * the draw_line handlers. Unfortunately, there is one case where + * the difference shows. When fetching font data, accesses are + * always in consecutive bytes, even if the text/attribute pairs + * are done in word mode. Hence, doing a right shift when operating + * on font data is wrong. So check the odd/even mode bits together with + * word mode bit. The odd/even read bit is 0 when reading font data, + * and the odd/even write bit is 1 when writing it. + */ + if (s->cr[VGA_CRTC_UNDERLINE] & VGA_CR14_DW) { + addr >>= 2; + } else if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0 && + (s->cr[VGA_CRTC_MODE] & VGA_CR17_WORD_BYTE) == 0) { + addr >>= 1; + } + + if (addr * sizeof(uint32_t) >= s->vram_size) { + return; + } + + if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) { if (mask) { - assert(addr < s->vram_size); - s->vram_ptr[addr] = val; + s->vram_ptr[(addr << 2) | plane] = val; #ifdef DEBUG_VGA_MEM printf("vga: chain4: [0x" HWADDR_FMT_plx "]\n", addr); #endif @@ -900,15 +953,6 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val) return; } - if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0) { - mask &= (addr & 1) ? 0x0a : 0x05; - } - - if (s->gr[VGA_GFX_MODE] & 0x10) { - /* odd/even mode (aka text mode mapping) */ - addr >>= 1; - } - /* standard VGA latched access */ write_mode = s->gr[VGA_GFX_MODE] & 3; switch(write_mode) { @@ -973,9 +1017,6 @@ do_write: /* mask data according to sr[2] */ s->plane_updated |= mask; /* only used to detect font change */ write_mask = mask16[mask]; - if (addr * sizeof(uint32_t) >= s->vram_size) { - return; - } ((uint32_t *)s->vram_ptr)[addr] = (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) | (val & write_mask); diff --git a/hw/display/vga_regs.h b/hw/display/vga_regs.h index 7fdba34b9b..40e673f164 100644 --- a/hw/display/vga_regs.h +++ b/hw/display/vga_regs.h @@ -100,7 +100,9 @@ /* VGA CRT controller bit masks */ #define VGA_CR11_LOCK_CR0_CR7 0x80 /* lock writes to CR0 - CR7 */ +#define VGA_CR14_DW 0x40 #define VGA_CR17_H_V_SIGNALS_ENABLED 0x80 +#define VGA_CR17_WORD_BYTE 0x40 /* VGA attribute controller register indices */ #define VGA_ATC_PALETTE0 0x00 @@ -154,6 +156,8 @@ #define VGA_GFX_BIT_MASK 0x08 /* VGA graphics controller bit masks */ +#define VGA_GR05_HOST_ODD_EVEN 0x10 #define VGA_GR06_GRAPHICS_MODE 0x01 +#define VGA_GR06_CHAIN_ODD_EVEN 0x02 #endif /* HW_VGA_REGS_H */ From aef158b093b9d67381f88468d39ac8dd62ae9e8b Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sat, 21 Oct 2023 15:40:15 +0200 Subject: [PATCH 09/16] Add class property to configure KVM device node to use This allows passing the KVM device node to use as a file descriptor via /dev/fdset/XX. Passing the device node to use as a file descriptor allows running qemu unprivileged even when the user running qemu is not in the kvm group on distributions where access to /dev/kvm is gated behind membership of the kvm group (as long as the process invoking qemu is able to open /dev/kvm and passes the file descriptor to qemu). Signed-off-by: Daan De Meyer Message-ID: <20231021134015.1119597-1-daan.j.demeyer@gmail.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 25 ++++++++++++++++++++++++- include/sysemu/kvm_int.h | 1 + qemu-options.hx | 8 +++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index bbc60146d1..49e755ec4a 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2349,7 +2349,7 @@ static int kvm_init(MachineState *ms) QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif QLIST_INIT(&s->kvm_parked_vcpus); - s->fd = qemu_open_old("/dev/kvm", O_RDWR); + s->fd = qemu_open_old(s->device ?: "/dev/kvm", O_RDWR); if (s->fd == -1) { fprintf(stderr, "Could not access KVM kernel module: %m\n"); ret = -errno; @@ -3585,6 +3585,24 @@ static void kvm_set_dirty_ring_size(Object *obj, Visitor *v, s->kvm_dirty_ring_size = value; } +static char *kvm_get_device(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + KVMState *s = KVM_STATE(obj); + + return g_strdup(s->device); +} + +static void kvm_set_device(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + KVMState *s = KVM_STATE(obj); + + g_free(s->device); + s->device = g_strdup(value); +} + static void kvm_accel_instance_init(Object *obj) { KVMState *s = KVM_STATE(obj); @@ -3603,6 +3621,7 @@ static void kvm_accel_instance_init(Object *obj) s->xen_version = 0; s->xen_gnttab_max_frames = 64; s->xen_evtchn_max_pirq = 256; + s->device = NULL; } /** @@ -3643,6 +3662,10 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "dirty-ring-size", "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)"); + object_class_property_add_str(oc, "device", kvm_get_device, kvm_set_device); + object_class_property_set_description(oc, "device", + "Path to the device node to use (default: /dev/kvm)"); + kvm_arch_accel_class_init(oc); } diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index fd846394be..882e37e12c 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -120,6 +120,7 @@ struct KVMState uint32_t xen_caps; uint16_t xen_gnttab_max_frames; uint16_t xen_evtchn_max_pirq; + char *device; }; void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, diff --git a/qemu-options.hx b/qemu-options.hx index b66570ae00..2ae51005c1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -188,7 +188,8 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel, " dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n" " eager-split-size=n (KVM Eager Page Split chunk size, default 0, disabled. ARM only)\n" " notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n" - " thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL) + " thread=single|multi (enable multi-threaded TCG)\n" + " device=path (KVM device path, default /dev/kvm)\n", QEMU_ARCH_ALL) SRST ``-accel name[,prop=value[,...]]`` This is used to enable an accelerator. Depending on the target @@ -269,6 +270,11 @@ SRST open up for a specified of time (i.e. notify-window). Default: notify-vmexit=run,notify-window=0. + ``device=path`` + Sets the path to the KVM device node. Defaults to ``/dev/kvm``. This + option can be used to pass the KVM device to use via a file descriptor + by setting the value to ``/dev/fdset/NN``. + ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, From 3cbc17ee92479ff56d0d6afecc0def6f7ec01153 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 12 Jul 2021 15:10:10 +0200 Subject: [PATCH 10/16] io_uring: move LuringState typedef to block/aio.h The LuringState typedef is defined twice, in include/block/raw-aio.h and block/io_uring.c. Move it in include/block/aio.h, which is included everywhere the typedef is needed, since include/block/aio.h already has to define the forward reference to the struct. Signed-off-by: Paolo Bonzini --- block/io_uring.c | 4 ++-- include/block/aio.h | 8 ++++---- include/block/raw-aio.h | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/block/io_uring.c b/block/io_uring.c index 7cdd00e9f1..1e5886c30b 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -49,7 +49,7 @@ typedef struct LuringQueue { QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue; } LuringQueue; -typedef struct LuringState { +struct LuringState { AioContext *aio_context; struct io_uring ring; @@ -58,7 +58,7 @@ typedef struct LuringState { LuringQueue io_q; QEMUBH *completion_bh; -} LuringState; +}; /** * luring_resubmit: diff --git a/include/block/aio.h b/include/block/aio.h index c802a392e5..5d0a114988 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -55,7 +55,7 @@ typedef void IOHandler(void *opaque); struct ThreadPool; struct LinuxAioState; -struct LuringState; +typedef struct LuringState LuringState; /* Is polling disabled? */ bool aio_poll_disabled(AioContext *ctx); @@ -212,7 +212,7 @@ struct AioContext { struct LinuxAioState *linux_aio; #endif #ifdef CONFIG_LINUX_IO_URING - struct LuringState *linux_io_uring; + LuringState *linux_io_uring; /* State for file descriptor monitoring using Linux io_uring */ struct io_uring fdmon_io_uring; @@ -504,10 +504,10 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); /* Setup the LuringState bound to this AioContext */ -struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); +LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); /* Return the LuringState bound to this AioContext */ -struct LuringState *aio_get_linux_io_uring(AioContext *ctx); +LuringState *aio_get_linux_io_uring(AioContext *ctx); /** * aio_timer_new_with_attrs: * @ctx: the aio context diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 0f63c2800c..20e000b8ef 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -65,7 +65,6 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING -typedef struct LuringState LuringState; LuringState *luring_init(Error **errp); void luring_cleanup(LuringState *s); From a58506b748b8988a95f4fa1a2420ac5c17038b30 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 10:06:17 +1100 Subject: [PATCH 11/16] target/i386: Do not re-compute new pc with CF_PCREL With PCREL, we have a page-relative view of EIP, and an approximation of PC = EIP+CSBASE that is good enough to detect page crossings. If we try to recompute PC after masking EIP, we will mess up that approximation and write a corrupt value to EIP. We already handled masking properly for PCREL, so the fix in b5e0d5d2 was only needed for the !PCREL path. Cc: qemu-stable@nongnu.org Fixes: b5e0d5d22fbf ("target/i386: Fix 32-bit wrapping of pc/eip computation") Reported-by: Michael Tokarev Signed-off-by: Richard Henderson Message-ID: <20240101230617.129349-1-richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index e1eb82a5c6..d4d7e904ad 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2866,10 +2866,6 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) } } new_eip &= mask; - new_pc = new_eip + s->cs_base; - if (!CODE64(s)) { - new_pc = (uint32_t)new_pc; - } gen_update_cc_op(s); set_cc_op(s, CC_OP_DYNAMIC); @@ -2885,6 +2881,8 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) tcg_gen_andi_tl(cpu_eip, cpu_eip, mask); use_goto_tb = false; } + } else if (!CODE64(s)) { + new_pc = (uint32_t)(new_eip + s->cs_base); } if (use_goto_tb && translator_use_goto_tb(&s->base, new_pc)) { From 2926eab8969908bc068629e973062a0fb6ff3759 Mon Sep 17 00:00:00 2001 From: guoguangyao Date: Mon, 15 Jan 2024 10:08:04 +0800 Subject: [PATCH 12/16] target/i386: fix incorrect EIP in PC-relative translation blocks The PCREL patches introduced a bug when updating EIP in the !CF_PCREL case. Using s->pc in func gen_update_eip_next() solves the problem. Cc: qemu-stable@nongnu.org Fixes: b5e0d5d22fbf ("target/i386: Fix 32-bit wrapping of pc/eip computation") Signed-off-by: guoguangyao Reviewed-by: Richard Henderson Message-ID: <20240115020804.30272-1-guoguangyao18@mails.ucas.ac.cn> Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index d4d7e904ad..cadf13bce4 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -567,9 +567,9 @@ static void gen_update_eip_next(DisasContext *s) if (tb_cflags(s->base.tb) & CF_PCREL) { tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); } else if (CODE64(s)) { - tcg_gen_movi_tl(cpu_eip, s->base.pc_next); + tcg_gen_movi_tl(cpu_eip, s->pc); } else { - tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base)); + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base)); } s->pc_save = s->pc; } From 729ba8e933f8af5800c3a92b37e630e9bdaa9f1e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Jan 2024 16:27:42 +0100 Subject: [PATCH 13/16] target/i386: pcrel: store low bits of physical address in data[0] For PC-relative translation blocks, env->eip changes during the execution of a translation block, Therefore, QEMU must be able to recover an instruction's PC just from the TranslationBlock struct and the instruction data with. Because a TB will not span two pages, QEMU stores all the low bits of EIP in the instruction data and replaces them in x86_restore_state_to_opc. Bits 12 and higher (which may vary between executions of a PCREL TB, since these only use the physical address in the hash key) are kept unmodified from env->eip. The assumption is that these bits of EIP, unlike bits 0-11, will not change as the translation block executes. Unfortunately, this is incorrect when the CS base is not aligned to a page. Then the linear address of the instructions (i.e. the one with the CS base addred) indeed will never span two pages, but bits 12+ of EIP can actually change. For example, if CS base is 0x80262200 and EIP = 0x6FF4, the first instruction in the translation block will be at linear address 0x802691F4. Even a very small TB will cross to EIP = 0x7xxx, while the linear addresses will remain comfortably within a single page. The fix is simply to use the low bits of the linear address for data[0], since those don't change. Then x86_restore_state_to_opc uses tb->cs_base to compute a temporary linear address (referring to some unknown instruction in the TB, but with the correct values of bits 12 and higher); the low bits are replaced with data[0], and EIP is obtained by subtracting again the CS base. Huge thanks to Mark Cave-Ayland for the image and initial debugging, and to Gitlab user @kjliew for help with bisecting another occurrence of (hopefully!) the same bug. It should be relatively easy to write a testcase that performs MMIO on an EIP with different bits 12+ than the first instruction of the translation block; any help is welcome. Fixes: e3a79e0e878 ("target/i386: Enable TARGET_TB_PCREL", 2022-10-11) Cc: qemu-stable@nongnu.org Cc: Mark Cave-Ayland Cc: Richard Henderson Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1759 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1964 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2012 Signed-off-by: Paolo Bonzini --- target/i386/tcg/tcg-cpu.c | 20 ++++++++++++++++---- target/i386/tcg/translate.c | 1 - 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 6e881e9e27..1d54164bdf 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -68,14 +68,26 @@ static void x86_restore_state_to_opc(CPUState *cs, X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; int cc_op = data[1]; + uint64_t new_pc; if (tb_cflags(tb) & CF_PCREL) { - env->eip = (env->eip & TARGET_PAGE_MASK) | data[0]; - } else if (tb->flags & HF_CS64_MASK) { - env->eip = data[0]; + /* + * data[0] in PC-relative TBs is also a linear address, i.e. an address with + * the CS base added, because it is not guaranteed that EIP bits 12 and higher + * stay the same across the translation block. Add the CS base back before + * replacing the low bits, and subtract it below just like for !CF_PCREL. + */ + uint64_t pc = env->eip + tb->cs_base; + new_pc = (pc & TARGET_PAGE_MASK) | data[0]; } else { - env->eip = (uint32_t)(data[0] - tb->cs_base); + new_pc = data[0]; } + if (tb->flags & HF_CS64_MASK) { + env->eip = new_pc; + } else { + env->eip = (uint32_t)(new_pc - tb->cs_base); + } + if (cc_op != CC_OP_DYNAMIC) { env->cc_op = cc_op; } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index cadf13bce4..e193c74472 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -6996,7 +6996,6 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) dc->prev_insn_end = tcg_last_op(); if (tb_cflags(dcbase->tb) & CF_PCREL) { - pc_arg -= dc->cs_base; pc_arg &= ~TARGET_PAGE_MASK; } tcg_gen_insn_start(pc_arg, dc->cc_op); From 592d0bc0302ff5b5209ecd7f8733f285bc008cff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Dec 2023 19:32:45 +0100 Subject: [PATCH 14/16] remove unnecessary casts from uintptr_t uintptr_t, or unsigned long which is equivalent on Linux I32LP64 systems, is an unsigned type and there is no need to further cast to __u64 which is another unsigned integer type; widening casts from unsigned integers zero-extend the value. Signed-off-by: Paolo Bonzini --- block/io_uring.c | 2 +- hw/vfio/common.c | 4 ++-- target/i386/sev.c | 8 ++++---- util/fdmon-io_uring.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/block/io_uring.c b/block/io_uring.c index 1e5886c30b..d77ae55745 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -102,7 +102,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, /* Update sqe */ luringcb->sqeq.off += nread; - luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov; + luringcb->sqeq.addr = (uintptr_t)luringcb->resubmit_qiov.iov; luringcb->sqeq.len = luringcb->resubmit_qiov.niov; luring_resubmit(s, luringcb); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 0b3352f2a9..4aa86f563c 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1000,7 +1000,7 @@ vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, return NULL; } - control->ranges = (__u64)(uintptr_t)ranges; + control->ranges = (uintptr_t)ranges; if (tracking->max32) { ranges->iova = tracking->min32; ranges->length = (tracking->max32 - tracking->min32) + 1; @@ -1126,7 +1126,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, report->iova = iova; report->length = size; report->page_size = qemu_real_host_page_size(); - report->bitmap = (__u64)(uintptr_t)bitmap; + report->bitmap = (uintptr_t)bitmap; feature->argsz = sizeof(buf); feature->flags = VFIO_DEVICE_FEATURE_GET | diff --git a/target/i386/sev.c b/target/i386/sev.c index 9a71246682..173de91afe 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -167,7 +167,7 @@ sev_ioctl(int fd, int cmd, void *data, int *error) input.id = cmd; input.sev_fd = fd; - input.data = (__u64)(unsigned long)data; + input.data = (uintptr_t)data; r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &input); @@ -240,7 +240,7 @@ sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size, return; } - range.addr = (__u64)(unsigned long)host; + range.addr = (uintptr_t)host; range.size = max_size; trace_kvm_memcrypt_register_region(host, max_size); @@ -270,7 +270,7 @@ sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size, return; } - range.addr = (__u64)(unsigned long)host; + range.addr = (uintptr_t)host; range.size = max_size; trace_kvm_memcrypt_unregister_region(host, max_size); @@ -767,7 +767,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) return 1; } - update.uaddr = (__u64)(unsigned long)addr; + update.uaddr = (uintptr_t)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c index 16054c5ede..b0d68bdc44 100644 --- a/util/fdmon-io_uring.c +++ b/util/fdmon-io_uring.c @@ -180,7 +180,7 @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node) struct io_uring_sqe *sqe = get_sqe(ctx); #ifdef LIBURING_HAVE_DATA64 - io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node); + io_uring_prep_poll_remove(sqe, (uintptr_t)node); #else io_uring_prep_poll_remove(sqe, node); #endif From 5fd1674d21d3e3489f077c67dcd841b72ed351b8 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 15 Jan 2024 15:32:44 +0800 Subject: [PATCH 15/16] qemu/osdep: Add huge page aligned support on LoongArch platform On LoongArch kvm mode if transparent huge page wants to be enabled, base address and size of memslot from both HVA and GPA view. And LoongArch supports both 4K and 16K page size with Linux kernel, so transparent huge page size is calculated from real page size rather than hardcoded size. Signed-off-by: Bibo Mao Message-ID: <20240115073244.174155-1-maobibo@loongson.cn> Signed-off-by: Paolo Bonzini --- include/qemu/osdep.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9a405bed89..c9692cc314 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -547,6 +547,14 @@ int madvise(char *, size_t, int); # define QEMU_VMALLOC_ALIGN (256 * 4096) #elif defined(__linux__) && defined(__sparc__) # define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size(), SHMLBA) +#elif defined(__linux__) && defined(__loongarch__) + /* + * For transparent hugepage optimization, it has better be huge page + * aligned. LoongArch host system supports two kinds of pagesize: 4K + * and 16K, here calculate huge page size from host page size + */ +# define QEMU_VMALLOC_ALIGN (qemu_real_host_page_size() * \ + qemu_real_host_page_size() / sizeof(long)) #else # define QEMU_VMALLOC_ALIGN qemu_real_host_page_size() #endif From 379652e967b32ac905056bf723b54298b2f79a51 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 8 Jan 2024 13:50:00 +0100 Subject: [PATCH 16/16] tests/tcg: Don't #include in aarch64/system/vtimer.c make check-tcg fails on Fedora with: vtimer.c:9:10: fatal error: inttypes.h: No such file or directory Fedora has a minimal aarch64 cross-compiler, which satisfies the configure checks, so it's chosen instead of the dockerized one. There is no cross-version of inttypes.h, however. Fix by using stdint.h instead. The test does not require anything from inttypes.h anyway. Signed-off-by: Ilya Leoshkevich Message-ID: <20240108125030.58569-1-iii@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tests/tcg/aarch64/system/vtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tcg/aarch64/system/vtimer.c b/tests/tcg/aarch64/system/vtimer.c index 42f2f7796c..7d725eced3 100644 --- a/tests/tcg/aarch64/system/vtimer.c +++ b/tests/tcg/aarch64/system/vtimer.c @@ -6,7 +6,7 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#include +#include #include /* grabbed from Linux */