ide: Correct handling of malformed/short PRDTs
This impacts both BMDMA and AHCI HBA interfaces for IDE. Currently, we conflate a PRDT having "0 bytes" with a PRDT having "0 complete sectors." When we receive an incomplete sector, inconsistent error checking leads to an infinite loop wherein the call succeeds but does not give us enough bytes -- leading us to re-call the DMA chain over and over again. In the BMDMA case, this leads to leaked memory for short PRDTs; in the AHCI case, it leads to infinite loops and resource usage. The .prepare_buf() callback is reworked to return the number of bytes that it successfully prepared. 0 is a valid, non-error answer that means the table was empty and described no bytes. -1 indicates an error. Our current implementation uses the io_buffer in IDEState to ultimately describe the size of a prepared scatter-gather list. Even though the AHCI PRDT/SGList can be as large as 256GiB, the AHCI command header limits transactions to just 4GiB. ATA8-ACS3, however, defines the largest transaction to be an LBA48 command that transfers 65,536 sectors. With a 512 byte sector size, this is just 32MiB. Since our current state structures use the int type to describe the size of the buffer, and this state is migrated as int32, we are limited to describing 2GiB buffer sizes unless we change the migration protocol. For this reason, this patch begins to unify the assertions in the IDE pathways that the scatter-gather list provided by either the AHCI PRDT or the PCI BMDMA PRDs can only describe, at a maximum, 2GiB. This should be resilient enough unless we need a sector size that exceeds 32KiB. Further, the likelihood of any guest operating system actually attempting to transfer this much data in a single operation is very slim. To this end, the IDEState variables have been updated to more explicitly clarify our maximum supported size.
Callers to the prepare_buf callback have been reworked to understand the new return code, and all versions of the prepare_buf callback have been adjusted accordingly. Lastly, the ahci_populate_sglist helper, relied upon by the AHCI implementation of .prepare_buf(), as well as the PCI implementation of the callback, have had overflow assertions added to help make clear the reasoning behind the various type changes. [Added %d -> %"PRId64" fix John sent because off_pos changed from int to int64_t. --Stefan] Signed-off-by: John Snow <jsnow@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 1414785819-26209-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
bef1301acb
commit
3251bdcf1c
@ -730,7 +730,8 @@ static int prdt_tbl_entry_size(const AHCI_SG *tbl)
|
|||||||
return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
|
return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
|
static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
|
||||||
|
int32_t offset)
|
||||||
{
|
{
|
||||||
AHCICmdHdr *cmd = ad->cur_cmd;
|
AHCICmdHdr *cmd = ad->cur_cmd;
|
||||||
uint32_t opts = le32_to_cpu(cmd->opts);
|
uint32_t opts = le32_to_cpu(cmd->opts);
|
||||||
@ -741,13 +742,21 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
|
|||||||
uint8_t *prdt;
|
uint8_t *prdt;
|
||||||
int i;
|
int i;
|
||||||
int r = 0;
|
int r = 0;
|
||||||
int sum = 0;
|
uint64_t sum = 0;
|
||||||
int off_idx = -1;
|
int off_idx = -1;
|
||||||
int off_pos = -1;
|
int64_t off_pos = -1;
|
||||||
int tbl_entry_size;
|
int tbl_entry_size;
|
||||||
IDEBus *bus = &ad->port;
|
IDEBus *bus = &ad->port;
|
||||||
BusState *qbus = BUS(bus);
|
BusState *qbus = BUS(bus);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: AHCI PRDT can describe up to 256GiB. SATA/ATA only support
|
||||||
|
* transactions of up to 32MiB as of ATA8-ACS3 rev 1b, assuming a
|
||||||
|
* 512 byte sector size. We limit the PRDT in this implementation to
|
||||||
|
* a reasonably large 2GiB, which can accommodate the maximum transfer
|
||||||
|
* request for sector sizes up to 32K.
|
||||||
|
*/
|
||||||
|
|
||||||
if (!sglist_alloc_hint) {
|
if (!sglist_alloc_hint) {
|
||||||
DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
|
DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
|
||||||
return -1;
|
return -1;
|
||||||
@ -782,7 +791,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
|
|||||||
}
|
}
|
||||||
if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
|
if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
|
||||||
DPRINTF(ad->port_no, "%s: Incorrect offset! "
|
DPRINTF(ad->port_no, "%s: Incorrect offset! "
|
||||||
"off_idx: %d, off_pos: %d\n",
|
"off_idx: %d, off_pos: %"PRId64"\n",
|
||||||
__func__, off_idx, off_pos);
|
__func__, off_idx, off_pos);
|
||||||
r = -1;
|
r = -1;
|
||||||
goto out;
|
goto out;
|
||||||
@ -797,6 +806,13 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
|
|||||||
/* flags_size is zero-based */
|
/* flags_size is zero-based */
|
||||||
qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
|
qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
|
||||||
prdt_tbl_entry_size(&tbl[i]));
|
prdt_tbl_entry_size(&tbl[i]));
|
||||||
|
if (sglist->size > INT32_MAX) {
|
||||||
|
error_report("AHCI Physical Region Descriptor Table describes "
|
||||||
|
"more than 2 GiB.\n");
|
||||||
|
qemu_sglist_destroy(sglist);
|
||||||
|
r = -1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1140,16 +1156,19 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s,
|
|||||||
* Not currently invoked by PIO R/W chains,
|
* Not currently invoked by PIO R/W chains,
|
||||||
* which invoke ahci_populate_sglist via ahci_start_transfer.
|
* which invoke ahci_populate_sglist via ahci_start_transfer.
|
||||||
*/
|
*/
|
||||||
static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
|
static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
|
||||||
{
|
{
|
||||||
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
|
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
|
||||||
IDEState *s = &ad->port.ifs[0];
|
IDEState *s = &ad->port.ifs[0];
|
||||||
|
|
||||||
ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset);
|
if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset) == -1) {
|
||||||
|
DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
s->io_buffer_size = s->sg.size;
|
s->io_buffer_size = s->sg.size;
|
||||||
|
|
||||||
DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size);
|
DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size);
|
||||||
return s->io_buffer_size != 0;
|
return s->io_buffer_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -731,10 +731,11 @@ void ide_dma_cb(void *opaque, int ret)
|
|||||||
n = s->nsector;
|
n = s->nsector;
|
||||||
s->io_buffer_index = 0;
|
s->io_buffer_index = 0;
|
||||||
s->io_buffer_size = n * 512;
|
s->io_buffer_size = n * 512;
|
||||||
if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) == 0) {
|
if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) < 512) {
|
||||||
/* The PRDs were too short. Reset the Active bit, but don't raise an
|
/* The PRDs were too short. Reset the Active bit, but don't raise an
|
||||||
* interrupt. */
|
* interrupt. */
|
||||||
s->status = READY_STAT | SEEK_STAT;
|
s->status = READY_STAT | SEEK_STAT;
|
||||||
|
dma_buf_commit(s, 0);
|
||||||
goto eot;
|
goto eot;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2313,12 +2314,17 @@ static int ide_nop_int(IDEDMA *dma, int x)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t ide_nop_int32(IDEDMA *dma, int x)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void ide_nop_restart(void *opaque, int x, RunState y)
|
static void ide_nop_restart(void *opaque, int x, RunState y)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
static const IDEDMAOps ide_dma_nop_ops = {
|
static const IDEDMAOps ide_dma_nop_ops = {
|
||||||
.prepare_buf = ide_nop_int,
|
.prepare_buf = ide_nop_int32,
|
||||||
.rw_buf = ide_nop_int,
|
.rw_buf = ide_nop_int,
|
||||||
.set_unit = ide_nop_int,
|
.set_unit = ide_nop_int,
|
||||||
.restart_cb = ide_nop_restart,
|
.restart_cb = ide_nop_restart,
|
||||||
|
@ -322,6 +322,7 @@ typedef void EndTransferFunc(IDEState *);
|
|||||||
typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
|
typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
|
||||||
typedef void DMAVoidFunc(IDEDMA *);
|
typedef void DMAVoidFunc(IDEDMA *);
|
||||||
typedef int DMAIntFunc(IDEDMA *, int);
|
typedef int DMAIntFunc(IDEDMA *, int);
|
||||||
|
typedef int32_t DMAInt32Func(IDEDMA *, int);
|
||||||
typedef void DMAu32Func(IDEDMA *, uint32_t);
|
typedef void DMAu32Func(IDEDMA *, uint32_t);
|
||||||
typedef void DMAStopFunc(IDEDMA *, bool);
|
typedef void DMAStopFunc(IDEDMA *, bool);
|
||||||
typedef void DMARestartFunc(void *, int, RunState);
|
typedef void DMARestartFunc(void *, int, RunState);
|
||||||
@ -385,7 +386,7 @@ struct IDEState {
|
|||||||
uint8_t cdrom_changed;
|
uint8_t cdrom_changed;
|
||||||
int packet_transfer_size;
|
int packet_transfer_size;
|
||||||
int elementary_transfer_size;
|
int elementary_transfer_size;
|
||||||
int io_buffer_index;
|
int32_t io_buffer_index;
|
||||||
int lba;
|
int lba;
|
||||||
int cd_sector_size;
|
int cd_sector_size;
|
||||||
int atapi_dma; /* true if dma is requested for the packet cmd */
|
int atapi_dma; /* true if dma is requested for the packet cmd */
|
||||||
@ -394,8 +395,8 @@ struct IDEState {
|
|||||||
struct iovec iov;
|
struct iovec iov;
|
||||||
QEMUIOVector qiov;
|
QEMUIOVector qiov;
|
||||||
/* ATA DMA state */
|
/* ATA DMA state */
|
||||||
int io_buffer_offset;
|
int32_t io_buffer_offset;
|
||||||
int io_buffer_size;
|
int32_t io_buffer_size;
|
||||||
QEMUSGList sg;
|
QEMUSGList sg;
|
||||||
/* PIO transfer handling */
|
/* PIO transfer handling */
|
||||||
int req_nb_sectors; /* number of sectors per interrupt */
|
int req_nb_sectors; /* number of sectors per interrupt */
|
||||||
@ -405,8 +406,8 @@ struct IDEState {
|
|||||||
uint8_t *io_buffer;
|
uint8_t *io_buffer;
|
||||||
/* PIO save/restore */
|
/* PIO save/restore */
|
||||||
int32_t io_buffer_total_len;
|
int32_t io_buffer_total_len;
|
||||||
int cur_io_buffer_offset;
|
int32_t cur_io_buffer_offset;
|
||||||
int cur_io_buffer_len;
|
int32_t cur_io_buffer_len;
|
||||||
uint8_t end_transfer_fn_idx;
|
uint8_t end_transfer_fn_idx;
|
||||||
QEMUTimer *sector_write_timer; /* only used for win2k install hack */
|
QEMUTimer *sector_write_timer; /* only used for win2k install hack */
|
||||||
uint32_t irq_count; /* counts IRQs when using win2k install hack */
|
uint32_t irq_count; /* counts IRQs when using win2k install hack */
|
||||||
@ -430,7 +431,7 @@ struct IDEState {
|
|||||||
struct IDEDMAOps {
|
struct IDEDMAOps {
|
||||||
DMAStartFunc *start_dma;
|
DMAStartFunc *start_dma;
|
||||||
DMAVoidFunc *start_transfer;
|
DMAVoidFunc *start_transfer;
|
||||||
DMAIntFunc *prepare_buf;
|
DMAInt32Func *prepare_buf;
|
||||||
DMAu32Func *commit_buf;
|
DMAu32Func *commit_buf;
|
||||||
DMAIntFunc *rw_buf;
|
DMAIntFunc *rw_buf;
|
||||||
DMAIntFunc *set_unit;
|
DMAIntFunc *set_unit;
|
||||||
|
@ -553,6 +553,11 @@ static int ide_nop_int(IDEDMA *dma, int x)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t ide_nop_int32(IDEDMA *dma, int x)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void ide_nop_restart(void *opaque, int x, RunState y)
|
static void ide_nop_restart(void *opaque, int x, RunState y)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -569,7 +574,7 @@ static void ide_dbdma_start(IDEDMA *dma, IDEState *s,
|
|||||||
|
|
||||||
static const IDEDMAOps dbdma_ops = {
|
static const IDEDMAOps dbdma_ops = {
|
||||||
.start_dma = ide_dbdma_start,
|
.start_dma = ide_dbdma_start,
|
||||||
.prepare_buf = ide_nop_int,
|
.prepare_buf = ide_nop_int32,
|
||||||
.rw_buf = ide_nop_int,
|
.rw_buf = ide_nop_int,
|
||||||
.set_unit = ide_nop_int,
|
.set_unit = ide_nop_int,
|
||||||
.restart_cb = ide_nop_restart,
|
.restart_cb = ide_nop_restart,
|
||||||
|
27
hw/ide/pci.c
27
hw/ide/pci.c
@ -28,7 +28,7 @@
|
|||||||
#include <hw/isa/isa.h>
|
#include <hw/isa/isa.h>
|
||||||
#include "sysemu/block-backend.h"
|
#include "sysemu/block-backend.h"
|
||||||
#include "sysemu/dma.h"
|
#include "sysemu/dma.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
#include <hw/ide/pci.h>
|
#include <hw/ide/pci.h>
|
||||||
|
|
||||||
#define BMDMA_PAGE_SIZE 4096
|
#define BMDMA_PAGE_SIZE 4096
|
||||||
@ -55,8 +55,11 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return 0 if buffer completed */
|
/**
|
||||||
static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
|
* Return the number of bytes successfully prepared.
|
||||||
|
* -1 on error.
|
||||||
|
*/
|
||||||
|
static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
|
||||||
{
|
{
|
||||||
BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
|
BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
|
||||||
IDEState *s = bmdma_active_if(bm);
|
IDEState *s = bmdma_active_if(bm);
|
||||||
@ -74,8 +77,9 @@ static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
|
|||||||
if (bm->cur_prd_len == 0) {
|
if (bm->cur_prd_len == 0) {
|
||||||
/* end of table (with a fail safe of one page) */
|
/* end of table (with a fail safe of one page) */
|
||||||
if (bm->cur_prd_last ||
|
if (bm->cur_prd_last ||
|
||||||
(bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
|
(bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE) {
|
||||||
return s->io_buffer_size != 0;
|
return s->io_buffer_size;
|
||||||
|
}
|
||||||
pci_dma_read(pci_dev, bm->cur_addr, &prd, 8);
|
pci_dma_read(pci_dev, bm->cur_addr, &prd, 8);
|
||||||
bm->cur_addr += 8;
|
bm->cur_addr += 8;
|
||||||
prd.addr = le32_to_cpu(prd.addr);
|
prd.addr = le32_to_cpu(prd.addr);
|
||||||
@ -90,12 +94,23 @@ static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
|
|||||||
l = bm->cur_prd_len;
|
l = bm->cur_prd_len;
|
||||||
if (l > 0) {
|
if (l > 0) {
|
||||||
qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
|
qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
|
||||||
|
|
||||||
|
/* Note: We limit the max transfer to be 2GiB.
|
||||||
|
* This should accommodate the largest ATA transaction
|
||||||
|
* for LBA48 (65,536 sectors) and 32K sector sizes. */
|
||||||
|
if (s->sg.size > INT32_MAX) {
|
||||||
|
error_report("IDE: sglist describes more than 2GiB.\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
bm->cur_prd_addr += l;
|
bm->cur_prd_addr += l;
|
||||||
bm->cur_prd_len -= l;
|
bm->cur_prd_len -= l;
|
||||||
s->io_buffer_size += l;
|
s->io_buffer_size += l;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 1;
|
|
||||||
|
qemu_sglist_destroy(&s->sg);
|
||||||
|
s->io_buffer_size = 0;
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return 0 if buffer completed */
|
/* return 0 if buffer completed */
|
||||||
|
Loading…
Reference in New Issue
Block a user