Merge remote-tracking branch 'sstabellini/xen-2013-01-14' into staging

* sstabellini/xen-2013-01-14:
  xen_disk: implement BLKIF_OP_FLUSH_DISKCACHE, remove BLKIF_OP_WRITE_BARRIER
  xen_disk: add persistent grant support to xen_disk backend
  xen_disk: fix memory leak

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
commit
5e72179b8f
208
hw/xen_disk.c
208
hw/xen_disk.c
@ -51,6 +51,13 @@ static int max_requests = 32;
|
|||||||
#define BLOCK_SIZE 512
|
#define BLOCK_SIZE 512
|
||||||
#define IOCB_COUNT (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
|
#define IOCB_COUNT (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
|
||||||
|
|
||||||
|
/*
 * One entry of the persistent-grants tree: a granted page that is kept
 * mapped after the request that first needed it has completed.
 */
typedef struct PersistentGrant {
    void *page;               /* address the granted page is mapped at */
    struct XenBlkDev *blkdev; /* device whose grant table and counters own it */
} PersistentGrant;
|
||||||
|
|
||||||
struct ioreq {
|
struct ioreq {
|
||||||
blkif_request_t req;
|
blkif_request_t req;
|
||||||
int16_t status;
|
int16_t status;
|
||||||
@ -68,6 +75,7 @@ struct ioreq {
|
|||||||
int prot;
|
int prot;
|
||||||
void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||||
void *pages;
|
void *pages;
|
||||||
|
int num_unmap;
|
||||||
|
|
||||||
/* aio status */
|
/* aio status */
|
||||||
int aio_inflight;
|
int aio_inflight;
|
||||||
@ -104,6 +112,12 @@ struct XenBlkDev {
|
|||||||
int requests_inflight;
|
int requests_inflight;
|
||||||
int requests_finished;
|
int requests_finished;
|
||||||
|
|
||||||
|
/* Persistent grants extension */
|
||||||
|
gboolean feature_persistent;
|
||||||
|
GTree *persistent_gnts;
|
||||||
|
unsigned int persistent_gnt_count;
|
||||||
|
unsigned int max_grants;
|
||||||
|
|
||||||
/* qemu block driver */
|
/* qemu block driver */
|
||||||
DriveInfo *dinfo;
|
DriveInfo *dinfo;
|
||||||
BlockDriverState *bs;
|
BlockDriverState *bs;
|
||||||
@ -112,6 +126,54 @@ struct XenBlkDev {
|
|||||||
|
|
||||||
/* ------------------------------------------------------------- */
|
/* ------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static void ioreq_reset(struct ioreq *ioreq)
|
||||||
|
{
|
||||||
|
memset(&ioreq->req, 0, sizeof(ioreq->req));
|
||||||
|
ioreq->status = 0;
|
||||||
|
ioreq->start = 0;
|
||||||
|
ioreq->presync = 0;
|
||||||
|
ioreq->postsync = 0;
|
||||||
|
ioreq->mapped = 0;
|
||||||
|
|
||||||
|
memset(ioreq->domids, 0, sizeof(ioreq->domids));
|
||||||
|
memset(ioreq->refs, 0, sizeof(ioreq->refs));
|
||||||
|
ioreq->prot = 0;
|
||||||
|
memset(ioreq->page, 0, sizeof(ioreq->page));
|
||||||
|
ioreq->pages = NULL;
|
||||||
|
|
||||||
|
ioreq->aio_inflight = 0;
|
||||||
|
ioreq->aio_errors = 0;
|
||||||
|
|
||||||
|
ioreq->blkdev = NULL;
|
||||||
|
memset(&ioreq->list, 0, sizeof(ioreq->list));
|
||||||
|
memset(&ioreq->acct, 0, sizeof(ioreq->acct));
|
||||||
|
|
||||||
|
qemu_iovec_reset(&ioreq->v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Three-way comparison of two keys that carry an unsigned integer packed
 * into a pointer (see GUINT_TO_POINTER / GPOINTER_TO_UINT).
 *
 * Returns -1 if a sorts before b, 0 if both are equal and 1 otherwise.
 * user_data is not used.
 */
static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    /* guint is the type GPOINTER_TO_UINT yields; plain "uint" is a
     * non-standard typedef that is not available on every platform.
     */
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);

    /* Compare rather than subtract: a difference of two unsigned values
     * cannot be represented faithfully in the signed return type.
     */
    return (ua > ub) - (ua < ub);
}
|
||||||
|
|
||||||
|
/*
 * Release one persistent grant: unmap its page, take it out of the
 * owning device's persistent grant count and free the bookkeeping entry.
 *
 * pgnt points to a PersistentGrant. A failing unmap is only logged; the
 * counter is decremented and the entry freed regardless.
 */
static void destroy_grant(gpointer pgnt)
{
    PersistentGrant *entry = pgnt;
    struct XenBlkDev *owner = entry->blkdev;

    if (xc_gnttab_munmap(owner->xendev.gnttabdev, entry->page, 1) != 0) {
        xen_be_printf(&owner->xendev, 0,
                      "xc_gnttab_munmap failed: %s\n",
                      strerror(errno));
    }
    owner->persistent_gnt_count--;
    xen_be_printf(&owner->xendev, 3,
                  "unmapped grant %p\n", entry->page);
    g_free(entry);
}
|
||||||
|
|
||||||
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
|
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
|
||||||
{
|
{
|
||||||
struct ioreq *ioreq = NULL;
|
struct ioreq *ioreq = NULL;
|
||||||
@ -129,7 +191,6 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
|
|||||||
/* get one from freelist */
|
/* get one from freelist */
|
||||||
ioreq = QLIST_FIRST(&blkdev->freelist);
|
ioreq = QLIST_FIRST(&blkdev->freelist);
|
||||||
QLIST_REMOVE(ioreq, list);
|
QLIST_REMOVE(ioreq, list);
|
||||||
qemu_iovec_reset(&ioreq->v);
|
|
||||||
}
|
}
|
||||||
QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
|
QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
|
||||||
blkdev->requests_inflight++;
|
blkdev->requests_inflight++;
|
||||||
@ -153,7 +214,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
|
|||||||
struct XenBlkDev *blkdev = ioreq->blkdev;
|
struct XenBlkDev *blkdev = ioreq->blkdev;
|
||||||
|
|
||||||
QLIST_REMOVE(ioreq, list);
|
QLIST_REMOVE(ioreq, list);
|
||||||
memset(ioreq, 0, sizeof(*ioreq));
|
ioreq_reset(ioreq);
|
||||||
ioreq->blkdev = blkdev;
|
ioreq->blkdev = blkdev;
|
||||||
QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
|
QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
|
||||||
if (finish) {
|
if (finish) {
|
||||||
@ -182,12 +243,11 @@ static int ioreq_parse(struct ioreq *ioreq)
|
|||||||
case BLKIF_OP_READ:
|
case BLKIF_OP_READ:
|
||||||
ioreq->prot = PROT_WRITE; /* to memory */
|
ioreq->prot = PROT_WRITE; /* to memory */
|
||||||
break;
|
break;
|
||||||
case BLKIF_OP_WRITE_BARRIER:
|
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||||
|
ioreq->presync = 1;
|
||||||
if (!ioreq->req.nr_segments) {
|
if (!ioreq->req.nr_segments) {
|
||||||
ioreq->presync = 1;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
ioreq->presync = ioreq->postsync = 1;
|
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case BLKIF_OP_WRITE:
|
case BLKIF_OP_WRITE:
|
||||||
ioreq->prot = PROT_READ; /* from memory */
|
ioreq->prot = PROT_READ; /* from memory */
|
||||||
@ -241,21 +301,21 @@ static void ioreq_unmap(struct ioreq *ioreq)
|
|||||||
XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
|
XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
|
if (ioreq->num_unmap == 0 || ioreq->mapped == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (batch_maps) {
|
if (batch_maps) {
|
||||||
if (!ioreq->pages) {
|
if (!ioreq->pages) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
|
if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
|
||||||
xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
|
xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
ioreq->blkdev->cnt_map -= ioreq->v.niov;
|
ioreq->blkdev->cnt_map -= ioreq->num_unmap;
|
||||||
ioreq->pages = NULL;
|
ioreq->pages = NULL;
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < ioreq->v.niov; i++) {
|
for (i = 0; i < ioreq->num_unmap; i++) {
|
||||||
if (!ioreq->page[i]) {
|
if (!ioreq->page[i]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -273,41 +333,120 @@ static void ioreq_unmap(struct ioreq *ioreq)
|
|||||||
static int ioreq_map(struct ioreq *ioreq)
|
static int ioreq_map(struct ioreq *ioreq)
|
||||||
{
|
{
|
||||||
XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
|
XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
|
||||||
int i;
|
uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||||
|
uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||||
|
void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||||
|
int i, j, new_maps = 0;
|
||||||
|
PersistentGrant *grant;
|
||||||
|
/* domids and refs variables will contain the information necessary
|
||||||
|
* to map the grants that are needed to fulfill this request.
|
||||||
|
*
|
||||||
|
* After mapping the needed grants, the page array will contain the
|
||||||
|
* memory address of each granted page in the order specified in ioreq
|
||||||
|
* (disregarding if it's a persistent grant or not).
|
||||||
|
*/
|
||||||
|
|
||||||
if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
|
if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (batch_maps) {
|
if (ioreq->blkdev->feature_persistent) {
|
||||||
|
for (i = 0; i < ioreq->v.niov; i++) {
|
||||||
|
grant = g_tree_lookup(ioreq->blkdev->persistent_gnts,
|
||||||
|
GUINT_TO_POINTER(ioreq->refs[i]));
|
||||||
|
|
||||||
|
if (grant != NULL) {
|
||||||
|
page[i] = grant->page;
|
||||||
|
xen_be_printf(&ioreq->blkdev->xendev, 3,
|
||||||
|
"using persistent-grant %" PRIu32 "\n",
|
||||||
|
ioreq->refs[i]);
|
||||||
|
} else {
|
||||||
|
/* Add the grant to the list of grants that
|
||||||
|
* should be mapped
|
||||||
|
*/
|
||||||
|
domids[new_maps] = ioreq->domids[i];
|
||||||
|
refs[new_maps] = ioreq->refs[i];
|
||||||
|
page[i] = NULL;
|
||||||
|
new_maps++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Set the protection to RW, since grants may be reused later
|
||||||
|
* with a different protection than the one needed for this request
|
||||||
|
*/
|
||||||
|
ioreq->prot = PROT_WRITE | PROT_READ;
|
||||||
|
} else {
|
||||||
|
/* All grants in the request should be mapped */
|
||||||
|
memcpy(refs, ioreq->refs, sizeof(refs));
|
||||||
|
memcpy(domids, ioreq->domids, sizeof(domids));
|
||||||
|
memset(page, 0, sizeof(page));
|
||||||
|
new_maps = ioreq->v.niov;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (batch_maps && new_maps) {
|
||||||
ioreq->pages = xc_gnttab_map_grant_refs
|
ioreq->pages = xc_gnttab_map_grant_refs
|
||||||
(gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
|
(gnt, new_maps, domids, refs, ioreq->prot);
|
||||||
if (ioreq->pages == NULL) {
|
if (ioreq->pages == NULL) {
|
||||||
xen_be_printf(&ioreq->blkdev->xendev, 0,
|
xen_be_printf(&ioreq->blkdev->xendev, 0,
|
||||||
"can't map %d grant refs (%s, %d maps)\n",
|
"can't map %d grant refs (%s, %d maps)\n",
|
||||||
ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
|
new_maps, strerror(errno), ioreq->blkdev->cnt_map);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < ioreq->v.niov; i++) {
|
for (i = 0, j = 0; i < ioreq->v.niov; i++) {
|
||||||
ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
|
if (page[i] == NULL) {
|
||||||
(uintptr_t)ioreq->v.iov[i].iov_base;
|
page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ioreq->blkdev->cnt_map += ioreq->v.niov;
|
ioreq->blkdev->cnt_map += new_maps;
|
||||||
} else {
|
} else if (new_maps) {
|
||||||
for (i = 0; i < ioreq->v.niov; i++) {
|
for (i = 0; i < new_maps; i++) {
|
||||||
ioreq->page[i] = xc_gnttab_map_grant_ref
|
ioreq->page[i] = xc_gnttab_map_grant_ref
|
||||||
(gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
|
(gnt, domids[i], refs[i], ioreq->prot);
|
||||||
if (ioreq->page[i] == NULL) {
|
if (ioreq->page[i] == NULL) {
|
||||||
xen_be_printf(&ioreq->blkdev->xendev, 0,
|
xen_be_printf(&ioreq->blkdev->xendev, 0,
|
||||||
"can't map grant ref %d (%s, %d maps)\n",
|
"can't map grant ref %d (%s, %d maps)\n",
|
||||||
ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
|
refs[i], strerror(errno), ioreq->blkdev->cnt_map);
|
||||||
ioreq_unmap(ioreq);
|
ioreq_unmap(ioreq);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
|
|
||||||
ioreq->blkdev->cnt_map++;
|
ioreq->blkdev->cnt_map++;
|
||||||
}
|
}
|
||||||
|
for (i = 0, j = 0; i < ioreq->v.niov; i++) {
|
||||||
|
if (page[i] == NULL) {
|
||||||
|
page[i] = ioreq->page[j++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ioreq->blkdev->feature_persistent) {
|
||||||
|
while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants)
|
||||||
|
&& new_maps) {
|
||||||
|
/* Go through the list of newly mapped grants and add as many
|
||||||
|
* as possible to the list of persistently mapped grants.
|
||||||
|
*
|
||||||
|
* Since we start at the end of ioreq->page(s), we only need
|
||||||
|
* to decrease new_maps to prevent this granted pages from
|
||||||
|
* being unmapped in ioreq_unmap.
|
||||||
|
*/
|
||||||
|
grant = g_malloc0(sizeof(*grant));
|
||||||
|
new_maps--;
|
||||||
|
if (batch_maps) {
|
||||||
|
grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE;
|
||||||
|
} else {
|
||||||
|
grant->page = ioreq->page[new_maps];
|
||||||
|
}
|
||||||
|
grant->blkdev = ioreq->blkdev;
|
||||||
|
xen_be_printf(&ioreq->blkdev->xendev, 3,
|
||||||
|
"adding grant %" PRIu32 " page: %p\n",
|
||||||
|
refs[new_maps], grant->page);
|
||||||
|
g_tree_insert(ioreq->blkdev->persistent_gnts,
|
||||||
|
GUINT_TO_POINTER(refs[new_maps]),
|
||||||
|
grant);
|
||||||
|
ioreq->blkdev->persistent_gnt_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < ioreq->v.niov; i++) {
|
||||||
|
ioreq->v.iov[i].iov_base += (uintptr_t)page[i];
|
||||||
}
|
}
|
||||||
ioreq->mapped = 1;
|
ioreq->mapped = 1;
|
||||||
|
ioreq->num_unmap = new_maps;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,7 +508,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
|
|||||||
qemu_aio_complete, ioreq);
|
qemu_aio_complete, ioreq);
|
||||||
break;
|
break;
|
||||||
case BLKIF_OP_WRITE:
|
case BLKIF_OP_WRITE:
|
||||||
case BLKIF_OP_WRITE_BARRIER:
|
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||||
if (!ioreq->req.nr_segments) {
|
if (!ioreq->req.nr_segments) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -654,7 +793,8 @@ static int blk_init(struct XenDevice *xendev)
|
|||||||
blkdev->file_size, blkdev->file_size >> 20);
|
blkdev->file_size, blkdev->file_size >> 20);
|
||||||
|
|
||||||
/* fill info */
|
/* fill info */
|
||||||
xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
|
xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
|
||||||
|
xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
|
||||||
xenstore_write_be_int(&blkdev->xendev, "info", info);
|
xenstore_write_be_int(&blkdev->xendev, "info", info);
|
||||||
xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
|
xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
|
||||||
xenstore_write_be_int(&blkdev->xendev, "sectors",
|
xenstore_write_be_int(&blkdev->xendev, "sectors",
|
||||||
@ -678,6 +818,7 @@ out_error:
|
|||||||
static int blk_connect(struct XenDevice *xendev)
|
static int blk_connect(struct XenDevice *xendev)
|
||||||
{
|
{
|
||||||
struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
|
struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
|
||||||
|
int pers;
|
||||||
|
|
||||||
if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
|
if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
|
||||||
return -1;
|
return -1;
|
||||||
@ -686,6 +827,11 @@ static int blk_connect(struct XenDevice *xendev)
|
|||||||
&blkdev->xendev.remote_port) == -1) {
|
&blkdev->xendev.remote_port) == -1) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) {
|
||||||
|
blkdev->feature_persistent = FALSE;
|
||||||
|
} else {
|
||||||
|
blkdev->feature_persistent = !!pers;
|
||||||
|
}
|
||||||
|
|
||||||
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
|
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
|
||||||
if (blkdev->xendev.protocol) {
|
if (blkdev->xendev.protocol) {
|
||||||
@ -729,6 +875,15 @@ static int blk_connect(struct XenDevice *xendev)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (blkdev->feature_persistent) {
|
||||||
|
/* Init persistent grants */
|
||||||
|
blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||||
|
blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
|
||||||
|
NULL, NULL,
|
||||||
|
(GDestroyNotify)destroy_grant);
|
||||||
|
blkdev->persistent_gnt_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
xen_be_bind_evtchn(&blkdev->xendev);
|
xen_be_bind_evtchn(&blkdev->xendev);
|
||||||
|
|
||||||
xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
|
xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
|
||||||
@ -769,6 +924,11 @@ static int blk_free(struct XenDevice *xendev)
|
|||||||
blk_disconnect(xendev);
|
blk_disconnect(xendev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Free persistent grants */
|
||||||
|
if (blkdev->feature_persistent) {
|
||||||
|
g_tree_destroy(blkdev->persistent_gnts);
|
||||||
|
}
|
||||||
|
|
||||||
while (!QLIST_EMPTY(&blkdev->freelist)) {
|
while (!QLIST_EMPTY(&blkdev->freelist)) {
|
||||||
ioreq = QLIST_FIRST(&blkdev->freelist);
|
ioreq = QLIST_FIRST(&blkdev->freelist);
|
||||||
QLIST_REMOVE(ioreq, list);
|
QLIST_REMOVE(ioreq, list);
|
||||||
|
Loading…
Reference in New Issue
Block a user