iSCSI block driver

This provides built-in support for iSCSI to QEMU.

This has the advantage that the iSCSI devices need not be made visible to the host, which is useful if you have very many virtual machines and very many iscsi devices.
It also has the benefit that non-root users of QEMU can access iSCSI devices across the network without requiring root privilege on the host.

This driver interfaces with the multiplatform posix library for iscsi initiator/client access to iscsi devices hosted at
    git://github.com/sahlberg/libiscsi.git

The patch adds the driver to interface with the iscsi library.
It also updated the configure script to
* by default, probe is libiscsi is available and if so, build
  qemu against libiscsi.
* --enable-libiscsi
  Force a build against libiscsi. If libiscsi is not available
  the build will fail.
* --disable-libiscsi
  Do not link against libiscsi, even if it is available.

When linked with libiscsi, qemu gains support to access iscsi resources such as disks and cdrom directly, without having to make the devices visible to the host.

You can specify devices using a iscsi url of the form :
iscsi://[<username>[:<password>@]]<host>[:<port]/<target-iqn-name>/<lun>
When using authentication, the password can optionally be set with
LIBISCSI_CHAP_PASSWORD="password" to avoid it showing up in the process list

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Ronnie Sahlberg 2011-10-25 19:24:24 +11:00 committed by Kevin Wolf
parent b5a12aa204
commit c589b24972
4 changed files with 629 additions and 0 deletions

View File

@ -36,6 +36,7 @@ block-nested-y += qed-check.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_LIBISCSI) += iscsi.o
block-nested-$(CONFIG_CURL) += curl.o
block-nested-$(CONFIG_RBD) += rbd.o

591
block/iscsi.c Normal file
View File

@ -0,0 +1,591 @@
/*
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "config-host.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu-error.h"
#include "block_int.h"
#include "trace.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
typedef struct IscsiLun {
struct iscsi_context *iscsi;
int lun;
int block_size;
unsigned long num_blocks;
} IscsiLun;
typedef struct IscsiAIOCB {
BlockDriverAIOCB common;
QEMUIOVector *qiov;
QEMUBH *bh;
IscsiLun *iscsilun;
struct scsi_task *task;
uint8_t *buf;
int status;
int canceled;
size_t read_size;
size_t read_offset;
} IscsiAIOCB;
struct IscsiTask {
IscsiLun *iscsilun;
BlockDriverState *bs;
int status;
int complete;
};
static void
iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
void *private_data)
{
}
static void
iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
{
IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
IscsiLun *iscsilun = acb->iscsilun;
acb->common.cb(acb->common.opaque, -ECANCELED);
acb->canceled = 1;
/* send a task mgmt call to the target to cancel the task on the target */
iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
iscsi_abort_task_cb, NULL);
/* then also cancel the task locally in libiscsi */
iscsi_scsi_task_cancel(iscsilun->iscsi, acb->task);
}
static AIOPool iscsi_aio_pool = {
.aiocb_size = sizeof(IscsiAIOCB),
.cancel = iscsi_aio_cancel,
};
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
static int iscsi_process_flush(void *arg)
{
IscsiLun *iscsilun = arg;
return iscsi_queue_length(iscsilun->iscsi) > 0;
}
static void
iscsi_set_events(IscsiLun *iscsilun)
{
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read,
(iscsi_which_events(iscsi) & POLLOUT)
? iscsi_process_write : NULL,
iscsi_process_flush, NULL, iscsilun);
}
static void
iscsi_process_read(void *arg)
{
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
}
static void
iscsi_process_write(void *arg)
{
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
}
static int
iscsi_schedule_bh(QEMUBHFunc *cb, IscsiAIOCB *acb)
{
acb->bh = qemu_bh_new(cb, acb);
if (!acb->bh) {
error_report("oom: could not create iscsi bh");
return -EIO;
}
qemu_bh_schedule(acb->bh);
return 0;
}
static void
iscsi_readv_writev_bh_cb(void *p)
{
IscsiAIOCB *acb = p;
qemu_bh_delete(acb->bh);
if (acb->canceled == 0) {
acb->common.cb(acb->common.opaque, acb->status);
}
qemu_aio_release(acb);
}
static void
iscsi_aio_write10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
trace_iscsi_aio_write10_cb(iscsi, status, acb, acb->canceled);
g_free(acb->buf);
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status < 0) {
error_report("Failed to write10 data to iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
{
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
static BlockDriverAIOCB *
iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
size_t size;
int fua = 0;
/* set FUA on writes when cache mode is write through */
if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
fua = 1;
}
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
acb->iscsilun = iscsilun;
acb->qiov = qiov;
acb->canceled = 0;
/* XXX we should pass the iovec to write10 to avoid the extra copy */
/* this will allow us to get rid of 'buf' completely */
size = nb_sectors * BDRV_SECTOR_SIZE;
acb->buf = g_malloc(size);
qemu_iovec_to_buffer(acb->qiov, acb->buf);
acb->task = iscsi_write10_task(iscsi, iscsilun->lun, acb->buf, size,
sector_qemu2lun(sector_num, iscsilun),
fua, 0, iscsilun->block_size,
iscsi_aio_write10_cb, acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send write10 command. %s",
iscsi_get_error(iscsi));
g_free(acb->buf);
qemu_aio_release(acb);
return NULL;
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static void
iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status != 0) {
error_report("Failed to read10 data from iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static BlockDriverAIOCB *
iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
size_t qemu_read_size, lun_read_size;
int i;
qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
acb->iscsilun = iscsilun;
acb->qiov = qiov;
acb->canceled = 0;
acb->read_size = qemu_read_size;
acb->buf = NULL;
/* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
* may be misaligned to the LUN, so we may need to read some extra
* data.
*/
acb->read_offset = 0;
if (iscsilun->block_size > BDRV_SECTOR_SIZE) {
uint64_t bdrv_offset = BDRV_SECTOR_SIZE * sector_num;
acb->read_offset = bdrv_offset % iscsilun->block_size;
}
lun_read_size = (qemu_read_size + iscsilun->block_size
+ acb->read_offset - 1)
/ iscsilun->block_size * iscsilun->block_size;
acb->task = iscsi_read10_task(iscsi, iscsilun->lun,
sector_qemu2lun(sector_num, iscsilun),
lun_read_size, iscsilun->block_size,
iscsi_aio_read10_cb, acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send read10 command. %s",
iscsi_get_error(iscsi));
qemu_aio_release(acb);
return NULL;
}
for (i = 0; i < acb->qiov->niov; i++) {
scsi_task_add_data_in_buffer(acb->task,
acb->qiov->iov[i].iov_len,
acb->qiov->iov[i].iov_base);
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static void
iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status < 0) {
error_report("Failed to sync10 data on iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static BlockDriverAIOCB *
iscsi_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
acb->iscsilun = iscsilun;
acb->canceled = 0;
acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun,
0, 0, 0, 0,
iscsi_synccache10_cb,
acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send synchronizecache10 command. %s",
iscsi_get_error(iscsi));
qemu_aio_release(acb);
return NULL;
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static int64_t
iscsi_getlength(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
int64_t len;
len = iscsilun->num_blocks;
len *= iscsilun->block_size;
return len;
}
static void
iscsi_readcapacity10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
struct IscsiTask *itask = opaque;
struct scsi_readcapacity10 *rc10;
struct scsi_task *task = command_data;
if (status != 0) {
error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
iscsi_get_error(iscsi));
itask->status = 1;
itask->complete = 1;
scsi_free_scsi_task(task);
return;
}
rc10 = scsi_datain_unmarshall(task);
if (rc10 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
itask->status = 1;
itask->complete = 1;
scsi_free_scsi_task(task);
return;
}
itask->iscsilun->block_size = rc10->block_size;
itask->iscsilun->num_blocks = rc10->lba;
itask->bs->total_sectors = (uint64_t)rc10->lba *
rc10->block_size / BDRV_SECTOR_SIZE ;
itask->status = 0;
itask->complete = 1;
scsi_free_scsi_task(task);
}
static void
iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
void *opaque)
{
struct IscsiTask *itask = opaque;
struct scsi_task *task;
if (status != 0) {
itask->status = 1;
itask->complete = 1;
return;
}
task = iscsi_readcapacity10_task(iscsi, itask->iscsilun->lun, 0, 0,
iscsi_readcapacity10_cb, opaque);
if (task == NULL) {
error_report("iSCSI: failed to send readcapacity command.");
itask->status = 1;
itask->complete = 1;
return;
}
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct IscsiTask task;
int ret;
if ((BDRV_SECTOR_SIZE % 512) != 0) {
error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
"BDRV_SECTOR_SIZE(%lld) is not a multiple "
"of 512", BDRV_SECTOR_SIZE);
return -EINVAL;
}
memset(iscsilun, 0, sizeof(IscsiLun));
/* Should really append the KVM name after the ':' here */
iscsi = iscsi_create_context("iqn.2008-11.org.linux-kvm:");
if (iscsi == NULL) {
error_report("iSCSI: Failed to create iSCSI context.");
ret = -ENOMEM;
goto failed;
}
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_report("Failed to parse URL : %s %s", filename,
iscsi_get_error(iscsi));
ret = -EINVAL;
goto failed;
}
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
error_report("iSCSI: Failed to set target name.");
ret = -EINVAL;
goto failed;
}
if (iscsi_url->user != NULL) {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_report("Failed to set initiator username and password");
ret = -EINVAL;
goto failed;
}
}
if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
error_report("iSCSI: Failed to set session type to normal.");
ret = -EINVAL;
goto failed;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
task.iscsilun = iscsilun;
task.status = 0;
task.complete = 0;
task.bs = bs;
iscsilun->iscsi = iscsi;
iscsilun->lun = iscsi_url->lun;
if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun,
iscsi_connect_cb, &task)
!= 0) {
error_report("iSCSI: Failed to start async connect.");
ret = -EINVAL;
goto failed;
}
while (!task.complete) {
iscsi_set_events(iscsilun);
qemu_aio_wait();
}
if (task.status != 0) {
error_report("iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
goto failed;
}
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
return 0;
failed:
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (iscsi != NULL) {
iscsi_destroy_context(iscsi);
}
memset(iscsilun, 0, sizeof(IscsiLun));
return ret;
}
static void iscsi_close(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
memset(iscsilun, 0, sizeof(IscsiLun));
}
static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_getlength = iscsi_getlength,
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
.bdrv_aio_flush = iscsi_aio_flush,
};
static void iscsi_block_init(void)
{
bdrv_register(&bdrv_iscsi);
}
block_init(iscsi_block_init);

31
configure vendored
View File

@ -182,6 +182,7 @@ usb_redir=""
opengl=""
zlib="yes"
guest_agent="yes"
libiscsi=""
# parse CC options first
for opt do
@ -657,6 +658,10 @@ for opt do
;;
--enable-spice) spice="yes"
;;
--disable-libiscsi) libiscsi="no"
;;
--enable-libiscsi) libiscsi="yes"
;;
--enable-profiler) profiler="yes"
;;
--enable-cocoa)
@ -1046,6 +1051,8 @@ echo " Default:trace-<pid>"
echo " --disable-spice disable spice"
echo " --enable-spice enable spice"
echo " --enable-rbd enable building the rados block device (rbd)"
echo " --disable-libiscsi disable iscsi support"
echo " --enable-libiscsi enable iscsi support"
echo " --disable-smartcard disable smartcard support"
echo " --enable-smartcard enable smartcard support"
echo " --disable-smartcard-nss disable smartcard nss support"
@ -2334,6 +2341,25 @@ if compile_prog "" "" ; then
bswap_h=yes
fi
##########################################
# Do we have libiscsi
if test "$libiscsi" != "no" ; then
cat > $TMPC << EOF
#include <iscsi/iscsi.h>
int main(void) { iscsi_create_context(""); return 0; }
EOF
if compile_prog "-Werror" "-liscsi" ; then
libiscsi="yes"
LIBS="$LIBS -liscsi"
else
if test "$libiscsi" = "yes" ; then
feature_not_found "libiscsi"
fi
libiscsi="no"
fi
fi
##########################################
# Do we need librt
cat > $TMPC <<EOF
@ -2744,6 +2770,7 @@ echo "xfsctl support $xfs"
echo "nss used $smartcard_nss"
echo "usb net redir $usb_redir"
echo "OpenGL support $opengl"
echo "libiscsi support $libiscsi"
echo "build guest agent $guest_agent"
if test "$sdl_too_old" = "yes"; then
@ -3042,6 +3069,10 @@ if test "$opengl" = "yes" ; then
echo "CONFIG_OPENGL=y" >> $config_host_mak
fi
if test "$libiscsi" = "yes" ; then
echo "CONFIG_LIBISCSI=y" >> $config_host_mak
fi
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y" >> $config_host_mak

View File

@ -505,6 +505,12 @@ escc_sunkbd_event_out(int ch) "Translated keycode %2.2x"
escc_kbd_command(int val) "Command %d"
escc_sunmouse_event(int dx, int dy, int buttons_state) "dx=%d dy=%d buttons=%01x"
# block/iscsi.c
iscsi_aio_write10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
iscsi_aio_writev(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
iscsi_aio_read10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
iscsi_aio_readv(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
# hw/esp.c
esp_raise_irq(void) "Raise IRQ"
esp_lower_irq(void) "Lower IRQ"