diff --git a/block.c b/block.c index 9bb236c989..86910b046c 100644 --- a/block.c +++ b/block.c @@ -816,6 +816,13 @@ bool bdrv_dev_has_removable_media(BlockDriverState *bs) return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb); } +void bdrv_dev_eject_request(BlockDriverState *bs, bool force) +{ + if (bs->dev_ops && bs->dev_ops->eject_request_cb) { + bs->dev_ops->eject_request_cb(bs->dev_opaque, force); + } +} + bool bdrv_dev_is_tray_open(BlockDriverState *bs) { if (bs->dev_ops && bs->dev_ops->is_tray_open) { @@ -2782,12 +2789,27 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { + int ret; + + if (!bs->drv) { + return 0; + } + + /* Write back cached data to the OS even with cache=unsafe */ + if (bs->drv->bdrv_co_flush_to_os) { + ret = bs->drv->bdrv_co_flush_to_os(bs); + if (ret < 0) { + return ret; + } + } + + /* But don't actually force it to the disk with cache=unsafe */ if (bs->open_flags & BDRV_O_NO_FLUSH) { return 0; - } else if (!bs->drv) { - return 0; - } else if (bs->drv->bdrv_co_flush) { - return bs->drv->bdrv_co_flush(bs); + } + + if (bs->drv->bdrv_co_flush_to_disk) { + return bs->drv->bdrv_co_flush_to_disk(bs); } else if (bs->drv->bdrv_aio_flush) { BlockDriverAIOCB *acb; CoroutineIOCompletion co = { diff --git a/block.h b/block.h index 38cd748b60..051a25d8d6 100644 --- a/block.h +++ b/block.h @@ -38,6 +38,15 @@ typedef struct BlockDevOps { * Device models with removable media must implement this callback. */ void (*change_media_cb)(void *opaque, bool load); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); /* * Is the virtual tray open? * Device models implement this only when the device has a tray. @@ -111,6 +120,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev); void *bdrv_get_attached_dev(BlockDriverState *bs); void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, void *opaque); +void bdrv_dev_eject_request(BlockDriverState *bs, bool force); bool bdrv_dev_has_removable_media(BlockDriverState *bs); bool bdrv_dev_is_tray_open(BlockDriverState *bs); bool bdrv_dev_is_medium_locked(BlockDriverState *bs); diff --git a/block/cow.c b/block/cow.c index 707c0aad88..089d395c40 100644 --- a/block/cow.c +++ b/block/cow.c @@ -326,16 +326,18 @@ static QEMUOptionParameter cow_create_options[] = { }; static BlockDriver bdrv_cow = { - .format_name = "cow", - .instance_size = sizeof(BDRVCowState), - .bdrv_probe = cow_probe, - .bdrv_open = cow_open, - .bdrv_read = cow_co_read, - .bdrv_write = cow_co_write, - .bdrv_close = cow_close, - .bdrv_create = cow_create, - .bdrv_co_flush = cow_co_flush, - .bdrv_is_allocated = cow_is_allocated, + .format_name = "cow", + .instance_size = sizeof(BDRVCowState), + + .bdrv_probe = cow_probe, + .bdrv_open = cow_open, + .bdrv_close = cow_close, + .bdrv_create = cow_create, + + .bdrv_read = cow_co_read, + .bdrv_write = cow_co_write, + .bdrv_co_flush_to_disk = cow_co_flush, + .bdrv_is_allocated = cow_is_allocated, .create_options = cow_create_options, }; diff --git a/block/qcow.c b/block/qcow.c index 35e21eb6b3..adecee06c9 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -828,14 +828,16 @@ static BlockDriver bdrv_qcow = { .bdrv_open = qcow_open, .bdrv_close = qcow_close, .bdrv_create = qcow_create, - .bdrv_is_allocated = qcow_is_allocated, - .bdrv_set_key = qcow_set_key, - .bdrv_make_empty = qcow_make_empty, - .bdrv_co_readv = qcow_co_readv, - .bdrv_co_writev = qcow_co_writev, - .bdrv_co_flush = qcow_co_flush, - .bdrv_write_compressed = qcow_write_compressed, - .bdrv_get_info = qcow_get_info, + + .bdrv_co_readv = qcow_co_readv, + .bdrv_co_writev = qcow_co_writev, + .bdrv_co_flush_to_disk = qcow_co_flush, + .bdrv_is_allocated = qcow_is_allocated, + + .bdrv_set_key = qcow_set_key, + .bdrv_make_empty = qcow_make_empty, + .bdrv_write_compressed = qcow_write_compressed, + .bdrv_get_info = qcow_get_info, .create_options = qcow_create_options, }; diff --git a/block/qcow2.c b/block/qcow2.c index ef057d31e0..5c784eee51 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1105,7 +1105,7 @@ fail: return ret; } -static int qcow2_co_flush(BlockDriverState *bs) +static int qcow2_co_flush_to_os(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; int ret; @@ -1124,6 +1124,11 @@ static int qcow2_co_flush(BlockDriverState *bs) } qemu_co_mutex_unlock(&s->lock); + return 0; +} + +static int qcow2_co_flush_to_disk(BlockDriverState *bs) +{ return bdrv_co_flush(bs->file); } @@ -1243,9 +1248,10 @@ static BlockDriver bdrv_qcow2 = { .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, - .bdrv_co_readv = qcow2_co_readv, - .bdrv_co_writev = qcow2_co_writev, - .bdrv_co_flush = qcow2_co_flush, + .bdrv_co_readv = qcow2_co_readv, + .bdrv_co_writev = qcow2_co_writev, + .bdrv_co_flush_to_os = qcow2_co_flush_to_os, + .bdrv_co_flush_to_disk = qcow2_co_flush_to_disk, .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, diff --git a/block/raw-win32.c b/block/raw-win32.c index f5f73bcd64..e4b0b75b70 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -281,9 +281,11 @@ static BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_co_flush = raw_flush, - .bdrv_read = raw_read, - .bdrv_write = raw_write, + + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_co_flush_to_disk = raw_flush, + .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size @@ -409,11 +411,12 @@ static BlockDriver bdrv_host_device = { .bdrv_probe_device = hdev_probe_device, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, - .bdrv_co_flush = raw_flush, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_read = raw_read, - .bdrv_write = raw_write, + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_co_flush_to_disk = raw_flush, + .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/block/raw.c b/block/raw.c index 33cc4716d3..6098070d65 100644 --- a/block/raw.c +++ b/block/raw.c @@ -111,10 +111,10 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, - .bdrv_co_readv = raw_co_readv, - .bdrv_co_writev = raw_co_writev, - .bdrv_co_flush = raw_co_flush, - .bdrv_co_discard = raw_co_discard, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, + .bdrv_co_flush_to_disk = raw_co_flush, + .bdrv_co_discard = raw_co_discard, .bdrv_probe = raw_probe, .bdrv_getlength = raw_getlength, diff --git a/block/rbd.c b/block/rbd.c index c684e0cb0b..9088c52d24 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -851,18 +851,18 @@ static BlockDriver bdrv_rbd = { .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_create = qemu_rbd_create, - .bdrv_co_flush = qemu_rbd_co_flush, .bdrv_get_info = qemu_rbd_getinfo, .create_options = qemu_rbd_create_options, .bdrv_getlength = qemu_rbd_getlength, .bdrv_truncate = qemu_rbd_truncate, .protocol_name = "rbd", - .bdrv_aio_readv = qemu_rbd_aio_readv, - .bdrv_aio_writev = qemu_rbd_aio_writev, + .bdrv_aio_readv = qemu_rbd_aio_readv, + .bdrv_aio_writev = qemu_rbd_aio_writev, + .bdrv_co_flush_to_disk = qemu_rbd_co_flush, - .bdrv_snapshot_create = qemu_rbd_snap_create, - .bdrv_snapshot_list = qemu_rbd_snap_list, + .bdrv_snapshot_create = qemu_rbd_snap_create, + .bdrv_snapshot_list = qemu_rbd_snap_list, }; static void bdrv_rbd_init(void) diff --git a/block/vdi.c b/block/vdi.c index 523a6409c5..684a4a87b6 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -980,7 +980,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_create = vdi_create, - .bdrv_co_flush = vdi_co_flush, + .bdrv_co_flush_to_disk = vdi_co_flush, .bdrv_is_allocated = vdi_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/block/vmdk.c b/block/vmdk.c index 985006e203..e53a2f0920 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1581,8 +1581,8 @@ static BlockDriver bdrv_vmdk = { .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, - .bdrv_co_flush = vmdk_co_flush, - .bdrv_is_allocated = vmdk_is_allocated, + .bdrv_co_flush_to_disk = vmdk_co_flush, + .bdrv_is_allocated = vmdk_is_allocated, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .create_options = vmdk_create_options, diff --git a/block/vpc.c b/block/vpc.c index 79be7d051b..39a324705d 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -617,7 +617,11 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) memcpy(dyndisk_header->magic, "cxsparse", 8); - dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF); + /* + * Note: The spec is actually wrong here for data_offset, it says + * 0xFFFFFFFF, but MS tools expect all 64 bits to be set. + */ + dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL); dyndisk_header->table_offset = be64_to_cpu(3 * 512); dyndisk_header->version = be32_to_cpu(0x00010000); dyndisk_header->block_size = be32_to_cpu(block_size); @@ -661,14 +665,16 @@ static QEMUOptionParameter vpc_create_options[] = { static BlockDriver bdrv_vpc = { .format_name = "vpc", .instance_size = sizeof(BDRVVPCState), + .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, - .bdrv_read = vpc_co_read, - .bdrv_write = vpc_co_write, - .bdrv_co_flush = vpc_co_flush, .bdrv_close = vpc_close, .bdrv_create = vpc_create, + .bdrv_read = vpc_co_read, + .bdrv_write = vpc_co_write, + .bdrv_co_flush_to_disk = vpc_co_flush, + .create_options = vpc_create_options, }; diff --git a/block/vvfat.c b/block/vvfat.c index 8511fe523c..131680f6f4 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1254,15 +1254,15 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, return -1; if (s->qcow) { int n; - if (s->qcow->drv->bdrv_is_allocated(s->qcow, - sector_num, nb_sectors-i, &n)) { + if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) { DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n)); - if (s->qcow->drv->bdrv_read(s->qcow, sector_num, buf+i*0x200, n)) - return -1; - i += n - 1; - sector_num += n - 1; - continue; - } + if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) { + return -1; + } + i += n - 1; + sector_num += n - 1; + continue; + } DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); } if(sector_numfaked_sectors) { @@ -1516,7 +1516,7 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num) return 0; for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) - was_modified = s->qcow->drv->bdrv_is_allocated(s->qcow, + was_modified = bdrv_is_allocated(s->qcow, cluster2sector(s, cluster_num) + i, 1, &dummy); return was_modified; @@ -1665,16 +1665,16 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, int64_t offset = cluster2sector(s, cluster_num); vvfat_close_current_file(s); - for (i = 0; i < s->sectors_per_cluster; i++) - if (!s->qcow->drv->bdrv_is_allocated(s->qcow, - offset + i, 1, &dummy)) { - if (vvfat_read(s->bs, - offset, s->cluster_buffer, 1)) - return -1; - if (s->qcow->drv->bdrv_write(s->qcow, - offset, s->cluster_buffer, 1)) - return -2; - } + for (i = 0; i < s->sectors_per_cluster; i++) { + if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) { + if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) { + return -1; + } + if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) { + return -2; + } + } + } } } @@ -2619,7 +2619,9 @@ static int do_commit(BDRVVVFATState* s) return ret; } - s->qcow->drv->bdrv_make_empty(s->qcow); + if (s->qcow->drv->bdrv_make_empty) { + s->qcow->drv->bdrv_make_empty(s->qcow); + } memset(s->used_clusters, 0, sector2cluster(s, s->sector_count)); @@ -2714,7 +2716,7 @@ DLOG(checkpoint()); * Use qcow backend. Commit later. */ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = s->qcow->drv->bdrv_write(s->qcow, sector_num, buf, nb_sectors); + ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); if (ret < 0) { fprintf(stderr, "Error writing to qcow backend\n"); return ret; diff --git a/block_int.h b/block_int.h index f4547f6d93..1ec4921cc6 100644 --- a/block_int.h +++ b/block_int.h @@ -84,10 +84,22 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); - int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example raw-posix calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs, int num_reqs); int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a, diff --git a/blockdev.c b/blockdev.c index 0827bf7743..222818690d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -635,10 +635,12 @@ static int eject_device(Monitor *mon, BlockDriverState *bs, int force) qerror_report(QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs)); return -1; } - if (!force && !bdrv_dev_is_tray_open(bs) - && bdrv_dev_is_medium_locked(bs)) { - qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs)); - return -1; + if (bdrv_dev_is_medium_locked(bs) && !bdrv_dev_is_tray_open(bs)) { + bdrv_dev_eject_request(bs, force); + if (!force) { + qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs)); + return -1; + } } bdrv_close(bs); return 0; diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 90b6729692..1fed359ab1 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -516,9 +516,14 @@ static unsigned int event_status_media(IDEState *s, /* Event notification descriptor */ event_code = MEC_NO_CHANGE; - if (media_status != MS_TRAY_OPEN && s->events.new_media) { - event_code = MEC_NEW_MEDIA; - s->events.new_media = false; + if (media_status != MS_TRAY_OPEN) { + if (s->events.new_media) { + event_code = MEC_NEW_MEDIA; + s->events.new_media = false; + } else if (s->events.eject_request) { + event_code = MEC_EJECT_REQUESTED; + s->events.eject_request = false; + } } buf[4] = event_code; diff --git a/hw/ide/core.c b/hw/ide/core.c index 9a2fd30607..93a1a689c4 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -804,6 +804,18 @@ static void ide_cd_change_cb(void *opaque, bool load) */ s->cdrom_changed = 1; s->events.new_media = true; + s->events.eject_request = false; + ide_set_irq(s->bus); +} + +static void ide_cd_eject_request_cb(void *opaque, bool force) +{ + IDEState *s = opaque; + + s->events.eject_request = true; + if (force) { + s->tray_locked = false; + } ide_set_irq(s->bus); } @@ -1811,6 +1823,7 @@ static bool ide_cd_is_medium_locked(void *opaque) static const BlockDevOps ide_cd_block_ops = { .change_media_cb = ide_cd_change_cb, + .eject_request_cb = ide_cd_eject_request_cb, .is_tray_open = ide_cd_is_tray_open, .is_medium_locked = ide_cd_is_medium_locked, }; diff --git a/hw/pc.c b/hw/pc.c index 3015671858..33778fe422 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -335,7 +335,7 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, ISADevice *s) { int val, nb, nb_heads, max_track, last_sect, i; - FDriveType fd_type[2]; + FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; BlockDriverState *fd[MAX_FD]; static pc_cmos_init_late_arg arg; @@ -385,8 +385,6 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, bdrv_get_floppy_geometry_hint(fd[i], &nb_heads, &max_track, &last_sect, FDRIVE_DRV_NONE, &fd_type[i]); - } else { - fd_type[i] = FDRIVE_DRV_NONE; } } } diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 1c04872af7..62f538f4f8 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -65,6 +65,7 @@ struct SCSIDiskState uint32_t removable; bool media_changed; bool media_event; + bool eject_request; QEMUBH *bh; char *version; char *serial; @@ -671,9 +672,14 @@ static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf) /* Event notification descriptor */ event_code = MEC_NO_CHANGE; - if (media_status != MS_TRAY_OPEN && s->media_event) { - event_code = MEC_NEW_MEDIA; - s->media_event = false; + if (media_status != MS_TRAY_OPEN) { + if (s->media_event) { + event_code = MEC_NEW_MEDIA; + s->media_event = false; + } else if (s->eject_request) { + event_code = MEC_EJECT_REQUESTED; + s->eject_request = false; + } } outbuf[0] = event_code; @@ -1470,6 +1476,17 @@ static void scsi_cd_change_media_cb(void *opaque, bool load) s->tray_open = !load; s->qdev.unit_attention = SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM); s->media_event = true; + s->eject_request = false; +} + +static void scsi_cd_eject_request_cb(void *opaque, bool force) +{ + SCSIDiskState *s = opaque; + + s->eject_request = true; + if (force) { + s->tray_locked = false; + } } static bool scsi_cd_is_tray_open(void *opaque) @@ -1484,6 +1501,7 @@ static bool scsi_cd_is_medium_locked(void *opaque) static const BlockDevOps scsi_cd_block_ops = { .change_media_cb = scsi_cd_change_media_cb, + .eject_request_cb = scsi_cd_eject_request_cb, .is_tray_open = scsi_cd_is_tray_open, .is_medium_locked = scsi_cd_is_medium_locked, }; diff --git a/nbd.c b/nbd.c index fb5e424abe..e6c931c5ce 100644 --- a/nbd.c +++ b/nbd.c @@ -425,6 +425,13 @@ int nbd_client(int fd) TRACE("Doing NBD loop"); ret = ioctl(fd, NBD_DO_IT); + if (ret == -1 && errno == EPIPE) { + /* NBD_DO_IT normally returns EPIPE when someone has disconnected + * the socket via NBD_DISCONNECT. We do not want to return 1 in + * that case. + */ + ret = 0; + } serrno = errno; TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); diff --git a/qemu-nbd.c b/qemu-nbd.c index d8d3e15a84..291cba2eaa 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -31,12 +31,17 @@ #include #include #include +#include #define SOCKET_PATH "/var/lock/qemu-nbd-%s" #define NBD_BUFFER_SIZE (1024*1024) +static int sigterm_wfd; static int verbose; +static char *device; +static char *srcpath; +static char *sockpath; static void usage(const char *name) { @@ -163,23 +168,82 @@ static int find_partition(BlockDriverState *bs, int partition, return -1; } -static void show_parts(const char *device) +static void termsig_handler(int signum) { - if (fork() == 0) { - int nbd; - - /* linux just needs an open() to trigger - * the partition table update - * but remember to load the module with max_part != 0 : - * modprobe nbd max_part=63 - */ - nbd = open(device, O_RDWR); - if (nbd != -1) - close(nbd); - exit(0); + static int sigterm_reported; + if (!sigterm_reported) { + sigterm_reported = (write(sigterm_wfd, "", 1) == 1); } } +static void *show_parts(void *arg) +{ + int nbd; + + /* linux just needs an open() to trigger + * the partition table update + * but remember to load the module with max_part != 0 : + * modprobe nbd max_part=63 + */ + nbd = open(device, O_RDWR); + if (nbd != -1) { + close(nbd); + } + return NULL; +} + +static void *nbd_client_thread(void *arg) +{ + int fd = *(int *)arg; + off_t size; + size_t blocksize; + uint32_t nbdflags; + int sock; + int ret; + pthread_t show_parts_thread; + + do { + sock = unix_socket_outgoing(sockpath); + if (sock == -1) { + goto out; + } + } while (sock == -1); + + ret = nbd_receive_negotiate(sock, NULL, &nbdflags, + &size, &blocksize); + if (ret == -1) { + goto out; + } + + ret = nbd_init(fd, sock, nbdflags, size, blocksize); + if (ret == -1) { + goto out; + } + + /* update partition table */ + pthread_create(&show_parts_thread, NULL, show_parts, NULL); + + if (verbose) { + fprintf(stderr, "NBD device %s is now connected to %s\n", + device, srcpath); + } else { + /* Close stderr so that the qemu-nbd process exits. */ + dup2(STDOUT_FILENO, STDERR_FILENO); + } + + ret = nbd_client(fd); + if (ret) { + goto out; + } + close(fd); + kill(getpid(), SIGTERM); + return (void *) EXIT_SUCCESS; + +out: + kill(getpid(), SIGTERM); + return (void *) EXIT_FAILURE; +} + int main(int argc, char **argv) { BlockDriverState *bs; @@ -192,9 +256,6 @@ int main(int argc, char **argv) struct sockaddr_in addr; socklen_t addr_len = sizeof(addr); off_t fd_size; - char *device = NULL; - char *socket = NULL; - char sockpath[128]; const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t"; struct option lopt[] = { { "help", 0, NULL, 'h' }, @@ -230,6 +291,21 @@ int main(int argc, char **argv) int nb_fds = 0; int max_fd; int persistent = 0; + pthread_t client_thread; + + /* The client thread uses SIGTERM to interrupt the server. A signal + * handler ensures that "qemu-nbd -v -c" exits with a nice status code. + */ + struct sigaction sa_sigterm; + int sigterm_fd[2]; + if (qemu_pipe(sigterm_fd) == -1) { + err(EXIT_FAILURE, "Error setting up communication pipe"); + } + + sigterm_wfd = sigterm_fd[1]; + memset(&sa_sigterm, 0, sizeof(sa_sigterm)); + sa_sigterm.sa_handler = termsig_handler; + sigaction(SIGTERM, &sa_sigterm, NULL); while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { @@ -273,8 +349,8 @@ int main(int argc, char **argv) errx(EXIT_FAILURE, "Invalid partition %d", partition); break; case 'k': - socket = optarg; - if (socket[0] != '/') + sockpath = optarg; + if (sockpath[0] != '/') errx(EXIT_FAILURE, "socket path must be absolute\n"); break; case 'd': @@ -332,11 +408,79 @@ int main(int argc, char **argv) return 0; } + if (device && !verbose) { + int stderr_fd[2]; + pid_t pid; + int ret; + + if (qemu_pipe(stderr_fd) == -1) { + err(EXIT_FAILURE, "Error setting up communication pipe"); + } + + /* Now daemonize, but keep a communication channel open to + * print errors and exit with the proper status code. + */ + pid = fork(); + if (pid == 0) { + close(stderr_fd[0]); + ret = qemu_daemon(0, 0); + + /* Temporarily redirect stderr to the parent's pipe... */ + dup2(stderr_fd[1], STDERR_FILENO); + if (ret == -1) { + err(EXIT_FAILURE, "Failed to daemonize"); + } + + /* ... close the descriptor we inherited and go on. */ + close(stderr_fd[1]); + } else { + bool errors = false; + char *buf; + + /* In the parent. Print error messages from the child until + * it closes the pipe. + */ + close(stderr_fd[1]); + buf = g_malloc(1024); + while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { + errors = true; + ret = qemu_write_full(STDERR_FILENO, buf, ret); + if (ret == -1) { + exit(EXIT_FAILURE); + } + } + if (ret == -1) { + err(EXIT_FAILURE, "Cannot read from daemon"); + } + + /* Usually the daemon should not print any message. + * Exit with zero status in that case. + */ + exit(errors); + } + } + + if (device) { + /* Open before spawning new threads. In the future, we may + * drop privileges after opening. + */ + fd = open(device, O_RDWR); + if (fd == -1) { + err(EXIT_FAILURE, "Failed to open %s", device); + } + + if (sockpath == NULL) { + sockpath = g_malloc(128); + snprintf(sockpath, 128, SOCKET_PATH, basename(device)); + } + } + bdrv_init(); + atexit(bdrv_close_all); bs = bdrv_new("hda"); - - if ((ret = bdrv_open(bs, argv[optind], flags, NULL)) < 0) { + srcpath = argv[optind]; + if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) { errno = -ret; err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]); } @@ -344,118 +488,54 @@ int main(int argc, char **argv) fd_size = bs->total_sectors * 512; if (partition != -1 && - find_partition(bs, partition, &dev_offset, &fd_size)) + find_partition(bs, partition, &dev_offset, &fd_size)) { err(EXIT_FAILURE, "Could not find partition %d", partition); - - if (device) { - pid_t pid; - int sock; - - /* want to fail before daemonizing */ - if (access(device, R_OK|W_OK) == -1) { - err(EXIT_FAILURE, "Could not access '%s'", device); - } - - if (!verbose) { - /* detach client and server */ - if (qemu_daemon(0, 0) == -1) { - err(EXIT_FAILURE, "Failed to daemonize"); - } - } - - if (socket == NULL) { - snprintf(sockpath, sizeof(sockpath), SOCKET_PATH, - basename(device)); - socket = sockpath; - } - - pid = fork(); - if (pid < 0) - return 1; - if (pid != 0) { - off_t size; - size_t blocksize; - - ret = 0; - bdrv_close(bs); - - do { - sock = unix_socket_outgoing(socket); - if (sock == -1) { - if (errno != ENOENT && errno != ECONNREFUSED) { - ret = 1; - goto out; - } - sleep(1); /* wait children */ - } - } while (sock == -1); - - fd = open(device, O_RDWR); - if (fd == -1) { - ret = 1; - goto out; - } - - ret = nbd_receive_negotiate(sock, NULL, &nbdflags, - &size, &blocksize); - if (ret == -1) { - ret = 1; - goto out; - } - - ret = nbd_init(fd, sock, nbdflags, size, blocksize); - if (ret == -1) { - ret = 1; - goto out; - } - - printf("NBD device %s is now connected to file %s\n", - device, argv[optind]); - - /* update partition table */ - - show_parts(device); - - ret = nbd_client(fd); - if (ret) { - ret = 1; - } - close(fd); - out: - kill(pid, SIGTERM); - unlink(socket); - - return ret; - } - /* children */ } sharing_fds = g_malloc((shared + 1) * sizeof(int)); - if (socket) { - sharing_fds[0] = unix_socket_incoming(socket); + if (sockpath) { + sharing_fds[0] = unix_socket_incoming(sockpath); } else { sharing_fds[0] = tcp_socket_incoming(bindto, port); } if (sharing_fds[0] == -1) return 1; + + if (device) { + int ret; + + ret = pthread_create(&client_thread, NULL, nbd_client_thread, &fd); + if (ret != 0) { + errx(EXIT_FAILURE, "Failed to create client thread: %s", + strerror(ret)); + } + } else { + /* Shut up GCC warnings. */ + memset(&client_thread, 0, sizeof(client_thread)); + } + max_fd = sharing_fds[0]; nb_fds++; data = qemu_blockalign(bs, NBD_BUFFER_SIZE); - if (data == NULL) + if (data == NULL) { errx(EXIT_FAILURE, "Cannot allocate data buffer"); + } do { - FD_ZERO(&fds); + FD_SET(sigterm_fd[0], &fds); for (i = 0; i < nb_fds; i++) FD_SET(sharing_fds[i], &fds); - ret = select(max_fd + 1, &fds, NULL, NULL, NULL); - if (ret == -1) + do { + ret = select(max_fd + 1, &fds, NULL, NULL, NULL); + } while (ret == -1 && errno == EINTR); + if (ret == -1 || FD_ISSET(sigterm_fd[0], &fds)) { break; + } if (FD_ISSET(sharing_fds[0], &fds)) ret--; @@ -489,10 +569,16 @@ int main(int argc, char **argv) qemu_vfree(data); close(sharing_fds[0]); - bdrv_close(bs); g_free(sharing_fds); - if (socket) - unlink(socket); + if (sockpath) { + unlink(sockpath); + } - return 0; + if (device) { + void *ret; + pthread_join(client_thread, &ret); + exit(ret != NULL); + } else { + exit(EXIT_SUCCESS); + } }