From ae2990c259abec198879c362dc13f7047f26c2cf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 8 Oct 2013 11:58:31 +0200 Subject: [PATCH 1/5] osdep: initialize glib threads in all QEMU tools glib versions prior to 2.31.0 require an explicit g_thread_init() call to enable multi-threading. Failure to initialize threading causes glib to take single-threaded code paths without synchronization. For example, the g_slice allocator will crash due to race conditions. Fix this for all QEMU tool programs (qemu-nbd, qemu-io, qemu-img) by moving the g_thread_init() call from vl.c:main() into a new osdep.c:thread_init() constructor function. thread_init() has __attribute__((constructor)) and is automatically invoked by the runtime during startup. We can now drop the "simple" trace backend's g_thread_init() call since thread_init() already called it. Note that we must keep coroutine-gthread.c's g_thread_init() call which is located in a constructor function. There is no guarantee for constructor function ordering so thread_init() may only be called later. Reported-by: Mario de Chenno Signed-off-by: Stefan Hajnoczi --- trace/simple.c | 9 --------- util/osdep.c | 18 ++++++++++++++++++ vl.c | 8 -------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/trace/simple.c b/trace/simple.c index 57572c4905..aaa010ee2b 100644 --- a/trace/simple.c +++ b/trace/simple.c @@ -414,15 +414,6 @@ bool trace_backend_init(const char *events, const char *file) { GThread *thread; - if (!g_thread_supported()) { -#if !GLIB_CHECK_VERSION(2, 31, 0) - g_thread_init(NULL); -#else - fprintf(stderr, "glib threading failed to initialize.\n"); - exit(1); -#endif - } - #if !GLIB_CHECK_VERSION(2, 31, 0) trace_available_cond = g_cond_new(); trace_empty_cond = g_cond_new(); diff --git a/util/osdep.c b/util/osdep.c index bd4f530ad1..a9029f8894 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -436,6 +436,24 @@ int socket_init(void) return 0; } +/* Ensure that glib is running in multi-threaded mode */ +static void __attribute__((constructor)) thread_init(void) +{ + if (!g_thread_supported()) { +#if !GLIB_CHECK_VERSION(2, 31, 0) + /* Old versions of glib require explicit initialization. Failure to do + * this results in the single-threaded code paths being taken inside + * glib. For example, the g_slice allocator will not be thread-safe + * and cause crashes. + */ + g_thread_init(NULL); +#else + fprintf(stderr, "glib threading failed to initialize.\n"); + exit(1); +#endif + } +} + #ifndef CONFIG_IOVEC /* helper function for iov_send_recv() */ static ssize_t diff --git a/vl.c b/vl.c index acd97a841c..2355227bcb 100644 --- a/vl.c +++ b/vl.c @@ -2970,14 +2970,6 @@ int main(int argc, char **argv, char **envp) qemu_init_exec_dir(argv[0]); g_mem_set_vtable(&mem_trace); - if (!g_thread_supported()) { -#if !GLIB_CHECK_VERSION(2, 31, 0) - g_thread_init(NULL); -#else - fprintf(stderr, "glib threading failed to initialize.\n"); - exit(1); -#endif - } module_call_init(MODULE_INIT_QOM); From 4fd6a984b93701fcb40a0053098ae5c2c4ee27f4 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Tue, 25 Mar 2014 00:08:54 +0530 Subject: [PATCH 2/5] qemu-img: mandate argument to 'qemu-img check --repair' qemu-img check --repair option accepts an argument. The argument to --repair switch can either be 'all' or 'leak'. Fix the long option to mandate argument with --repair switch. The patch fixes following segmentation fault Core was generated by `qemu-img check -f qcow2 --repair all t.qcow2'. Program terminated with signal 11, Segmentation fault. 0 in img_check (argc=6, argv=0x7fffab9b8a10) at qemu-img.c:588 588 if (!strcmp(optarg, "leaks")) { (gdb) bt 0 img_check (argc=6, argv=0x7fffab9b8a10) at qemu-img.c:588 1 __libc_start_main () from /lib/x86_64-linux-gnu/libc.so.6 2 _start () (gdb) Signed-off-by: Prasad Joshi Reviewed-by: Leandro Dorileo Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- qemu-img.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 2e40cc1e69..77d946b5cc 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -565,7 +565,7 @@ static int img_check(int argc, char **argv) static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"format", required_argument, 0, 'f'}, - {"repair", no_argument, 0, 'r'}, + {"repair", required_argument, 0, 'r'}, {"output", required_argument, 0, OPTION_OUTPUT}, {0, 0, 0, 0} }; From dc6fb73d219472e011d93867f5e7eebfffde0319 Mon Sep 17 00:00:00 2001 From: Deepak Kathayat Date: Mon, 24 Mar 2014 16:30:17 +0800 Subject: [PATCH 3/5] Fixed various typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Deepak Kathayat Reviewed-by: Andreas Färber Signed-off-by: Stefan Hajnoczi --- block/gluster.c | 2 +- block/qcow.c | 2 +- block/sheepdog.c | 8 ++++---- block/vdi.c | 2 +- block/vhdx-log.c | 2 +- slirp/tftp.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index a44d612923..8836085646 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -80,7 +80,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * 'server' specifies the server where the volume file specification for * the given volume resides. This can be either hostname, ipv4 address * or ipv6 address. ipv6 address needs to be within square brackets [ ]. - * If transport type is 'unix', then 'server' field should not be specifed. + * If transport type is 'unix', then 'server' field should not be specified. * The 'socket' field needs to be populated with the path to unix domain * socket. * diff --git a/block/qcow.c b/block/qcow.c index 1e128becf0..d5a7d5fd1e 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -723,7 +723,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options, backing_file = NULL; } header.cluster_bits = 9; /* 512 byte cluster to avoid copying - unmodifyed sectors */ + unmodified sectors */ header.l2_bits = 12; /* 32 KB L2 tables */ } else { header.cluster_bits = 12; /* 4 KB clusters */ diff --git a/block/sheepdog.c b/block/sheepdog.c index f7bd0242e5..0eb33ee80e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -909,9 +909,9 @@ static void co_write_request(void *opaque) } /* - * Return a socket discriptor to read/write objects. + * Return a socket descriptor to read/write objects. * - * We cannot use this discriptor for other operations because + * We cannot use this descriptor for other operations because * the block driver may be on waiting response from the server. */ static int get_sheep_fd(BDRVSheepdogState *s) @@ -1896,7 +1896,7 @@ static int sd_create_branch(BDRVSheepdogState *s) /* * Even If deletion fails, we will just create extra snapshot based on - * the workding VDI which was supposed to be deleted. So no need to + * the working VDI which was supposed to be deleted. So no need to * false bail out. */ deleted = sd_delete(s); @@ -2194,7 +2194,7 @@ cleanup: * We implement rollback(loadvm) operation to the specified snapshot by * 1) switch to the snapshot * 2) rely on sd_create_branch to delete working VDI and - * 3) create a new working VDI based on the speicified snapshot + * 3) create a new working VDI based on the specified snapshot */ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) { diff --git a/block/vdi.c b/block/vdi.c index ae49cd83ca..ac9a025624 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -31,7 +31,7 @@ * Allocation of blocks could be optimized (less writes to block map and * header). * - * Read and write of adjacents blocks could be done in one operation + * Read and write of adjacent blocks could be done in one operation * (current code uses one operation per block (1 MiB). * * The code is not thread safe (missing locks for changes in header and diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 02755b8ded..a77c040ee0 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -578,7 +578,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; - /* read_desc() will incrememnt the read idx */ + /* read_desc() will increment the read idx */ ret = vhdx_log_read_desc(bs, s, log, &desc_buffer); if (ret < 0) { goto free_and_exit; diff --git a/slirp/tftp.c b/slirp/tftp.c index 1a79c45cfb..a329fb281b 100644 --- a/slirp/tftp.c +++ b/slirp/tftp.c @@ -279,7 +279,7 @@ static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) spt = &slirp->tftp_sessions[s]; - /* unspecifed prefix means service disabled */ + /* unspecified prefix means service disabled */ if (!slirp->tftp_prefix) { tftp_send_error(spt, 2, "Access violation", tp); return; From cc8c9d6c6f28e4e376a6561a2a31524fd069bc2d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 21 Mar 2014 13:55:18 +0100 Subject: [PATCH 4/5] mirror: fix throttling delay calculation The throttling delay calculation was using an inaccurate sector count to calculate the time to sleep. This broke rate-limiting for the block mirror job. Move the delay calculation into mirror_iteration() where we know how many sectors were transferred. This lets us calculate an accurate delay time. Reported-by: Joaquim Barrera Signed-off-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- block/mirror.c | 28 +++++++++++++++------------- trace-events | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index dd5ee056b4..adb09cf4ab 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -139,11 +139,12 @@ static void mirror_read_complete(void *opaque, int ret) mirror_write_complete, op); } -static void coroutine_fn mirror_iteration(MirrorBlockJob *s) +static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->common.bs; int nb_sectors, sectors_per_chunk, nb_chunks; int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; + uint64_t delay_ns; MirrorOp *op; s->sector_num = hbitmap_iter_next(&s->hbi); @@ -231,7 +232,12 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) nb_chunks += added_chunks; next_sector += added_sectors; next_chunk += added_chunks; - } while (next_sector < end); + if (!s->synced && s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors); + } else { + delay_ns = 0; + } + } while (delay_ns == 0 && next_sector < end); /* Allocate a MirrorOp that is used as an AIO callback. */ op = g_slice_new(MirrorOp); @@ -268,6 +274,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_one_iteration(s, sector_num, nb_sectors); bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, mirror_read_complete, op); + return delay_ns; } static void mirror_free_init(MirrorBlockJob *s) @@ -362,7 +369,7 @@ static void coroutine_fn mirror_run(void *opaque) bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi); last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); for (;;) { - uint64_t delay_ns; + uint64_t delay_ns = 0; int64_t cnt; bool should_complete; @@ -386,8 +393,10 @@ static void coroutine_fn mirror_run(void *opaque) qemu_coroutine_yield(); continue; } else if (cnt != 0) { - mirror_iteration(s); - continue; + delay_ns = mirror_iteration(s); + if (delay_ns == 0) { + continue; + } } } @@ -432,17 +441,10 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt, s->synced); + trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); if (!s->synced) { /* Publish progress */ s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; - - if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk); - } else { - delay_ns = 0; - } - block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; diff --git a/trace-events b/trace-events index 002c2604d8..3b7ff4d970 100644 --- a/trace-events +++ b/trace-events @@ -82,7 +82,7 @@ mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_flush(void *s) "s %p" mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 -mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" +mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns" mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d" mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" From 7b770c720b28b8ac5b82ae431f2f354b7f8add91 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 21 Mar 2014 13:55:19 +0100 Subject: [PATCH 5/5] mirror: fix early wake from sleep due to aio The mirror blockjob coroutine rate-limits itself by sleeping. The coroutine also performs I/O asynchronously so it's important that the aio callback doesn't wake the coroutine early as that breaks rate-limiting. Reported-by: Joaquim Barrera Signed-off-by: Stefan Hajnoczi --- block/mirror.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index adb09cf4ab..0ef41f999e 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -98,7 +98,14 @@ static void mirror_iteration_done(MirrorOp *op, int ret) qemu_iovec_destroy(&op->qiov); g_slice_free(MirrorOp, op); - qemu_coroutine_enter(s->common.co, NULL); + + /* Enter coroutine when it is not sleeping. The coroutine sleeps to + * rate-limit itself. The coroutine will eventually resume since there is + * a sleep timeout so don't wake it early. + */ + if (s->common.busy) { + qemu_coroutine_enter(s->common.co, NULL); + } } static void mirror_write_complete(void *opaque, int ret)