diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 7879d13ddb..909f0517f8 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -52,8 +52,6 @@ struct BdrvDirtyBitmap { Such operations must fail and both the image and this bitmap must remain unchanged while this flag is set. */ - bool autoload; /* For persistent bitmaps: bitmap must be - autoloaded on image opening */ bool persistent; /* bitmap must be saved to owner disk image */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -104,7 +102,6 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) g_free(bitmap->name); bitmap->name = NULL; bitmap->persistent = false; - bitmap->autoload = false; } /* Called with BQL taken. */ @@ -261,8 +258,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, bitmap->successor = NULL; successor->persistent = bitmap->persistent; bitmap->persistent = false; - successor->autoload = bitmap->autoload; - bitmap->autoload = false; bdrv_release_dirty_bitmap(bs, bitmap); return successor; @@ -666,19 +661,6 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs) return false; } -/* Called with BQL taken. */ -void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload) -{ - qemu_mutex_lock(bitmap->mutex); - bitmap->autoload = autoload; - qemu_mutex_unlock(bitmap->mutex); -} - -bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap) -{ - return bitmap->autoload; -} - /* Called with BQL taken. */ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent) { diff --git a/block/gluster.c b/block/gluster.c index d8decc41ad..3f17b7819d 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -965,12 +965,68 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, } #endif +static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + int64_t current_length; + + current_length = glfs_lseek(fd, 0, SEEK_END); + if (current_length < 0) { + error_setg_errno(errp, errno, "Failed to determine current size"); + return -errno; + } + + if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + if (current_length == offset) { + return 0; + } + + switch (prealloc) { +#ifdef CONFIG_GLUSTERFS_FALLOCATE + case PREALLOC_MODE_FALLOC: + if (glfs_fallocate(fd, 0, current_length, offset - current_length)) { + error_setg_errno(errp, errno, "Could not preallocate data"); + return -errno; + } + break; +#endif /* CONFIG_GLUSTERFS_FALLOCATE */ +#ifdef CONFIG_GLUSTERFS_ZEROFILL + case PREALLOC_MODE_FULL: + if (glfs_ftruncate(fd, offset)) { + error_setg_errno(errp, errno, "Could not resize file"); + return -errno; + } + if (glfs_zerofill(fd, current_length, offset - current_length)) { + error_setg_errno(errp, errno, "Could not zerofill the new area"); + return -errno; + } + break; +#endif /* CONFIG_GLUSTERFS_ZEROFILL */ + case PREALLOC_MODE_OFF: + if (glfs_ftruncate(fd, offset)) { + error_setg_errno(errp, errno, "Could not resize file"); + return -errno; + } + break; + default: + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_str(prealloc)); + return -EINVAL; + } + + return 0; +} + static int qemu_gluster_create(const char *filename, QemuOpts *opts, Error **errp) { BlockdevOptionsGluster *gconf; struct glfs *glfs; - struct glfs_fd *fd; + struct glfs_fd *fd = NULL; int ret = 0; PreallocMode prealloc; int64_t total_size = 0; @@ -1019,45 +1075,14 @@ static int qemu_gluster_create(const char *filename, goto out; } - switch (prealloc) { -#ifdef CONFIG_GLUSTERFS_FALLOCATE - case PREALLOC_MODE_FALLOC: - if (glfs_fallocate(fd, 0, 0, total_size)) { - error_setg(errp, "Could not preallocate data for the new file"); - ret = -errno; - } - break; -#endif /* CONFIG_GLUSTERFS_FALLOCATE */ -#ifdef CONFIG_GLUSTERFS_ZEROFILL - case PREALLOC_MODE_FULL: - if (!glfs_ftruncate(fd, total_size)) { - if (glfs_zerofill(fd, 0, total_size)) { - error_setg(errp, "Could not zerofill the new file"); - ret = -errno; - } - } else { - error_setg(errp, "Could not resize file"); - ret = -errno; - } - break; -#endif /* CONFIG_GLUSTERFS_ZEROFILL */ - case PREALLOC_MODE_OFF: - if (glfs_ftruncate(fd, total_size) != 0) { - ret = -errno; - error_setg(errp, "Could not resize file"); - } - break; - default: - ret = -EINVAL; - error_setg(errp, "Unsupported preallocation mode: %s", - PreallocMode_str(prealloc)); - break; - } + ret = qemu_gluster_do_truncate(fd, total_size, prealloc, errp); - if (glfs_close(fd) != 0) { - ret = -errno; - } out: + if (fd) { + if (glfs_close(fd) != 0 && ret == 0) { + ret = -errno; + } + } qapi_free_BlockdevOptionsGluster(gconf); glfs_clear_preopened(glfs); return ret; @@ -1097,23 +1122,8 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset, PreallocMode prealloc, Error **errp) { - int ret; BDRVGlusterState *s = bs->opaque; - - if (prealloc != PREALLOC_MODE_OFF) { - error_setg(errp, "Unsupported preallocation mode '%s'", - PreallocMode_str(prealloc)); - return -ENOTSUP; - } - - ret = glfs_ftruncate(s->fd, offset); - if (ret < 0) { - ret = -errno; - error_setg_errno(errp, -ret, "Failed to truncate file"); - return ret; - } - - return 0; + return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp); } static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index efa10c6663..4f6fd863ea 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -933,14 +933,14 @@ static void set_readonly_helper(gpointer bitmap, gpointer value) bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value); } -/* qcow2_load_autoloading_dirty_bitmaps() +/* qcow2_load_dirty_bitmaps() * Return value is a hint for caller: true means that the Qcow2 header was * updated. (false doesn't mean that the header should be updated by the * caller, it just means that updating was not needed or the image cannot be * written to). * On failure the function returns false. */ -bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp) +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp) { BDRVQcow2State *s = bs->opaque; Qcow2BitmapList *bm_list; @@ -960,14 +960,16 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp) } QSIMPLEQ_FOREACH(bm, bm_list, entry) { - if ((bm->flags & BME_FLAG_AUTO) && !(bm->flags & BME_FLAG_IN_USE)) { + if (!(bm->flags & BME_FLAG_IN_USE)) { BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp); if (bitmap == NULL) { goto fail; } + if (!(bm->flags & BME_FLAG_AUTO)) { + bdrv_disable_dirty_bitmap(bitmap); + } bdrv_dirty_bitmap_set_persistance(bitmap, true); - bdrv_dirty_bitmap_set_autoload(bitmap, true); bm->flags |= BME_FLAG_IN_USE; created_dirty_bitmaps = g_slist_append(created_dirty_bitmaps, bitmap); @@ -1369,7 +1371,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp) bm->table.size = 0; QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry); } - bm->flags = bdrv_dirty_bitmap_get_autoload(bitmap) ? BME_FLAG_AUTO : 0; + bm->flags = bdrv_dirty_bitmap_enabled(bitmap) ? BME_FLAG_AUTO : 0; bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap)); bm->dirty_bitmap = bitmap; } diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index c48ffebd8f..d9dafa31e5 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -39,26 +39,23 @@ struct Qcow2Cache { Qcow2CachedTable *entries; struct Qcow2Cache *depends; int size; + int table_size; bool depends_on_flush; void *table_array; uint64_t lru_counter; uint64_t cache_clean_lru_counter; }; -static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, - Qcow2Cache *c, int table) +static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table) { - BDRVQcow2State *s = bs->opaque; - return (uint8_t *) c->table_array + (size_t) table * s->cluster_size; + return (uint8_t *) c->table_array + (size_t) table * c->table_size; } -static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, - Qcow2Cache *c, void *table) +static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table) { - BDRVQcow2State *s = bs->opaque; ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; - int idx = table_offset / s->cluster_size; - assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0); + int idx = table_offset / c->table_size; + assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0); return idx; } @@ -74,15 +71,13 @@ static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c) } } -static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, - int i, int num_tables) +static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables) { /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */ #ifdef CONFIG_LINUX - BDRVQcow2State *s = bs->opaque; - void *t = qcow2_cache_get_table_addr(bs, c, i); + void *t = qcow2_cache_get_table_addr(c, i); int align = getpagesize(); - size_t mem_size = (size_t) s->cluster_size * num_tables; + size_t mem_size = (size_t) c->table_size * num_tables; size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); if (mem_size > offset && length > 0) { @@ -98,7 +93,7 @@ static inline bool can_clean_entry(Qcow2Cache *c, int i) t->lru_counter <= c->cache_clean_lru_counter; } -void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) +void qcow2_cache_clean_unused(Qcow2Cache *c) { int i = 0; while (i < c->size) { @@ -118,23 +113,30 @@ void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) } if (to_clean > 0) { - qcow2_cache_table_release(bs, c, i - to_clean, to_clean); + qcow2_cache_table_release(c, i - to_clean, to_clean); } } c->cache_clean_lru_counter = c->lru_counter; } -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size) { BDRVQcow2State *s = bs->opaque; Qcow2Cache *c; + assert(num_tables > 0); + assert(is_power_of_2(table_size)); + assert(table_size >= (1 << MIN_CLUSTER_BITS)); + assert(table_size <= s->cluster_size); + c = g_new0(Qcow2Cache, 1); c->size = num_tables; + c->table_size = table_size; c->entries = g_try_new0(Qcow2CachedTable, num_tables); c->table_array = qemu_try_blockalign(bs->file->bs, - (size_t) num_tables * s->cluster_size); + (size_t) num_tables * c->table_size); if (!c->entries || !c->table_array) { qemu_vfree(c->table_array); @@ -146,7 +148,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) return c; } -int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c) +int qcow2_cache_destroy(Qcow2Cache *c) { int i; @@ -203,13 +205,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) if (c == s->refcount_block_cache) { ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } else if (c == s->l2_table_cache) { ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } else { ret = qcow2_pre_write_overlap_check(bs, 0, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } if (ret < 0) { @@ -223,7 +225,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) } ret = bdrv_pwrite(bs->file, c->entries[i].offset, - qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); + qcow2_cache_get_table_addr(c, i), c->table_size); if (ret < 0) { return ret; } @@ -309,7 +311,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) c->entries[i].lru_counter = 0; } - qcow2_cache_table_release(bs, c, 0, c->size); + qcow2_cache_table_release(c, 0, c->size); c->lru_counter = 0; @@ -331,7 +333,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, offset, read_from_disk); - if (offset_into_cluster(s, offset)) { + if (!QEMU_IS_ALIGNED(offset, c->table_size)) { qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s " "cache: Offset %#" PRIx64 " is unaligned", qcow2_cache_get_name(s, c), offset); @@ -339,7 +341,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, } /* Check if the table is already cached */ - i = lookup_index = (offset / s->cluster_size * 4) % c->size; + i = lookup_index = (offset / c->table_size * 4) % c->size; do { const Qcow2CachedTable *t = &c->entries[i]; if (t->offset == offset) { @@ -379,8 +381,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, } ret = bdrv_pread(bs->file, offset, - qcow2_cache_get_table_addr(bs, c, i), - s->cluster_size); + qcow2_cache_get_table_addr(c, i), + c->table_size); if (ret < 0) { return ret; } @@ -391,7 +393,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, /* And return the right table */ found: c->entries[i].ref++; - *table = qcow2_cache_get_table_addr(bs, c, i); + *table = qcow2_cache_get_table_addr(c, i); trace_qcow2_cache_get_done(qemu_coroutine_self(), c == s->l2_table_cache, i); @@ -411,9 +413,9 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, return qcow2_cache_do_get(bs, c, offset, table, false); } -void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) +void qcow2_cache_put(Qcow2Cache *c, void **table) { - int i = qcow2_cache_get_table_idx(bs, c, *table); + int i = qcow2_cache_get_table_idx(c, *table); c->entries[i].ref--; *table = NULL; @@ -425,30 +427,28 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) assert(c->entries[i].ref >= 0); } -void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, - void *table) +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) { - int i = qcow2_cache_get_table_idx(bs, c, table); + int i = qcow2_cache_get_table_idx(c, table); assert(c->entries[i].offset != 0); c->entries[i].dirty = true; } -void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, - uint64_t offset) +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset) { int i; for (i = 0; i < c->size; i++) { if (c->entries[i].offset == offset) { - return qcow2_cache_get_table_addr(bs, c, i); + return qcow2_cache_get_table_addr(c, i); } } return NULL; } -void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) +void qcow2_cache_discard(Qcow2Cache *c, void *table) { - int i = qcow2_cache_get_table_idx(bs, c, table); + int i = qcow2_cache_get_table_idx(c, table); assert(c->entries[i].ref == 0); @@ -456,5 +456,5 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) c->entries[i].lru_counter = 0; c->entries[i].dirty = false; - qcow2_cache_table_release(bs, c, i, 1); + qcow2_cache_table_release(c, i, 1); } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 3a979bcd82..e406b0f3b9 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -195,20 +195,26 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, /* * l2_load * - * Loads a L2 table into memory. If the table is in the cache, the cache - * is used; otherwise the L2 table is loaded from the image file. + * @bs: The BlockDriverState + * @offset: A guest offset, used to calculate what slice of the L2 + * table to load. + * @l2_offset: Offset to the L2 table in the image file. + * @l2_slice: Location to store the pointer to the L2 slice. * - * Returns a pointer to the L2 table on success, or NULL if the read from - * the image file failed. + * Loads a L2 slice into memory (L2 slices are the parts of L2 tables + * that are loaded by the qcow2 cache). If the slice is in the cache, + * the cache is used; otherwise the L2 slice is loaded from the image + * file. */ - -static int l2_load(BlockDriverState *bs, uint64_t l2_offset, - uint64_t **l2_table) +static int l2_load(BlockDriverState *bs, uint64_t offset, + uint64_t l2_offset, uint64_t **l2_slice) { BDRVQcow2State *s = bs->opaque; + int start_of_slice = sizeof(uint64_t) * + (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); - return qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void **)l2_table); + return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, + (void **)l2_slice); } /* @@ -257,11 +263,12 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) * */ -static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) +static int l2_allocate(BlockDriverState *bs, int l1_index) { BDRVQcow2State *s = bs->opaque; uint64_t old_l2_offset; - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; int64_t l2_offset; int ret; @@ -292,39 +299,47 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new entry in the l2 cache */ + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; + trace_qcow2_l2_allocate_get_empty(bs, l1_index); - ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); - if (ret < 0) { - goto fail; - } - - l2_table = *table; - - if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { - /* if there was no old l2 table, clear the new table */ - memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - } else { - uint64_t* old_table; - - /* if there was an old l2 table, read it from the disk */ - BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); - ret = qcow2_cache_get(bs, s->l2_table_cache, - old_l2_offset & L1E_OFFSET_MASK, - (void**) &old_table); + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get_empty(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); if (ret < 0) { goto fail; } - memcpy(l2_table, old_table, s->cluster_size); + if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { + /* if there was no old l2 table, clear the new slice */ + memset(l2_slice, 0, slice_size2); + } else { + uint64_t *old_slice; + uint64_t old_l2_slice_offset = + (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; - qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table); + /* if there was an old l2 table, read a slice from the disk */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); + ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, + (void **) &old_slice); + if (ret < 0) { + goto fail; + } + + memcpy(l2_slice, old_slice, slice_size2); + + qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); + } + + /* write the l2 slice to the file */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); + + trace_qcow2_l2_allocate_write_l2(bs, l1_index); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } - /* write the l2 table to the file */ - BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - - trace_qcow2_l2_allocate_write_l2(bs, l1_index); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { goto fail; @@ -338,14 +353,13 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) goto fail; } - *table = l2_table; trace_qcow2_l2_allocate_done(bs, l1_index, 0); return 0; fail: trace_qcow2_l2_allocate_done(bs, l1_index, ret); - if (l2_table != NULL) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) table); + if (l2_slice != NULL) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } s->l1_table[l1_index] = old_l2_offset; if (l2_offset > 0) { @@ -356,19 +370,19 @@ fail: } /* - * Checks how many clusters in a given L2 table are contiguous in the image + * Checks how many clusters in a given L2 slice are contiguous in the image * file. As soon as one of the flags in the bitmask stop_flags changes compared * to the first cluster, the search is stopped and the cluster is not counted * as contiguous. (This allows it, for example, to stop at the first compressed * cluster which may require a different handling) */ static int count_contiguous_clusters(int nb_clusters, int cluster_size, - uint64_t *l2_table, uint64_t stop_flags) + uint64_t *l2_slice, uint64_t stop_flags) { int i; QCow2ClusterType first_cluster_type; uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; - uint64_t first_entry = be64_to_cpu(l2_table[0]); + uint64_t first_entry = be64_to_cpu(l2_slice[0]); uint64_t offset = first_entry & mask; if (!offset) { @@ -381,7 +395,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC); for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; + uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask; if (offset + (uint64_t) i * cluster_size != l2_entry) { break; } @@ -392,10 +406,10 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, /* * Checks how many consecutive unallocated clusters in a given L2 - * table have the same cluster type. + * slice have the same cluster type. */ static int count_contiguous_clusters_unallocated(int nb_clusters, - uint64_t *l2_table, + uint64_t *l2_slice, QCow2ClusterType wanted_type) { int i; @@ -403,7 +417,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters, assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN || wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { - uint64_t entry = be64_to_cpu(l2_table[i]); + uint64_t entry = be64_to_cpu(l2_slice[i]); QCow2ClusterType type = qcow2_get_cluster_type(entry); if (type != wanted_type) { @@ -515,8 +529,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, { BDRVQcow2State *s = bs->opaque; unsigned int l2_index; - uint64_t l1_index, l2_offset, *l2_table; - int l1_bits, c; + uint64_t l1_index, l2_offset, *l2_slice; + int c; unsigned int offset_in_cluster; uint64_t bytes_available, bytes_needed, nb_clusters; QCow2ClusterType type; @@ -525,12 +539,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, offset_in_cluster = offset_into_cluster(s, offset); bytes_needed = (uint64_t) *bytes + offset_in_cluster; - l1_bits = s->l2_bits + s->cluster_bits; - /* compute how many bytes there are between the start of the cluster - * containing offset and the end of the l1 entry */ - bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)) - + offset_in_cluster; + * containing offset and the end of the l2 slice that contains + * the entry pointing to it */ + bytes_available = + ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset))) + << s->cluster_bits; if (bytes_needed > bytes_available) { bytes_needed = bytes_available; @@ -540,7 +554,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* seek to the l2 offset in the l1 table */ - l1_index = offset >> l1_bits; + l1_index = offset_to_l1_index(s, offset); if (l1_index >= s->l1_size) { type = QCOW2_CLUSTER_UNALLOCATED; goto out; @@ -559,17 +573,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, return -EIO; } - /* load the l2 table in memory */ + /* load the l2 slice in memory */ - ret = l2_load(bs, l2_offset, &l2_table); + ret = l2_load(bs, offset, l2_offset, &l2_slice); if (ret < 0) { return ret; } /* find the cluster offset for the given disk offset */ - l2_index = offset_to_l2_index(s, offset); - *cluster_offset = be64_to_cpu(l2_table[l2_index]); + l2_index = offset_to_l2_slice_index(s, offset); + *cluster_offset = be64_to_cpu(l2_slice[l2_index]); nb_clusters = size_to_clusters(s, bytes_needed); /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned @@ -596,14 +610,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ c = count_contiguous_clusters_unallocated(nb_clusters, - &l2_table[l2_index], type); + &l2_slice[l2_index], type); *cluster_offset = 0; break; case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + &l2_slice[l2_index], QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; if (offset_into_cluster(s, *cluster_offset)) { qcow2_signal_corruption(bs, true, -1, -1, @@ -619,7 +633,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, abort(); } - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); bytes_available = (int64_t)c * s->cluster_size; @@ -637,7 +651,7 @@ out: return type; fail: - qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); + qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice); return ret; } @@ -645,26 +659,25 @@ fail: * get_cluster_table * * for a given disk offset, load (and allocate if needed) - * the l2 table. + * the appropriate slice of its l2 table. * - * the l2 table offset in the qcow2 file and the cluster index - * in the l2 table are given to the caller. + * the cluster index in the l2 slice is given to the caller. * * Returns 0 on success, -errno in failure case */ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, - uint64_t **new_l2_table, + uint64_t **new_l2_slice, int *new_l2_index) { BDRVQcow2State *s = bs->opaque; unsigned int l2_index; uint64_t l1_index, l2_offset; - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; int ret; /* seek to the l2 offset in the l1 table */ - l1_index = offset >> (s->l2_bits + s->cluster_bits); + l1_index = offset_to_l1_index(s, offset); if (l1_index >= s->l1_size) { ret = qcow2_grow_l1_table(bs, l1_index + 1, false); if (ret < 0) { @@ -681,17 +694,9 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, return -EIO; } - /* seek the l2 table of the given l2 offset */ - - if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) { - /* load the l2 table in memory */ - ret = l2_load(bs, l2_offset, &l2_table); - if (ret < 0) { - return ret; - } - } else { + if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) { /* First allocate a new L2 table (and do COW if needed) */ - ret = l2_allocate(bs, l1_index, &l2_table); + ret = l2_allocate(bs, l1_index); if (ret < 0) { return ret; } @@ -701,13 +706,23 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), QCOW2_DISCARD_OTHER); } + + /* Get the offset of the newly-allocated l2 table */ + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + assert(offset_into_cluster(s, l2_offset) == 0); + } + + /* load the l2 slice in memory */ + ret = l2_load(bs, offset, l2_offset, &l2_slice); + if (ret < 0) { + return ret; } /* find the cluster offset for the given disk offset */ - l2_index = offset_to_l2_index(s, offset); + l2_index = offset_to_l2_slice_index(s, offset); - *new_l2_table = l2_table; + *new_l2_slice = l2_slice; *new_l2_index = l2_index; return 0; @@ -732,26 +747,26 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, { BDRVQcow2State *s = bs->opaque; int l2_index, ret; - uint64_t *l2_table; + uint64_t *l2_slice; int64_t cluster_offset; int nb_csectors; - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return 0; } /* Compression can't overwrite anything. Fail if the cluster was already * allocated. */ - cluster_offset = be64_to_cpu(l2_table[l2_index]); + cluster_offset = be64_to_cpu(l2_slice[l2_index]); if (cluster_offset & L2E_OFFSET_MASK) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return 0; } cluster_offset = qcow2_alloc_bytes(bs, compressed_size); if (cluster_offset < 0) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return 0; } @@ -766,9 +781,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, /* compressed clusters never have the copied flag */ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - l2_table[l2_index] = cpu_to_be64(cluster_offset); - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + l2_slice[l2_index] = cpu_to_be64(cluster_offset); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return cluster_offset; } @@ -907,7 +922,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; int i, j = 0, l2_index, ret; - uint64_t *old_cluster, *l2_table; + uint64_t *old_cluster, *l2_slice; uint64_t cluster_offset = m->alloc_offset; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); @@ -934,13 +949,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) s->refcount_block_cache); } - ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index); if (ret < 0) { goto err; } - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); - assert(l2_index + m->nb_clusters <= s->l2_size); + assert(l2_index + m->nb_clusters <= s->l2_slice_size); for (i = 0; i < m->nb_clusters; i++) { /* if two concurrent writes happen to the same unallocated cluster * each write allocates separate cluster and writes data concurrently. @@ -948,16 +963,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * cluster the second one has to do RMW (which is done above by * perform_cow()), update l2 table with its cluster pointer and free * old cluster. This is what this loop does */ - if (l2_table[l2_index + i] != 0) { - old_cluster[j++] = l2_table[l2_index + i]; + if (l2_slice[l2_index + i] != 0) { + old_cluster[j++] = l2_slice[l2_index + i]; } - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + + l2_slice[l2_index + i] = cpu_to_be64((cluster_offset + (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); /* * If this was a COW, we need to decrease the refcount of the old cluster. @@ -984,12 +999,12 @@ err: * which must copy from the backing file) */ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, - uint64_t *l2_table, int l2_index) + uint64_t *l2_slice, int l2_index) { int i; for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); + uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]); QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); switch(cluster_type) { @@ -1104,7 +1119,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, BDRVQcow2State *s = bs->opaque; int l2_index; uint64_t cluster_offset; - uint64_t *l2_table; + uint64_t *l2_slice; uint64_t nb_clusters; unsigned int keep_clusters; int ret; @@ -1116,23 +1131,23 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, == offset_into_cluster(s, *host_offset)); /* - * Calculate the number of clusters to look for. We stop at L2 table + * Calculate the number of clusters to look for. We stop at L2 slice * boundaries to keep things simple. */ nb_clusters = size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - l2_index = offset_to_l2_index(s, guest_offset); - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - cluster_offset = be64_to_cpu(l2_table[l2_index]); + cluster_offset = be64_to_cpu(l2_slice[l2_index]); /* Check how many clusters are already allocated and don't need COW */ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL @@ -1160,7 +1175,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, /* We keep all QCOW_OFLAG_COPIED clusters */ keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], + &l2_slice[l2_index], QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); assert(keep_clusters <= nb_clusters); @@ -1175,7 +1190,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, /* Cleanup */ out: - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); /* Only return a host offset if we actually made progress. Otherwise we * would make requirements for handle_alloc() that it can't fulfill */ @@ -1259,7 +1274,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, { BDRVQcow2State *s = bs->opaque; int l2_index; - uint64_t *l2_table; + uint64_t *l2_slice; uint64_t entry; uint64_t nb_clusters; int ret; @@ -1272,29 +1287,29 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, assert(*bytes > 0); /* - * Calculate the number of clusters to look for. We stop at L2 table + * Calculate the number of clusters to look for. We stop at L2 slice * boundaries to keep things simple. */ nb_clusters = size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - l2_index = offset_to_l2_index(s, guest_offset); - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - entry = be64_to_cpu(l2_table[l2_index]); + entry = be64_to_cpu(l2_slice[l2_index]); /* For the moment, overwrite compressed clusters one by one */ if (entry & QCOW_OFLAG_COMPRESSED) { nb_clusters = 1; } else { - nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index); } /* This function is only called when there were no non-COW clusters, so if @@ -1323,7 +1338,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, * nb_clusters already to a range of COW clusters */ preallocated_nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_COPIED); + &l2_slice[l2_index], QCOW_OFLAG_COPIED); assert(preallocated_nb_clusters > 0); nb_clusters = preallocated_nb_clusters; @@ -1334,7 +1349,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, keep_old_clusters = true; } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); if (!alloc_cluster_offset) { /* Allocate, if necessary at a given offset in the image file */ @@ -1616,32 +1631,32 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) /* * This discards as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of discarded + * all clusters in the same L2 slice) and returns the number of discarded * clusters. */ -static int discard_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters, enum qcow2_discard_type type, - bool full_discard) +static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, + enum qcow2_discard_type type, bool full_discard) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table; + uint64_t *l2_slice; int l2_index; int ret; int i; - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - /* Limit nb_clusters to one L2 table */ - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { uint64_t old_l2_entry; - old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); + old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]); /* * If full_discard is false, make sure that a discarded area reads back @@ -1679,18 +1694,18 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, } /* First remove L2 entries */ - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); if (!full_discard && s->qcow_version >= 3) { - l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); } else { - l2_table[l2_index + i] = cpu_to_be64(0); + l2_slice[l2_index + i] = cpu_to_be64(0); } /* Then decrease the refcount */ qcow2_free_any_clusters(bs, old_l2_entry, 1, type); } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return nb_clusters; } @@ -1714,10 +1729,10 @@ int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, s->cache_discards = true; - /* Each L2 table is handled by its own loop iteration */ + /* Each L2 slice is handled by its own loop iteration */ while (nb_clusters > 0) { - cleared = discard_single_l2(bs, offset, nb_clusters, type, - full_discard); + cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, + full_discard); if (cleared < 0) { ret = cleared; goto fail; @@ -1737,33 +1752,33 @@ fail: /* * This zeroes as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of zeroed + * all clusters in the same L2 slice) and returns the number of zeroed * clusters. */ -static int zero_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters, int flags) +static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, int flags) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table; + uint64_t *l2_slice; int l2_index; int ret; int i; bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP); - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - /* Limit nb_clusters to one L2 table */ - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { uint64_t old_offset; QCow2ClusterType cluster_type; - old_offset = be64_to_cpu(l2_table[l2_index + i]); + old_offset = be64_to_cpu(l2_slice[l2_index + i]); /* * Minimize L2 changes if the cluster already reads back as @@ -1775,16 +1790,16 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, continue; } - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) { - l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { - l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); } } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return nb_clusters; } @@ -1808,13 +1823,13 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, return -ENOTSUP; } - /* Each L2 table is handled by its own loop iteration */ + /* Each L2 slice is handled by its own loop iteration */ nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; while (nb_clusters > 0) { - cleared = zero_single_l2(bs, offset, nb_clusters, flags); + cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); if (cleared < 0) { ret = cleared; goto fail; @@ -1848,22 +1863,25 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, { BDRVQcow2State *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; int ret; int i, j; + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; + if (!is_active_l1) { /* inactive L2 tables require a buffer to be stored in when loading * them from disk */ - l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size); - if (l2_table == NULL) { + l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2); + if (l2_slice == NULL) { return -ENOMEM; } } for (i = 0; i < l1_size; i++) { uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; - bool l2_dirty = false; uint64_t l2_refcount; if (!l2_offset) { @@ -1883,124 +1901,131 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - if (is_active_l1) { - /* get active L2 tables from cache */ - ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void **)&l2_table); - } else { - /* load inactive L2 tables from disk */ - ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); - } - if (ret < 0) { - goto fail; - } - ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, &l2_refcount); if (ret < 0) { goto fail; } - for (j = 0; j < s->l2_size; j++) { - uint64_t l2_entry = be64_to_cpu(l2_table[j]); - int64_t offset = l2_entry & L2E_OFFSET_MASK; - QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - - if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && - cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { - continue; + for (slice = 0; slice < n_slices; slice++) { + uint64_t slice_offset = l2_offset + slice * slice_size2; + bool l2_dirty = false; + if (is_active_l1) { + /* get active L2 tables from cache */ + ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset, + (void **)&l2_slice); + } else { + /* load inactive L2 tables from disk */ + ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2); + } + if (ret < 0) { + goto fail; } - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - if (!bs->backing) { - /* not backed; therefore we can simply deallocate the - * cluster */ - l2_table[j] = 0; - l2_dirty = true; + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t l2_entry = be64_to_cpu(l2_slice[j]); + int64_t offset = l2_entry & L2E_OFFSET_MASK; + QCow2ClusterType cluster_type = + qcow2_get_cluster_type(l2_entry); + + if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && + cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { continue; } - offset = qcow2_alloc_clusters(bs, s->cluster_size); - if (offset < 0) { - ret = offset; - goto fail; - } + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + if (!bs->backing) { + /* not backed; therefore we can simply deallocate the + * cluster */ + l2_slice[j] = 0; + l2_dirty = true; + continue; + } - if (l2_refcount > 1) { - /* For shared L2 tables, set the refcount accordingly (it is - * already 1 and needs to be l2_refcount) */ - ret = qcow2_update_cluster_refcount(bs, - offset >> s->cluster_bits, - refcount_diff(1, l2_refcount), false, - QCOW2_DISCARD_OTHER); - if (ret < 0) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_OTHER); + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + ret = offset; goto fail; } + + if (l2_refcount > 1) { + /* For shared L2 tables, set the refcount accordingly + * (it is already 1 and needs to be l2_refcount) */ + ret = qcow2_update_cluster_refcount( + bs, offset >> s->cluster_bits, + refcount_diff(1, l2_refcount), false, + QCOW2_DISCARD_OTHER); + if (ret < 0) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } + } } + + if (offset_into_cluster(s, offset)) { + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, + "Cluster allocation offset " + "%#" PRIx64 " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", offset, + l2_offset, l2_index); + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + ret = -EIO; + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, offset, + s->cluster_size); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; + } + + ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; + } + + if (l2_refcount == 1) { + l2_slice[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + } else { + l2_slice[j] = cpu_to_be64(offset); + } + l2_dirty = true; } - if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, - "Cluster allocation offset " - "%#" PRIx64 " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", offset, - l2_offset, j); - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); + if (is_active_l1) { + if (l2_dirty) { + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_depends_on_flush(s->l2_table_cache); } - ret = -EIO; - goto fail; - } - - ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); - if (ret < 0) { - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); - } - goto fail; - } - - ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); - if (ret < 0) { - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); - } - goto fail; - } - - if (l2_refcount == 1) { - l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } else { - l2_table[j] = cpu_to_be64(offset); - } - l2_dirty = true; - } + if (l2_dirty) { + ret = qcow2_pre_write_overlap_check( + bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, + slice_offset, slice_size2); + if (ret < 0) { + goto fail; + } - if (is_active_l1) { - if (l2_dirty) { - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - qcow2_cache_depends_on_flush(s->l2_table_cache); - } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); - } else { - if (l2_dirty) { - ret = qcow2_pre_write_overlap_check(bs, - QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset, - s->cluster_size); - if (ret < 0) { - goto fail; - } - - ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); - if (ret < 0) { - goto fail; + ret = bdrv_pwrite(bs->file, slice_offset, + l2_slice, slice_size2); + if (ret < 0) { + goto fail; + } } } } @@ -2014,11 +2039,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = 0; fail: - if (l2_table) { + if (l2_slice) { if (!is_active_l1) { - qemu_vfree(l2_table); + qemu_vfree(l2_slice); } else { - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } } return ret; @@ -2070,7 +2095,15 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size * sizeof(uint64_t), BDRV_SECTOR_SIZE); - l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); + uint64_t *new_l1_table = + g_try_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); + + if (!new_l1_table) { + ret = -ENOMEM; + goto fail; + } + + l1_table = new_l1_table; ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 92701ab7af..d46b69d7f3 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -277,7 +277,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, block_index = cluster_index & (s->refcount_block_size - 1); *refcount = s->get_refcount(refcount_block, block_index); - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); return 0; } @@ -421,7 +421,7 @@ static int alloc_refcount_block(BlockDriverState *bs, /* Now the new refcount block needs to be written to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { goto fail; @@ -449,7 +449,7 @@ static int alloc_refcount_block(BlockDriverState *bs, return -EAGAIN; } - qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + qcow2_cache_put(s->refcount_block_cache, refcount_block); /* * If we come here, we need to grow the refcount table. Again, a new @@ -501,7 +501,7 @@ static int alloc_refcount_block(BlockDriverState *bs, fail: if (*refcount_block != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + qcow2_cache_put(s->refcount_block_cache, refcount_block); } return ret; } @@ -623,7 +623,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, goto fail; } memset(refblock_data, 0, s->cluster_size); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock_data); new_table[i] = block_offset; @@ -656,11 +656,11 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, s->set_refcount(refblock_data, j, 1); } - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock_data); } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data); + qcow2_cache_put(s->refcount_block_cache, &refblock_data); } assert(block_offset == table_offset); @@ -836,7 +836,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* Load the refcount block and allocate it if needed */ if (table_index != old_table_index) { if (refcount_block) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); } ret = alloc_refcount_block(bs, cluster_index, &refcount_block); if (ret < 0) { @@ -845,8 +845,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, } old_table_index = table_index; - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, - refcount_block); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); /* we can update the count and save it */ block_index = cluster_index & (s->refcount_block_size - 1); @@ -872,16 +871,16 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, if (refcount == 0) { void *table; - table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + table = qcow2_cache_is_table_offset(s->refcount_block_cache, offset); if (table != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); - qcow2_cache_discard(bs, s->refcount_block_cache, table); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + qcow2_cache_discard(s->refcount_block_cache, table); } - table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset); + table = qcow2_cache_is_table_offset(s->l2_table_cache, offset); if (table != NULL) { - qcow2_cache_discard(bs, s->l2_table_cache, table); + qcow2_cache_discard(s->l2_table_cache, table); } if (s->discard_passthrough[type]) { @@ -898,7 +897,7 @@ fail: /* Write last changed block to disk */ if (refcount_block) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); } /* @@ -1184,17 +1183,20 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount; + uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount; bool l1_allocated = false; int64_t old_entry, old_l2_offset; + unsigned slice, slice_size2, n_slices; int i, j, l1_modified = 0, nb_csectors; int ret; assert(addend >= -1 && addend <= 1); - l2_table = NULL; + l2_slice = NULL; l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; s->cache_discards = true; @@ -1237,92 +1239,98 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void**) &l2_table); - if (ret < 0) { - goto fail; - } + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); + if (ret < 0) { + goto fail; + } - for (j = 0; j < s->l2_size; j++) { - uint64_t cluster_index; - uint64_t offset; + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t cluster_index; + uint64_t offset; - entry = be64_to_cpu(l2_table[j]); - old_entry = entry; - entry &= ~QCOW_OFLAG_COPIED; - offset = entry & L2E_OFFSET_MASK; + entry = be64_to_cpu(l2_slice[j]); + old_entry = entry; + entry &= ~QCOW_OFLAG_COPIED; + offset = entry & L2E_OFFSET_MASK; - switch (qcow2_get_cluster_type(entry)) { - case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((entry >> s->csize_shift) & - s->csize_mask) + 1; - if (addend != 0) { - ret = update_refcount(bs, - (entry & s->cluster_offset_mask) & ~511, + switch (qcow2_get_cluster_type(entry)) { + case QCOW2_CLUSTER_COMPRESSED: + nb_csectors = ((entry >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) { + ret = update_refcount( + bs, (entry & s->cluster_offset_mask) & ~511, nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + /* compressed clusters are never modified */ + refcount = 2; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, offset)) { + /* Here l2_index means table (not slice) index */ + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, "Cluster " + "allocation offset %#" PRIx64 + " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + offset, l2_offset, l2_index); + ret = -EIO; + goto fail; + } + + cluster_index = offset >> s->cluster_bits; + assert(cluster_index); + if (addend != 0) { + ret = qcow2_update_cluster_refcount( + bs, cluster_index, abs(addend), addend < 0, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + + ret = qcow2_get_refcount(bs, cluster_index, &refcount); if (ret < 0) { goto fail; } - } - /* compressed clusters are never modified */ - refcount = 2; - break; + break; - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO_ALLOC: - if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Cluster " - "allocation offset %#" PRIx64 - " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", - offset, l2_offset, j); - ret = -EIO; - goto fail; + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; + + default: + abort(); } - cluster_index = offset >> s->cluster_bits; - assert(cluster_index); - if (addend != 0) { - ret = qcow2_update_cluster_refcount(bs, - cluster_index, abs(addend), addend < 0, - QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { - goto fail; + if (refcount == 1) { + entry |= QCOW_OFLAG_COPIED; + } + if (entry != old_entry) { + if (addend > 0) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); } + l2_slice[j] = cpu_to_be64(entry); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, + l2_slice); } - - ret = qcow2_get_refcount(bs, cluster_index, &refcount); - if (ret < 0) { - goto fail; - } - break; - - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_UNALLOCATED: - refcount = 0; - break; - - default: - abort(); } - if (refcount == 1) { - entry |= QCOW_OFLAG_COPIED; - } - if (entry != old_entry) { - if (addend > 0) { - qcow2_cache_set_dependency(bs, s->l2_table_cache, - s->refcount_block_cache); - } - l2_table[j] = cpu_to_be64(entry); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, - l2_table); - } + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); - if (addend != 0) { ret = qcow2_update_cluster_refcount(bs, l2_offset >> s->cluster_bits, @@ -1348,8 +1356,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, ret = bdrv_flush(bs); fail: - if (l2_table) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (l2_slice) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } s->cache_discards = false; @@ -2849,7 +2857,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, new_reftable_size, new_refblock, new_refblock_empty, allocated, errp); if (ret < 0) { - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); return ret; } @@ -2862,7 +2870,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { uint64_t offset; - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); offset = ((reftable_index << s->refcount_block_bits) + refblock_index) << s->cluster_bits; @@ -2883,7 +2891,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, new_refblock_empty = new_refblock_empty && refcount == 0; } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); } else { /* No refblock means every refcount is 0 */ for (refblock_index = 0; refblock_index < s->refcount_block_size; @@ -3175,24 +3183,24 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs, offset_to_reftable_index(s, discard_block_offs), discard_block_offs, s->get_refcount(refblock, block_index)); - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); return -EINVAL; } s->set_refcount(refblock, block_index, 0); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock); - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); if (cluster_index < s->free_cluster_index) { s->free_cluster_index = cluster_index; } - refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + refblock = qcow2_cache_is_table_offset(s->refcount_block_cache, discard_block_offs); if (refblock) { /* discard refblock from the cache if refblock is cached */ - qcow2_cache_discard(bs, s->refcount_block_cache, refblock); + qcow2_cache_discard(s->refcount_block_cache, refblock); } update_refcount_discard(bs, discard_block_offs, s->cluster_size); @@ -3235,7 +3243,7 @@ int qcow2_shrink_reftable(BlockDriverState *bs) } else { unused_block = buffer_is_zero(refblock, s->cluster_size); } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); } diff --git a/block/qcow2.c b/block/qcow2.c index 801e29fc56..57a517e2bd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -675,6 +675,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum L2 table cache size", }, + { + .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Size of each entry in the L2 cache", + }, { .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, .type = QEMU_OPT_SIZE, @@ -706,8 +711,8 @@ static void cache_clean_timer_cb(void *opaque) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; - qcow2_cache_clean_unused(bs, s->l2_table_cache); - qcow2_cache_clean_unused(bs, s->refcount_block_cache); + qcow2_cache_clean_unused(s->l2_table_cache); + qcow2_cache_clean_unused(s->refcount_block_cache); timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + (int64_t) s->cache_clean_interval * 1000); } @@ -747,6 +752,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs, static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *l2_cache_size, + uint64_t *l2_cache_entry_size, uint64_t *refcount_cache_size, Error **errp) { BDRVQcow2State *s = bs->opaque; @@ -762,6 +768,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, *refcount_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); + *l2_cache_entry_size = qemu_opt_get_size( + opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); + if (combined_cache_size_set) { if (l2_cache_size_set && refcount_cache_size_set) { error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE @@ -802,11 +811,21 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } } + + if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || + *l2_cache_entry_size > s->cluster_size || + !is_power_of_2(*l2_cache_entry_size)) { + error_setg(errp, "L2 cache entry size must be a power of two " + "between %d and the cluster size (%d)", + 1 << MIN_CLUSTER_BITS, s->cluster_size); + return; + } } typedef struct Qcow2ReopenState { Qcow2Cache *l2_table_cache; Qcow2Cache *refcount_block_cache; + int l2_slice_size; /* Number of entries in a slice of the L2 table */ bool use_lazy_refcounts; int overlap_check; bool discard_passthrough[QCOW2_DISCARD_MAX]; @@ -823,7 +842,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, QemuOpts *opts = NULL; const char *opt_overlap_check, *opt_overlap_check_template; int overlap_check_template = 0; - uint64_t l2_cache_size, refcount_cache_size; + uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; int i; const char *encryptfmt; QDict *encryptopts = NULL; @@ -842,15 +861,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, } /* get L2 table/refcount block cache size from command line options */ - read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, - &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, + &refcount_cache_size, &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; goto fail; } - l2_cache_size /= s->cluster_size; + l2_cache_size /= l2_cache_entry_size; if (l2_cache_size < MIN_L2_CACHE_SIZE) { l2_cache_size = MIN_L2_CACHE_SIZE; } @@ -888,8 +907,11 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, } } - r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); - r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); + r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); + r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, + l2_cache_entry_size); + r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, + s->cluster_size); if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { error_setg(errp, "Could not allocate metadata caches"); ret = -ENOMEM; @@ -1044,13 +1066,14 @@ static void qcow2_update_options_commit(BlockDriverState *bs, int i; if (s->l2_table_cache) { - qcow2_cache_destroy(bs, s->l2_table_cache); + qcow2_cache_destroy(s->l2_table_cache); } if (s->refcount_block_cache) { - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->refcount_block_cache); } s->l2_table_cache = r->l2_table_cache; s->refcount_block_cache = r->refcount_block_cache; + s->l2_slice_size = r->l2_slice_size; s->overlap_check = r->overlap_check; s->use_lazy_refcounts = r->use_lazy_refcounts; @@ -1073,10 +1096,10 @@ static void qcow2_update_options_abort(BlockDriverState *bs, Qcow2ReopenState *r) { if (r->l2_table_cache) { - qcow2_cache_destroy(bs, r->l2_table_cache); + qcow2_cache_destroy(r->l2_table_cache); } if (r->refcount_block_cache) { - qcow2_cache_destroy(bs, r->refcount_block_cache); + qcow2_cache_destroy(r->refcount_block_cache); } qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); } @@ -1460,7 +1483,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; } - if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) { + if (qcow2_load_dirty_bitmaps(bs, &local_err)) { update_header = false; } if (local_err != NULL) { @@ -1514,10 +1537,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, s->l1_table = NULL; cache_clean_timer_del(bs); if (s->l2_table_cache) { - qcow2_cache_destroy(bs, s->l2_table_cache); + qcow2_cache_destroy(s->l2_table_cache); } if (s->refcount_block_cache) { - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->refcount_block_cache); } qcrypto_block_free(s->crypto); qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); @@ -2065,8 +2088,8 @@ static void qcow2_close(BlockDriverState *bs) } cache_clean_timer_del(bs); - qcow2_cache_destroy(bs, s->l2_table_cache); - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->l2_table_cache); + qcow2_cache_destroy(s->refcount_block_cache); qcrypto_block_free(s->crypto); s->crypto = NULL; @@ -3259,9 +3282,9 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, host_offset = allocation_start; guest_offset = old_length; while (nb_new_data_clusters) { - int64_t guest_cluster = guest_offset >> s->cluster_bits; - int64_t nb_clusters = MIN(nb_new_data_clusters, - s->l2_size - guest_cluster % s->l2_size); + int64_t nb_clusters = MIN( + nb_new_data_clusters, + s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); QCowL2Meta allocation = { .offset = guest_offset, .alloc_offset = host_offset, diff --git a/block/qcow2.h b/block/qcow2.h index 46c8cf44ec..883802241f 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -68,7 +68,7 @@ #define MAX_CLUSTER_BITS 21 /* Must be at least 2 to cover COW */ -#define MIN_L2_CACHE_SIZE 2 /* clusters */ +#define MIN_L2_CACHE_SIZE 2 /* cache entries */ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ @@ -100,6 +100,7 @@ #define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2" #define QCOW2_OPT_CACHE_SIZE "cache-size" #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" +#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size" #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" #define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" @@ -251,6 +252,7 @@ typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; int cluster_sectors; + int l2_slice_size; int l2_bits; int l2_size; int l1_size; @@ -463,11 +465,21 @@ static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size) return (size + (1ULL << shift) - 1) >> shift; } +static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->l2_bits + s->cluster_bits); +} + static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset) { return (offset >> s->cluster_bits) & (s->l2_size - 1); } +static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); +} + static inline int64_t align_offset(int64_t offset, int n) { offset = (offset + n - 1) & ~(n - 1); @@ -636,34 +648,33 @@ void qcow2_free_snapshots(BlockDriverState *bs); int qcow2_read_snapshots(BlockDriverState *bs); /* qcow2-cache.c functions */ -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables); -int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c); +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size); +int qcow2_cache_destroy(Qcow2Cache *c); -void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, - void *table); +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table); int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); -void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); +void qcow2_cache_clean_unused(Qcow2Cache *c); int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); -void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); -void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, - uint64_t offset); -void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table); +void qcow2_cache_put(Qcow2Cache *c, void **table); +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset); +void qcow2_cache_discard(Qcow2Cache *c, void *table); /* qcow2-bitmap.c functions */ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, int64_t *refcount_table_size); -bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp); +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp); int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp); int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); diff --git a/block/sheepdog.c b/block/sheepdog.c index af125a2c8d..ac02b10fe0 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1826,40 +1826,34 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, return 0; } -static int sd_prealloc(const char *filename, Error **errp) +static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, + Error **errp) { BlockBackend *blk = NULL; - BDRVSheepdogState *base = NULL; + BDRVSheepdogState *base = bs->opaque; unsigned long buf_size; uint32_t idx, max_idx; uint32_t object_size; - int64_t vdi_size; void *buf = NULL; int ret; - blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); - if (blk == NULL) { - ret = -EIO; + blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL); + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { goto out_with_err_set; } blk_set_allow_write_beyond_eof(blk, true); - vdi_size = blk_getlength(blk); - if (vdi_size < 0) { - ret = vdi_size; - goto out; - } - - base = blk_bs(blk)->opaque; object_size = (UINT32_C(1) << base->inode.block_size_shift); buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); buf = g_malloc0(buf_size); - max_idx = DIV_ROUND_UP(vdi_size, buf_size); + max_idx = DIV_ROUND_UP(new_size, buf_size); - for (idx = 0; idx < max_idx; idx++) { + for (idx = old_size / buf_size; idx < max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. @@ -2108,7 +2102,20 @@ static int sd_create(const char *filename, QemuOpts *opts, } if (prealloc) { - ret = sd_prealloc(filename, errp); + BlockDriverState *bs; + QDict *opts; + + opts = qdict_new(); + qdict_put_str(opts, "driver", "sheepdog"); + bs = bdrv_open(filename, NULL, opts, BDRV_O_PROTOCOL | BDRV_O_RDWR, + errp); + if (!bs) { + goto out; + } + + ret = sd_prealloc(bs, 0, s->inode.vdi_size, errp); + + bdrv_unref(bs); } out: g_free(backing_file); @@ -2173,15 +2180,16 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset, int ret, fd; unsigned int datalen; uint64_t max_vdi_size; + int64_t old_size = s->inode.vdi_size; - if (prealloc != PREALLOC_MODE_OFF) { + if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) { error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc)); return -ENOTSUP; } max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; - if (offset < s->inode.vdi_size) { + if (offset < old_size) { error_setg(errp, "shrinking is not supported"); return -EINVAL; } else if (offset > max_vdi_size) { @@ -2204,9 +2212,17 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset, if (ret < 0) { error_setg_errno(errp, -ret, "failed to update an inode"); + return ret; } - return ret; + if (prealloc == PREALLOC_MODE_FULL) { + ret = sd_prealloc(bs, old_size, offset, errp); + if (ret < 0) { + return ret; + } + } + + return 0; } /* diff --git a/blockdev.c b/blockdev.c index bdbdeae7e4..3fb1ca803c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2825,14 +2825,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, if (!has_persistent) { persistent = false; } - if (!has_autoload) { - autoload = false; - } - if (has_autoload && !persistent) { - error_setg(errp, "Autoload flag must be used only for persistent " - "bitmaps"); - return; + if (has_autoload) { + warn_report("Autoload option is deprecated and its value is ignored"); } if (persistent && @@ -2847,7 +2842,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, } bdrv_dirty_bitmap_set_persistance(bitmap, persistent); - bdrv_dirty_bitmap_set_autoload(bitmap, autoload); } void qmp_block_dirty_bitmap_remove(const char *node, const char *name, @@ -3569,6 +3563,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) return; } + /* Early check to avoid creating target */ + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) { + return; + } + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi index cd74767ed3..f1793692bb 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -845,6 +845,16 @@ QEMU transparently handles lock handover during shared storage migration. For shared virtual disk images between multiple VMs, the "share-rw" device option should be used. +By default, the guest has exclusive write access to its disk image. If the +guest can safely share the disk image with other writers the @code{-device +...,share-rw=on} parameter can be used. This is only safe if the guest is +running software, such as a cluster file system, that coordinates disk accesses +to avoid corruption. + +Note that share-rw=on only declares the guest's ability to share the disk. +Some QEMU features, such as image file formats, require exclusive write access +to the disk image and this is unaffected by the share-rw=on option. + Alternatively, locking can be fully disabled by "locking=off" block device option. In the command line, the option is usually in the form of "file.locking=off" as the protocol driver is normally placed as a "file" child diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 3da8486ab1..e3f4bbf51d 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -66,7 +66,6 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value); -void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload); void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent); diff --git a/qapi/block-core.json b/qapi/block-core.json index 8046c2da23..5c5921bfb7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1593,9 +1593,9 @@ # Qcow2 disks support persistent bitmaps. Default is false for # block-dirty-bitmap-add. (Since: 2.10) # -# @autoload: the bitmap will be automatically loaded when the image it is stored -# in is opened. This flag may only be specified for persistent -# bitmaps. Default is false for block-dirty-bitmap-add. (Since: 2.10) +# @autoload: ignored and deprecated since 2.12. +# Currently, all dirty tracking bitmaps are loaded from Qcow2 on +# open. # # Since: 2.4 ## @@ -2521,6 +2521,11 @@ # @l2-cache-size: the maximum size of the L2 table cache in # bytes (since 2.2) # +# @l2-cache-entry-size: the size of each entry in the L2 cache in +# bytes. It must be a power of two between 512 +# and the cluster size. The default value is +# the cluster size (since 2.12) +# # @refcount-cache-size: the maximum size of the refcount block cache # in bytes (since 2.2) # @@ -2542,6 +2547,7 @@ '*overlap-check': 'Qcow2OverlapChecks', '*cache-size': 'int', '*l2-cache-size': 'int', + '*l2-cache-entry-size': 'int', '*refcount-cache-size': 'int', '*cache-clean-interval': 'int', '*encrypt': 'BlockdevQcow2Encryption' } } diff --git a/qemu-doc.texi b/qemu-doc.texi index 769968aba4..137f5814a8 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -2757,6 +2757,13 @@ used and it will be removed with no replacement. The ``convert -s snapshot_id_or_name'' argument is obsoleted by the ``convert -l snapshot_param'' argument instead. +@section QEMU Machine Protocol (QMP) commands + +@subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0) + +"autoload" parameter is now ignored. All bitmaps are automatically loaded +from qcow2 images. + @section System emulator human monitor commands @subsection host_net_add (since 2.10.0) diff --git a/qemu-img.texi b/qemu-img.texi index fdcf120f36..8a26400adb 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -33,38 +33,14 @@ The following commands are supported: Command parameters: @table @var + @item filename - is a disk image filename - -@item --object @var{objectdef} - -is a QEMU user creatable object definition. See the @code{qemu(1)} manual -page for a description of the object properties. The most common object -type is a @code{secret}, which is used to supply passwords and/or encryption -keys. - -@item --image-opts - -Indicates that the source @var{filename} parameter is to be interpreted as a -full option string, not a plain filename. This parameter is mutually -exclusive with the @var{-f} parameter. - -@item --target-image-opts - -Indicates that the @var{output_filename} parameter(s) are to be interpreted as -a full option string, not a plain filename. This parameter is mutually -exclusive with the @var{-O} parameters. It is currently required to also use -the @var{-n} parameter to skip image creation. This restriction may be relaxed -in a future release. +is a disk image filename @item fmt is the disk image format. It is guessed automatically in most cases. See below for a description of the supported disk formats. -@item --backing-chain -will enumerate information about backing files in a disk image chain. Refer -below for further description. - @item size is the disk image size in bytes. Optional suffixes @code{k} or @code{K} (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M) @@ -74,42 +50,86 @@ and T (terabyte, 1024G) are supported. @code{b} is ignored. is the destination disk image filename @item output_fmt - is the destination format +is the destination format + @item options is a comma separated list of format specific options in a name=value format. Use @code{-o ?} for an overview of the options supported by the used format or see the format descriptions below for details. + @item snapshot_param is param used for internal snapshot, format is 'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]' + @item snapshot_id_or_name is deprecated, use snapshot_param instead +@end table + +@table @option + +@item --object @var{objectdef} +is a QEMU user creatable object definition. See the @code{qemu(1)} manual +page for a description of the object properties. The most common object +type is a @code{secret}, which is used to supply passwords and/or encryption +keys. + +@item --image-opts +Indicates that the source @var{filename} parameter is to be interpreted as a +full option string, not a plain filename. This parameter is mutually +exclusive with the @var{-f} parameter. + +@item --target-image-opts +Indicates that the @var{output_filename} parameter(s) are to be interpreted as +a full option string, not a plain filename. This parameter is mutually +exclusive with the @var{-O} parameters. It is currently required to also use +the @var{-n} parameter to skip image creation. This restriction may be relaxed +in a future release. + +@item --force-share (-U) +If specified, @code{qemu-img} will open the image in shared mode, allowing +other QEMU processes to open it in write mode. For example, this can be used to +get the image information (with 'info' subcommand) when the image is used by a +running guest. Note that this could produce inconsistent results because of +concurrent metadata changes, etc. This option is only allowed when opening +images in read-only mode. + +@item --backing-chain +will enumerate information about backing files in a disk image chain. Refer +below for further description. + @item -c indicates that target image must be compressed (qcow format only) + @item -h with or without a command shows help and lists the supported formats + @item -p display progress bar (compare, convert and rebase commands only). If the @var{-p} option is not used for a command that supports it, the progress is reported when the process receives a @code{SIGUSR1} or @code{SIGINFO} signal. + @item -q Quiet mode - do not print any output (except errors). There's no progress bar in case both @var{-q} and @var{-p} options are used. + @item -S @var{size} indicates the consecutive number of bytes that must contain only zeros for qemu-img to create a sparse image during conversion. This value is rounded down to the nearest 512 bytes. You may use the common size suffixes like @code{k} for kilobytes. + @item -t @var{cache} specifies the cache mode that should be used with the (destination) file. See the documentation of the emulator's @code{-drive cache=...} option for allowed values. + @item -T @var{src_cache} specifies the cache mode that should be used with the source file(s). See the documentation of the emulator's @code{-drive cache=...} option for allowed values. + @end table Parameters to snapshot subcommand: diff --git a/qemu-io.c b/qemu-io.c index f554ab614b..2c00ea068e 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -11,6 +11,9 @@ #include "qemu/osdep.h" #include #include +#ifndef _WIN32 +#include +#endif #include "qapi/error.h" #include "qemu-io.h" @@ -42,6 +45,26 @@ static bool imageOpts; static ReadLineState *readline_state; +static int ttyEOF; + +static int get_eof_char(void) +{ +#ifdef _WIN32 + return 0x4; /* Ctrl-D */ +#else + struct termios tty; + if (tcgetattr(STDIN_FILENO, &tty) != 0) { + if (errno == ENOTTY) { + return 0x0; /* just expect read() == 0 */ + } else { + return 0x4; /* Ctrl-D */ + } + } + + return tty.c_cc[VEOF]; +#endif +} + static int close_f(BlockBackend *blk, int argc, char **argv) { blk_unref(qemuio_blk); @@ -323,7 +346,8 @@ static char *fetchline_readline(void) readline_start(readline_state, get_prompt(), 0, readline_func, &line); while (!line) { int ch = getchar(); - if (ch == EOF) { + if (ttyEOF != 0x0 && ch == ttyEOF) { + printf("\n"); break; } readline_handle_byte(readline_state, ch); @@ -593,6 +617,7 @@ int main(int argc, char **argv) qemuio_add_command(&close_cmd); if (isatty(STDIN_FILENO)) { + ttyEOF = get_eof_char(); readline_state = readline_init(readline_printf_func, readline_flush_func, NULL, diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index 1ac5d56233..f6dce7947c 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -2358,5 +2358,5 @@ Offset Length Mapped to File 0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk === Testing afl image with a very large capacity === -qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': Could not open 'TEST_DIR/afl9.IMGFMT': Invalid argument +qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large *** done diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index f5678b10c9..911b6f2894 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -53,6 +53,22 @@ $PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io _check_test_img +echo +echo "=== Testing version downgrade with zero expansion and 4K cache entries ===" +echo +IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M +$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 32M 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io +$PYTHON qcow2.py "$TEST_IMG" dump-header +$QEMU_IMG amend -o "compat=0.10" --image-opts \ + driver=qcow2,file.filename=$TEST_IMG,l2-cache-entry-size=4096 +$PYTHON qcow2.py "$TEST_IMG" dump-header +$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 32M 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io +_check_test_img + echo echo "=== Testing dirty version downgrade ===" echo diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index 942485de99..e857ef9a7d 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -52,6 +52,67 @@ read 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. +=== Testing version downgrade with zero expansion and 4K cache entries === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 33554432 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +128 KiB (0x20000) bytes allocated at offset 0 bytes (0x0) +31.875 MiB (0x1fe0000) bytes not allocated at offset 128 KiB (0x20000) +128 KiB (0x20000) bytes allocated at offset 32 MiB (0x2000000) +31.875 MiB (0x1fe0000) bytes not allocated at offset 32.125 MiB (0x2020000) +magic 0x514649fb +version 3 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 +incompatible_features 0x0 +compatible_features 0x1 +autoclear_features 0x0 +refcount_order 4 +header_length 104 + +Header extension: +magic 0x6803f857 +length 144 +data + +magic 0x514649fb +version 2 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 +incompatible_features 0x0 +compatible_features 0x0 +autoclear_features 0x0 +refcount_order 4 +header_length 72 + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 33554432 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +64 MiB (0x4000000) bytes not allocated at offset 0 bytes (0x0) +No errors were found on the image. + === Testing dirty version downgrade === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103 index d0cfab8844..2841318492 100755 --- a/tests/qemu-iotests/103 +++ b/tests/qemu-iotests/103 @@ -66,6 +66,14 @@ $QEMU_IO -c "open -o cache-size=1M,refcount-cache-size=2M $TEST_IMG" 2>&1 \ $QEMU_IO -c "open -o cache-size=0,l2-cache-size=0,refcount-cache-size=0 $TEST_IMG" \ 2>&1 | _filter_testdir | _filter_imgfmt +# Invalid cache entry sizes +$QEMU_IO -c "open -o l2-cache-entry-size=256 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=4242 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=128k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt + echo echo '=== Testing valid option combinations ===' echo @@ -94,6 +102,15 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \ -c 'read -P 42 0 64k' \ | _filter_qemu_io +# Valid cache entry sizes +$QEMU_IO -c "open -o l2-cache-entry-size=512 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=16k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=64k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt + + echo echo '=== Testing minimal L2 cache and COW ===' echo diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out index b7aaadf89a..bd45d3875a 100644 --- a/tests/qemu-iotests/103.out +++ b/tests/qemu-iotests/103.out @@ -9,6 +9,9 @@ can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cach can't open device TEST_DIR/t.IMGFMT: l2-cache-size may not exceed cache-size can't open device TEST_DIR/t.IMGFMT: refcount-cache-size may not exceed cache-size can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cache-size may not be set the same time +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) === Testing valid option combinations === diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137 index 5a01250005..87965625d8 100755 --- a/tests/qemu-iotests/137 +++ b/tests/qemu-iotests/137 @@ -83,6 +83,9 @@ $QEMU_IO \ -c "reopen -o overlap-check.inactive-l2=off" \ -c "reopen -o cache-size=1M" \ -c "reopen -o l2-cache-size=512k" \ + -c "reopen -o l2-cache-entry-size=512" \ + -c "reopen -o l2-cache-entry-size=4k" \ + -c "reopen -o l2-cache-entry-size=64k" \ -c "reopen -o refcount-cache-size=128k" \ -c "reopen -o cache-clean-interval=5" \ -c "reopen -o cache-clean-interval=0" \ @@ -107,6 +110,8 @@ $QEMU_IO \ -c "reopen -o cache-size=1M,l2-cache-size=2M" \ -c "reopen -o cache-size=1M,refcount-cache-size=2M" \ -c "reopen -o l2-cache-size=256T" \ + -c "reopen -o l2-cache-entry-size=33k" \ + -c "reopen -o l2-cache-entry-size=128k" \ -c "reopen -o refcount-cache-size=256T" \ -c "reopen -o overlap-check=constant,overlap-check.template=all" \ -c "reopen -o overlap-check=blubb" \ diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index 05efd74d17..e28e1eadba 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -20,6 +20,8 @@ cache-size, l2-cache-size and refcount-cache-size may not be set the same time l2-cache-size may not exceed cache-size refcount-cache-size may not exceed cache-size L2 cache size too big +L2 cache entry size must be a power of two between 512 and the cluster size (65536) +L2 cache entry size must be a power of two between 512 and the cluster size (65536) L2 cache size too big Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all') Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 index fc9fa975be..42dae04c83 100755 --- a/tests/qemu-iotests/155 +++ b/tests/qemu-iotests/155 @@ -64,7 +64,7 @@ class BaseClass(iotests.QMPTestCase): 'file': {'driver': 'file', 'filename': source_img}} self.vm.add_blockdev(self.qmp_to_opts(blockdev)) - self.vm.add_device('floppy,id=qdev0,drive=source') + self.vm.add_device('virtio-blk,id=qdev0,drive=source') self.vm.launch() self.assertIntactSourceBackingChain() @@ -173,21 +173,24 @@ class MirrorBaseClass(BaseClass): def testFull(self): self.runMirror('full') - node = self.findBlockNode('target', 'qdev0') + node = self.findBlockNode('target', + '/machine/peripheral/qdev0/virtio-backend') self.assertCorrectBackingImage(node, None) self.assertIntactSourceBackingChain() def testTop(self): self.runMirror('top') - node = self.findBlockNode('target', 'qdev0') + node = self.findBlockNode('target', + '/machine/peripheral/qdev0/virtio-backend') self.assertCorrectBackingImage(node, back2_img) self.assertIntactSourceBackingChain() def testNone(self): self.runMirror('none') - node = self.findBlockNode('target', 'qdev0') + node = self.findBlockNode('target', + '/machine/peripheral/qdev0/virtio-backend') self.assertCorrectBackingImage(node, source_img) self.assertIntactSourceBackingChain() @@ -239,7 +242,8 @@ class TestCommit(BaseClass): self.vm.event_wait('BLOCK_JOB_COMPLETED') - node = self.findBlockNode(None, 'qdev0') + node = self.findBlockNode(None, + '/machine/peripheral/qdev0/virtio-backend') self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', back1_img) self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165 index a3932db3de..2936929627 100755 --- a/tests/qemu-iotests/165 +++ b/tests/qemu-iotests/165 @@ -64,7 +64,7 @@ class TestPersistentDirtyBitmap(iotests.QMPTestCase): def qmpAddBitmap(self): self.vm.qmp('block-dirty-bitmap-add', node='drive0', - name='bitmap0', persistent=True, autoload=True) + name='bitmap0', persistent=True) def test_persistent(self): self.vm = self.mkVm() diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176 index d38b3aeb91..32baa116dd 100755 --- a/tests/qemu-iotests/176 +++ b/tests/qemu-iotests/176 @@ -95,7 +95,7 @@ case $reason in "file": { "driver": "file", "filename": "$TEST_IMG" } } } { "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0", "name": "bitmap0", - "persistent": true, "autoload": true } } + "persistent": true } } { "execute": "quit" } EOF ;; diff --git a/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 b/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 index 03615d36a1..9fcd0af45a 100644 Binary files a/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 and b/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 differ