qemu/block/qcow2-snapshot.c
Max Reitz cf93980e77 qcow2: Employ metadata overlap checks
The pre-write overlap check function is now called before most of the
qcow2 writes (aborting it on collision or other error).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2013-08-30 15:48:43 +02:00

683 lines
19 KiB
C

/*
* Block driver for the QCOW version 2 format
*
* Copyright (c) 2004-2006 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
void qcow2_free_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
int i;
for(i = 0; i < s->nb_snapshots; i++) {
g_free(s->snapshots[i].name);
g_free(s->snapshots[i].id_str);
}
g_free(s->snapshots);
s->snapshots = NULL;
s->nb_snapshots = 0;
}
int qcow2_read_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
QCowSnapshot *sn;
int i, id_str_size, name_size;
int64_t offset;
uint32_t extra_data_size;
int ret;
if (!s->nb_snapshots) {
s->snapshots = NULL;
s->snapshots_size = 0;
return 0;
}
offset = s->snapshots_offset;
s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
sn = s->snapshots + i;
sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
sn->l1_size = be32_to_cpu(h.l1_size);
sn->vm_state_size = be32_to_cpu(h.vm_state_size);
sn->date_sec = be32_to_cpu(h.date_sec);
sn->date_nsec = be32_to_cpu(h.date_nsec);
sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
extra_data_size = be32_to_cpu(h.extra_data_size);
id_str_size = be16_to_cpu(h.id_str_size);
name_size = be16_to_cpu(h.name_size);
/* Read extra data */
ret = bdrv_pread(bs->file, offset, &extra,
MIN(sizeof(extra), extra_data_size));
if (ret < 0) {
goto fail;
}
offset += extra_data_size;
if (extra_data_size >= 8) {
sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
}
if (extra_data_size >= 16) {
sn->disk_size = be64_to_cpu(extra.disk_size);
} else {
sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
}
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
sn->id_str[id_str_size] = '\0';
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
ret = bdrv_pread(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
offset += name_size;
sn->name[name_size] = '\0';
}
s->snapshots_size = offset - s->snapshots_offset;
return 0;
fail:
qcow2_free_snapshots(bs);
return ret;
}
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
int i, name_size, id_str_size, snapshots_size;
struct {
uint32_t nb_snapshots;
uint64_t snapshots_offset;
} QEMU_PACKED header_data;
int64_t offset, snapshots_offset;
int ret;
/* compute the size of the snapshots */
offset = 0;
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
offset = align_offset(offset, 8);
offset += sizeof(h);
offset += sizeof(extra);
offset += strlen(sn->id_str);
offset += strlen(sn->name);
}
snapshots_size = offset;
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
offset = snapshots_offset;
if (offset < 0) {
return offset;
}
ret = bdrv_flush(bs);
if (ret < 0) {
return ret;
}
/* The snapshot list position has not yet been updated, so these clusters
* must indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
s->snapshots_size);
if (ret < 0) {
return ret;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
memset(&h, 0, sizeof(h));
h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
h.l1_size = cpu_to_be32(sn->l1_size);
/* If it doesn't fit in 32 bit, older implementations should treat it
* as a disk-only snapshot rather than truncate the VM state */
if (sn->vm_state_size <= 0xffffffff) {
h.vm_state_size = cpu_to_be32(sn->vm_state_size);
}
h.date_sec = cpu_to_be32(sn->date_sec);
h.date_nsec = cpu_to_be32(sn->date_nsec);
h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
h.extra_data_size = cpu_to_be32(sizeof(extra));
memset(&extra, 0, sizeof(extra));
extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
extra.disk_size = cpu_to_be64(sn->disk_size);
id_str_size = strlen(sn->id_str);
name_size = strlen(sn->name);
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
if (ret < 0) {
goto fail;
}
offset += sizeof(extra);
ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
offset += name_size;
}
/*
* Update the header to point to the new snapshot table. This requires the
* new table and its refcounts to be stable on disk.
*/
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
}
QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
}
/* free the old snapshot table */
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
QCOW2_DISCARD_SNAPSHOT);
s->snapshots_offset = snapshots_offset;
s->snapshots_size = snapshots_size;
return 0;
fail:
return ret;
}
static void find_new_snapshot_id(BlockDriverState *bs,
char *id_str, int id_str_size)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
int i, id, id_max = 0;
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
id = strtoul(sn->id_str, NULL, 10);
if (id > id_max)
id_max = id;
}
snprintf(id_str, id_str_size, "%d", id_max + 1);
}
static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
{
BDRVQcowState *s = bs->opaque;
int i;
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id_str))
return i;
}
return -1;
}
static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
{
BDRVQcowState *s = bs->opaque;
int i, ret;
ret = find_snapshot_by_id(bs, name);
if (ret >= 0)
return ret;
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].name, name))
return i;
}
return -1;
}
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *new_snapshot_list = NULL;
QCowSnapshot *old_snapshot_list = NULL;
QCowSnapshot sn1, *sn = &sn1;
int i, ret;
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
memset(sn, 0, sizeof(*sn));
/* Generate an ID if it wasn't passed */
if (sn_info->id_str[0] == '\0') {
find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
}
/* Check that the ID is unique */
if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
return -EEXIST;
}
/* Populate sn with passed data */
sn->id_str = g_strdup(sn_info->id_str);
sn->name = g_strdup(sn_info->name);
sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
sn->vm_state_size = sn_info->vm_state_size;
sn->date_sec = sn_info->date_sec;
sn->date_nsec = sn_info->date_nsec;
sn->vm_clock_nsec = sn_info->vm_clock_nsec;
/* Allocate the L1 table of the snapshot and copy the current one there. */
l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
if (l1_table_offset < 0) {
ret = l1_table_offset;
goto fail;
}
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
g_free(l1_table);
l1_table = NULL;
/*
* Increase the refcounts of all clusters and make sure everything is
* stable on disk before updating the snapshot table to contain a pointer
* to the new L1 table.
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
if (ret < 0) {
goto fail;
}
/* Append the new snapshot to the snapshot list */
new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
memcpy(new_snapshot_list, s->snapshots,
s->nb_snapshots * sizeof(QCowSnapshot));
old_snapshot_list = s->snapshots;
}
s->snapshots = new_snapshot_list;
s->snapshots[s->nb_snapshots++] = *sn;
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
g_free(s->snapshots);
s->snapshots = old_snapshot_list;
goto fail;
}
g_free(old_snapshot_list);
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
qcow2_check_refcounts(bs, &result, 0);
}
#endif
return 0;
fail:
g_free(sn->id_str);
g_free(sn->name);
g_free(l1_table);
return ret;
}
/* copy the snapshot 'snapshot_name' into the current disk image */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
int ret;
uint64_t *sn_l1_table = NULL;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0) {
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
error_report("qcow2: Loading snapshots with different disk "
"size is not implemented");
ret = -ENOTSUP;
goto fail;
}
/*
* Make sure that the current L1 table is big enough to contain the whole
* L1 table of the snapshot. If the snapshot L1 table is smaller, the
* current one must be padded with zeros.
*/
ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
if (ret < 0) {
goto fail;
}
cur_l1_bytes = s->l1_size * sizeof(uint64_t);
sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
/*
* Copy the snapshot L1 table to the current L1 table.
*
* Before overwriting the old current L1 table on disk, make sure to
* increase all refcounts for the clusters referenced by the new one.
* Decrease the refcount referenced by the old one only when the L1
* table is overwritten.
*/
sn_l1_table = g_malloc0(cur_l1_bytes);
ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
}
ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
sn->l1_size, 1);
if (ret < 0) {
goto fail;
}
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
s->l1_table_offset, cur_l1_bytes);
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
}
/*
* Decrease refcount of clusters of current L1 table.
*
* At this point, the in-memory s->l1_table points to the old L1 table,
* whereas on disk we already have the new one.
*
* qcow2_update_snapshot_refcount special cases the current L1 table to use
* the in-memory data instead of really using the offset to load a new one,
* which is why this works.
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
s->l1_size, -1);
/*
* Now update the in-memory L1 table to be in sync with the on-disk one. We
* need to do this even if updating refcounts failed.
*/
for(i = 0;i < s->l1_size; i++) {
s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
}
if (ret < 0) {
goto fail;
}
g_free(sn_l1_table);
sn_l1_table = NULL;
/*
* Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
* when we decreased the refcount of the old snapshot.
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
goto fail;
}
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
qcow2_check_refcounts(bs, &result, 0);
}
#endif
return 0;
fail:
g_free(sn_l1_table);
return ret;
}
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0) {
return -ENOENT;
}
sn = s->snapshots[snapshot_index];
/* Remove it from the snapshot list */
memmove(s->snapshots + snapshot_index,
s->snapshots + snapshot_index + 1,
(s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
s->nb_snapshots--;
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
return ret;
}
/*
* The snapshot is now unused, clean up. If we fail after this point, we
* won't recover but just leak clusters.
*/
g_free(sn.id_str);
g_free(sn.name);
/*
* Now decrease the refcounts of clusters referenced by the snapshot and
* free the L1 table.
*/
ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
sn.l1_size, -1);
if (ret < 0) {
return ret;
}
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
QCOW2_DISCARD_SNAPSHOT);
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
return ret;
}
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
qcow2_check_refcounts(bs, &result, 0);
}
#endif
return 0;
}
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
BDRVQcowState *s = bs->opaque;
QEMUSnapshotInfo *sn_tab, *sn_info;
QCowSnapshot *sn;
int i;
if (!s->nb_snapshots) {
*psn_tab = NULL;
return s->nb_snapshots;
}
sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
for(i = 0; i < s->nb_snapshots; i++) {
sn_info = sn_tab + i;
sn = s->snapshots + i;
pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
sn->id_str);
pstrcpy(sn_info->name, sizeof(sn_info->name),
sn->name);
sn_info->vm_state_size = sn->vm_state_size;
sn_info->date_sec = sn->date_sec;
sn_info->date_nsec = sn->date_nsec;
sn_info->vm_clock_nsec = sn->vm_clock_nsec;
}
*psn_tab = sn_tab;
return s->nb_snapshots;
}
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
{
int i, snapshot_index;
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
uint64_t *new_l1_table;
int new_l1_bytes;
int ret;
assert(bs->read_only);
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
if (snapshot_index < 0) {
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
new_l1_bytes = s->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
g_free(new_l1_table);
return ret;
}
/* Switch the L1 table */
g_free(s->l1_table);
s->l1_size = sn->l1_size;
s->l1_table_offset = sn->l1_table_offset;
s->l1_table = new_l1_table;
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
return 0;
}