diff --git a/block/qed.c b/block/qed.c index eddae929eb..b69374b6a2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -567,7 +567,7 @@ static void bdrv_qed_close(BlockDriverState *bs) static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt, - Error **errp) + QemuOpts *opts, Error **errp) { QEDHeader header = { .magic = QED_MAGIC, @@ -586,7 +586,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, int ret = 0; BlockDriverState *bs; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -682,7 +682,7 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) } ret = qed_create(filename, cluster_size, image_size, table_size, - backing_file, backing_fmt, errp); + backing_file, backing_fmt, opts, errp); finish: g_free(backing_file); diff --git a/block/raw-posix.c b/block/raw-posix.c index dacf4fbbc8..825a0c878f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -55,6 +55,9 @@ #include #include #include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif #endif #ifdef CONFIG_FIEMAP #include @@ -1278,12 +1281,14 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) int fd; int result = 0; int64_t total_size = 0; + bool nocow = false; strstart(filename, "file:", &filename); /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE; + nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -1291,6 +1296,21 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) result = -errno; error_setg_errno(errp, -result, "Could not create file"); } else { + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue 
on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value + * will be ignored since any failure of this operation should not + * block the left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { result = -errno; error_setg_errno(errp, -result, "Could not resize file"); @@ -1477,6 +1497,11 @@ static QemuOptsList raw_create_opts = { .type = QEMU_OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/block/vdi.c b/block/vdi.c index 01fe22ebe8..197bd77c97 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,6 +53,13 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif #if defined(CONFIG_UUID) #include @@ -683,6 +690,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) VdiHeader header; size_t i; size_t bmap_size; + bool nocow = false; logout("\n"); @@ -699,6 +707,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) image_type = VDI_TYPE_STATIC; } #endif + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); if (bytes > VDI_DISK_SIZE_MAX) { result = -ENOTSUP; @@ -716,6 +725,21 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. 
+ */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* We need enough blocks to store the given disk size, so always round up. */ blocks = (bytes + block_size - 1) / block_size; @@ -818,6 +842,11 @@ static QemuOptsList vdi_create_opts = { .def_value_str = "off" }, #endif + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, /* TODO: An additional option to set UUID values might be useful. */ { /* end of list */ } } diff --git a/block/vmdk.c b/block/vmdk.c index d0de0193fc..27a78daa02 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1529,7 +1529,7 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain, - Error **errp) + QemuOpts *opts, Error **errp) { int ret, i; BlockDriverState *bs = NULL; @@ -1539,7 +1539,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, uint32_t *gd_buf = NULL; int gd_buf_size; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1845,7 +1845,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) path, desc_filename); if (vmdk_create_extent(ext_filename, size, - flat, compress, zeroed_grain, errp)) { + flat, compress, zeroed_grain, opts, errp)) { ret = -EINVAL; goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 798d8540db..8b376a40be 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -29,6 +29,13 @@ #if defined(CONFIG_UUID) #include #endif +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif /**************************************************************/ @@ -751,6 +758,7 @@ static int vpc_create(const char *filename, 
QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool nocow = false; /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); @@ -767,6 +775,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } else { disk_type = VHD_DYNAMIC; } + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); /* Create the file */ fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -775,6 +784,21 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that @@ -884,6 +908,11 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. 
Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 53e77cf11e..eaf6e313d5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -54,6 +54,7 @@ #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" #define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" typedef struct BdrvTrackedRequest { BlockDriverState *bs; diff --git a/qemu-doc.texi b/qemu-doc.texi index 88ec9bb133..ad92c85cba 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -589,6 +589,22 @@ check -r all} is required, which may take some time. This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will turn off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM is also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capital 'C' is NOCOW flag). + @end table @item qed diff --git a/qemu-img.texi b/qemu-img.texi index c68b54148a..8496f3b8dc 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -474,6 +474,22 @@ check -r all} is required, which may take some time. 
This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will turn off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM is also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capital 'C' is NOCOW flag). + @end table @item Other diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index 28309a0327..413e7ef391 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -66,6 +66,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ? 
TEST_DIR/t.qcow2 128M Supported options: @@ -77,6 +78,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 128M Supported options: @@ -88,6 +90,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 128M Supported options: @@ -99,6 +102,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -110,6 +114,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -121,6 +126,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 128M Supported options: @@ -132,6 +138,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o ? 
TEST_DIR/t.qcow2 128M Supported options: @@ -143,6 +150,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 128M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,help' encryption=off cluster_size=65536 lazy_refcounts=off @@ -247,6 +255,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -258,6 +267,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -269,6 +279,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,? 
TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -280,6 +291,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -291,6 +303,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -302,6 +315,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -313,6 +327,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o ? 
TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -324,6 +339,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2.base': Could not open backing file: Could not open 'TEST_DIR/t.qcow2,help': No such file or directory @@ -417,6 +433,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ? TEST_DIR/t.qcow2 Supported options: @@ -428,6 +445,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 Supported options: @@ -439,6 +457,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,? 
TEST_DIR/t.qcow2 Supported options: @@ -450,6 +469,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -461,6 +481,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -472,6 +493,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 Supported options: @@ -483,6 +505,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 Supported options: @@ -494,6 +517,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2