block: add cache=directsync parameter to -drive

This patch adds -drive cache=directsync for O_DIRECT | O_SYNC host file
I/O with no disk write cache presented to the guest.

This mode is useful when guests may not be sending flushes when
appropriate and therefore leave data at risk in case of power failure.
When cache=directsync is used, write operations are only completed to
the guest when data is safely on disk.

This new mode is like cache=writethrough but it bypasses the host page
cache.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2011-08-04 12:26:52 +01:00 committed by Kevin Wolf
parent c3993cdca3
commit 92196b2f56
4 changed files with 14 additions and 6 deletions

View File

@ -448,6 +448,8 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
if (!strcmp(mode, "off") || !strcmp(mode, "none")) { if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
*flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
} else if (!strcmp(mode, "directsync")) {
*flags |= BDRV_O_NOCACHE;
} else if (!strcmp(mode, "writeback")) { } else if (!strcmp(mode, "writeback")) {
*flags |= BDRV_O_CACHE_WB; *flags |= BDRV_O_CACHE_WB;
} else if (!strcmp(mode, "unsafe")) { } else if (!strcmp(mode, "unsafe")) {
@ -1188,8 +1190,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return ret; return ret;
} }
/* No flush needed for cache=writethrough, it uses O_DSYNC */ /* No flush needed for cache modes that use O_DSYNC */
if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) { if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
bdrv_flush(bs); bdrv_flush(bs);
} }

View File

@ -55,7 +55,8 @@ static QemuOptsList qemu_drive_opts = {
},{ },{
.name = "cache", .name = "cache",
.type = QEMU_OPT_STRING, .type = QEMU_OPT_STRING,
.help = "host cache usage (none, writeback, writethrough, unsafe)", .help = "host cache usage (none, writeback, writethrough, "
"directsync, unsafe)",
},{ },{
.name = "aio", .name = "aio",
.type = QEMU_OPT_STRING, .type = QEMU_OPT_STRING,

View File

@ -66,7 +66,8 @@ static void help(void)
" 'filename' is a disk image filename\n" " 'filename' is a disk image filename\n"
" 'fmt' is the disk image format. It is guessed automatically in most cases\n" " 'fmt' is the disk image format. It is guessed automatically in most cases\n"
" 'cache' is the cache mode used to write the output disk image, the valid\n" " 'cache' is the cache mode used to write the output disk image, the valid\n"
" options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n" " options are: 'none', 'writeback' (default), 'writethrough', 'directsync'\n"
" and 'unsafe'\n"
" 'size' is the disk image size in bytes. Optional suffixes\n" " 'size' is the disk image size in bytes. Optional suffixes\n"
" 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n" " 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n"
" and T (terabyte, 1024G) are supported. 'b' is ignored.\n" " and T (terabyte, 1024G) are supported. 'b' is ignored.\n"

View File

@ -133,7 +133,7 @@ ETEXI
DEF("drive", HAS_ARG, QEMU_OPTION_drive, DEF("drive", HAS_ARG, QEMU_OPTION_drive,
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=writethrough|writeback|none|unsafe][,format=f]\n" " [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n" " [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
" [,readonly=on|off]\n" " [,readonly=on|off]\n"
" use 'file' as a drive image\n", QEMU_ARCH_ALL) " use 'file' as a drive image\n", QEMU_ARCH_ALL)
@ -164,7 +164,7 @@ These options have the same definition as they have in @option{-hdachs}.
@item snapshot=@var{snapshot} @item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and enables or disables snapshot mode for the given drive (see @option{-snapshot}). @var{snapshot} is "on" or "off" and enables or disables snapshot mode for the given drive (see @option{-snapshot}).
@item cache=@var{cache} @item cache=@var{cache}
@var{cache} is "none", "writeback", "unsafe", or "writethrough" and controls how the host cache is used to access block data. @var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" and controls how the host cache is used to access block data.
@item aio=@var{aio} @item aio=@var{aio}
@var{aio} is "threads" or "native" and selects between pthread-based disk I/O and native Linux AIO. @var{aio} is "threads" or "native" and selects between pthread-based disk I/O and native Linux AIO.
@item format=@var{format} @item format=@var{format}
@ -199,6 +199,10 @@ The host page cache can be avoided entirely with @option{cache=none}. This will
attempt to do disk IO directly to the guest's memory. QEMU may still perform attempt to do disk IO directly to the guest's memory. QEMU may still perform
an internal copy of the data. an internal copy of the data.
With @option{cache=directsync}, the host page cache is bypassed and write
completions are only reported to the guest once the storage subsystem has
reported the data as written.
Some block drivers perform badly with @option{cache=writethrough}, most notably, Some block drivers perform badly with @option{cache=writethrough}, most notably,
qcow2. If performance is more important than correctness, qcow2. If performance is more important than correctness,
@option{cache=writeback} should be used with qcow2. @option{cache=writeback} should be used with qcow2.