block: Cater to iscsi with non-power-of-2 discard
Dell Equallogic iSCSI SANs have a very unusual advertised geometry: $ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0 wsnz:0 maximum compare and write length:1 optimal transfer length granularity:0 maximum transfer length:0 optimal transfer length:0 maximum prefetch xdread xdwrite transfer length:0 maximum unmap lba count:30720 maximum unmap block descriptor count:2 optimal unmap granularity:30720 ugavalid:1 unmap granularity alignment:0 maximum write same length:30720 which says that both the maximum and the optimal discard sizes are 15M. It is not immediately apparent if the device allows discard requests not aligned to the optimal size, nor if it allows discards at a finer granularity than the optimal size. I tried to find details in the SCSI Commands Reference Manual Rev. A on what valid values of maximum and optimal sizes are permitted, but while that document mentions a "Block Limits VPD Page", I couldn't actually find documentation of that page or what values it would have, or if a SCSI device has an advertisement of its minimal unmap granularity. So it is not obvious to me whether the Dell Equallogic device is in compliance with the SCSI specification. Fortunately, it is easy enough to support non-power-of-2 sizing, even if it means we are less efficient than truly possible when targeting that device (for example, it means that we refuse to unmap anything that is not a multiple of 15M and aligned to a 15M boundary, even if the device truly does support a smaller granularity where unmapping actually works). Reported-by: Peter Lieven <pl@kamp.de> Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1469129688-22848-5-git-send-email-eblake@redhat.com> Acked-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
e9fd416e66
commit
b8d0a9804d
15
block/io.c
15
block/io.c
@ -1180,10 +1180,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
|
|||||||
int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
|
int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
|
||||||
bs->bl.request_alignment);
|
bs->bl.request_alignment);
|
||||||
|
|
||||||
assert(is_power_of_2(alignment));
|
assert(alignment % bs->bl.request_alignment == 0);
|
||||||
head = offset & (alignment - 1);
|
head = offset % alignment;
|
||||||
tail = (offset + count) & (alignment - 1);
|
tail = (offset + count) % alignment;
|
||||||
max_write_zeroes &= ~(alignment - 1);
|
max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
|
||||||
|
assert(max_write_zeroes >= bs->bl.request_alignment);
|
||||||
|
|
||||||
while (count > 0 && !ret) {
|
while (count > 0 && !ret) {
|
||||||
int num = count;
|
int num = count;
|
||||||
@ -2429,9 +2430,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
|
|||||||
|
|
||||||
/* Discard is advisory, so ignore any unaligned head or tail */
|
/* Discard is advisory, so ignore any unaligned head or tail */
|
||||||
align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
|
align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
|
||||||
assert(is_power_of_2(align));
|
assert(align % bs->bl.request_alignment == 0);
|
||||||
head = MIN(count, -offset & (align - 1));
|
head = offset % align;
|
||||||
if (head) {
|
if (head) {
|
||||||
|
head = MIN(count, align - head);
|
||||||
count -= head;
|
count -= head;
|
||||||
offset += head;
|
offset += head;
|
||||||
}
|
}
|
||||||
@ -2449,6 +2451,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
|
|||||||
|
|
||||||
max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
|
max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
|
||||||
align);
|
align);
|
||||||
|
assert(max_pdiscard);
|
||||||
|
|
||||||
while (count > 0) {
|
while (count > 0) {
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -330,36 +330,39 @@ typedef struct BlockLimits {
|
|||||||
* otherwise. */
|
* otherwise. */
|
||||||
uint32_t request_alignment;
|
uint32_t request_alignment;
|
||||||
|
|
||||||
/* maximum number of bytes that can be discarded at once (since it
|
/* Maximum number of bytes that can be discarded at once (since it
|
||||||
* is signed, it must be < 2G, if set), should be multiple of
|
* is signed, it must be < 2G, if set). Must be multiple of
|
||||||
* pdiscard_alignment, but need not be power of 2. May be 0 if no
|
* pdiscard_alignment, but need not be power of 2. May be 0 if no
|
||||||
* inherent 32-bit limit */
|
* inherent 32-bit limit */
|
||||||
int32_t max_pdiscard;
|
int32_t max_pdiscard;
|
||||||
|
|
||||||
/* optimal alignment for discard requests in bytes, must be power
|
/* Optimal alignment for discard requests in bytes. A power of 2
|
||||||
* of 2, less than max_pdiscard if that is set, and multiple of
|
* is best but not mandatory. Must be a multiple of
|
||||||
* bl.request_alignment. May be 0 if bl.request_alignment is good
|
* bl.request_alignment, and must be less than max_pdiscard if
|
||||||
* enough */
|
* that is set. May be 0 if bl.request_alignment is good enough */
|
||||||
uint32_t pdiscard_alignment;
|
uint32_t pdiscard_alignment;
|
||||||
|
|
||||||
/* maximum number of bytes that can zeroized at once (since it is
|
/* Maximum number of bytes that can zeroized at once (since it is
|
||||||
* signed, it must be < 2G, if set), should be multiple of
|
* signed, it must be < 2G, if set). Must be multiple of
|
||||||
* pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
|
* pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
|
||||||
int32_t max_pwrite_zeroes;
|
int32_t max_pwrite_zeroes;
|
||||||
|
|
||||||
/* optimal alignment for write zeroes requests in bytes, must be
|
/* Optimal alignment for write zeroes requests in bytes. A power
|
||||||
* power of 2, less than max_pwrite_zeroes if that is set, and
|
* of 2 is best but not mandatory. Must be a multiple of
|
||||||
* multiple of bl.request_alignment. May be 0 if
|
* bl.request_alignment, and must be less than max_pwrite_zeroes
|
||||||
* bl.request_alignment is good enough */
|
* if that is set. May be 0 if bl.request_alignment is good
|
||||||
|
* enough */
|
||||||
uint32_t pwrite_zeroes_alignment;
|
uint32_t pwrite_zeroes_alignment;
|
||||||
|
|
||||||
/* optimal transfer length in bytes (must be power of 2, and
|
/* Optimal transfer length in bytes. A power of 2 is best but not
|
||||||
* multiple of bl.request_alignment), or 0 if no preferred size */
|
* mandatory. Must be a multiple of bl.request_alignment, or 0 if
|
||||||
|
* no preferred size */
|
||||||
uint32_t opt_transfer;
|
uint32_t opt_transfer;
|
||||||
|
|
||||||
/* maximal transfer length in bytes (need not be power of 2, but
|
/* Maximal transfer length in bytes. Need not be power of 2, but
|
||||||
* should be multiple of opt_transfer), or 0 for no 32-bit limit.
|
* must be multiple of opt_transfer and bl.request_alignment, or 0
|
||||||
* For now, anything larger than INT_MAX is clamped down. */
|
* for no 32-bit limit. For now, anything larger than INT_MAX is
|
||||||
|
* clamped down. */
|
||||||
uint32_t max_transfer;
|
uint32_t max_transfer;
|
||||||
|
|
||||||
/* memory alignment, in bytes so that no bounce buffer is needed */
|
/* memory alignment, in bytes so that no bounce buffer is needed */
|
||||||
|
Loading…
Reference in New Issue
Block a user