2013-12-02 01:23:41 +04:00
|
|
|
/*
|
|
|
|
* QEMU Block driver for NBD
|
|
|
|
*
|
2016-10-14 21:33:04 +03:00
|
|
|
* Copyright (C) 2016 Red Hat, Inc.
|
2013-12-02 01:23:41 +04:00
|
|
|
* Copyright (C) 2008 Bull S.A.S.
|
|
|
|
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
|
|
|
|
*
|
|
|
|
* Some parts:
|
|
|
|
* Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2016-01-18 21:01:42 +03:00
|
|
|
#include "qemu/osdep.h"
|
2017-05-26 14:09:13 +03:00
|
|
|
#include "qapi/error.h"
|
2013-12-02 01:23:41 +04:00
|
|
|
#include "nbd-client.h"
|
|
|
|
|
2017-09-19 00:46:49 +03:00
|
|
|
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
|
|
|
|
#define INDEX_TO_HANDLE(bs, index) ((index) ^ (uint64_t)(intptr_t)(bs))
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-08-04 18:14:31 +03:00
|
|
|
static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
|
2013-12-02 01:23:45 +04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
|
2017-08-22 15:51:13 +03:00
|
|
|
NBDClientRequest *req = &s->requests[i];
|
|
|
|
|
|
|
|
if (req->coroutine && req->receiving) {
|
|
|
|
aio_co_wake(req->coroutine);
|
2013-12-02 01:23:45 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
static void nbd_teardown_connection(BlockDriverState *bs)
|
2014-02-26 18:30:18 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2015-02-07 00:06:16 +03:00
|
|
|
|
2016-02-10 21:41:01 +03:00
|
|
|
if (!client->ioc) { /* Already closed */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-02-26 18:30:18 +04:00
|
|
|
/* finish any pending coroutines */
|
2016-02-10 21:41:01 +03:00
|
|
|
qio_channel_shutdown(client->ioc,
|
|
|
|
QIO_CHANNEL_SHUTDOWN_BOTH,
|
|
|
|
NULL);
|
2017-03-14 14:11:56 +03:00
|
|
|
BDRV_POLL_WHILE(bs, client->read_reply_co);
|
2014-02-26 18:30:18 +04:00
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
nbd_client_detach_aio_context(bs);
|
2016-02-10 21:41:01 +03:00
|
|
|
object_unref(OBJECT(client->sioc));
|
|
|
|
client->sioc = NULL;
|
|
|
|
object_unref(OBJECT(client->ioc));
|
|
|
|
client->ioc = NULL;
|
2014-02-26 18:30:18 +04:00
|
|
|
}
|
|
|
|
|
2017-02-13 16:52:24 +03:00
|
|
|
static coroutine_fn void nbd_read_reply_entry(void *opaque)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2017-02-13 16:52:24 +03:00
|
|
|
NBDClientSession *s = opaque;
|
2013-12-02 01:23:41 +04:00
|
|
|
uint64_t i;
|
2017-08-17 19:14:13 +03:00
|
|
|
int ret = 0;
|
2017-05-26 14:09:13 +03:00
|
|
|
Error *local_err = NULL;
|
2013-12-02 01:23:41 +04:00
|
|
|
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
while (!s->quit) {
|
2017-02-13 16:52:24 +03:00
|
|
|
assert(s->reply.handle == 0);
|
2017-05-26 14:09:13 +03:00
|
|
|
ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
}
|
2017-03-14 14:11:56 +03:00
|
|
|
if (ret <= 0) {
|
2017-02-13 16:52:24 +03:00
|
|
|
break;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2017-02-13 16:52:24 +03:00
|
|
|
/* There's no need for a mutex on the receive side, because the
|
|
|
|
* handler acts as a synchronization point and ensures that only
|
|
|
|
* one coroutine is called until the reply finishes.
|
|
|
|
*/
|
|
|
|
i = HANDLE_TO_INDEX(s, s->reply.handle);
|
2017-08-22 15:51:13 +03:00
|
|
|
if (i >= MAX_NBD_REQUESTS ||
|
|
|
|
!s->requests[i].coroutine ||
|
|
|
|
!s->requests[i].receiving) {
|
2017-02-13 16:52:24 +03:00
|
|
|
break;
|
|
|
|
}
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-08-22 15:51:13 +03:00
|
|
|
/* We're woken up again by the request itself. Note that there
|
2017-02-13 16:52:24 +03:00
|
|
|
* is no race between yielding and reentering read_reply_co. This
|
|
|
|
* is because:
|
|
|
|
*
|
2017-08-22 15:51:13 +03:00
|
|
|
* - if the request runs on the same AioContext, it is only
|
2017-02-13 16:52:24 +03:00
|
|
|
* entered after we yield
|
|
|
|
*
|
2017-08-22 15:51:13 +03:00
|
|
|
* - if the request runs on a different AioContext, reentering
|
2017-02-13 16:52:24 +03:00
|
|
|
* read_reply_co happens through a bottom half, which can only
|
|
|
|
* run after we yield.
|
|
|
|
*/
|
2017-08-22 15:51:13 +03:00
|
|
|
aio_co_wake(s->requests[i].coroutine);
|
2017-02-13 16:52:24 +03:00
|
|
|
qemu_coroutine_yield();
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
2017-03-14 14:11:56 +03:00
|
|
|
|
2017-08-22 15:51:13 +03:00
|
|
|
s->quit = true;
|
2017-08-04 18:14:31 +03:00
|
|
|
nbd_recv_coroutines_wake_all(s);
|
2017-02-13 16:52:24 +03:00
|
|
|
s->read_reply_co = NULL;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
static int nbd_co_send_request(BlockDriverState *bs,
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest *request,
|
2016-07-15 21:32:03 +03:00
|
|
|
QEMUIOVector *qiov)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *s = nbd_get_client_session(bs);
|
2017-09-05 22:11:14 +03:00
|
|
|
int rc, i;
|
2013-12-02 01:23:41 +04:00
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->send_mutex);
|
2017-06-01 13:44:56 +03:00
|
|
|
while (s->in_flight == MAX_NBD_REQUESTS) {
|
|
|
|
qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
|
|
|
|
}
|
|
|
|
s->in_flight++;
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 10:20:46 +03:00
|
|
|
|
|
|
|
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
|
2017-08-22 15:51:13 +03:00
|
|
|
if (s->requests[i].coroutine == NULL) {
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 10:20:46 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-10 21:41:04 +03:00
|
|
|
g_assert(qemu_in_coroutine());
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 10:20:46 +03:00
|
|
|
assert(i < MAX_NBD_REQUESTS);
|
2017-08-22 15:51:13 +03:00
|
|
|
|
|
|
|
s->requests[i].coroutine = qemu_coroutine_self();
|
|
|
|
s->requests[i].receiving = false;
|
|
|
|
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 10:20:46 +03:00
|
|
|
request->handle = INDEX_TO_HANDLE(s, i);
|
2016-02-10 21:41:01 +03:00
|
|
|
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
if (s->quit) {
|
nbd-client: avoid read_reply_co entry if send failed
The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
441 QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
(gdb) bt
#0 0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
#1 0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
#2 0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
#3 0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
#4 0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, f
+lags=0) at block/io.c:1086
#5 0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
#6 0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
#7 0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
#8 0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
#9 0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6
The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co). Execution of read_reply_co is deferred to a BH
which doesn't run until later.
In the mean time blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co. At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.
This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply(). The request is "ended"
if an error occurs at any point. Callers no longer have to invoke
nbd_coroutine_end().
This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed. It is only necessary to wake up s->read_reply_co if a reply was
received.
Note this only happens with UNIX domain sockets on Linux. It doesn't
seem possible to reproduce this with TCP sockets.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-08-29 15:27:43 +03:00
|
|
|
rc = -EIO;
|
|
|
|
goto err;
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
}
|
2016-02-10 21:41:01 +03:00
|
|
|
if (!s->ioc) {
|
nbd-client: avoid read_reply_co entry if send failed
The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
441 QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
(gdb) bt
#0 0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
#1 0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
#2 0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
#3 0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
#4 0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, f
+lags=0) at block/io.c:1086
#5 0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
#6 0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
#7 0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
#8 0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
#9 0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6
The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co). Execution of read_reply_co is deferred to a BH
which doesn't run until later.
In the mean time blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co. At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.
This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply(). The request is "ended"
if an error occurs at any point. Callers no longer have to invoke
nbd_coroutine_end().
This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed. It is only necessary to wake up s->read_reply_co if a reply was
received.
Note this only happens with UNIX domain sockets on Linux. It doesn't
seem possible to reproduce this with TCP sockets.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-08-29 15:27:43 +03:00
|
|
|
rc = -EPIPE;
|
|
|
|
goto err;
|
2016-02-10 21:41:01 +03:00
|
|
|
}
|
|
|
|
|
2013-12-02 01:23:41 +04:00
|
|
|
if (qiov) {
|
2016-02-10 21:41:01 +03:00
|
|
|
qio_channel_set_cork(s->ioc, true);
|
2016-02-10 21:41:04 +03:00
|
|
|
rc = nbd_send_request(s->ioc, request);
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
if (rc >= 0 && !s->quit) {
|
2017-09-05 22:11:14 +03:00
|
|
|
if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
|
|
|
|
NULL) < 0) {
|
2013-12-02 01:23:41 +04:00
|
|
|
rc = -EIO;
|
|
|
|
}
|
2017-09-20 15:45:07 +03:00
|
|
|
} else if (rc >= 0) {
|
|
|
|
rc = -EIO;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
2016-02-10 21:41:01 +03:00
|
|
|
qio_channel_set_cork(s->ioc, false);
|
2013-12-02 01:23:41 +04:00
|
|
|
} else {
|
2016-02-10 21:41:04 +03:00
|
|
|
rc = nbd_send_request(s->ioc, request);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
nbd-client: avoid read_reply_co entry if send failed
The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
441 QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
(gdb) bt
#0 0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
#1 0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
#2 0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
#3 0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
#4 0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, f
+lags=0) at block/io.c:1086
#5 0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
#6 0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
#7 0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
#8 0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
#9 0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6
The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co). Execution of read_reply_co is deferred to a BH
which doesn't run until later.
In the mean time blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co. At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.
This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply(). The request is "ended"
if an error occurs at any point. Callers no longer have to invoke
nbd_coroutine_end().
This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed. It is only necessary to wake up s->read_reply_co if a reply was
received.
Note this only happens with UNIX domain sockets on Linux. It doesn't
seem possible to reproduce this with TCP sockets.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-08-29 15:27:43 +03:00
|
|
|
|
|
|
|
err:
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
s->quit = true;
|
nbd-client: avoid read_reply_co entry if send failed
The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
441 QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
(gdb) bt
#0 0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
#1 0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
#2 0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
#3 0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
#4 0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, f
+lags=0) at block/io.c:1086
#5 0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
#6 0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
#7 0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
#8 0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
#9 0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6
The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co). Execution of read_reply_co is deferred to a BH
which doesn't run until later.
In the mean time blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co. At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.
This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply(). The request is "ended"
if an error occurs at any point. Callers no longer have to invoke
nbd_coroutine_end().
This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed. It is only necessary to wake up s->read_reply_co if a reply was
received.
Note this only happens with UNIX domain sockets on Linux. It doesn't
seem possible to reproduce this with TCP sockets.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-08-29 15:27:43 +03:00
|
|
|
s->requests[i].coroutine = NULL;
|
|
|
|
s->in_flight--;
|
|
|
|
qemu_co_queue_next(&s->free_sema);
|
nbd-client: Fix regression when server sends garbage
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands. In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.
Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands. As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).
Based on a patch by Vladimir Sementsov-Ogievskiy.
A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:
| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
| stl_be_p(buf + 4, reply->error);
| stq_be_p(buf + 8, reply->handle);
|
| + static int debug;
| + static int count;
| + if (!count++) {
| + const char *str = getenv("NBD_SERVER_DEBUG");
| + if (str) {
| + debug = atoi(str);
| + }
| + }
| + if (debug && !(count % debug)) {
| + buf[0] = 0;
| + }
| return nbd_write(ioc, buf, sizeof(buf), errp);
| }
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-08-15 00:34:26 +03:00
|
|
|
}
|
2013-12-02 01:23:41 +04:00
|
|
|
qemu_co_mutex_unlock(&s->send_mutex);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-20 15:45:05 +03:00
|
|
|
static int nbd_co_receive_reply(NBDClientSession *s,
|
2017-10-12 12:53:08 +03:00
|
|
|
uint64_t handle,
|
2017-09-20 15:45:05 +03:00
|
|
|
QEMUIOVector *qiov)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2017-09-20 15:45:05 +03:00
|
|
|
int ret;
|
2017-10-12 12:53:08 +03:00
|
|
|
int i = HANDLE_TO_INDEX(s, handle);
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-02-13 16:52:24 +03:00
|
|
|
/* Wait until we're woken up by nbd_read_reply_entry. */
|
2017-08-22 15:51:13 +03:00
|
|
|
s->requests[i].receiving = true;
|
2013-12-02 01:23:41 +04:00
|
|
|
qemu_coroutine_yield();
|
2017-08-22 15:51:13 +03:00
|
|
|
s->requests[i].receiving = false;
|
2017-09-20 15:45:06 +03:00
|
|
|
if (!s->ioc || s->quit) {
|
2017-09-20 15:45:05 +03:00
|
|
|
ret = -EIO;
|
2013-12-02 01:23:41 +04:00
|
|
|
} else {
|
2017-10-12 12:53:08 +03:00
|
|
|
assert(s->reply.handle == handle);
|
2017-09-20 15:45:05 +03:00
|
|
|
ret = -s->reply.error;
|
|
|
|
if (qiov && s->reply.error == 0) {
|
2017-09-05 22:11:14 +03:00
|
|
|
if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
|
|
|
|
NULL) < 0) {
|
2017-09-20 15:45:05 +03:00
|
|
|
ret = -EIO;
|
2017-08-22 15:51:13 +03:00
|
|
|
s->quit = true;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tell the read handler to read another header. */
|
|
|
|
s->reply.handle = 0;
|
|
|
|
}
|
2017-02-13 16:52:24 +03:00
|
|
|
|
2017-08-22 15:51:13 +03:00
|
|
|
s->requests[i].coroutine = NULL;
|
2017-02-13 16:52:24 +03:00
|
|
|
|
|
|
|
/* Kick the read_reply_co to get the next reply. */
|
|
|
|
if (s->read_reply_co) {
|
|
|
|
aio_co_wake(s->read_reply_co);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
2017-06-01 13:44:56 +03:00
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->send_mutex);
|
|
|
|
s->in_flight--;
|
|
|
|
qemu_co_queue_next(&s->free_sema);
|
|
|
|
qemu_co_mutex_unlock(&s->send_mutex);
|
2017-09-20 15:45:05 +03:00
|
|
|
|
|
|
|
return ret;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
static int nbd_co_request(BlockDriverState *bs,
|
|
|
|
NBDRequest *request,
|
|
|
|
QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
|
|
|
int ret;
|
|
|
|
|
2017-10-12 12:53:07 +03:00
|
|
|
if (qiov) {
|
|
|
|
assert(request->type == NBD_CMD_WRITE || request->type == NBD_CMD_READ);
|
|
|
|
assert(request->len == iov_size(qiov->iov, qiov->niov));
|
|
|
|
} else {
|
|
|
|
assert(request->type != NBD_CMD_WRITE && request->type != NBD_CMD_READ);
|
|
|
|
}
|
2017-08-30 00:48:31 +03:00
|
|
|
ret = nbd_co_send_request(bs, request,
|
|
|
|
request->type == NBD_CMD_WRITE ? qiov : NULL);
|
|
|
|
if (ret < 0) {
|
2017-09-20 15:45:05 +03:00
|
|
|
return ret;
|
2017-08-30 00:48:31 +03:00
|
|
|
}
|
2017-09-20 15:45:05 +03:00
|
|
|
|
2017-10-12 12:53:08 +03:00
|
|
|
return nbd_co_receive_reply(client, request->handle,
|
2017-09-20 15:45:05 +03:00
|
|
|
request->type == NBD_CMD_READ ? qiov : NULL);
|
2017-08-30 00:48:31 +03:00
|
|
|
}
|
|
|
|
|
2016-07-16 02:23:07 +03:00
|
|
|
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
|
|
|
|
uint64_t bytes, QEMUIOVector *qiov, int flags)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest request = {
|
2016-07-16 02:23:07 +03:00
|
|
|
.type = NBD_CMD_READ,
|
|
|
|
.from = offset,
|
|
|
|
.len = bytes,
|
|
|
|
};
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2016-07-16 02:23:07 +03:00
|
|
|
assert(bytes <= NBD_MAX_BUFFER_SIZE);
|
|
|
|
assert(!flags);
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
return nbd_co_request(bs, &request, qiov);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2016-07-16 02:23:07 +03:00
|
|
|
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
|
|
|
|
uint64_t bytes, QEMUIOVector *qiov, int flags)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest request = {
|
2016-07-16 02:23:07 +03:00
|
|
|
.type = NBD_CMD_WRITE,
|
|
|
|
.from = offset,
|
|
|
|
.len = bytes,
|
|
|
|
};
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2016-05-04 01:39:08 +03:00
|
|
|
if (flags & BDRV_REQ_FUA) {
|
2017-07-07 23:30:41 +03:00
|
|
|
assert(client->info.flags & NBD_FLAG_SEND_FUA);
|
2016-10-14 21:33:04 +03:00
|
|
|
request.flags |= NBD_CMD_FLAG_FUA;
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2016-07-16 02:23:07 +03:00
|
|
|
assert(bytes <= NBD_MAX_BUFFER_SIZE);
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
return nbd_co_request(bs, &request, qiov);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2016-10-14 21:33:18 +03:00
|
|
|
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
|
2017-06-09 13:18:08 +03:00
|
|
|
int bytes, BdrvRequestFlags flags)
|
2016-10-14 21:33:18 +03:00
|
|
|
{
|
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
|
|
|
NBDRequest request = {
|
|
|
|
.type = NBD_CMD_WRITE_ZEROES,
|
|
|
|
.from = offset,
|
2017-06-09 13:18:08 +03:00
|
|
|
.len = bytes,
|
2016-10-14 21:33:18 +03:00
|
|
|
};
|
|
|
|
|
2017-07-07 23:30:41 +03:00
|
|
|
if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
|
2016-10-14 21:33:18 +03:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & BDRV_REQ_FUA) {
|
2017-07-07 23:30:41 +03:00
|
|
|
assert(client->info.flags & NBD_FLAG_SEND_FUA);
|
2016-10-14 21:33:18 +03:00
|
|
|
request.flags |= NBD_CMD_FLAG_FUA;
|
|
|
|
}
|
|
|
|
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
|
|
|
|
request.flags |= NBD_CMD_FLAG_NO_HOLE;
|
|
|
|
}
|
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
return nbd_co_request(bs, &request, NULL);
|
2016-10-14 21:33:18 +03:00
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
int nbd_client_co_flush(BlockDriverState *bs)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest request = { .type = NBD_CMD_FLUSH };
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-07-07 23:30:41 +03:00
|
|
|
if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
|
2013-12-02 01:23:41 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
request.from = 0;
|
|
|
|
request.len = 0;
|
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
return nbd_co_request(bs, &request, NULL);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2017-06-09 13:18:08 +03:00
|
|
|
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest request = {
|
2016-07-16 02:23:02 +03:00
|
|
|
.type = NBD_CMD_TRIM,
|
|
|
|
.from = offset,
|
2017-06-09 13:18:08 +03:00
|
|
|
.len = bytes,
|
2016-07-16 02:23:02 +03:00
|
|
|
};
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2017-07-07 23:30:41 +03:00
|
|
|
if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
|
2013-12-02 01:23:41 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-30 00:48:31 +03:00
|
|
|
return nbd_co_request(bs, &request, NULL);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
void nbd_client_detach_aio_context(BlockDriverState *bs)
|
2014-05-08 18:34:43 +04:00
|
|
|
{
|
2017-02-13 16:52:24 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2017-06-15 20:09:05 +03:00
|
|
|
qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
|
2014-05-08 18:34:43 +04:00
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
void nbd_client_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
2014-05-08 18:34:43 +04:00
|
|
|
{
|
2017-02-13 16:52:24 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2017-06-15 20:09:05 +03:00
|
|
|
qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
|
2017-02-13 16:52:24 +03:00
|
|
|
aio_co_schedule(new_context, client->read_reply_co);
|
2014-05-08 18:34:43 +04:00
|
|
|
}
|
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
void nbd_client_close(BlockDriverState *bs)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2016-10-14 21:33:07 +03:00
|
|
|
NBDRequest request = { .type = NBD_CMD_DISC };
|
2013-12-02 01:23:41 +04:00
|
|
|
|
2016-02-10 21:41:01 +03:00
|
|
|
if (client->ioc == NULL) {
|
2014-02-26 18:30:18 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-02-10 21:41:04 +03:00
|
|
|
nbd_send_request(client->ioc, &request);
|
2013-12-02 01:23:44 +04:00
|
|
|
|
2015-02-07 00:06:16 +03:00
|
|
|
nbd_teardown_connection(bs);
|
2013-12-02 01:23:41 +04:00
|
|
|
}
|
|
|
|
|
2016-02-10 21:41:12 +03:00
|
|
|
int nbd_client_init(BlockDriverState *bs,
|
|
|
|
QIOChannelSocket *sioc,
|
|
|
|
const char *export,
|
|
|
|
QCryptoTLSCreds *tlscreds,
|
|
|
|
const char *hostname,
|
|
|
|
Error **errp)
|
2013-12-02 01:23:41 +04:00
|
|
|
{
|
2016-10-14 21:33:06 +03:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2013-12-02 01:23:41 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* NBD handshake */
|
2013-12-02 01:23:43 +04:00
|
|
|
logout("session init %s\n", export);
|
2016-02-10 21:41:01 +03:00
|
|
|
qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
|
|
|
|
|
nbd: Implement NBD_INFO_BLOCK_SIZE on client
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server whether it intends to
obey block sizes.
When using the block layer as the client, we will obey block
sizes; but when used as 'qemu-nbd -c' to hand off to the
kernel nbd module as the client, we are still waiting for the
kernel to implement a way for us to learn if it will honor
block sizes (perhaps by an addition to sysfs, rather than an
ioctl), as well as any way to tell the kernel what additional
block sizes to obey (NBD_SET_BLKSIZE appears to be accurate
for the minimum size, but preferred and maximum sizes would
probably be new ioctl()s), so until then, we need to make our
request for block sizes conditional.
When using ioctl(NBD_SET_BLKSIZE) to hand off to the kernel,
use the minimum block size as the sector size if it is larger
than 512, which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when
exposing a block device with 4k sectors; it might also allow
us to visit a file larger than 2T on a 32-bit kernel.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-10-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-07 23:30:49 +03:00
|
|
|
client->info.request_sizes = true;
|
2016-02-10 21:41:04 +03:00
|
|
|
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
|
2016-02-10 21:41:12 +03:00
|
|
|
tlscreds, hostname,
|
2017-07-07 23:30:41 +03:00
|
|
|
&client->ioc, &client->info, errp);
|
2013-12-02 01:23:41 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
logout("Failed to negotiate with the NBD server\n");
|
|
|
|
return ret;
|
|
|
|
}
|
2017-07-07 23:30:41 +03:00
|
|
|
if (client->info.flags & NBD_FLAG_SEND_FUA) {
|
2016-05-04 01:39:06 +03:00
|
|
|
bs->supported_write_flags = BDRV_REQ_FUA;
|
2016-11-17 23:13:54 +03:00
|
|
|
bs->supported_zero_flags |= BDRV_REQ_FUA;
|
|
|
|
}
|
2017-07-07 23:30:41 +03:00
|
|
|
if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
|
2016-11-17 23:13:54 +03:00
|
|
|
bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
|
2016-05-04 01:39:06 +03:00
|
|
|
}
|
nbd: Implement NBD_INFO_BLOCK_SIZE on client
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server whether it intends to
obey block sizes.
When using the block layer as the client, we will obey block
sizes; but when used as 'qemu-nbd -c' to hand off to the
kernel nbd module as the client, we are still waiting for the
kernel to implement a way for us to learn if it will honor
block sizes (perhaps by an addition to sysfs, rather than an
ioctl), as well as any way to tell the kernel what additional
block sizes to obey (NBD_SET_BLKSIZE appears to be accurate
for the minimum size, but preferred and maximum sizes would
probably be new ioctl()s), so until then, we need to make our
request for block sizes conditional.
When using ioctl(NBD_SET_BLKSIZE) to hand off to the kernel,
use the minimum block size as the sector size if it is larger
than 512, which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when
exposing a block device with 4k sectors; it might also allow
us to visit a file larger than 2T on a 32-bit kernel.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-10-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-07 23:30:49 +03:00
|
|
|
if (client->info.min_block > bs->bl.request_alignment) {
|
|
|
|
bs->bl.request_alignment = client->info.min_block;
|
|
|
|
}
|
2013-12-02 01:23:41 +04:00
|
|
|
|
|
|
|
qemu_co_mutex_init(&client->send_mutex);
|
nbd: Use CoQueue for free_sema instead of CoMutex
NBD is using the CoMutex in a way that wasn't anticipated. For example, if there are
N(N=26, MAX_NBD_REQUESTS=16) nbd write requests, so we will invoke nbd_client_co_pwritev
N times.
----------------------------------------------------------------------------------------
time request Actions
1 1 in_flight=1, Coroutine=C1
2 2 in_flight=2, Coroutine=C2
...
15 15 in_flight=15, Coroutine=C15
16 16 in_flight=16, Coroutine=C16, free_sema->holder=C16, mutex->locked=true
17 17 in_flight=16, Coroutine=C17, queue C17 into free_sema->queue
18 18 in_flight=16, Coroutine=C18, queue C18 into free_sema->queue
...
26 N in_flight=16, Coroutine=C26, queue C26 into free_sema->queue
----------------------------------------------------------------------------------------
Once nbd client recieves request No.16' reply, we will re-enter C16. It's ok, because
it's equal to 'free_sema->holder'.
----------------------------------------------------------------------------------------
time request Actions
27 16 in_flight=15, Coroutine=C16, free_sema->holder=C16, mutex->locked=false
----------------------------------------------------------------------------------------
Then nbd_coroutine_end invokes qemu_co_mutex_unlock what will pop coroutines from
free_sema->queue's head and enter C17. More free_sema->holder is C17 now.
----------------------------------------------------------------------------------------
time request Actions
28 17 in_flight=16, Coroutine=C17, free_sema->holder=C17, mutex->locked=true
----------------------------------------------------------------------------------------
In above scenario, we only recieves request No.16' reply. As time goes by, nbd client will
almostly recieves replies from requests 1 to 15 rather than request 17 who owns C17. In this
case, we will encounter assert "mutex->holder == self" failed since Kevin's commit 0e438cdc
"coroutine: Let CoMutex remember who holds it". For example, if nbd client recieves request
No.15' reply, qemu will stop unexpectedly:
----------------------------------------------------------------------------------------
time request Actions
29 15(most case) in_flight=15, Coroutine=C15, free_sema->holder=C17, mutex->locked=false
----------------------------------------------------------------------------------------
Per Paolo's suggestion "The simplest fix is to change it to CoQueue, which is like a condition
variable", this patch replaces CoMutex with CoQueue.
Cc: Wen Congyang <wency@cn.fujitsu.com>
Reported-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Message-Id: <1476267508-19499-1-git-send-email-xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-10-12 13:18:28 +03:00
|
|
|
qemu_co_queue_init(&client->free_sema);
|
2016-02-10 21:41:01 +03:00
|
|
|
client->sioc = sioc;
|
|
|
|
object_ref(OBJECT(client->sioc));
|
2016-02-10 21:41:11 +03:00
|
|
|
|
|
|
|
if (!client->ioc) {
|
|
|
|
client->ioc = QIO_CHANNEL(sioc);
|
|
|
|
object_ref(OBJECT(client->ioc));
|
|
|
|
}
|
2013-12-02 01:23:41 +04:00
|
|
|
|
|
|
|
/* Now that we're connected, set the socket to be non-blocking and
|
|
|
|
* kick the reply mechanism. */
|
2016-02-10 21:41:01 +03:00
|
|
|
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
|
2017-02-13 16:52:24 +03:00
|
|
|
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
|
2015-02-07 00:06:16 +03:00
|
|
|
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
2013-12-02 01:23:41 +04:00
|
|
|
|
|
|
|
logout("Established connection with NBD server\n");
|
|
|
|
return 0;
|
|
|
|
}
|