There is no 1:1 relation between cv_signal() and cv_timedwait() as

the latter implicitly calls cv_signal() on error.

This leads to "tq_waiting > 0" with "tq_running == 0" and the
taskq stalls.

Change task_executor() to increment and decrement "tq_waiting"
and always check and run the queue after cv_timedwait().

Use mstohz(), fix timeout and sort includes.

Addresses PR port-xen/54273: "zpool create pool xbd2" panics DOMU kernel
This commit is contained in:
hannken 2019-06-11 09:05:33 +00:00
parent 94984370e5
commit cfba2bba58
1 changed files with 16 additions and 10 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: taskq.c,v 1.9 2019/05/07 08:49:59 hannken Exp $ */
/* $NetBSD: taskq.c,v 1.10 2019/06/11 09:05:33 hannken Exp $ */
/*-
* Copyright (c) 2019 The NetBSD Foundation, Inc.
@ -30,9 +30,11 @@
*/
#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/param.h>
#include <sys/kcondvar.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/threadpool.h>
@ -65,7 +67,7 @@ static specificdata_key_t taskq_lwp_key; /* Null or taskq this thread runs. */
/*
* Threadpool job to service tasks from task queue.
* Runs until the task queue gets destroyed or the queue is empty for 5 secs.
* Runs until the task queue gets destroyed or the queue is empty for 10 secs.
*/
static void
task_executor(struct threadpool_job *job)
@ -73,22 +75,27 @@ task_executor(struct threadpool_job *job)
struct taskq_executor *state = (struct taskq_executor *)job;
taskq_t *tq = state->te_self;
taskq_ent_t *tqe;
int error;
lwp_setspecific(taskq_lwp_key, tq);
mutex_enter(&tq->tq_lock);
while (!tq->tq_destroyed) {
tqe = SIMPLEQ_FIRST(&tq->tq_list);
if (tqe == NULL) {
if (SIMPLEQ_EMPTY(&tq->tq_list)) {
if (ISSET(tq->tq_flags, TASKQ_DYNAMIC))
break;
tq->tq_waiting++;
if (cv_timedwait(&tq->tq_cv, &tq->tq_lock, 5000) != 0) {
tq->tq_waiting--;
break;
error = cv_timedwait(&tq->tq_cv, &tq->tq_lock,
mstohz(10000));
tq->tq_waiting--;
if (SIMPLEQ_EMPTY(&tq->tq_list)) {
if (error)
break;
continue;
}
continue;
}
tqe = SIMPLEQ_FIRST(&tq->tq_list);
KASSERT(tqe != NULL);
SIMPLEQ_REMOVE_HEAD(&tq->tq_list, tqent_list);
tqe->tqent_queued = 0;
mutex_exit(&tq->tq_lock);
@ -144,7 +151,6 @@ taskq_dispatch_common(taskq_t *tq, taskq_ent_t *tqe, uint_t flags)
tq->tq_active++;
if (tq->tq_waiting) {
cv_signal(&tq->tq_cv);
tq->tq_waiting--;
mutex_exit(&tq->tq_lock);
return;
}