From 4eb9ccfea3aa6f446c38e9edd01318266de8f2ca Mon Sep 17 00:00:00 2001 From: reinoud Date: Mon, 6 Jul 2009 17:13:38 +0000 Subject: [PATCH] Re-implement read-modify-write backend strategy. This version is a lot more clean locking-wise and will consume less CPU power on needless moving-around. --- sys/fs/udf/udf_strat_rmw.c | 429 +++++++++++++++++++++---------------- 1 file changed, 240 insertions(+), 189 deletions(-) diff --git a/sys/fs/udf/udf_strat_rmw.c b/sys/fs/udf/udf_strat_rmw.c index ac43d1fc7e4c..8e80431da95b 100644 --- a/sys/fs/udf/udf_strat_rmw.c +++ b/sys/fs/udf/udf_strat_rmw.c @@ -1,4 +1,4 @@ -/* $NetBSD: udf_strat_rmw.c,v 1.20 2009/07/02 16:56:35 reinoud Exp $ */ +/* $NetBSD: udf_strat_rmw.c,v 1.21 2009/07/06 17:13:38 reinoud Exp $ */ /* * Copyright (c) 2006, 2008 Reinoud Zandijk @@ -28,7 +28,7 @@ #include #ifndef lint -__KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.20 2009/07/02 16:56:35 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.21 2009/07/06 17:13:38 reinoud Exp $"); #endif /* not lint */ @@ -79,7 +79,7 @@ __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.20 2009/07/02 16:56:35 reinoud E #define UDF_SHED_READING 2 #define UDF_SHED_WRITING 3 #define UDF_SHED_SEQWRITING 4 -#define UDF_SHED_IDLE 5 /* resting */ +#define UDF_SHED_IDLE 5 /* refcnt'd */ #define UDF_SHED_FREE 6 /* recycleable */ #define UDF_SHED_MAX 6+1 @@ -105,6 +105,9 @@ struct udf_eccline { uint32_t flags; uint32_t start_sector; /* physical */ + const char *fname; + int sline; + struct buf *buf; void *blob; @@ -141,22 +144,29 @@ struct strat_private { /* --------------------------------------------------------------------- */ -#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline) -#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline) +#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline, __FILE__, __LINE__) +#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline, __FILE__, __LINE__) /* can be called with or without discstrat lock */ static void -udf_lock_eccline(struct udf_eccline *eccline) +udf_lock_eccline(struct udf_eccline *eccline, const char *fname, int sline) { struct strat_private *priv = PRIV(eccline->ump); int waslocked, ret; + KASSERT(mutex_owned(&priv->discstrat_mutex)); + waslocked = mutex_owned(&priv->discstrat_mutex); if (!waslocked) mutex_enter(&priv->discstrat_mutex); /* wait until its unlocked first */ + eccline->refcnt++; while (eccline->flags & ECC_LOCKED) { + DPRINTF(ECCLINE, ("waiting for lock at %s:%d\n", + fname, sline)); + DPRINTF(ECCLINE, ("was locked at %s:%d\n", + eccline->fname, eccline->sline)); eccline->flags |= ECC_WANTED; ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz/8); @@ -166,6 +176,10 @@ udf_lock_eccline(struct udf_eccline *eccline) } eccline->flags |= ECC_LOCKED; eccline->flags &= ~ECC_WANTED; + eccline->refcnt--; + + eccline->fname = fname; + eccline->sline = sline; if (!waslocked) mutex_exit(&priv->discstrat_mutex); @@ -174,11 +188,13 @@ udf_lock_eccline(struct udf_eccline *eccline) /* can be called with or without discstrat lock */ static void -udf_unlock_eccline(struct udf_eccline *eccline) +udf_unlock_eccline(struct udf_eccline *eccline, const char *fname, int sline) { struct strat_private *priv = PRIV(eccline->ump); int waslocked; + KASSERT(mutex_owned(&priv->discstrat_mutex)); + waslocked = mutex_owned(&priv->discstrat_mutex); if (!waslocked) mutex_enter(&priv->discstrat_mutex); @@ -196,28 +212,21 @@ static void udf_dispose_eccline(struct udf_eccline *eccline) { struct strat_private *priv = PRIV(eccline->ump); - struct buf *ret; KASSERT(mutex_owned(&priv->discstrat_mutex)); - KASSERT(eccline->refcnt == 0); - KASSERT(eccline->dirty == 0); - DPRINTF(ECCLINE, ("dispose eccline with start sector %d, " "present %0"PRIx64"\n", eccline->start_sector, eccline->present)); - if (eccline->queued_on) { - ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf); - KASSERT(ret == eccline->buf); - priv->num_queued[eccline->queued_on]--; - } - LIST_REMOVE(eccline, hashchain); + KASSERT(eccline->refcnt == 0); + KASSERT(eccline->dirty == 0); + KASSERT(eccline->queued_on == 0); + KASSERT(eccline->flags & ECC_FLOATING); + KASSERT(eccline->flags & ECC_LOCKED); - if (eccline->flags & ECC_FLOATING) { - eccline->flags &= ~ECC_FLOATING; - priv->num_floating--; - } + LIST_REMOVE(eccline, hashchain); + priv->num_floating--; putiobuf(eccline->buf); pool_put(&priv->ecclineblob_pool, eccline->blob); @@ -230,71 +239,68 @@ static void udf_push_eccline(struct udf_eccline *eccline, int newqueue) { struct strat_private *priv = PRIV(eccline->ump); - struct buf *ret; - int curqueue; KASSERT(mutex_owned(&priv->discstrat_mutex)); DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue)); - /* requeue */ - curqueue = eccline->queued_on; - if (curqueue) { - ret = bufq_cancel(priv->queues[curqueue], eccline->buf); - -#ifdef DIAGNOSTIC - if (ret == NULL) { - int i; - - printf("udf_push_eccline: bufq_cancel can't find " - "buffer %p on queue %d; " - "dumping queues\n", eccline->buf, curqueue); - for (i = 1; i < UDF_SHED_MAX; i++) { - printf("queue %d\n\t", i); - ret = bufq_get(priv->queues[i]); - while (ret) { - printf("%p ", ret); - if (BTOE(ret)->queued_on != i) - printf("WRONGQ "); - if (ret == eccline->buf) - printf("[<-] "); - if (ret == bufq_peek(priv->queues[i])) { - printf("LOOP "); - break; - } - ret = bufq_get(priv->queues[i]); - } - printf("\n"); - } - panic("fatal queue bug; exit"); - } -#endif - - KASSERT(ret == eccline->buf); - priv->num_queued[curqueue]--; - } + KASSERT(eccline->queued_on == 0); + KASSERT(eccline->flags & ECC_FLOATING); /* set buffer block numbers to make sure its queued correctly */ eccline->buf->b_lblkno = eccline->start_sector; eccline->buf->b_blkno = eccline->start_sector; eccline->buf->b_rawblkno = eccline->start_sector; + vfs_timestamp(&priv->last_queued[newqueue]); + eccline->flags &= ~ECC_FLOATING; + priv->num_floating--; eccline->queued_on = newqueue; priv->num_queued[newqueue]++; bufq_put(priv->queues[newqueue], eccline->buf); - vfs_timestamp(&priv->last_queued[newqueue]); - if (eccline->flags & ECC_FLOATING) { - eccline->flags &= ~ECC_FLOATING; - priv->num_floating--; - } + UDF_UNLOCK_ECCLINE(eccline); - /* tickle disc strategy statemachine */ + /* XXX tickle disc strategy statemachine */ if (newqueue != UDF_SHED_IDLE) cv_signal(&priv->discstrat_cv); } +static struct udf_eccline * +udf_peek_eccline(struct strat_private *priv, int queued_on) +{ + struct udf_eccline *eccline; + struct buf *buf; + + KASSERT(mutex_owned(&priv->discstrat_mutex)); + + for(;;) { + buf = bufq_peek(priv->queues[queued_on]); + /* could have been a race, but we'll revisit later */ + if (buf == NULL) + return NULL; + + eccline = BTOE(buf); + UDF_LOCK_ECCLINE(eccline); + + /* might have changed before we obtained the lock */ + if (eccline->queued_on == queued_on) + break; + + UDF_UNLOCK_ECCLINE(eccline); + } + + KASSERT(eccline->queued_on == queued_on); + KASSERT((eccline->flags & ECC_FLOATING) == 0); + + DPRINTF(PARANOIA, ("DEBUG: buf %p peeked at queue %d\n", + eccline->buf, queued_on)); + + return eccline; +} + + static struct udf_eccline * udf_pop_eccline(struct strat_private *priv, int queued_on) { @@ -303,19 +309,29 @@ udf_pop_eccline(struct strat_private *priv, int queued_on) KASSERT(mutex_owned(&priv->discstrat_mutex)); - buf = bufq_get(priv->queues[queued_on]); - if (buf == NULL) { - KASSERT(priv->num_queued[queued_on] == 0); - return NULL; + for(;;) { + buf = bufq_get(priv->queues[queued_on]); + if (buf == NULL) { + // KASSERT(priv->num_queued[queued_on] == 0); + return NULL; + } + + eccline = BTOE(buf); + UDF_LOCK_ECCLINE(eccline); + + /* might have changed before we obtained the lock */ + if (eccline->queued_on == queued_on) + break; + + UDF_UNLOCK_ECCLINE(eccline); } - eccline = BTOE(buf); KASSERT(eccline->queued_on == queued_on); - eccline->queued_on = 0; - priv->num_queued[queued_on]--; + KASSERT((eccline->flags & ECC_FLOATING) == 0); + + priv->num_queued[queued_on]--; + eccline->queued_on = 0; - if (eccline->flags & ECC_FLOATING) - panic("popping already marked floating eccline"); eccline->flags |= ECC_FLOATING; priv->num_floating++; @@ -326,6 +342,28 @@ udf_pop_eccline(struct strat_private *priv, int queued_on) } +static void +udf_unqueue_eccline(struct strat_private *priv, struct udf_eccline *eccline) +{ + struct buf *ret; + + UDF_LOCK_ECCLINE(eccline); + if (eccline->queued_on == 0) { + KASSERT(eccline->flags & ECC_FLOATING); + return; + } + + ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf); + KASSERT(ret == eccline->buf); + + priv->num_queued[eccline->queued_on]--; + eccline->queued_on = 0; + + eccline->flags |= ECC_FLOATING; + priv->num_floating++; +} + + static struct udf_eccline * udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags) { @@ -336,11 +374,13 @@ udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags) int line, line_offset; int num_busy, ret; + mutex_enter(&priv->discstrat_mutex); + + /* lookup in our line cache hashtable */ line_offset = sector % ump->packet_size; start_sector = sector - line_offset; line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK; - mutex_enter(&priv->discstrat_mutex); KASSERT(priv->thread_running); retry: @@ -349,22 +389,15 @@ retry: if (eccline->start_sector == start_sector) { DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n", eccline->start_sector)); + udf_unqueue_eccline(priv, eccline); - UDF_LOCK_ECCLINE(eccline); - /* move from freelist (!) */ - if (eccline->queued_on == UDF_SHED_FREE) { - DPRINTF(ECCLINE, ("was on freelist\n")); - KASSERT(eccline->refcnt == 0); - udf_push_eccline(eccline, UDF_SHED_IDLE); - } - eccline->refcnt++; mutex_exit(&priv->discstrat_mutex); return eccline; } } - DPRINTF(ECCLINE, ("\tnot found in eccline cache\n")); /* not found in eccline cache */ + DPRINTF(ECCLINE, ("\tnot found in eccline cache\n")); lb_size = udf_rw32(ump->logical_vol->lb_size); blobsize = ump->packet_size * lb_size; @@ -396,23 +429,21 @@ retry: } /* push back line if we're waiting for it or its locked */ - if (eccline->flags & (ECC_WANTED | ECC_LOCKED)) { - /* XXX what were they doing on the free list anyway? */ - udf_push_eccline(eccline, UDF_SHED_WAITING); + if (eccline->flags & ECC_WANTED) { + /* we won a race, but someone else needed it */ + udf_push_eccline(eccline, UDF_SHED_FREE); goto retry; } /* unlink this entry */ LIST_REMOVE(eccline, hashchain); - KASSERT(eccline->flags & ECC_FLOATING); - + KASSERT(eccline->queued_on == 0); + eccline_blob = eccline->blob; - memset(eccline, 0, sizeof(struct udf_eccline)); - eccline->flags = ECC_FLOATING; + eccline->flags = ECC_FLOATING | ECC_LOCKED; } else { - memset(eccline, 0, sizeof(struct udf_eccline)); - eccline->flags = ECC_FLOATING; + eccline->flags = ECC_FLOATING | ECC_LOCKED; priv->num_floating++; } @@ -422,12 +453,14 @@ retry: eccline->buf->b_private = eccline; /* IMPORTANT */ /* initialise eccline blob */ + /* XXX memset expensive and strictly not needed XXX */ memset(eccline->blob, 0, blobsize); eccline->ump = ump; eccline->present = eccline->readin = eccline->dirty = 0; eccline->error = 0; eccline->refcnt = 0; + memset(eccline->bufs, 0, UDF_MAX_PACKET_SIZE * sizeof(struct buf *)); eccline->start_sector = start_sector; eccline->buf->b_lblkno = start_sector; @@ -440,9 +473,10 @@ retry: * TODO possible optimalisation for checking overlap with partitions * to get a clue on future eccline usage */ - eccline->refcnt++; - UDF_LOCK_ECCLINE(eccline); + KASSERT(eccline->refcnt == 0); + KASSERT(eccline->flags & ECC_FLOATING); + KASSERT(eccline->flags & ECC_LOCKED); mutex_exit(&priv->discstrat_mutex); return eccline; @@ -455,32 +489,45 @@ udf_puteccline(struct udf_eccline *eccline) struct strat_private *priv = PRIV(eccline->ump); struct udf_mount *ump = eccline->ump; uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1; + int new_queue; mutex_enter(&priv->discstrat_mutex); - /* clear directly all readin requests from present ones */ - if (eccline->readin & eccline->present) { - /* clear all read bits that are already read in */ - eccline->readin &= (~eccline->present) & allbits; - wakeup(eccline); - } - DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n", eccline->start_sector, eccline->refcnt)); + KASSERT(eccline->flags & ECC_LOCKED); + KASSERT(eccline->flags & ECC_FLOATING); + + /* clear all read bits that are already read in */ + if (eccline->readin & eccline->present) + eccline->readin &= (~eccline->present) & allbits; + /* if we have active nodes we dont set it on seqwriting */ if (eccline->refcnt > 1) eccline->flags &= ~ECC_SEQWRITING; - vfs_timestamp(&eccline->wait_time); - eccline->wait_time.tv_sec += ECC_WAITTIME; - udf_push_eccline(eccline, UDF_SHED_WAITING); + /* select state */ + new_queue = UDF_SHED_FREE; + if (eccline->refcnt > 0) + new_queue = UDF_SHED_IDLE; + if (eccline->flags & ECC_WANTED) + new_queue = UDF_SHED_IDLE; + if (eccline->readin) + new_queue = UDF_SHED_READING; + if (eccline->dirty) { + new_queue = UDF_SHED_WAITING; + vfs_timestamp(&eccline->wait_time); + eccline->wait_time.tv_sec += ECC_WAITTIME; - KASSERT(eccline->refcnt >= 1); - eccline->refcnt--; - UDF_UNLOCK_ECCLINE(eccline); + if (eccline->present == allbits) { + new_queue = UDF_SHED_WRITING; + if (eccline->flags & ECC_SEQWRITING) + new_queue = UDF_SHED_SEQWRITING; + } + } + udf_push_eccline(eccline, new_queue); - wakeup(eccline); mutex_exit(&priv->discstrat_mutex); } @@ -545,7 +592,10 @@ udf_free_nodedscr_rmw(struct udf_strat_args *args) eccsect = sectornr - eccline->start_sector; bit = (uint64_t) 1 << eccsect; + KASSERT(eccline->present & bit); + eccline->readin &= ~bit; /* just in case */ + /* XXX eccline->dirty? */ KASSERT(eccline->refcnt >= 1); eccline->refcnt--; @@ -560,6 +610,7 @@ udf_read_nodedscr_rmw(struct udf_strat_args *args) union dscrptr **dscrptr = &args->dscr; struct udf_mount *ump = args->ump; struct long_ad *icb = args->icb; + struct strat_private *priv; struct udf_eccline *eccline; uint64_t bit; uint32_t sectornr, dummy; @@ -586,22 +637,28 @@ udf_read_nodedscr_rmw(struct udf_strat_args *args) KASSERT(eccline->bufs[eccsect] == NULL); udf_puteccline(eccline); - /* wait for completion; XXX remodel to lock bit code */ - error = 0; - while ((eccline->present & bit) == 0) { - tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8); - if (eccline->error & bit) { - KASSERT(eccline->refcnt >= 1); - eccline->refcnt--; /* undo temp refcnt */ - *dscrptr = NULL; - return EIO; /* XXX error code */ - } + /* wait for completion */ + priv = PRIV(eccline->ump); + mutex_enter(&priv->discstrat_mutex); + while (((eccline->present | eccline->error) & bit) == 0) { + error = cv_timedwait(&priv->discstrat_cv, + &priv->discstrat_mutex, + hz/8); + if (error == EWOULDBLOCK) + DPRINTF(LOCKING, ("eccline waiting for read\n")); } + mutex_exit(&priv->discstrat_mutex); /* reget our line */ eccline = udf_geteccline(ump, sectornr, 0); KASSERT(eccline->refcnt >= 1); eccline->refcnt--; /* undo refcnt */ + + if (eccline->error & bit) { + *dscrptr = NULL; + udf_puteccline(eccline); + return EIO; /* XXX error code */ + } } *dscrptr = (union dscrptr *) @@ -634,6 +691,7 @@ udf_read_nodedscr_rmw(struct udf_strat_args *args) return error; } + /* we have a hold since it has a node descriptor */ eccline->refcnt++; udf_puteccline(eccline); @@ -663,7 +721,7 @@ udf_write_nodedscr_rmw(struct udf_strat_args *args) if (error) return error; - /* add reference to the vnode to prevent recycling */ + /* paranoia: add reference to the vnode to prevent recycling */ vhold(udf_node->vnode); /* get our eccline */ @@ -699,14 +757,14 @@ udf_write_nodedscr_rmw(struct udf_strat_args *args) KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size); - udf_puteccline(eccline); - - holdrele(udf_node->vnode); udf_node->outstanding_nodedscr--; if (udf_node->outstanding_nodedscr == 0) { + /* XXX still using wakeup! */ UDF_UNLOCK_NODE(udf_node, 0); wakeup(&udf_node->outstanding_nodedscr); } + holdrele(udf_node->vnode); + udf_puteccline(eccline); /* XXX waitfor not used */ return 0; @@ -765,7 +823,7 @@ udf_queuebuf_rmw(struct udf_strat_args *args) bpos = 0; while (buf_len) { len = MIN(buf_len, sector_size); - if (eccsect == ump->packet_size) { + if ((eccsect < 0) || (eccsect >= ump->packet_size)) { udf_puteccline(eccline); eccline = udf_geteccline(ump, sectornr, 0); eccsect = sectornr - eccline->start_sector; @@ -800,24 +858,11 @@ udf_queuebuf_rmw(struct udf_strat_args *args) "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n", buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type, buf->b_resid, buf->b_bcount, buf->b_bufsize)); + /* if we have FIDs fixup using buffer's sector number(s) */ - if (buf->b_udf_c_type == UDF_C_FIDS) { + if (buf->b_udf_c_type == UDF_C_FIDS) panic("UDF_C_FIDS in SHED_WRITING!\n"); -#if 0 - buf_len = buf->b_bcount; - sectornr = our_sectornr; - bpos = 0; - while (buf_len) { - len = MIN(buf_len, sector_size); - fidblk = (uint8_t *) buf->b_data + bpos; - udf_fixup_fid_block(fidblk, sector_size, - 0, len, sectornr); - sectornr++; - bpos += len; - buf_len -= len; - } -#endif - } + udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type); /* copy parts into the bufs and set for writing */ @@ -828,7 +873,7 @@ udf_queuebuf_rmw(struct udf_strat_args *args) bpos = 0; while (buf_len) { len = MIN(buf_len, sector_size); - if (eccsect == ump->packet_size) { + if ((eccsect < 0) || (eccsect >= ump->packet_size)) { udf_puteccline(eccline); eccline = udf_geteccline(ump, sectornr, 0); eccsect = sectornr - eccline->start_sector; @@ -976,8 +1021,10 @@ udf_shedule_read_callback(struct buf *buf) int sector_size = ump->discinfo.sector_size; int error, i, len; - DPRINTF(ECCLINE, ("read callback called\n")); + DPRINTF(ECCLINE, ("read callback called on buf %p\n", buf)); + /* post process read action */ + KASSERT(eccline->flags & ECC_LOCKED); error = buf->b_error; for (i = 0; i < ump->packet_size; i++) { bit = (uint64_t) 1 << i; @@ -1008,7 +1055,6 @@ udf_shedule_read_callback(struct buf *buf) */ udf_puteccline(eccline); - DPRINTF(ECCLINE, ("read callback finished\n")); } @@ -1019,10 +1065,12 @@ udf_shedule_write_callback(struct buf *buf) struct udf_eccline *eccline = BTOE(buf); struct udf_mount *ump = eccline->ump; uint64_t bit; - int error, i, len; + int error, i; + + DPRINTF(ECCLINE, ("write callback called on buf %p\n", buf)); - DPRINTF(ECCLINE, ("write callback called buf %p\n", buf)); /* post process write action */ + KASSERT(eccline->flags & ECC_LOCKED); error = buf->b_error; for (i = 0; i < ump->packet_size; i++) { bit = (uint64_t) 1 << i; @@ -1033,20 +1081,18 @@ udf_shedule_write_callback(struct buf *buf) } else { eccline->dirty &= ~bit; } - if (eccline->bufs[i]) { - len = eccline->bufs_len[i]; - nestiobuf_done(eccline->bufs[i], len, error); - eccline->bufs[i] = NULL; - } + + KASSERT(eccline->bufs[i] == 0); } KASSERT(eccline->dirty == 0); - KASSERT(error == 0); + /* * XXX TODO on write errors allocate a sparable entry and reissue */ udf_puteccline(eccline); + DPRINTF(ECCLINE, ("write callback finished\n")); } @@ -1062,6 +1108,8 @@ udf_issue_eccline(struct udf_eccline *eccline, int queued_on) int blks = sector_size / DEV_BSIZE; int i; + KASSERT(eccline->flags & ECC_LOCKED); + if (queued_on == UDF_SHED_READING) { DPRINTF(SHEDULE, ("udf_issue_eccline reading : ")); /* read all bits that are not yet present */ @@ -1096,10 +1144,13 @@ udf_issue_eccline(struct udf_eccline *eccline, int queued_on) nestbuf->b_blkno = buf->b_blkno + i*blks; nestbuf->b_rawblkno = buf->b_rawblkno + i*blks; - DPRINTF(SHEDULE, ("sector %d ", - start + i)); - /* call asynchronous */ - VOP_STRATEGY(ump->devvp, nestbuf); + DPRINTF(SHEDULE, ("sector %d ", start + i)); + + /* mutex dance since it could lock */ + mutex_exit(&priv->discstrat_mutex); + /* call asynchronous */ + VOP_STRATEGY(ump->devvp, nestbuf); + mutex_enter(&priv->discstrat_mutex); } DPRINTF(SHEDULE, ("\n")); return; @@ -1110,13 +1161,8 @@ udf_issue_eccline(struct udf_eccline *eccline, int queued_on) DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", " "dirty %"PRIx64"\n\t", eccline->present, eccline->readin, eccline->dirty)); - if (eccline->present != allbits) { - /* requeue to read-only */ - DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to " - "reading\n")); - udf_push_eccline(eccline, UDF_SHED_READING); - return; - } + KASSERT(eccline->present == allbits); + start = eccline->start_sector; buf = eccline->buf; buf->b_flags = B_WRITE | B_ASYNC; @@ -1133,6 +1179,7 @@ udf_issue_eccline(struct udf_eccline *eccline, int queued_on) buf->b_proc = NULL; } + /* mutex dance since it could lock */ mutex_exit(&priv->discstrat_mutex); /* call asynchronous */ DPRINTF(SHEDULE, ("sector %d for %d\n", @@ -1150,7 +1197,7 @@ udf_discstrat_thread(void *arg) struct udf_eccline *eccline; struct timespec now, *last; uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1; - int new_queue, wait, work, num, cnt; + int new_queue, wait, work; work = 1; priv->thread_running = 1; @@ -1161,12 +1208,26 @@ udf_discstrat_thread(void *arg) vfs_timestamp(&now); /* maintenance: handle eccline state machine */ - num = priv->num_queued[UDF_SHED_WAITING]; - cnt = 0; - while (cnt < num) { + for(;;) { + /* only peek at it */ + eccline = udf_peek_eccline(priv, UDF_SHED_WAITING); + if (eccline == NULL) + break; + + /* all others are later, so if not satisfied, abort */ + if ((eccline->wait_time.tv_sec - now.tv_sec > 0)) { + UDF_UNLOCK_ECCLINE(eccline); + break; + } + + /* release */ + UDF_UNLOCK_ECCLINE(eccline); + + /* do get it */ eccline = udf_pop_eccline(priv, UDF_SHED_WAITING); - /* requeue */ - new_queue = UDF_SHED_FREE; + + /* requeue according to state */ + new_queue = UDF_SHED_FREE; /* unlikely */ if (eccline->refcnt > 0) new_queue = UDF_SHED_IDLE; if (eccline->flags & ECC_WANTED) @@ -1174,22 +1235,14 @@ udf_discstrat_thread(void *arg) if (eccline->readin) new_queue = UDF_SHED_READING; if (eccline->dirty) { - new_queue = UDF_SHED_WAITING; - if ((eccline->wait_time.tv_sec - now.tv_sec <= 0) || - ((eccline->present == allbits) && - (eccline->flags & ECC_SEQWRITING))) - { + new_queue = UDF_SHED_READING; + if (eccline->present == allbits) { new_queue = UDF_SHED_WRITING; if (eccline->flags & ECC_SEQWRITING) new_queue = UDF_SHED_SEQWRITING; - if (eccline->present != allbits) - new_queue = UDF_SHED_READING; } } - if (eccline->flags & ECC_LOCKED) - new_queue = UDF_SHED_WAITING; udf_push_eccline(eccline, new_queue); - cnt++; } /* maintenance: free excess ecclines */ @@ -1197,9 +1250,10 @@ udf_discstrat_thread(void *arg) eccline = udf_pop_eccline(priv, UDF_SHED_FREE); KASSERT(eccline); KASSERT(eccline->refcnt == 0); - if (eccline->flags & (ECC_WANTED | ECC_LOCKED)) { - udf_push_eccline(eccline, UDF_SHED_IDLE); + if (eccline->flags & ECC_WANTED) { + /* we won the race, but we dont want to win */ DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n")); + udf_push_eccline(eccline, UDF_SHED_IDLE); } else { DPRINTF(ECCLINE, ("Removing entry from free list\n")); udf_dispose_eccline(eccline); @@ -1218,10 +1272,6 @@ udf_discstrat_thread(void *arg) new_queue = priv->cur_queue; DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n")); - /* complete the `get' by locking and refcounting it */ - UDF_LOCK_ECCLINE(eccline); - eccline->refcnt++; - udf_issue_eccline(eccline, priv->cur_queue); } else { /* don't switch too quickly */ @@ -1278,17 +1328,18 @@ udf_discstrat_thread(void *arg) /* tear down remaining ecclines */ mutex_enter(&priv->discstrat_mutex); + KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL); + KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL); + KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL); + KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL); + KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL); + KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0); KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0); KASSERT(priv->num_queued[UDF_SHED_READING] == 0); KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0); KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0); - KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL); - KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL); - KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL); - KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL); - KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL); eccline = udf_pop_eccline(priv, UDF_SHED_FREE); while (eccline) { udf_dispose_eccline(eccline);