softraid using workq for VOP_STRATEGY take 2

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Marco Peereboom
Date: Friday, July 30, 2010 - 5:39 am

kettenis@ pointed out a misuse of workq_add_task.  This diff fixes that.

Please test.  And I mean every arch and every raid level!

Index: softraid.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid.c,v
retrieving revision 1.210
diff -u -p -r1.210 softraid.c
--- softraid.c	3 Jul 2010 03:04:55 -0000	1.210
+++ softraid.c	30 Jul 2010 12:19:50 -0000
@@ -128,6 +128,7 @@ void			sr_rebuild(void *);
 void			sr_rebuild_thread(void *);
 void			sr_roam_chunks(struct sr_discipline *);
 int			sr_chunk_in_use(struct sr_softc *, dev_t);
+void			sr_startwu_callback(void *, void *);
 
 /* don't include these on RAMDISK */
 #ifndef SMALL_KERNEL
@@ -571,16 +572,12 @@ void
 sr_meta_save_callback(void *arg1, void *arg2)
 {
 	struct sr_discipline	*sd = arg1;
-	int			s;
-
-	s = splbio();
 
 	if (sr_meta_save(arg1, SR_META_DIRTY))
 		printf("%s: save metadata failed\n",
 		    DEVNAME(sd->sd_sc));
 
 	sd->sd_must_flush = 0;
-	splx(s);
 }
 
 int
@@ -1637,12 +1634,14 @@ sr_ccb_alloc(struct sr_discipline *sd)
 
 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
+	mtx_enter(&sd->sd_mtx); /* need this for sr_ccb_put */
 	TAILQ_INIT(&sd->sd_ccb_freeq);
 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
 		ccb = &sd->sd_ccb[i];
 		ccb->ccb_dis = sd;
 		sr_ccb_put(ccb);
 	}
+	mtx_leave(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
@@ -1660,8 +1659,10 @@ sr_ccb_free(struct sr_discipline *sd)
 
 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
 
+	mtx_enter(&sd->sd_mtx);
 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
+	mtx_leave(&sd->sd_mtx);
 
 	if (sd->sd_ccb)
 		free(sd->sd_ccb, M_DEVBUF);
@@ -1671,9 +1672,8 @@ struct sr_ccb *
 sr_ccb_get(struct sr_discipline *sd)
 {
 	struct sr_ccb		*ccb;
-	int			s;
 
-	s = splbio();
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
 
 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
 	if (ccb) {
@@ -1681,8 +1681,6 @@ sr_ccb_get(struct sr_discipline *sd)
 		ccb->ccb_state = SR_CCB_INPROGRESS;
 	}
 
-	splx(s);
-
 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
 	    ccb);
 
@@ -1693,21 +1691,18 @@ void
 sr_ccb_put(struct sr_ccb *ccb)
 {
 	struct sr_discipline	*sd = ccb->ccb_dis;
-	int			s;
+
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
 	    ccb);
 
-	s = splbio();
-
 	ccb->ccb_wu = NULL;
 	ccb->ccb_state = SR_CCB_FREE;
 	ccb->ccb_target = -1;
 	ccb->ccb_opaque = NULL;
 
 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
-
-	splx(s);
 }
 
 int
@@ -1726,13 +1721,15 @@ sr_wu_alloc(struct sr_discipline *sd)
 		return (1);
 
 	no_wu = sd->sd_max_wu;
-	sd->sd_wu_pending = no_wu;
 
 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
+	mtx_enter(&sd->sd_mtx);
+	sd->sd_wu_pending = no_wu;
 	TAILQ_INIT(&sd->sd_wu_freeq);
 	TAILQ_INIT(&sd->sd_wu_pendq);
 	TAILQ_INIT(&sd->sd_wu_defq);
+	mtx_leave(&sd->sd_mtx);
 	for (i = 0; i < no_wu; i++) {
 		wu = &sd->sd_wu[i];
 		wu->swu_dis = sd;
@@ -1752,12 +1749,14 @@ sr_wu_free(struct sr_discipline *sd)
 
 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
 
+	mtx_enter(&sd->sd_mtx);
 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
+	mtx_leave(&sd->sd_mtx);
 
 	if (sd->sd_wu)
 		free(sd->sd_wu, M_DEVBUF);
@@ -1769,12 +1768,8 @@ sr_wu_put(struct sr_workunit *wu)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct sr_ccb		*ccb;
 
-	int			s;
-
 	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
 
-	s = splbio();
-
 	wu->swu_xs = NULL;
 	wu->swu_state = SR_WU_FREE;
 	wu->swu_ios_complete = 0;
@@ -1787,6 +1782,7 @@ sr_wu_put(struct sr_workunit *wu)
 	wu->swu_fake = 0;
 	wu->swu_flags = 0;
 
+	mtx_enter(&sd->sd_mtx);
 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
 		sr_ccb_put(ccb);
@@ -1804,17 +1800,15 @@ sr_wu_put(struct sr_workunit *wu)
 	if (sd->sd_wu_sleep)
 		wakeup(&sd->sd_wu_sleep);
 
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 }
 
 struct sr_workunit *
 sr_wu_get(struct sr_discipline *sd, int canwait)
 {
 	struct sr_workunit	*wu;
-	int			s;
-
-	s = splbio();
 
+	mtx_enter(&sd->sd_mtx);
 	for (;;) {
 		wu = TAILQ_FIRST(&sd->sd_wu_freeq);
 		if (wu) {
@@ -1824,13 +1818,13 @@ sr_wu_get(struct sr_discipline *sd, int 
 			break;
 		} else if (wu == NULL && canwait) {
 			sd->sd_wu_sleep++;
-			tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
+			msleep(&sd->sd_wu_sleep, &sd->sd_mtx, PRIBIO,
+			    "sr_wu_get", 0);
 			sd->sd_wu_sleep--;
 		} else
 			break;
 	}
-
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
 
@@ -2542,7 +2536,7 @@ sr_hotspare_rebuild(struct sr_discipline
 		do {
 			busy = 0;
 
-			s = splbio();
+			mtx_enter(&sd->sd_mtx);
 			TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
         			TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
                 			if (ccb->ccb_target == chunk_no)
@@ -2555,7 +2549,7 @@ sr_hotspare_rebuild(struct sr_discipline
 						busy = 1;
 				}
 			}
-			splx(s);
+			mtx_leave(&sd->sd_mtx);
 
 			if (busy) {
 				tsleep(sd, PRIBIO, "sr_hotspare", hz);
@@ -2795,6 +2789,12 @@ sr_ioctl_createraid(struct sr_softc *sc,
 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
 	sd->sd_sc = sc;
 	SLIST_INIT(&sd->sd_meta_opt);
+	mtx_init(&sd->sd_mtx, IPL_BIO);	/* guards the wu/ccb queues */
+	sd->sd_workq = workq_create("srdis", 1, IPL_BIO);
+	if (sd->sd_workq == NULL) {
+		printf("%s: could not create workq\n", DEVNAME(sc));
+		goto unwind;
+	}
 	if (sr_discipline_init(sd, bc->bc_level)) {
 		printf("%s: could not initialize discipline\n", DEVNAME(sc));
 		goto unwind;
@@ -2955,7 +2955,9 @@ sr_ioctl_createraid(struct sr_softc *sc,
 
 	if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) {
 		/* set volume status */
+		mtx_enter(&sd->sd_mtx);
 		sd->sd_set_vol_state(sd);
+		mtx_leave(&sd->sd_mtx);
 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
 			printf("%s: %s offline, will not be brought online\n",
 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
@@ -3353,7 +3355,6 @@ void
 sr_discipline_shutdown(struct sr_discipline *sd)
 {
 	struct sr_softc		*sc = sd->sd_sc;
-	int			s;
 
 	if (!sd || !sc)
 		return;
@@ -3361,8 +3362,6 @@ sr_discipline_shutdown(struct sr_discipl
 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
 
-	s = splbio();
-
 	sd->sd_ready = 0;
 
 	if (sd->sd_shutdownhook)
@@ -3384,10 +3383,11 @@ sr_discipline_shutdown(struct sr_discipl
 
 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
 
+	if (sd->sd_workq)
+		workq_destroy(sd->sd_workq);
+
 	if (sd)
 		sr_discipline_free(sd);
-
-	splx(s);
 }
 
 int
@@ -3574,14 +3574,13 @@ int
 sr_raid_sync(struct sr_workunit *wu)
 {
 	struct sr_discipline	*sd = wu->swu_dis;
-	int			s, rv = 0, ios;
+	int			rv = 0, ios;
 
 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
 
 	/* when doing a fake sync don't count the wu */
 	ios = wu->swu_fake ? 0 : 1;
 
-	s = splbio();
 	sd->sd_sync = 1;
 
 	while (sd->sd_wu_pending > ios)
@@ -3593,7 +3592,6 @@ sr_raid_sync(struct sr_workunit *wu)
 		}
 
 	sd->sd_sync = 0;
-	splx(s);
 
 	wakeup(&sd->sd_sync);
 
@@ -3601,12 +3599,24 @@ sr_raid_sync(struct sr_workunit *wu)
 }
 
 void
+sr_startwu_callback(void *arg1, void *arg2)	/* workq task body */
+{
+	struct sr_discipline	*sd = arg1;	/* discipline owning wu */
+	struct sr_workunit	*wu = arg2;	/* work unit to start */
+	struct sr_ccb		*ccb;
+
+	mtx_enter(&sd->sd_mtx);	/* NOTE(review): held across VOP_STRATEGY; confirm it cannot sleep */
+	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
+		VOP_STRATEGY(&ccb->ccb_buf);	/* issue each ccb's buf */
+	mtx_leave(&sd->sd_mtx);
+}
+
+void
 sr_raid_startwu(struct sr_workunit *wu)
 {
 	struct sr_discipline	*sd = wu->swu_dis;
-	struct sr_ccb		*ccb;
 
-	splassert(IPL_BIO);
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
 
 	if (wu->swu_state == SR_WU_RESTART)
 		/*
@@ -3619,9 +3629,8 @@ sr_raid_startwu(struct sr_workunit *wu)
 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
 
 	/* start all individual ios */
-	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
-		VOP_STRATEGY(&ccb->ccb_buf);
-	}
+	workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback,
+	    sd, wu);
 }
 
 void
@@ -3799,7 +3808,7 @@ sr_check_io_collision(struct sr_workunit
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct sr_workunit	*wup;
 
-	splassert(IPL_BIO);
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
 
 	/* walk queue backwards and fill in collider if we have one */
 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
@@ -3848,7 +3857,7 @@ sr_rebuild_thread(void *arg)
 	struct sr_workunit	*wu_r, *wu_w;
 	struct scsi_xfer	xs_r, xs_w;
 	struct scsi_rw_16	cr, cw;
-	int			c, s, slept, percent = 0, old_percent = -1;
+	int			c, slept, percent = 0, old_percent = -1;
 	u_int8_t		*buf;
 
 	whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
@@ -3943,16 +3952,13 @@ sr_rebuild_thread(void *arg)
 		 */
 		wu_w->swu_state = SR_WU_DEFERRED;
 		wu_r->swu_collider = wu_w;
-		s = splbio();
-		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
 
+		mtx_enter(&sd->sd_mtx);
+		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
 		/* schedule io */
-		if (sr_check_io_collision(wu_r))
-			goto queued;
-
-		sr_raid_startwu(wu_r);
-queued:
-		splx(s);
+		if (sr_check_io_collision(wu_r) == 0)
+			sr_raid_startwu(wu_r);
+		mtx_leave(&sd->sd_mtx);
 
 		/* wait for read completion */
 		slept = 0;
Index: softraid_crypto.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid_crypto.c,v
retrieving revision 1.54
diff -u -p -r1.54 softraid_crypto.c
--- softraid_crypto.c	2 Jul 2010 09:26:05 -0000	1.54
+++ softraid_crypto.c	29 Jul 2010 19:11:45 -0000
@@ -1134,7 +1134,6 @@ sr_crypto_rw(struct sr_workunit *wu)
 int
 sr_crypto_write(struct cryptop *crp)
 {
-	int			s;
 	struct sr_workunit	*wu = crp->crp_opaque;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_crypto_write: wu %x xs: %x\n",
@@ -1144,9 +1143,7 @@ sr_crypto_write(struct cryptop *crp)
 		/* fail io */
 		((struct sr_workunit *)(crp->crp_opaque))->swu_xs->error =
 		    XS_DRIVER_STUFFUP;
-		s = splbio();
 		sr_crypto_finish_io(crp->crp_opaque);
-		splx(s);
 	}
 
 	return (sr_crypto_rw2(wu, crp));
@@ -1159,7 +1156,6 @@ sr_crypto_rw2(struct sr_workunit *wu, st
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_ccb		*ccb;
 	struct uio		*uio;
-	int			s;
 	daddr64_t		blk;
 
 	if (sr_validate_io(wu, &blk, "sr_crypto_rw2"))
@@ -1169,7 +1165,9 @@ sr_crypto_rw2(struct sr_workunit *wu, st
 
 	wu->swu_io_count = 1;
 
+	mtx_enter(&sd->sd_mtx);
 	ccb = sr_ccb_get(sd);
+	mtx_leave(&sd->sd_mtx);
 	if (!ccb) {
 		/* should never happen but handle more gracefully */
 		printf("%s: %s: too many ccbs queued\n",
@@ -1205,7 +1203,9 @@ sr_crypto_rw2(struct sr_workunit *wu, st
 
 	LIST_INIT(&ccb->ccb_buf.b_dep);
 
+	mtx_enter(&sd->sd_mtx);
 	TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
+	mtx_leave(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_DIS, "%s: %s: sr_crypto_rw2: b_bcount: %d "
 	    "b_blkno: %x b_flags 0x%0x b_data %p\n",
@@ -1213,15 +1213,11 @@ sr_crypto_rw2(struct sr_workunit *wu, st
 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
 	    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
 
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
+	if (sr_check_io_collision(wu) == 0)
+		sr_raid_startwu(wu);
+	mtx_leave(&sd->sd_mtx);
 
-	if (sr_check_io_collision(wu))
-		goto queued;
-
-	sr_raid_startwu(wu);
-
-queued:
-	splx(s);
 	return (0);
 bad:
 	/* wu is unwound by sr_wu_put */
@@ -1239,7 +1235,7 @@ sr_crypto_intr(struct buf *bp)
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_softc		*sc = sd->sd_sc;
 	struct cryptop		*crp;
-	int			s, s2, pend;
+	int			s, pend;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_crypto_intr bp: %x xs: %x\n",
 	    DEVNAME(sc), bp, wu->swu_xs);
@@ -1248,8 +1244,6 @@ sr_crypto_intr(struct buf *bp)
 	    " b_flags: 0x%0x\n", DEVNAME(sc), ccb->ccb_buf.b_bcount,
 	    ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags);
 
-	s = splbio();
-
 	if (ccb->ccb_buf.b_flags & B_ERROR) {
 		printf("%s: i/o error on block %lld\n", DEVNAME(sc),
 		    ccb->ccb_buf.b_blkno);
@@ -1276,6 +1270,7 @@ sr_crypto_intr(struct buf *bp)
 			xs->error = XS_NOERROR;
 
 		pend = 0;
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
 			if (wu == wup) {
 				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
@@ -1291,6 +1286,7 @@ sr_crypto_intr(struct buf *bp)
 				break;
 			}
 		}
+		mtx_leave(&sd->sd_mtx);
 
 		if (!pend)
 			printf("%s: wu: %p not on pending queue\n",
@@ -1303,17 +1299,14 @@ sr_crypto_intr(struct buf *bp)
 			crp->crp_opaque = wu;
 			DNPRINTF(SR_D_INTR, "%s: sr_crypto_intr: crypto_invoke "
 			    "%p\n", DEVNAME(sc), crp);
-			s2 = splvm();
+			s = splvm();
 			crypto_invoke(crp);
-			splx(s2);
-			goto done;
+			splx(s);
+			return;
 		}
 
 		sr_crypto_finish_io(wu);
 	}
-
-done:
-	splx(s);
 }
 
 void
@@ -1326,18 +1319,18 @@ sr_crypto_finish_io(struct sr_workunit *
 	struct sr_softc		*sc = sd->sd_sc;
 #endif /* SR_DEBUG */
 
-	splassert(IPL_BIO);
-
 	DNPRINTF(SR_D_INTR, "%s: sr_crypto_finish_io: wu %x xs: %x\n",
 	    DEVNAME(sc), wu, xs);
 
 	xs->resid = 0;
 
+	mtx_enter(&sd->sd_mtx);
 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
 		if (ccb->ccb_opaque == NULL)
 			continue;
 		sr_crypto_putcryptop(ccb->ccb_opaque);
 	}
+	mtx_leave(&sd->sd_mtx);
 
 	/* do not change the order of these 2 functions */
 	sr_wu_put(wu);
@@ -1350,7 +1343,6 @@ sr_crypto_finish_io(struct sr_workunit *
 int
 sr_crypto_read(struct cryptop *crp)
 {
-	int			s;
 	struct sr_workunit	*wu = crp->crp_opaque;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_crypto_read: wu %x xs: %x\n",
@@ -1359,9 +1351,7 @@ sr_crypto_read(struct cryptop *crp)
 	if (crp->crp_etype)
 		wu->swu_xs->error = XS_DRIVER_STUFFUP;
 
-	s = splbio();
 	sr_crypto_finish_io(wu);
-	splx(s);
 
 	return (0);
 }
Index: softraid_raid0.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid_raid0.c,v
retrieving revision 1.22
diff -u -p -r1.22 softraid_raid0.c
--- softraid_raid0.c	2 Jul 2010 09:20:26 -0000	1.22
+++ softraid_raid0.c	29 Jul 2010 18:51:26 -0000
@@ -166,14 +166,13 @@ sr_raid0_free_resources(struct sr_discip
 void
 sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
 {
-	int			old_state, s;
+	int			old_state;
 
 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
-	/* ok to go to splbio since this only happens in error path */
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
 
 	/* multiple IOs to the same chunk that fail will come through here */
@@ -193,7 +192,7 @@ sr_raid0_set_chunk_state(struct sr_disci
 
 	default:
 die:
-		splx(s); /* XXX */
+		mtx_leave(&sd->sd_mtx); /* XXX */
 		panic("%s: %s: %s: invalid chunk state transition "
 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
 		    sd->sd_meta->ssd_devname,
@@ -208,7 +207,7 @@ die:
 	sd->sd_must_flush = 1;
 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 }
 
 void
@@ -218,6 +217,8 @@ sr_raid0_set_vol_state(struct sr_discipl
 	int			new_state, i, s, nd;
 	int			old_state = sd->sd_vol_status;
 
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+
 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 
@@ -276,7 +277,6 @@ sr_raid0_rw(struct sr_workunit *wu)
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_ccb		*ccb;
 	struct sr_chunk		*scp;
-	int			s;
 	daddr64_t		blk, lbaoffs, strip_no, chunk, stripoffs;
 	daddr64_t		strip_size, no_chunk, chunkoffs, physoffs;
 	daddr64_t		strip_bits, length, leftover;
@@ -312,7 +312,9 @@ sr_raid0_rw(struct sr_workunit *wu)
 			goto bad;
 		}
 
+		mtx_enter(&sd->sd_mtx);
 		ccb = sr_ccb_get(sd);
+		mtx_leave(&sd->sd_mtx);
 		if (!ccb) {
 			/* should never happen but handle more gracefully */
 			printf("%s: %s: too many ccbs queued\n",
@@ -348,7 +350,9 @@ sr_raid0_rw(struct sr_workunit *wu)
 		if ((ccb->ccb_buf.b_flags & B_READ) == 0)
 			ccb->ccb_buf.b_vp->v_numoutput++;
 		LIST_INIT(&ccb->ccb_buf.b_dep);
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
+		mtx_leave(&sd->sd_mtx);
 
 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d "
 		    "b_blkno: %lld b_flags 0x%0x b_data %p\n",
@@ -369,14 +373,11 @@ sr_raid0_rw(struct sr_workunit *wu)
 		length = MIN(leftover,strip_size);
 	}
 
-	s = splbio();
-
-	if (sr_check_io_collision(wu))
-		goto queued;
+	mtx_enter(&sd->sd_mtx);
+	if (sr_check_io_collision(wu) == 0)
+		sr_raid_startwu(wu);
+	mtx_leave(&sd->sd_mtx);
 
-	sr_raid_startwu(wu);
-queued:
-	splx(s);
 	return (0);
 bad:
 	/* wu is unwound by sr_wu_put */
@@ -391,7 +392,7 @@ sr_raid0_intr(struct buf *bp)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_softc		*sc = sd->sd_sc;
-	int			s, pend;
+	int			pend;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
 	    DEVNAME(sc), bp, xs);
@@ -401,8 +402,6 @@ sr_raid0_intr(struct buf *bp)
 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
 
-	s = splbio();
-
 	if (ccb->ccb_buf.b_flags & B_ERROR) {
 		printf("%s: i/o error on block %lld target: %d b_error: %d\n",
 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target,
@@ -434,6 +433,7 @@ sr_raid0_intr(struct buf *bp)
 		xs->resid = 0;
 
 		pend = 0;
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
 			if (wu == wup) {
 				/* wu on pendq, remove */
@@ -451,6 +451,7 @@ sr_raid0_intr(struct buf *bp)
 				break;
 			}
 		}
+		mtx_leave(&sd->sd_mtx);
 
 		if (!pend)
 			printf("%s: wu: %p not on pending queue\n",
@@ -464,11 +465,9 @@ sr_raid0_intr(struct buf *bp)
 			wakeup(sd);
 	}
 
-	splx(s);
 	return;
 bad:
-	xs->error = XS_DRIVER_STUFFUP;
 	sr_wu_put(wu);
+	xs->error = XS_DRIVER_STUFFUP;
 	sr_scsi_done(sd, xs);
-	splx(s);
 }
Index: softraid_raid1.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid_raid1.c,v
retrieving revision 1.25
diff -u -p -r1.25 softraid_raid1.c
--- softraid_raid1.c	2 Jul 2010 09:20:26 -0000	1.25
+++ softraid_raid1.c	29 Jul 2010 21:30:07 -0000
@@ -150,14 +150,13 @@ sr_raid1_free_resources(struct sr_discip
 void
 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
 {
-	int			old_state, s;
+	int			old_state;
 
 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
-	/* ok to go to splbio since this only happens in error path */
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
 
 	/* multiple IOs to the same chunk that fail will come through here */
@@ -217,7 +216,7 @@ sr_raid1_set_chunk_state(struct sr_disci
 
 	default:
 die:
-		splx(s); /* XXX */
+		mtx_leave(&sd->sd_mtx); /* XXX */
 		panic("%s: %s: %s: invalid chunk state transition "
 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
 		    sd->sd_meta->ssd_devname,
@@ -232,7 +231,7 @@ die:
 	sd->sd_must_flush = 1;
 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 }
 
 void
@@ -240,7 +239,10 @@ sr_raid1_set_vol_state(struct sr_discipl
 {
 	int			states[SR_MAX_STATES];
 	int			new_state, i, s, nd;
-	int			old_state = sd->sd_vol_status;
+	int			old_state;
+
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+	old_state = sd->sd_vol_status;
 
 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
@@ -373,7 +375,7 @@ sr_raid1_rw(struct sr_workunit *wu)
 	struct sr_ccb		*ccb;
 	struct buf		*b;
 	struct sr_chunk		*scp;
-	int			ios, x, i, s, rt;
+	int			ios, x, i, rt;
 	daddr64_t		blk;
 
 	/* blk and scsi error will be handled by sr_validate_io */
@@ -390,7 +392,9 @@ sr_raid1_rw(struct sr_workunit *wu)
 	wu->swu_io_count = ios;
 
 	for (i = 0; i < ios; i++) {
+		mtx_enter(&sd->sd_mtx);
 		ccb = sr_ccb_get(sd);
+		mtx_leave(&sd->sd_mtx);
 		if (!ccb) {
 			/* should never happen but handle more gracefully */
 			printf("%s: %s: too many ccbs queued\n",
@@ -443,7 +447,9 @@ ragain:
 				/* volume offline */
 				printf("%s: is offline, can't read\n",
 				    DEVNAME(sd->sd_sc));
+				mtx_enter(&sd->sd_mtx);
 				sr_ccb_put(ccb);
+				mtx_leave(&sd->sd_mtx);
 				goto bad;
 			}
 		} else {
@@ -460,7 +466,9 @@ ragain:
 			case BIOC_SDHOTSPARE: /* should never happen */
 			case BIOC_SDOFFLINE:
 				wu->swu_io_count--;
+				mtx_enter(&sd->sd_mtx);
 				sr_ccb_put(ccb);
+				mtx_leave(&sd->sd_mtx);
 				continue;
 
 			default:
@@ -476,7 +484,9 @@ ragain:
 
 		LIST_INIT(&b->b_dep);
 
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
+		mtx_leave(&sd->sd_mtx);
 
 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
 		    "b_blkno: %x b_flags 0x%0x b_data %p\n",
@@ -485,7 +495,7 @@ ragain:
 		    b->b_flags, b->b_data);
 	}
 
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 
 	/* rebuild io, let rebuild routine deal with it */
 	if (wu->swu_flags & SR_WUF_REBUILD)
@@ -505,7 +515,7 @@ ragain:
 start:
 	sr_raid_startwu(wu);
 queued:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 	return (0);
 bad:
 	/* wu is unwound by sr_wu_put */
@@ -559,7 +569,9 @@ sr_raid1_intr(struct buf *bp)
 			if (xs->flags & SCSI_DATA_IN) {
 				printf("%s: retrying read on block %lld\n",
 				    DEVNAME(sc), b->b_blkno);
+				mtx_enter(&sd->sd_mtx);
 				sr_ccb_put(ccb);
+				mtx_leave(&sd->sd_mtx);
 				TAILQ_INIT(&wu->swu_ccb);
 				wu->swu_state = SR_WU_RESTART;
 				if (sd->sd_scsi_rw(wu))
@@ -578,6 +590,7 @@ sr_raid1_intr(struct buf *bp)
 		xs->resid = 0;
 
 		pend = 0;
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
 			if (wu == wup) {
 				/* wu on pendq, remove */
@@ -585,9 +598,11 @@ sr_raid1_intr(struct buf *bp)
 				pend = 1;
 
 				if (wu->swu_collider) {
-					if (wu->swu_ios_failed)
+					if (wu->swu_ios_failed) {
 						/* toss all ccbs and recreate */
 						sr_raid1_recreate_wu(wu->swu_collider);
+						MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+					}
 
 					/* restart deferred wu */
 					wu->swu_collider->swu_state =
@@ -599,6 +614,7 @@ sr_raid1_intr(struct buf *bp)
 				break;
 			}
 		}
+		mtx_leave(&sd->sd_mtx);
 
 		if (!pend)
 			printf("%s: wu: %p not on pending queue\n",
@@ -612,7 +628,7 @@ sr_raid1_intr(struct buf *bp)
 		} else {
 			/* do not change the order of these 2 functions */
 			sr_wu_put(wu);
-			scsi_done(xs);
+			sr_scsi_done(sd, xs);
 		}
 
 		if (sd->sd_sync && sd->sd_wu_pending == 0)
@@ -630,7 +646,7 @@ bad:
 	} else {
 		/* do not change the order of these 2 functions */
 		sr_wu_put(wu);
-		scsi_done(xs);
+		sr_scsi_done(sd, xs);
 	}
 
 	splx(s);
@@ -643,6 +659,12 @@ sr_raid1_recreate_wu(struct sr_workunit 
 	struct sr_workunit	*wup = wu;
 	struct sr_ccb		*ccb;
 
+	/*
+	 * watch out!
+	 * must have mutex on way in and out but can't call rw with it
+	 */
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+
 	do {
 		DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup);
 
@@ -655,8 +677,10 @@ sr_raid1_recreate_wu(struct sr_workunit 
 
 		/* recreate ccbs */
 		wup->swu_state = SR_WU_REQUEUE;
+		mtx_leave(&sd->sd_mtx);
 		if (sd->sd_scsi_rw(wup))
 			panic("could not requeue io");
+		mtx_enter(&sd->sd_mtx);
 
 		wup = wup->swu_collider;
 	} while (wup);
Index: softraid_raid6.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid_raid6.c,v
retrieving revision 1.18
diff -u -p -r1.18 softraid_raid6.c
--- softraid_raid6.c	2 Jul 2010 09:20:26 -0000	1.18
+++ softraid_raid6.c	29 Jul 2010 21:35:37 -0000
@@ -246,15 +246,14 @@ sr_raid6_free_resources(struct sr_discip
 void
 sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
 {
-	int			old_state, s;
+	int			old_state;
 
 	/* XXX this is for RAID 0 */
 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
-	/* ok to go to splbio since this only happens in error path */
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
 
 	/* multiple IOs to the same chunk that fail will come through here */
@@ -301,7 +300,7 @@ sr_raid6_set_chunk_state(struct sr_disci
 
 	default:
 die:
-		splx(s); /* XXX */
+		mtx_leave(&sd->sd_mtx);
 		panic("%s: %s: %s: invalid chunk state transition "
 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
 		    sd->sd_meta->ssd_devname,
@@ -316,18 +315,21 @@ die:
 	sd->sd_must_flush = 1;
 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 }
 
 void
 sr_raid6_set_vol_state(struct sr_discipline *sd)
 {
 	int			states[SR_MAX_STATES];
-	int			new_state, i, s, nd;
-	int			old_state = sd->sd_vol_status;
+	int			new_state, i, st, nd;
+	int			old_state;
 
 	/* XXX this is for RAID 0 */
 
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+	old_state = sd->sd_vol_status;
+
 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 
@@ -337,13 +339,13 @@ sr_raid6_set_vol_state(struct sr_discipl
 		states[i] = 0;
 
 	for (i = 0; i < nd; i++) {
-		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
-		if (s >= SR_MAX_STATES)
+		st = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
+		if (st >= SR_MAX_STATES)
 			panic("%s: %s: %s: invalid chunk state",
 			    DEVNAME(sd->sd_sc),
 			    sd->sd_meta->ssd_devname,
 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
-		states[s]++;
+		states[st]++;
 	}
 
 	if (states[BIOC_SDONLINE] == nd)
@@ -458,7 +460,7 @@ sr_raid6_rw(struct sr_workunit *wu)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_chunk		*scp;
-	int			s, fail, i, rwmode, gxinv, pxinv;
+	int			fail, i, rwmode, gxinv, pxinv;
 	daddr64_t		blk, lbaoffs, strip_no, chunk, qchunk, pchunk, fchunk;
 	daddr64_t		strip_size, no_chunk, lba, chunk_offs, phys_offs;
 	daddr64_t		strip_bits, length, strip_offs, datalen, row_size;
@@ -703,7 +705,7 @@ sr_raid6_rw(struct sr_workunit *wu)
 		data += length;
 	}
 
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	if (wu_w) {
 		/* collide write request with reads */
 		wu_w->swu_blk_start = wu->swu_blk_start;
@@ -739,7 +741,7 @@ sr_raid6_rw(struct sr_workunit *wu)
 start:
 	sr_raid_startwu(wu);
 queued:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 	return (0);
 bad:
 	/* wu is unwound by sr_wu_put */
@@ -755,6 +757,8 @@ sr_failio(struct sr_workunit *wu)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct sr_ccb		*ccb;
 
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+
 	if (!(wu->swu_flags & SR_WUF_FAIL))
 		return (0);
 
@@ -774,7 +778,7 @@ sr_raid6_intr(struct buf *bp)
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_softc		*sc = sd->sd_sc;
 	struct sr_raid6_opaque  *pq = ccb->ccb_opaque;
-	int			s, pend;
+	int			pend;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %p xs %p\n",
 	    DEVNAME(sc), bp, xs);
@@ -784,8 +788,6 @@ sr_raid6_intr(struct buf *bp)
 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
 
-	s = splbio();
-
 	if (ccb->ccb_buf.b_flags & B_ERROR) {
 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
@@ -834,8 +836,10 @@ sr_raid6_intr(struct buf *bp)
 			if (xs->flags & SCSI_DATA_IN) {
 				printf("%s: retrying read on block %lld\n",
 				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
+				mtx_enter(&sd->sd_mtx);
 				sr_ccb_put(ccb);
 				TAILQ_INIT(&wu->swu_ccb);
+				mtx_leave(&sd->sd_mtx);
 				wu->swu_state = SR_WU_RESTART;
 				if (sd->sd_scsi_rw(wu))
 					goto bad;
@@ -856,6 +860,7 @@ sr_raid6_intr(struct buf *bp)
 		}
 
 		pend = 0;
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
 			if (wu == wup) {
 				/* wu on pendq, remove */
@@ -863,9 +868,11 @@ sr_raid6_intr(struct buf *bp)
 				pend = 1;
 
 				if (wu->swu_collider) {
-					if (wu->swu_ios_failed)
+					if (wu->swu_ios_failed) {
 						/* toss all ccbs and recreate */
 						sr_raid6_recreate_wu(wu->swu_collider);
+						MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+					}
 
 					/* restart deferred wu */
 					wu->swu_collider->swu_state =
@@ -878,6 +885,7 @@ sr_raid6_intr(struct buf *bp)
 				break;
 			}
 		}
+		mtx_leave(&sd->sd_mtx);
 
 		if (!pend)
 			printf("%s: wu: %p not on pending queue\n",
@@ -892,7 +900,7 @@ sr_raid6_intr(struct buf *bp)
 			/* do not change the order of these 2 functions */
 			sr_wu_put(wu);
 			if (xs != NULL)
-				scsi_done(xs);
+				sr_scsi_done(sd, xs);
 		}
 
 		if (sd->sd_sync && sd->sd_wu_pending == 0)
@@ -900,7 +908,6 @@ sr_raid6_intr(struct buf *bp)
 	}
 
 retry:
-	splx(s);
 	return;
 bad:
 	xs->error = XS_DRIVER_STUFFUP;
@@ -910,10 +917,8 @@ bad:
 	} else {
 		/* do not change the order of these 2 functions */
 		sr_wu_put(wu);
-		scsi_done(xs);
+		sr_scsi_done(sd, xs);
 	}
-
-	splx(s);
 }
 
 void
@@ -923,6 +928,12 @@ sr_raid6_recreate_wu(struct sr_workunit 
 	struct sr_workunit	*wup = wu;
 	struct sr_ccb		*ccb;
 
+	/*
+	 * watch out!
+	 * must have mutex on way in and out but can't call rw with it
+	 */
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+
 	do {
 		DNPRINTF(SR_D_INTR, "%s: sr_raid6_recreate_wu: %p\n", wup);
 
@@ -935,8 +946,10 @@ sr_raid6_recreate_wu(struct sr_workunit 
 
 		/* recreate ccbs */
 		wup->swu_state = SR_WU_REQUEUE;
+		mtx_leave(&sd->sd_mtx);
 		if (sd->sd_scsi_rw(wup))
 			panic("could not requeue io");
+		mtx_enter(&sd->sd_mtx);
 
 		wup = wup->swu_collider;
 	} while (wup);
@@ -950,7 +963,9 @@ sr_raid6_addio(struct sr_workunit *wu, i
 	struct sr_ccb		*ccb;
 	struct sr_raid6_opaque  *pqbuf;
 
+	mtx_enter(&sd->sd_mtx);
 	ccb = sr_ccb_get(sd);
+	mtx_leave(&sd->sd_mtx);
 	if (!ccb)
 		return (-1);
 
@@ -1001,7 +1016,9 @@ sr_raid6_addio(struct sr_workunit *wu, i
 
 		pqbuf = malloc(sizeof(struct sr_raid6_opaque), M_DEVBUF, M_CANFAIL);
 		if (pqbuf == NULL) {
+			mtx_enter(&sd->sd_mtx);
 			sr_ccb_put(ccb);
+			mtx_leave(&sd->sd_mtx);
 			return (-1);
 		}
 		pqbuf->pbuf = pbuf;
@@ -1011,7 +1028,9 @@ sr_raid6_addio(struct sr_workunit *wu, i
 	}
 
 	LIST_INIT(&ccb->ccb_buf.b_dep);
+	mtx_enter(&sd->sd_mtx);
 	TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
+	mtx_leave(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_DIS, "%s: %s: sr_raid6: b_bcount: %d "
 	    "b_blkno: %x b_flags 0x%0x b_data %p\n",
Index: softraid_raidp.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid_raidp.c,v
retrieving revision 1.18
diff -u -p -r1.18 softraid_raidp.c
--- softraid_raidp.c	2 Jul 2010 09:20:26 -0000	1.18
+++ softraid_raidp.c	29 Jul 2010 21:44:26 -0000
@@ -183,14 +183,13 @@ sr_raidp_free_resources(struct sr_discip
 void
 sr_raidp_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
 {
-	int			old_state, s;
+	int			old_state;
 
 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
-	/* ok to go to splbio since this only happens in error path */
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
 
 	/* multiple IOs to the same chunk that fail will come through here */
@@ -237,7 +236,7 @@ sr_raidp_set_chunk_state(struct sr_disci
 
 	default:
 die:
-		splx(s); /* XXX */
+		mtx_leave(&sd->sd_mtx);
 		panic("%s: %s: %s: invalid chunk state transition "
 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
 		    sd->sd_meta->ssd_devname,
@@ -252,15 +251,18 @@ die:
 	sd->sd_must_flush = 1;
 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 }
 
 void
 sr_raidp_set_vol_state(struct sr_discipline *sd)
 {
 	int			states[SR_MAX_STATES];
-	int			new_state, i, s, nd;
-	int			old_state = sd->sd_vol_status;
+	int			new_state, i, st, nd;
+	int			old_state;
+
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+	old_state = sd->sd_vol_status;
 
 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
@@ -271,13 +273,13 @@ sr_raidp_set_vol_state(struct sr_discipl
 		states[i] = 0;
 
 	for (i = 0; i < nd; i++) {
-		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
-		if (s >= SR_MAX_STATES)
+		st = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
+		if (st >= SR_MAX_STATES)
 			panic("%s: %s: %s: invalid chunk state",
 			    DEVNAME(sd->sd_sc),
 			    sd->sd_meta->ssd_devname,
 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
-		states[s]++;
+		states[st]++;
 	}
 
 	if (states[BIOC_SDONLINE] == nd)
@@ -375,7 +377,7 @@ sr_raidp_rw(struct sr_workunit *wu)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_chunk		*scp;
-	int			s, i;
+	int			i;
 	daddr64_t		blk, lbaoffs, strip_no, chunk, row_size;
 	daddr64_t		strip_size, no_chunk, lba, chunk_offs, phys_offs;
 	daddr64_t		strip_bits, length, parity, strip_offs, datalen;
@@ -512,7 +514,7 @@ sr_raidp_rw(struct sr_workunit *wu)
 		data += length;
 	}
 
-	s = splbio();
+	mtx_enter(&sd->sd_mtx);
 	if (wu_w) {
 		/* collide write request with reads */
 		wu_w->swu_blk_start = wu->swu_blk_start;
@@ -548,7 +550,7 @@ sr_raidp_rw(struct sr_workunit *wu)
 start:
 	sr_raid_startwu(wu);
 queued:
-	splx(s);
+	mtx_leave(&sd->sd_mtx);
 	return (0);
 bad:
 	/* wu is unwound by sr_wu_put */
@@ -565,7 +567,7 @@ sr_raidp_intr(struct buf *bp)
 	struct sr_discipline	*sd = wu->swu_dis;
 	struct scsi_xfer	*xs = wu->swu_xs;
 	struct sr_softc		*sc = sd->sd_sc;
-	int			s, pend;
+	int			pend;
 
 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %p xs %p\n",
 	    DEVNAME(sc), bp, xs);
@@ -575,8 +577,6 @@ sr_raidp_intr(struct buf *bp)
 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
 
-	s = splbio();
-
 	if (ccb->ccb_buf.b_flags & B_ERROR) {
 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
@@ -615,8 +615,10 @@ sr_raidp_intr(struct buf *bp)
 			if (xs->flags & SCSI_DATA_IN) {
 				printf("%s: retrying read on block %lld\n",
 				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
+				mtx_enter(&sd->sd_mtx);
 				sr_ccb_put(ccb);
 				TAILQ_INIT(&wu->swu_ccb);
+				mtx_leave(&sd->sd_mtx);
 				wu->swu_state = SR_WU_RESTART;
 				if (sd->sd_scsi_rw(wu))
 					goto bad;
@@ -637,6 +639,7 @@ sr_raidp_intr(struct buf *bp)
 		}
 
 		pend = 0;
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
 			if (wu == wup) {
 				/* wu on pendq, remove */
@@ -644,9 +647,11 @@ sr_raidp_intr(struct buf *bp)
 				pend = 1;
 
 				if (wu->swu_collider) {
-					if (wu->swu_ios_failed)
+					if (wu->swu_ios_failed) {
 						/* toss all ccbs and recreate */
 						sr_raidp_recreate_wu(wu->swu_collider);
+						MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+					}
 
 					/* restart deferred wu */
 					wu->swu_collider->swu_state =
@@ -658,6 +663,7 @@ sr_raidp_intr(struct buf *bp)
 				break;
 			}
 		}
+		mtx_leave(&sd->sd_mtx);
 
 		if (!pend)
 			printf("%s: wu: %p not on pending queue\n",
@@ -672,7 +678,7 @@ sr_raidp_intr(struct buf *bp)
 			/* do not change the order of these 2 functions */
 			sr_wu_put(wu);
 			if (xs != NULL)
-				scsi_done(xs);
+				sr_scsi_done(sd, xs);
 		}
 
 		if (sd->sd_sync && sd->sd_wu_pending == 0)
@@ -680,7 +686,6 @@ sr_raidp_intr(struct buf *bp)
 	}
 
 retry:
-	splx(s);
 	return;
 bad:
 	xs->error = XS_DRIVER_STUFFUP;
@@ -690,10 +695,8 @@ bad:
 	} else {
 		/* do not change the order of these 2 functions */
 		sr_wu_put(wu);
-		scsi_done(xs);
+		sr_scsi_done(sd, xs);
 	}
-
-	splx(s);
 }
 
 void
@@ -703,6 +706,12 @@ sr_raidp_recreate_wu(struct sr_workunit 
 	struct sr_workunit	*wup = wu;
 	struct sr_ccb		*ccb;
 
+	/*
+	 * Watch out!  The caller must hold sd_mtx on entry and it is held
+	 * again on return, but it is dropped around each sd_scsi_rw() call.
+	 */
+	MUTEX_ASSERT_LOCKED(&sd->sd_mtx);
+
 	do {
 		DNPRINTF(SR_D_INTR, "%s: sr_raidp_recreate_wu: %p\n", wup);
 
@@ -715,8 +724,10 @@ sr_raidp_recreate_wu(struct sr_workunit 
 
 		/* recreate ccbs */
 		wup->swu_state = SR_WU_REQUEUE;
+		mtx_leave(&sd->sd_mtx);
 		if (sd->sd_scsi_rw(wup))
 			panic("could not requeue io");
+		mtx_enter(&sd->sd_mtx);
 
 		wup = wup->swu_collider;
 	} while (wup);
@@ -729,7 +740,9 @@ sr_raidp_addio(struct sr_workunit *wu, i
 	struct sr_discipline 	*sd = wu->swu_dis;
 	struct sr_ccb		*ccb;
 
+	mtx_enter(&sd->sd_mtx);
 	ccb = sr_ccb_get(sd);
+	mtx_leave(&sd->sd_mtx);
 	if (!ccb)
 		return (-1);
 
@@ -778,7 +791,9 @@ sr_raidp_addio(struct sr_workunit *wu, i
 	ccb->ccb_opaque = xorbuf;
 
 	LIST_INIT(&ccb->ccb_buf.b_dep);
+	mtx_enter(&sd->sd_mtx);
 	TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
+	mtx_leave(&sd->sd_mtx);
 
 	DNPRINTF(SR_D_DIS, "%s: %s: sr_raidp: b_bcount: %d "
 	    "b_blkno: %x b_flags 0x%0x b_data %p\n",
@@ -828,7 +843,7 @@ sr_raidp_scrub(struct sr_discipline *sd)
 	daddr64_t strip_no, strip_size, no_chunk, parity, max_strip, strip_bits;
 	daddr64_t i;
 	struct sr_workunit *wu_r, *wu_w;
-	int s, slept;
+	int slept;
 	void *xorbuf;
 
 	if ((wu_r = sr_wu_get(sd, 1)) == NULL)
@@ -864,14 +879,12 @@ sr_raidp_scrub(struct sr_discipline *sd)
 		wu_w->swu_state = SR_WU_DEFERRED;
 		wu_r->swu_collider = wu_w;
 
-		s = splbio();
+		mtx_enter(&sd->sd_mtx);
 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
 
-		if (sr_check_io_collision(wu_r))
-			goto queued;
-		sr_raid_startwu(wu_r);
-	queued:
-		splx(s);
+		if (sr_check_io_collision(wu_r) == 0)
+			sr_raid_startwu(wu_r);
+		mtx_leave(&sd->sd_mtx);
 
 		slept = 0;
 		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
Index: softraidvar.h
===================================================================
RCS file: /cvs/src/sys/dev/softraidvar.h,v
retrieving revision 1.94
diff -u -p -r1.94 softraidvar.h
--- softraidvar.h	2 Jul 2010 09:26:05 -0000	1.94
+++ softraidvar.h	30 Jul 2010 12:20:51 -0000
@@ -21,6 +21,7 @@
 
 #include <crypto/md5.h>
 #include <sys/vnode.h>
+#include <sys/mutex.h>
 
 #define SR_META_VERSION		4	/* bump when sr_metadata changes */
 #define SR_META_SIZE		64	/* save space at chunk beginning */
@@ -340,6 +341,9 @@ struct sr_workunit {
 	/* all ios that make up this workunit */
 	struct sr_ccb_list	swu_ccb;
 
+	/* task memory */
+	struct workq_task	swu_wqt;
+
 	TAILQ_ENTRY(sr_workunit) swu_link;
 };
 
@@ -479,6 +483,8 @@ struct sr_discipline {
 	}			sd_dis_specific;/* dis specific members */
 #define mds			sd_dis_specific
 
+	struct workq		*sd_workq;
+
 	/* discipline metadata */
 	struct sr_metadata	*sd_meta;	/* in memory copy of metadata */
 	void			*sd_meta_foreign; /* non native metadata */
@@ -511,6 +517,7 @@ struct sr_discipline {
 	struct sr_wu_list	sd_wu_freeq;	/* free wu queue */
 	struct sr_wu_list	sd_wu_pendq;	/* pending wu queue */
 	struct sr_wu_list	sd_wu_defq;	/* deferred wu queue */
+	struct mutex		sd_mtx;		/* mutex for all wu/ccb ops */
 	int			sd_wu_sleep;	/* wu sleepers counter */
 
 	/* discipline stats */
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
softraid using workq for VOP_STRATEGY take 2, Marco Peereboom, (Fri Jul 30, 5:39 am)