summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2021-08-17 11:47:00 -0400
committerTony Hutter <hutter2@llnl.gov>2021-09-14 14:31:22 -0700
commitc600f0687fefac00d3885909dd8e269677a36201 (patch)
tree3222e19ff29a141995fb55078adb97bab71bf975
parent5afc35b69824db01b36418e8f091ffeaeaeb98c9 (diff)
Avoid vq_lock drop in vdev_queue_aggregate()
vq_lock is already too congested for two more operations per I/O. Instead of dropping and reacquiring it inside vdev_queue_aggregate() delegate the zio_vdev_io_bypass() and zio_execute() calls for parent I/Os to callers, that drop the lock any way to execute the new I/O. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Mark Maybee <mark.maybee@delphix.com> Reviewed-by: Brian Atkinson <batkinson@lanl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Closes #12297
-rw-r--r--module/zfs/vdev_queue.c63
1 files changed, 34 insertions, 29 deletions
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 06d22f6df..cc5b15b8c 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -599,7 +599,6 @@ static zio_t *
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
{
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
- zio_link_t *zl = NULL;
uint64_t maxgap = 0;
uint64_t size;
uint64_t limit;
@@ -797,19 +796,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size);
/*
- * We need to drop the vdev queue's lock during zio_execute() to
- * avoid a deadlock that we could encounter due to lock order
- * reversal between vq_lock and io_lock in zio_change_priority().
+ * Callers must call zio_vdev_io_bypass() and zio_execute() for
+ * aggregated (parent) I/Os so that we could avoid dropping the
+ * queue's lock here to avoid a deadlock that we could encounter
+ * due to lock order reversal between vq_lock and io_lock in
+ * zio_change_priority().
*/
- mutex_exit(&vq->vq_lock);
- while ((dio = zio_walk_parents(aio, &zl)) != NULL) {
- ASSERT3U(dio->io_type, ==, aio->io_type);
-
- zio_vdev_io_bypass(dio);
- zio_execute(dio);
- }
- mutex_enter(&vq->vq_lock);
-
return (aio);
}
@@ -847,23 +839,24 @@ again:
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
- if (aio != NULL)
+ if (aio != NULL) {
zio = aio;
- else
+ } else {
vdev_queue_io_remove(vq, zio);
- /*
- * If the I/O is or was optional and therefore has no data, we need to
- * simply discard it. We need to drop the vdev queue's lock to avoid a
- * deadlock that we could encounter since this I/O will complete
- * immediately.
- */
- if (zio->io_flags & ZIO_FLAG_NODATA) {
- mutex_exit(&vq->vq_lock);
- zio_vdev_io_bypass(zio);
- zio_execute(zio);
- mutex_enter(&vq->vq_lock);
- goto again;
+ /*
+ * If the I/O is or was optional and therefore has no data, we
+ * need to simply discard it. We need to drop the vdev queue's
+ * lock to avoid a deadlock that we could encounter since this
+ * I/O will complete immediately.
+ */
+ if (zio->io_flags & ZIO_FLAG_NODATA) {
+ mutex_exit(&vq->vq_lock);
+ zio_vdev_io_bypass(zio);
+ zio_execute(zio);
+ mutex_enter(&vq->vq_lock);
+ goto again;
+ }
}
vdev_queue_pending_add(vq, zio);
@@ -876,7 +869,8 @@ zio_t *
vdev_queue_io(zio_t *zio)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
- zio_t *nio;
+ zio_t *dio, *nio;
+ zio_link_t *zl = NULL;
if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
return (zio);
@@ -923,6 +917,11 @@ vdev_queue_io(zio_t *zio)
return (NULL);
if (nio->io_done == vdev_queue_agg_io_done) {
+ while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+ ASSERT3U(dio->io_type, ==, nio->io_type);
+ zio_vdev_io_bypass(dio);
+ zio_execute(dio);
+ }
zio_nowait(nio);
return (NULL);
}
@@ -934,7 +933,8 @@ void
vdev_queue_io_done(zio_t *zio)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
- zio_t *nio;
+ zio_t *dio, *nio;
+ zio_link_t *zl = NULL;
hrtime_t now = gethrtime();
vq->vq_io_complete_ts = now;
@@ -946,6 +946,11 @@ vdev_queue_io_done(zio_t *zio)
while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
mutex_exit(&vq->vq_lock);
if (nio->io_done == vdev_queue_agg_io_done) {
+ while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+ ASSERT3U(dio->io_type, ==, nio->io_type);
+ zio_vdev_io_bypass(dio);
+ zio_execute(dio);
+ }
zio_nowait(nio);
} else {
zio_vdev_io_reissue(nio);