Skip to content

Commit

Permalink
fix write IO hang when mmap dirty_inode waiting this writing commit.
Browse files Browse the repository at this point in the history
The bug time sequence:
1. context openzfs#1, zfs_write assign a txg "n".
2. In a same process, context openzfs#2, mmap page fault (which means the `mm_sem`
   is hold) occurred, `zfs_dirty_inode` open a txg failed, and wait previous
   txg "n" completed.
3. context openzfs#1 call `uiomove` to write, however page fault is occurred in `uiomove`,
   which means it need `mm_sem`, but `mm_sem` is hold by context openzfs#2, so it
   stuck and can't complete,  then txg "n" will not complete.

So context openzfs#1 and context openzfs#2 trap into the "dead lock".

Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes openzfs#7512
  • Loading branch information
Grady Wong committed Sep 21, 2018
1 parent d1ed1be commit 58937cb
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 5 deletions.
2 changes: 2 additions & 0 deletions include/sys/dmu_tx.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ extern dmu_tx_stats_t dmu_tx_stats;
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
void dmu_tx_unassign(dmu_tx_t *tx);
int dmu_tx_reassign(dmu_tx_t *tx, txg_how_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
Expand Down
1 change: 1 addition & 0 deletions include/sys/uio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <sys/uio.h>

extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
extern int uiomove_write_no_pagefault(void *p, size_t n, struct uio *uio);
extern void uio_prefaultpages(ssize_t, uio_t *);
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
extern void uioskip(uio_t *, size_t);
Expand Down
49 changes: 49 additions & 0 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include <sys/types.h>
#include <sys/uio_impl.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>

/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
Expand Down Expand Up @@ -148,6 +149,54 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
}
EXPORT_SYMBOL(uiomove);

int
uiomove_write_no_pagefault(void *p, size_t n, struct uio *uio)
{
const struct iovec *iov = uio->uio_iov;
size_t skip = uio->uio_skip;
ulong_t cnt;

ASSERT(uio->uio_segflg != UIO_BVEC);

while (n && uio->uio_resid) {
cnt = MIN(iov->iov_len - skip, n);
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
/*
* p = kernel data pointer
* iov->iov_base = user data pointer
*/
pagefault_disable();
if (copy_from_user(p, iov->iov_base+skip, cnt)) {
pagefault_enable();
return (EFAULT);
}
pagefault_enable();
break;
case UIO_SYSSPACE:
bcopy(iov->iov_base + skip, p, cnt);
break;
default:
ASSERT(0);
}
skip += cnt;
if (skip == iov->iov_len) {
skip = 0;
uio->uio_iov = (++iov);
uio->uio_iovcnt--;
}
uio->uio_skip = skip;
uio->uio_resid -= cnt;
uio->uio_loffset += cnt;
p = (caddr_t)p + cnt;
n -= cnt;
}
return (0);

}
EXPORT_SYMBOL(uiomove_write_no_pagefault);

#define fuword8(uptr, vptr) get_user((*vptr), (uptr))

/*
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1371,8 +1371,8 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
* to lock the pages in memory, so that uiomove won't
* block.
*/
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_WRITE, uio);
err = uiomove_write_no_pagefault((char *)db->db_data + bufoff,
tocpy, uio);

if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
Expand Down
36 changes: 35 additions & 1 deletion module/zfs/dmu_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
return (0);
}

static void
void
dmu_tx_unassign(dmu_tx_t *tx)
{
if (tx->tx_txg == 0)
Expand Down Expand Up @@ -1021,6 +1021,40 @@ dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
return (0);
}

int
dmu_tx_reassign(dmu_tx_t *tx, txg_how_t txg_how)
{
int err;
dmu_tx_hold_t *txh;

/*
* Walk the transaction's hold list, removing the hold on the
* associated dnode, and notifying waiters if the refcount drops to 0.
*/
for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
txh = list_next(&tx->tx_holds, txh)) {
dnode_t *dn = txh->txh_dnode;

if (dn == NULL)
continue;
mutex_enter(&dn->dn_mtx);
ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
dn->dn_assigned_txg = 0;
cv_broadcast(&dn->dn_notxholds);
}
mutex_exit(&dn->dn_mtx);
}

txg_rele_to_sync(&tx->tx_txgh);

tx->tx_txg = 0;
err = dmu_tx_assign(tx, txg_how);

return err;
}

void
dmu_tx_wait(dmu_tx_t *tx)
{
Expand Down
26 changes: 24 additions & 2 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include <sys/cred.h>
#include <sys/attr.h>
#include <sys/zpl.h>
#include <sys/dmu_tx.h>

/*
* Programming rules.
Expand Down Expand Up @@ -600,6 +601,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
rl_t *rl;
int max_blksz = zfsvfs->z_max_blksz;
int error = 0;
int reassign_error = 0;
arc_buf_t *abuf;
const iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
Expand Down Expand Up @@ -820,8 +822,28 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)

if (abuf == NULL) {
tx_bytes = uio->uio_resid;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
do {
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
if (error) {
if (error == EFAULT) {
uio_prefaultpages(MIN(n, max_blksz), uio);
reassign_error = dmu_tx_reassign(tx, TXG_WAIT);
if (reassign_error) {
break;
}
} else {
}
}
} while (error == EFAULT);

if (reassign_error) {
dmu_tx_abort(tx);
if (abuf != NULL) {
dmu_return_arcbuf(abuf);
}
break;
}
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
Expand Down

0 comments on commit 58937cb

Please sign in to comment.