-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Backport: mutex-force-serialization-on-mutex_exit-to-fix-races.patch
Commit: openzfs/zfs@a3c1eb7 From: Chunwei Chen <tuxoko@gmail.com> Date: Fri, 19 Dec 2014 11:31:59 +0800 Subject: mutex: force serialization on mutex_exit() to fix races It is known that mutexes in Linux are not safe when using them to synchronize the freeing of object in which the mutex is embedded: http://lwn.net/Articles/575477/ The known places in ZFS which are suspected to suffer from the race condition are zio->io_lock and dbuf->db_mtx. * zio uses zio->io_lock and zio->io_cv to synchronize freeing between zio_wait() and zio_done(). * dbuf uses dbuf->db_mtx to protect reference counting. This patch fixes this kind of race by forcing serialization on mutex_exit() with a spin lock, making the mutex safe by sacrificing a bit of performance and memory overhead. This issue most commonly manifests itself as a deadlock in the zio pipeline caused by a process spinning on the damaged mutex. Similar deadlocks have been reported for the dbuf->db_mtx mutex. And it can also cause a NULL dereference or bad paging request under the right circumstances. This issue any many like it are linked off the openzfs/zfs#2523 issue. Specifically this fix resolves at least the following outstanding issues: openzfs/zfs#401 openzfs/zfs#2523 openzfs/zfs#2679 openzfs/zfs#2684 openzfs/zfs#2704 openzfs/zfs#2708 openzfs/zfs#2517 openzfs/zfs#2827 openzfs/zfs#2850 openzfs/zfs#2891 openzfs/zfs#2897 openzfs/zfs#2247 openzfs/zfs#2939 Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <ryao@gentoo.org> Backported-by: Darik Horn <dajhorn@vanadac.com> Closes #421 Conflicts: include/sys/mutex.h
- Loading branch information
Showing
3 changed files
with
142 additions
and
6 deletions.
There are no files selected for viewing
11 changes: 5 additions & 6 deletions
11
debian/patches/0001-Revert-Remove-etc-hostid-missing-warning.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
136 changes: 136 additions & 0 deletions
136
debian/patches/0002-mutex-force-serialization-on-mutex_exit-to-fix-races.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
From: Chunwei Chen <tuxoko@gmail.com> | ||
Date: Fri, 19 Dec 2014 11:31:59 +0800 | ||
Subject: mutex: force serialization on mutex_exit() to fix races | ||
|
||
It is known that mutexes in Linux are not safe when using them to | ||
synchronize the freeing of object in which the mutex is embedded: | ||
|
||
http://lwn.net/Articles/575477/ | ||
|
||
The known places in ZFS which are suspected to suffer from the race | ||
condition are zio->io_lock and dbuf->db_mtx. | ||
|
||
* zio uses zio->io_lock and zio->io_cv to synchronize freeing | ||
between zio_wait() and zio_done(). | ||
* dbuf uses dbuf->db_mtx to protect reference counting. | ||
|
||
This patch fixes this kind of race by forcing serialization on | ||
mutex_exit() with a spin lock, making the mutex safe by sacrificing | ||
a bit of performance and memory overhead. | ||
|
||
This issue most commonly manifests itself as a deadlock in the zio | ||
pipeline caused by a process spinning on the damaged mutex. Similar | ||
deadlocks have been reported for the dbuf->db_mtx mutex. And it can | ||
also cause a NULL dereference or bad paging request under the right | ||
circumstances. | ||
|
||
This issue any many like it are linked off the zfsonlinux/zfs#2523 | ||
issue. Specifically this fix resolves at least the following | ||
outstanding issues: | ||
|
||
zfsonlinux/zfs#401 | ||
zfsonlinux/zfs#2523 | ||
zfsonlinux/zfs#2679 | ||
zfsonlinux/zfs#2684 | ||
zfsonlinux/zfs#2704 | ||
zfsonlinux/zfs#2708 | ||
zfsonlinux/zfs#2517 | ||
zfsonlinux/zfs#2827 | ||
zfsonlinux/zfs#2850 | ||
zfsonlinux/zfs#2891 | ||
zfsonlinux/zfs#2897 | ||
zfsonlinux/zfs#2247 | ||
zfsonlinux/zfs#2939 | ||
|
||
Signed-off-by: Chunwei Chen <tuxoko@gmail.com> | ||
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> | ||
Signed-off-by: Richard Yao <ryao@gentoo.org> | ||
Backported-by: Darik Horn <dajhorn@vanadac.com> | ||
Closes #421 | ||
|
||
Conflicts: | ||
include/sys/mutex.h | ||
--- | ||
include/sys/mutex.h | 32 +++++++++++++++++++++++++++++++- | ||
1 file changed, 31 insertions(+), 1 deletion(-) | ||
|
||
diff --git a/include/sys/mutex.h b/include/sys/mutex.h | ||
index ec3cfd5..a1f7348 100644 | ||
--- a/include/sys/mutex.h | ||
+++ b/include/sys/mutex.h | ||
@@ -43,6 +43,7 @@ typedef enum { | ||
*/ | ||
typedef struct { | ||
struct mutex m; | ||
+ spinlock_t m_lock; /* used for serializing mutex_exit */ | ||
} kmutex_t; | ||
|
||
static inline kthread_t * | ||
@@ -69,6 +70,7 @@ mutex_owner(kmutex_t *mp) | ||
ASSERT(type == MUTEX_DEFAULT); \ | ||
\ | ||
__mutex_init(&(mp)->m, #mp, &__key); \ | ||
+ spin_lock_init(&(mp)->m_lock); \ | ||
}) | ||
|
||
#undef mutex_destroy | ||
@@ -83,7 +85,31 @@ mutex_owner(kmutex_t *mp) | ||
ASSERT3P(mutex_owner(mp), !=, current); \ | ||
mutex_lock(&(mp)->m); \ | ||
}) | ||
-#define mutex_exit(mp) mutex_unlock(&(mp)->m) | ||
+/* | ||
+ * The reason for the spinlock: | ||
+ * | ||
+ * The Linux mutex is designed with a fast-path/slow-path design such that it | ||
+ * does not guarantee serialization upon itself, allowing a race where latter | ||
+ * acquirers finish mutex_unlock before former ones. | ||
+ * | ||
+ * The race renders it unsafe to be used for serializing the freeing of an | ||
+ * object in which the mutex is embedded, where the latter acquirer could go | ||
+ * on to free the object while the former one is still doing mutex_unlock and | ||
+ * causing memory corruption. | ||
+ * | ||
+ * However, there are many places in ZFS where the mutex is used for | ||
+ * serializing object freeing, and the code is shared among other OSes without | ||
+ * this issue. Thus, we need the spinlock to force the serialization on | ||
+ * mutex_exit(). | ||
+ * | ||
+ * See http://lwn.net/Articles/575477/ for the information about the race. | ||
+ */ | ||
+#define mutex_exit(mp) \ | ||
+({ \ | ||
+ spin_lock(&(mp)->m_lock); \ | ||
+ mutex_unlock(&(mp)->m); \ | ||
+ spin_unlock(&(mp)->m_lock); \ | ||
+}) | ||
|
||
#ifdef HAVE_GPL_ONLY_SYMBOLS | ||
# define mutex_enter_nested(mp, sc) mutex_lock_nested(&(mp)->m, sc) | ||
@@ -95,6 +121,7 @@ mutex_owner(kmutex_t *mp) | ||
|
||
typedef struct { | ||
struct mutex m_mutex; | ||
+ spinlock_t m_lock; | ||
kthread_t *m_owner; | ||
} kmutex_t; | ||
|
||
@@ -137,6 +164,7 @@ spl_mutex_clear_owner(kmutex_t *mp) | ||
ASSERT(type == MUTEX_DEFAULT); \ | ||
\ | ||
__mutex_init(MUTEX(mp), #mp, &__key); \ | ||
+ spin_lock_init(&(mp)->m_lock); \ | ||
spl_mutex_clear_owner(mp); \ | ||
}) | ||
|
||
@@ -193,8 +221,10 @@ spl_mutex_clear_owner(kmutex_t *mp) | ||
|
||
#define mutex_exit(mp) \ | ||
({ \ | ||
+ spin_lock(&(mp)->m_lock); \ | ||
spl_mutex_clear_owner(mp); \ | ||
mutex_unlock(MUTEX(mp)); \ | ||
+ spin_unlock(&(mp)->m_lock); \ | ||
}) | ||
|
||
#ifdef HAVE_GPL_ONLY_SYMBOLS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
0001-Revert-Remove-etc-hostid-missing-warning.patch | ||
0002-mutex-force-serialization-on-mutex_exit-to-fix-races.patch |