Skip to content

Commit 498877b

Browse files
ahrens and behlendorf
authored and committed
Illumos #3112, #3113, #3114
3112 ztest does not honor ZFS_DEBUG
3113 ztest should use watchpoints to protect frozen arc bufs
3114 some leaked nvlists in zfsdev_ioctl

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Matt Amdur <Matt.Amdur@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <chris.siden@delphix.com>
Approved by: Eric Schrock <eric.schrock@delphix.com>

References:
https://www.illumos.org/issues/3112
https://www.illumos.org/issues/3113
https://www.illumos.org/issues/3114
illumos/illumos-gate@cd1c8b8

The /proc/self/cmd watchpoint interface is specific to Solaris. Therefore, the #3113 implementation was reworked to use the more portable mprotect(2) system call. When the pages are watched they are marked read-only for protection. Any write to the protected address range immediately triggers a SIGSEGV. The pages are marked writable again when they are unwatched.

Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1489
1 parent 03c6040 commit 498877b

File tree

6 files changed

+89
-12
lines changed

6 files changed

+89
-12
lines changed

cmd/ztest/ztest.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6210,11 +6210,12 @@ main(int argc, char **argv)
62106210

62116211
(void) setvbuf(stdout, NULL, _IOLBF, 0);
62126212

6213+
dprintf_setup(&argc, argv);
6214+
62136215
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
62146216
ASSERT3S(ztest_fd_rand, >=, 0);
62156217

62166218
if (!fd_data_str) {
6217-
dprintf_setup(&argc, argv);
62186219
process_options(argc, argv);
62196220

62206221
setup_data_fd();

include/sys/arc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ int arc_buf_size(arc_buf_t *buf);
136136
void arc_release(arc_buf_t *buf, void *tag);
137137
int arc_released(arc_buf_t *buf);
138138
int arc_has_callback(arc_buf_t *buf);
139+
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
139140
void arc_buf_freeze(arc_buf_t *buf);
140141
void arc_buf_thaw(arc_buf_t *buf);
141142
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
@@ -183,6 +184,10 @@ extern int zfs_write_limit_shift;
183184
extern unsigned long zfs_write_limit_max;
184185
extern kmutex_t zfs_write_limit_lock;
185186

187+
#ifndef _KERNEL
188+
extern boolean_t arc_watch;
189+
#endif
190+
186191
#ifdef __cplusplus
187192
}
188193
#endif

include/sys/zfs_context.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@
9797
#include <dirent.h>
9898
#include <time.h>
9999
#include <ctype.h>
100+
#include <signal.h>
101+
#include <sys/mman.h>
100102
#include <sys/note.h>
101103
#include <sys/types.h>
102104
#include <sys/cred.h>

module/zfs/arc.c

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@
145145
#include <sys/dmu_tx.h>
146146
#include <zfs_fletcher.h>
147147

148+
#ifndef _KERNEL
149+
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
150+
boolean_t arc_watch = B_FALSE;
151+
#endif
152+
148153
static kmutex_t arc_reclaim_thr_lock;
149154
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
150155
static uint8_t arc_thread_exit;
@@ -569,6 +574,7 @@ static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
569574
static int arc_evict_needed(arc_buf_contents_t type);
570575
static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
571576
arc_buf_contents_t type);
577+
static void arc_buf_watch(arc_buf_t *buf);
572578

573579
static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
574580

@@ -1060,6 +1066,37 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
10601066
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
10611067
buf->b_hdr->b_freeze_cksum);
10621068
mutex_exit(&buf->b_hdr->b_freeze_lock);
1069+
arc_buf_watch(buf);
1070+
}
1071+
1072+
#ifndef _KERNEL
1073+
void
1074+
arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
1075+
{
1076+
panic("Got SIGSEGV at address: 0x%lx\n", (long) si->si_addr);
1077+
}
1078+
#endif
1079+
1080+
/* ARGSUSED */
1081+
static void
1082+
arc_buf_unwatch(arc_buf_t *buf)
1083+
{
1084+
#ifndef _KERNEL
1085+
if (arc_watch) {
1086+
ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size,
1087+
PROT_READ | PROT_WRITE));
1088+
}
1089+
#endif
1090+
}
1091+
1092+
/* ARGSUSED */
1093+
static void
1094+
arc_buf_watch(arc_buf_t *buf)
1095+
{
1096+
#ifndef _KERNEL
1097+
if (arc_watch)
1098+
ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size, PROT_READ));
1099+
#endif
10631100
}
10641101

10651102
void
@@ -1080,6 +1117,8 @@ arc_buf_thaw(arc_buf_t *buf)
10801117
}
10811118

10821119
mutex_exit(&buf->b_hdr->b_freeze_lock);
1120+
1121+
arc_buf_unwatch(buf);
10831122
}
10841123

10851124
void
@@ -1097,6 +1136,7 @@ arc_buf_freeze(arc_buf_t *buf)
10971136
buf->b_hdr->b_state == arc_anon);
10981137
arc_cksum_compute(buf, B_FALSE);
10991138
mutex_exit(hash_lock);
1139+
11001140
}
11011141

11021142
static void
@@ -1504,21 +1544,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
15041544
* the buffer is placed on l2arc_free_on_write to be freed later.
15051545
*/
15061546
static void
1507-
arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
1508-
void *data, size_t size)
1547+
arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
15091548
{
1549+
arc_buf_hdr_t *hdr = buf->b_hdr;
1550+
15101551
if (HDR_L2_WRITING(hdr)) {
15111552
l2arc_data_free_t *df;
15121553
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
1513-
df->l2df_data = data;
1514-
df->l2df_size = size;
1554+
df->l2df_data = buf->b_data;
1555+
df->l2df_size = hdr->b_size;
15151556
df->l2df_func = free_func;
15161557
mutex_enter(&l2arc_free_on_write_mtx);
15171558
list_insert_head(l2arc_free_on_write, df);
15181559
mutex_exit(&l2arc_free_on_write_mtx);
15191560
ARCSTAT_BUMP(arcstat_l2_free_on_write);
15201561
} else {
1521-
free_func(data, size);
1562+
free_func(buf->b_data, hdr->b_size);
15221563
}
15231564
}
15241565

@@ -1534,16 +1575,15 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
15341575
arc_buf_contents_t type = buf->b_hdr->b_type;
15351576

15361577
arc_cksum_verify(buf);
1578+
arc_buf_unwatch(buf);
15371579

15381580
if (!recycle) {
15391581
if (type == ARC_BUFC_METADATA) {
1540-
arc_buf_data_free(buf->b_hdr, zio_buf_free,
1541-
buf->b_data, size);
1582+
arc_buf_data_free(buf, zio_buf_free);
15421583
arc_space_return(size, ARC_SPACE_DATA);
15431584
} else {
15441585
ASSERT(type == ARC_BUFC_DATA);
1545-
arc_buf_data_free(buf->b_hdr,
1546-
zio_data_buf_free, buf->b_data, size);
1586+
arc_buf_data_free(buf, zio_data_buf_free);
15471587
ARCSTAT_INCR(arcstat_data_size, -size);
15481588
atomic_add_64(&arc_size, -size);
15491589
}
@@ -2908,6 +2948,7 @@ arc_read_done(zio_t *zio)
29082948
}
29092949

29102950
arc_cksum_compute(buf, B_FALSE);
2951+
arc_buf_watch(buf);
29112952

29122953
if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
29132954
/*
@@ -3542,6 +3583,7 @@ arc_release(arc_buf_t *buf, void *tag)
35423583
}
35433584
hdr->b_datacnt -= 1;
35443585
arc_cksum_verify(buf);
3586+
arc_buf_unwatch(buf);
35453587

35463588
mutex_exit(hash_lock);
35473589

module/zfs/spa_misc.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,6 +1630,23 @@ spa_init(int mode)
16301630

16311631
spa_mode_global = mode;
16321632

1633+
#ifndef _KERNEL
1634+
if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
1635+
struct sigaction sa;
1636+
1637+
sa.sa_flags = SA_SIGINFO;
1638+
sigemptyset(&sa.sa_mask);
1639+
sa.sa_sigaction = arc_buf_sigsegv;
1640+
1641+
if (sigaction(SIGSEGV, &sa, NULL) == -1) {
1642+
perror("could not enable watchpoints: "
1643+
"sigaction(SIGSEGV, ...) = ");
1644+
} else {
1645+
arc_watch = B_TRUE;
1646+
}
1647+
}
1648+
#endif
1649+
16331650
fm_init();
16341651
refcount_init();
16351652
unique_init();

module/zfs/zio.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,21 @@ zio_init(void)
169169
while (p2 & (p2 - 1))
170170
p2 &= p2 - 1;
171171

172+
#ifndef _KERNEL
173+
/*
174+
* If we are using watchpoints, put each buffer on its own page,
175+
* to eliminate the performance overhead of trapping to the
176+
* kernel when modifying a non-watched buffer that shares the
177+
* page with a watched buffer.
178+
*/
179+
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
180+
continue;
181+
#endif
172182
if (size <= 4 * SPA_MINBLOCKSIZE) {
173183
align = SPA_MINBLOCKSIZE;
174-
} else if (P2PHASE(size, PAGESIZE) == 0) {
184+
} else if (IS_P2ALIGNED(size, PAGESIZE)) {
175185
align = PAGESIZE;
176-
} else if (P2PHASE(size, p2 >> 2) == 0) {
186+
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
177187
align = p2 >> 2;
178188
}
179189

0 commit comments

Comments
 (0)