Skip to content

Commit

Permalink
Illumos 4891 - want zdb option to dump all metadata
Browse files Browse the repository at this point in the history
4891 want zdb option to dump all metadata
Reviewed by: Sonu Pillai <sonu.pillai@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>

We'd like a way for zdb to dump metadata in a machine-readable
format, so that we can bring that back from a customer site for
in-house diagnosis.  Think of it as a crash dump for zpools,
which can be used for post-mortem analysis of a malfunctioning
pool.

References:
  https://www.illumos.org/issues/4891
  illumos/illumos-gate@df15e41

Porting notes:
- [cmd/zdb/zdb.c]
  - a5778ea zdb: Introduce -V for verbatim import
  - In main() the getopt 'opts' variable was removed and the code
    was brought back in line with illumos.
- [lib/libzpool/kernel.c]
  - 1e33ac1 Fix Solaris thread dependency by using pthreads
  - f0e324f Update utsname support
  - 4d58b69 Fix vn_open/vn_rdwr error handling
  - In vn_open() allocate 'dumppath' on heap instead of stack
  - Properly handle 'dump_fd == -1' error path
  - Free 'realpath' after the added vn_dumpdir code block

Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
ahrens authored and behlendorf committed Jan 11, 2016
1 parent f3c9dca commit 9867e8b
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 10 deletions.
17 changes: 11 additions & 6 deletions cmd/zdb/zdb.c
Expand Up @@ -118,7 +118,7 @@ usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-I inflight I/Os] poolname [object...]\n"
"[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
Expand Down Expand Up @@ -157,7 +157,7 @@ usage(void)
(void) fprintf(stderr, " -R read and display block from a "
"device\n\n");
(void) fprintf(stderr, " Below options are intended for use "
"with other options (except -l):\n");
"with other options:\n");
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
"panic recovery (-AA) or both (-AAA)\n");
(void) fprintf(stderr, " -F attempt automatic rewind within "
Expand All @@ -170,12 +170,14 @@ usage(void)
"has altroot/not in a cachefile\n");
(void) fprintf(stderr, " -p <path> -- use one or more with "
"-e to specify path to vdev dir\n");
(void) fprintf(stderr, " -x <dumpdir> -- "
"dump all read blocks into specified directory\n");
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
"specify the maximum number of checksumming I/Os "
"[default is 200]\n");
"specify the maximum number of "
"checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
Expand Down Expand Up @@ -3626,7 +3628,6 @@ main(int argc, char **argv)
int flags = ZFS_IMPORT_MISSING_LOG;
int rewind = ZPOOL_NEVER_REWIND;
char *spa_config_path_env;
const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:PV";
boolean_t target_is_spa = B_TRUE;

(void) setrlimit(RLIMIT_NOFILE, &rl);
Expand All @@ -3643,7 +3644,8 @@ main(int argc, char **argv)
if (spa_config_path_env != NULL)
spa_config_path = spa_config_path_env;

while ((c = getopt(argc, argv, opts)) != -1) {
while ((c = getopt(argc, argv,
"bcdhilmMI:suCDRSAFLXx:evp:t:U:PV")) != -1) {
switch (c) {
case 'b':
case 'c':
Expand Down Expand Up @@ -3697,6 +3699,9 @@ main(int argc, char **argv)
}
searchdirs[nsearch++] = optarg;
break;
case 'x':
vn_dumpdir = optarg;
break;
case 't':
max_txg = strtoull(optarg, NULL, 0);
if (max_txg < TXG_INITIAL) {
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zfs_context.h
Expand Up @@ -500,8 +500,10 @@ typedef struct vnode {
uint64_t v_size;
int v_fd;
char *v_path;
int v_dump_fd;
} vnode_t;

extern char *vn_dumpdir;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */

typedef struct xoptattr {
Expand Down
35 changes: 33 additions & 2 deletions lib/libzpool/kernel.c
Expand Up @@ -29,6 +29,7 @@
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <libgen.h>
#include <sys/signal.h>
#include <sys/spa.h>
#include <sys/stat.h>
Expand All @@ -50,6 +51,9 @@ char hw_serial[HW_HOSTID_LEN];
struct utsname hw_utsname;
vmem_t *zio_arena = NULL;

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* this only exists to have its address taken */
struct proc p0;

Expand Down Expand Up @@ -588,6 +592,7 @@ int
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
{
int fd;
int dump_fd;
vnode_t *vp;
int old_umask = 0;
char *realpath;
Expand Down Expand Up @@ -655,13 +660,31 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
*/
fd = open64(realpath, flags - FREAD, mode);
free(realpath);
err = errno;

if (flags & FCREAT)
(void) umask(old_umask);

if (vn_dumpdir != NULL) {
char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
(void) snprintf(dumppath, MAXPATHLEN,
"%s/%s", vn_dumpdir, basename(realpath));
dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
umem_free(dumppath, MAXPATHLEN);
if (dump_fd == -1) {
err = errno;
free(realpath);
close(fd);
return (err);
}
} else {
dump_fd = -1;
}

free(realpath);

if (fd == -1)
return (errno);
return (err);

if (fstat64_blk(fd, &st) == -1) {
err = errno;
Expand All @@ -676,6 +699,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
vp->v_fd = fd;
vp->v_size = st.st_size;
vp->v_path = spa_strdup(path);
vp->v_dump_fd = dump_fd;

return (0);
}
Expand Down Expand Up @@ -708,6 +732,11 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,

if (uio == UIO_READ) {
rc = pread64(vp->v_fd, addr, len, offset);
if (vp->v_dump_fd != -1) {
int status =
pwrite64(vp->v_dump_fd, addr, rc, offset);
ASSERT(status != -1);
}
} else {
/*
* To simulate partial disk writes, we split writes into two
Expand Down Expand Up @@ -750,6 +779,8 @@ void
vn_close(vnode_t *vp)
{
close(vp->v_fd);
if (vp->v_dump_fd != -1)
close(vp->v_dump_fd);
spa_strfree(vp->v_path);
umem_free(vp, sizeof (vnode_t));
}
Expand Down
18 changes: 16 additions & 2 deletions man/man8/zdb.8
Expand Up @@ -11,7 +11,7 @@
.\"
.\"
.\" Copyright 2012, Richard Lowe.
.\" Copyright (c) 2012 by Delphix. All rights reserved.
.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved.
.\"
.TH "ZDB" "8" "February 15, 2012" "" ""

Expand All @@ -20,7 +20,7 @@

.SH "SYNOPSIS"
\fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
[-U \fIcache\fR] [-I \fIinflight I/Os\fR]
[-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-x \fIdumpdir\fR]
[\fIpoolname\fR [\fIobject\fR ...]]

.P
Expand Down Expand Up @@ -372,6 +372,20 @@ Operate on an exported pool, not present in \fB/etc/zfs/zpool.cache\fR. The
\fB-p\fR flag specifies the path under which devices are to be searched.
.RE

.sp
.ne 2
.na
\fB-x\fR \fIdumpdir\fR
.ad
.sp .6
.RS 4n
All blocks accessed will be copied to files in the specified directory.
The blocks will be placed in sparse files whose name is the same as
that of the file or device read. zdb can then be run on the generated files.
Note that the \fB-bbc\fR flags are sufficient to access (and thus copy)
all metadata on the pool.
.RE

.sp
.ne 2
.na
Expand Down

0 comments on commit 9867e8b

Please sign in to comment.