Skip to content

Commit

Permalink
Illumos 4757, 4913
Browse files Browse the repository at this point in the history
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/4757
  https://www.illumos.org/issues/4913
  illumos/illumos-gate@5d7b4d4

Porting notes:

For compatibility with the fastpath code, the zio_done() function
needed to be updated.  Because embedded-data block pointers do
not require DVAs to be allocated, the associated vdevs will not
be marked and therefore should not be unmarked.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2544
  • Loading branch information
ahrens authored and behlendorf committed Aug 1, 2014
1 parent faf0f58 commit 9b67f60
Show file tree
Hide file tree
Showing 46 changed files with 1,196 additions and 259 deletions.
65 changes: 53 additions & 12 deletions cmd/zdb/zdb.c
Expand Up @@ -1047,6 +1047,16 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
return;
}

if (BP_IS_EMBEDDED(bp)) {
(void) sprintf(blkbuf,
"EMBEDDED et=%u %llxL/%llxP B=%llu",
(int)BPE_GET_ETYPE(bp),
(u_longlong_t)BPE_GET_LSIZE(bp),
(u_longlong_t)BPE_GET_PSIZE(bp),
(u_longlong_t)bp->blk_birth);
return;
}

blkbuf[0] = '\0';

for (i = 0; i < ndvas; i++)
Expand All @@ -1066,7 +1076,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
"%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
(u_longlong_t)bp->blk_birth,
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
Expand All @@ -1079,8 +1089,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
char blkbuf[BP_SPRINTF_LEN];
int l;

ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
if (!BP_IS_EMBEDDED(bp)) {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}

(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));

Expand Down Expand Up @@ -1134,10 +1146,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
err = visit_indirect(spa, dnp, cbp, &czb);
if (err)
break;
fill += cbp->blk_fill;
fill += BP_GET_FILL(cbp);
}
if (!err)
ASSERT3U(fill, ==, bp->blk_fill);
ASSERT3U(fill, ==, BP_GET_FILL(bp));
(void) arc_buf_remove_ref(buf, &buf);
}

Expand Down Expand Up @@ -1861,14 +1873,14 @@ dump_dir(objset_t *os)

if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
usedobjs = os->os_rootbp->blk_fill;
usedobjs = BP_GET_FILL(os->os_rootbp);
refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}

ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));

zdb_nicenum(refdbytes, numbuf);

Expand Down Expand Up @@ -2171,6 +2183,9 @@ typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
[BPE_PAYLOAD_SIZE];
uint64_t zcb_start;
uint64_t zcb_lastprint;
uint64_t zcb_totalasize;
Expand Down Expand Up @@ -2204,6 +2219,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
}

if (BP_IS_EMBEDDED(bp)) {
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
[BPE_GET_PSIZE(bp)]++;
return;
}

if (dump_opt['L'])
return;

Expand Down Expand Up @@ -2301,7 +2323,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,

is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));

if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
Expand Down Expand Up @@ -2497,8 +2520,9 @@ dump_block_stats(spa_t *spa)
zdb_blkstats_t *zb, *tzb;
uint64_t norm_alloc, norm_space, total_alloc, total_found;
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
boolean_t leaks = B_FALSE;
int e;
bp_embedded_type_t i;

(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
Expand Down Expand Up @@ -2587,7 +2611,7 @@ dump_block_stats(spa_t *spa)
(u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
(longlong_t)(total_alloc - total_found));
leaks = 1;
leaks = B_TRUE;
}

if (tzb->zb_count == 0)
Expand Down Expand Up @@ -2617,6 +2641,23 @@ dump_block_stats(spa_t *spa)
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb.zcb_embedded_blocks[i] == 0)
continue;
(void) printf("\n");
(void) printf("\tadditional, non-pointer bps of type %u: "
"%10llu\n",
i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);

if (dump_opt['b'] >= 3) {
(void) printf("\t number of (compressed) bytes: "
"number of bps\n");
dump_histogram(zcb.zcb_embedded_histogram[i],
sizeof (zcb.zcb_embedded_histogram[i]) /
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
}
}

if (dump_opt['b'] >= 2) {
int l, t, level;
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
Expand Down Expand Up @@ -2718,14 +2759,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;

if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);

if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
(void) printf("traversing objset %llu, %llu objects, "
"%lu blocks so far\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
avl_numnodes(t));
}

Expand Down
17 changes: 13 additions & 4 deletions cmd/zfs/zfs_main.c
Expand Up @@ -258,9 +258,9 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] "
return (gettext("\tsend [-DnPpRrve] [-[iI] snapshot] "
"<snapshot>\n"
"\tsend [-i snapshot|bookmark] "
"\tsend [-e] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
Expand Down Expand Up @@ -3338,6 +3338,8 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}


/*
* Report any snapshots more recent than the one specified. Used when '-r' is
* not specified. We reuse this same callback for the snapshot dependents - if
Expand Down Expand Up @@ -3677,7 +3679,7 @@ zfs_do_send(int argc, char **argv)
boolean_t extraverbose = B_FALSE;

/* check options */
while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) {
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
switch (c) {
case 'i':
if (fromname)
Expand Down Expand Up @@ -3712,6 +3714,9 @@ zfs_do_send(int argc, char **argv)
case 'n':
flags.dryrun = B_TRUE;
break;
case 'e':
flags.embed_data = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
Expand Down Expand Up @@ -3750,6 +3755,7 @@ zfs_do_send(int argc, char **argv)
if (strchr(argv[0], '@') == NULL ||
(fromname && strchr(fromname, '#') != NULL)) {
char frombuf[ZFS_MAXNAMELEN];
enum lzc_send_flags lzc_flags = 0;

if (flags.replicate || flags.doall || flags.props ||
flags.dedup || flags.dryrun || flags.verbose ||
Expand All @@ -3764,6 +3770,9 @@ zfs_do_send(int argc, char **argv)
if (zhp == NULL)
return (1);

if (flags.embed_data)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;

if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {
/*
Expand All @@ -3777,7 +3786,7 @@ zfs_do_send(int argc, char **argv)
(void) strlcat(frombuf, fromname, sizeof (frombuf));
fromname = frombuf;
}
err = zfs_send_one(zhp, fromname, STDOUT_FILENO);
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags);
zfs_close(zhp);
return (err != 0);
}
Expand Down
53 changes: 43 additions & 10 deletions cmd/zstreamdump/zstreamdump.c
Expand Up @@ -36,7 +36,6 @@
#include <sys/zfs_ioctl.h>
#include <zfs_fletcher.h>

uint64_t drr_record_count[DRR_NUMTYPES];
uint64_t total_write_size = 0;
uint64_t total_stream_len = 0;
FILE *send_stream = 0;
Expand Down Expand Up @@ -81,6 +80,8 @@ int
main(int argc, char *argv[])
{
char *buf = malloc(INITIAL_BUFLEN);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
uint64_t total_records = 0;
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
Expand All @@ -91,6 +92,7 @@ main(int argc, char *argv[])
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
char c;
boolean_t verbose = B_FALSE;
boolean_t first = B_TRUE;
Expand Down Expand Up @@ -170,6 +172,7 @@ main(int argc, char *argv[])
}

drr_record_count[drr->drr_type]++;
total_records++;

switch (drr->drr_type) {
case DRR_BEGIN:
Expand Down Expand Up @@ -286,8 +289,8 @@ main(int argc, char *argv[])
drro->drr_bonuslen);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
8), &zc);
(void) ssread(buf,
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
}
break;

Expand Down Expand Up @@ -397,6 +400,38 @@ main(int argc, char *argv[])
}
(void) ssread(buf, drrs->drr_length, &zc);
break;
case DRR_WRITE_EMBEDDED:
if (do_byteswap) {
drrwe->drr_object =
BSWAP_64(drrwe->drr_object);
drrwe->drr_offset =
BSWAP_64(drrwe->drr_offset);
drrwe->drr_length =
BSWAP_64(drrwe->drr_length);
drrwe->drr_toguid =
BSWAP_64(drrwe->drr_toguid);
drrwe->drr_lsize =
BSWAP_32(drrwe->drr_lsize);
drrwe->drr_psize =
BSWAP_32(drrwe->drr_psize);
}
if (verbose) {
(void) printf("WRITE_EMBEDDED object = %llu "
"offset = %llu length = %llu\n"
"toguid = %llx comp = %u etype = %u "
"lsize = %u psize = %u\n",
(u_longlong_t)drrwe->drr_object,
(u_longlong_t)drrwe->drr_offset,
(u_longlong_t)drrwe->drr_length,
(u_longlong_t)drrwe->drr_toguid,
drrwe->drr_compression,
drrwe->drr_etype,
drrwe->drr_lsize,
drrwe->drr_psize);
}
(void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
case DRR_NUMTYPES:
/* should never be reached */
exit(1);
Expand All @@ -418,18 +453,16 @@ main(int argc, char *argv[])
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
(void) printf("\tTotal DRR_WRITE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE]);
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
(void) printf("\tTotal DRR_FREE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_FREE]);
(void) printf("\tTotal DRR_SPILL records = %lld\n",
(u_longlong_t)drr_record_count[DRR_SPILL]);
(void) printf("\tTotal records = %lld\n",
(u_longlong_t)(drr_record_count[DRR_BEGIN] +
drr_record_count[DRR_OBJECT] +
drr_record_count[DRR_FREEOBJECTS] +
drr_record_count[DRR_WRITE] +
drr_record_count[DRR_FREE] +
drr_record_count[DRR_SPILL] +
drr_record_count[DRR_END]));
(u_longlong_t)total_records);
(void) printf("\tTotal write size = %lld (0x%llx)\n",
(u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
(void) printf("\tTotal stream length = %lld (0x%llx)\n",
Expand Down

0 comments on commit 9b67f60

Please sign in to comment.