Skip to content

Commit 9b67f60

Browse files
ahrens authored and behlendorf committed
Illumos 4757, 4913
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/4757
  https://www.illumos.org/issues/4913
  illumos/illumos-gate@5d7b4d4

Porting notes:
For compatibility with the fastpath code, the zio_done() function needed to be updated. Because embedded-data block pointers do not require DVAs to be allocated, the associated vdevs will not be marked and therefore should not be unmarked.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2544
1 parent faf0f58 commit 9b67f60

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1196
-259
lines changed

cmd/zdb/zdb.c

Lines changed: 53 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1047,6 +1047,16 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
10471047
return;
10481048
}
10491049

1050+
if (BP_IS_EMBEDDED(bp)) {
1051+
(void) sprintf(blkbuf,
1052+
"EMBEDDED et=%u %llxL/%llxP B=%llu",
1053+
(int)BPE_GET_ETYPE(bp),
1054+
(u_longlong_t)BPE_GET_LSIZE(bp),
1055+
(u_longlong_t)BPE_GET_PSIZE(bp),
1056+
(u_longlong_t)bp->blk_birth);
1057+
return;
1058+
}
1059+
10501060
blkbuf[0] = '\0';
10511061

10521062
for (i = 0; i < ndvas; i++)
@@ -1066,7 +1076,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
10661076
"%llxL/%llxP F=%llu B=%llu/%llu",
10671077
(u_longlong_t)BP_GET_LSIZE(bp),
10681078
(u_longlong_t)BP_GET_PSIZE(bp),
1069-
(u_longlong_t)bp->blk_fill,
1079+
(u_longlong_t)BP_GET_FILL(bp),
10701080
(u_longlong_t)bp->blk_birth,
10711081
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
10721082
}
@@ -1079,8 +1089,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
10791089
char blkbuf[BP_SPRINTF_LEN];
10801090
int l;
10811091

1082-
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1083-
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1092+
if (!BP_IS_EMBEDDED(bp)) {
1093+
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1094+
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1095+
}
10841096

10851097
(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
10861098

@@ -1134,10 +1146,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
11341146
err = visit_indirect(spa, dnp, cbp, &czb);
11351147
if (err)
11361148
break;
1137-
fill += cbp->blk_fill;
1149+
fill += BP_GET_FILL(cbp);
11381150
}
11391151
if (!err)
1140-
ASSERT3U(fill, ==, bp->blk_fill);
1152+
ASSERT3U(fill, ==, BP_GET_FILL(bp));
11411153
(void) arc_buf_remove_ref(buf, &buf);
11421154
}
11431155

@@ -1861,14 +1873,14 @@ dump_dir(objset_t *os)
18611873

18621874
if (dds.dds_type == DMU_OST_META) {
18631875
dds.dds_creation_txg = TXG_INITIAL;
1864-
usedobjs = os->os_rootbp->blk_fill;
1876+
usedobjs = BP_GET_FILL(os->os_rootbp);
18651877
refdbytes = os->os_spa->spa_dsl_pool->
18661878
dp_mos_dir->dd_phys->dd_used_bytes;
18671879
} else {
18681880
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
18691881
}
18701882

1871-
ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1883+
ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
18721884

18731885
zdb_nicenum(refdbytes, numbuf);
18741886

@@ -2171,6 +2183,9 @@ typedef struct zdb_cb {
21712183
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
21722184
uint64_t zcb_dedup_asize;
21732185
uint64_t zcb_dedup_blocks;
2186+
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
2187+
uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
2188+
[BPE_PAYLOAD_SIZE];
21742189
uint64_t zcb_start;
21752190
uint64_t zcb_lastprint;
21762191
uint64_t zcb_totalasize;
@@ -2204,6 +2219,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
22042219
zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
22052220
}
22062221

2222+
if (BP_IS_EMBEDDED(bp)) {
2223+
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
2224+
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
2225+
[BPE_GET_PSIZE(bp)]++;
2226+
return;
2227+
}
2228+
22072229
if (dump_opt['L'])
22082230
return;
22092231

@@ -2301,7 +2323,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
23012323

23022324
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
23032325

2304-
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
2326+
if (!BP_IS_EMBEDDED(bp) &&
2327+
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
23052328
size_t size = BP_GET_PSIZE(bp);
23062329
void *data = zio_data_buf_alloc(size);
23072330
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
@@ -2497,8 +2520,9 @@ dump_block_stats(spa_t *spa)
24972520
zdb_blkstats_t *zb, *tzb;
24982521
uint64_t norm_alloc, norm_space, total_alloc, total_found;
24992522
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2500-
int leaks = 0;
2523+
boolean_t leaks = B_FALSE;
25012524
int e;
2525+
bp_embedded_type_t i;
25022526

25032527
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
25042528
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
@@ -2587,7 +2611,7 @@ dump_block_stats(spa_t *spa)
25872611
(u_longlong_t)total_alloc,
25882612
(dump_opt['L']) ? "unreachable" : "leaked",
25892613
(longlong_t)(total_alloc - total_found));
2590-
leaks = 1;
2614+
leaks = B_TRUE;
25912615
}
25922616

25932617
if (tzb->zb_count == 0)
@@ -2617,6 +2641,23 @@ dump_block_stats(spa_t *spa)
26172641
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
26182642
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
26192643

2644+
for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
2645+
if (zcb.zcb_embedded_blocks[i] == 0)
2646+
continue;
2647+
(void) printf("\n");
2648+
(void) printf("\tadditional, non-pointer bps of type %u: "
2649+
"%10llu\n",
2650+
i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
2651+
2652+
if (dump_opt['b'] >= 3) {
2653+
(void) printf("\t number of (compressed) bytes: "
2654+
"number of bps\n");
2655+
dump_histogram(zcb.zcb_embedded_histogram[i],
2656+
sizeof (zcb.zcb_embedded_histogram[i]) /
2657+
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
2658+
}
2659+
}
2660+
26202661
if (dump_opt['b'] >= 2) {
26212662
int l, t, level;
26222663
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
@@ -2718,14 +2759,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
27182759
avl_index_t where;
27192760
zdb_ddt_entry_t *zdde, zdde_search;
27202761

2721-
if (BP_IS_HOLE(bp))
2762+
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
27222763
return (0);
27232764

27242765
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
27252766
(void) printf("traversing objset %llu, %llu objects, "
27262767
"%lu blocks so far\n",
27272768
(u_longlong_t)zb->zb_objset,
2728-
(u_longlong_t)bp->blk_fill,
2769+
(u_longlong_t)BP_GET_FILL(bp),
27292770
avl_numnodes(t));
27302771
}
27312772

cmd/zfs/zfs_main.c

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -258,9 +258,9 @@ get_usage(zfs_help_t idx)
258258
case HELP_ROLLBACK:
259259
return (gettext("\trollback [-rRf] <snapshot>\n"));
260260
case HELP_SEND:
261-
return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] "
261+
return (gettext("\tsend [-DnPpRrve] [-[iI] snapshot] "
262262
"<snapshot>\n"
263-
"\tsend [-i snapshot|bookmark] "
263+
"\tsend [-e] [-i snapshot|bookmark] "
264264
"<filesystem|volume|snapshot>\n"));
265265
case HELP_SET:
266266
return (gettext("\tset <property=value> "
@@ -3338,6 +3338,8 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data)
33383338
zfs_close(zhp);
33393339
return (0);
33403340
}
3341+
3342+
33413343
/*
33423344
* Report any snapshots more recent than the one specified. Used when '-r' is
33433345
* not specified. We reuse this same callback for the snapshot dependents - if
@@ -3677,7 +3679,7 @@ zfs_do_send(int argc, char **argv)
36773679
boolean_t extraverbose = B_FALSE;
36783680

36793681
/* check options */
3680-
while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) {
3682+
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
36813683
switch (c) {
36823684
case 'i':
36833685
if (fromname)
@@ -3712,6 +3714,9 @@ zfs_do_send(int argc, char **argv)
37123714
case 'n':
37133715
flags.dryrun = B_TRUE;
37143716
break;
3717+
case 'e':
3718+
flags.embed_data = B_TRUE;
3719+
break;
37153720
case ':':
37163721
(void) fprintf(stderr, gettext("missing argument for "
37173722
"'%c' option\n"), optopt);
@@ -3750,6 +3755,7 @@ zfs_do_send(int argc, char **argv)
37503755
if (strchr(argv[0], '@') == NULL ||
37513756
(fromname && strchr(fromname, '#') != NULL)) {
37523757
char frombuf[ZFS_MAXNAMELEN];
3758+
enum lzc_send_flags lzc_flags = 0;
37533759

37543760
if (flags.replicate || flags.doall || flags.props ||
37553761
flags.dedup || flags.dryrun || flags.verbose ||
@@ -3764,6 +3770,9 @@ zfs_do_send(int argc, char **argv)
37643770
if (zhp == NULL)
37653771
return (1);
37663772

3773+
if (flags.embed_data)
3774+
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
3775+
37673776
if (fromname != NULL &&
37683777
(fromname[0] == '#' || fromname[0] == '@')) {
37693778
/*
@@ -3777,7 +3786,7 @@ zfs_do_send(int argc, char **argv)
37773786
(void) strlcat(frombuf, fromname, sizeof (frombuf));
37783787
fromname = frombuf;
37793788
}
3780-
err = zfs_send_one(zhp, fromname, STDOUT_FILENO);
3789+
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags);
37813790
zfs_close(zhp);
37823791
return (err != 0);
37833792
}

cmd/zstreamdump/zstreamdump.c

Lines changed: 43 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
#include <sys/zfs_ioctl.h>
3737
#include <zfs_fletcher.h>
3838

39-
uint64_t drr_record_count[DRR_NUMTYPES];
4039
uint64_t total_write_size = 0;
4140
uint64_t total_stream_len = 0;
4241
FILE *send_stream = 0;
@@ -81,6 +80,8 @@ int
8180
main(int argc, char *argv[])
8281
{
8382
char *buf = malloc(INITIAL_BUFLEN);
83+
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
84+
uint64_t total_records = 0;
8485
dmu_replay_record_t thedrr;
8586
dmu_replay_record_t *drr = &thedrr;
8687
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
@@ -91,6 +92,7 @@ main(int argc, char *argv[])
9192
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
9293
struct drr_free *drrf = &thedrr.drr_u.drr_free;
9394
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
95+
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
9496
char c;
9597
boolean_t verbose = B_FALSE;
9698
boolean_t first = B_TRUE;
@@ -170,6 +172,7 @@ main(int argc, char *argv[])
170172
}
171173

172174
drr_record_count[drr->drr_type]++;
175+
total_records++;
173176

174177
switch (drr->drr_type) {
175178
case DRR_BEGIN:
@@ -286,8 +289,8 @@ main(int argc, char *argv[])
286289
drro->drr_bonuslen);
287290
}
288291
if (drro->drr_bonuslen > 0) {
289-
(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
290-
8), &zc);
292+
(void) ssread(buf,
293+
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
291294
}
292295
break;
293296

@@ -397,6 +400,38 @@ main(int argc, char *argv[])
397400
}
398401
(void) ssread(buf, drrs->drr_length, &zc);
399402
break;
403+
case DRR_WRITE_EMBEDDED:
404+
if (do_byteswap) {
405+
drrwe->drr_object =
406+
BSWAP_64(drrwe->drr_object);
407+
drrwe->drr_offset =
408+
BSWAP_64(drrwe->drr_offset);
409+
drrwe->drr_length =
410+
BSWAP_64(drrwe->drr_length);
411+
drrwe->drr_toguid =
412+
BSWAP_64(drrwe->drr_toguid);
413+
drrwe->drr_lsize =
414+
BSWAP_32(drrwe->drr_lsize);
415+
drrwe->drr_psize =
416+
BSWAP_32(drrwe->drr_psize);
417+
}
418+
if (verbose) {
419+
(void) printf("WRITE_EMBEDDED object = %llu "
420+
"offset = %llu length = %llu\n"
421+
"toguid = %llx comp = %u etype = %u "
422+
"lsize = %u psize = %u\n",
423+
(u_longlong_t)drrwe->drr_object,
424+
(u_longlong_t)drrwe->drr_offset,
425+
(u_longlong_t)drrwe->drr_length,
426+
(u_longlong_t)drrwe->drr_toguid,
427+
drrwe->drr_compression,
428+
drrwe->drr_etype,
429+
drrwe->drr_lsize,
430+
drrwe->drr_psize);
431+
}
432+
(void) ssread(buf,
433+
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
434+
break;
400435
case DRR_NUMTYPES:
401436
/* should never be reached */
402437
exit(1);
@@ -418,18 +453,16 @@ main(int argc, char *argv[])
418453
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
419454
(void) printf("\tTotal DRR_WRITE records = %lld\n",
420455
(u_longlong_t)drr_record_count[DRR_WRITE]);
456+
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
457+
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
458+
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
459+
(u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
421460
(void) printf("\tTotal DRR_FREE records = %lld\n",
422461
(u_longlong_t)drr_record_count[DRR_FREE]);
423462
(void) printf("\tTotal DRR_SPILL records = %lld\n",
424463
(u_longlong_t)drr_record_count[DRR_SPILL]);
425464
(void) printf("\tTotal records = %lld\n",
426-
(u_longlong_t)(drr_record_count[DRR_BEGIN] +
427-
drr_record_count[DRR_OBJECT] +
428-
drr_record_count[DRR_FREEOBJECTS] +
429-
drr_record_count[DRR_WRITE] +
430-
drr_record_count[DRR_FREE] +
431-
drr_record_count[DRR_SPILL] +
432-
drr_record_count[DRR_END]));
465+
(u_longlong_t)total_records);
433466
(void) printf("\tTotal write size = %lld (0x%llx)\n",
434467
(u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
435468
(void) printf("\tTotal stream length = %lld (0x%llx)\n",

0 commit comments

Comments
 (0)