Skip to content

Commit

Permalink
MetaTool support checksum
Browse files Browse the repository at this point in the history
  • Loading branch information
trueeyu committed Aug 24, 2023
1 parent 149b542 commit 5434c06
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 22 deletions.
9 changes: 2 additions & 7 deletions be/src/storage/task/engine_checksum_task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,9 @@ Status EngineChecksumTask::_compute_checksum() {
size_t num_columns = tablet_schema->num_columns();
for (size_t i = 0; i < num_columns; ++i) {
LogicalType type = tablet_schema->column(i).type();
// The approximation of FLOAT/DOUBLE in a certain precision range, the binary of byte is not
// a fixed value, so these two types are ignored in calculating checksum.
// And also HLL/OBJCET/PERCENTILE is too large to calculate the checksum.
if (type == TYPE_FLOAT || type == TYPE_DOUBLE || type == TYPE_HLL || type == TYPE_OBJECT ||
type == TYPE_PERCENTILE || type == TYPE_JSON) {
continue;
if (is_support_checksum_type(type)) {
return_columns.push_back(i);
}
return_columns.push_back(i);
}

Schema schema = ChunkHelper::convert_schema(tablet_schema, return_columns);
Expand Down
64 changes: 49 additions & 15 deletions be/src/tools/meta_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,30 +106,30 @@ std::string get_usage(const std::string& progname) {
ss << progname << " is the StarRocks BE Meta tool.\n";
ss << "Stop BE first before use this tool.\n";
ss << "Usage:\n";
ss << "./meta_tool --operation=get_meta --root_path=/path/to/storage/path "
ss << "./meta_tool.sh --operation=get_meta --root_path=/path/to/storage/path "
"--tablet_id=tabletid [--schema_hash=schemahash]\n";
ss << "./meta_tool --operation=load_meta --root_path=/path/to/storage/path "
ss << "./meta_tool.sh --operation=load_meta --root_path=/path/to/storage/path "
"--json_meta_path=path\n";
ss << "./meta_tool --operation=delete_meta "
ss << "./meta_tool.sh --operation=delete_meta "
"--root_path=/path/to/storage/path --tablet_id=tabletid "
"[--schema_hash=schemahash] | ./meta_tool --operation=delete_meta "
"--root_path=/path/to/storage/path --table_id=tableid\n";
ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n";
ss << "./meta_tool --operation=delete_rowset_meta "
ss << "./meta_tool.sh --operation=delete_meta --tablet_file=file_path\n";
ss << "./meta_tool.sh --operation=delete_rowset_meta "
"--root_path=/path/to/storage/path --tablet_uid=tablet_uid "
"--rowset_id=rowset_id\n";
ss << "./meta_tool --operation=delete_persistent_index_meta "
ss << "./meta_tool.sh --operation=delete_persistent_index_meta "
"--root_path=/path/to/storage/path --tablet_id=tabletid | "
"./meta_tool --operation=delete_persistent_index_meta "
"./meta_tool.sh --operation=delete_persistent_index_meta "
"--root_path=/path/to/storage/path --table_id=tableid\n";
ss << "./meta_tool --operation=compact_meta --root_path=/path/to/storage/path\n";
ss << "./meta_tool --operation=get_meta_stats --root_path=/path/to/storage/path\n";
ss << "./meta_tool --operation=ls --root_path=/path/to/storage/path\n";
ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n";
ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n";
ss << "./meta_tool --operation=dump_segment_data --file=/path/to/segment/file\n";
ss << "./meta_tool --operation=dump_short_key_index --file=/path/to/segment/file --key_column_count=2\n";
ss << "./meta_tool --operation=check_table_meta_consistency --root_path=/path/to/storage/path "
ss << "./meta_tool.sh --operation=compact_meta --root_path=/path/to/storage/path\n";
ss << "./meta_tool.sh --operation=get_meta_stats --root_path=/path/to/storage/path\n";
ss << "./meta_tool.sh --operation=ls --root_path=/path/to/storage/path\n";
ss << "./meta_tool.sh --operation=show_meta --pb_meta_path=path\n";
ss << "./meta_tool.sh --operation=show_segment_footer --file=/path/to/segment/file\n";
ss << "./meta_tool.sh --operation=dump_segment_data --file=/path/to/segment/file\n";
ss << "./meta_tool.sh --operation=dump_short_key_index --file=/path/to/segment/file --key_column_count=2\n";
ss << "./meta_tool.sh --operation=check_table_meta_consistency --root_path=/path/to/storage/path "
"--table_id=tableid\n";
ss << "cat 0001000000001394_0000000000000004.meta | ./meta_tool --operation=print_lake_metadata\n";
ss << "cat 0001000000001391_0000000000000001.log | ./meta_tool --operation=print_lake_txn_log\n";
Expand Down Expand Up @@ -587,6 +587,7 @@ class SegmentDump {

Status dump_segment_data();
Status dump_short_key_index(size_t key_column_count);
Status calc_checksum();

private:
struct ColItem {
Expand Down Expand Up @@ -754,6 +755,39 @@ Status SegmentDump::_output_short_key_string(const std::vector<ColItem>& cols, s
return Status::OK();
}

// Opens a segment iterator restricted to the columns that may take part in a
// checksum.
//
// Steps performed:
//   1. Initialize the dump state through _init().
//   2. Collect the index of every column whose logical type is accepted by
//      is_support_checksum_type().
//   3. Convert those columns into a Schema and open an iterator on the
//      segment with the page cache disabled.
//
// Returns the failing Status when _init() or iterator creation fails (the
// error is also printed to stdout), otherwise Status::OK().
//
// TODO: the checksum itself is not computed yet; the iterator is created but
// never read. The explicit return below only guarantees that the success path
// no longer flows off the end of a non-void function (undefined behaviour).
Status SegmentDump::calc_checksum() {
    Status st = _init();
    if (!st.ok()) {
        std::cout << "SegmentDump init failed: " << st << std::endl;
        return st;
    }

    // Keep only the columns whose type is eligible for checksum calculation.
    std::vector<uint32_t> return_columns;
    size_t num_columns = _tablet_schema->num_columns();
    return_columns.reserve(num_columns);
    for (size_t i = 0; i < num_columns; i++) {
        LogicalType type = _tablet_schema->column(i).type();
        if (is_support_checksum_type(type)) {
            // Column indexes are stored as uint32_t; make the narrowing explicit.
            return_columns.push_back(static_cast<uint32_t>(i));
        }
    }

    auto schema = ChunkHelper::convert_schema(_tablet_schema, return_columns);
    SegmentReadOptions seg_opts;
    seg_opts.fs = _fs;
    seg_opts.use_page_cache = false;
    OlapReaderStatistics stats;
    seg_opts.stats = &stats;
    auto seg_res = _segment->new_iterator(schema, seg_opts);
    if (!seg_res.ok()) {
        std::cout << "new segment iterator failed: " << seg_res.status() << std::endl;
        return seg_res.status();
    }
    auto seg_iter = std::move(seg_res.value());

    // TODO: read chunks from seg_iter and fold them into a checksum.
    (void)seg_iter;
    return Status::OK();
}

Status SegmentDump::dump_short_key_index(size_t key_column_count) {
key_column_count = std::min(key_column_count, _max_short_key_col_cnt);
Status st = _init();
Expand Down
8 changes: 8 additions & 0 deletions be/src/types/logical_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ inline bool is_zone_map_key_type(LogicalType type) {
type != TYPE_OBJECT && type != TYPE_HLL && type != TYPE_PERCENTILE;
}

// Tells whether a column of the given logical type takes part in checksum
// calculation.
//
// FLOAT and DOUBLE are excluded because they are approximations within a
// certain precision range, so their byte representation is not a fixed value.
// HLL, OBJECT and PERCENTILE are excluded because they are too large to
// calculate the checksum. JSON is excluded as well.
inline bool is_support_checksum_type(LogicalType type) {
    switch (type) {
    case TYPE_FLOAT:
    case TYPE_DOUBLE:
    case TYPE_HLL:
    case TYPE_OBJECT:
    case TYPE_PERCENTILE:
    case TYPE_JSON:
        return false;
    default:
        return true;
    }
}

// Primary variable template: DelegateType<TYPE> evaluates to TYPE itself.
// NOTE(review): a `template <>` follows immediately, presumably an explicit
// specialization that maps some type to a different delegate; its body is
// outside this view, so confirm against the full header.
template <LogicalType TYPE>
inline constexpr LogicalType DelegateType = TYPE;
template <>
Expand Down

0 comments on commit 5434c06

Please sign in to comment.