diff --git a/ydb/core/kqp/tools/join_perf/benchmark_settings.cpp b/ydb/core/kqp/tools/join_perf/benchmark_settings.cpp
index 3703b0437b18..e6c187a6c49e 100644
--- a/ydb/core/kqp/tools/join_perf/benchmark_settings.cpp
+++ b/ydb/core/kqp/tools/join_perf/benchmark_settings.cpp
@@ -2,7 +2,7 @@
 
 namespace NKikimr::NMiniKQL {
 
-TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
+TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
                  TTableSizes size) {
     TString algoName = [&] {
         switch (algo) {
@@ -37,28 +37,46 @@ TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchm
 }
 
 namespace NBenchmarkSizes {
-TVector<TTableSizes> ExponentialSizeIncrease() {
-    TVector<TTableSizes> ret;
+// Builds the "ExpGrowth" preset: 8 sizes that double at every step, starting
+// from (1 << 18) * scale rows; every size is repeated `samples` times.
+TPreset ExponentialSizeIncrease(int samples, int scale) {
+    TPreset ret;
+    ret.PresetName = "ExpGrowth";
     int init = 1 << 18;
+    init *= scale;
     for (int index = 0; index < 8; index++) {
         int thisNum = init * (1 << index);
-        ret.emplace_back(thisNum, thisNum);
+        for (int sample = 0; sample < samples; ++sample) {
+            ret.Cases.emplace_back(thisNum, thisNum);
+        }
     }
     return ret;
 }
 
-TVector<TTableSizes> LinearSizeIncrease() {
-    TVector<TTableSizes> ret;
-    int init = 1 << 22;
+// Builds the "LinearGrowth" preset: sizes k * (1 << 18) * scale rows for
+// k = 1..8; every size is repeated `samples` times.
+TPreset LinearSizeIncrease(int samples, int scale) {
+    TPreset ret;
+    ret.PresetName = "LinearGrowth";
+    int init = 1 << 18;
+    init *= scale;
     for (int index = 1; index < 9; index++) {
         int thisNum = init * index;
-        ret.emplace_back(thisNum, thisNum);
+        for (int sample = 0; sample < samples; ++sample) {
+            ret.Cases.emplace_back(thisNum, thisNum);
+        }
     }
     return ret;
 }
 
-TVector<TTableSizes> VerySmallSizes() {
-    return {{512, 512}, {1024, 1024}};
+// Builds the "VerySmall" preset with two fixed cases (512x512 and 1024x1024
+// rows); both arguments are ignored.
+TPreset VerySmallSizes(int, int) {
+    TPreset ret;
+    ret.PresetName = "VerySmall";
+    ret.Cases.emplace_back(512, 512);
+    ret.Cases.emplace_back(1024, 1024);
+    return ret;
 }
 
 } // namespace NBenchmarkSizes
diff --git a/ydb/core/kqp/tools/join_perf/benchmark_settings.h b/ydb/core/kqp/tools/join_perf/benchmark_settings.h
index 6823dda42882..4711f7b895d4 100644
--- a/ydb/core/kqp/tools/join_perf/benchmark_settings.h
+++ b/ydb/core/kqp/tools/join_perf/benchmark_settings.h
@@ -12,25 +12,27 @@ struct TTableSizes {
     int Left;
     int Right;
 };
+
+// A named collection of table-size cases.
+struct TPreset {
+    TVector<TTableSizes> Cases;
+    TString PresetName;
+};
 
 struct TBenchmarkSettings {
-    struct TPreset {
-        TVector<TTableSizes> Cases;
-        TString PresetName;
-    };
 
     TVector<TPreset> Presets;
     TSet<ETestedJoinKeyType> KeyTypes;
     TSet<ETestedJoinAlgo> Algorithms;
 };
 
-TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
+TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
                  TTableSizes size);
 
 namespace NBenchmarkSizes {
-TVector<TTableSizes> ExponentialSizeIncrease();
-TVector<TTableSizes> LinearSizeIncrease();
-TVector<TTableSizes> VerySmallSizes();
+TPreset ExponentialSizeIncrease(int samples, int scale);
+TPreset LinearSizeIncrease(int samples, int scale);
+TPreset VerySmallSizes(int samples, int scale);
 } // namespace NBenchmarkSizes
 
 } // namespace NKikimr::NMiniKQL
diff --git a/ydb/core/kqp/tools/join_perf/graph.py b/ydb/core/kqp/tools/join_perf/graph.py
index 3f2b07e63e8c..69006bad91d4 100644
--- a/ydb/core/kqp/tools/join_perf/graph.py
+++ b/ydb/core/kqp/tools/join_perf/graph.py
@@ -3,6 +3,8 @@
 import matplotlib.pyplot as plt
 import sys
 import os
+import numpy as np
+import math
 from pathlib import Path
 if len(sys.argv) < 2:
     print("usage: python3 graph.py folder/file.jsonl")
@@ -24,6 +26,19 @@
             'key_type': name_parts[1]
         }
     )
+# is_time_sampled = only_needed[0]["input_data_flavour"].startswith("Sampling")
+def geo_mean_70percent_lowest(series):
+    """Geometric mean of the positive values among the lowest 70% of `series`.
+
+    Returns NaN when none of the selected values is positive.
+    """
+    size = len(series)
+    smallest = series.nsmallest(math.ceil(size * 0.7))
+    positive = smallest[smallest > 0]
+    if len(positive) == 0:
+        return np.nan
+    return np.exp(np.mean(np.log(positive)))
+
 df = pd.DataFrame(only_needed)
 df = df.drop('run_name', axis=1)
 images_root_base = str(Path.home())+"/.join_perf/images"
@@ -42,15 +57,15 @@
         print(graph_name)
 
         subset = df[(df["input_data_flavour"] == data_flavour) & (df["key_type"] == key_type)]
-        print(subset)
         fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 8), sharex=True)
 
         for name, group in subset.groupby('join_algorithm'):
+            group = group.groupby('left_table_size')['time'].apply(geo_mean_70percent_lowest).sort_index()
             axes.plot(
-                    group['left_table_size'],
-                    group['time'],
-                    label=name,
-                    marker='o'
+                group.index,
+                group.values,
+                label=name,
+                marker='o'
             )
         axes.set_ylabel('time')
         axes.set_xlabel('left_rows')
@@ -65,5 +80,4 @@
         plt.savefig(log_images + "/" + graph_name + ".jpeg")
 
 print(f"images without y-axis log scaling are written to {simple_images}")
 print(f"images with y-axis log scaling are written to {log_images}")
-
diff --git a/ydb/core/kqp/tools/join_perf/main.cpp b/ydb/core/kqp/tools/join_perf/main.cpp
index ea2b9a78e654..a28795357806 100644
--- a/ydb/core/kqp/tools/join_perf/main.cpp
+++ b/ydb/core/kqp/tools/join_perf/main.cpp
@@ -36,27 +36,37 @@ int main(int argc, char** argv) {
     opts.AddHelpOption('h');
 
     NKikimr::NMiniKQL::TBenchmarkSettings params;
-    opts.AddLongOption('s', "benchmark_sizes")
-        .Help("left and right table sizes to choose for joins benchmark. visit NBenchmarkSizes namespace in "
-              "benchmark_settings.cpp to see exact values")
+    // Preset factory picked by --case; it is called after parsing so that --samples and --scale apply.
+    NKikimr::NMiniKQL::TPreset (*presetWithSamples)(int, int) = &NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes;
+    int samples = 1;
+    int scale = 1;
+    opts.AddHelpOption().Help("visit NBenchmarkSizes namespace in benchmark_settings.cpp for explanation");
+    opts.AddLongOption('c', "case")
+        .Help("left and right table sizes to choose for joins benchmark.")
         .Choices({"exp", "linear", "small"})
         .DefaultValue("small")
         .Handler1([&](const NLastGetopt::TOptsParser* option) {
             auto val = TStringBuf(option->CurVal());
-            auto preset = [&]() -> NKikimr::NMiniKQL::TBenchmarkSettings::TPreset {
+            presetWithSamples = [&]() {
                 if (val == "exp") {
-                    return {NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease(), "ExpGrowth"};
+                    return &NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease;
                 } else if (val == "linear") {
-                    return {NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease(), "LinearGrowth"};
+                    return &NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease;
                 } else if (val == "small") {
-                    return {NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes(), "VerySmall"};
+                    return &NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes;
                 } else {
-                    Y_ABORT("unknown option for benchmark_sizes");
+                    Y_ABORT("unknown option for case");
                 }
             }();
-            params.Presets.push_back(preset);
         });
-
+    opts.AddLongOption('s', "samples")
+        .Help("number representing how much to repeat single case. useful for noise reduction.")
+        .DefaultValue(1)
+        .StoreResult(&samples);
+    opts.AddLongOption("scale")
+        .Help("multiplier applied to the base size (1 << 18 rows) of the smallest table in case")
+        .DefaultValue(1)
+        .StoreResult(&scale);
     params.Algorithms = {
         NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockMap,
         // NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockHash,
@@ -70,6 +80,7 @@ int main(int argc, char** argv) {
     };
 
     NLastGetopt::TOptsParseResult parsedOptions(&opts, argc, argv);
+    params.Presets.push_back(presetWithSamples(samples, scale));
     AddLittleLeftTablePreset(params);
 
     auto benchmarkResults = NKikimr::NMiniKQL::RunJoinsBench(params);