Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions ydb/core/kqp/tools/join_perf/benchmark_settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace NKikimr::NMiniKQL {

TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
TTableSizes size) {
TString algoName = [&] {
switch (algo) {
Expand Down Expand Up @@ -37,28 +37,40 @@ TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchm
}

namespace NBenchmarkSizes {
TVector<TTableSizes> ExponentialSizeIncrease() {
TVector<TTableSizes> ret;
TPreset ExponentialSizeIncrease(int samples, int scale) {
TPreset ret;
ret.PresetName = "ExpGrowth";
int init = 1 << 18;
init *= scale;
for (int index = 0; index < 8; index++) {
int thisNum = init * (1 << index);
ret.emplace_back(thisNum, thisNum);
for (int _ = 0; _ < samples; ++_){
ret.Cases.emplace_back(thisNum, thisNum);
}
}
return ret;
}

TVector<TTableSizes> LinearSizeIncrease() {
TVector<TTableSizes> ret;
int init = 1 << 22;
TPreset LinearSizeIncrease(int samples, int scale) {
TPreset ret;
ret.PresetName = "LinearGrowth";
int init = 1 << 18;
init *= scale;
for (int index = 1; index < 9; index++) {
int thisNum = init * index;
ret.emplace_back(thisNum, thisNum);
for (int _ = 0; _ < samples; ++_){
ret.Cases.emplace_back(thisNum, thisNum);
}
}
return ret;
}

TVector<TTableSizes> VerySmallSizes() {
return {{512, 512}, {1024, 1024}};
TPreset VerySmallSizes(int, int) {
TPreset ret;
ret.PresetName = "VerySmall";
ret.Cases.emplace_back(512, 512);
ret.Cases.emplace_back(1024, 1024);
return ret;
}
} // namespace NBenchmarkSizes

Expand Down
16 changes: 8 additions & 8 deletions ydb/core/kqp/tools/join_perf/benchmark_settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,25 @@ struct TTableSizes {
int Left;
int Right;
};
// A named benchmark preset: the list of table-size cases to run plus a label for it.
struct TPreset {
// Left/right table size pairs. The preset factories in NBenchmarkSizes append the
// same pair once per requested sample, so entries may repeat.
TVector<TTableSizes> Cases;
// Label of the preset, e.g. "ExpGrowth", "LinearGrowth" or "VerySmall".
TString PresetName;
};

// Bundle of benchmark parameters: presets, key types and join algorithms.
// main.cpp fills an instance of this struct and passes it to RunJoinsBench.
struct TBenchmarkSettings {
// NOTE(review): a TPreset with the same two fields is also declared at namespace
// scope directly above this struct. Inside TBenchmarkSettings the name TPreset
// resolves to this nested type, so Presets below would hold the nested type rather
// than the namespace-scope one — confirm which declaration is meant to remain.
struct TPreset {
TVector<TTableSizes> Cases;
TString PresetName;
};

// Presets to run; main.cpp appends to this vector with push_back.
TVector<TPreset> Presets;
// Set of join key types (ETestedJoinKeyType values).
TSet<ETestedJoinKeyType> KeyTypes;
// Set of join algorithms (ETestedJoinAlgo values); assigned in main.cpp.
TSet<ETestedJoinAlgo> Algorithms;
};

TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
TTableSizes size);

namespace NBenchmarkSizes {
TVector<TTableSizes> ExponentialSizeIncrease();
TVector<TTableSizes> LinearSizeIncrease();
TVector<TTableSizes> VerySmallSizes();
TPreset ExponentialSizeIncrease(int samples, int scale);
TPreset LinearSizeIncrease(int samples, int scale);
TPreset VerySmallSizes(int samples, int scale);
} // namespace NBenchmarkSizes

} // namespace NKikimr::NMiniKQL
24 changes: 17 additions & 7 deletions ydb/core/kqp/tools/join_perf/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import matplotlib.pyplot as plt
import sys
import os
import numpy as np
import math
from pathlib import Path
if len(sys.argv) < 2:
print("usage: python3 graph.py folder/file.jsonl")
Expand All @@ -24,6 +26,15 @@
'key_type': name_parts[1]
}
)
# is_time_sampled = only_needed[0]["input_data_flavour"].startswith("Sampling")
def geo_mean_70percent_lowest(series, fraction=0.7):
    """Geometric mean of the positive values among the lowest part of ``series``.

    The ``ceil(len(series) * fraction)`` smallest values are selected, the
    non-positive ones are discarded (their logarithm is undefined), and the
    geometric mean of the remainder is returned.

    Args:
        series: pandas Series of numeric measurements.
        fraction: share of the smallest values to keep, in the range (0, 1].
            Defaults to 0.7, which matches the function name.

    Returns:
        The geometric mean as a float, or ``np.nan`` when no positive value
        is left after the selection.

    Raises:
        ValueError: if ``fraction`` is outside (0, 1].
    """
    if not 0 < fraction <= 1:
        raise ValueError(f"fraction must be in (0, 1], got {fraction}")
    keep = math.ceil(len(series) * fraction)
    lowest = series.nsmallest(keep)
    positive = lowest[lowest > 0]
    if len(positive) == 0:
        return np.nan
    # exp(mean(log(x))) is the geometric mean; it avoids overflow of a raw product.
    return np.exp(np.mean(np.log(positive)))

df = pd.DataFrame(only_needed)
df = df.drop('run_name', axis=1)
images_root_base = str(Path.home())+"/.join_perf/images"
Expand All @@ -42,15 +53,15 @@
print(graph_name)
subset = df[(df["input_data_flavour"] == data_flavour) &
(df["key_type"] == key_type)]
print(subset)
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 8), sharex=True)

for name, group in subset.groupby('join_algorithm'):
group = group.groupby('left_table_size')['time'].apply(lambda x: geo_mean_70percent_lowest(x)).sort_values()
axes.plot(
group['left_table_size'],
group['time'],
label=name,
marker='o'
group.index,
group.values,
label=name,
marker='o'
)
axes.set_ylabel('time')
axes.set_xlabel('left_rows')
Expand All @@ -65,5 +76,4 @@
plt.savefig(log_images + "/" + graph_name + ".jpeg")

print(f"images without y-axis log scaling are written to {simple_images}")
print(f"images with y-axis log scaling are written to {log_images}")

print(f"images with y-axis log scaling are written to {log_images}")
22 changes: 13 additions & 9 deletions ydb/core/kqp/tools/join_perf/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,30 @@ int main(int argc, char** argv) {
opts.AddHelpOption('h');

NKikimr::NMiniKQL::TBenchmarkSettings params;
opts.AddLongOption('s', "benchmark_sizes")
.Help("left and right table sizes to choose for joins benchmark. visit NBenchmarkSizes namespace in "
"benchmark_settings.cpp to see exact values")
NKikimr::NMiniKQL::TPreset(*presetWithSamples)(int, int);
int samples = 1;
int scale = 1;
opts.AddHelpOption().Help("visit NBenchmarkSizes namespace in benchmark_settings.cpp for explanation");
opts.AddLongOption('c', "case")
.Help("left and right table sizes to choose for joins benchmark.")
.Choices({"exp", "linear", "small"})
.DefaultValue("small")
.Handler1([&](const NLastGetopt::TOptsParser* option) {
auto val = TStringBuf(option->CurVal());
auto preset = [&]() -> NKikimr::NMiniKQL::TBenchmarkSettings::TPreset {
presetWithSamples = [&]() {
if (val == "exp") {
return {NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease(), "ExpGrowth"};
return &NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease;
} else if (val == "linear") {
return {NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease(), "LinearGrowth"};
return &NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease;
} else if (val == "small") {
return {NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes(), "VerySmall"};
return &NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes;
} else {
Y_ABORT("unknown option for benchmark_sizes");
}
}();
params.Presets.push_back(preset);
});

opts.AddLongOption('s', "samples").Help("number representing how much to repeat single case. useful for noise reduction.").DefaultValue(1).StoreResult(&samples);
opts.AddLongOption("scale").Help("size of smallest table in case").DefaultValue(1<<18).StoreResult(&scale);
params.Algorithms = {
NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockMap,
// NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockHash,
Expand All @@ -70,6 +73,7 @@ int main(int argc, char** argv) {
};

NLastGetopt::TOptsParseResult parsedOptions(&opts, argc, argv);
params.Presets.push_back(presetWithSamples(samples, scale));
AddLittleLeftTablePreset(params);

auto benchmarkResults = NKikimr::NMiniKQL::RunJoinsBench(params);
Expand Down