Skip to content

Commit 6fff3e8

Browse files
authored
Merge 2497534 into b3a7583
2 parents b3a7583 + 2497534 commit 6fff3e8

File tree

4 files changed

+60
-34
lines changed

4 files changed

+60
-34
lines changed

ydb/core/kqp/tools/join_perf/benchmark_settings.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
namespace NKikimr::NMiniKQL {
44

5-
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
5+
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
66
TTableSizes size) {
77
TString algoName = [&] {
88
switch (algo) {
@@ -37,28 +37,40 @@ TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchm
3737
}
3838

3939
namespace NBenchmarkSizes {
40-
TVector<TTableSizes> ExponentialSizeIncrease() {
41-
TVector<TTableSizes> ret;
40+
TPreset ExponentialSizeIncrease(int samples, int scale) {
41+
TPreset ret;
42+
ret.PresetName = "ExpGrowth";
4243
int init = 1 << 18;
44+
init *= scale;
4345
for (int index = 0; index < 8; index++) {
4446
int thisNum = init * (1 << index);
45-
ret.emplace_back(thisNum, thisNum);
47+
for (int _ = 0; _ < samples; ++_){
48+
ret.Cases.emplace_back(thisNum, thisNum);
49+
}
4650
}
4751
return ret;
4852
}
4953

50-
TVector<TTableSizes> LinearSizeIncrease() {
51-
TVector<TTableSizes> ret;
52-
int init = 1 << 22;
54+
TPreset LinearSizeIncrease(int samples, int scale) {
55+
TPreset ret;
56+
ret.PresetName = "LinearGrowth";
57+
int init = 1 << 18;
58+
init *= scale;
5359
for (int index = 1; index < 9; index++) {
5460
int thisNum = init * index;
55-
ret.emplace_back(thisNum, thisNum);
61+
for (int _ = 0; _ < samples; ++_){
62+
ret.Cases.emplace_back(thisNum, thisNum);
63+
}
5664
}
5765
return ret;
5866
}
5967

60-
TVector<TTableSizes> VerySmallSizes() {
61-
return {{512, 512}, {1024, 1024}};
68+
TPreset VerySmallSizes(int, int) {
69+
TPreset ret;
70+
ret.PresetName = "VerySmall";
71+
ret.Cases.emplace_back(512, 512);
72+
ret.Cases.emplace_back(1024, 1024);
73+
return ret;
6274
}
6375
} // namespace NBenchmarkSizes
6476

ydb/core/kqp/tools/join_perf/benchmark_settings.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,25 @@ struct TTableSizes {
1212
int Left;
1313
int Right;
1414
};
15+
struct TPreset {
16+
TVector<TTableSizes> Cases;
17+
TString PresetName;
18+
};
1519

1620
struct TBenchmarkSettings {
17-
struct TPreset {
18-
TVector<TTableSizes> Cases;
19-
TString PresetName;
20-
};
2121

2222
TVector<TPreset> Presets;
2323
TSet<ETestedJoinKeyType> KeyTypes;
2424
TSet<ETestedJoinAlgo> Algorithms;
2525
};
2626

27-
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TBenchmarkSettings::TPreset& preset,
27+
TString CaseName(ETestedJoinAlgo algo, ETestedJoinKeyType keyType, const TPreset& preset,
2828
TTableSizes size);
2929

3030
namespace NBenchmarkSizes {
31-
TVector<TTableSizes> ExponentialSizeIncrease();
32-
TVector<TTableSizes> LinearSizeIncrease();
33-
TVector<TTableSizes> VerySmallSizes();
31+
TPreset ExponentialSizeIncrease(int samples, int scale);
32+
TPreset LinearSizeIncrease(int samples, int scale);
33+
TPreset VerySmallSizes(int samples, int scale);
3434
} // namespace NBenchmarkSizes
3535

3636
} // namespace NKikimr::NMiniKQL

ydb/core/kqp/tools/join_perf/graph.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import matplotlib.pyplot as plt
44
import sys
55
import os
6+
import numpy as np
7+
import math
68
from pathlib import Path
79
if len(sys.argv) < 2:
810
print("usage: python3 graph.py folder/file.jsonl")
@@ -24,6 +26,15 @@
2426
'key_type': name_parts[1]
2527
}
2628
)
29+
# is_time_sampled = only_needed[0]["input_data_flavour"].startswith("Sampling")
30+
def geo_mean_70percent_lowest(series):
31+
size = len(series)
32+
smallest = series.nsmallest(math.ceil(size * 0.7))
33+
positive = smallest[smallest > 0]
34+
if len(positive) == 0:
35+
return np.nan
36+
return np.exp(np.mean(np.log(positive)))
37+
2738
df = pd.DataFrame(only_needed)
2839
df = df.drop('run_name', axis=1)
2940
images_root_base = str(Path.home())+"/.join_perf/images"
@@ -42,15 +53,15 @@
4253
print(graph_name)
4354
subset = df[(df["input_data_flavour"] == data_flavour) &
4455
(df["key_type"] == key_type)]
45-
print(subset)
4656
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 8), sharex=True)
4757

4858
for name, group in subset.groupby('join_algorithm'):
59+
# Collapse repeated samples of the same table size into one robust value, then
# order the points by table size (the index) so the plotted line always runs
# left-to-right along the x-axis; ordering by time could make it double back.
group = group.groupby('left_table_size')['time'].apply(geo_mean_70percent_lowest).sort_index()
4960
axes.plot(
50-
group['left_table_size'],
51-
group['time'],
52-
label=name,
53-
marker='o'
61+
group.index,
62+
group.values,
63+
label=name,
64+
marker='o'
5465
)
5566
axes.set_ylabel('time')
5667
axes.set_xlabel('left_rows')
@@ -65,5 +76,4 @@
6576
plt.savefig(log_images + "/" + graph_name + ".jpeg")
6677

6778
print(f"images without y-axis log scaling are written to {simple_images}")
68-
print(f"images with y-axis log scaling are written to {log_images}")
69-
79+
print(f"images with y-axis log scaling are written to {log_images}")

ydb/core/kqp/tools/join_perf/main.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,27 +36,30 @@ int main(int argc, char** argv) {
3636
opts.AddHelpOption('h');
3737

3838
NKikimr::NMiniKQL::TBenchmarkSettings params;
39-
opts.AddLongOption('s', "benchmark_sizes")
40-
.Help("left and right table sizes to choose for joins benchmark. visit NBenchmarkSizes namespace in "
41-
"benchmark_settings.cpp to see exact values")
39+
// Preset factory selected by the "case" option. Initialised to the factory that
// corresponds to the option's "small" default so the call made after option
// parsing never goes through an indeterminate pointer, even if the option
// handler is not invoked.
NKikimr::NMiniKQL::TPreset (*presetWithSamples)(int, int) = &NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes;
40+
int samples = 1;
41+
int scale = 1;
42+
opts.AddHelpOption().Help("visit NBenchmarkSizes namespace in benchmark_settings.cpp for explanation");
43+
opts.AddLongOption('c', "case")
44+
.Help("left and right table sizes to choose for joins benchmark.")
4245
.Choices({"exp", "linear", "small"})
4346
.DefaultValue("small")
4447
.Handler1([&](const NLastGetopt::TOptsParser* option) {
4548
auto val = TStringBuf(option->CurVal());
46-
auto preset = [&]() -> NKikimr::NMiniKQL::TBenchmarkSettings::TPreset {
49+
presetWithSamples = [&]() {
4750
if (val == "exp") {
48-
return {NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease(), "ExpGrowth"};
51+
return &NKikimr::NMiniKQL::NBenchmarkSizes::ExponentialSizeIncrease;
4952
} else if (val == "linear") {
50-
return {NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease(), "LinearGrowth"};
53+
return &NKikimr::NMiniKQL::NBenchmarkSizes::LinearSizeIncrease;
5154
} else if (val == "small") {
52-
return {NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes(), "VerySmall"};
55+
return &NKikimr::NMiniKQL::NBenchmarkSizes::VerySmallSizes;
5356
} else {
5457
// The option is registered as "case"; keep the diagnostic in sync with it.
Y_ABORT("unknown option for case");
5558
}
5659
}();
57-
params.Presets.push_back(preset);
5860
});
59-
61+
opts.AddLongOption('s', "samples").Help("number representing how much to repeat single case. useful for noise reduction.").DefaultValue(1).StoreResult(&samples);
62+
// `scale` is a multiplier: the exp/linear presets compute `init = (1 << 18) * scale`
// in a plain int. A default of 1 << 18 would make that product 2^36 and overflow,
// so the default is the neutral multiplier 1 (same as the initial value of `scale`).
opts.AddLongOption("scale").Help("multiplier applied to the base table size (1 << 18 rows) of the smallest case").DefaultValue(1).StoreResult(&scale);
6063
params.Algorithms = {
6164
NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockMap,
6265
// NKikimr::NMiniKQL::ETestedJoinAlgo::kBlockHash,
@@ -70,6 +73,7 @@ int main(int argc, char** argv) {
7073
};
7174

7275
NLastGetopt::TOptsParseResult parsedOptions(&opts, argc, argv);
76+
params.Presets.push_back(presetWithSamples(samples, scale));
7377
AddLittleLeftTablePreset(params);
7478

7579
auto benchmarkResults = NKikimr::NMiniKQL::RunJoinsBench(params);

0 commit comments

Comments
 (0)