Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
e1d0b6d
Add ForceShuffleElimination option to use for benchmarking
alexpaniman Oct 17, 2025
edc9eeb
Fix PRAGMA message for MaxDPHypDPTableSize
alexpaniman Oct 17, 2025
6c3a78e
Add join order tests on different randomized topologies
alexpaniman Oct 17, 2025
b1e62d3
Fix table appearing in join condition before its join clause
alexpaniman Oct 21, 2025
fb6c192
Implement vertex connectivity ordering for TRelationGraph to avoid cr…
alexpaniman Oct 24, 2025
78d976f
Migrate DumpGraph to IOutputStream
alexpaniman Oct 24, 2025
d4a2c49
Fix assert in GenerateRandomPruferSequence
alexpaniman Oct 24, 2025
8d78d1f
Implement significantly more robust benchmarking method for join order
alexpaniman Oct 24, 2025
5e2d33b
Implement degree sequence calculation for TRelationGraph
alexpaniman Oct 24, 2025
911a9de
Refactor kqp_join_topology_generator
alexpaniman Oct 24, 2025
865ba76
Disable sample tables in join topology tests
alexpaniman Oct 27, 2025
2775cd7
Improve timeout handling in join topology tests
alexpaniman Oct 27, 2025
6341d97
Separate topology tests into separate file
alexpaniman Oct 27, 2025
18ff88f
Early exit if values are stable, + stats for derived values, + serial…
alexpaniman Oct 28, 2025
ab8db74
Implement runtime configuration of benchmarks
alexpaniman Oct 29, 2025
e747ec2
Simplify test reproduction by wrapping mersenne twister
alexpaniman Oct 29, 2025
da59890
Allow args to be separated by space
alexpaniman Oct 29, 2025
92664a7
Simplify topology reproduction
alexpaniman Oct 29, 2025
9c8f95c
Use Pitman-Yor process for key selection
alexpaniman Oct 30, 2025
ccd6c9d
Replace ForceShuffleElimination with more flexible cutoff parameter
alexpaniman Oct 30, 2025
2e2066f
Clearer error message if arg is not provided
alexpaniman Oct 30, 2025
bb4ed54
Implement Chung-Lu random graph model
alexpaniman Oct 30, 2025
e58421b
Implement fixed degree topology with Havel-Hakimi + MCMC + MH
alexpaniman Oct 30, 2025
38d00a9
Add convenient launcher for many topologies
alexpaniman Oct 30, 2025
448e7e2
Allow to collect different values
alexpaniman Oct 31, 2025
af36631
Iterate number of nodes last to prevent early timeout
alexpaniman Oct 31, 2025
c58bab4
Collect a lot more data for plotting during benchmarks
alexpaniman Oct 31, 2025
51adb62
Reuse more comprehensive info collection for simpler topologies
alexpaniman Oct 31, 2025
ea0fdbf
Implement repeated regeneration with same params + fix bug with seria…
alexpaniman Oct 31, 2025
df26fdf
Fix cbo unittests after MakeNativeOptimizerNew args were updated
alexpaniman Oct 31, 2025
d6d7ec4
Large refactoring of topology benchmark
alexpaniman Nov 1, 2025
2859d01
Large refactoring of kqp_benches
alexpaniman Nov 5, 2025
9fb08de
Fix reproducibility of topology-based benchmarks
alexpaniman Nov 5, 2025
a224bed
Revert unnecessary changes in kqp_join_order_ut.cpp
alexpaniman Nov 5, 2025
1b223d2
Add kqp_benches.cpp that was extracted from kqp_benches.h after refactor
alexpaniman Nov 5, 2025
8216c39
Fix bug in TStatistics::Merge - incorrect min of a distribution
alexpaniman Nov 6, 2025
433a4f4
Don't reject samples where Shuffle Elimination was faster
alexpaniman Nov 6, 2025
2a4bbd7
Prevent NANs in speed ratio calculation
alexpaniman Nov 6, 2025
0ac5488
Fix styling + minor fixes
alexpaniman Nov 6, 2025
8f8a000
Make Arcadia style checker happy
alexpaniman Nov 11, 2025
ed39ebb
Better repro message in topology benchmarks
alexpaniman Nov 11, 2025
60b3623
Reorder option for reproducing bugs with cross join elimination
alexpaniman Nov 11, 2025
83bb78b
Extract tests for kqp benches from topology-based benchmarks
alexpaniman Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ydb/core/kqp/opt/logical/kqp_opt_log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
}

TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) {
TCBOSettings settings {
TCBOSettings settings{
.MaxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize),
.ShuffleEliminationJoinNumCutoff = Config->ShuffleEliminationJoinNumCutoff.Get().GetOrElse(TDqSettings::TDefault::ShuffleEliminationJoinNumCutoff)
};
Expand Down
266 changes: 266 additions & 0 deletions ydb/core/kqp/ut/common/kqp_arg_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
#pragma once

#include <algorithm>
#include <cassert>
#include <cctype>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>

namespace NKikimr::NKqp {

// Parses an argument string of the form "key=value;key=value" and gives typed
// access to the values. A value is either a single item ("5", "10ms") or a
// range: "1..100" (step 1) or "0.1,0.2..1.0" (step = second - first).
class TArgs {
public:
    // Iterator over the values of a TRangedValue.
    // `end` is a sentinel: once the current value reaches or passes it, the
    // iterator is pinned to `end` so that it compares equal to end().
    template <typename TValue>
    class TRangedValueIter {
    public:
        TRangedValueIter(TValue current, TValue end, TValue step)
            : Current_(current)
            , End_(end)
            , Step_(step)
        {
        }

        TValue operator*() const {
            return Current_;
        }

        TRangedValueIter& operator++() {
            const TValue previous = Current_;
            Current_ += Step_;

            // Stop when the sentinel is reached, and also when the step made no
            // forward progress (zero/negative step, unsigned wrap-around, or a
            // floating-point step too small to change the value); otherwise a
            // range-for over such a range would never terminate.
            if (Current_ >= End_ || !(previous < Current_)) {
                Current_ = End_;
            }

            return *this;
        }

        bool operator!=(const TRangedValueIter& other) const {
            assert(Step_ == other.Step_);
            return Current_ != other.Current_;
        }

    private:
        TValue Current_;
        TValue End_;
        TValue Step_;
    };

    // Either a single value or an inclusive range [from, to] walked with `step`.
    // Usable in range-for; a single value iterates exactly once.
    template <typename TValue>
    class TRangedValue {
    public:
        TRangedValue(TValue from, TValue to, TValue step)
            : IsRange_(true)
            , From_(from)
            , To_(to)
            , Step_(step)
        {
        }

        TRangedValue(TValue from)
            : IsRange_(false)
            , From_(from)
            , To_(from)
            , Step_(1)
        {
        }

        bool IsRange() const {
            return IsRange_;
        }

        TRangedValueIter<TValue> end() const {
            const TValue sentinel = EndSentinel();
            return TRangedValueIter<TValue>{sentinel, sentinel, Step_};
        }

        TRangedValueIter<TValue> begin() const {
            return TRangedValueIter<TValue>{From_, EndSentinel(), Step_};
        }

        // For a single value this is the value itself; for a range it is the
        // first element.
        TValue GetValue() const {
            return From_;
        }

        TValue GetFirst() const {
            return From_;
        }

        TValue GetLast() const {
            return To_;
        }

        TValue GetStep() const {
            return Step_;
        }

    private:
        // Smallest value that is considered to be past the last element.
        TValue EndSentinel() const {
            if constexpr (std::is_floating_point_v<TValue>) {
                // "To_ + 1" would let fractional steps run a whole unit past
                // the upper bound (0.1,0.2..0.5 would go on up to 1.4). Half a
                // step past To_ keeps To_ itself in the range even when the
                // accumulated sum has drifted slightly above it.
                return To_ + Step_ / 2;
            } else {
                return To_ + 1; // immediately after the last
            }
        }

    private:
        bool IsRange_;

        TValue From_;
        TValue To_;
        TValue Step_;
    };

public:
    TArgs(std::string input)
        : Values_(ParseMap(input))
    {
    }

    // Returns the raw string stored for `key`.
    // Throws std::out_of_range if the argument was not provided.
    std::string GetString(const std::string& key) const {
        const auto it = Values_.find(key);
        if (it == Values_.end()) {
            throw std::out_of_range("arg not provided: '" + key + "'");
        }
        return it->second;
    }

    // Returns the raw string stored for `key`, or `defaultValue` if absent.
    std::string GetStringOrDefault(const std::string& key, std::string defaultValue) const {
        const auto it = Values_.find(key);
        if (it != Values_.end()) {
            return it->second;
        }

        return defaultValue;
    }

    // Parses the value of `key` as a TRangedValue.
    // Throws std::out_of_range if the argument was not provided, and whatever
    // the underlying parser throws on malformed input.
    template <typename TValue>
    auto GetArg(const std::string& key) const {
        return ParseRangedValue<TValue>(GetString(key));
    }

    // Same as GetArg, but parses `defaultSerialized` when `key` is absent.
    template <typename TValue>
    auto GetArgOrDefault(const std::string& key, const std::string& defaultSerialized) const {
        if (HasArg(key)) {
            return GetArg<TValue>(key);
        }
        return ParseRangedValue<TValue>(defaultSerialized);
    }

    bool HasArg(const std::string& key) const {
        return Values_.find(key) != Values_.end();
    }

private:
    std::map<std::string, std::string> Values_;

private:
    static void LTrim(std::string& input) {
        input.erase(input.begin(), std::find_if(input.begin(), input.end(), [](unsigned char ch) {
            return !std::isspace(ch);
        }));
    }

    static void RTrim(std::string& input) {
        input.erase(std::find_if(input.rbegin(), input.rend(), [](unsigned char ch) {
            return !std::isspace(ch);
        }).base(), input.end());
    }

    static void Trim(std::string& input) {
        LTrim(input);
        RTrim(input);
    }

    // Splits `input` on `delimiter` into "key=value" entries. Whitespace around
    // both the key and the value is dropped; entries without '=' are ignored;
    // a later entry with the same key overwrites an earlier one.
    static std::map<std::string, std::string> ParseMap(const std::string& input, char delimiter = ';') {
        std::map<std::string, std::string> result;
        std::stringstream ss(input);

        std::string entry;
        while (std::getline(ss, entry, delimiter)) {
            // each entry looks like key value pair, e.g. "N=5"
            const size_t pos = entry.find('=');
            if (pos == std::string::npos) {
                continue;
            }

            std::string key = entry.substr(0, pos);
            std::string value = entry.substr(pos + 1);

            // Trim the key as well, so "N = 5" is stored under "N", not "N ".
            Trim(key);
            Trim(value);
            result[std::move(key)] = std::move(value);
        }

        return result;
    }

    // Parses "value", "first..last" or "first,second..last".
    // Throws std::invalid_argument when an explicit step is not positive.
    template <typename TValue>
    static auto ParseRangedValue(const std::string& input) {
        // Check if it contains ".."
        const size_t dotdot = input.find("..");

        if (dotdot == std::string::npos) {
            // parse fixed value
            auto value = ParseValue<TValue>(input);
            return TRangedValue<decltype(value)>{value};
        } else {
            // parse ranged (with step or without)
            const size_t comma = input.find(',');

            auto to = ParseValue<TValue>(input.substr(dotdot + 2));
            if (comma != std::string::npos && comma < dotdot) {
                // parse ranges like "0.1,0.2..1.0"
                auto first = ParseValue<TValue>(input.substr(0, comma));
                auto second = ParseValue<TValue>(input.substr(comma + 1, dotdot - comma - 1));

                // A zero or negative step never reaches the end of the range
                // (and wraps around for unsigned types), so reject it up front.
                if (!(first < second)) {
                    throw std::invalid_argument("range step must be positive: '" + input + "'");
                }

                auto step = second - first;
                return TRangedValue<decltype(first)>{first, to, step};
            }

            // parse ranges like "1..100"
            auto first = ParseValue<TValue>(input.substr(0, dotdot));
            return TRangedValue<decltype(first)>{first, to, /*default step=*/1};
        }
    }

    // Converts a single serialized value. Durations are returned as a
    // nanosecond count (uint64_t) so that they can be used in ranges.
    template <typename TValue>
    static auto ParseValue(const std::string& input) {
        if constexpr (std::is_same_v<TValue, double>) {
            return std::stod(input);
        } else if constexpr (std::is_same_v<TValue, uint64_t>) {
            return static_cast<uint64_t>(std::stoull(input));
        } else if constexpr (std::is_same_v<TValue, int64_t>) {
            return static_cast<int64_t>(std::stoll(input));
        } else if constexpr (std::is_same_v<TValue, std::string>) {
            return input;
        } else if constexpr (std::is_same_v<TValue, std::chrono::nanoseconds>) {
            return static_cast<uint64_t>(ParseDuration(input).count());
        } else {
            // The condition must depend on TValue: a plain static_assert(false)
            // is rejected by pre-C++23 compilers even in a discarded branch.
            static_assert(!std::is_same_v<TValue, TValue>, "Unhandled type");
        }
    }

    // Parses "<number><unit>" where unit is one of ns, us, ms, s, m, h and the
    // number may have a fractional part ("1.5s").
    // Throws std::invalid_argument if the input does not match that format.
    static std::chrono::nanoseconds ParseDuration(const std::string& input) {
        static const std::regex pattern(R"((\d+(?:\.\d+)?)\s*(ns|us|ms|s|m|h))");
        std::smatch match;

        if (!std::regex_match(input, match, pattern)) {
            throw std::invalid_argument("Invalid duration format");
        }

        const double value = std::stod(match.str(1));
        const std::string unit = match.str(2);

        double nanosPerUnit = 0;
        if (unit == "ns") {
            nanosPerUnit = 1.0;
        } else if (unit == "us") {
            nanosPerUnit = 1e3;
        } else if (unit == "ms") {
            nanosPerUnit = 1e6;
        } else if (unit == "s") {
            nanosPerUnit = 1e9;
        } else if (unit == "m") {
            nanosPerUnit = 60.0 * 1e9;
        } else if (unit == "h") {
            nanosPerUnit = 3600.0 * 1e9;
        } else {
            throw std::invalid_argument("Unknown unit");
        }

        // Scale first and round afterwards, so the fractional part accepted by
        // the pattern is kept ("1.5s" is 1'500'000'000ns, not 1s).
        return std::chrono::nanoseconds(
            static_cast<std::chrono::nanoseconds::rep>(std::llround(value * nanosPerUnit)));
    }
};

} // namespace NKikimr::NKqp
Loading
Loading