Skip to content

Commit

Permalink
More tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dianaclarke committed May 27, 2021
1 parent 7948c4f commit 8be76aa
Showing 1 changed file with 187 additions and 25 deletions.
212 changes: 187 additions & 25 deletions conbench/tests/entities/test_comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,87 @@ def test_compare_regression():
}


def test_compare_regression_less_is_better():
    """Check that a larger value on a seconds-based benchmark is a regression.

    The baseline reports 1000 s and the contender 1060 s, each with a
    z-score of 3.0.  Both ``compare()`` and ``formatted()`` must report a
    6% change against a 5% threshold, flag a regression (overall and for
    each z-score), report no improvement, and set ``less_is_better``.
    """
    # NOTE(review): presumably less_is_better is derived from the "s" unit;
    # confirm against BenchmarkComparator.
    baseline = {
        "batch": "arrow-compute-scalar-cast-benchmark",
        "benchmark": "CastUInt32ToInt32Safe/262144/1000",
        "value": "1000",
        "unit": "s",
        "id": "some-benchmark-id-1",
        "batch_id": "some-batch-id-1",
        "run_id": "some-run-id-1",
        "tags": {"tag_one": "one", "tag_two": "two"},
        "z_score": "3.0",
    }
    # The contender only differs in its value and identifiers; it gets its
    # own tags dict so the two inputs share no mutable state.
    contender = {
        **baseline,
        "value": "1060",
        "id": "some-benchmark-id-2",
        "batch_id": "some-batch-id-2",
        "run_id": "some-run-id-2",
        "tags": {"tag_one": "one", "tag_two": "two"},
    }

    result = BenchmarkComparator(baseline, contender).compare()
    formatted = BenchmarkComparator(baseline, contender).formatted()

    expected_raw = dict(
        batch="arrow-compute-scalar-cast-benchmark",
        benchmark="CastUInt32ToInt32Safe/262144/1000",
        change="6.000",
        threshold="5.000",
        regression=True,
        improvement=False,
        deviations="2.000",
        baseline_z_score="3.000",
        contender_z_score="3.000",
        baseline_z_regression=True,
        baseline_z_improvement=False,
        contender_z_regression=True,
        contender_z_improvement=False,
        baseline="1000.000",
        contender="1060.000",
        baseline_id="some-benchmark-id-1",
        contender_id="some-benchmark-id-2",
        baseline_batch_id="some-batch-id-1",
        contender_batch_id="some-batch-id-2",
        baseline_run_id="some-run-id-1",
        contender_run_id="some-run-id-2",
        unit="s",
        less_is_better=True,
        tags={"tag_one": "one", "tag_two": "two"},
    )
    # The formatted view differs only in the percent suffix on change and
    # threshold, and in the unit appended to the two values.
    expected_formatted = dict(
        expected_raw,
        change="6.000%",
        threshold="5.000%",
        baseline="1000.000 s",
        contender="1060.000 s",
        tags={"tag_one": "one", "tag_two": "two"},
    )

    assert result == expected_raw
    assert formatted == expected_formatted


def test_compare_regression_but_under_threshold():
baseline = {
"batch": "arrow-compute-scalar-cast-benchmark",
Expand Down Expand Up @@ -409,28 +490,28 @@ def test_compare_improvement():
}


def test_compare_improvement_custom_threshold_and_deviations():
def test_compare_improvement_less_is_better():
baseline = {
"batch": "arrow-compute-scalar-cast-benchmark",
"benchmark": "CastUInt32ToInt32Safe/262144/1000",
"value": "1000",
"unit": "i/s",
"unit": "s",
"id": "some-benchmark-id-1",
"batch_id": "some-batch-id-1",
"run_id": "some-run-id-1",
"tags": {"tag_one": "one", "tag_two": "two"},
"z_score": "2.0",
"z_score": "-3.0",
}
contender = {
"batch": "arrow-compute-scalar-cast-benchmark",
"benchmark": "CastUInt32ToInt32Safe/262144/1000",
"value": "1050",
"unit": "i/s",
"value": "940",
"unit": "s",
"id": "some-benchmark-id-2",
"batch_id": "some-batch-id-2",
"run_id": "some-run-id-2",
"tags": {"tag_one": "one", "tag_two": "two"},
"z_score": "2.0",
"z_score": "-3.0",
}

result = BenchmarkComparator(baseline, contender).compare()
Expand All @@ -439,53 +520,53 @@ def test_compare_improvement_custom_threshold_and_deviations():
assert result == {
"batch": "arrow-compute-scalar-cast-benchmark",
"benchmark": "CastUInt32ToInt32Safe/262144/1000",
"change": "5.000",
"change": "-6.000",
"threshold": "5.000",
"regression": False,
"improvement": False,
"improvement": True,
"deviations": "2.000",
"baseline_z_score": "2.000",
"contender_z_score": "2.000",
"baseline_z_score": "-3.000",
"contender_z_score": "-3.000",
"baseline_z_regression": False,
"baseline_z_improvement": False,
"baseline_z_improvement": True,
"contender_z_regression": False,
"contender_z_improvement": False,
"contender_z_improvement": True,
"baseline": "1000.000",
"contender": "1050.000",
"contender": "940.000",
"baseline_id": "some-benchmark-id-1",
"contender_id": "some-benchmark-id-2",
"baseline_batch_id": "some-batch-id-1",
"contender_batch_id": "some-batch-id-2",
"baseline_run_id": "some-run-id-1",
"contender_run_id": "some-run-id-2",
"unit": "i/s",
"less_is_better": False,
"unit": "s",
"less_is_better": True,
"tags": {"tag_one": "one", "tag_two": "two"},
}
assert formatted == {
"batch": "arrow-compute-scalar-cast-benchmark",
"benchmark": "CastUInt32ToInt32Safe/262144/1000",
"change": "5.000%",
"change": "-6.000%",
"threshold": "5.000%",
"regression": False,
"improvement": False,
"improvement": True,
"deviations": "2.000",
"baseline_z_score": "2.000",
"contender_z_score": "2.000",
"baseline_z_score": "-3.000",
"contender_z_score": "-3.000",
"baseline_z_regression": False,
"baseline_z_improvement": False,
"baseline_z_improvement": True,
"contender_z_regression": False,
"contender_z_improvement": False,
"baseline": "1.000K i/s",
"contender": "1.050K i/s",
"contender_z_improvement": True,
"baseline": "1000.000 s",
"contender": "940.000 s",
"baseline_id": "some-benchmark-id-1",
"contender_id": "some-benchmark-id-2",
"baseline_batch_id": "some-batch-id-1",
"contender_batch_id": "some-batch-id-2",
"baseline_run_id": "some-run-id-1",
"contender_run_id": "some-run-id-2",
"unit": "i/s",
"less_is_better": False,
"unit": "s",
"less_is_better": True,
"tags": {"tag_one": "one", "tag_two": "two"},
}

Expand Down Expand Up @@ -574,6 +655,87 @@ def test_compare_improvement_but_under_threshold():
}


def test_compare_improvement_custom_threshold_and_deviations():
    """Check a 5% gain on an ``i/s`` benchmark that is not an improvement.

    The baseline reports 1000 i/s and the contender 1050 i/s, each with a
    z-score of 2.0.  Both ``compare()`` and ``formatted()`` must report a
    change of 5% with a threshold of 5% and 2 deviations, and flag neither
    a regression nor an improvement (overall or for either z-score).
    """
    # NOTE(review): despite the test name, the comparator is built from the
    # two summaries only; no custom threshold or deviations are passed in
    # the visible call. Confirm whether that is intentional.
    baseline = {
        "batch": "arrow-compute-scalar-cast-benchmark",
        "benchmark": "CastUInt32ToInt32Safe/262144/1000",
        "value": "1000",
        "unit": "i/s",
        "id": "some-benchmark-id-1",
        "batch_id": "some-batch-id-1",
        "run_id": "some-run-id-1",
        "tags": {"tag_one": "one", "tag_two": "two"},
        "z_score": "2.0",
    }
    # The contender only differs in its value and identifiers; it gets its
    # own tags dict so the two inputs share no mutable state.
    contender = {
        **baseline,
        "value": "1050",
        "id": "some-benchmark-id-2",
        "batch_id": "some-batch-id-2",
        "run_id": "some-run-id-2",
        "tags": {"tag_one": "one", "tag_two": "two"},
    }

    result = BenchmarkComparator(baseline, contender).compare()
    formatted = BenchmarkComparator(baseline, contender).formatted()

    expected_raw = dict(
        batch="arrow-compute-scalar-cast-benchmark",
        benchmark="CastUInt32ToInt32Safe/262144/1000",
        change="5.000",
        threshold="5.000",
        regression=False,
        improvement=False,
        deviations="2.000",
        baseline_z_score="2.000",
        contender_z_score="2.000",
        baseline_z_regression=False,
        baseline_z_improvement=False,
        contender_z_regression=False,
        contender_z_improvement=False,
        baseline="1000.000",
        contender="1050.000",
        baseline_id="some-benchmark-id-1",
        contender_id="some-benchmark-id-2",
        baseline_batch_id="some-batch-id-1",
        contender_batch_id="some-batch-id-2",
        baseline_run_id="some-run-id-1",
        contender_run_id="some-run-id-2",
        unit="i/s",
        less_is_better=False,
        tags={"tag_one": "one", "tag_two": "two"},
    )
    # The formatted view differs only in the percent suffix on change and
    # threshold, and in the scaled, unit-suffixed rendering of the values.
    expected_formatted = dict(
        expected_raw,
        change="5.000%",
        threshold="5.000%",
        baseline="1.000K i/s",
        contender="1.050K i/s",
        tags={"tag_one": "one", "tag_two": "two"},
    )

    assert result == expected_raw
    assert formatted == expected_formatted


def test_compare_list():
pairs = {
"some-case-id-1": {
Expand Down

0 comments on commit 8be76aa

Please sign in to comment.