Skip to content

Commit 2edc31b

Browse files
committed
ci: aarch64: make ctime regression a warning
1 parent 54a7c26 commit 2edc31b

File tree

1 file changed

+39
-26
lines changed

1 file changed

+39
-26
lines changed

.github/automation/performance/benchdnn_comparison.py

Lines changed: 39 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,12 @@
2525
import statistics
2626

2727

28+
def print_to_github_out(message):
29+
if "GITHUB_OUTPUT" in os.environ:
30+
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
31+
print(message.replace("\n", "%0A"), file=f)
32+
33+
2834
def compare_two_benchdnn(file1, file2, tolerance=0.05):
2935
"""
3036
Compare two benchdnn output files
@@ -40,9 +46,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
4046
r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"]
4147

4248
if (len(r1) == 0) or (len(r2) == 0):
43-
warnings.warn("One or both of the test results have zero lines")
49+
raise Exception("One or both of the test results have zero lines")
4450
if len(r1) != len(r2):
45-
warnings.warn("The number of benchdnn runs do not match")
51+
raise Exception("The number of benchdnn runs do not match")
4652

4753
r1_exec = defaultdict(list)
4854
r1_ctime = defaultdict(list)
@@ -57,17 +63,17 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
5763
r2_exec[key].append(float(exec_time))
5864
r2_ctime[key].append(float(ctime))
5965

60-
failed_tests = []
66+
exec_failures, ctime_failures = [], []
6167
for prb in r1_exec:
6268
if prb not in r2_exec:
63-
warnings.warn(f"{prb} exists in {file1} but not {file2}")
64-
continue
69+
raise Exception(f"{prb} exists in {file1} but not {file2}")
70+
6571
exec1 = r1_exec[prb]
6672
exec2 = r2_exec[prb]
6773
ctime1 = r1_ctime[prb]
6874
ctime2 = r2_ctime[prb]
69-
res = ttest_ind(exec2, exec1, alternative="greater")
70-
ctime_test = ttest_ind(ctime2, ctime1, alternative="greater")
75+
exec_ttest = ttest_ind(exec2, exec1, alternative="greater")
76+
ctime_ttest = ttest_ind(ctime2, ctime1, alternative="greater")
7177
r1_med_exec = statistics.median(exec1)
7278
r2_med_exec = statistics.median(exec2)
7379
r1_med_ctime = statistics.median(ctime1)
@@ -81,42 +87,49 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
8187
)
8288
continue
8389

84-
# A test fails if either execution time or creation time:
90+
# A test fails if execution time:
8591
# - shows a statistically significant regression and
86-
# - shows ≥ 10% slowdown in both median or min times
87-
exec_regressed = res.pvalue <= 0.05 and (
92+
# - shows ≥ 10% slowdown in either median or min times
93+
exec_regressed = exec_ttest.pvalue <= 0.05 and (
8894
(r2_med_exec - r1_med_exec) / r1_med_exec >= 0.1
8995
or (min(exec2) - min(exec1)) / min(exec1) >= 0.1
9096
)
91-
ctime_regressed = ctime_test.pvalue <= 0.05 and (
97+
ctime_regressed = ctime_ttest.pvalue <= 0.05 and (
9298
(r2_med_ctime - r1_med_ctime) / r1_med_ctime >= 0.1
9399
or (min(ctime2) - min(ctime1)) / min(ctime1) >= 0.1
94100
)
95101

96-
if exec_regressed or ctime_regressed:
97-
failed_tests.append(
102+
if exec_regressed:
103+
exec_failures.append(
98104
f"{prb} exec: {r1_med_exec:.3g}{r2_med_exec:.3g} "
99-
f"(p={res.pvalue:.3g}), "
105+
f"(p={exec_ttest.pvalue:.3g})"
106+
)
107+
if ctime_regressed:
108+
ctime_failures.append(
100109
f"ctime: {r1_med_ctime:.3g}{r2_med_ctime:.3g}"
101-
f"(p={ctime_test.pvalue:.3g})"
110+
f"(p={ctime_ttest.pvalue:.3g})"
102111
)
103112

104-
if "GITHUB_OUTPUT" in os.environ:
105-
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
106-
print(f"pass={not failed_tests}", file=f)
113+
print_to_github_out(f"pass={not exec_failures}")
114+
115+
message = ""
116+
if ctime_failures:
117+
message += (
118+
"\n----The following ctime regression tests failed:----\n"
119+
+ "\n".join(ctime_failures)
120+
+ "\n"
121+
)
107122

108-
if not failed_tests:
123+
if not exec_failures:
124+
print_to_github_out(f"message={message}")
109125
print("Regression tests passed")
110126
else:
111-
message = (
112-
"\n----The following regression tests failed:----\n"
113-
+ "\n".join(failed_tests)
127+
message += (
128+
"\n----The following exec time regression tests failed:----\n"
129+
+ "\n".join(exec_failures)
114130
+ "\n"
115131
)
116-
if "GITHUB_OUTPUT" in os.environ:
117-
out_message = message.replace("\n", "%0A")
118-
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
119-
print(f"message={out_message}", file=f)
132+
print_to_github_out(f"message={message}")
120133
print(message)
121134
raise Exception("Some regression tests failed")
122135

0 commit comments

Comments
 (0)