Skip to content

Commit b1fce07

Browse files
committed
ci: aarch64: make ctime regression a warning
1 parent 54a7c26 commit b1fce07

File tree

1 file changed

+37
-26
lines changed

1 file changed

+37
-26
lines changed

.github/automation/performance/benchdnn_comparison.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
import warnings
2525
import statistics
2626

27+
def print_to_github_out(message):
    """
    Append ``message`` as a single line to the file named by the
    ``GITHUB_OUTPUT`` environment variable.

    When ``GITHUB_OUTPUT`` is not set, nothing is written and the call
    is a no-op.
    """
    output_path = os.environ.get("GITHUB_OUTPUT")
    if output_path is None:
        # No output file was requested, so there is nothing to do.
        return

    # Append mode keeps any lines written by earlier calls.
    with open(output_path, "a") as out_file:
        out_file.write(f"{message}\n")
2731

2832
def compare_two_benchdnn(file1, file2, tolerance=0.05):
2933
"""
@@ -40,9 +44,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
4044
r2 = [x.split(",") for x in r2 if x[0:8] == "--mode=P"]
4145

4246
if (len(r1) == 0) or (len(r2) == 0):
43-
warnings.warn("One or both of the test results have zero lines")
47+
raise Exception("One or both of the test results have zero lines")
4448
if len(r1) != len(r2):
45-
warnings.warn("The number of benchdnn runs do not match")
49+
raise Exception("The number of benchdnn runs do not match")
4650

4751
r1_exec = defaultdict(list)
4852
r1_ctime = defaultdict(list)
@@ -57,17 +61,17 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
5761
r2_exec[key].append(float(exec_time))
5862
r2_ctime[key].append(float(ctime))
5963

60-
failed_tests = []
64+
exec_failures, ctime_failures = [], []
6165
for prb in r1_exec:
6266
if prb not in r2_exec:
63-
warnings.warn(f"{prb} exists in {file1} but not {file2}")
64-
continue
67+
raise Exception(f"{prb} exists in {file1} but not {file2}")
68+
6569
exec1 = r1_exec[prb]
6670
exec2 = r2_exec[prb]
6771
ctime1 = r1_ctime[prb]
6872
ctime2 = r2_ctime[prb]
69-
res = ttest_ind(exec2, exec1, alternative="greater")
70-
ctime_test = ttest_ind(ctime2, ctime1, alternative="greater")
73+
exec_ttest = ttest_ind(exec2, exec1, alternative="greater")
74+
ctime_ttest = ttest_ind(ctime2, ctime1, alternative="greater")
7175
r1_med_exec = statistics.median(exec1)
7276
r2_med_exec = statistics.median(exec2)
7377
r1_med_ctime = statistics.median(ctime1)
@@ -81,42 +85,49 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
8185
)
8286
continue
8387

84-
# A test fails if either execution time or creation time:
88+
# A test fails if execution time:
8589
# - shows a statistically significant regression and
86-
# - shows ≥ 10% slowdown in both median or min times
87-
exec_regressed = res.pvalue <= 0.05 and (
90+
# - shows ≥ 10% slowdown in either median or min times
91+
exec_regressed = exec_ttest.pvalue <= 0.05 and (
8892
(r2_med_exec - r1_med_exec) / r1_med_exec >= 0.1
8993
or (min(exec2) - min(exec1)) / min(exec1) >= 0.1
9094
)
91-
ctime_regressed = ctime_test.pvalue <= 0.05 and (
95+
ctime_regressed = ctime_ttest.pvalue <= 0.05 and (
9296
(r2_med_ctime - r1_med_ctime) / r1_med_ctime >= 0.1
9397
or (min(ctime2) - min(ctime1)) / min(ctime1) >= 0.1
9498
)
9599

96-
if exec_regressed or ctime_regressed:
97-
failed_tests.append(
100+
if exec_regressed:
101+
exec_failures.append(
98102
f"{prb} exec: {r1_med_exec:.3g}{r2_med_exec:.3g} "
99-
f"(p={res.pvalue:.3g}), "
103+
f"(p={exec_ttest.pvalue:.3g})"
104+
)
105+
if ctime_regressed:
106+
ctime_failures.append(
100107
f"ctime: {r1_med_ctime:.3g}{r2_med_ctime:.3g}"
101-
f"(p={ctime_test.pvalue:.3g})"
108+
f"(p={ctime_ttest.pvalue:.3g})"
102109
)
103110

104-
if "GITHUB_OUTPUT" in os.environ:
105-
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
106-
print(f"pass={not failed_tests}", file=f)
111+
print_to_github_out(f"pass={not exec_failures}")
112+
113+
message = ""
114+
if ctime_failures:
115+
message += (
116+
"\n----The following ctime regression tests failed:----\n"
117+
+ "\n".join(ctime_failures)
118+
+ "\n"
119+
)
107120

108-
if not failed_tests:
121+
if not exec_failures:
122+
print_to_github_out(f'message={message.replace("\n", "%0A")}')
109123
print("Regression tests passed")
110124
else:
111-
message = (
112-
"\n----The following regression tests failed:----\n"
113-
+ "\n".join(failed_tests)
125+
message += (
126+
"\n----The following exec time regression tests failed:----\n"
127+
+ "\n".join(exec_failures)
114128
+ "\n"
115129
)
116-
if "GITHUB_OUTPUT" in os.environ:
117-
out_message = message.replace("\n", "%0A")
118-
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
119-
print(f"message={out_message}", file=f)
130+
print_to_github_out(f'message={message.replace("\n", "%0A")}')
120131
print(message)
121132
raise Exception("Some regression tests failed")
122133

0 commit comments

Comments
 (0)