25
25
import statistics
26
26
27
27
28
def print_to_github_out(message):
    """
    Append a message to the file named by the GITHUB_OUTPUT environment variable.

    Does nothing when GITHUB_OUTPUT is not set. Every newline inside the
    message is written as the literal text "%0A", so one call always adds
    exactly one line to the file.

    message: text to append (for example "pass=True" or "message=...")
    """
    output_path = os.environ.get("GITHUB_OUTPUT")
    if output_path is None:
        return

    # Explicit UTF-8: callers pass messages containing non-ASCII characters
    # (such as "→"), which a non-UTF-8 default locale encoding cannot write.
    with open(output_path, "a", encoding="utf-8") as f:
        # A raw newline would split the record across several lines of the
        # output file, so each one is replaced before writing.
        print(message.replace("\n", "%0A"), file=f)
32
+
33
+
28
34
def compare_two_benchdnn (file1 , file2 , tolerance = 0.05 ):
29
35
"""
30
36
Compare two benchdnn output files
@@ -40,9 +46,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
40
46
r2 = [x .split ("," ) for x in r2 if x [0 :8 ] == "--mode=P" ]
41
47
42
48
if (len (r1 ) == 0 ) or (len (r2 ) == 0 ):
43
- warnings . warn ("One or both of the test results have zero lines" )
49
+ raise Exception ("One or both of the test results have zero lines" )
44
50
if len (r1 ) != len (r2 ):
45
- warnings . warn ("The number of benchdnn runs do not match" )
51
+ raise Exception ("The number of benchdnn runs do not match" )
46
52
47
53
r1_exec = defaultdict (list )
48
54
r1_ctime = defaultdict (list )
@@ -57,17 +63,17 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
57
63
r2_exec [key ].append (float (exec_time ))
58
64
r2_ctime [key ].append (float (ctime ))
59
65
60
- failed_tests = []
66
+ exec_failures , ctime_failures = [], []
61
67
for prb in r1_exec :
62
68
if prb not in r2_exec :
63
- warnings . warn (f"{ prb } exists in { file1 } but not { file2 } " )
64
- continue
69
+ raise Exception (f"{ prb } exists in { file1 } but not { file2 } " )
70
+
65
71
exec1 = r1_exec [prb ]
66
72
exec2 = r2_exec [prb ]
67
73
ctime1 = r1_ctime [prb ]
68
74
ctime2 = r2_ctime [prb ]
69
- res = ttest_ind (exec2 , exec1 , alternative = "greater" )
70
- ctime_test = ttest_ind (ctime2 , ctime1 , alternative = "greater" )
75
+ exec_ttest = ttest_ind (exec2 , exec1 , alternative = "greater" )
76
+ ctime_ttest = ttest_ind (ctime2 , ctime1 , alternative = "greater" )
71
77
r1_med_exec = statistics .median (exec1 )
72
78
r2_med_exec = statistics .median (exec2 )
73
79
r1_med_ctime = statistics .median (ctime1 )
@@ -81,42 +87,49 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
81
87
)
82
88
continue
83
89
84
- # A test fails if either execution time or creation time:
90
+ # A test fails if execution time:
85
91
# - shows a statistically significant regression and
86
- # - shows ≥ 10% slowdown in both median or min times
87
- exec_regressed = res .pvalue <= 0.05 and (
92
+ # - shows ≥ 10% slowdown in either median or min times
93
+ exec_regressed = exec_ttest .pvalue <= 0.05 and (
88
94
(r2_med_exec - r1_med_exec ) / r1_med_exec >= 0.1
89
95
or (min (exec2 ) - min (exec1 )) / min (exec1 ) >= 0.1
90
96
)
91
- ctime_regressed = ctime_test .pvalue <= 0.05 and (
97
+ ctime_regressed = ctime_ttest .pvalue <= 0.05 and (
92
98
(r2_med_ctime - r1_med_ctime ) / r1_med_ctime >= 0.1
93
99
or (min (ctime2 ) - min (ctime1 )) / min (ctime1 ) >= 0.1
94
100
)
95
101
96
- if exec_regressed or ctime_regressed :
97
- failed_tests .append (
102
+ if exec_regressed :
103
+ exec_failures .append (
98
104
f"{ prb } exec: { r1_med_exec :.3g} → { r2_med_exec :.3g} "
99
- f"(p={ res .pvalue :.3g} ), "
105
+ f"(p={ exec_ttest .pvalue :.3g} )"
106
+ )
107
+ if ctime_regressed :
108
+ ctime_failures .append (
100
109
f"ctime: { r1_med_ctime :.3g} → { r2_med_ctime :.3g} "
101
- f"(p={ ctime_test .pvalue :.3g} )"
110
+ f"(p={ ctime_ttest .pvalue :.3g} )"
102
111
)
103
112
104
- if "GITHUB_OUTPUT" in os .environ :
105
- with open (os .environ ["GITHUB_OUTPUT" ], "a" ) as f :
106
- print (f"pass={ not failed_tests } " , file = f )
113
+ print_to_github_out (f"pass={ not exec_failures } " )
114
+
115
+ message = ""
116
+ if ctime_failures :
117
+ message += (
118
+ "\n ----The following ctime regression tests failed:----\n "
119
+ + "\n " .join (ctime_failures )
120
+ + "\n "
121
+ )
107
122
108
- if not failed_tests :
123
+ if not exec_failures :
124
+ print_to_github_out (f"message={ message } " )
109
125
print ("Regression tests passed" )
110
126
else :
111
- message = (
112
- "\n ----The following regression tests failed:----\n "
113
- + "\n " .join (failed_tests )
127
+ message + = (
128
+ "\n ----The following exec time regression tests failed:----\n "
129
+ + "\n " .join (exec_failures )
114
130
+ "\n "
115
131
)
116
- if "GITHUB_OUTPUT" in os .environ :
117
- out_message = message .replace ("\n " , "%0A" )
118
- with open (os .environ ["GITHUB_OUTPUT" ], "a" ) as f :
119
- print (f"message={ out_message } " , file = f )
132
+ print_to_github_out (f"message={ message } " )
120
133
print (message )
121
134
raise Exception ("Some regression tests failed" )
122
135
0 commit comments