24
24
import warnings
25
25
import statistics
26
26
27
def print_to_github_out(message):
    """Append *message* as one line to the file named by ``GITHUB_OUTPUT``.

    The target path is read from the ``GITHUB_OUTPUT`` environment variable
    (the step-output file used by GitHub Actions). When the variable is
    unset or empty the call is a no-op, so the script can also be run
    outside of CI.

    Args:
        message: Text to append; ``print`` adds the trailing newline.
    """
    output_path = os.environ.get("GITHUB_OUTPUT")
    if not output_path:
        # Unset or empty: nothing to write to (open("") would raise).
        return

    # Messages may contain non-ASCII characters (e.g. "→"), so do not rely
    # on the platform's default encoding when appending.
    with open(output_path, "a", encoding="utf-8") as f:
        print(message, file=f)
27
31
28
32
def compare_two_benchdnn (file1 , file2 , tolerance = 0.05 ):
29
33
"""
@@ -40,9 +44,9 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
40
44
r2 = [x .split ("," ) for x in r2 if x [0 :8 ] == "--mode=P" ]
41
45
42
46
if (len (r1 ) == 0 ) or (len (r2 ) == 0 ):
43
- warnings . warn ("One or both of the test results have zero lines" )
47
+ raise Exception ("One or both of the test results have zero lines" )
44
48
if len (r1 ) != len (r2 ):
45
- warnings . warn ("The number of benchdnn runs do not match" )
49
+ raise Exception ("The number of benchdnn runs do not match" )
46
50
47
51
r1_exec = defaultdict (list )
48
52
r1_ctime = defaultdict (list )
@@ -57,17 +61,17 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
57
61
r2_exec [key ].append (float (exec_time ))
58
62
r2_ctime [key ].append (float (ctime ))
59
63
60
- failed_tests = []
64
+ exec_failures , ctime_failures = [], []
61
65
for prb in r1_exec :
62
66
if prb not in r2_exec :
63
- warnings . warn (f"{ prb } exists in { file1 } but not { file2 } " )
64
- continue
67
+ raise Exception (f"{ prb } exists in { file1 } but not { file2 } " )
68
+
65
69
exec1 = r1_exec [prb ]
66
70
exec2 = r2_exec [prb ]
67
71
ctime1 = r1_ctime [prb ]
68
72
ctime2 = r2_ctime [prb ]
69
- res = ttest_ind (exec2 , exec1 , alternative = "greater" )
70
- ctime_test = ttest_ind (ctime2 , ctime1 , alternative = "greater" )
73
+ exec_ttest = ttest_ind (exec2 , exec1 , alternative = "greater" )
74
+ ctime_ttest = ttest_ind (ctime2 , ctime1 , alternative = "greater" )
71
75
r1_med_exec = statistics .median (exec1 )
72
76
r2_med_exec = statistics .median (exec2 )
73
77
r1_med_ctime = statistics .median (ctime1 )
@@ -81,42 +85,49 @@ def compare_two_benchdnn(file1, file2, tolerance=0.05):
81
85
)
82
86
continue
83
87
84
- # A test fails if either execution time or creation time:
88
+ # A test fails if execution time:
85
89
# - shows a statistically significant regression and
86
- # - shows ≥ 10% slowdown in both median or min times
87
- exec_regressed = res .pvalue <= 0.05 and (
90
+ # - shows ≥ 10% slowdown in either median or min times
91
+ exec_regressed = exec_ttest .pvalue <= 0.05 and (
88
92
(r2_med_exec - r1_med_exec ) / r1_med_exec >= 0.1
89
93
or (min (exec2 ) - min (exec1 )) / min (exec1 ) >= 0.1
90
94
)
91
- ctime_regressed = ctime_test .pvalue <= 0.05 and (
95
+ ctime_regressed = ctime_ttest .pvalue <= 0.05 and (
92
96
(r2_med_ctime - r1_med_ctime ) / r1_med_ctime >= 0.1
93
97
or (min (ctime2 ) - min (ctime1 )) / min (ctime1 ) >= 0.1
94
98
)
95
99
96
- if exec_regressed or ctime_regressed :
97
- failed_tests .append (
100
+ if exec_regressed :
101
+ exec_failures .append (
98
102
f"{ prb } exec: { r1_med_exec :.3g} → { r2_med_exec :.3g} "
99
- f"(p={ res .pvalue :.3g} ), "
103
+ f"(p={ exec_ttest .pvalue :.3g} )"
104
+ )
105
+ if ctime_regressed :
106
+ ctime_failures .append (
100
107
f"ctime: { r1_med_ctime :.3g} → { r2_med_ctime :.3g} "
101
- f"(p={ ctime_test .pvalue :.3g} )"
108
+ f"(p={ ctime_ttest .pvalue :.3g} )"
102
109
)
103
110
104
- if "GITHUB_OUTPUT" in os .environ :
105
- with open (os .environ ["GITHUB_OUTPUT" ], "a" ) as f :
106
- print (f"pass={ not failed_tests } " , file = f )
111
+ print_to_github_out (f"pass={ not exec_failures } " )
112
+
113
+ message = ""
114
+ if ctime_failures :
115
+ message += (
116
+ "\n ----The following ctime regression tests failed:----\n "
117
+ + "\n " .join (ctime_failures )
118
+ + "\n "
119
+ )
107
120
108
- if not failed_tests :
121
+ if not exec_failures :
122
+ print_to_github_out (f'message={ message .replace ("\n " , "%0A" )} ' )
109
123
print ("Regression tests passed" )
110
124
else :
111
- message = (
112
- "\n ----The following regression tests failed:----\n "
113
- + "\n " .join (failed_tests )
125
+ message + = (
126
+ "\n ----The following exec time regression tests failed:----\n "
127
+ + "\n " .join (exec_failures )
114
128
+ "\n "
115
129
)
116
- if "GITHUB_OUTPUT" in os .environ :
117
- out_message = message .replace ("\n " , "%0A" )
118
- with open (os .environ ["GITHUB_OUTPUT" ], "a" ) as f :
119
- print (f"message={ out_message } " , file = f )
130
+ print_to_github_out (f'message={ message .replace ("\n " , "%0A" )} ' )
120
131
print (message )
121
132
raise Exception ("Some regression tests failed" )
122
133
0 commit comments