 def load_model_info(jobs_dir, job_dir):
-    template, date, time, rnd, fold = job_dir.split('-')
+    experiment, date, time, rnd, fold = job_dir.split('-')
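+    # (job_dir is assumed to follow <experiment>-<date>-<time>-<rnd>-<foldN>)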
     hist_path = os.path.join(jobs_dir, job_dir, 'train.csv')

     df = pandas.read_csv(hist_path)

     df['epoch'] = df.epoch + 1
     df['fold'] = int(fold[-1])
-    df['template'] = template
+    df['experiment'] = experiment
     df['run'] = '-'.join([date, time, rnd])

     models = []
@@ -34,7 +34,7 @@ def load_model_info(jobs_dir, job_dir):
     expected_last = 'e{:02d}-'.format(len(models))
     assert last_model.startswith(expected_last), (last_model, expected_last)

-    df['model'] = [os.path.join(jobs_dir, job_dir, m) for m in models]
+    df['model_path'] = [os.path.join(jobs_dir, job_dir, m) for m in models]
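+    # full path to each saved per-epoch model file; evaluate_model() loads these later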
     return df

 def load_train_history(jobs_dir, limit=None):
@@ -44,7 +44,6 @@ def load_train_history(jobs_dir, limit=None):
         matching = [d for d in jobs if limit in d]
     else:
         matching = jobs
-    #assert len(matching) == 9, "Expected 9 folds, found {} matching {}".format(len(matching), job_id)

     dataframes = []

@@ -72,51 +71,67 @@ def pick_best(history, n_best=1):

     def best_by_loss(df):
         return df.sort_values('voted_val_acc', ascending=False).head(n_best)
-    return history.groupby('fold').apply(best_by_loss)
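+    # keep the n_best epochs for every (experiment, fold) pair, ranked by voted_val_acc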
+    return history.groupby(['experiment', 'fold']).apply(best_by_loss)

-
-def evaluate(models, folds, test, predictor):
+def evaluate_model(predictor, model_path, val_data, test_data):
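+    # scores one trained model on the validation and test sets, split by salience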

     def score(model, data):
         y_true = data.classID
         p = predictor(model, data)
         y_pred = numpy.argmax(p, axis=1)
         # other metrics can be derived from confusion matrix
         acc = sklearn.metrics.accuracy_score(y_true, y_pred)
-        print('acc', acc)
         labels = list(range(len(urbansound8k.classnames)))
         confusion = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=labels)
-        return confusion
+        return acc, confusion

-    # validation
-    out = {
-        'val_foreground': [],
-        'val_background': [],
-        'test_foreground': [],
-        'test_background': [],
-    }
+    model = keras.models.load_model(model_path)
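+    # load the trained Keras model from its saved file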

     salience_info = { 'foreground': 1, 'background': 2 }
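     # (UrbanSound8K salience codes: 1 = foreground, 2 = background)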
-
-    # val
-    for i, m in enumerate(models):
-        data = folds[i][1]
+    test_info = { 'val': val_data, 'test': test_data }
+    out = {}
+    for setname, data in test_info.items():
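+        # produces keys like 'val_foreground' and 'test_background'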
         for variant, salience in salience_info.items():
-            s = score(m, data[data.salience == salience])
-            out['val_' + variant].append(s)
-
-    # test
-    for i, m in enumerate(models):
-        data = test
-        for variant, salience in salience_info.items():
-            s = score(m, data[data.salience == salience])
-            out['test_' + variant].append(s)
-
-    for k, v in out.items():
-        out[k] = numpy.stack(v)
+            key = '{}_{}'.format(setname, variant)
+            acc, confusion = score(model, data[data.salience == salience])
+            print('acc for', key, acc)
+            out[key] = confusion

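     # element-wise sums of the per-salience confusion matrices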
     out['val'] = out['val_foreground'] + out['val_background']
     out['test'] = out['test_foreground'] + out['test_background']
+    return out
+
+def evaluate(models, folds, testset, predictor, out_dir, dry_run=False):
+
+    def eval_experiment(df):
+        results = {}
+        by_fold = df.sort_index(level="fold", ascending=True)
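+        # df carries the (experiment, fold) MultiIndex created by pick_best()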
+
+        for idx, row in by_fold.iterrows():
+            print('Testing model {} fold={}'.format(row['experiment'], row['fold']))
+
+            model_path = row['model_path']
+            val = folds[row['fold']][1]
+            test = testset
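+            # check pass: use a small slice of data just to exercise the pipeline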
+            if dry_run:
+                val = test[0:20]
+                test = test[0:20]
+
+            result = evaluate_model(predictor, model_path, val, test)
+
+            # convert to dict-of-arrays
+            for k, v in result.items():
+                if results.get(k) is None:
+                    results[k] = []
+                results[k].append(v)
+
+        exname = df['experiment'].unique()[0]
+        results_path = os.path.join(out_dir, '{}.confusion.npz'.format(exname))
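+        # numpy.savez stores each list of per-fold confusion matrices as one array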
+        numpy.savez(results_path, **results)
+        print('Wrote', results_path)
+        return results_path
+
+    out = models.groupby(level='experiment').apply(eval_experiment)

     return out

@@ -132,8 +147,8 @@ def parse(args):
     a('--run', dest='run', default='',
        help='%(default)s')
-    a('--model', dest='model', default='',
-       help='%(default)s')
+    a('--check', action='store_true', default=False,
+       help='Run a check pass, without actually evaluating')

     a('--out', dest='results_dir', default='./data/results',
        help='%(default)s')
@@ -146,10 +161,7 @@ def parse(args):
 def main():

     args = parse(sys.argv[1:])
-    if not args.run:
-        args.run = args.experiment
-
-    out_dir = os.path.join(args.results_dir, args.experiment)
+    out_dir = os.path.join(args.results_dir, args.run)

     common.ensure_directories(out_dir)

@@ -180,19 +192,16 @@ def predict(model, data):
                      method=voting, overlap=overlap)

     history = load_train_history(args.models_dir, args.run)
-    best = pick_best(history)
+    n_folds = len(history.fold.unique())
+    n_experiments = len(history.experiment.unique())
+    print("Found {} experiments across {} folds".format(n_experiments, n_folds))

-    print('Loading models...')
-    models = best['model'].apply(lambda p: keras.models.load_model(p))
-    print('Best model', best.voted_val_acc)
+    best = pick_best(history)
+    print('Best models\n', best[['epoch', 'voted_val_acc']])

     print('Testing models...')
-    results = evaluate(models, folds, test, predictor=predict)
-
-    results_path = os.path.join(out_dir, 'confusion.npz')
-    numpy.savez(results_path, **results)
+    results = evaluate(best, folds, test, predictor=predict, out_dir=out_dir, dry_run=args.check)
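+    # evaluate() writes one <experiment>.confusion.npz per experiment into out_dir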

-    print('Wrote to', results_path)

 if __name__ == '__main__':
     main()