Commit ac9f8d4

test: Evaluate all experiments
1 parent: 9a316af

3 files changed: +66 −55 lines

TODO.md

Lines changed: 0 additions & 1 deletion
@@ -22,7 +22,6 @@ Report
 Experiments
 
 - Output STM32 stats to file
-- Write tool for running all experiments
 - Write uniform Keras model based on SB-CNN etc
 - Determine filter settings to make MACCs approx equal
 - Run each model on device, record inference time

microesc/report.py

Lines changed: 10 additions & 7 deletions
@@ -6,7 +6,8 @@
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from . import common, urbansound8k
+import common, urbansound8k
+#from . import common, urbansound8k
 
 groups = {
     'social_activity': [ 'street_music', 'children_playing', 'dog_bark' ],
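
Note: the plain `import common, urbansound8k` only resolves when `report.py` is run as a script from inside the `microesc/` directory, whereas the relative `from . import …` form requires the package context; keeping the old form as a comment suggests a temporary convenience for standalone runs (an inference, not stated in the commit).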
@@ -78,6 +79,9 @@ def parse(args):
 
     common.add_arguments(parser)
 
+    a('--run', dest='run', default='',
+        help='%(default)s')
+
     a('--out', dest='results_dir', default='./data/results',
         help='%(default)s')
 

@@ -98,7 +102,7 @@ def main():
 
     args = parse(None)
 
-    cm = numpy.load(os.path.join(args.results_dir, '{}'.format(args.experiment), 'confusion.npz'))
+    cm = numpy.load(os.path.join(args.results_dir, args.run, '{}.confusion.npz'.format(1)))
     val, test = cm['val'], cm['test']
 
 
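
For context, the `.confusion.npz` archives are written by `microesc/test.py` (below), one per experiment, with each key holding confusion matrices stacked across folds. A minimal sketch of inspecting one; the run and experiment names here are made up:

```python
import numpy

# layout: <results_dir>/<run>/<experiment>.confusion.npz
cm = numpy.load('./data/results/20190306-1200-abcd/1.confusion.npz')
print(cm.files)    # e.g. ['val_foreground', 'val_background', ..., 'val', 'test']
val = cm['val']    # stacked per-fold confusion matrices, e.g. shape (9, 10, 10)
print(val.sum(axis=(1, 2)))   # number of clips evaluated in each fold
```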

@@ -108,15 +112,14 @@ def main():
     val_fig.savefig('val.cm.png')
     test_fig.savefig('test.cm.png')
 
-    c_acc = cm_class_accuracy(numpy.mean(val, axis=0))
-    print_accuracies(c_acc, 'class_acc')
+    tests_acc = [ cm_accuracy(test[f]) for f in range(0, len(test)) ]
+    print_accuracies(tests_acc, 'test_acc')
 
     folds_acc = [ cm_accuracy(val[f]) for f in range(0, len(val)) ]
     print_accuracies(folds_acc, 'val_acc')
 
-    tests_acc = [ cm_accuracy(test[f]) for f in range(0, len(test)) ]
-    print_accuracies(tests_acc, 'test_acc')
-
+    c_acc = cm_class_accuracy(numpy.mean(val, axis=0))
+    print_accuracies(c_acc, 'class_acc')
 
     print('wrote')
 
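
`cm_accuracy` and `cm_class_accuracy` are defined elsewhere in `report.py` and are not part of this diff; a plausible sketch of what such helpers compute from a confusion matrix:

```python
import numpy

def cm_accuracy(cm):
    # overall accuracy: correct predictions (diagonal) over all predictions
    return numpy.trace(cm) / numpy.sum(cm)

def cm_class_accuracy(cm):
    # per-class recall: correct per class over true instances of that class
    return numpy.diag(cm) / cm.sum(axis=1)
```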

microesc/test.py

Lines changed: 56 additions & 47 deletions
@@ -13,14 +13,14 @@
 
 
 def load_model_info(jobs_dir, job_dir):
-    template, date, time, rnd, fold = job_dir.split('-')
+    experiment, date, time, rnd, fold = job_dir.split('-')
     hist_path = os.path.join(jobs_dir, job_dir, 'train.csv')
 
     df = pandas.read_csv(hist_path)
 
     df['epoch'] = df.epoch + 1
     df['fold'] = int(fold[-1])
-    df['template'] = template
+    df['experiment'] = experiment
     df['run'] = '-'.join([date, time, rnd])
 
     models = []
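
For reference, the job-directory naming convention this parsing assumes is `<experiment>-<date>-<time>-<random>-<fold>`; a hypothetical example:

```python
job_dir = 'sbcnn-20190306-1200-abcd-f3'
experiment, date, time, rnd, fold = job_dir.split('-')
assert experiment == 'sbcnn'
assert '-'.join([date, time, rnd]) == '20190306-1200-abcd'  # becomes df['run']
assert int(fold[-1]) == 3                                   # becomes df['fold']
```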
@@ -34,7 +34,7 @@ def load_model_info(jobs_dir, job_dir):
     expected_last = 'e{:02d}-'.format(len(models))
     assert last_model.startswith(expected_last), (last_model, expected_last)
 
-    df['model'] = [ os.path.join(jobs_dir, job_dir, m) for m in models ]
+    df['model_path'] = [ os.path.join(jobs_dir, job_dir, m) for m in models ]
     return df
 
 def load_train_history(jobs_dir, limit=None):
@@ -44,7 +44,6 @@ def load_train_history(jobs_dir, limit=None):
         matching = [ d for d in jobs if limit in d ]
     else:
         matching = jobs
-    #assert len(matching) == 9, "Expected 9 folds, found {} matching {}".format(len(matching), job_id)
 
     dataframes = []
 
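
For context, `limit` receives the `--run` id from `main()`, and any job directory whose name contains that substring is kept, so one run id selects every experiment/fold trained in that run. A toy illustration with made-up directory names:

```python
jobs = ['sbcnn-20190306-1200-abcd-f0',
        'sbcnn-20190306-1200-abcd-f1',
        'ldcnn-20190301-0900-beef-f0']
matching = [ d for d in jobs if '20190306-1200-abcd' in d ]
assert len(matching) == 2   # both folds of the first run, not the second
```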

@@ -72,51 +71,67 @@ def pick_best(history, n_best=1):
 
     def best_by_loss(df):
         return df.sort_values('voted_val_acc', ascending=False).head(n_best)
-    return history.groupby('fold').apply(best_by_loss)
+    return history.groupby(['experiment', 'fold']).apply(best_by_loss)
 
-
-def evaluate(models, folds, test, predictor):
+def evaluate_model(predictor, model_path, val_data, test_data):
 
     def score(model, data):
         y_true = data.classID
         p = predictor(model, data)
         y_pred = numpy.argmax(p, axis=1)
         # other metrics can be derived from confusion matrix
         acc = sklearn.metrics.accuracy_score(y_true, y_pred)
-        print('acc', acc)
         labels = list(range(len(urbansound8k.classnames)))
         confusion = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=labels)
-        return confusion
+        return acc, confusion
 
-    # validation
-    out = {
-        'val_foreground': [],
-        'val_background': [],
-        'test_foreground': [],
-        'test_background': [],
-    }
+    model = keras.models.load_model(model_path)
 
     salience_info = { 'foreground': 1, 'background': 2 }
-
-    # val
-    for i, m in enumerate(models):
-        data = folds[i][1]
+    test_info = { 'val': val_data, 'test': test_data }
+    out = {}
+    for setname, data in test_info.items():
         for variant, salience in salience_info.items():
-            s = score(m, data[data.salience == salience])
-            out['val_'+variant].append(s)
-
-    # test
-    for i, m in enumerate(models):
-        data = test
-        for variant, salience in salience_info.items():
-            s = score(m, data[data.salience == salience])
-            out['test_'+variant].append(s)
-
-    for k, v in out.items():
-        out[k] = numpy.stack(v)
+            key = '{}_{}'.format(setname, variant)
+            acc, confusion = score(model, data[data.salience == salience])
+            print('acc for ', key, acc)
+            out[key] = confusion
 
     out['val'] = out['val_foreground'] + out['val_background']
     out['test'] = out['test_foreground'] + out['test_background']
+    return out
+
+def evaluate(models, folds, testset, predictor, out_dir, dry_run=False):
+
+    def eval_experiment(df):
+        results = {}
+        by_fold = df.sort_index(level="fold", ascending=True)
+
+        for idx, row in by_fold.iterrows():
+            print('Testing model {} fold={}'.format(row['experiment'], row['fold']))
+
+            model_path = row['model_path']
+            val = folds[row['fold']][1]
+            test = testset
+            if dry_run:
+                val = test[0:20]
+                test = test[0:20]
+
+            result = evaluate_model(predictor, model_path, val, test)
+
+            # convert to dict-of-arrays
+            for k, v in result.items():
+                if results.get(k) is None:
+                    results[k] = []
+                results[k].append(v)
+
+        exname = df['experiment'].unique()[0]
+        results_path = os.path.join(out_dir, '{}.confusion.npz'.format(exname))
+        numpy.savez(results_path, **results)
+        print('Wrote', results_path)
+        return results_path
+
+    out = models.groupby(level='experiment').apply(eval_experiment)
 
     return out
 
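
The restructured `evaluate()` leans on a pandas MultiIndex: `pick_best()` groups by `['experiment', 'fold']`, which moves both keys into the index of the returned frame, and `evaluate()` then walks that index one experiment at a time, writing one `<experiment>.confusion.npz` per group. A self-contained sketch of the pattern, with toy data standing in for the real training history:

```python
import pandas

history = pandas.DataFrame({
    'experiment': ['a', 'a', 'b', 'b'],
    'fold': [0, 1, 0, 1],
    'voted_val_acc': [0.7, 0.8, 0.6, 0.9],
    'model_path': ['a-f0.h5', 'a-f1.h5', 'b-f0.h5', 'b-f1.h5'],
})

def best_by_loss(df):
    return df.sort_values('voted_val_acc', ascending=False).head(1)

best = history.groupby(['experiment', 'fold']).apply(best_by_loss)
# 'experiment' and 'fold' are now index levels, so each call of
# eval_experiment() receives one experiment's folds at a time:
for name, group in best.groupby(level='experiment'):
    print(name, list(group['model_path']))   # a ['a-f0.h5', 'a-f1.h5'] ...
```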

@@ -132,8 +147,8 @@ def parse(args):
 
     a('--run', dest='run', default='',
         help='%(default)s')
-    a('--model', dest='model', default='',
-        help='%(default)s')
+    a('--check', action='store_true', default='',
+        help='Run a check pass, not actually evaluating')
 
     a('--out', dest='results_dir', default='./data/results',
         help='%(default)s')
@@ -146,10 +161,7 @@ def parse(args):
 def main():
 
     args = parse(sys.argv[1:])
-    if not args.run:
-        args.run = args.experiment
-
-    out_dir = os.path.join(args.results_dir, args.experiment)
+    out_dir = os.path.join(args.results_dir, args.run)
 
     common.ensure_directories(out_dir)
 
@@ -180,19 +192,16 @@ def predict(model, data):
180192
method=voting, overlap=overlap)
181193

182194
history = load_train_history(args.models_dir, args.run)
183-
best = pick_best(history)
195+
n_folds = len(history.fold.unique())
196+
n_experiments = len(history.experiment.unique())
197+
print("Found {} experiments across {} folds", n_folds, n_experiments)
184198

185-
print('Loading models...')
186-
models = best['model'].apply(lambda p: keras.models.load_model(p))
187-
print('Best model', best.voted_val_acc)
199+
best = pick_best(history)
200+
print('Best models\n', best[['epoch', 'voted_val_acc']])
188201

189202
print('Testing models...')
190-
results = evaluate(models, folds, test, predictor=predict)
191-
192-
results_path = os.path.join(out_dir, 'confusion.npz')
193-
numpy.savez(results_path, **results)
203+
results = evaluate(best, folds, test, predictor=predict, out_dir=out_dir, dry_run=args.check)
194204

195-
print('Wrote to', results_path)
196205

197206
if __name__ == '__main__':
198207
main()
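
Taken together: `test.py` now evaluates every experiment found in a run instead of a single `--model`, writes one `<experiment>.confusion.npz` per experiment under `<results_dir>/<run>/`, and `--check` exercises the same pipeline on 20-sample slices of the data as a quick smoke test before a full evaluation.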
