
## 20200620_00
malignant を upsampling して、benign とほぼ同じ数まで増やして学習  
cross validation で、5つのモデルの予測値から最終的な予測値を決定

In [None]:
import chainer
from chainer.dataset import convert

import melanoma

# --- Experiment identity and output location --------------------------------
# `title` names this experiment and the directory its results are written to.
title = "20200620_00"
output_dir = melanoma.constants.PROJECT_ROOT / "results" / title

# --- Run configuration -------------------------------------------------------
# A negative `device` skips the GPU transfer below.
device = 0
batch_size = 32
epoch = 10
n_folds = 5
n_classes = 2
# Image size; passed around as a (width, height) pair.
width, height = 336, 224
# Not referenced by any of the cells visible here.
transfered_weights = None

# --- Model, loss, optimizer and predictor -----------------------------------
model = melanoma.models.EfficientNet()
loss_func = melanoma.models.loss.get_sigmoid_loss_func(model)
predictor = melanoma.predictor.Predictor(model, (width, height))
optimizer = chainer.optimizers.Adam()

if device >= 0:
    model.to_gpu(device)

optimizer.setup(model)

# --- Cross-validation data ---------------------------------------------------
# Iterated by the training cell, which unpacks each item as a
# (train, validation, test) dataset triple.
dataset_gen = (
    melanoma.dataset.DatasetBuilder(img_size=(width, height), n_classes=n_classes)
    .get_cross_validation_dataset_generator(n_folds)
)

In [None]:
# Run training: for every cross-validation fold, train under a profiler,
# reload the final-epoch snapshot and evaluate it on that fold's test split.
import cProfile

for idx, (train_ds, val_ds, test_ds) in enumerate(dataset_gen):
    print(f"\nstart cross validation : {idx} / {n_folds}\n")
    # NOTE(review): `model` and `optimizer` are created once, outside this
    # loop, so each fold appears to start from the weights left by the
    # previous fold rather than from a fresh model -- confirm this is intended.

    # Training
    cross_val_output_dir = output_dir / f"{idx:02d}"
    train_itr = chainer.iterators.MultiprocessIterator(train_ds, batch_size, repeat=True)
    val_itr = chainer.iterators.MultiprocessIterator(val_ds, batch_size, repeat=False)

    updater = chainer.training.StandardUpdater(
        train_itr, optimizer, converter=convert.concat_examples, loss_func=loss_func, device=device
    )
    evaluator = chainer.training.extensions.Evaluator(
        val_itr, model, device=device, eval_func=loss_func
    )
    trainer = melanoma.trainer.TrainerBuilder(updater, epoch, evaluator, cross_val_output_dir).build()

    # Profile the whole training run and keep the stats next to the fold's results.
    profiler = cProfile.Profile()
    profiler.runcall(trainer.run)
    profiler.dump_stats(cross_val_output_dir / "profile.stat")

    # Evaluation
    # Load the snapshot written for the last epoch of this fold.
    chainer.serializers.load_npz(cross_val_output_dir / f"snapshot_model_{epoch}.npz", model)
    if device >= 0:
        # Use the configured device explicitly, as the setup cell does.
        model.to_gpu(device)
        predictor.to_gpu()

    # Create the test iterator only when it is needed, and always finalize it
    # so its worker processes do not pile up from fold to fold.
    test_itr = chainer.iterators.MultiprocessIterator(test_ds, batch_size, repeat=False)
    try:
        melanoma.evaluate.evaluate(
            predictor,
            test_itr,
            [l.name for l in melanoma.constants.Labels],
            cross_val_output_dir / f"{title}_eval",
            device=device,
        )
    finally:
        test_itr.finalize()

In [None]:
# Submission: merge the per-fold submission CSVs into one final prediction.
npz_footer = "accuracy"
# Snapshot files located one directory below `output_dir`; only consumed by
# the disabled prediction step described further down.
files = output_dir.glob(f"*/snapshot_model_{npz_footer}.npz")

ds = melanoma.dataset.SubmissionDataset(melanoma.dataset.DATASET_ROOT / "test.csv")
itr = chainer.iterators.MultiprocessIterator(ds, batch_size, repeat=False)
itr.reset()


def _sum_fold_predictions():
    """Combine every matching submission CSV under ``output_dir`` into one output."""
    return melanoma.evaluate._sum_predict(
        output_dir.glob(f"{title}_submission_{npz_footer}*.csv"),
        output_dir / f"{title}_submission_{npz_footer}",
    )


# Disabled alternative step (kept for reference). It uses `predictor`, `itr`
# and `files` to produce the submission predictions:
#
#     lambda: melanoma.evaluate.evaluate_submission(predictor,
#                                                   itr,
#                                                   output_dir / f"{title}_submission_{npz_footer}",
#                                                   device=device,
#                                                   filenames=files,
#                                                  )
#
# NOTE(review): start_pdb presumably invokes the callable under a debugger --
# confirm against melanoma.utility.
melanoma.utility.start_pdb(_sum_fold_predictions)
