
## 20200711_00
augmentation : flip, noise  
meta data を追加  
20200630_00からデータセット周りのバグ修正

In [None]:
import chainer
import chainercv
from chainer.dataset import convert
import numpy as np

import mython
import melanoma
from melanoma import augmentations

# Experiment identifier: names the results directory and the evaluation /
# submission output files written by the later cells.
title = "20200711_00"

# --- Runtime and input geometry -------------------------------------------
# A value >= 0 makes the training cell move the model to that GPU.
device = 0
batch_size = 32
# Passed everywhere as the pair (width, height).
width, height = 336, 224

# --- Training schedule ----------------------------------------------------
epoch = 10
n_classes = 2
# Number of cross-validation folds requested from the dataset builder.
n_folds = 5
# Number of times the whole cross-validation is repeated; the repetition
# index is used as the dataset builder's random_state.
n_ensemble_loop = 1

# --- Filesystem locations -------------------------------------------------
output_dir = melanoma.constants.PROJECT_ROOT / "results" / title
# Weights loaded into the EfficientNet extractor before training; set to
# None to skip loading.
transfered_weights = (
    melanoma.constants.PROJECT_ROOT
    / "src"
    / "melanoma"
    / "models"
    / "weights"
    / "efficientnetb0_chainer.npz"
)

# --- Per-sample pipelines handed to DatasetBuilder ------------------------
augs = [augmentations.base.standard_aug_transform]
trans = [augmentations.base.normalize_transform]

In [None]:
# run train
#
# For every ensemble repetition and every cross-validation fold:
#   1. build a fresh model and train it under cProfile,
#   2. dump the profile next to the fold's training outputs,
#   3. reload the selected snapshot and evaluate it on the fold's test split.
import cProfile

# Suffix of the snapshot file that is reloaded for evaluation. The file
# "snapshot_model_<suffix>.npz" is presumably written by the trainer that
# TrainerBuilder assembles -- confirm against melanoma.trainer.
npz_footer = "accuracy"

for loop in range(n_ensemble_loop):
    # The repetition index doubles as the random seed of the fold split.
    dataset_gen = melanoma.dataset.DatasetBuilder(
        img_size=(width, height),
        n_classes=n_classes,
        random_state=loop,
        augmentations=augs,
        transforms=trans,
        is_onehot=True,
    ).get_cross_validation_dataset_generator(n_folds)

    for idx, (train_ds, val_ds, test_ds) in enumerate(dataset_gen):
        print(f"\nstart cross validation : {idx + 1} / {n_folds}, loop : {loop + 1} / {n_ensemble_loop} \n")

        # Training
        # One zero-padded directory per (repetition, fold) pair, numbered
        # consecutively across repetitions.
        cross_val_output_dir = output_dir / f"{loop * n_folds + idx:02d}"
        train_itr = chainer.iterators.MultiprocessIterator(train_ds, batch_size, repeat=True)
        val_itr = chainer.iterators.MultiprocessIterator(val_ds, batch_size, repeat=False)
        test_itr = chainer.iterators.MultiprocessIterator(test_ds, batch_size, repeat=False)

        extractor = melanoma.models.EfficientNet(
            num_classes=1, global_params=melanoma.models.EfficientNetB0
        )
        model = melanoma.models.Net(extractor, len(melanoma.dataset.Dataset.METAFEATURES))
        if transfered_weights is not None:
            # The weights are loaded into the extractor only, not into the
            # wrapping Net.
            mython.ml.load_weights.load_weights(np.load(transfered_weights), extractor)

        if device >= 0:
            model.to_gpu(device)

        loss_func = melanoma.models.loss.SigmoidLoss(model)
        predictor = melanoma.predictor.Predictor(model, (width, height))

        optimizer = chainer.optimizers.Adam(alpha=0.0001)
        optimizer.setup(model)

        updater = chainer.training.StandardUpdater(
            train_itr,
            optimizer,
            converter=convert.concat_examples,
            loss_func=loss_func,
            device=device,
        )
        evaluator = melanoma.evaluator.CustomEvaluator(
            val_itr, model, device=device, eval_func=loss_func
        )
        trainer = melanoma.trainer.TrainerBuilder(
            updater, epoch, evaluator, cross_val_output_dir
        ).build()

        # Run the training loop under the profiler, wrapped by the project's
        # pdb helper.
        profiler = cProfile.Profile()
        mython.debug.start_pdb(
            lambda: profiler.runcall(trainer.run)
        )

        # The file name is fixed; folds are kept apart by their directory.
        profiler.dump_stats(cross_val_output_dir / "profile.stat")

        # Evaluation
        fname = cross_val_output_dir / f"snapshot_model_{npz_footer}.npz"
        chainer.serializers.load_npz(fname, model)
        mython.debug.start_pdb(
            lambda: melanoma.evaluate.evaluate(
                predictor,
                test_itr,
                [label.name for label in melanoma.constants.Labels],
                cross_val_output_dir / f"{title}_eval_{npz_footer}",
                device=device,
            )
        )

In [None]:
# submission
#
# Collect the selected snapshot of every fold directory and pass them all to
# evaluate_submission together with a predictor over the submission dataset.
npz_footer = "accuracy"
files = sorted(output_dir.glob(f"*/snapshot_model_{npz_footer}.npz"))

# Build the network exactly as the training cell does, so its parameter
# layout matches the snapshots regardless of EfficientNet's defaults.
model = melanoma.models.Net(
    melanoma.models.EfficientNet(
        num_classes=1, global_params=melanoma.models.EfficientNetB0
    ),
    len(melanoma.dataset.Dataset.METAFEATURES),
)
predictor = melanoma.predictor.Predictor(model, (width, height))
ds = melanoma.dataset.SubmissionDataset(melanoma.dataset.DATASET_ROOT / "test.csv")
itr = chainer.iterators.MultiprocessIterator(ds, batch_size, repeat=False)
itr.reset()

# Disabled alternative kept from the original notebook:
#    lambda: melanoma.evaluate._sum_predict(output_dir.glob("*.csv"), output_dir / f"{title}_submission_{npz_footer}")
mython.debug.start_pdb(
    lambda: melanoma.evaluate.evaluate_submission(
        predictor,
        itr,
        output_dir / f"{title}_submission_{npz_footer}",
        device=device,
        filenames=files,
    )
)