In [1]:
import os

# colaboratory環境ならTrue
if 'COLAB_GPU' in set(os.environ.keys()):
  from google.colab import drive
  drive.mount('/content/drive')
  os.chdir('drive/MyDrive/git/kaggle-vinbigdata-xray/working/class_filter')
  print(os.getcwd())

Mounted at /content/drive
/content/drive/MyDrive/git/kaggle-vinbigdata-xray/working/class_filter


In [2]:
!pip install -U albumentations
!pip install timm

Collecting albumentations
[?25l  Downloading https://files.pythonhosted.org/packages/03/58/63fb1d742dc42d9ba2800ea741de1f2bc6bb05548d8724aa84794042eaf2/albumentations-0.5.2-py3-none-any.whl (72kB)
[K     |████▌                           | 10kB 17.4MB/s eta 0:00:01[K     |█████████                       | 20kB 10.2MB/s eta 0:00:01[K     |█████████████▋                  | 30kB 8.0MB/s eta 0:00:01[K     |██████████████████▏             | 40kB 7.2MB/s eta 0:00:01[K     |██████████████████████▊         | 51kB 4.5MB/s eta 0:00:01[K     |███████████████████████████▏    | 61kB 5.1MB/s eta 0:00:01[K     |███████████████████████████████▊| 71kB 5.5MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 4.0MB/s 
Collecting imgaug>=0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/66/b1/af3142c4a85cba6da9f4ebb5ff4e21e2616309552caca5e8acefe9840622/imgaug-0.4.0-py2.py3-none-any.whl (948kB)
[K     |████████████████████████████████| 952kB 6.9MB/s 
[?25hCollecti

In [4]:
config_file = "./configs/efficientnet_b1_512.json"

import argparse
import json
import os
import datetime

import numpy as np
import pandas as pd
import torch
from torch import nn
from sklearn.model_selection import StratifiedKFold

CFG = json.load(open(config_file))

# logger の設定
from logging import getLogger, StreamHandler,FileHandler, Formatter, DEBUG, INFO
logger = getLogger("logger")    #logger名loggerを取得
logger.setLevel(DEBUG)  #loggerとしてはDEBUGで
#handler1を作成
handler_stream = StreamHandler()
handler_stream.setLevel(DEBUG)
handler_stream.setFormatter(Formatter("%(asctime)s: %(message)s"))
#handler2を作成
config_filename = os.path.splitext(os.path.basename(config_file))[0]
handler_file = FileHandler(filename=f'./logs/{config_filename}_{CFG["model_arch"]}.log')
handler_file.setLevel(DEBUG)
handler_file.setFormatter(Formatter("%(asctime)s: %(message)s"))
#loggerに2つのハンドラを設定
logger.addHandler(handler_stream)
logger.addHandler(handler_file)


In [8]:
!unzip -q '../../input/vinbigdata-chest-xray-resized-png-{CFG["dim"]}x{CFG["dim"]}{CFG["way"]}.zip' -d "/content"

replace /content/vinbigdata-chest-xray-resized-png-512x512/train_meta.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [12]:
MAIN_PATH = '../../input/vinbigdata-chest-xray-abnormalities-detection/'
TRAIN_PATH = f'/content/vinbigdata-chest-xray-resized-png-{CFG["dim"]}x{CFG["dim"]}{CFG["way"]}/train'
TRAIN_META = "../../input/dicom-meta/train_meta.csv"

In [13]:


def load_train_df():
    path = os.path.join(MAIN_PATH,"train.csv")
    train_df = pd.read_csv(path)
    is_normal_df = train_df.groupby("image_id")["class_id"].agg(lambda s: (s == 14).sum()).reset_index().rename({"class_id": "num_normal_annotations"}, axis=1)
    is_normal_df["label"] = (is_normal_df["num_normal_annotations"] == 3).astype(int)  # 3人とも異常なしを1とする
    # meta情報を結合
    meta = pd.read_csv(TRAIN_META)
    meta = meta[["FileName", "PixelSpacing0", "PixelSpacing1","PatientSex"]]
    meta["image_id"] = meta["FileName"].str.replace('.dicom', '')
    meta = meta.drop(["FileName"], axis=1)
    is_normal_df = is_normal_df[["image_id", "label"]].merge(meta, how="left", on="image_id")
    """
    is_normal_df["ch1"] = is_normal_df["PixelSpacing0"].fillna(1.)
    is_normal_df["ch2"] = 0
    """
    is_normal_df["PatientSex"] = is_normal_df["PatientSex"].fillna("no")
    is_normal_df["ch1"] = ((is_normal_df["PatientSex"]=="O")|(is_normal_df["PatientSex"]=="no")).astype(int)
    is_normal_df["ch2"] = 0

    print(is_normal_df)
    is_normal_df["label"] = is_normal_df["label"].astype(int)
    return is_normal_df

def main():
    from model.transform import get_train_transforms, get_valid_transforms
    from model.dataloader import prepare_dataloader
    from model.model import XrayImgClassifierEfficientnet, XrayImgClassifierVit
    from model.epoch_api import train_one_epoch, valid_one_epoch
    from model.utils import seed_everything

    logger.debug(CFG)
    train = load_train_df()
    seed_everything(CFG['seed'])

    folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed']).split(np.arange(train.shape[0]), train.label.values)

    for fold, (trn_idx, val_idx) in enumerate(folds):
        """
        if fold > 0:
            break
        """
        logger.debug(f'Training with fold {fold} started (train:{len(trn_idx)}, val:{len(val_idx)})')

        train_loader, val_loader = prepare_dataloader(train, (CFG["resize_dim"], CFG["resize_dim"]), trn_idx, val_idx, data_root=os.path.join(TRAIN_PATH), train_bs=CFG["train_bs"], valid_bs=CFG["valid_bs"], num_workers=CFG["num_workers"], do_fmix=False, do_cutmix=False, transform_way=CFG["transform_way"], use_meta = CFG["meta"])

        device = torch.device(CFG['device'])


        if CFG["model"]=="efficientnet":
            model = XrayImgClassifierEfficientnet(CFG['model_arch'], train.label.nunique(), pretrained=True).to(device)
        elif CFG["model"]=="vit":
            model = XrayImgClassifierVit(CFG['model_arch'], train.label.nunique(), pretrained=True).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
        #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=CFG['epochs']-1)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)
        #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=25,
        #                                                max_lr=CFG['lr'], epochs=CFG['epochs'], steps_per_epoch=len(train_loader))

        loss_tr = nn.CrossEntropyLoss().to(device) #MyCrossEntropyLoss().to(device)
        loss_fn = nn.CrossEntropyLoss().to(device)

        for epoch in range(CFG['epochs']):
            train_one_epoch(epoch, model, loss_tr, optimizer, train_loader, device, CFG['accum_iter'], CFG['verbose_step'],scheduler=scheduler, schd_batch_update=False)

            with torch.no_grad():
                valid_one_epoch(epoch, model, loss_fn, val_loader, device, CFG['accum_iter'], CFG['verbose_step'], scheduler=None, schd_loss_update=False)

            torch.save(model.state_dict(),f'save/{config_filename}_{CFG["model_arch"]}_fold_{fold}_{epoch}')

        del model, optimizer, train_loader, val_loader,  scheduler
        torch.cuda.empty_cache()
        logger.debug("\n")


In [14]:
main()

2021-03-24 06:50:27,938: {'fold_num': 5, 'seed': 719, 'meta': 0, 'model': 'efficientnet', 'model_arch': 'tf_efficientnet_b1_ns', 'dim': 512, 'resize_dim': 512, 'way': '', 'epochs': 6, 'train_bs': 25, 'valid_bs': 25, 'T_0': 10, 'lr': 0.0005, 'min_lr': 5e-06, 'weight_decay': 0.1, 'num_workers': 2, 'accum_iter': 20, 'verbose_step': 1, 'device': 'cuda:0', 'transform_way': 'resize', 'tta': 1, 'used_epochs': [5], 'weights': [1]}
2021-03-24 06:50:32,684: Training with fold 0 started (train:12000, val:3000)


                               image_id  label  ...  ch1  ch2
0      000434271f63a053c4128a0ba6352c7f      1  ...    1    0
1      00053190460d56c53cc3e57321387478      1  ...    1    0
2      0005e8e3701dfb1dd93d53e2ff537b6e      0  ...    0    0
3      0006e0a85696f6bb578e84fafa9a5607      1  ...    0    0
4      0007d316f756b3fa0baea2ff514ce945      0  ...    0    0
...                                 ...    ...  ...  ...  ...
14995  ffe6f9fe648a7ec29a50feb92d6c15a4      0  ...    0    0
14996  ffea246f04196af602c7dc123e5e48fc      1  ...    1    0
14997  ffeffc54594debf3716d6fcd2402a99f      0  ...    0    0
14998  fff0f82159f9083f3dd1f8967fc54f6a      1  ...    1    0
14999  fff2025e3c1d6970a8a6ee0404ac6940      1  ...    1    0

[15000 rows x 7 columns]


[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m

epoch 3 loss: 0.1330:  63%|██████▎   | 76/120 [00:13<00:07,  5.74it/s][A[A

epoch 3 loss: 0.1319:  63%|██████▎   | 76/120 [00:13<00:07,  5.74it/s][A[A

epoch 3 loss: 0.1319:  64%|██████▍   | 77/120 [00:13<00:07,  5.75it/s][A[A

epoch 3 loss: 0.1317:  64%|██████▍   | 77/120 [00:13<00:07,  5.75it/s][A[A

epoch 3 loss: 0.1317:  65%|██████▌   | 78/120 [00:13<00:07,  5.78it/s][A[A

epoch 3 loss: 0.1333:  65%|██████▌   | 78/120 [00:13<00:07,  5.78it/s][A[A

epoch 3 loss: 0.1333:  66%|██████▌   | 79/120 [00:13<00:07,  5.81it/s][A[A

epoch 3 loss: 0.1327:  66%|██████▌   | 79/120 [00:13<00:07,  5.81it/s][A[A

epoch 3 loss: 0.1327:  67%|██████▋   | 80/120 [00:13<00:06,  5.75it/s][A[A

epoch 3 loss: 0.1326:  67%|██████▋   | 80/120 [00:14<00:06,  5.75it/s][A[A

epoch 3 loss: 0.1326:  68%|██████▊   | 81/120 [00:14<00:06,  5.77it/s][A[A

epoch 3 loss: 0.1314:  68%|██████▊   | 81/120 [00:14<00:06,  5.77it/s][A[A

epoch 3 loss: 0.1