# Import Libraries

In [None]:
import os
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import matplotlib.pyplot as plt
import random
import copy
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import f1_score, accuracy_score
from collections import defaultdict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF

import cv2
import timm

from tqdm import tqdm

import albumentations as A
from albumentations import (
    Compose, OneOf, Normalize, CenterCrop, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, RandomRotate90, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, HueSaturationValue, CoarseDropout
    )
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings("ignore")

import logging
import time
from contextlib import contextmanager

# Choose the compute device once: the GPU when PyTorch reports CUDA is
# available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Directory Settings

In [None]:
# Input locations, all rooted at DATA_DIR.
DATA_DIR = '../input/plant-pathology-2021-fgvc8/'
TEST_DIR = f'{DATA_DIR}test_images/'
TRAIN_DIR = f'{DATA_DIR}train_images/'
# NOTE: despite the *_DIR suffix, the next two names are paths to CSV files.
TRAIN_CSV_DIR = f'{DATA_DIR}train.csv'
TEST_CSV_DIR = f'{DATA_DIR}sample_submission.csv'
# Load the training table and the sample-submission table.
train_df = pd.read_csv(TRAIN_CSV_DIR)
sub_df = pd.read_csv(TEST_CSV_DIR)

# EDA

In [None]:
# Notebook display: show the table loaded from train.csv.
train_df

In [None]:
# Notebook display: show the table loaded from sample_submission.csv.
sub_df

In [None]:
# Hand-written list: the 'image' column name followed by twelve label strings.
# It is kept as a literal instead of being derived with
# train_df['labels'].unique().
TARGET = [
    'image',
    'healthy',
    'scab frog_eye_leaf_spot complex',
    'scab',
    'complex',
    'rust',
    'frog_eye_leaf_spot',
    'powdery_mildew',
    'scab frog_eye_leaf_spot',
    'frog_eye_leaf_spot complex',
    'rust frog_eye_leaf_spot',
    'powdery_mildew complex',
    'rust complex',
]
# Presumably the column names of the submission file -- confirm against
# sample_submission.csv.
SUB_LABELS = ['image', 'labels']

TARGET

# One hot encoding

In [None]:
# Notebook display: the training table before one-hot encoding.
train_df

In [None]:
# Replace the 'labels' column with one indicator column per distinct label
# string; every other column is carried over unchanged.
onehot_df = pd.get_dummies(data=train_df, columns=['labels'])
onehot_df

In [None]:
# Keep only the encoded label columns by dropping the 'image' column.
col = onehot_df.drop(columns='image')
col

In [None]:
# Array of the one-hot label column names. TrainDataset uses it to select the
# label values, and the model uses its length as the classifier output size.
TARGET_COLUMNS = col.columns.values
TARGET_COLUMNS

In [None]:
# Notebook display: the one-hot label columns as a NumPy array.
onehot_df[TARGET_COLUMNS].values

In [None]:
# Notebook display: (rows, columns) of the one-hot encoded table.
onehot_df.shape

# Config

In [None]:
# Hyper-parameters read by the transform, model and training cells.
CFG = dict(
    model_arch='efficientnet_b4',  # architecture name handed to timm.create_model
    img_size=320,                  # images are resized to img_size x img_size
    epochs=3,                      # number of passes of the training loop
    lr=5e-4,                       # learning rate given to Adam
)

# Re-declares the same literal as the earlier TARGET definition in this file:
# the 'image' column name followed by twelve label strings.
TARGET = [
    'image',
    'healthy',
    'scab frog_eye_leaf_spot complex',
    'scab',
    'complex',
    'rust',
    'frog_eye_leaf_spot',
    'powdery_mildew',
    'scab frog_eye_leaf_spot',
    'frog_eye_leaf_spot complex',
    'rust frog_eye_leaf_spot',
    'powdery_mildew complex',
    'rust complex',
]

def seed_everything(seed=1234):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and every CUDA device),
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects hash randomisation in interpreters started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different kernels between runs, so switch it off.
    torch.backends.cudnn.benchmark = False

@contextmanager
def timer(name, logger=None, level=logging.DEBUG):
    """Context manager that reports when a named section starts and how long it took.

    Args:
        name: Label placed in square brackets in both messages.
        logger: Object with a ``log(level, msg)`` method. When None, messages
            go to ``print``.
        level: Level passed to ``logger.log``; unused when ``logger`` is None.

    The closing message is emitted only when the body finishes without raising.
    """
    if logger is None:
        emit = print
    else:
        def emit(msg):
            logger.log(level, msg)

    started = time.time()
    emit(f'[{name}] start')
    yield
    elapsed = time.time() - started
    emit(f'[{name}] done in {elapsed:.0f} s')

In [None]:
# Debug switch: when on, continue with a random 1% of the rows so the rest of
# the notebook runs quickly.
DEBUG = True
if DEBUG:
    # Fixed random_state (same value as seed_everything's default) so the
    # debug subset is identical on every run.
    onehot_df = onehot_df.sample(frac=0.01, random_state=1234).reset_index(drop=True)
    print(onehot_df.shape)

# Split data

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation. The fixed random_state (same value
# as seed_everything's default) keeps the split identical between runs, so
# validation scores are comparable.
train, valid = train_test_split(onehot_df, test_size=0.1, random_state=1234)
print(train.shape, valid.shape)

# Dataset

All datasets that represent a map from keys to data samples should subclass `torch.utils.data.Dataset`. Every subclass should override `__getitem__()`, which fetches the data sample for a given key. Subclasses may also override `__len__()`, which many `Sampler` implementations and the default options of `DataLoader` expect to return the size of the dataset.

In [None]:
# Dataset subclass built on PyTorch's Dataset class.
class TrainDataset(Dataset):
    """Yield ``(image, label)`` pairs for the rows of a one-hot encoded table.

    Args:
        onehot_df: DataFrame with an "image" column of file names (resolved
            against TRAIN_DIR) and the TARGET_COLUMNS label columns.
        transform: Optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``'image'`` entry. When falsy, the image
            is returned as the RGB array read from disk.
    """

    def __init__(self, onehot_df, transform=None):
        self.onehot_df = onehot_df
        self.image_names = onehot_df["image"].values
        self.labels = onehot_df[TARGET_COLUMNS].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.onehot_df)

    def __getitem__(self, idx):
        """Return the image and float label vector for row ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_path = TRAIN_DIR + self.image_names[idx]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
        label = torch.tensor(self.labels[idx]).float()
        if self.transform:
            image = self.transform(image=image)['image']
        return image, label

# Transform

In [None]:
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for the augmenting pipeline, ``'valid'`` for the
            resize-and-normalise pipeline.

    Returns:
        A composed transform whose output images are ``CFG['img_size']``
        pixels on each side, normalised and converted to tensors.

    Raises:
        ValueError: If ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    # Reject unknown modes up front instead of silently returning None, which
    # would make the dataset skip preprocessing altogether.
    if data not in ('train', 'valid'):
        raise ValueError(f"data must be 'train' or 'valid', got {data!r}")

    size = CFG['img_size']

    if data == 'train':
        return A.Compose([
            A.Resize(size, size),
            A.RandomResizedCrop(size, size),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(),
            ToTensorV2(),
        ])

    return A.Compose([
        A.Resize(size, size),
        A.Normalize(),
        ToTensorV2(),
    ])

In [None]:
# Instantiate the dataset class defined above on the TRAINING split only;
# using the full onehot_df here would train on the rows held out for validation.
train_dataset = TrainDataset(train, transform=get_transforms(data='train'))
# Group the samples produced by the dataset into shuffled batches of 32,
# dropping the last incomplete batch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, drop_last=True)
train_dataset[0]

In [None]:
# Validation must only see the held-out split; building it from the full
# onehot_df would score the model on rows it was trained on.
valid_dataset = TrainDataset(valid, transform=get_transforms(data='valid'))
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=2)
valid_dataset[0]

In [None]:
# Phase name -> loader, so the training loop can select a loader by phase.
dataloaders_dict = dict(train=train_loader, valid=valid_loader)

In [None]:
# Pull a single training batch and print its tensor shapes as a sanity check.
batch_iterator = iter(dataloaders_dict['train'])
images, labels = next(batch_iterator)
print(images.shape)
print(labels.size())

In [None]:
# Preview the first five samples of train_dataset, titled with their label tensor.
# NOTE(review): image[0] selects only the first slice along the leading axis
# (presumably the first colour channel of the transformed tensor -- confirm),
# so the plot is a single-channel view, not the RGB picture.
for i in range(5):
    image, label = train_dataset[i]
    plt.imshow(image[0])
    plt.title(f'label: {label}')
    plt.show() 

# Create model

In [None]:
class EfficientNetB4(nn.Module):
    """timm backbone with its classifier replaced by a task-specific linear head.

    The architecture name comes from ``CFG['model_arch']`` and the model is
    created with ``pretrained=False``. The head maps the backbone's classifier
    input features to ``len(TARGET_COLUMNS)`` outputs.
    """

    def __init__(self):
        super().__init__()
        backbone = timm.create_model(model_name=CFG['model_arch'], pretrained=False)
        # Swap the stock classifier for one sized to the label columns.
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_inputs, len(TARGET_COLUMNS))
        # The attribute name is part of the state_dict keys; keep it stable.
        self.efficientnetb4 = backbone

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.efficientnetb4(x)
    
# Build the network and move its parameters to the selected device.
model = EfficientNetB4().to(DEVICE)

#print(model)

# Training

In [None]:
# Train for CFG['epochs'] epochs, alternating a training and a validation pass,
# keep the weights with the best validation accuracy, and save them.
since = time.time()
epochs = CFG['epochs']

# Start from the current weights so best_model_wts is always a loadable
# state_dict, even if validation accuracy never rises above 0.
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

criterion = nn.BCEWithLogitsLoss().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'])

for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch+1, epochs))
    print('-' * 10)

    for phase in ['train', 'valid']:
        if phase == 'train':
            model.train()  # training mode
        else:
            model.eval()   # evaluation mode

        running_loss = 0.0
        running_corrects = 0
        seen_samples = 0

        # Iterate over data.
        for inputs, labels in dataloaders_dict[phase]:
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward; track gradient history only while training
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistics
            # Labels are one-hot rows (one column per label string), so the
            # predicted and true classes are the argmax over the class axis
            # (dim=1), not over the batch axis.
            batch_size = inputs.size(0)
            preds = outputs.argmax(dim=1)
            targets = labels.argmax(dim=1)
            running_loss += loss.item() * batch_size
            running_corrects += (preds == targets).sum().item()
            seen_samples += batch_size

        # Normalise by the number of samples actually processed. The loss was
        # weighted by batch size, and the train loader drops its last
        # incomplete batch, so neither the batch count nor the dataset length
        # is the right divisor. max(..., 1) guards an empty loader.
        epoch_loss = running_loss / max(seen_samples, 1)
        epoch_acc = running_corrects / max(seen_samples, 1)

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        # deep-copy the model weights whenever validation accuracy improves
        if phase == 'valid' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:.4f}'.format(best_acc))

# load best model weights so the saved checkpoint is the best epoch, not
# merely the last one
model.load_state_dict(best_model_wts)

model_path = 'model.pth'
torch.save(model.state_dict(), model_path)