In [1]:
import torch
torch.cuda.empty_cache()

# Import Libraries

In [2]:
import os
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import matplotlib.pyplot as plt
import random
import copy
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import f1_score, accuracy_score
from collections import defaultdict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF
#from efficientnet_pytorch import model as enet

import cv2
import timm

from tqdm import tqdm
from tqdm.notebook import tqdm

import albumentations as A
from albumentations import (
    Compose, OneOf, Normalize, CenterCrop, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, RandomRotate90, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, HueSaturationValue, CoarseDropout
    )
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings("ignore")

import logging
import time
from contextlib import contextmanager

# Config

In [3]:
class CFG:
    DATA_PATH = '../input/plant-pathology-2021-fgvc8/'
    TEST_PATH = DATA_PATH + 'test_images/'
    TRAIN_PATH = DATA_PATH + 'train_images/'
    TRAIN_CSV_PATH = DATA_PATH + 'train.csv'
    SUB_CSV_PATH = DATA_PATH + 'sample_submission.csv'
    
    MODEL_ARCH ='efficientnet_b2'
    #MODEL_ARCH = 'efficientnet_b4'
    IMG_SIZE = 224
    EPOCH = 10
    BATCH_SIZE = 32
    DEBUG_SIZE = 0.1
    RANDOM_STATE = 1234
    CLASS_THRESHOLD = 0.4
    DROPOUT = .4
    LR = 5e-4
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    LABELS = ['healthy','complex','rust','frog_eye_leaf_spot','powdery_mildew','scab']
    N_CLASS = len(LABELS)
    LABELS_DICT = {
    0: 'healthy', 
    1: 'scab',
    2: 'rust',
    3: 'frog_eye_leaf_spot',
    4: 'complex', 
    5: 'powdery_mildew'}


In [4]:
train_df = pd.read_csv(CFG.TRAIN_CSV_PATH)
sub_df = pd.read_csv(CFG.SUB_CSV_PATH)
sub = pd.read_csv(CFG.SUB_CSV_PATH)

# EDA

In [5]:
train_df

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex
...,...,...
18627,fffb900a92289a33.jpg,healthy
18628,fffc488fa4c0e80c.jpg,scab
18629,fffc94e092a59086.jpg,rust
18630,fffe105cf6808292.jpg,scab frog_eye_leaf_spot


In [6]:
sub_df

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,healthy
1,ad8770db05586b59.jpg,healthy
2,c7b03e718489f3ca.jpg,healthy


In [7]:
train_df['label_list'] = train_df['labels'].str.split(' ')
train_df

Unnamed: 0,image,labels,label_list
0,800113bb65efe69e.jpg,healthy,[healthy]
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,"[scab, frog_eye_leaf_spot, complex]"
2,80070f7fb5e2ccaa.jpg,scab,[scab]
3,80077517781fb94f.jpg,scab,[scab]
4,800cbf0ff87721f8.jpg,complex,[complex]
...,...,...,...
18627,fffb900a92289a33.jpg,healthy,[healthy]
18628,fffc488fa4c0e80c.jpg,scab,[scab]
18629,fffc94e092a59086.jpg,rust,[rust]
18630,fffe105cf6808292.jpg,scab frog_eye_leaf_spot,"[scab, frog_eye_leaf_spot]"


# One hot encoding

In [8]:
#label listの要素を列名にしてtrain_dfに0を初期値として結合する
#LABELS = ['healthy','complex','rust','frog_eye_leaf_spot','powdery_mildew','scab']
for x in CFG.LABELS:
    train_df[x]=0
train_df

Unnamed: 0,image,labels,label_list,healthy,complex,rust,frog_eye_leaf_spot,powdery_mildew,scab
0,800113bb65efe69e.jpg,healthy,[healthy],0,0,0,0,0,0
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,"[scab, frog_eye_leaf_spot, complex]",0,0,0,0,0,0
2,80070f7fb5e2ccaa.jpg,scab,[scab],0,0,0,0,0,0
3,80077517781fb94f.jpg,scab,[scab],0,0,0,0,0,0
4,800cbf0ff87721f8.jpg,complex,[complex],0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
18627,fffb900a92289a33.jpg,healthy,[healthy],0,0,0,0,0,0
18628,fffc488fa4c0e80c.jpg,scab,[scab],0,0,0,0,0,0
18629,fffc94e092a59086.jpg,rust,[rust],0,0,0,0,0,0
18630,fffe105cf6808292.jpg,scab frog_eye_leaf_spot,"[scab, frog_eye_leaf_spot]",0,0,0,0,0,0


In [9]:
#ある列がlabel_listと一致するなら1, しないなら0を返す
def onehot(col, label_list):
    if col in label_list:
        t = 1
    else:
        t = 0
    return t

In [10]:
#LABELS = ['healthy','complex','rust','frog_eye_leaf_spot','powdery_mildew','scab']
for x in CFG.LABELS:
    """
    class numpy.vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, signature=None)
    """
    train_df[x] = np.vectorize(onehot)(x,train_df['label_list'])
train_df

Unnamed: 0,image,labels,label_list,healthy,complex,rust,frog_eye_leaf_spot,powdery_mildew,scab
0,800113bb65efe69e.jpg,healthy,[healthy],1,0,0,0,0,0
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,"[scab, frog_eye_leaf_spot, complex]",0,1,0,1,0,1
2,80070f7fb5e2ccaa.jpg,scab,[scab],0,0,0,0,0,1
3,80077517781fb94f.jpg,scab,[scab],0,0,0,0,0,1
4,800cbf0ff87721f8.jpg,complex,[complex],0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...
18627,fffb900a92289a33.jpg,healthy,[healthy],1,0,0,0,0,0
18628,fffc488fa4c0e80c.jpg,scab,[scab],0,0,0,0,0,1
18629,fffc94e092a59086.jpg,rust,[rust],0,0,1,0,0,0
18630,fffe105cf6808292.jpg,scab frog_eye_leaf_spot,"[scab, frog_eye_leaf_spot]",0,0,0,1,0,1


In [11]:
#sub_df2 = sub_df.drop(['labels','label_list'], axis = 1)
train_df2 = train_df.drop(['labels','label_list'], axis = 1)
train_df2

Unnamed: 0,image,healthy,complex,rust,frog_eye_leaf_spot,powdery_mildew,scab
0,800113bb65efe69e.jpg,1,0,0,0,0,0
1,8002cb321f8bfcdf.jpg,0,1,0,1,0,1
2,80070f7fb5e2ccaa.jpg,0,0,0,0,0,1
3,80077517781fb94f.jpg,0,0,0,0,0,1
4,800cbf0ff87721f8.jpg,0,1,0,0,0,0
...,...,...,...,...,...,...,...
18627,fffb900a92289a33.jpg,1,0,0,0,0,0
18628,fffc488fa4c0e80c.jpg,0,0,0,0,0,1
18629,fffc94e092a59086.jpg,0,0,1,0,0,0
18630,fffe105cf6808292.jpg,0,0,0,1,0,1


In [12]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

@contextmanager
def timer(name, logger=None, level=logging.DEBUG):
    print_ = print if logger is None else lambda msg: logger.log(level, msg)
    t0 = time.time()
    print_(f'[{name}] start')
    yield
    print_(f'[{name}] done in {time.time() - t0:.0f} s')

In [13]:
def to_numpy(tensor):
    """Auxiliary function to convert tensors into numpy arrays
    """
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()


# Dataset

All datasets that represent a map from keys to data samples should subclass it. All subclasses should overwrite __getitem__(), supporting fetching a data sample for a given key. Subclasses could also optionally overwrite __len__(), which is expected to return the size of the dataset by many Sampler implementations and the default options of DataLoader.

#pytorchのDatasetクラスを継承したクラスを作成する
class TestDataset(Dataset):
    def __init__(self, train_df2, transform = None): #初期化
        self.train_df2 = train_df2
        self.image_names = train_df2["image"].values
        #self.labels = sub_df["labels"].values
        self.transform = transform
        
#データの長さを返す関数
    def __len__(self):
        return len(self.train_df2)
    
#indexに対応する画像とラベルを返す関数
    def __getitem__(self, idx):
        image_name = self.image_names[idx] #indexに対応するimageの値
        image_path = CFG.TEST_PATH + image_name #indexに対応するデータのパス
        #label = self.labels[idx]
        image = cv2.imread(image_path) #画像読み込み
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #BGR→RGB
        #label = torch.tensor(self.labels[idx]).float()
        if self.transform: #前処理ある場合
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [14]:
class TestDataset(Dataset):
    def __init__(self, df, transform=None):
        self.image_id = df['image'].values
        self.labels = df['labels'].values
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image_id = self.image_id[idx]
        label = self.labels[idx]
        
        image_path = CFG.TEST_PATH + image_id
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        augmented = self.transform(image=image)
        image = augmented['image']
        return {'image':image, 'target': label}

# Transform

In [15]:
def get_transforms(*,data):
    
    if data == 'train':
        return Compose([
            A.Resize(CFG.IMG_SIZE, CFG.IMG_SIZE),
            A.RandomResizedCrop(CFG.IMG_SIZE, CFG.IMG_SIZE),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.RandomBrightnessContrast(p=0.5),

            A.Normalize(),
            ToTensorV2(),
        ])
    elif data == 'valid':
        return Compose([
            Resize(CFG.IMG_SIZE, CFG.IMG_SIZE),
            Normalize(),
            ToTensorV2(),
        ])

In [16]:
test_dataset = TestDataset(sub_df, transform = get_transforms(data='valid'))
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = False)

# Create model

In [17]:
class EfficientNetB2(nn.Module):
    """
        Base class for all neural network modules.
        Your models should also subclass this class.
        Modules can also contain other Modules, allowing to nest them in a tree structure.
        You can assign the submodules as regular attributes:
    """
    def __init__(self):
        super(EfficientNetB2, self).__init__()
        self.efficientnetb2 = timm.create_model(model_name = CFG.MODEL_ARCH,pretrained = False)
        in_features = self.efficientnetb2.classifier.in_features
        self.efficientnetb2.classifier = nn.Linear(in_features, len(CFG.LABELS))#(input_size,output_size)
        
    def forward(self, x):
        x = self.efficientnetb2(x)
        return x
    
model = EfficientNetB2()
model.load_state_dict(torch.load("../input/myefnet-pth/model_x (1).pt"))
model = model.to(CFG.DEVICE)
#model.load_state_dict(torch.load("../input/myefnet-pth/model_x.pt"))

# Prediction

submit_preds = []

model.eval()
with torch.no_grad():
    for X in test_loader:
        X = X.float().to(CFG.DEVICE)
        outputs = model(X)
        preds = outputs.argmax(1).detach().cpu().numpy()
        submit_preds.append(preds)
print(outputs)

In [18]:
#model.cuda()
model.eval()

predictions = []
for batch in test_loader:
    image = batch['image'].cuda()
    with torch.no_grad():
        outputs = model(image)
        preds = outputs.argmax(1).detach().cpu().numpy()
        predictions.append(preds)


submit_preds = []

model.eval()
with torch.no_grad():
    for X in test_loader:
        X = X.float().to(CFG.DEVICE)
        print(X)
        outputs = model(X)
        preds = outputs.argmax(1).detach().cpu().numpy()
        submit_preds.append(preds)
    print(outputs)
    print(preds)
    #submit_preds = np.concatenate([p.numpy() for p in submit_preds], axis = 0)
submit_preds

In [19]:
sub['labels'] = np.concatenate(predictions)
sub = sub.replace({"labels": CFG.LABELS_DICT})
sub.to_csv('submission.csv', index=False)
sub.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,powdery_mildew
1,ad8770db05586b59.jpg,scab
2,c7b03e718489f3ca.jpg,frog_eye_leaf_spot
