# 12장 병든 잎사귀 식별 경진대회 환경 세팅된 노트북 양식

In [None]:
# 데이터 입력 537
import pandas as pd
# Read the three competition CSV files in order: training labels, test ids,
# and the sample submission template.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/plant-pathology-2020-fgvc7/train.csv',
        '/kaggle/input/plant-pathology-2020-fgvc7/test.csv',
        '/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv',
    )
)
# submission.head()

In [None]:
# 데이터 시각화 539
# Pick out the training rows for each target: a row belongs to a class
# when that class's column equals 1.
healthy = train[train['healthy'].eq(1)]
multiple_diseases = train[train['multiple_diseases'].eq(1)]
rust = train[train['rust'].eq(1)]
scab = train[train['scab'].eq(1)]

In [None]:
# 그래프 그리기 539
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

# Font size applied through matplotlib's rc settings.
mpl.rc('font', size=15)
plt.figure(figsize=(7, 7))

# Pie chart of the number of training rows per target class.
label = ['healthy', 'multiple_diseases', 'rust', 'scab']
plt.pie(
    [len(frame) for frame in (healthy, multiple_diseases, rust, scab)],
    labels=label,
    autopct='%.1f%%',
)

In [None]:
# 출력해보기 540
import cv2
import matplotlib.gridspec as gridspec

# Root directory of the competition data, used below to build image paths.
# Note that the value already ends with a trailing slash.
data_path = '/kaggle/input/plant-pathology-2020-fgvc7/'

def show_image(img_ids, rows=2, cols=3):
    """Draw the given images on a ``rows`` x ``cols`` grid.

    Each id is resolved to ``{data_path}/images/{img_id}.jpg``, read with
    OpenCV and converted from BGR to RGB before being displayed.

    Args:
        img_ids: Sized iterable of image ids; it must not contain more than
            ``rows * cols`` entries.
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    capacity = rows * cols
    assert len(img_ids) <= capacity

    plt.figure(figsize=(15, 8))
    layout = gridspec.GridSpec(rows, cols)

    for slot, img_id in enumerate(img_ids):
        # Path is built from the module-level data_path defined above.
        bgr = cv2.imread(f'{data_path}/images/{img_id}.jpg')
        # OpenCV loads pixels in BGR order; convert to RGB for imshow.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        plt.subplot(layout[slot]).imshow(rgb)


# Display the last `num_of_imgs` healthy-leaf images.
num_of_imgs = 6
last_healthy_img_ids = healthy['image_id'].iloc[-num_of_imgs:]
show_image(last_healthy_img_ids)

In [None]:
# Leaves with multiple diseases: ids of the last `num_of_imgs` rows.
last_multiple_diseases_img_ids = multiple_diseases['image_id'].iloc[-num_of_imgs:]
# show_image(last_multiple_diseases_img_ids)

# Rust: ids of the last `num_of_imgs` rows.
last_rust_img_ids = rust['image_id'].iloc[-num_of_imgs:]
# show_image(last_rust_img_ids)

# Scab: ids of the last `num_of_imgs` rows.
last_scab_img_ids = scab['image_id'].iloc[-num_of_imgs:]
# show_image(last_scab_img_ids)

In [None]:
# 분리하기
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation, stratified on the four target
# columns, with a fixed random_state.
# NOTE: `train` is overwritten here, so re-running this cell splits the
# already reduced training set again.
stratify_labels = train[['healthy', 'multiple_diseases', 'rust', 'scab']]
train, valid = train_test_split(
    train,
    test_size=0.1,
    random_state=50,
    stratify=stratify_labels,
)

In [None]:
# 데이터셋 만들기 547
import cv2
from torch.utils.data import Dataset
import numpy as np

class ImageDataset(Dataset):
    """Dataset that loads images by id and, unless in test mode, their label.

    Args:
        df: DataFrame whose first column holds the image id. When labels are
            needed, columns 1-4 hold the per-class target values.
        img_dir: Prefix prepended to ``<image id>.jpg``. It is joined by plain
            string concatenation, so it must end with a path separator.
        transform: Optional callable invoked as ``transform(image=image)``;
            the ``'image'`` entry of its result replaces the loaded image.
        is_test: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, df, img_dir='./', transform=None, is_test=False):
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        # __getitem__ branches on this flag, so it must be stored here.
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the image at row ``idx``, plus its class index if labelled.

        Returns:
            The (optionally transformed) RGB image when ``is_test`` is True,
            otherwise an ``(image, label)`` tuple where ``label`` is the
            position of the largest value among columns 1-4.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_id = self.df.iloc[idx, 0]
        img_path = self.img_dir + img_id + '.jpg'
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads pixels in BGR order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image=image)['image']
        if self.is_test:
            return image
        label = np.argmax(self.df.iloc[idx, 1:5].to_numpy())
        return image, label


In [None]:
# 이미지 변환기 정의하기 549
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Training pipeline: resize, random photometric and geometric augmentations,
# then normalisation and conversion to a tensor.
_sharpness_variation = A.OneOf(
    [A.Emboss(p=1), A.Sharpen(p=1), A.Blur(p=1)],
    p=0.3,
)
_train_steps = [
    A.Resize(450, 650),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
    A.VerticalFlip(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=30, p=0.3),
    _sharpness_variation,
    A.PiecewiseAffine(p=0.3),
    A.Normalize(),
    ToTensorV2(),
]
transform_train = A.Compose(_train_steps)

In [None]:
# 검증 및 테스트용 변환기 만들기 551
# Pipeline for validation and test images: no random augmentation, only
# resize, normalise and convert to a tensor.
_eval_steps = [A.Resize(450, 650), A.Normalize(), ToTensorV2()]
transform_test = A.Compose(_eval_steps)
# transform_test

In [None]:
# function 552
# This cell uses `random` and `torch` directly; import them here so it does
# not depend on another cell having brought them into scope.
import random

import torch


def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the current PyTorch seed.

    Intended to be passed as ``worker_init_fn`` to a DataLoader.

    Args:
        worker_id: Worker index supplied by the caller; not used here.
    """
    # Reduce to 32 bits: np.random.seed only accepts seeds below 2**32.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Generator with a fixed seed, handed to the DataLoaders via `generator=`.
g = torch.Generator()
g.manual_seed(0)


In [None]:
# 데이터 로더 생성 552
from torch.utils.data import DataLoader

batch_size = 4

# NOTE(review): dataset_train / dataset_valid are not defined in the visible
# cells -- confirm they are created before this cell runs.
# Settings shared by the training and validation loaders.
_loader_options = dict(
    batch_size=batch_size,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=2,
)

# Only the training loader shuffles.
loader_train = DataLoader(dataset_train, shuffle=True, **_loader_options)
loader_valid = DataLoader(dataset_valid, shuffle=False, **_loader_options)