### kerasからplへの移行 dataloader編

### 必要な機能

#### make_path_list
- dirから学習pathを取り出す

#### dataset
input:path_list
output:data,
- 学習pathからデータをロードして、datatransformを元に変形する
- anotationとセットで返す。必要なら、座標もともに返す


#### datatransform
- 関数を何個か受け取って処理を行う。

#### dataloader
- dataset,datatransformを合体させて、バッチで返す 

In [1]:
import torch
import torch.utils.data as data
from torchvision import transforms
import pathlib
import pandas as pd
from pathlib import Path
import numpy as np 
import sys 
import os
import matplotlib.pyplot as plt




## DataLoader

In [2]:
DATA_DIR = '/home/higuchi/ssd/kits19/data'
train_patch = 'tumor_48x48x16'
val_patch = 'tumor_60x60x20'
train_ids=['001','002']
val_ids=['001','002']

class DataPathMaker():
    '''
    DataSetに渡すpath_listを作るためのDataFrameを作る
    今後の展望として、統計量を持ったDFを渡してその条件でlistを変えるようにする。
    '''
    # TODO:文字列の除去
    def __init__(self, data_dir, patch_dir_name='patch'):
        self.data_dir = pathlib.Path(data_dir)
        self.patch_dir_name = patch_dir_name
        

    def create_dataframe(self, id_list):
        data = []
        for patient_id in id_list:
            # TODO: case_00の部分もyamlから渡せるようにしたほうがよい
            patient_dir = self.data_dir / f'case_00{patient_id}' / self.patch_dir_name
            images = sorted(patient_dir.glob('patch_image_*.npy'))
            labels = sorted(patient_dir.glob('patch_no_onehot_*.npy'))
            if len(images) == 1 or len(labels) == 0:
                print(f'{patient_id} is no data')
            for image, label in zip(images, labels):
                data.append(['image', patient_id, image])
                data.append(['label', patient_id, label])
        return pd.DataFrame(data, columns=['type', 'id', 'path'])


train_path_df = DataPathMaker(DATA_DIR, patch_dir_name=train_patch).create_dataframe(train_ids)
val_path_df = DataPathMaker(DATA_DIR, patch_dir_name=val_patch).create_dataframe(val_ids)

train_im_list = train_path_df[train_path_df['type']=='image']['path'].astype(str).values
val_im_list = train_path_df[train_path_df['type']=='image']['path'].astype(str).values

train_lb_list = train_path_df[train_path_df['type']=='label']['path'].astype(str).values
val_lb_list = train_path_df[train_path_df['type']=='label']['path'].astype(str).values


In [3]:
# パッケージのimport
import torch
from torchvision import transforms
from PIL import Image, ImageOps, ImageFilter
import numpy as np


class Compose(object):
    """引数transformに格納された変形を順番に実行するクラス
       対象画像とアノテーション画像を同時に変換させます。 
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, anno_class_img):
        for t in self.transforms:
            img, anno_class_img = t(img, anno_class_img)
        return img, anno_class_img


class Scale(object):
    def __init__(self, scale):
        self.scale = scale

    def __call__(self, img, anno_class_img):

        width = img.size[0]  # img.size=[幅][高さ]
        height = img.size[1]  # img.size=[幅][高さ]

        # 拡大倍率をランダムに設定
        scale = np.random.uniform(self.scale[0], self.scale[1])

        scaled_w = int(width * scale)  # img.size=[幅][高さ]
        scaled_h = int(height * scale)  # img.size=[幅][高さ]

        # 画像のリサイズ
        img = img.resize((scaled_w, scaled_h), Image.BICUBIC)

        # アノテーションのリサイズ
        anno_class_img = anno_class_img.resize(
            (scaled_w, scaled_h), Image.NEAREST)

        # 画像を元の大きさに
        # 切り出し位置を求める
        if scale > 1.0:
            left = scaled_w - width
            left = int(np.random.uniform(0, left))

            top = scaled_h-height
            top = int(np.random.uniform(0, top))

            img = img.crop((left, top, left+width, top+height))
            anno_class_img = anno_class_img.crop(
                (left, top, left+width, top+height))

        else:
            # input_sizeよりも短い辺はpaddingする
            p_palette = anno_class_img.copy().getpalette()

            img_original = img.copy()
            anno_class_img_original = anno_class_img.copy()

            pad_width = width-scaled_w
            pad_width_left = int(np.random.uniform(0, pad_width))

            pad_height = height-scaled_h
            pad_height_top = int(np.random.uniform(0, pad_height))

            img = Image.new(img.mode, (width, height), (0, 0, 0))
            img.paste(img_original, (pad_width_left, pad_height_top))

            anno_class_img = Image.new(
                anno_class_img.mode, (width, height), (0))
            anno_class_img.paste(anno_class_img_original,
                                 (pad_width_left, pad_height_top))
            anno_class_img.putpalette(p_palette)

        return img, anno_class_img


class RandomRotation(object):
    def __init__(self, angle):
        self.angle = angle

    def __call__(self, img, anno_class_img):

        # 回転角度を決める
        rotate_angle = (np.random.uniform(self.angle[0], self.angle[1]))

        # 回転
        img = img.rotate(rotate_angle, Image.BILINEAR)
        anno_class_img = anno_class_img.rotate(rotate_angle, Image.NEAREST)

        return img, anno_class_img

In [8]:
from tensorflow.keras.utils import Sequence, to_categorical
class KitsDataSet(data.Dataset):
    '''
    loadした後のデータの処理のみを行う。
    input:train,val,testのdata_list(絞り込み済み) & label_list
    '''
    # TODO: ラベルが重ねってる部分の処理(binaly_labels)
    # TODO: Augment_code
    def __init__(self, img_list, label_list, transform, phase='train'):
        self.img_list = img_list
        self.label_list = label_list
        self.transform = transform
        self.phase = phase
    
    def __len__(self):
        return len(self.img_list)
    
    def __getitem__(self,index):
        im = np.load(self.img_list[index])
        lb = np.load(self.label_list[index])
    
        if self.transform:
            im, lb = self.transform(im, lb, self.phase)

        # B,D,C,H,Wに変換する
        # im = im.permute(3, 2, 0, 1)
        im = np.transpose(im, (2, 3, 0, 1))

        # B,C,H,Wに変換する
        # lb = lb.permute(2, 0, 1)

        #numpy ver
        lb = to_categorical(lb, num_classes=3)
        lb = np.transpose(lb, (2, 3, 0, 1))
        return im, lb 




tr_DS = KitsDataSet(train_im_list,train_lb_list,phase='train', transform = None)
val_DS = KitsDataSet(val_im_list,val_lb_list,phase='val', transform = None)


print(tr_DS.__getitem__(0)[1].shape)



(16, 3, 48, 48)


In [9]:
batch_size = 8
train_dl = data.DataLoader(tr_DS, batch_size = batch_size)
val_dl = data.DataLoader(val_DS, batch_size = batch_size)

dataloaders_dict = {
    "train": train_dl,
    "val": val_dl
}

batch_iterator = iter(dataloaders_dict["val"])

images, labels = next(batch_iterator)

print(images.size())
print(labels.size())

torch.Size([8, 16, 1, 48, 48])
torch.Size([8, 16, 3, 48, 48])


In [139]:
import torchvision
grid = torchvision.utils.make_grid\
(images[:,0,:, :].to(torch.int16).abs())

# grid = transforms.ToPILImage()(grid)grid

plt.imshow(grid.permute(1,2,0).numpy())


## Model

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class UNet3D(nn.Module):
    def __init__(self,input_shape,nclasses):
    

class SingleConv(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, activation='relu'):
        super(SingleConv).__init__()

        for name, module in create_conv(in_channels, out_channels, kernel_size, order, padding=padding):
            self.add_module(name, module)


class DoubleConv(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, activation='relu'):
        super(DoubleConv).__init__()

        self.add_module('SingleConv1', SingleConv(in_channels, out_channels, kernel_size, order, activation))
        self.add_module('SingleConv1', SingleConv(in_channels, out_channels, kernel_size, order, activation))


def create_conv(in_channels, out_channels, kernel_size, activation, padding=1):
    modules = []
    modules.append(('ReLU', nn.ReLU(inplace=True)))
    modules.append(('conv', conv3d(in_channels, out_channels, kernel_size, bias, padding=padding)))
    modules.append(('batchnorm', nn.BatchNorm3d(in_channels)))
    return modules


In [2]:



class KitsDataSet(data.Dataset):
    def __init__(self, data_dir, patch_dir_name, phase='train'):
        self.data_dir = data_dir
        self.patch_dir_name = patch_dir_name
        self.phase = phase
    
    def __len__(self):
        return len(self.img_list)
    
    def __get
    


train_ds = KitsDataSet()
val_ds = KitsDataSet()

train_tf = DataTrasform()
val_tf = DataTrasform()






ValueError: batch_size should be a positive integer value, but got batch_size=<__main__.DataTrasform object at 0x7fe25f838710>