In [1]:
import zipfile
from zipfile import ZipFile
import os 
import io
import cv2
import random
import matplotlib.pyplot as plt

In [2]:
# Unpack the MRI archive into the folder that contains it, so the files end up
# under D:\data rather than in whatever the kernel's working directory is.
# NOTE(review): later cells read from 'D:/data/dataset/'; this assumes the
# archive has a top-level `dataset/` folder — confirm against the archive.
zip_path = r"D:\data\mri2.Zip"  # raw string: "\d" and "\m" are invalid escape sequences
with zipfile.ZipFile(zip_path) as zip_file:  # the context manager closes the file handle
    zip_file.extractall(path=os.path.dirname(zip_path))

In [5]:
# Root folder of the dataset. Later cells append the split names
# ('train', 'valid', 'test') directly to this string, so the trailing '/'
# is required.
root_dir = 'D:/data/dataset/'

#### 데이터 준비 및 전처리

##### 이미지 예시 출력

In [6]:
# Show up to 5 randomly chosen brain-tumor ("yes") images from the training set.
# The `covid_*` variable names are kept unchanged so that any later cell that
# refers to them keeps working.
covid_train_path = os.path.join(root_dir, 'train', 'yes')

covid_files = [os.path.join(covid_train_path, x) for x in os.listdir(covid_train_path)]
# random.sample raises ValueError when asked for more items than exist.
sampled_files = random.sample(covid_files, min(5, len(covid_files)))
# cv2.imread returns None for files it cannot decode; drop those instead of
# crashing later in imshow.
covid_images = [img for img in (cv2.imread(x) for x in sampled_files) if img is not None]

plt.figure(figsize=(20, 10))
columns = 5
# Ceiling division: 5 images need 1 row (the old `int(n / columns + 1)` gave 2).
rows = max(1, -(-len(covid_images) // columns))
for i, image in enumerate(covid_images):
    plt.subplot(rows, columns, i + 1)
    # OpenCV loads images as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: 'D:/data/dataset/train\\yes'

In [None]:
# Show up to 5 randomly chosen normal ("no") images from the training set.
# The `covid_*` variable names are kept unchanged so that any later cell that
# refers to them keeps working.
covid_train_path = os.path.join(root_dir, 'train', 'no')

covid_files = [os.path.join(covid_train_path, x) for x in os.listdir(covid_train_path)]
# random.sample raises ValueError when asked for more items than exist.
sampled_files = random.sample(covid_files, min(5, len(covid_files)))
# cv2.imread returns None for files it cannot decode; drop those instead of
# crashing later in imshow.
covid_images = [img for img in (cv2.imread(x) for x in sampled_files) if img is not None]

plt.figure(figsize=(20, 10))
columns = 5
# Ceiling division: 5 images need 1 row (the old `int(n / columns + 1)` gave 2).
rows = max(1, -(-len(covid_images) // columns))
for i, image in enumerate(covid_images):
    plt.subplot(rows, columns, i + 1)
    # OpenCV loads images as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

#### 데이터 분포 확인

In [31]:
def print_files_in_dir(root_dir, prefix):
    """Print how many entries each class sub-folder of ``root_dir`` contains.

    Args:
        root_dir: Directory whose immediate sub-directories are the class
            folders (for this dataset: ``yes`` and ``no``).
        prefix: Text printed at the start of every line, e.g. the split name.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist (from ``os.listdir``).
    """
    # Label every count with the real folder name. Pairing a fixed
    # ['yes', 'no'] list with the position in os.listdir() mislabels the counts
    # whenever the listing comes back as 'no', 'yes', and raises IndexError as
    # soon as the directory holds a third entry.
    for class_name in sorted(os.listdir(root_dir)):
        path = os.path.join(root_dir, class_name)
        if not os.path.isdir(path):
            continue  # a stray file next to the class folders is not a class
        file_list = os.listdir(path)
        print(f"{prefix} 데이터의 {class_name} 수: {len(file_list)}")

In [32]:
# Report the per-class file counts of each split, one blank line after each.
# The "test" split was commented out in the original cell and is still skipped.
for phase in ("train", "valid"):
    print_files_in_dir(root_dir + phase, phase)
    print()

FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: 'D:\\data\\dataset\\train'

#### 데이터 전처리

In [None]:
# 필요한 모듈 불러오기
import numpy as np
import torch
from torchvision import datasets, transforms
import torch.utils.data as data

In [None]:
# Compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# A single seed shared by every random number generator the notebook uses.
random_seed = 2023
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Request deterministic cuDNN kernels and switch off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Per-channel normalisation with mean 0 and std 1.
normalize = transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1])

# One preprocessing pipeline per dataset split.
image_transforms = dict(
    # Training: resize, then augment with a random 224-pixel crop (scale range
    # 0.5-1.0) and a random horizontal flip before converting to a tensor.
    train=transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.5, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    # Validation: plain resize to 224x224, no augmentation.
    valid=transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]),
)

In [None]:
# Folder of each split; root_dir already ends with a path separator.
train_dataset_folder = f"{root_dir}train"
val_dataset_folder = f"{root_dir}valid"
test_dataset_folder = f"{root_dir}test"

In [None]:
# Training images go through the augmenting pipeline.
train_dataset = datasets.ImageFolder(
    root=train_dataset_folder,
    transform=image_transforms['train'],
)
# Validation and test images both use the non-augmenting 'valid' pipeline.
valid_dataset = datasets.ImageFolder(
    root=val_dataset_folder,
    transform=image_transforms['valid'],
)
test_dataset = datasets.ImageFolder(
    root=test_dataset_folder,
    transform=image_transforms['valid'],
)

In [None]:
# # training set 중 validation 데이터로 사용할 비율
# valid_size = 0.3

# # validation으로 사용할 training indices를 얻는다.
# num_train = len(train_dataset)
# indices = list(range(num_train))
# np.random.shuffle(indices)
# valid_dataset = int(np.floor(valid_size * num_train))

In [None]:
# Keyword arguments forwarded to the DataLoaders. Only `shuffle` differs:
# it is on for training and off for evaluation.
train_params = dict(batch_size=64, shuffle=True, num_workers=1, drop_last=False)
valid_params = dict(batch_size=64, shuffle=False, num_workers=1, drop_last=False)

In [None]:
# Batch iterators for each split; the test loader reuses the validation settings.
train_loader = data.DataLoader(dataset=train_dataset, **train_params)
valid_loader = data.DataLoader(dataset=valid_dataset, **valid_params)
test_loader = data.DataLoader(dataset=test_dataset, **valid_params)

In [None]:
# Look at the first training batch only and print the shapes of its two tensors.
for batch in train_loader:
    x, y = batch
    print(x.shape, y.shape, sep="\n")
    break

In [None]:
# Look at the first validation batch only and print the shapes of its two tensors.
for batch in valid_loader:
    x, y = batch
    print(x.shape, y.shape, sep="\n")
    break

#### 모델 만들기

In [None]:
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
# Model settings. `cls_hidden_dims` lists the widths of the classifier's hidden
# layers; an empty list means a single linear layer.
config = dict(cls_hidden_dims=[])

In [None]:
class BasicBlock(nn.Module):
    """Residual block: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN, plus a skip path.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.
        downsample: When true, the first convolution and the skip path use
            stride 2.
    """

    def __init__(self, in_channels, out_channels, downsample):
        super().__init__()
        self.downsample = downsample
        stride = 2 if self.downsample else 1

        # The identity skip is only valid when input and output shapes match.
        # Use a 1x1 projection when the block downsamples, and also when the
        # channel count changes (previously that case failed at the addition).
        if self.downsample or in_channels != out_channels:
            self.down_skip_net = nn.Conv2d(in_channels=in_channels,
                                           out_channels=out_channels,
                                           kernel_size=1,
                                           stride=stride, padding=0)
        else:
            self.down_skip_net = None

        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=out_channels)

        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels,
                               kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=out_channels)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(residual(x) + skip(x))``."""
        skip = x if self.down_skip_net is None else self.down_skip_net(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Add the shortcut first and activate afterwards, as in the reference
        # ResNet block. Activating before the addition lets the block output
        # go negative and deviates from that design.
        return self.relu(out + skip)

class ResNet18(nn.Module):
    """ResNet-18 style classifier assembled from ``BasicBlock`` stages.

    Args:
        num_class: Output size of the final linear layer (default 10).
    """

    def __init__(self, num_class=10):
        super().__init__()

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages of two blocks each; stages 2-4 downsample and widen.
        self.layer1 = self.make_layer(64, 64, 2)
        self.layer2 = self.make_layer(64, 128, 2, downsample=True)
        self.layer3 = self.make_layer(128, 256, 2, downsample=True)
        self.layer4 = self.make_layer(256, 512, 2, downsample=True)

        # Head: global average pooling followed by one linear layer.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_class)

    def make_layer(self, in_channels, out_channels, num_blocks, downsample=False):
        """Stack blocks into one stage; only the first block may change shape."""
        first_block = BasicBlock(in_channels=in_channels,
                                 out_channels=out_channels,
                                 downsample=downsample)
        remaining = [
            BasicBlock(in_channels=out_channels,
                       out_channels=out_channels,
                       downsample=False)
            for _ in range(num_blocks - 1)
        ]
        return nn.Sequential(first_block, *remaining)

    def forward(self, x):
        """Run stem, the four stages and the head; returns ``(batch, num_class)``."""
        n_samples = x.shape[0]

        features = self.maxpool(self.relu(self.bn(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = self.avgpool(features)
        return self.fc(pooled.view(n_samples, -1))

In [None]:
# Output size handed to ResNet18's final linear layer inside CovidResNet.
# CovidResNet keeps only children()[:-1], i.e. it drops that layer, so the
# 2-way prediction size is decided by Classifier, not by this value.
num_classes = 10

In [None]:
class CovidResNet(nn.Module):
    """Image-embedding backbone: this file's ``ResNet18`` without its last layer.

    The network is built with freshly initialised weights (the pretrained
    torchvision model is commented out), so every layer stays trainable by
    default.

    Attributes:
        num_ftrs: Width of the flattened embedding returned by ``forward``
            (``in_features`` of the dropped ``fc`` layer).
        features: All children of ``ResNet18`` except the last one, in order.
    """

    def __init__(self, freeze_early_layers=False):
        """
        Args:
            freeze_early_layers: When true, set ``requires_grad = False`` on
                every parameter listed before the first ``layer2`` parameter.
                Off by default: the weights are randomly initialised, so
                freezing them would leave those layers permanently untrained.
                Enable it only after loading trained weights.
        """
        super(CovidResNet, self).__init__()

        # Uses the notebook-level `num_classes` and `device`.
        model = ResNet18(num_classes).to(device)
        self.num_ftrs = model.fc.in_features

        if freeze_early_layers:
            for name, param in model.named_parameters():
                if 'layer2' in name:
                    break
                param.requires_grad = False

        # Drop the final linear layer; what remains ends with the average pool.
        self.features = nn.Sequential(*list(model.children())[:-1])

    def forward(self, x):
        """Return the embedding flattened to ``(batch, -1)``."""
        x = self.features(x)
        return x.view(x.size(0), -1)

In [None]:
# Build the image-embedding backbone and show its layer structure as the cell output.
model_image = CovidResNet()
model_image

In [None]:
class Classifier(nn.Sequential):
    """Classification head that runs on top of an image-embedding model.

    NOTE(review): the class derives from ``nn.Sequential`` but defines its own
    ``forward``; the base class is left unchanged so that code elsewhere that
    relies on it keeps working.
    """

    def __init__(self, model_image, **config):
        """
        Args:
            model_image: Embedding model. It must expose ``num_ftrs`` (the
                embedding width) and return a ``(batch, num_ftrs)`` tensor.
            config: Model settings.
                ``cls_hidden_dims``: sequence of hidden-layer widths; missing
                or empty means a single linear layer.
                ``num_classes``: number of outputs, 2 when not given.
        """
        super(Classifier, self).__init__()

        self.model_image = model_image  # image embedding model

        self.input_dim = model_image.num_ftrs  # width of the image embedding
        self.dropout = nn.Dropout(0.1)

        # Accept a missing key, None, or any sequence (list / tuple).
        self.hidden_dims = list(config.get('cls_hidden_dims') or [])
        num_outputs = config.get('num_classes', 2)
        dims = [self.input_dim] + self.hidden_dims + [num_outputs]

        # One linear layer per consecutive pair of widths.
        self.predictor = nn.ModuleList(
            [nn.Linear(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:])]
        )

    def forward(self, v):
        """Embed the image batch ``v`` and return the class logits."""
        v_i = self.model_image(v)  # batch_size x input_dim

        last = len(self.predictor) - 1
        for i, layer in enumerate(self.predictor):
            v_i = layer(v_i)
            if i != last:
                # Hidden layers get dropout and ReLU; the last layer stays linear.
                v_i = F.relu(self.dropout(v_i))

        return v_i

In [None]:
# Put the classification head on top of the embedding backbone, using the
# settings in `config`, and show the combined structure as the cell output.
model = Classifier(model_image, **config)
model

#### 모델 학습 (Model training)

##### 모델 학습을 위한 설정