In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import sampler

import os
import pandas as pd
from skimage import io, transform
import numpy as np

import matplotlib.pyplot as plt

#import torchvision.datasets as dset
import torchvision.transforms as T

In [14]:
# Relative path of the directory holding the training biopsy images (.tiff files)
image_dir = '../data/train_images'
# The first NUM_TRAIN rows feed the training loader; rows from NUM_TRAIN up to 8492 feed the validation loader
NUM_TRAIN = 6800 # out of 8492 samples in total

In [21]:
# Customized Dataset
class ProstateCancerDataset(Dataset):
    """Map-style dataset pairing prostate biopsy images with their labels.

    Every item is a dict with the keys 'image', 'isup_grade' and
    'gleason_score'.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file listing the samples
            root_dir (string): Path to the directory with all images
            transform (callable, optional): Called on each sample dict before
                it is returned
        """
        self.root_dir = root_dir
        self.transform = transform
        # Rows are shuffled once with a fixed seed so the order is reproducible.
        # (Alternative: keep the csv order and let the DataLoader shuffle.)
        label_table = pd.read_csv(csv_file)
        self.cancer_df = label_table.sample(frac=1, random_state=1)

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.cancer_df)

    def __getitem__(self, idx):
        """Load the image and labels stored at positional row ``idx``."""
        table = self.cancer_df
        # Column 0 holds the image file stem; the files carry a .tiff suffix.
        file_name = '{}.tiff'.format(table.iloc[idx, 0])
        # NOTE: conserve_memory is left at its default; switching it off
        # reportedly improves performance.
        pages = io.MultiImage(os.path.join(self.root_dir, file_name))
        # Columns 2 and 3 carry the ISUP grade and the Gleason score.
        isup_grade = table.iloc[idx, 2]
        gleason_score = table.iloc[idx, 3]
        # The last entry of the multi-image is used -- presumably the most
        # downsampled level (a downsample rate of 16 is recommended to speed
        # up resizing); TODO confirm against the image files.
        sample = {
            'image': pages[-1],
            'isup_grade': isup_grade,
            'gleason_score': gleason_score,
        }
        return self.transform(sample) if self.transform else sample

In [4]:
# Customize Transforms
class Rescale(object):
    """Resize the image of a sample to a fixed size.

    Args:
        output_size (tuple): Desired output size; it is passed unchanged to
            ``transform.resize``.
    """

    def __init__(self, output_size):
        # Only tuples are accepted as the target size.
        assert isinstance(output_size, tuple)
        self.output_size = output_size

    def __call__(self, sample):
        """Return a new sample dict with a resized image and the same labels."""
        rescaled = dict(
            image=transform.resize(sample['image'], self.output_size),
            isup_grade=sample['isup_grade'],
            gleason_score=sample['gleason_score'],
        )
        return rescaled
    

class ToTensor(object):
    """Convert the image ndarray in a sample to a torch Tensor.

    The image is expected as an ndarray with the color axis last; it is
    returned as a Tensor with the color axis first. The two label entries are
    passed through untouched.
    """

    def __call__(self, sample):
        """Return a new sample dict whose 'image' is a [C,H,W] torch Tensor.

        Args:
            sample (dict): Dict with the keys 'image' (3-D ndarray),
                'isup_grade' and 'gleason_score'.

        Returns:
            dict: Same keys; 'image' is a Tensor created with
            ``torch.from_numpy`` (dtype preserved, memory shared with the
            transposed array).
        """
        img = sample['image']
        # Swap color axis to [C,H,W]
        img = img.transpose(2, 0, 1)
        # Previously the transposed ndarray was returned as-is, so the class
        # never produced a Tensor despite its name and docstring.
        return {'image': torch.from_numpy(img),
                'isup_grade': sample['isup_grade'],
                'gleason_score': sample['gleason_score']}

# Data Preparation

In [22]:
# Compose the Rescale and ToTensor transforms.
# More transforms to try: crop, normalize, etc.
# Transforms are also a useful tool for data augmentation.
biopsy_train = ProstateCancerDataset(csv_file='train_512.csv',
                                     root_dir=image_dir,
                                     transform=T.Compose([
                                         Rescale((512, 512)),
                                         ToTensor()
                                     ]))

# Training draws (in random order) from the first NUM_TRAIN rows of the dataset.
loader_train = DataLoader(biopsy_train, batch_size=16, num_workers=4,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Validation draws from every remaining row. The upper bound comes from the
# dataset itself instead of a hard-coded row count, so the split stays correct
# if the csv file grows or shrinks.
loader_val = DataLoader(biopsy_train, batch_size=16, num_workers=4,
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(biopsy_train))))

In [23]:
# Sanity check: fetch only the first validation batch and print its ISUP grades.
# (To look at an image instead: plt.imshow(batch_sample['image'][0].permute(1,2,0)))
first_val_batch = next(enumerate(loader_val), None)
if first_val_batch is not None:
    batch_i, batch_sample = first_val_batch
    print(batch_sample['isup_grade'])

tensor([0, 1, 0, 5, 3, 4, 3, 1, 2, 0, 4, 2, 1, 0, 4, 2])


# Two-Layer Network

In [12]:
# Total number of rows in the dataset that both the training and validation loaders sample from
print(len(biopsy_train))

8492
