In [None]:
# from google.colab import drive
# drive.mount('/content/gdrive', force_remount = True)
# %cd /content/gdrive/MyDrive/

Mounted at /content/gdrive
/content/gdrive/MyDrive


In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pickle as pkl
from torch import Tensor

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [6]:
path_tr = '.\\lazydata\\'
path_te = '.\\lazydata\\'

In [7]:
class load(Dataset):
    def __init__(self, path, isTrain=True, transform = None):
        self.transform = transform
        path = path + ('train\\' if isTrain else 'test\\')
        self.pathx = path + 'X\\'
        self.pathy = path + 'Y\\'
        self.data = os.listdir(self.pathx)
        self.isTrain = isTrain
    def __getitem__(self, idx):
        f = self.data[idx]
        img0 = cv2.imread(self.pathx + f + '\\rgb\\0.png')
        img1 = cv2.imread(self.pathx + f + '\\rgb\\1.png')
        img2 = cv2.imread(self.pathx + f + '\\rgb\\2.png')
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        depth = np.load(self.pathx + f + '\\depth.npy')
        field_id = pkl.load(open(self.pathx + f + '\\field_id.pkl', 'rb'))
        if self.isTrain == False:
            return (img0, img1, img2, depth, torch.tensor(int(field_id)))
        y = np.load(self.pathy + f + '.npy')
        return (img0, img1, img2, depth, torch.tensor(int(field_id))), torch.tensor(y)
    def __len__(self):
        return len(self.data)

In [8]:
data_train = load(path_tr, isTrain = True)
data_test = load(path_te, isTrain = False)

In [9]:
def tensorToArray(data, isTrain=True):
    n_samples = len(data)
    if isTrain:
        (img0_, img1_, img2_, depth_, field_id_), y_ = data[0]
        img_shape, depth_shape, n_y = img0_.shape, depth_.shape, len(y_)
        y_array = np.empty(shape=(n_samples, n_y))
    else:
        (img0_, img1_, img2_, depth_, field_id_)= data[0]
        img_shape, depth_shape = img0_.shape, depth_.shape
    img0_array = np.empty(shape=(n_samples, img_shape[0], img_shape[1], img_shape[2]))
    img1_array = np.empty(shape=(n_samples, img_shape[0], img_shape[1], img_shape[2]))
    img2_array = np.empty(shape=(n_samples, img_shape[0], img_shape[1], img_shape[2]))
    depth_array = np.empty(shape=(n_samples, depth_shape[0], depth_shape[1], depth_shape[2]))

    for inx, d in enumerate(data):
        # print(inx)
        if isTrain:
            (img0, img1, img2, depth, field_id), y = d
            y_array[inx, :] = np.array(y)
            img0_array[inx, :, :, :] = img0
            img1_array[inx, :, :, :] = img1
            img2_array[inx, :, :, :] = img2
            depth_array[inx, :, :, :] = depth
        else:
            (img0, img1, img2, depth, field_id) = d
            img0_array[inx, :, :, :] = img0
            img1_array[inx, :, :, :] = img1
            img2_array[inx, :, :, :] = img2
            depth_array[inx, :, :, :] = depth
    if isTrain:
        return img0_array, img1_array, img2_array, depth_array, y_array
    return img0_array, img1_array, img2_array, depth_array

In [10]:
img0_array, img1_array, img2_array, depth_array, y_array = tensorToArray(data=data_train, isTrain=True)
img0_array_test, img1_array_test, img2_array_test, depth_array_test = tensorToArray(data=data_test, isTrain=False)

MemoryError: Unable to allocate 3.81 GiB for an array with shape (3396, 224, 224, 3) and data type float64

In [20]:
print(img0_array.shape)
print(depth_array.shape)
print(y_array.shape)

(3396, 224, 224, 3)
(3396, 3, 224, 224)
(3396, 12)


In [21]:
# The input shape should be (samples, width, height, 1)
def depth_normalization(depth):
    # normalized data = (data - Min number) / (Max number - Min number)
    min_num = np.min(depth)
    max_num = np.max(depth)
    normalized_depth = (depth-min_num)/(max_num-min_num)
    return normalized_depth

In [22]:
# The input shape should be (samples, width, height, 1)
def img_normalization(img):
    # Original image data is from 0-255, and we want to scale data to 0-1. Thus, we can just divide original data by 255.
    normalized_img = img/255.0
    return normalized_img

In [24]:
normalized_depth = depth_normalization(depth=depth_array)
normalized_img0 = img_normalization(img=img0_array)

normalized_depth_test = depth_normalization(depth=depth_array_test)
normalized_img0_test = img_normalization(img=img0_array_test)

In [25]:
def combine_image_depth(img, depth, whichImg=0):
    new_img = np.empty(shape=(img.shape[0], img.shape[1], img.shape[2], img.shape[3]+1))

    # First, try only use one image (img0), so use the first depth only.
    depth0 = depth[:, whichImg, :, :]

    for inx, _ in enumerate(img):
        # 2D array (224, 224) to 3D array (224, 224, 1)
        depth_4d = np.expand_dims(depth0[inx], 2)
        # combine img and depth into one array
        new_img[inx] = np.concatenate((img[inx], depth_4d), axis=2)

    return new_img

In [26]:
new_img = combine_image_depth(img=normalized_img0, depth=normalized_depth)
new_img_test = combine_image_depth(img=normalized_img0_test, depth=normalized_depth_test)

In [27]:
print(new_img.shape)
print(y_array.shape)
print(new_img_test.shape)

(3396, 224, 224, 4)
(3396, 12)
(849, 224, 224, 4)


In [28]:
# Expected input data size for CNN is (Samples, Channels, Heights, Widths).
# Thus, we have to reshape original data size (Samples, Heights, Widths, Channels) to the new size mentioned above.
def reshape_data(data):
    samples = data.shape[0]
    channels = data.shape[3]
    heights = data.shape[1]
    widths = data.shape[2]
    new_data = np.empty(shape=(samples, channels, heights, widths))

    for i in range(channels):
        new_data[:, i, :, :] = data[:, :, :, i]
    return new_data

In [29]:
ready_img = reshape_data(new_img)
ready_img_test = reshape_data(new_img_test)

In [30]:
print(ready_img.shape)

(3396, 4, 224, 224)


In [31]:
dataset_train = TensorDataset(Tensor(ready_img),Tensor(y_array))
train_dataloader = DataLoader(dataset=dataset_train, batch_size=128, shuffle=True, num_workers=2)

In [32]:
len(train_dataloader)

27

In [None]:
import pickle
save_dict = [ready_img, y_array, ready_img_test]
with open('lx_preprocessed_data.pkl', 'wb') as file:
    # A new file will be created
    pickle.dump(save_dict, file)

In [None]:
with open('lx_preprocessed_data.pkl', 'rb') as f:
    read_prep_data = pickle.load(f)
tr_data, tr_y, te_data = read_prep_data[0], read_prep_data[1], read_prep_data[2]

print(tr_data.shape)
print(tr_y.shape)
print(te_data.shape)

In [34]:
class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        identity = x.clone()
        x = self.relu(self.batch_norm1(self.conv1(x)))

        x = self.relu(self.batch_norm2(self.conv2(x)))

        x = self.conv3(x)
        x = self.batch_norm3(x)

        #downsample if needed
        if self.i_downsample is not None:
            identity = self.i_downsample(identity)
        #add identity
        x+=identity
        x=self.relu(x)

        return x

class Block(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Block, self).__init__()


        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
      identity = x.clone()

      x = self.relu(self.batch_norm2(self.conv1(x)))
      x = self.batch_norm2(self.conv2(x))

      if self.i_downsample is not None:
          identity = self.i_downsample(identity)
      print(x.shape)
      print(identity.shape)
      x += identity
      x = self.relu(x)
      return x




class ResNet(nn.Module):
    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size = 3, stride=2, padding=1)

        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*ResBlock.expansion, num_classes)

    def forward(self, x):
        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.max_pool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        ii_downsample = None
        layers = []

        if stride != 1 or self.in_channels != planes*ResBlock.expansion:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )

        layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride))
        self.in_channels = planes*ResBlock.expansion

        for i in range(blocks-1):
            layers.append(ResBlock(self.in_channels, planes))

        return nn.Sequential(*layers)



def ResNet50(num_classes, channels=3):
    return ResNet(Bottleneck, [3,4,6,3], num_classes, channels)

def ResNet101(num_classes, channels=3):
    return ResNet(Bottleneck, [3,4,23,3], num_classes, channels)

def ResNet152(num_classes, channels=3):
    return ResNet(Bottleneck, [3,8,36,3], num_classes, channels)

In [35]:
model_test = ResNet50(num_classes=12, channels=4)
x = torch.Tensor(128, 4, 224, 224)
y = model_test(x)
y.shape

torch.Size([128, 12])

In [36]:
model = ResNet101(num_classes=12, channels=4)
model = model.to(device)
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
# optimizer = torch.optim.SGD(CNN.parameters(), lr = 1e-3)

In [37]:
def train_model(dataloader, model, loss_fn, optimizer, n_epoch):
    for e in range(n_epoch):
        model.train()
        # Print epoch
        print(f'Starting epoch {e+1}')
        # Set current loss value
        current_loss = 0.0
        for batch_inx, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = loss_fn(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print statistics
            current_loss += loss.item()
            if batch_inx % 10 == 0:
                print('Loss after mini-batch %5d: %.3f' % (batch_inx + 1, current_loss / 10))
                current_loss = 0.0

In [1]:
train_model(dataloader=train_dataloader, model=model, loss_fn=loss_function, optimizer=optimizer, n_epoch=20)

NameError: name 'train' is not defined

In [None]:
test_prediction = model(ready_img_test)

In [23]:
# def test(dataloader, model, loss_fn):
#   size = len(dataloader.dataset)
#   num_batch = len(dataloader)
#   model.eval()
#   test_loss, correct = 0, 0
#   with torch.no_grad():
#     for x, y in dataloader:
#       x, y = x.to(device), y.to(device)
#       pred = model(x)
#       test_loss += loss_fn(pred, y).item()
#       correct += (pred.argmax(1) == y).type(torch.float).sum().item()
#   test_loss /= num_batch
#   correct /= size
#   print(f'test error \n accuracy: {(100 * correct):>0.1f} %, avg loss: {test_loss:>8f} \n')