# Intro

In this notebook we build a CNN classifier for the problem

In [8]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

import copy
import argparse
import os
import logging
import sys
from tqdm import tqdm
from PIL import ImageFile
from PIL import Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

from ipywidgets import IntProgress
from IPython.display import display


In [9]:
# Create a custom data loader for the train, test, & validation data
class NumericalAndImageDataset(Dataset):
    def __init__(self, overview_file: str, transform=None):
        """
        Initialize this dataloader
        :param overview_file: location of the overview file
        :param transform: transformer for the images
        """
        self.overview= pd.read_csv(overview_file)
        self.transform = transform


    def __len__(self):
        return len(self.overview.index)


    def __getitem__(self, idx):
        img_path_1m = self.overview["1_month_img"].iloc[idx]
        img_path_6m = self.overview["6_month_img"].iloc[idx]
        img_path_12m = self.overview["12_month_img"].iloc[idx]

        img_1m = Image.open(img_path_1m).convert('RGB')
        img_6m = Image.open(img_path_6m).convert('RGB')
        img_12m = Image.open(img_path_12m).convert('RGB')

        num_features = torch.tensor(self.overview[[
            "1_month_return", "6_month_return", "12_month_return",
            "1_month_volatility", "6_month_volatility", "12_month_volatility"
        ]].iloc[idx].values)

        label = self.overview.label.iloc[idx]

        if self.transform:
            img_1m = self.transform(img_1m)
            img_6m = self.transform(img_6m)
            img_12m = self.transform(img_12m)

        return img_1m, img_6m, img_12m, num_features, label

In [10]:
class AllImageNet(nn.Module):
    def __init__(self):
        super(AllImageNet, self).__init__()

        self.image_1_features = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=5, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
            nn.Conv2d(8, 32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
        )

        self.image_2_features = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=5, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
            nn.Conv2d(8, 32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
        )

        self.image_3_features = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=5, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
            nn.Conv2d(8, 32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(),
        )

        self.fc = nn.Sequential(
            nn.Linear(32 * 34 * 70 * 3, 32),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(32, 16),
            nn.ReLU(inplace=True),
            nn.Linear(16, 3)
        )

    def forward(self, img_1, img_2, img_3):
        step1 = self.image_1_features(img_1)
        step1 = step1.view(step1.size(0), -1)
        step2 = self.image_2_features(img_2)
        step2 = step2.view(step2.size(0), -1)
        step3 = self.image_3_features(img_1)
        step3 = step3.view(step3.size(0), -1)
        res = self.fc(torch.cat((step1, step2, step3), 1))
        return res

In [11]:
def test(model, test_loader, criterion):
    model.eval()
    running_loss = 0
    running_corrects = 0
    running_total = 0

    for img_1ms, img_6ms, img_12ms, _, labels in test_loader:
        outputs = model(img_1ms, img_6ms, img_12ms)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)

        running_loss += loss.item() * img_1ms.size(0)
        running_corrects += torch.sum(preds == labels.data)

        running_total += len(img_1ms)

    total_loss = running_loss / running_total
    total_acc = running_corrects.double() / running_total

    print("Test Loss: {}".format(total_loss))
    print("Test Accuracy: {}".format(total_acc))

    return total_loss, total_acc

In [12]:
def train(model, train_loader, validation_loader, criterion, optimizer, epochs):
    datasets = {'train':train_loader, 'valid':validation_loader}
    #log = Report(epochs)

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    for epoch in range(epochs):
        print("Epoch: " + str(epoch))

        f = IntProgress(min=0, max=5000) # instantiate the bar
        display(f) # display the bar

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
                running_loss = 0.0
                running_corrects = 0
                running_total = 0

                for pos, (img_1ms, img_6ms, img_12ms, _, labels) in enumerate(datasets[phase]):
                    f.value += len(img_1ms)

                    outputs = model(img_1ms, img_6ms, img_12ms)
                    loss = criterion(outputs, labels)

                    if phase=='train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    _, preds = torch.max(outputs, 1)
                    running_loss += loss.item() * img_1ms.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                    running_total += len(img_1ms)

                train_losses.append(running_loss / running_total)
                train_accs.append(running_corrects / running_total)

            if phase == "valid":
                val_loss, val_acc = test(model, datasets[phase], criterion)
                val_losses.append(val_loss)
                val_accs.append(val_acc)

    return model, train_losses, val_losses, train_accs, val_accs

In [13]:
def create_data_loaders(batch_size):
    train_data_path = "ModelData/obs_train.csv"
    test_data_path = "ModelData/obs_test.csv"
    val_data_path = "ModelData/obs_val.csv"

    train_transform = transforms.Compose([
        # transforms.RandomResizedCrop((224, 224)),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    test_transform = transforms.Compose([
        # transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    train_data = NumericalAndImageDataset(
        overview_file=train_data_path,
        transform=train_transform
    )
    train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = NumericalAndImageDataset(
        overview_file=test_data_path,
        transform=test_transform
    )
    test_data_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

    val_data = NumericalAndImageDataset(
        overview_file=val_data_path,
        transform=test_transform
    )
    val_data_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True)

    return train_data_loader, test_data_loader, val_data_loader

In [14]:
batch_size = 32
learning_rate = 0.001
epochs = 5

train_loader, test_loader, val_loader = create_data_loaders(batch_size)
model = AllImageNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print("Starting Model Training")

model, train_losses, val_losses, train_accs, val_accs = train(model, train_loader, val_loader, criterion, optimizer, epochs)

print("Train Losses:")
print(train_losses)
print("Validation Losses:")
print(val_losses)
print("Train Accuracies:")
print(train_accs)
print("Validation Accuracies:")
print(val_accs)

print("Start Model Testing")

test_loss, test_acc = test(model, test_loader, criterion)

print("Test Loss: {}".format(test_loss))
print("Test Accuracy: {}".format(test_acc))

print('saved')

Starting Model Training
Epoch: 0


IntProgress(value=0, max=5000)

Test Loss: 1.046882831848924
Test Accuracy: 0.4505229283990346
Epoch: 1


IntProgress(value=0, max=5000)

Test Loss: 1.0252062907069321
Test Accuracy: 0.4505229283990346
Epoch: 2


IntProgress(value=0, max=5000)

Test Loss: 1.0159294136282138
Test Accuracy: 0.4505229283990346
Epoch: 3


IntProgress(value=0, max=5000)

Test Loss: 1.0135572675918247
Test Accuracy: 0.4505229283990346
Epoch: 4


IntProgress(value=0, max=5000)

Test Loss: 1.0136391087011858
Test Accuracy: 0.4505229283990346
Epoch: 5


IntProgress(value=0, max=5000)

Test Loss: 1.0139434237395768
Test Accuracy: 0.4505229283990346
Epoch: 6


IntProgress(value=0, max=5000)

Test Loss: 1.0141994014250493
Test Accuracy: 0.4505229283990346
Epoch: 7


IntProgress(value=0, max=5000)

KeyboardInterrupt: 