https://github.com/walsvid/CoordConv/blob/master/coordconv.py
https://medium.com/analytics-vidhya/encoding-time-series-as-images-b043becbdbf3
https://eng.uber.com/coordconv/
https://towardsdatascience.com/reading-charts-with-convolutional-neural-networks-cbaabdd5f478

# Intro

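In this notebook we build a CNN classifier that combines three images per observation (1-, 6- and 12-month) with six numerical features (1-, 6- and 12-month return and volatility) to predict one of three classes.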

In [8]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

from coordconv import CoordConv2d

import copy
import argparse
import os
import logging
import sys
from tqdm import tqdm
from PIL import ImageFile
from PIL import Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

from ipywidgets import IntProgress
from IPython.display import display


In [9]:
# Custom dataset used to build the train, test, & validation data loaders
class NumericalAndImageDataset(Dataset):
    """
    Dataset backed by an overview CSV file in which every row references three
    images (columns "1_month_img", "6_month_img", "12_month_img"), six
    numerical features and a label (column "label_q").
    """

    # Columns that are stacked, in this order, into the numerical feature vector
    NUMERIC_COLUMNS = (
        "1_month_return", "6_month_return", "12_month_return",
        "1_month_volatility", "6_month_volatility", "12_month_volatility",
    )

    def __init__(self, overview_file: str, transform=None):
        """
        Initialize this dataset
        :param overview_file: location of the overview file (read with pandas.read_csv)
        :param transform: optional callable applied to each of the three images
        """
        self.overview = pd.read_csv(overview_file)
        self.transform = transform

    def __len__(self):
        """
        :return: number of rows in the overview file
        """
        return len(self.overview.index)

    def _load_image(self, path):
        """
        Load one image as RGB and apply the transform, if any
        :param path: location of the image file
        :return: the (optionally transformed) RGB image
        """
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert(); otherwise one file handle per
        # image stays open until the garbage collector gets to it.
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, idx):
        """
        Fetch one observation
        :param idx: positional row index into the overview file
        :return: tuple (img_1m, img_6m, img_12m, num_features, label) where
            num_features is a float32 tensor with one entry per NUMERIC_COLUMNS
            name and label is the row's value of the "label_q" column
        """
        img_1m = self._load_image(self.overview["1_month_img"].iloc[idx])
        img_6m = self._load_image(self.overview["6_month_img"].iloc[idx])
        img_12m = self._load_image(self.overview["12_month_img"].iloc[idx])

        # Select the numeric columns before picking the row so the resulting
        # array has a numeric dtype rather than `object`.
        numeric_row = self.overview[list(self.NUMERIC_COLUMNS)].iloc[idx].values
        num_features = torch.tensor(numeric_row, dtype=torch.float32)

        label = self.overview["label_q"].iloc[idx]

        return img_1m, img_6m, img_12m, num_features, label

In [10]:
class OneImageNet(nn.Module):
    """
    Classifier that fuses three images with six numerical features.

    Every image is processed by its own convolutional branch (the branches
    have the same layout but separate weights). The flattened branch outputs
    are concatenated and reduced to 32 features, which are concatenated with
    the 6 processed numerical features (32 + 6 = 38) and mapped to 3 outputs.
    """

    def __init__(self, fc_im_in_features=None):
        """
        Build the network
        :param fc_im_in_features: size of the concatenated, flattened output of
            the three image branches. With the default None the size is
            inferred from the first batch passed through forward().
        """
        super().__init__()

        self.image_1_features = self._make_image_branch()
        self.image_2_features = self._make_image_branch()
        self.image_3_features = self._make_image_branch()

        self.num_features = nn.Sequential(
            nn.Linear(6, 6),
            nn.ReLU(inplace=True)
        )

        # The flattened size depends on the resolution of the input images.
        # The previously hard-coded 28836 could never match: every branch ends
        # with 64 channels, so the concatenated size is always a multiple of
        # 64, which 28836 is not. LazyLinear infers the size on the first
        # forward pass instead.
        if fc_im_in_features is None:
            first_fc = nn.LazyLinear(64)
        else:
            first_fc = nn.Linear(fc_im_in_features, 64)

        self.fc_im = nn.Sequential(
            first_fc,
            nn.ReLU(inplace=True),
            nn.Linear(64, 32),
            nn.ReLU(inplace=True)
        )

        self.fc_fin = nn.Sequential(
            nn.Linear(38, 38),
            nn.ReLU(inplace=True),
            nn.Linear(38, 16),
            nn.ReLU(inplace=True),
            nn.Linear(16, 3)
        )

    @staticmethod
    def _make_image_branch():
        """
        Create the feature extractor applied to one image
        :return: a new nn.Sequential taking 3-channel input and emitting 64 channels
        """
        return nn.Sequential(
            CoordConv2d(3, 32, kernel_size=3, stride=1, with_r=True, use_cuda=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, img_1, img_2, img_3, num_features):
        """
        Run the network on one batch
        :param img_1: batch of images for the first branch
        :param img_2: batch of images for the second branch
        :param img_3: batch of images for the third branch
        :param num_features: batch of numerical features, 6 values per sample
        :return: tensor with 3 output values per sample
        """
        branches = (
            (self.image_1_features, img_1),
            (self.image_2_features, img_2),
            (self.image_3_features, img_3),
        )
        # Keep the batch dimension and flatten everything else per branch.
        flat = [branch(img).flatten(start_dim=1) for branch, img in branches]

        imgs = self.fc_im(torch.cat(flat, dim=1))
        nums = self.num_features(num_features)

        return self.fc_fin(torch.cat((nums, imgs), dim=1))

In [11]:
def test(model, test_loader, criterion):
    """
    Evaluate a model on every batch of a data loader
    :param model: module called as model(img_1ms, img_6ms, img_12ms, num_features)
    :param test_loader: iterable yielding (img_1ms, img_6ms, img_12ms, num_features, labels) batches
    :param criterion: loss function called as criterion(outputs, labels)
    :return: tuple (total_loss, total_acc): the sample-weighted mean loss as a
        float and the accuracy as a 0-dim float64 tensor
    :raises ValueError: if the loader yields no samples
    """
    model.eval()
    running_loss = 0
    running_corrects = 0
    running_total = 0

    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every batch.
    with torch.no_grad():
        for img_1ms, img_6ms, img_12ms, num_features, labels in test_loader:
            outputs = model(img_1ms, img_6ms, img_12ms, num_features)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

            # criterion returns a batch mean, so weight it by the batch size
            batch_size = img_1ms.size(0)
            running_loss += loss.item() * batch_size
            running_corrects += torch.sum(preds == labels)
            running_total += batch_size

    if running_total == 0:
        raise ValueError("test_loader yielded no samples")

    total_loss = running_loss / running_total
    total_acc = running_corrects.double() / running_total

    print("Test Loss: {}".format(total_loss))
    print("Test Accuracy: {}".format(total_acc))

    return total_loss, total_acc


# Evaluation helper, not a unit test: keep pytest from collecting it by name.
test.__test__ = False

In [12]:
def train(model, train_loader, validation_loader, criterion, optimizer, epochs,
          max_train_samples=2000, progress_max=18000):
    """
    Train a model and validate it after every epoch
    :param model: module called as model(img_1ms, img_6ms, img_12ms, num_features)
    :param train_loader: iterable yielding (img_1ms, img_6ms, img_12ms, num_features, labels) batches
    :param validation_loader: loader passed to test() at the end of every epoch
    :param criterion: loss function called as criterion(outputs, labels)
    :param optimizer: optimizer stepped once per training batch
    :param epochs: number of epochs to run
    :param max_train_samples: per-epoch cap on training samples; the epoch
        stops before the batch that would push the count past the cap. The
        default reproduces the previously hard-coded 2000; pass None to
        iterate over the whole loader.
    :param progress_max: maximum of the progress bar shown for every epoch
    :return: tuple (model, train_losses, val_losses, train_accs, val_accs)
        with one list entry per epoch
    :raises ValueError: if an epoch processes no training samples
    """
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    for epoch in range(epochs):
        print("Epoch: " + str(epoch))

        progress = IntProgress(min=0, max=progress_max)  # instantiate the bar
        display(progress)  # display the bar

        # ---- training phase ----
        model.train()
        running_loss = 0.0
        running_corrects = 0
        running_total = 0
        # Counted separately from the widget so that the cap does not depend
        # on how the widget stores (and possibly clamps) its value.
        samples_seen = 0

        for img_1ms, img_6ms, img_12ms, num_features, labels in train_loader:
            batch_size = len(img_1ms)
            samples_seen += batch_size
            progress.value = samples_seen

            if max_train_samples is not None and samples_seen > max_train_samples:
                break

            outputs = model(img_1ms, img_6ms, img_12ms, num_features)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            # criterion returns a batch mean, so weight it by the batch size
            running_loss += loss.item() * batch_size
            running_corrects += torch.sum(preds == labels.data)
            running_total += batch_size

        if running_total == 0:
            raise ValueError("no training samples were processed in epoch {}".format(epoch))

        epoch_loss = running_loss / running_total
        epoch_acc = running_corrects / running_total
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)

        print("Train Loss: {}".format(epoch_loss))
        print("Train Accuracy: {}".format(epoch_acc))

        # ---- validation phase ----
        val_loss, val_acc = test(model, validation_loader, criterion)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

    return model, train_losses, val_losses, train_accs, val_accs

In [13]:
def create_data_loaders(batch_size, data_dir="ModelData"):
    """
    Create the data loaders for the train, test and validation splits
    :param batch_size: number of samples per batch for all three loaders
    :param data_dir: directory containing obs_train.csv, obs_test.csv and obs_val.csv
    :return: tuple (train_data_loader, test_data_loader, val_data_loader)
    """
    # Kept as two separate pipelines so augmentation can be added to the
    # training split without touching evaluation.
    train_transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    eval_transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    def make_loader(csv_name, transform, shuffle):
        """Build the dataset for one overview file and wrap it in a DataLoader."""
        dataset = NumericalAndImageDataset(
            overview_file=os.path.join(data_dir, csv_name),
            transform=transform
        )
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    # Only the training split is shuffled; keeping test and validation in file
    # order makes their batches identical from run to run.
    train_data_loader = make_loader("obs_train.csv", train_transform, shuffle=True)
    test_data_loader = make_loader("obs_test.csv", eval_transform, shuffle=False)
    val_data_loader = make_loader("obs_val.csv", eval_transform, shuffle=False)

    return train_data_loader, test_data_loader, val_data_loader

In [14]:
# Hyper-parameters for this run
batch_size = 64
learning_rate = 0.00075
epochs = 100

# Data, model, loss and optimizer
train_loader, test_loader, val_loader = create_data_loaders(batch_size)
model = OneImageNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Use adaptive momentum optimizer

print("Starting Model Training")

model, train_losses, val_losses, train_accs, val_accs = train(
    model, train_loader, val_loader, criterion, optimizer, epochs
)

# Per-epoch history: one heading line followed by the list of values
print("Train Losses:", train_losses, sep="\n")
print("Validation Losses:", val_losses, sep="\n")
print("Train Accuracies:", train_accs, sep="\n")
print("Validation Accuracies:", val_accs, sep="\n")

print("Start Model Testing")

test_loss, test_acc = test(model, test_loader, criterion)

# NOTE(review): nothing is written to disk in this cell, yet the message
# below is printed unconditionally — confirm whether a save step is missing.
print('saved')

Starting Model Training
Epoch: 0


IntProgress(value=0, max=18000)

Train Loss: 0.9630043929623019
Train Accuracy: 0.4964717626571655


KeyboardInterrupt: 