https://github.com/walsvid/CoordConv/blob/master/coordconv.py
https://medium.com/analytics-vidhya/encoding-time-series-as-images-b043becbdbf3
https://eng.uber.com/coordconv/
https://towardsdatascience.com/reading-charts-with-convolutional-neural-networks-cbaabdd5f478

# Intro

In this notebook we build a CNN classifier that predicts a three-class one-month label (`label_1m`) for each observation. Each input consists of one image of the Gramian Angular Difference Field for the past month, one image of the area plot of smoothed log prices for the past month, and a vector of numerical features: the 1-, 6-, and 12-month returns and volatilities, plus the `stock` column.

In [1]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

from coordconv import CoordConv2d

import copy
import argparse
import os
import logging
import sys
from tqdm import tqdm
from PIL import ImageFile
from PIL import Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

from ipywidgets import IntProgress
from ipywidgets import FloatProgress
from IPython.display import display


In [2]:
# Create a custom data loader for the train, test, & validation data
class NumericalAndImageDataset(Dataset):
    """Dataset pairing two images with numerical features and a label.

    Every row of the overview CSV describes one observation: the paths of two
    image files (columns ``1_month_img`` and ``1_month_img_bar``), the
    numerical columns listed in ``FEATURE_COLUMNS`` and the target column
    ``label_1m``.
    """

    # Numerical columns, in the order they appear in the returned feature tensor.
    FEATURE_COLUMNS = [
        "1_month_return", "6_month_return", "12_month_return",
        "1_month_volatility", "6_month_volatility", "12_month_volatility",
        "stock",
    ]

    def __init__(self, overview_file: str, transform=None):
        """
        Initialize this dataset
        :param overview_file: location of the overview file (read with ``pd.read_csv``)
        :param transform: optional callable applied to both images of an item
        """
        self.overview = pd.read_csv(overview_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the overview file."""
        return len(self.overview.index)

    @staticmethod
    def _load_rgb(path):
        """
        Load one image as RGB
        :param path: location of the image file
        :return: the decoded image, converted to RGB
        """
        # ``convert`` forces the pixel data to be read, so the returned image
        # no longer depends on the file; the ``with`` block then closes the
        # file handle deterministically instead of leaving it to the GC.
        with Image.open(path) as img:
            return img.convert('RGB')

    def __getitem__(self, idx):
        """
        Fetch one observation
        :param idx: positional row index into the overview file
        :return: tuple ``(img_1m, img_1m_bar, num_features, label)`` where
                 ``num_features`` is a float32 tensor with one entry per
                 column in ``FEATURE_COLUMNS``
        """
        img_1m = self._load_rgb(self.overview["1_month_img"].iloc[idx])
        img_1m_bar = self._load_rgb(self.overview["1_month_img_bar"].iloc[idx])

        feature_row = self.overview[self.FEATURE_COLUMNS].iloc[idx]
        # Convert to float32 in numpy so the tensor is built directly in its
        # final dtype.
        num_features = torch.tensor(feature_row.to_numpy(dtype="float32"))

        label = self.overview["label_1m"].iloc[idx]

        if self.transform is not None:
            img_1m = self.transform(img_1m)
            img_1m_bar = self.transform(img_1m_bar)

        return img_1m, img_1m_bar, num_features, label

In [3]:
class OneImageNet(nn.Module):
    """Three-branch classifier over two images and a numerical feature vector.

    Each image goes through its own convolutional stack and dense head that
    produces 3 values; the 7 numerical features go through a small dense
    branch that also produces 3 values. The three outputs are concatenated
    (9 values) and mapped to 3 output logits by ``fc_fin``.
    """

    def __init__(self, image_size=30):
        """
        Build the network
        :param image_size: spatial size of the input images, either an int
                           (square images) or a ``(height, width)`` pair.
                           The default of 30 reproduces the original
                           hard-coded flattened size of ``21632 // 2``.
        :raises ValueError: if the images are too small to survive the two
                            convolutions and the pooling layer
        """
        super(OneImageNet, self).__init__()

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # Each unpadded 3x3 convolution trims 2 pixels per axis (4 in total);
        # the 2x2 max-pool with stride 2 then halves each axis, rounding down.
        pooled_height = (height - 4) // 2
        pooled_width = (width - 4) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                "image_size {!r} is too small; each side must be at least 6".format(image_size)
            )
        flat_size = 64 * pooled_height * pooled_width

        # Keep this creation order: it fixes the state-dict layout and the
        # order in which the parameter initialisers consume random numbers.
        self.image_1_features = self._conv_stack()
        self.image_1_flat = self._image_head(flat_size)

        self.image_2_features = self._conv_stack()
        self.image_2_flat = self._image_head(flat_size)

        self.num_features = nn.Sequential(
            nn.Linear(7, 6),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(6, 3),
            nn.ReLU(inplace=True)
        )

        self.fc_fin = nn.Sequential(
            nn.Dropout(p=0.25),
            nn.Linear(9, 32),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(32, 3)
        )

    @staticmethod
    def _conv_stack():
        """Return the convolutional feature extractor used by each image branch."""
        return nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    @staticmethod
    def _image_head(flat_size):
        """Return the dense head reducing ``flat_size`` flattened features to 3 values."""
        return nn.Sequential(
            nn.Dropout(p=0.25),
            nn.Linear(flat_size, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(128, 3),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def _embed_image(features, head, img):
        """Run one image batch through its conv stack and dense head."""
        maps = features(img)
        # Flatten everything except the batch dimension.
        return head(torch.flatten(maps, 1))

    def forward(self, img_1, img_2, num_features):
        """
        Compute the output logits for a batch
        :param img_1: batch for the first image branch, 3 channels
        :param img_2: batch for the second image branch, 3 channels
        :param num_features: batch of numerical feature vectors with 7 entries each
        :return: tensor with 3 logits per sample
        """
        step1 = self._embed_image(self.image_1_features, self.image_1_flat, img_1)
        step2 = self._embed_image(self.image_2_features, self.image_2_flat, img_2)
        nums = self.num_features(num_features)

        return self.fc_fin(torch.cat((step1, step2, nums), 1))

In [4]:
def test(model, test_loader, criterion):
    model.eval()
    running_loss = 0
    running_corrects = 0
    running_total = 0

    for img_1ms, img_1m_bars, num_features, labels in test_loader:
        outputs = model(img_1ms, img_1m_bars, num_features)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)

        running_loss += loss.item() * img_1ms.size(0)
        running_corrects += torch.sum(preds == labels.data)

        running_total += len(img_1ms)

    total_loss = running_loss / running_total
    total_acc = running_corrects.double() / running_total

    print("Test Loss: {}".format(total_loss))
    print("Test Accuracy: {}".format(total_acc))

    return total_loss, total_acc

In [5]:
def train(model, train_loader, validation_loader, criterion, optimizer, epochs):
    """
    Train the model and validate it after every epoch
    :param model: module called as ``model(img_1ms, img_1m_bars, num_features)``
    :param train_loader: iterable of ``(img_1ms, img_1m_bars, num_features, labels)``
                         training batches; it is iterated once per epoch
    :param validation_loader: loader passed to ``test`` after every epoch
    :param criterion: loss callable ``criterion(outputs, labels)``
    :param optimizer: optimizer that updates the model parameters
    :param epochs: number of passes over ``train_loader``
    :return: tuple ``(model, train_losses, val_losses, train_accs, val_accs)``
             with one list entry per epoch
    :raises ValueError: if ``train_loader`` yields no samples
    """
    # Size the sample progress bar from the real dataset instead of a
    # hard-coded count. Loaders that expose no sized ``dataset`` start with
    # a minimal bar that grows as samples are seen.
    try:
        expected_samples = len(train_loader.dataset)
    except (AttributeError, TypeError):
        expected_samples = 0

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    for epoch in range(epochs):
        print("Epoch: " + str(epoch))

        samples_bar = IntProgress(min=0, max=max(expected_samples, 1))  # samples seen this epoch
        display(samples_bar)

        accuracy_bar = FloatProgress(min=0, max=1)  # running training accuracy
        display(accuracy_bar)

        # ---- training phase ----
        model.train()
        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        for img_1ms, img_1m_bars, num_features, labels in train_loader:
            outputs = model(img_1ms, img_1m_bars, num_features)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            batch_size = img_1ms.size(0)
            running_loss += loss.item() * batch_size
            running_corrects += torch.sum(preds == labels)
            running_total += batch_size

            # Never let the value exceed the bar's maximum.
            if running_total > samples_bar.max:
                samples_bar.max = running_total
            samples_bar.value = running_total
            # Hand the widget a plain float rather than a tensor.
            accuracy_bar.value = float(running_corrects) / running_total

        if running_total == 0:
            raise ValueError("train_loader yielded no samples; cannot compute metrics")

        epoch_loss = running_loss / running_total
        epoch_acc = running_corrects / running_total
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)

        print("Train Loss: {}".format(epoch_loss))
        print("Train Accuracy: {}".format(epoch_acc))

        # ---- validation phase ----
        val_loss, val_acc = test(model, validation_loader, criterion)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

    return model, train_losses, val_losses, train_accs, val_accs

In [6]:
def create_data_loaders(batch_size, data_dir="ModelData", image_size=(30, 30)):
    """
    Build the train, test and validation data loaders
    :param batch_size: number of samples per batch for all three loaders
    :param data_dir: directory containing ``obs_train.csv``, ``obs_test.csv``
                     and ``obs_val.csv``
    :param image_size: ``(height, width)`` every image is resized to; the
                       model consuming the batches must be built for the same size
    :return: tuple ``(train_data_loader, test_data_loader, val_data_loader)``
    """
    # Training and evaluation currently share the same deterministic
    # pipeline; they are kept separate so augmentation can be added to the
    # training side only.
    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])

    test_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])

    def build_loader(file_name, transform, shuffle):
        """Wrap one overview CSV from ``data_dir`` in a DataLoader."""
        dataset = NumericalAndImageDataset(
            overview_file=os.path.join(data_dir, file_name),
            transform=transform
        )
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    # Only the training data is shuffled: the evaluation metrics do not
    # depend on sample order, and a fixed order keeps evaluation runs
    # comparable.
    train_data_loader = build_loader("obs_train.csv", train_transform, shuffle=True)
    test_data_loader = build_loader("obs_test.csv", test_transform, shuffle=False)
    val_data_loader = build_loader("obs_val.csv", test_transform, shuffle=False)

    return train_data_loader, test_data_loader, val_data_loader

In [7]:
# Hyper-parameters for this run.
batch_size = 128
learning_rate = 0.001
epochs = 20
seed = 42

# Fix torch's random state (weight initialisation, dropout, shuffling) so
# that a fresh "Restart & Run All" reproduces the same numbers.
torch.manual_seed(seed)

train_loader, test_loader, val_loader = create_data_loaders(batch_size)
model = OneImageNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate) # Use adaptive momentum optimizer

print("Starting Model Training")

model, train_losses, val_losses, train_accs, val_accs = train(model, train_loader, val_loader, criterion, optimizer, epochs)

# The accuracies are 0-dim tensors; print them as plain floats so the
# output is a readable list of numbers. The lists themselves are unchanged.
print("Train Losses:")
print(train_losses)
print("Validation Losses:")
print(val_losses)
print("Train Accuracies:")
print([float(acc) for acc in train_accs])
print("Validation Accuracies:")
print([float(acc) for acc in val_accs])

print("Start Model Testing")

test_loss, test_acc = test(model, test_loader, criterion)

# Nothing is written to disk in this cell, so do not report a save.
print("Finished Model Testing")

Starting Model Training
Epoch: 0


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.1026315643372437
Train Accuracy: 0.3284476399421692
Test Loss: 1.1003370681030615
Test Accuracy: 0.28657487091222034
Epoch: 1


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.1006622219507673
Train Accuracy: 0.33554571866989136
Test Loss: 1.0963232059692147
Test Accuracy: 0.28657487091222034
Epoch: 2


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.099641828762043
Train Accuracy: 0.3356378972530365
Test Loss: 1.0972474011209459
Test Accuracy: 0.40834767641996556
Epoch: 3


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.1004599853251185
Train Accuracy: 0.33637535572052
Test Loss: 1.0979969704623067
Test Accuracy: 0.28657487091222034
Epoch: 4


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.099764816528928
Train Accuracy: 0.3326880633831024
Test Loss: 1.0986228584217328
Test Accuracy: 0.286144578313253
Epoch: 5


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.0991120897563158
Train Accuracy: 0.33720502257347107
Test Loss: 1.0994485729532686
Test Accuracy: 0.28313253012048195
Epoch: 6


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

Train Loss: 1.0998328006373042
Train Accuracy: 0.32955384254455566
Test Loss: 1.0929936470140074
Test Accuracy: 0.28657487091222034
Epoch: 7


IntProgress(value=0, max=12250)

FloatProgress(value=0.0, max=1.0)

KeyboardInterrupt: 