In [1]:
import torch 
from torch import nn
import numpy as np
import math
import pandas as pd
import matplotlib


if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())

"""
def ChangeDateShiller(date):
    date = date.split()
    firstElement = date[0]
    firstElement = firstElement.split('-')
    firstElement[0], firstElement[1] = firstElement[1], firstElement[0]
    firstElement[1], firstElement[2] = firstElement[2], firstElement[1]
    if len(firstElement[0]) == 1:
        firstElement[0] = "0" + firstElement
    if len(firstElement[1]) == 1:
        firstElement[1] = "0" + firstElement
    firstElement = "/".join(firstElement)
    return firstElement
"""

#Rough Estimate made Graphically
def MultiplyPE(pe):
    pe = pe * 25
    return pe

#Scales PE for 2012 y intercept
def ScalePERatio(df):
    df['Shiller PE Ratio'] = df['Shiller PE Ratio'].apply(MultiplyPE)
    return df

    
def BuildSandPDataSet():
    df = pd.read_csv("SandP500Data.csv", sep = ',')
    #df.drop('Volume', inplace = True, axis=1)
    df = df.sort_index(axis=0,ascending=False).reset_index()
    df.drop('index', inplace = True, axis = 1)
    return df

def BuildShillerPeDataSet():
    df = pd.read_csv("ShillerPERatio.csv", sep = ',', skiprows = [0])
    df = df.rename(columns={'DateTime': 'Date'})
    #df['Date'] = df['Date'].apply(ChangeDateShiller)
    df = df.sort_index(axis=0,ascending=False).reset_index()
    df.drop('index', inplace = True, axis = 1)
    return df

def BuildDataset():
    SP = BuildSandPDataSet()
    PE = BuildShillerPeDataSet()
    df = pd.merge(SP, PE, how="inner", on=["Date"])
    df = df[['Date', 'Open', 'High', 'Low', 'Volume', 'Shiller PE Ratio', 'Close']]
    return df

def MergeByDate(df1, df2):
    dfinal = df1.merge(df2, on="Date", how = 'inner')
    return dfinal


#Looks like the derivatives of each are correlated
#Even though maybe the magnitudes are not
def GraphPEToClose(SandPData, PEData, logy = True):
    if logy == False:
        #PEData = ScalePERatio(PEData)
        title = "Graph of S&P and Schiller PE"
    else:
         title = "Graph of S&P and Schiller PE"
        
    dfinal = MergeByDate(SandPData, PEData)
    dfinal[:].plot(x='Date', y=['Close', 'Shiller PE Ratio'], figsize=(10,5), logy = logy, title = title, grid=True)

def PEGraph():
    SP = BuildSandPDataSet()
    PE = BuildShillerPeDataSet()
    GraphPEToClose(SP, PE)
    
def BuildInputSequence(dataset, window, prediction):
    
    features = []
    labels = []

    for i in range(0, len(dataset) - window + 1 - prediction):
        feature = []
        label = []
        for j in range(0, window):
            row = []
            row.append(np.log(dataset['Open'][i + j]))
            row.append(np.log(dataset['High'][i + j]))
            row.append(np.log(dataset['Low'][i + j]))
            row.append(np.log(dataset['Volume'][i + j]))
            row.append(np.log(dataset['Shiller PE Ratio'][i + j]))
            row.append(np.log(dataset['Close'][i + j]))
            feature.append(row)

        for j in range(0, prediction):
            label.append(np.log(dataset['Close'])[i + window + j])

        features.append(feature)
        labels.append(label)
        
    return [features, labels]

def TrainTestDataset(window = 1, prediction = 1):
    df = BuildDataset()
    a = BuildInputSequence(df, window, prediction)

    X_train = a[0][:round(len(a[0])/2)]
    Y_train = a[0][(len(a[0]) - round(len(a[0])/2)) + 1:]

    X_test = a[1][:round(len(a[1])/2)]
    Y_test = a[1][(len(a[1]) - round(len(a[1])/2)) + 1:]
    
    return [X_train, Y_train, X_test, Y_test]


class RNN(nn.Module):
    
    def __init__(self, input_size, output_size, hidden_size, layers):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.layers = layers
        self.rnn = nn.RNN(input_size, hidden_size, layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.layers, batch_size, self.hidden_size).to(device)
        return hidden
    
def Train(Model, X_train, Y_train, epochs, learningRate):
    
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(Model.parameters(), lr=learningRate)
    
    X_train = X_train.to(torch.float32)
    X_train = X_train.to(device)
    
    for epoch in range(1, epochs + 1):
        
        optimizer.zero_grad() 
        
        output, hidden = Model(X_train)
        
        hidden = hidden.to(torch.float32)
        
        output = output.to(torch.float32)
        output = output.to(device)
        
        Y_train = Y_train.to(torch.float32)
        Y_train = Y_train.to(device)
        
        loss = criterion(output, Y_train.view(-1).float())
        
        loss.backward() 
        optimizer.step() 

        if epoch%10 == 0:
            print('Epoch: {}/{}.............'.format(epoch, epochs), end=' ')
            print("Loss: {:.4f}".format(loss.item()))
    
    
#rnn = RNN(6, 32, 1)
D = TrainTestDataset(3)
X_train, Y_train, X_test, Y_test = D[0], D[1], D[2], D[3]

X_train = torch.Tensor(X_train)
Y_train = torch.Tensor(Y_train)

print(X_train.shape)

Model = RNN(5*3, 1, 32, 1)
Model = Model.to(device)

Train(Model, X_train, Y_train, 1000, 0.01)


Cuda Device Available
Name of the Cuda Device:  NVIDIA GeForce RTX 3060 Laptop GPU
GPU Computational Capablity:  (8, 6)
torch.Size([222, 3, 6])


RuntimeError: input.size(-1) must be equal to input_size. Expected 15, got 6