In [12]:
import torch 
from torch import nn
import numpy as np
import math
import pandas as pd
import matplotlib
from sklearn.model_selection import train_test_split


# Select the compute device once at import time. `device` is read by the
# model/training code further down, so it must be defined on every machine.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())
else:
    # Without this fallback `device` stays undefined on CPU-only machines and
    # every later `.to(device)` raises NameError.
    device = torch.device("cpu")

# Disabled helper, kept as a bare string so it never executes.
# It splits the first whitespace-separated token of `date` on '-', reorders
# the parts from 'a-b-c' to 'b/c/a' and is meant to zero-pad the first two.
# NOTE(review): the padding lines do `"0" + firstElement`, where firstElement
# is a list, which would raise TypeError; they most likely meant
# `"0" + firstElement[0]` / `"0" + firstElement[1]`. Fix before re-enabling.
"""
def ChangeDateShiller(date):
    date = date.split()
    firstElement = date[0]
    firstElement = firstElement.split('-')
    firstElement[0], firstElement[1] = firstElement[1], firstElement[0]
    firstElement[1], firstElement[2] = firstElement[2], firstElement[1]
    if len(firstElement[0]) == 1:
        firstElement[0] = "0" + firstElement
    if len(firstElement[1]) == 1:
        firstElement[1] = "0" + firstElement
    firstElement = "/".join(firstElement)
    return firstElement
"""

def MultiplyPE(pe):
    """Return ``pe`` multiplied by 25.

    The factor is a rough estimate that was made graphically.
    """
    scale = 25
    return pe * scale

def ScalePERatio(df):
    """Scale the 'Shiller PE Ratio' column of ``df`` with ``MultiplyPE``.

    Scales PE for the 2012 y intercept. The column is overwritten on the
    frame that was passed in, and that same frame is returned.
    """
    column = 'Shiller PE Ratio'
    scaled = df[column].apply(MultiplyPE)
    df[column] = scaled
    return df

    
def BuildSandPDataSet(path="SandP500Data.csv"):
    """Load the S&P 500 price CSV with its rows in reverse file order.

    Args:
        path: Location of the comma-separated price file. Defaults to the
            file name the notebook has always used.

    Returns:
        A DataFrame holding every column of the CSV, with the row order
        reversed and a fresh 0..n-1 index.
    """
    df = pd.read_csv(path, sep=',')
    # drop=True discards the old index directly. Materialising it as an
    # 'index' column and dropping that afterwards would delete a genuine CSV
    # column named 'index' and leave a stray 'level_0' column behind.
    return df.sort_index(axis=0, ascending=False).reset_index(drop=True)

def BuildShillerPeDataSet(path="ShillerPERatio.csv"):
    """Load the Shiller PE CSV with its rows in reverse file order.

    The first line of the file is skipped before the header is read, and the
    'DateTime' column is renamed to 'Date' so the frame can be joined with
    the price data on a common key. Date strings are used exactly as read;
    no reformatting is applied.

    Args:
        path: Location of the comma-separated ratio file. Defaults to the
            file name the notebook has always used.

    Returns:
        A DataFrame with the row order reversed and a fresh 0..n-1 index.
    """
    df = pd.read_csv(path, sep=',', skiprows=[0])
    df = df.rename(columns={'DateTime': 'Date'})
    # drop=True discards the old index directly. Materialising it as an
    # 'index' column and dropping that afterwards would delete a genuine CSV
    # column named 'index' and leave a stray 'level_0' column behind.
    return df.sort_index(axis=0, ascending=False).reset_index(drop=True)

def BuildDataset():
    """Join the price and Shiller PE frames on 'Date' and order the columns.

    Only dates present in both sources are kept (inner join). 'Close' is
    placed last in the returned frame.
    """
    ordered_columns = ['Date', 'Open', 'High', 'Low', 'Volume', 'Shiller PE Ratio', 'Close']
    prices = BuildSandPDataSet()
    ratios = BuildShillerPeDataSet()
    joined = prices.merge(ratios, how="inner", on=["Date"])
    return joined[ordered_columns]

def MergeByDate(df1, df2):
    """Inner-join ``df1`` and ``df2`` on their shared 'Date' column."""
    return pd.merge(df1, df2, how='inner', on="Date")


def GraphPEToClose(SandPData, PEData, logy = True):
    """Plot 'Close' and 'Shiller PE Ratio' against 'Date' on one chart.

    Observation from the plot: the derivatives of the two series look
    correlated, even though their magnitudes may not be.

    The two frames are inner-joined on 'Date' before plotting. ``logy``
    selects a logarithmic y axis; the chart title is the same for both
    scales (rescaling the PE series for the linear plot is disabled).
    """
    chart_title = "Graph of S&P and Schiller PE"
    combined = MergeByDate(SandPData, PEData)
    combined.plot(
        x='Date',
        y=['Close', 'Shiller PE Ratio'],
        figsize=(10, 5),
        logy=logy,
        title=chart_title,
        grid=True,
    )

def PEGraph():
    """Load both data sets from disk and chart them with the default log scale."""
    prices = BuildSandPDataSet()
    ratios = BuildShillerPeDataSet()
    GraphPEToClose(prices, ratios)
    
def BuildInputSequence(dataset, window, prediction):
    """Turn the merged frame into sliding-window samples of log values.

    Each sample consists of ``window`` consecutive rows of the natural log of
    'Open', 'High', 'Low', 'Volume', 'Shiller PE Ratio' and 'Close' (in that
    order); its label is the log 'Close' of the ``prediction`` rows that
    immediately follow the window.

    Rows are addressed by position, so the result does not depend on the
    frame's index labels.

    Args:
        dataset: DataFrame containing the six columns listed above.
        window: Number of consecutive rows per input sample.
        prediction: Number of following 'Close' values per label.

    Returns:
        ``[features, labels]`` where ``features`` is a list of
        ``window x 6`` nested lists and ``labels`` is a list of
        ``prediction``-long lists. Both are empty when the frame has fewer
        than ``window + prediction`` rows.
    """
    columns = ['Open', 'High', 'Low', 'Volume', 'Shiller PE Ratio', 'Close']

    # Take the log once for the whole table instead of once per cell (the
    # label loop used to recompute the log of the entire 'Close' column for
    # every single label element).
    logged = np.log(dataset[columns].to_numpy(dtype=float))
    log_close = logged[:, -1]

    # A non-positive count makes range() empty, which yields empty outputs.
    sample_count = len(dataset) - window + 1 - prediction

    features = [logged[start:start + window].tolist() for start in range(sample_count)]
    labels = [
        log_close[start + window:start + window + prediction].tolist()
        for start in range(sample_count)
    ]
    return [features, labels]

def TrainTestDataset(window = 1, prediction = 1):
    """Build the windowed samples and split them 80/20 into train and test.

    The split is delegated to ``train_test_split`` with ``test_size = 0.2``.

    Returns:
        ``[X_train, Y_train, X_test, Y_test]``.
    """
    features, labels = BuildInputSequence(BuildDataset(), window, prediction)
    X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size = 0.2)
    return [X_train, Y_train, X_test, Y_test]


class RNN(nn.Module):
    """Recurrent network with a linear head that predicts from the last step.

    Args:
        input_size: Number of features per time step.
        output_size: Number of values produced per sample.
        hidden_size: Width of the recurrent hidden state.
        layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_size, layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.layers = layers
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.rnn = nn.RNN(input_size, hidden_size, layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one prediction per sample, shape ``(batch, output_size)``.

        ``x`` is expected as ``(batch, seq_len, input_size)``.
        """
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, device=x.device, dtype=x.dtype)
        out, hidden = self.rnn(x, hidden)
        # Use only the final time step. Flattening every step into the batch
        # axis produced (batch * seq_len, output_size), which no longer lines
        # up with per-sample labels once seq_len > 1. For seq_len == 1 the
        # result is identical to the flattened form.
        out = self.fc(out[:, -1, :])
        return out

    def init_hidden(self, batch_size, device=None, dtype=None):
        """Return a zero initial hidden state, ``(layers, batch, hidden_size)``.

        ``device`` and ``dtype`` default to those of the module's own
        parameters, so no module-level device variable is required.
        """
        if device is None or dtype is None:
            reference = next(self.parameters())
            device = reference.device if device is None else device
            dtype = reference.dtype if dtype is None else dtype
        return torch.zeros(
            self.layers, batch_size, self.hidden_size, device=device, dtype=dtype
        )
    
def Train(Model, X_train, Y_train, epochs, batchSize, learningRate):
    """Fit ``Model`` to ``(X_train, Y_train)`` using Adam and an MSE loss.

    After every epoch the loss of that epoch's last mini-batch is printed.

    Args:
        Model: Module to optimise; batches are moved to the device its
            parameters live on.
        X_train: Tensor of inputs, first dimension indexes samples.
        Y_train: Tensor of targets, first dimension indexes samples.
        epochs: Number of full passes over the data.
        batchSize: Mini-batch size handed to the DataLoader.
        learningRate: Adam learning rate.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(Model.parameters(), lr=learningRate)

    # Follow the model's own placement instead of relying on a module-level
    # `device` variable that may not exist.
    target_device = next(Model.parameters()).device

    dataset = torch.utils.data.TensorDataset(X_train, Y_train)
    dataLoader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchSize, shuffle=True)

    Model.train()
    for epoch in range(epochs):
        loss = None
        for data, label in dataLoader:
            data = data.to(target_device)
            label = label.to(target_device, dtype=torch.float32)

            output = Model(data).float()
            loss = criterion(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # `loss` stays None when the loader produced no batches; reading
        # loss.item() unconditionally would fail in that case.
        if loss is None:
            print("Epoch: " + str(epoch) + " Loss: n/a (no batches)")
        else:
            print("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))
    
# Build the data set with a window of 1 and unpack the four splits.
D = TrainTestDataset(1)
X_train, Y_train, X_test, Y_test = D

# Convert the training split to tensors for the DataLoader.
X_train, Y_train = torch.Tensor(X_train), torch.Tensor(Y_train)

X_train.shape

# 6 input features, 1 output value, 32 hidden units, 1 recurrent layer.
Model = RNN(6, 1, 32, 1).to(device)

# 100 epochs, batch size 10, learning rate 0.01.
Train(Model, X_train, Y_train, 100, 10, 0.01)

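# Debugging notes: shape mismatches reported by the loss
# (presumably seen with a window larger than 1 - TODO confirm):
#   got input size [30], expected target size [10, 1]
#   got input size [12], expected target size [4, 1]
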


Cuda Device Available
Name of the Cuda Device:  NVIDIA GeForce RTX 3060 Laptop GPU
GPU Computational Capablity:  (8, 6)
Epoch: 0 Loss: 0.892368495464325
Epoch: 1 Loss: 0.626775860786438
Epoch: 2 Loss: 0.4531162679195404
Epoch: 3 Loss: 0.5295746326446533
Epoch: 4 Loss: 0.9807287454605103
Epoch: 5 Loss: 0.4141603708267212
Epoch: 6 Loss: 1.0806300640106201
Epoch: 7 Loss: 0.5280286073684692
Epoch: 8 Loss: 0.8709087371826172
Epoch: 9 Loss: 0.9116748571395874
Epoch: 10 Loss: 0.861945629119873
Epoch: 11 Loss: 0.09044738113880157
Epoch: 12 Loss: 0.5059970617294312
Epoch: 13 Loss: 0.473993718624115
Epoch: 14 Loss: 0.4133793115615845
Epoch: 15 Loss: 0.6593553423881531
Epoch: 16 Loss: 1.1558674573898315
Epoch: 17 Loss: 0.976487398147583
Epoch: 18 Loss: 0.645256757736206
Epoch: 19 Loss: 0.03499095141887665
Epoch: 20 Loss: 1.203829288482666
Epoch: 21 Loss: 0.8012686967849731
Epoch: 22 Loss: 0.7101081609725952
Epoch: 23 Loss: 0.4729779362678528
Epoch: 24 Loss: 0.8352971076965332
Epoch: 25 Loss: 0.36