In [94]:
import torch 
from torch import nn
import numpy as np
import math
import pandas as pd
import matplotlib
from sklearn.model_selection import train_test_split


# Select the compute device once: the first CUDA GPU when one is visible,
# otherwise the CPU. The GPU details are only reported when a GPU is in use.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())

# Disabled helper: the definition below sits inside a bare string literal, so
# Python evaluates the string and discards it; ChangeDateShiller is never
# defined. Its only reference in this file is a commented-out call.
# As written it splits the first whitespace-separated token of `date` on '-',
# moves the parts from positions (0, 1, 2) to (1, 2, 0) and joins them with '/'.
# NOTE(review): if this is ever re-enabled, `"0" + firstElement` adds a str to
# the list returned by split('-') and would raise TypeError. The intended
# operands look like firstElement[0] / firstElement[1] — confirm before use.
"""
def ChangeDateShiller(date):
    date = date.split()
    firstElement = date[0]
    firstElement = firstElement.split('-')
    firstElement[0], firstElement[1] = firstElement[1], firstElement[0]
    firstElement[1], firstElement[2] = firstElement[2], firstElement[1]
    if len(firstElement[0]) == 1:
        firstElement[0] = "0" + firstElement
    if len(firstElement[1]) == 1:
        firstElement[1] = "0" + firstElement
    firstElement = "/".join(firstElement)
    return firstElement
"""

# The factor 25 is a rough estimate read off a graph, not a fitted value.
def MultiplyPE(pe):
    """Return ``pe`` multiplied by the constant scale factor 25."""
    return pe * 25

# Rescales the PE column (per the original note: to match the 2012 y intercept).
def ScalePERatio(df):
    """Apply MultiplyPE to every value of the 'Shiller PE Ratio' column.

    The column is overwritten on the frame that was passed in, and that same
    frame is returned.
    """
    column = 'Shiller PE Ratio'
    scaled = df[column].apply(MultiplyPE)
    df[column] = scaled
    return df

    
def BuildSandPDataSet(path = "SandP500Data.csv"):
    """Load the S&P 500 CSV and return its rows in reverse file order.

    Parameters
    ----------
    path : str, path-like or file-like, optional
        Source handed to ``pandas.read_csv``. Defaults to "SandP500Data.csv"
        in the current working directory.

    Returns
    -------
    pandas.DataFrame
        Every column of the file, rows reversed, with a fresh 0..n-1 index.
    """
    raw = pd.read_csv(path, sep = ',')
    # Sorting the default positional index in descending order flips the rows.
    # presumably the file is stored newest-first, so this yields chronological
    # order — TODO confirm against the data file.
    # reset_index(drop=True) discards the old index directly; inserting it as an
    # 'index' column and dropping it afterwards raises if the CSV already has a
    # column with that name.
    return raw.sort_index(axis=0, ascending=False).reset_index(drop=True)

def BuildShillerPeDataSet(path = "ShillerPERatio.csv"):
    """Load the Shiller PE CSV, rename its date column, and reverse the rows.

    Parameters
    ----------
    path : str, path-like or file-like, optional
        Source handed to ``pandas.read_csv``. Defaults to "ShillerPERatio.csv"
        in the current working directory.

    Returns
    -------
    pandas.DataFrame
        The file's columns with 'DateTime' renamed to 'Date', rows in reverse
        file order, and a fresh 0..n-1 index.
    """
    # The first physical line of the file is skipped; the header is read from
    # the line after it.
    raw = pd.read_csv(path, sep = ',', skiprows = [0])
    # Use the same key name as the S&P table so the two can be joined on 'Date'.
    renamed = raw.rename(columns={'DateTime': 'Date'})
    # reset_index(drop=True) avoids the insert-then-drop of an 'index' column,
    # which raises when the file already has a column with that name.
    return renamed.sort_index(axis=0, ascending=False).reset_index(drop=True)

def BuildDataset():
    """Join the S&P 500 and Shiller PE tables on 'Date'.

    Only dates present in both tables are kept (inner join). The result is
    restricted to the seven columns listed below, in that order.
    """
    prices = BuildSandPDataSet()
    ratios = BuildShillerPeDataSet()
    ordered_columns = ['Date', 'Open', 'High', 'Low', 'Volume', 'Shiller PE Ratio', 'Close']
    joined = prices.merge(ratios, on=["Date"], how="inner")
    return joined[ordered_columns]

def MergeByDate(df1, df2):
    """Inner-join ``df1`` and ``df2`` on their shared 'Date' column."""
    return pd.merge(df1, df2, how='inner', on="Date")


# Observation from the plot: the derivatives of the two series look correlated,
# even though their magnitudes may not be.
def GraphPEToClose(SandPData, PEData, logy = True):
    """Plot 'Close' and 'Shiller PE Ratio' against 'Date' on one figure.

    The two frames are inner-joined on 'Date' first. ``logy`` is forwarded to
    the pandas plot call to choose a logarithmic or linear y axis. Nothing is
    returned.
    """
    # The title is the same for both axis scales.
    title = "Graph of S&P and Schiller PE"
    merged = MergeByDate(SandPData, PEData)
    merged.plot(
        x='Date',
        y=['Close', 'Shiller PE Ratio'],
        figsize=(10,5),
        logy = logy,
        title = title,
        grid=True,
    )

def PEGraph():
    """Load both CSV-backed tables and plot them with GraphPEToClose's defaults."""
    GraphPEToClose(BuildSandPDataSet(), BuildShillerPeDataSet())
    
def BuildInputSequence(dataset, window, prediction):
    
    features = []
    labels = []

    for i in range(0, len(dataset) - window + 1 - prediction):
        feature = []
        label = []
        for j in range(0, window):
            row = []
            row.append(np.log(dataset['Open'][i + j]))
            row.append(np.log(dataset['High'][i + j]))
            row.append(np.log(dataset['Low'][i + j]))
            row.append(np.log(dataset['Volume'][i + j]))
            row.append(np.log(dataset['Shiller PE Ratio'][i + j]))
            row.append(np.log(dataset['Close'][i + j]))
            feature.append(row)

        for j in range(0, prediction):
            label.append(np.log(dataset['Open'][i + window + j]))
            label.append(np.log(dataset['High'][i + window + j]))
            label.append(np.log(dataset['Low'][i + window + j]))
            label.append(np.log(dataset['Volume'][i + window + j]))
            label.append(np.log(dataset['Shiller PE Ratio'][i + window + j]))
            label.append(np.log(dataset['Close'])[i + window + j])

        features.append(feature)
        labels.append(label)
        
    return [features, labels]

def TrainTestDataset(window = 1, prediction = 1, test_size = 0.2, shuffle = True, random_state = None):
    """Build the windowed samples and split them into training and test parts.

    Parameters
    ----------
    window : int, optional
        Rows per input sample, forwarded to BuildInputSequence.
    prediction : int, optional
        Rows per label, forwarded to BuildInputSequence.
    test_size : float or int, optional
        Forwarded to ``train_test_split``; 0.2 keeps the previous 80/20 split.
    shuffle : bool, optional
        Forwarded to ``train_test_split``. The default (True) assigns samples
        to the two parts at random. Pass False for a chronological split with
        the last samples held out.
    random_state : int or None, optional
        Forwarded to ``train_test_split``; set it to make a shuffled split
        repeatable.

    Returns
    -------
    list
        ``[X_train, Y_train, X_test, Y_test]``. Note this order differs from
        the order ``train_test_split`` itself returns.
    """
    df = BuildDataset()
    features, labels = BuildInputSequence(df, window, prediction)

    # With shuffle=True and window > 1, neighbouring windows share rows, so
    # training and test samples can contain the same rows.
    X_train, X_test, Y_train, Y_test = train_test_split(
        features,
        labels,
        test_size = test_size,
        shuffle = shuffle,
        random_state = random_state,
    )

    return [X_train, Y_train, X_test, Y_test]


class RNN(nn.Module):
    """``nn.RNN`` followed by a linear layer applied to every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    output_size : int
        Number of values the linear layer emits per time step.
    hidden_size : int
        Width of the recurrent hidden state.
    layers : int
        Number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_size, layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.layers = layers
        # batch_first=True: inputs and outputs are (batch, seq, features).
        self.rnn = nn.RNN(input_size, hidden_size, layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` of shape (batch, seq, input_size) through the network.

        Returns
        -------
        tuple
            ``(out, hidden)``. ``out`` has shape (batch * seq, output_size)
            because batch and time are merged before the linear layer.
            ``hidden`` is the final state, (layers, batch, hidden_size).
        """
        batch_size = x.size(0)
        # Every call starts from an all-zero hidden state.
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        # Merge batch and time so the linear layer sees one row per time step.
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape (layers, batch_size, hidden_size).

        The state is created on the device and in the dtype of the model's own
        weights, so it stays valid after ``.to(...)`` or ``.double()`` and does
        not depend on any module-level ``device`` variable.
        """
        reference = self.fc.weight
        return torch.zeros(
            self.layers,
            batch_size,
            self.hidden_size,
            device=reference.device,
            dtype=reference.dtype,
        )
    
def Train(Model, X_train, Y_train, epochs, batchSize, learningRate):
    """Fit ``Model`` with Adam on a mean-squared-error loss.

    Parameters
    ----------
    Model : torch.nn.Module
        Called as ``Model(batch)`` and expected to return ``(output, hidden)``.
        It is updated in place.
    X_train, Y_train : torch.Tensor
        Inputs and targets with the same first dimension.
    epochs : int
        Number of passes over the data.
    batchSize : int
        Mini-batch size; batches are reshuffled every epoch.
    learningRate : float
        Adam learning rate.

    Returns
    -------
    None
        Every tenth epoch (0, 10, 20, ...) the loss of that epoch's last
        mini-batch is printed.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(Model.parameters(), lr=learningRate)

    # Send batches to wherever the model's weights are, rather than relying on
    # a module-level `device` variable that may disagree with the model.
    target_device = next(Model.parameters()).device

    dataset = torch.utils.data.TensorDataset(X_train, Y_train)
    dataLoader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchSize, shuffle=True)

    for epoch in range(epochs):
        # Stays None if the loader yields no batch, so the report below cannot
        # hit an unbound name.
        loss = None
        for data, label in dataLoader:
            data = data.to(target_device)
            label = label.to(device=target_device, dtype=torch.float32)

            output, _hidden = Model(data)
            loss = criterion(output.float(), label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % 10 == 0 and loss is not None:
            print("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))
    
# Build single-row windows (window = 1) and convert each of the four parts,
# returned in the order X_train, Y_train, X_test, Y_test, to a float tensor.
D = TrainTestDataset(1)
X_train, Y_train, X_test, Y_test = (torch.Tensor(part) for part in D)

# Six input features, six outputs, 32 hidden units, one recurrent layer.
Model = RNN(input_size=6, output_size=6, hidden_size=32, layers=1).to(device)

Train(Model, X_train, Y_train, epochs=200, batchSize=10, learningRate=0.01)



#Getting Input size[30]
#Expecting Output size [10, 1]

#Getting Input size[12]
#Expecting Output size [4, 1]


Epoch: 0 Loss: 26.050697326660156
Epoch: 10 Loss: 0.9161461591720581
Epoch: 20 Loss: 0.2668100595474243
Epoch: 30 Loss: 0.07120290398597717
Epoch: 40 Loss: 0.1366715133190155
Epoch: 50 Loss: 0.11425287276506424
Epoch: 60 Loss: 0.04849095270037651
Epoch: 70 Loss: 0.04103562608361244
Epoch: 80 Loss: 0.026546698063611984
Epoch: 90 Loss: 0.009111122228205204
Epoch: 100 Loss: 0.053307682275772095
Epoch: 110 Loss: 0.024628030136227608
Epoch: 120 Loss: 0.024579429998993874
Epoch: 130 Loss: 0.03734736144542694
Epoch: 140 Loss: 0.026211053133010864
Epoch: 150 Loss: 0.023989230394363403
Epoch: 160 Loss: 0.031053433194756508
Epoch: 170 Loss: 0.026591042056679726
Epoch: 180 Loss: 0.024981698021292686
Epoch: 190 Loss: 0.020448049530386925


In [95]:
def PredictAhead(M, XInit, FarAhead):
    """Feed a model its own output ``FarAhead`` times and return one value.

    Parameters
    ----------
    M : callable
        Called as ``M(tensor)`` and expected to return ``(output, hidden)``
        with a 2-D ``output``.
    XInit : torch.Tensor
        Starting input, at least 3-D, already on the device ``M`` expects.
    FarAhead : int
        Number of feedback steps. With 0 the value is read from ``XInit``.

    Returns
    -------
    float
        ``exp`` of the last element along every axis of the final tensor.
        When the inputs are the logged features built in this notebook, that
        is the last column ('Close') of the last row, back in original units.
    """
    output = XInit
    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        for _ in range(FarAhead):
            output, _hidden = M(output)
            # Add a leading batch axis so the 2-D output becomes a single
            # sequence for the next step. This stays on the output's own device
            # and avoids a round trip through NumPy and a Python list.
            output = output.unsqueeze(0)

    last_log_value = output.detach().cpu().numpy()[-1][-1][-1]
    # Exponentiate in double precision, whatever dtype the tensor had.
    return float(np.exp(float(last_log_value)))

# Show the first ten training inputs (log-scaled feature rows).
print(X_train[:10])
# Roll the model forward seven steps starting from the first five training
# inputs; as the last expression, the result is displayed as the cell output.
PredictAhead(Model, X_train[:5].to(device), 7)


tensor([[[ 7.9998,  8.0109,  7.9452, 25.1003,  3.3572,  7.9815]],

        [[ 6.2793,  6.3119,  6.2653, 22.7456,  3.1232,  6.3003]],

        [[ 6.0770,  6.0928,  6.0565, 22.3874,  3.0116,  6.0840]],

        [[ 6.5082,  6.5160,  6.4067, 22.9037,  3.2133,  6.4614]],

        [[ 5.5331,  5.5378,  5.4297, 21.8758,  2.6005,  5.4438]],

        [[ 6.7035,  6.8106,  6.6446, 24.3571,  3.0892,  6.7864]],

        [[ 7.5819,  7.5966,  7.5656, 24.9208,  3.2511,  7.5656]],

        [[ 7.0452,  7.0456,  6.9693, 24.0756,  3.3676,  6.9819]],

        [[ 6.2160,  6.2447,  6.2150, 22.5649,  3.0745,  6.2436]],

        [[ 7.3974,  7.4111,  7.3527, 25.0400,  3.1324,  7.3817]]])


2799.842165311102

In [102]:
def Test(M):
    """Run 1- to 4-step-ahead predictions over the training and test tensors.

    Reads the module-level ``X_train``, ``X_test`` and ``device``.

    Parameters
    ----------
    M : torch.nn.Module
        The model to evaluate. Every prediction uses this argument rather than
        the global ``Model``.

    Returns
    -------
    dict
        ``{"train": {1: [...], 2: [...], 3: [...], 4: [...]}, "test": {...}}``.
        Each list holds one PredictAhead result per start index.
    """
    # Each prediction starts from five consecutive entries of the tensor.
    # NOTE(review): if the tensors come from a shuffled train/test split, those
    # five entries are not consecutive rows of the source table — confirm this
    # is intended.
    window = 5
    horizons = (1, 2, 3, 4)
    splits = {"train": X_train, "test": X_test}

    predictions = {name: {} for name in splits}
    for horizon in horizons:
        for name, inputs in splits.items():
            # The start index stops 7, 8, 9 and 10 entries before the end for
            # horizons 1 to 4; the margin is kept as found, its reason is not
            # documented in the code.
            start_count = len(inputs) - (6 + horizon)
            predictions[name][horizon] = [
                PredictAhead(M, inputs[start:start + window].to(device), horizon)
                for start in range(start_count)  # empty when start_count <= 0
            ]

    return predictions
        

In [103]:
# Evaluate the trained model; the trailing ';' stops the cell echoing a result.
Test(Model);