In [None]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torch.autograd import Variable
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Module-level MinMaxScaler instance; it is only created here and is fitted/used by later cells.
mm = MinMaxScaler()

In [None]:
class PriceDataset(Dataset):
    """Sliding-window dataset over a scaled time series.

    Sample ``i`` pairs the input window ``data[i : i + window_size]`` with the
    target row ``data[i + window_size + days_after]``, where ``data`` is the
    result of ``scaler.transform`` applied to the constructor argument.

    Args:
        data: Observations ordered in time along the first axis; handed to
            ``scaler.transform`` unchanged.
        window_size: Number of consecutive rows in every input window.
        scaler: Object exposing ``transform(data)``; it must already be fitted.
        days_after: Extra row offset between the end of the window and the
            target row (0 selects the row immediately after the window).
    """

    def __init__(self, data, window_size, scaler, days_after):
        self.data = scaler.transform(data)
        self.window_size = window_size
        self.days_after = days_after

        self.x, self.y = self.getSeriesData(self.data, window_size, days_after)

    def __len__(self):
        # Use the tensors that were actually built: this can never be negative,
        # unlike `len(data) - window_size - days_after` on a too-short series.
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(window, target_row)`` pair stored at position ``idx``."""
        return self.x[idx], self.y[idx]

    def getSeriesData(self, data, window_size, days_after):
        """Build the window tensor and the target tensor (learning period, prediction period).

        Args:
            data: Array-like series, time along the first axis.
            window_size: Rows per input window.
            days_after: Row offset of the target past the end of the window.

        Returns:
            Tuple ``(x, y)`` of float32 tensors. ``x`` has shape
            ``(n_pairs, window_size, *row_shape)`` and ``y`` has shape
            ``(n_pairs, *row_shape)``, where
            ``n_pairs = max(len(data) - window_size - days_after, 0)``.
        """
        values = np.asarray(data, dtype=np.float32)
        row_shape = values.shape[1:]
        n_pairs = max(len(values) - window_size - days_after, 0)

        if n_pairs == 0:
            # Series too short for a single (window, target) pair: return empty
            # tensors that keep the trailing dimensions of a regular result.
            return (
                torch.empty((0, window_size) + row_shape),
                torch.empty((0,) + row_shape),
            )

        # Stack into one ndarray first; converting a Python list of ndarrays
        # straight to a tensor is the slow path in PyTorch.
        windows = np.stack([values[i:i + window_size] for i in range(n_pairs)])

        first_target = window_size + days_after
        # Copy so the tensor does not share memory with `self.data`.
        targets = values[first_target:first_target + n_pairs].copy()

        return torch.from_numpy(windows), torch.from_numpy(targets)

In [None]:
# Read the CSV and discard its 'time' column in one expression.
df = pd.read_csv('data_final.csv').drop(columns=['time'])

# Preview the first two rows.
df.head(2)

In [None]:
window_size = 30  # Number of days in each input window
days_after = 3    # Number of days ahead to forecast
batch_size = 64

# PriceDataset uses offset 0 for "the row right after the window",
# so convert the 1-based horizon into a 0-based offset.
days_after -= 1

# Unshuffled splits keep row order: first 60% train, the remaining 40% halved
# into validation and test.
train, test = train_test_split(df, test_size=0.4, shuffle=False)
valid, test = train_test_split(test, test_size=0.5, shuffle=False)

# The scaler is fitted on the training split only and reused for all three splits.
mm.fit(train)

train_dataset, valid_dataset, test_dataset = (
    PriceDataset(split, window_size, mm, days_after)
    for split in (train, valid, test)
)

# shuffle=False keeps windows in order; drop_last=True discards the final partial batch.
train_loader, valid_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    for dataset in (train_dataset, valid_dataset, test_dataset)
)

In [None]:
class LSTM1(nn.Module):
    """LSTM encoder followed by a two-layer fully connected head.

    The final hidden state of the top LSTM layer goes through
    ReLU -> Linear(hidden_size, 128) -> ReLU -> Linear(128, num_classes).

    Args:
        num_classes: Width of the output layer.
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Stored on the instance; not used by ``forward``.
        dropout: Probability for ``self.dropout``. NOTE(review): that layer is
            constructed but never applied in ``forward`` — confirm whether that
            is intended.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length, dropout):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, 128)  # Fully connected 1
        self.fc = nn.Linear(128, num_classes)    # Fully connected last layer
        self.relu = nn.ReLU()

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a batch of windows to one prediction row per sample.

        Args:
            x: Tensor of shape ``(batch, seq, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # With no explicit (h_0, c_0), nn.LSTM starts from zero states created on
        # the input's own device and dtype, so no global `device` is needed here.
        _, (hn, _) = self.lstm(x)

        # hn is (num_layers, batch, hidden_size). Take only the top layer so the
        # head sees exactly one row per sample even when num_layers > 1.
        top_hidden = hn[-1]

        out = self.relu(top_hidden)
        out = self.fc_1(out)  # First Dense
        out = self.relu(out)
        return self.fc(out)   # Final Output

In [None]:
# Hyperparameters, model, loss and optimizer
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Optimisation settings
epochs = 300
learning_rate = 0.001
clip = 5  # Gradient Clipping

# Logging state
counter = 0
print_every = 500

# Network shape
input_size = 68   # Number of Features
hidden_size = 100
num_layers = 1
num_classes = 1   # Number of Output Classes
dropout = 0.01

# Module.to() returns the module itself, so construction and device placement chain.
model = LSTM1(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=window_size,
    dropout=dropout,
).to(device)

loss_function = torch.nn.MSELoss()  # Mean-squared Error for Regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam Optimizer

cuda:0


In [None]:
# Train for `epochs` passes over train_loader, reporting the latest training-batch
# loss and the mean validation loss every `print_every` optimisation steps.
counter = 0  # Restart the step count so re-running this cell logs at the same steps
model.train()
for e in range(epochs):
    for x, y in train_loader:
        counter += 1

        x = x.to(device)
        # The target is column 0 of the target row, kept as (batch, 1) so it has
        # the same shape as the model output.
        y = y[:, 0].unsqueeze(1).to(device).float()

        model.zero_grad()
        output = model(x)

        # Compare tensors of identical shape. Squeezing the output to (batch,)
        # against a (batch, 1) target would broadcast to (batch, batch) and
        # average over every output/target pairing.
        loss = loss_function(output, y)
        loss.backward()
        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        # Loss Stats
        if counter % print_every == 0:
            # Get validation loss
            val_losses = []
            model.eval()

            # No gradients are needed for evaluation.
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    labels = labels[:, 0].unsqueeze(1)
                    inputs, labels = inputs.to(device), labels.to(device)

                    val_output = model(inputs)
                    val_loss = loss_function(val_output, labels.float())
                    val_losses.append(val_loss.item())

            model.train()

            # An empty validation loader is reported as NaN.
            mean_val_loss = np.mean(val_losses) if val_losses else float("nan")
            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(mean_val_loss))

In [None]:
# Evaluate on the test loader: collect predictions and targets for plotting and
# report the RMSE per batch. Values are in the scaler's transformed units.
test_RMSE = []
outputs = []
actual = []

model.eval()

number = 0  # Batches evaluated

# No gradients are needed for evaluation.
with torch.no_grad():
    for inputs, labels in test_loader:
        labels = labels[:, 0].unsqueeze(1)
        inputs, labels = inputs.to(device), labels.to(device)
        output = model(inputs)

        outputs.extend(output[:, 0].view(-1).cpu().numpy())
        actual.extend(labels.view(-1).cpu().numpy())

        # Keep output and labels at the same shape. Squeezing the output to
        # (batch,) against (batch, 1) labels would broadcast to (batch, batch).
        test_loss = torch.sqrt(loss_function(output, labels.float()))
        test_RMSE.append(test_loss.item())
        number += 1

        # Running mean of the per-batch RMSE so far
        print(np.mean(test_RMSE))

# Mean of the per-batch RMSE values; NaN when the loader yielded no batch.
# The accumulator previously named `sum` is gone, so the builtin is no longer shadowed.
average = float(np.mean(test_RMSE)) if number else float("nan")
print("\n average : {} \n".format(average))

In [None]:
import matplotlib.pyplot as plt

# Overlay the collected test targets and model predictions, in collection order.
fig, ax = plt.subplots()
ax.plot(actual, label='actual')
ax.plot(outputs, label='prediction')

# Title and axis labels so the figure can be read on its own.
ax.set(title='Test set: actual vs. prediction',
       xlabel='Test sample index',
       ylabel='Scaled target value')
ax.legend()
plt.show()