# Links:
# https://towardsdatascience.com/pytorch-lstms-for-time-series-data-cd16190929d7
# https://colab.research.google.com/github/dlmacedo/starter-academic/blob/master/content/courses/deeplearning/notebooks/pytorch/Time_Series_Prediction_with_LSTM_Using_PyTorch.ipynb#scrollTo=a2vvHeFgVtCp

# In [None]:
import torch
from torch import nn
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import os
import time

class Dataloader(object):
    """Sequentially serve sliding-window batches from a 2-D table.

    One sample is a window of ``seq_length`` consecutive rows; its label is
    the row that immediately follows the window. Consecutive samples are
    shifted by one row, and every call to :meth:`get_item` returns
    ``batch_size`` samples and advances the cursor by ``batch_size`` rows.
    Rows that cannot fill a complete batch at the end are never served.

    Attributes:
        data: The input wrapped in a ``pandas.DataFrame``.
        seq_length: Number of rows per window.
        batch_size: Number of windows returned per call.
        index_start: Row index of the first window of the next batch.
        flag: ``True`` while another full batch can be served.
    """

    def __init__(self, data, seq_length=16, batch_size=16):
        self.data = pd.DataFrame(data)
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.index_start = 0
        # Convert once: slicing a NumPy array per window is far cheaper than
        # a DataFrame.iloc lookup per window. The array is a snapshot, so
        # reassigning ``self.data`` afterwards is not picked up.
        self._values = self.data.to_numpy()
        # Start exhausted when not even one full batch fits, so callers that
        # loop on ``flag`` simply skip the data instead of hitting IndexError.
        self.flag = self._has_batch(self.index_start)

    def _has_batch(self, start):
        """Return whether a full batch whose first window begins at *start* fits."""
        # The label of the last sample in the batch is the highest row touched.
        last_label_row = start + self.seq_length + self.batch_size - 1
        return last_label_row < self._values.shape[0]

    def get_item(self):
        """Return the next batch and advance the cursor.

        Returns:
            A ``(windows, labels)`` pair of ``torch.Tensor`` objects with
            shapes ``(batch_size, seq_length, n_columns)`` and
            ``(batch_size, n_columns)``.

        Raises:
            IndexError: If no full batch is left (``flag`` is ``False``).
        """
        start = self.index_start
        if not self._has_batch(start):
            raise IndexError(
                "no full batch left: start=%d, seq_length=%d, batch_size=%d, rows=%d"
                % (start, self.seq_length, self.batch_size, self._values.shape[0])
            )

        offsets = range(start, start + self.batch_size)
        windows = [self._values[row:row + self.seq_length] for row in offsets]
        labels = [self._values[row + self.seq_length] for row in offsets]

        self.index_start = start + self.batch_size
        # Signal exhaustion as soon as the *next* batch would run past the data.
        if not self._has_batch(self.index_start):
            self.flag = False
        return torch.Tensor(np.array(windows)), torch.Tensor(np.array(labels))
class LSTM(nn.Module):
    """LSTM regressor that predicts the row following a window of rows.

    The network is an ``nn.LSTM`` (``batch_first=True``) followed by a linear
    layer applied to the final hidden state of the last LSTM layer.

    Args:
        num_classes: Size of the output vector.
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Window length used when batching data in ``train``/``test``.
        batch_size: Batch size used when batching data in ``train``/``test``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length, batch_size):
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        self.batch_size = batch_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of windows to one prediction per window.

        Args:
            x: Tensor whose first dimension is the batch (the LSTM is built
                with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        # Build the zero initial states on the input's device and dtype so the
        # model is not tied to CPU float32 inputs.
        h_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        # Propagate input through LSTM
        _, (h_n, _) = self.lstm(x, (h_0, c_0))

        # h_n holds one final hidden state per layer. Use only the last
        # layer's: flattening all layers with view(-1, hidden) yields
        # num_layers * batch rows, which no longer lines up with the labels
        # when num_layers > 1.
        return self.fc(h_n[-1])

    #####***********##########
    def train(self, data=True, epochs=None, learning_rate=None, *, mode=None):
        """Fit the model on *data*, or switch train/eval mode.

        This name shadows ``nn.Module.train(mode)``, which ``nn.Module.eval()``
        relies on. To keep both uses working, a boolean first argument (or the
        ``mode`` keyword) is forwarded to ``nn.Module.train``; anything else is
        treated as training data.

        Args:
            data: Table of rows to train on, or a ``bool`` training-mode flag.
            epochs: Number of passes over *data*.
            learning_rate: Learning rate for the Adam optimizer.
            mode: Keyword form of the ``nn.Module.train`` mode flag.

        Returns:
            The list of per-batch MSE losses (0-d NumPy arrays) when fitting,
            or ``self`` when only switching mode.

        Raises:
            TypeError: If data is given without ``epochs`` and ``learning_rate``.
        """
        if mode is not None:
            return super(LSTM, self).train(mode)
        if isinstance(data, bool):
            return super(LSTM, self).train(data)
        if epochs is None or learning_rate is None:
            raise TypeError("train() requires 'epochs' and 'learning_rate' when given data")

        losses = []
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate, weight_decay=1e-8)
        loss_function = torch.nn.MSELoss()
        for epoch in range(epochs):
            print(epoch)
            # A fresh loader per epoch restarts the pass from the first row.
            dataloader = Dataloader(data, seq_length=self.seq_length, batch_size=self.batch_size)
            while dataloader.flag:
                data_window, data_label = dataloader.get_item()
                loss = loss_function(self(data_window), data_label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses.append(loss.detach().cpu().numpy())
        return losses

    def test(self, test):
        """Score *test* batch by batch and flag unusually high losses.

        Args:
            test: Table of rows to evaluate.

        Returns:
            A ``(losses, label)`` pair. ``losses`` starts with ``seq_length``
            zeros followed by one MSE loss per batch; ``label`` has one entry
            per element of ``losses``: 1 where the loss exceeds
            ``mean + 3 * std`` of ``losses``, else 0.
        """
        # Leading zeros stand in for the first seq_length rows, which have no
        # preceding window to predict them from.
        losses = [0] * self.seq_length
        loss_function = torch.nn.MSELoss()
        dataloader = Dataloader(test, seq_length=self.seq_length, batch_size=self.batch_size)
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            while dataloader.flag:
                data_window, data_label = dataloader.get_item()
                loss = loss_function(self(data_window), data_label)
                losses.append(loss.detach().cpu().numpy())
        # NOTE(review): the padding zeros are part of the mean/std here, which
        # shifts the threshold slightly on short inputs - confirm intended.
        threshold = np.mean(losses) + 3 * np.std(losses)
        label = [1 if value > threshold else 0 for value in losses]
        return losses, label

from sklearn.model_selection import train_test_split


def _read_csv_files(directory, file_names):
    """Read each named CSV file in *directory* and return the frames in order."""
    return [pd.read_csv(directory + "/" + name) for name in file_names]


# --- Load every CSV from both input directories into one frame ---
# os.listdir returns entries in arbitrary order, and the data is later split
# chronologically (shuffle=False), so the load order is made deterministic.
# NOTE(review): assumes file names sort in chronological order - confirm.
path_1 = "/kaggle/input/data-17-02-2023-60s"
dir_list_1 = sorted(os.listdir(path_1))
path_2 = "/kaggle/input/test-data-21-02-2023"
dir_list_2 = sorted(os.listdir(path_2))

# Concatenate once instead of growing the frame inside a loop, which re-copies
# all previously loaded rows on every iteration.
dataset = pd.concat(
    _read_csv_files(path_1, dir_list_1) + _read_csv_files(path_2, dir_list_2)
)

# --- Clean: missing values become 0, non-feature columns are dropped ---
dataset = dataset.fillna(0)
dataset = dataset.drop(columns=['EVENT_TIME', 'Unnamed: 0'])

# --- Split without shuffling: the last 2% of rows form the test set ---
train, test = train_test_split(dataset, test_size=0.02, shuffle=False)
features = train.columns

# --- Scale: fit on the training rows only, then apply to both splits ---
scaler = MinMaxScaler()
train = pd.DataFrame(scaler.fit_transform(train), columns=features)
test = pd.DataFrame(scaler.transform(test), columns=features)

# --- Train, printing the elapsed wall-clock seconds ---
a = time.time()
model = LSTM(num_classes=train.shape[1], input_size=train.shape[1],
             hidden_size=64, num_layers=1, seq_length=10, batch_size=1)
losses_train = model.train(train, epochs=3, learning_rate=0.0001)
print(time.time() - a)

# --- Evaluate, printing the elapsed wall-clock seconds ---
a = time.time()
losses_test, label = model.test(test)
print(time.time() - a)

# --- Plot the per-batch training loss ---
plt.style.use('bmh')
plt.xlabel('Sample')
plt.ylabel('Loss')
plt.plot(losses_train)