In [1]:
import torch
from torch import nn
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from torch.autograd import Variable
import os
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
class Dataloader(object):
    def __init__(self, data, seq_length=16, batch_size = 1):
        self.data = pd.DataFrame(data)
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.flag = True
        self.index_start = 0
        
    def get_item(self):
        #print(self.data)
        out_put = []
        out_label = []
        if (self.index_start + self.seq_length + 1 == self.data.shape[0]):
            self.flag = False
        for index in range(self.batch_size):
            out_put.append(self.data.iloc[range(self.index_start + index, self.seq_length + self.index_start + index)])
            out_label.append(self.data.iloc[self.seq_length + self.index_start + index,:])
        
        self.index_start = self.index_start + 1   
        return torch.Tensor(np.array(out_put[:])), torch.Tensor(np.array(out_label))   
#### AutoEncoder #####
class AE(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        # Building an linear encoder with Linear
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(input_size, input_size//2),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(input_size//2, input_size//4),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(input_size//4, input_size//8)
        )
         
        # Building an linear decoder with Linear
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(input_size//8, input_size//4),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(input_size//4, input_size//2),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(input_size//2, input_size),
            torch.nn.LeakyReLU(0.1))
#### LSTM ####
class LSTM(nn.Module):

    def __init__(self, num_feature, seq_length, hidden_size=64, num_layers=15):
        super(LSTM, self).__init__()
        
        self.num_layers = num_layers
        self.num_feature = int(num_feature)
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        self.lstm = nn.LSTM(input_size= self.num_feature, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, self.num_feature)

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        #h_0 = Variable(torch.zeros(x.size(0), self.hidden_size))
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        #c_0 = Variable(torch.zeros(x.size(0), self.hidden_size))
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out
#### AE-LSTM ####
class AutoEncoder_LSTM(nn.Module):
    def __init__(self, input_size, seq_length):
        super().__init__()
        self.input_size = input_size
        self.seq_length = seq_length
        self.autoencoder = AE(input_size)
        self.lstm = LSTM(input_size/8, seq_length = seq_length)
 
    def forward(self, x):
        data_encoder = self.autoencoder.encoder(x)
        data_lstm = self.lstm(data_encoder)
        output = self.autoencoder.decoder(data_lstm)
        return output
    ##### train method #####
    def train(self, data, epochs, learning_rate, batch_size):
        losses = []
        optimizer = torch.optim.Adam(self.parameters(), lr = learning_rate, weight_decay = 1e-8)
        loss_function = torch.nn.MSELoss()
        for epoch in range(epochs):
            dataloader = Dataloader(data, seq_length = self.seq_length)
            while(dataloader.flag == True):
                data_window, data_label = dataloader.get_item()
                predict = self.forward(data_window)
                loss = loss_function(predict, data_label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses.append(loss.detach().numpy())
        return losses
    ##### test method #####
    def test(self, test):
        losses = []
        for i in range(self.seq_length):
            losses.append(0)
        loss_function = torch.nn.MSELoss()
        dataloader = Dataloader(test, seq_length = self.seq_length)
        while(dataloader.flag == True):
            data_window, data_label = dataloader.get_item()
            loss = loss_function(self.forward(data_window), data_label)
            losses.append(loss.detach().numpy())
        threshold = np.mean(losses) + 3 * np.std(losses) 
        label = []
        for i in range(len(losses)):
#             if self.check_tensor(test_tensor[i]):
#                 label.append(2)
#             else:
#                 if losses[i] > threshold:
#                     label.append(1)
#                 else: 
#                     label.append(0)
            if losses[i] > threshold:
                label.append(1)
            else: 
                label.append(0)
        return losses, label
def main():
    dataset = pd.DataFrame()
    path_1 = "/kaggle/input/data-17-02-2023-60s"
    dir_list_1 = os.listdir(path_1)
    for name_file in dir_list_1:
        dataset = pd.concat([dataset,pd.read_csv(path_1 + "/" + name_file)])
    path_2 = "/kaggle/input/test-data-21-02-2023"
    dir_list_2 = os.listdir(path_2)
    for name_file in dir_list_2:
        dataset = pd.concat([dataset,pd.read_csv(path_2 + "/" + name_file)])
    dataset.fillna(0, inplace=True)
    dataset = dataset.drop(['EVENT_TIME', 'Unnamed: 0'], axis = 1)
    start = time.time()
    
    train, test = train_test_split(dataset, test_size=0.02,shuffle=False)
    features = train.columns
    scaler = MinMaxScaler()
    scaler.fit(train)
    train = scaler.transform(train)
    test = scaler.transform(test)
    train = pd.DataFrame(train, columns = features)
    test = pd.DataFrame(test, columns = features)
    
    start = time.time()
    AE_LSTM = AutoEncoder_LSTM(seq_length = 10,input_size = train.shape[1])
    losses_train = AE_LSTM.train(train, epochs = 3,learning_rate = 0.0001, batch_size = 1)
    plt.style.use('bmh')
    plt.xlabel('Sample')
    plt.ylabel('Loss training')
    plt.plot(losses_train)
    print( "Training time: ", time.time() - start, "s")
    
    start = time.time()
    losses_test, label = AE_LSTM.test(test)
    print("Testing time: ", time.time() - start, "s")
    plt.style.use('bmh')
    plt.xlabel('Sample')
    plt.ylabel('Loss testing')
    plt.plot(losses_test)
    
if __name__ == "__main__":
    main()


SyntaxError: invalid syntax (1356228009.py, line 179)