In [2]:
import torch
from torch import nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from datetime import datetime

class TimeSeriesDataset(Dataset):
    def __init__(self, dataframe, seq_len=7*24, pred_len=24):
        self.seq_len = seq_len
        self.pred_len = pred_len

        self.dataframe = self._preprocess(dataframe)

    def _preprocess(self, df):
        # If there are any missing values, fill them with the previous value in time-series
        df.fillna(method='ffill', inplace=True)

        # Normalize numerical columns to range [0, 1]
        scaler = MinMaxScaler()
        numerical_cols = df.select_dtypes(include=[np.number]).columns
        df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

        # One-hot encode categorical variables
        categorical_cols = df.select_dtypes(include=['object']).columns
        if not categorical_cols.empty:
            encoder = OneHotEncoder()
            encoded = encoder.fit_transform(df[categorical_cols])
            encoded_df = pd.DataFrame(encoded.toarray(), columns=encoder.get_feature_names(categorical_cols))
            
            # Drop original categorical columns and merge with encoded ones
            df.drop(columns=categorical_cols, inplace=True)
            df = pd.concat([df, encoded_df], axis=1)
        
        return df

    def __len__(self):
        return len(self.dataframe) - self.seq_len - self.pred_len + 1

    def __getitem__(self, idx):
        x = self.dataframe.iloc[idx:idx+self.seq_len]
        y = self.dataframe.iloc[idx+self.seq_len:idx+self.seq_len+self.pred_len, -56:] # Assuming last 56 columns are power values
        return torch.Tensor(x.values), torch.Tensor(y.values).reshape(-1)  # flatten y values
    
# Define LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim).to(x.device) # Initialize hidden state
        c0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim).to(x.device) # Initialize cell state

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out


In [3]:
# Load the excel file
df = pd.read_excel('/home/kimyirum/EMS/ict-2023-ems/load/data/merged_data.xlsx')

# Initialize our dataset class
dataset = TimeSeriesDataset(df)

# Define the split sizes for train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Split dataset
train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])

In [4]:
########################################## Hyperparameters ##########################################
input_dim = len(train_set[0][0][0]) # 63
output_dim = 24*56
hidden_dim = 256
n_layers = 9
learning_rate = 0.001
num_epochs = 150
batch_size = 64
########################################################################################################

# DataLoader
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

# Initialize model, loss and optimizer
model = LSTMModel(input_dim, hidden_dim, output_dim, n_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

########################################## Training loop ##########################################
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validate the model
    model.eval()
    with torch.no_grad():
        val_losses = []
        for data, targets in val_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            val_losses.append(loss.item())
        avg_val_loss = sum(val_losses) / len(val_losses)
        print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}')
########################################################################################################

########################################## Evaluate the model ##########################################
# DataLoader for test set
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
model.eval()
with torch.no_grad():
    all_targets = []
    all_outputs = []
    for data, targets in test_loader:
        outputs = model(data)
        all_targets.append(targets.numpy())
        all_outputs.append(outputs.numpy())

# Flatten targets and outputs to calculate metrics
all_targets = np.concatenate(all_targets).flatten()
all_outputs = np.concatenate(all_outputs).flatten()

# Calculate MAE, MSE and RMSE
mae = mean_absolute_error(all_targets, all_outputs)
mse = mean_squared_error(all_targets, all_outputs)
rmse = sqrt(mse)

print(f'MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}')
########################################################################################################


Epoch [1/150], Validation Loss: 0.0829
Epoch [2/150], Validation Loss: 0.0726
Epoch [3/150], Validation Loss: 0.0658
Epoch [4/150], Validation Loss: 0.0618
Epoch [5/150], Validation Loss: 0.0531
Epoch [6/150], Validation Loss: 0.0457
Epoch [7/150], Validation Loss: 0.0439
Epoch [8/150], Validation Loss: 0.0440
Epoch [9/150], Validation Loss: 0.0448
Epoch [10/150], Validation Loss: 0.0442
Epoch [11/150], Validation Loss: 0.0444
Epoch [12/150], Validation Loss: 0.0447
Epoch [13/150], Validation Loss: 0.0439
Epoch [14/150], Validation Loss: 0.0444
Epoch [15/150], Validation Loss: 0.0438
Epoch [16/150], Validation Loss: 0.0443
Epoch [17/150], Validation Loss: 0.0441
Epoch [18/150], Validation Loss: 0.0439
Epoch [19/150], Validation Loss: 0.0441
Epoch [20/150], Validation Loss: 0.0453
Epoch [21/150], Validation Loss: 0.0457
Epoch [22/150], Validation Loss: 0.0444
Epoch [23/150], Validation Loss: 0.0441
Epoch [24/150], Validation Loss: 0.0450
Epoch [25/150], Validation Loss: 0.0442
Epoch [26

In [48]:
# 모델 저장하고 싶으면 이 코드 실행. 저장 안하고 싶으면 그냥 패스
results_folder = "/home/kimyirum/EMS/ict-2023-ems/load/results"
hyperparams = {
    '\tlearning_rate': learning_rate,
    '\tbatch_size': batch_size,
    '\tnum_epochs': num_epochs,
    '\thidden_dim': hidden_dim,
    '\tn_layers': n_layers
}
metrics = {
    '\tMAE': mae,
    '\tMSE': mse,
    '\tRMSE': rmse,
}

now = datetime.now()
now_str = now.strftime('%Y%m%d_%H%M%S')
filename_metrics = f'{now_str}.txt'
filename_model = f'model_{now_str}.pt'

# Save the model parameters
torch.save(model.state_dict(), results_folder+filename_model)

# Open the file in write mode ('w')
with open(results_folder+filename_metrics, 'w') as f:
    # Write hyperparameters
    f.write('Hyperparameters:\n')
    for key, value in hyperparams.items():
        f.write(f'{key}: {value}\n')
    
    # Write metrics
    f.write('\nMetrics:\n')
    for key, value in metrics.items():
        f.write(f'{key}: {value}\n')

In [5]:
# Assuming df is the original dataset and it includes a 'date' column

# Get the building names
building_names = df.columns[-56:]  # adjust this as necessary

# DataLoader for test set
test_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=False)

# Get the first sequence and its target from the test set
real_sequence, real_target = next(iter(test_loader))

# Switch model to eval mode
model.eval()

# Make prediction
with torch.no_grad():
    prediction = model(real_sequence)

# Remove batch dimension, reshape and convert prediction to numpy array
prediction = prediction.squeeze(0).reshape(24, 56).numpy()

# Reshape real_target and convert it to numpy array
real_target = real_target.view(24, 56).numpy()

# Calculate error (difference between real target and prediction)
error = real_target - prediction

# Create DataFrame for prediction
predicted_df = pd.DataFrame(prediction, columns=building_names)
real_target_df = pd.DataFrame(real_target, columns=building_names)
error_df = pd.DataFrame(error, columns=building_names)

# print("Predicted Values for next 24 hours:")
# print(predicted_df)

# print("Real Values for next 24 hours:")
# print(real_target_df)

print("Error for next 24 hours:")
print(error_df)


Error for next 24 hours:
      0_SV-2    1_SV-5    2_SV-6    3_SV-7  4_HV-NM1  5_HV-NM2   6_고압콘덴샤  \
0  -0.249648 -0.248628 -0.001138 -0.256933 -0.001955 -0.290911 -0.026695   
1  -0.249591 -0.250597  0.001241 -0.253004  0.000565 -0.284044 -0.028964   
2  -0.246761 -0.246217 -0.000619 -0.257665 -0.000331 -0.277790 -0.027177   
3  -0.242882 -0.241013 -0.000384 -0.249653 -0.000302 -0.271895 -0.028800   
4  -0.240177 -0.240528  0.000733 -0.244750 -0.000471 -0.270508 -0.029942   
5  -0.241743 -0.240013  0.000709 -0.241505 -0.001707 -0.262848 -0.030394   
6  -0.237556 -0.241275 -0.000451 -0.245925  0.001141 -0.259856 -0.032543   
7  -0.236888 -0.237028 -0.000368 -0.244763  0.000112 -0.262263 -0.031567   
8  -0.234855 -0.235198 -0.000201 -0.239928 -0.000184 -0.252539 -0.029305   
9  -0.235916 -0.234361 -0.000439 -0.237787 -0.001414 -0.245139 -0.032982   
10 -0.229464 -0.227502 -0.000296 -0.234399  0.000786 -0.247575 -0.032889   
11 -0.228833 -0.224779 -0.001018 -0.237540  0.001602 -0.242909 