In [2]:
import torch
from torch import nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from datetime import datetime
import pickle

class TimeSeriesDataset_forPredict(Dataset):
    """Sliding-window dataset that yields input sequences only (no targets).

    Each item is a float32 tensor of shape ``(seq_len, n_features)`` built from
    ``seq_len`` consecutive rows of the preprocessed frame.

    Args:
        dataframe: Raw input frame. It is NOT modified; preprocessing works on
            a copy.
        seq_len: Number of consecutive rows per sequence (default ``7*24``).
        scaler: Optional, already-fitted scaler exposing ``transform``. When
            given, numeric columns are scaled with it instead of fitting a new
            ``MinMaxScaler`` on the inference data.
            NOTE(review): the scaler must have been fitted on the same numeric
            columns in the same order -- confirm against the training code.
    """

    def __init__(self, dataframe, seq_len=7*24, scaler=None):
        self.seq_len = seq_len
        self.scaler = scaler

        self.dataframe = self._preprocess(dataframe)

    def _preprocess(self, df):
        """Return a forward-filled, scaled and one-hot encoded copy of ``df``."""
        # ffill() returns a new frame, so the caller's DataFrame stays untouched
        # (the deprecated fillna(method='ffill', inplace=True) mutated it).
        out = df.ffill()

        # Scale numerical columns; skip entirely when there are none, because
        # a scaler cannot be fitted on zero features.
        numerical_cols = out.select_dtypes(include=[np.number]).columns
        if len(numerical_cols) > 0:
            scaler = getattr(self, 'scaler', None)
            if scaler is None:
                # Default behaviour: normalise to [0, 1] using this frame's own range.
                scaled = MinMaxScaler().fit_transform(out[numerical_cols])
            else:
                scaled = scaler.transform(out[numerical_cols])
            out[numerical_cols] = scaled

        # One-hot encode categorical (object-dtype) columns.
        categorical_cols = out.select_dtypes(include=['object']).columns
        if not categorical_cols.empty:
            encoder = OneHotEncoder()
            encoded = encoder.fit_transform(out[categorical_cols])
            # get_feature_names was removed in scikit-learn 1.2; prefer the
            # replacement and fall back only on old versions.
            feature_names = (
                getattr(encoder, 'get_feature_names_out', None)
                or encoder.get_feature_names
            )
            encoded_df = pd.DataFrame(
                encoded.toarray(),
                columns=feature_names(categorical_cols),
                index=out.index,  # keep rows aligned for the concat below
            )

            # Drop original categorical columns and merge with encoded ones
            out = pd.concat([out.drop(columns=categorical_cols), encoded_df], axis=1)

        return out

    def __len__(self):
        """Number of full windows; 0 when the frame is shorter than ``seq_len``."""
        return max(0, len(self.dataframe) - self.seq_len + 1)

    def __getitem__(self, idx):
        """Return rows ``idx .. idx+seq_len-1`` as a float32 tensor (inputs only)."""
        window = self.dataframe.iloc[idx:idx + self.seq_len]
        return torch.tensor(window.to_numpy(dtype=np.float32))

    
# Define LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Size of the output vector produced by the linear layer.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Attribute names double as state_dict key prefixes; keep them stable.
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch-first input ``(batch, seq, input_dim)`` to ``(batch, output_dim)``."""
        state_shape = (self.n_layers, x.size(0), self.hidden_dim)
        # Zero initial hidden and cell states, allocated on the input's device.
        hidden_init = torch.zeros(state_shape, device=x.device)
        cell_init = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (hidden_init, cell_init))
        # Only the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


In [3]:
# --- Configuration ---------------------------------------------------------
predict_day = '0901'

hyperparameters_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/results/20230802_204633.pkl'
model_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/results/model_20230802_204633.pt'
test_data = '/home/kimyirum/EMS/ict-2023-ems/load/data/test_for_'+predict_day+'.xlsx'

# Model layout. These must match the saved checkpoint, otherwise
# load_state_dict below fails with a shape mismatch.
INPUT_DIM = 63        # features per time step fed to the LSTM
HORIZON_STEPS = 24    # predicted steps per sequence (presumably hours -- confirm)
N_BUILDINGS = 56      # predicted values per step
OUTPUT_DIM = HORIZON_STEPS * N_BUILDINGS

# --- Load inputs -----------------------------------------------------------
df = pd.read_excel(test_data)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load training results (hyperparameters and fitted scaler) from a pickle file.
# NOTE(security): pickle.load can execute arbitrary code; only open files that
# were produced by a trusted training run.
with open(hyperparameters_filepath, 'rb') as f:
    loaded_results = pickle.load(f)
hyperparameters = loaded_results['Hyperparameters']
scalers = loaded_results['Scalers']

# Print hyperparameters
for key, value in hyperparameters.items():
    print(f'{key}: {value}')

# --- Dataset ---------------------------------------------------------------
# batch_size=1 and shuffle=False: one sequence per step, in time order.
dataset = TimeSeriesDataset_forPredict(df)
test_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)

# --- Model -----------------------------------------------------------------
# Recreate the model architecture
model = LSTMModel(
    input_dim=INPUT_DIM,
    hidden_dim=int(hyperparameters['hidden_dim']),
    output_dim=OUTPUT_DIM,
    n_layers=int(hyperparameters['n_layers'])
).to(device)

# Load the saved weights. map_location remaps tensors onto the selected device,
# so a checkpoint saved from CUDA still loads when only the CPU is available.
model.load_state_dict(torch.load(model_filepath, map_location=device))

# Switch the model to evaluation mode
model.eval()

cuda
learning_rate: 0.001
batch_size: 128
num_epochs: 200
hidden_dim: 128
n_layers: 7


LSTMModel(
  (lstm): LSTM(63, 128, num_layers=7, batch_first=True)
  (fc): Linear(in_features=128, out_features=1344, bias=True)
)

In [4]:
# The predicted series are taken to be the last N_BUILDINGS columns of df.
# `scalers.inverse_transform` is applied to arrays that carry
# N_LEADING_FEATURES extra columns in front of them.
# NOTE(review): both numbers must match the training data layout -- adjust as necessary.
N_BUILDINGS = 56
N_LEADING_FEATURES = 7

building_names = df.columns[-N_BUILDINGS:]

# Prepare storage for predictions
predictions = []

# Iterate over test set; no gradients are needed for inference.
with torch.no_grad():
    for sequence in test_loader:
        # Move sequence to correct device and make the prediction
        scaled_output = model(sequence.to(device)).cpu().numpy()

        # One row per predicted step, one column per building
        # (works for any batch size, not only batch_size=1).
        scaled_steps = scaled_output.reshape(-1, N_BUILDINGS)

        # The scaler is called on the full-width layout, so place the
        # predictions behind zero-filled leading columns, invert the scaling
        # and keep only the building columns.
        padded = np.zeros((scaled_steps.shape[0], N_LEADING_FEATURES + N_BUILDINGS))
        padded[:, N_LEADING_FEATURES:] = scaled_steps
        restored = scalers.inverse_transform(padded)[:, N_LEADING_FEATURES:]

        # Store the prediction
        predictions.append(restored)

if not predictions:
    # np.concatenate would otherwise fail with an unhelpful message.
    raise ValueError(
        "test_loader yielded no sequences; the input data is probably shorter "
        "than the dataset's seq_len."
    )

# Combine all predictions into a (n_steps_total, N_BUILDINGS) array
predictions = np.concatenate(predictions, axis=0)

# Create a DataFrame for predictions, one column per building
predictions = predictions.reshape(-1, len(building_names))
predictions_df = pd.DataFrame(predictions, columns=building_names)

# Row-wise total over all buildings
predictions_df['total(KW)'] = predictions_df.sum(axis=1)

# Save to Excel file
output_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/predict_for_'+predict_day+'.xlsx'  # adjust this as necessary
predictions_df.to_excel(output_filepath, index=False)
