### Prediciton Application
- input: 원하는 예측 날짜의 7일 전부터 전날까지 측정한 데이터. 
         merged_data 엑셀 파일과 같은 형태이어야 함(월|일|요일|공휴일 유무|온도|습도|건물이름 유효전력량*56개 건물).
- output: 원하는 예측 날짜의 1시간 단위로 예측한 결과(ex: test_for_0901.xlsx)

### 주의할 점
- input 데이터 형태를 꼭 맞춰줘야함. 해당 날짜의 

In [1]:
import torch
from torch import nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from datetime import datetime
import pickle

# 시계열 예측을 위한 데이터셋 클래스 정의
class TimeSeriesDataset_forPredict(Dataset):
    def __init__(self, dataframe, seq_len=7*24):
        self.seq_len = seq_len  # 시퀀스 길이 지정 (기본값은 7일 * 24시간 = 168시간)

        # 데이터 전처리 실행
        self.dataframe = self._preprocess(dataframe)

    def _preprocess(self, df):
        # 결측치가 있다면, 바로 앞의 값으로 채우기
        df.fillna(method='ffill', inplace=True)

        # 수치형 컬럼 정규화하여 [0, 1] 범위로 변환
        scaler = MinMaxScaler()
        numerical_cols = df.select_dtypes(include=[np.number]).columns
        df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

        # 범주형 변수를 원-핫 인코딩하기
        categorical_cols = df.select_dtypes(include=['object']).columns
        if not categorical_cols.empty:
            encoder = OneHotEncoder()
            encoded = encoder.fit_transform(df[categorical_cols])
            encoded_df = pd.DataFrame(encoded.toarray(), columns=encoder.get_feature_names(categorical_cols))
            
            # 원래의 범주형 컬럼 제거하고 인코딩된 컬럼을 추가
            df.drop(columns=categorical_cols, inplace=True)
            df = pd.concat([df, encoded_df], axis=1)
        
        return df

    def __len__(self):
        # 데이터셋의 총 길이 반환
        return max(0, len(self.dataframe) - self.seq_len + 1)

    def __getitem__(self, idx):
        # idx 위치에서 시작하는 시퀀스 반환
        x = self.dataframe.iloc[idx:idx+self.seq_len,:7]
        return torch.Tensor(x.values)  # x값만 반환

    
# LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # LSTM 및 Fully Connected Layer 정의
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # 초기 hidden state 및 cell state 설정
        h0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim).to(x.device) 
        c0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim).to(x.device) 

        # LSTM 계층을 통과한 후의 출력값 계산
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # 마지막 시퀀스만을 이용하여 최종 예측값 계산
        return out


In [3]:
predict_day = '0901'

hyperparameters_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/results/20230807_180431.pkl'
model_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/results/model_20230807_180431.pt'
test_data = '/home/kimyirum/EMS/ict-2023-ems/load/data/test_for_'+predict_day+'.xlsx'

df = pd.read_excel(test_data)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
hyperparameters = {}

# Load results from a pickle file
with open(hyperparameters_filepath, 'rb') as f:
    loaded_results = pickle.load(f)
    hyperparameters = loaded_results['Hyperparameters']
    scalers = loaded_results['Scalers']

# Print hyperparameters
for key, value in hyperparameters.items():
    print(f'{key}: {value}')

# Initialize our dataset class
dataset = TimeSeriesDataset_forPredict(df)
test_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)

# Recreate the model architecture
model = LSTMModel(
    input_dim=7,
    hidden_dim=int(hyperparameters['hidden_dim']),
    output_dim=24*56,
    n_layers=int(hyperparameters['n_layers'])
).to(device)

# Load the saved weights
model.load_state_dict(torch.load(model_filepath))

# Switch the model to evaluation mode
model.eval()

cuda
learning_rate: 0.001
final_learning_rate: 0.00025
batch_size: 256
max_epochs: 700
stop_epoch: -1
hidden_dim: 128
n_layers: 7


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


LSTMModel(
  (lstm): LSTM(7, 128, num_layers=7, batch_first=True)
  (fc): Linear(in_features=128, out_features=1344, bias=True)
)

In [4]:
# Assuming df is the original dataset and it includes a 'date' column
building_names = df.columns[-56:]  # adjust this as necessary

# Prepare storage for predictions
predictions = []

# Iterate over test set
for sequence in test_loader:
    # Move sequence to correct device
    sequence = sequence.to(device)
    # Make prediction
    with torch.no_grad():
        prediction = model(sequence).cpu().numpy()

    prediction_res = prediction.squeeze(0).reshape(24, 56)
    padding = np.zeros((prediction_res.shape[0], 7))
    prediction_pad = np.hstack((padding, prediction_res))
    prediction_inv = scalers.inverse_transform(prediction_pad)
    prediction_inv = np.delete(prediction_inv, np.s_[:7], axis=1)
    prediction = prediction_inv.reshape(prediction.shape)

    # Store the prediction
    predictions.append(prediction)

# Combine all predictions
predictions = np.concatenate(predictions, axis=0)

# Create a DataFrame for predictions
# Reshape the predictions to align with the number of building_names
predictions = predictions.reshape(-1, len(building_names))
predictions_df = pd.DataFrame(predictions, columns=building_names)

predictions_df['total(KW)'] = predictions_df.sum(axis=1)

# Save to Excel file
output_filepath = '/home/kimyirum/EMS/ict-2023-ems/load/predict_for_'+predict_day+'.xlsx'  # adjust this as necessary
predictions_df.to_excel(output_filepath, index=False)


ValueError: need at least one array to concatenate