In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [9]:
df = pd.read_csv('use_this.csv', index_col=0)
df.head()

Unnamed: 0,stn4contest,v01,v02,v03,v04,v05,v06,v07,v08,v09,vv,class_interval,year,month,day,hour,ef_datetime
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2020,5,1,12,2020-05-01 12:00:00
1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2020,5,1,15,2020-05-01 15:00:00
2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2020,5,1,18,2020-05-01 18:00:00
3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2020,5,1,21,2020-05-01 21:00:00
4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2020,5,2,0,2020-05-02 00:00:00


In [10]:
train_df = df[df['stn4contest'].isin(range(0, 18))]
test_df = df[df['stn4contest'].isin([18, 19])]

train = train_df.drop(columns=['ef_datetime', 'class_interval'])
test = test_df.drop(columns=['ef_datetime', 'class_interval'])

X_train = train[['stn4contest', 'year', 'month', 'day', 'hour', 'v01', 'v02', 'v03', 'v04', 'v05', 'v06', 'v07', 'v08', 'v09']]
y_train = train['vv'].values
X_test = test[['stn4contest', 'year', 'month', 'day', 'hour', 'v01', 'v02', 'v03', 'v04', 'v05', 'v06', 'v07', 'v08', 'v09']]
y_test = test['vv'].values

X_train_summer = X_train[(X_train['month']==7)|(X_train['month']==8)]
X_train_nonsummer = X_train[(X_train['month']!=7)&(X_train['month']!=8)]

y_train_summer = y_train[(X_train['month'] == 7) | (X_train['month'] == 8)]
y_train_nonsummer = y_train[(X_train['month'] != 7) & (X_train['month'] != 8)]

X_test_summer = X_test[(X_test['month']==7)|(X_test['month']==8)]
X_test_nonsummer = X_test[(X_test['month']!=7)&(X_test['month']!=8)]

y_test_summer = y_test[(X_test['month'] == 7) | (X_test['month'] == 8)]
y_test_nonsummer = y_test[(X_test['month'] != 7) & (X_test['month'] != 8)]

In [11]:
scaler_x = MinMaxScaler()
X_train_scaled_summer= scaler_x.fit_transform(X_train_summer)
X_train_scaled_nonsummer= scaler_x.fit_transform(X_train_nonsummer)

X_test_scaled_summer = scaler_x.transform(X_test_summer)
X_test_scaled_nonsummer = scaler_x.transform(X_test_nonsummer)

In [12]:
def create_dataset(X, y, time_step=1):
    Xs, ys = [], []
    for i in range(len(X) - time_step):
        Xs.append(X[i:(i + time_step)])
        ys.append(y[i + time_step])
    return np.array(Xs), np.array(ys)

time_step = 8  # 하루의 시간대 수
X_train_summer, y_train_summer = create_dataset(X_train_scaled_summer, y_train_summer, time_step)
X_train_nonsummer, y_train_nonsummer = create_dataset(X_train_scaled_nonsummer, y_train_nonsummer, time_step)
X_test_summer, y_test_summer = create_dataset(X_test_scaled_summer, y_test_summer, time_step)
X_test_nonsummer, y_test_nonsummer = create_dataset(X_test_scaled_nonsummer, y_test_nonsummer, time_step)

In [13]:
X_train_summer = torch.from_numpy(X_train_summer).float()
X_train_nonsummer = torch.from_numpy(X_train_nonsummer).float()

y_train_summer = torch.from_numpy(y_train_summer).float()
y_train_nonsummer = torch.from_numpy(y_train_nonsummer).float()

X_test_summer = torch.from_numpy(X_test_summer).float()
X_test_nonsummer = torch.from_numpy(X_test_nonsummer).float()

y_test_summer = torch.from_numpy(y_test_summer).float()
y_test_nonsummer = torch.from_numpy(y_test_nonsummer).float()

In [14]:
class DBN(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(DBN, self).__init__()
        self.rbm1 = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(),
            nn.Dropout(0.2)  # Dropout 추가
        )
        self.rbm2 = nn.Sequential(
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(),
            nn.Dropout(0.2)  # Dropout 추가
        )
        self.regressor = nn.Linear(hidden_dim2, output_dim)

    def forward(self, x):
        x = self.rbm1(x)
        x = self.rbm2(x)
        return self.regressor(x)

In [16]:
# 모델 초기화
output_dim = 1

input_dim_summer = X_train_summer.shape[2] * time_step
input_dim_nonsummer = X_train_nonsummer.shape[2] * time_step

hidden_dim1 = 128
hidden_dim2 = 64

model_summer = DBN(input_dim_summer, hidden_dim1, hidden_dim2, output_dim)
model_nonsummer = DBN(input_dim_nonsummer, hidden_dim1, hidden_dim2, output_dim)

criterion_mse = nn.MSELoss()
criterion_mae = nn.L1Loss()

optimizer_summer = optim.Adam(model_summer.parameters(), lr=0.001)
optimizer_nonsummer = optim.Adam(model_nonsummer.parameters(), lr=0.001)

In [17]:
# 모델 학습
num_epochs = 300
for epoch in range(num_epochs):
    model_summer.train()

for epoch in range(num_epochs):
    model_nonsummer.train()


model_summer.eval()
model_nonsummer.eval()


with torch.no_grad():
    predictions_summer = model_summer(X_test_summer.view(X_test_summer.size(0), -1)).numpy()\

with torch.no_grad():
    predictions_nonsummer = model_nonsummer(X_test_nonsummer.view(X_test_nonsummer.size(0), -1)).numpy()\


eval_summer = evaluation_metrics(y_test_summer, predictions_summer)
eval_nonsummer = evaluation_metrics(y_test_nonsummer, predictions_nonsummer)
    
print(eval_summer)
print(eval_nonsummer)

NameError: name 'evaluation_metrics' is not defined

In [None]:
predictions_summer

In [None]:
prediction_df = train_df.copy()
predictions_extend = [0] * 8
predictions_extend.extend(predictions_train.flatten())
print(predictions_extend)
prediction_df['prediction_label'] = predictions_extend

In [14]:
def classify_class(value):
    if value < 0.1:
        return 0
    elif 0.1 <= value < 0.2:
        return 1
    elif 0.2 <= value < 0.5:
        return 2
    elif 0.5 <= value < 1.0:
        return 3
    elif 1.0 <= value < 2.0:
        return 4
    elif 2.0 <= value < 5.0:
        return 5
    elif 5.0 <= value < 10.0:
        return 6
    elif 10.0 <= value < 20.0:
        return 7
    elif 20.0 <= value < 30.0:
        return 8
    elif 30.0 <= value:
        return 9

In [None]:
df2=pd.read_csv("/content/drive/MyDrive/Colab Notebooks/test_stn+ef_label.csv")

df2_drop = df2[['stn4contest', 'year', 'month', 'day', 'hour', 'v01', 'v02', 'v03', 'v04', 'v05', 'v06', 'v07', 'v08', 'v09']]

scaler = MinMaxScaler()
X_test_scaled2 = scaler.fit_transform(df2_drop)

def create_dataset_X(X, time_step=1):
    Xs = []
    for i in range(len(X) - time_step):
        Xs.append(X[i:(i + time_step)])
    return np.array(Xs)

X_test2 = create_dataset_X(X_test_scaled2, time_step)
X_test2 = torch.from_numpy(X_test2).float()

model.eval()
with torch.no_grad():
    predictions_test = model(X_test2.view(X_test2.size(0), -1)).numpy()


predictions_extend = [0] * 8
predictions_extend.extend(predictions_test.flatten())
print(predictions_extend)

pd.DataFrame(predictions_extend).to_csv('/content/drive/MyDrive/Colab Notebooks/dbn_test.csv')
    optimizer.zero_grad()
    outputs = model(X_train.view(X_train.size(0), -1))
    # loss_r2 = criterion_r2(outputs, y_train.view(-1, 1))
    loss_mse = criterion_mse(outputs, y_train.view(-1, 1))
    loss_mae = criterion_mae(outputs, y_train.view(-1, 1))
    loss = loss_mse + loss_mae  # 두 손실 함수의 합
    # loss = loss_r2 + loss_mse + loss_mae
    # loss = loss_r2
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, MSE Loss: {loss_mse.item():.4f}, MAE Loss: {loss_mae.item():.4f}')
output_dim = 1