### 정리한 데이터로 노아 모델 돌려보기

1. 데이터 로드

In [1]:
import pandas as pd

file = '../../DATA/SMP_201004_202403.csv'
df = pd.read_csv(file, encoding='utf-8')
df.head()

Unnamed: 0,날짜,통합
0,2001-04-01,50.705398
1,2001-05-01,52.552364
2,2001-06-01,51.247385
3,2001-07-01,47.428339
4,2001-08-01,42.2932


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   날짜      276 non-null    object 
 1   통합      276 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.4+ KB


In [3]:
# 날짜 데이터를 datetime 형식으로 변환
df['날짜'] = pd.to_datetime(df['날짜'])
df.info()

# 통합 열 정규화
def normalize(x):
    return (x - x.mean()) / x.std()

def denormalize(x, mean, std):
    return x * std + mean

df['norm'] = normalize(df['통합'])
df['denorm'] = denormalize(df['norm'], df['통합'].mean(), df['통합'].std())
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   날짜      276 non-null    datetime64[ns]
 1   통합      276 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 4.4 KB


Unnamed: 0,날짜,통합,norm,denorm
0,2001-04-01,50.705398,-1.165759,50.705398
1,2001-05-01,52.552364,-1.123687,52.552364
2,2001-06-01,51.247385,-1.153413,51.247385
3,2001-07-01,47.428339,-1.240408,47.428339
4,2001-08-01,42.2932,-1.357382,42.2932


normalize, denormalize 함수 잘 돌아간다

2. 이제 std 열을 모델 학습

In [4]:
import torch

# dataset 생성
class SMPDataset:
    def __init__(self, df, seq_len=12):
        data = df['norm'].values
        self.data = torch.FloatTensor(data)
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        X = self.data[idx:idx+self.seq_len]
        y = self.data[idx+self.seq_len]
        return X, y

# Check
smp_data = SMPDataset(df)
print(len(smp_data))
for i in range(5):
    print(smp_data[i])
    break

264
(tensor([-1.1658, -1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470,
        -1.0387, -0.9969, -1.1334, -1.1704]), tensor(-1.2558))


In [5]:
# DataLoader 생성
from torch.utils.data import DataLoader

smp_loader = DataLoader(smp_data, batch_size=8, shuffle=False)
for X, y in smp_loader:
    print(X)
    print(y)
    break

tensor([[-1.1658, -1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470,
         -1.0387, -0.9969, -1.1334, -1.1704],
        [-1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387,
         -0.9969, -1.1334, -1.1704, -1.2558],
        [-1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969,
         -1.1334, -1.1704, -1.2558, -1.2220],
        [-1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334,
         -1.1704, -1.2558, -1.2220, -1.4278],
        [-1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704,
         -1.2558, -1.2220, -1.4278, -1.4082],
        [-1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558,
         -1.2220, -1.4278, -1.4082, -1.4989],
        [-1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558, -1.2220,
         -1.4278, -1.4082, -1.4989, -1.3924],
        [-1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558, -1.2220, -1.4278,
         -1.4082, -1.4989, -1.

데이터로더도 순서대로 잘 나오는 걸 확인

3. 모델 학습

In [6]:
import torch
import torch.nn as nn
from sklearn.metrics import r2_score

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(input_size=12, hidden_size=30, num_layers=2, batch_first=True)
        self.fc = nn.Linear(30, 1)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x[:, -1])
        return x

model = RNN()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

def compute_r2_score(y_true, y_pred):
    return r2_score(y_true.detach().cpu().numpy(), y_pred.detach().cpu().numpy())

for epoch in range(20000):
    for i, (x, y) in enumerate(smp_loader):
        optimizer.zero_grad()
        output = model(x.unsqueeze(1).float())
        loss = criterion(output.squeeze(), y.float())
        loss.backward()
        optimizer.step()

    if epoch % 1000 == 0:
        y_true = y.float().unsqueeze(1)  
        y_pred = model(x.unsqueeze(1).float())  
        r2 = compute_r2_score(y_true, y_pred.squeeze())

        print(f"Epoch {epoch}: Loss: {loss}, R-squared: {r2}")

Epoch 0: Loss: 0.10526145994663239, R-squared: -1.894234989837042
Epoch 1000: Loss: 0.01657789759337902, R-squared: 0.6769675508822709
Epoch 2000: Loss: 0.005285430233925581, R-squared: 0.9164240711651997
Epoch 3000: Loss: 0.003487494308501482, R-squared: 0.9392556295290679
Epoch 4000: Loss: 0.003656781977042556, R-squared: 0.9324326518946167
Epoch 5000: Loss: 0.0010331152006983757, R-squared: 0.9799151558306072
Epoch 6000: Loss: 0.001092730090022087, R-squared: 0.9778705960490869
Epoch 7000: Loss: 0.0005202708416618407, R-squared: 0.9904133291363871
Epoch 8000: Loss: 0.0014594937674701214, R-squared: 0.9768426614074148
Epoch 9000: Loss: 7.635726069565862e-05, R-squared: 0.9985444802936521
Epoch 10000: Loss: 0.00032284107874147594, R-squared: 0.9924786695093757
Epoch 11000: Loss: 0.001871407381258905, R-squared: 0.9741340960225611
Epoch 12000: Loss: 0.0009127602097578347, R-squared: 0.9829513961929747
Epoch 13000: Loss: 0.0028625924605876207, R-squared: 0.9662136755398583
Epoch 14000: 

In [8]:
# 모델 저장
torch.save(model.state_dict(), 'noo_model_12m.pth')