### 정리한 데이터로 노아 모델 돌려보기

1. 데이터 로드

In [1]:
import pandas as pd

file = '../DATA/SMP_201004_202403.csv'
df = pd.read_csv(file, encoding='utf-8')
df.head()

Unnamed: 0,날짜,통합
0,2001-04-01,50.705398
1,2001-05-01,52.552364
2,2001-06-01,51.247385
3,2001-07-01,47.428339
4,2001-08-01,42.2932


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   날짜      276 non-null    object 
 1   통합      276 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.4+ KB


In [16]:
# 날짜 데이터를 datetime 형식으로 변환
df['날짜'] = pd.to_datetime(df['날짜'])
df.info()

# 통합 열 정규화
def normalize(x):
    return (x - x.mean()) / x.std()

def denormalize(x, mean, std):
    return x * std + mean

df['norm'] = normalize(df['통합'])
df['denorm'] = denormalize(df['norm'], df['통합'].mean(), df['통합'].std())
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   날짜      276 non-null    datetime64[ns]
 1   통합      276 non-null    float64       
 2   std     276 non-null    float64       
 3   norm    276 non-null    float64       
 4   denorm  5 non-null      float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 10.9 KB


Unnamed: 0,날짜,통합,std,norm,denorm
0,2001-04-01,50.705398,-1.165759,-1.165759,50.705398
1,2001-05-01,52.552364,-1.123687,-1.123687,52.552364
2,2001-06-01,51.247385,-1.153413,-1.153413,51.247385
3,2001-07-01,47.428339,-1.240408,-1.240408,47.428339
4,2001-08-01,42.2932,-1.357382,-1.357382,42.2932


normalize, denormalize 함수 잘 돌아간다

2. 이제 std 열을 모델 학습

In [17]:
import torch

# dataset 생성
class SMPDataset:
    def __init__(self, df, seq_len=12):
        data = df['std'].values
        self.data = torch.FloatTensor(data)
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        X = self.data[idx:idx+self.seq_len]
        y = self.data[idx+self.seq_len]
        return X, y

# Check
smp_data = SMPDataset(df)
print(len(smp_data))
for i in range(5):
    print(smp_data[i])
    break

264
(tensor([-1.1658, -1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470,
        -1.0387, -0.9969, -1.1334, -1.1704]), tensor(-1.2558))


In [22]:
# DataLoader 생성
from torch.utils.data import DataLoader

smp_loader = DataLoader(smp_data, batch_size=8, shuffle=False)
for X, y in smp_loader:
    print(X)
    print(y)
    break

tensor([[-1.1658, -1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470,
         -1.0387, -0.9969, -1.1334, -1.1704],
        [-1.1237, -1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387,
         -0.9969, -1.1334, -1.1704, -1.2558],
        [-1.1534, -1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969,
         -1.1334, -1.1704, -1.2558, -1.2220],
        [-1.2404, -1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334,
         -1.1704, -1.2558, -1.2220, -1.4278],
        [-1.3574, -1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704,
         -1.2558, -1.2220, -1.4278, -1.4082],
        [-1.3062, -1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558,
         -1.2220, -1.4278, -1.4082, -1.4989],
        [-1.2517, -1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558, -1.2220,
         -1.4278, -1.4082, -1.4989, -1.3924],
        [-1.2470, -1.0387, -0.9969, -1.1334, -1.1704, -1.2558, -1.2220, -1.4278,
         -1.4082, -1.4989, -1.

데이터로더도 순서대로 잘 나오는 걸 확인

3. 모델 학습

In [24]:
import torch
import torch.nn as nn
from sklearn.metrics import r2_score

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(input_size=12, hidden_size=30, num_layers=2, batch_first=True)
        self.fc = nn.Linear(30, 1)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x[:, -1])
        return x

model = RNN()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)

def compute_r2_score(y_true, y_pred):
    return r2_score(y_true.detach().cpu().numpy(), y_pred.detach().cpu().numpy())

for epoch in range(20000):
    for i, (x, y) in enumerate(smp_loader):
        optimizer.zero_grad()
        output = model(x.unsqueeze(1).float())
        loss = criterion(output.squeeze(), y.float())
        loss.backward()
        optimizer.step()

    if epoch % 1000 == 0:
        y_true = y.float().unsqueeze(1)  
        y_pred = model(x.unsqueeze(1).float())  
        r2 = compute_r2_score(y_true, y_pred.squeeze())

        print(f"Epoch {epoch}: Loss: {loss}, R-squared: {r2}")

Epoch 0: Loss: 0.5884777903556824, R-squared: -14.657509517359172
Epoch 1000: Loss: 3.158687114715576, R-squared: -51.80693239202994
Epoch 2000: Loss: 0.058236263692379, R-squared: -0.8353330845929758
Epoch 3000: Loss: 0.10228029638528824, R-squared: -0.939897020225609
Epoch 4000: Loss: 0.14891520142555237, R-squared: -1.728776129921088
Epoch 5000: Loss: 0.03082852065563202, R-squared: 0.2708598671948067
Epoch 6000: Loss: 0.09411229938268661, R-squared: -0.6788758701931552
Epoch 7000: Loss: 0.07331418246030807, R-squared: -0.4222568164056417
Epoch 8000: Loss: 0.199615478515625, R-squared: -2.9832241359865397
Epoch 9000: Loss: 0.08042310178279877, R-squared: -0.20642671227327725
Epoch 10000: Loss: 0.15546779334545135, R-squared: -2.813070248656389
Epoch 11000: Loss: 0.041518837213516235, R-squared: 0.11616140081449244
Epoch 12000: Loss: 0.03944193199276924, R-squared: 0.03473109368330285
Epoch 13000: Loss: 0.030518528074026108, R-squared: 0.42493771217665244
Epoch 14000: Loss: 0.3650287

SyntaxError: invalid syntax (2835966877.py, line 1)