# RNN 사용하기

In [2]:
import pandas as pd
import numpy as np

In [3]:
pd.read_csv("../../data/stock.csv").head() # volume은 쿨하게 drop~

Unnamed: 0,Date,Open,High,Low,Volume,Close
0,2015-12-16,120,123,118,13181000,123
1,2015-12-17,124,126,122,17284900,123
2,2015-12-18,121,122,118,17948100,118
3,2015-12-21,120,120,116,11670000,117
4,2015-12-22,117,117,115,9689000,116


In [4]:
stockDF = pd.read_csv("../../data/stock.csv", usecols=[0, 1, 2, 3, 5])
stockDF.head()

Unnamed: 0,Date,Open,High,Low,Close
0,2015-12-16,120,123,118,123
1,2015-12-17,124,126,122,123
2,2015-12-18,121,122,118,118
3,2015-12-21,120,120,116,117
4,2015-12-22,117,117,115,116


In [5]:
stockDF.info() # 결측치만 확인하고 패스^~^

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 967 entries, 0 to 966
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    967 non-null    object
 1   Open    967 non-null    int64 
 2   High    967 non-null    int64 
 3   Low     967 non-null    int64 
 4   Close   967 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 37.9+ KB


In [6]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [7]:
class StockDS(Dataset):
    def __init__(self, stockDF):
        super().__init__()
        self.stockDF = stockDF
        
        # open, high, low data
        self.data = self.stockDF.iloc[:, 1:4].values
        self.data = self.data / np.max(self.data)
        
        # close data
        self.label = self.stockDF["Close"].values
        self.label = self.label / np.max(self.label)
        
    def __len__(self):
        return len(self.data) - 30 # 30개가 안되면 그 다음엔 예측할 값이 없어예 
    
    def __getitem__(self, index):
        data = self.data[index:index+30]
        label = self.label[index+30]
        return data, label 
        

In [8]:
stockDS = StockDS(stockDF)
stockDL = DataLoader(stockDS, batch_size=25)

In [23]:
from torch import nn

class StockRegression(nn.Module):
    def __init__(self, input_size, hidden_dim, n_layers, dropout=0.5, bidirectional=True):
        super().__init__()
        self.rnn = nn.RNN(input_size=3, hidden_size=8, num_layers=5, batch_first=True) # hidden이 퍼셉트론 수 
        
        self.fc1 = nn.Linear(240, 64)
        self.fc2 = nn.Linear(64, 1)
        
        self.relu = nn.ReLU() # 활성화 함수 정의
    
    def forward(self, x, h0):
        # RNN 레이어를 통과한 후에는 hidden state를 반환하지 않습니다.
        x, _ = self.rnn(x)
        
        # 출력값을 1차원으로 만들어줌 MLP층의 입력으로 사용될 수 있도록
        x = torch.reshape(x, (x.shape[0], -1))
        
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        # 예측한 종가를 1차원으로 표현
        x = torch.flatten(x)
        
        return x

In [24]:
from torch import optim

# 피쳐개수 = 단어 개수
hidden_dim = 8
n_layers = 5

device = "cuda" if torch.cuda.is_available() else "cpu"
model = StockRegression(3, hidden_dim, n_layers,).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [25]:
import numpy as np

def train(model, datasets, criterion, optimizer, device, interval):
    model.train()
    losses = list()

    for step, (input_ids, labels) in enumerate(datasets):
        input_ids = input_ids.to(device)
        labels = labels.to(device).unsqueeze(1)
        input_ids = input_ids.type(torch.float32)
        # print(input_ids, type(input_ids))
        logits = model(input_ids)
        loss = criterion(logits, labels).float()
        print(type(loss))
	
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % interval == 0:
            print(f"Train Loss {step} : {np.mean(losses)}")


def test(model, datasets, criterion, device):
    model.eval()
    losses = list()
    corrects = list()

    for step, (input_ids, labels) in enumerate(datasets):
        input_ids = input_ids.to(device)
        labels = labels.to(device).unsqueeze(1)
        input_ids = input_ids.type(torch.float32)
        # print(input_ids, type(input_ids))
        logits = model(input_ids)
        loss = criterion(logits, labels).float()
        losses.append(loss.item())
        


epochs = 5
interval = 500

for epoch in range(epochs):
    train(model, stockDL, criterion, optimizer, device, interval)
    # test(classifier, testDL, criterion, device)

TypeError: forward() missing 1 required positional argument: 'h0'