In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

from tqdm import tqdm_notebook

In [2]:
# !conda install conda-forge::tqdm -y

In [3]:
# GPU-aware alternative, kept disabled:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The device is pinned to the CPU. In the cells visible in this part of the
# notebook, `device` is not passed to any tensor or to the model.
device = "cpu"

In [4]:
# Read the SBUX price history with "Date" as the index, store "Volume" as
# float, and print the frame summary.
data = pd.read_csv(
    "data/SBUX.csv",
    index_col=["Date"],
    date_format="%Y-%m-%d",
).astype({"Volume": float})
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2019-12-11 to 2020-12-10
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       253 non-null    float64
 1   High       253 non-null    float64
 2   Low        253 non-null    float64
 3   Close      253 non-null    float64
 4   Adj Close  253 non-null    float64
 5   Volume     253 non-null    float64
dtypes: float64(6)
memory usage: 13.8 KB


In [5]:
# Features are all columns but the last; the target is the single column at
# position 5 ("Volume" in the info() summary above), sliced as 5:6 so it stays
# a one-column DataFrame rather than a Series.
X, y = data.iloc[:, :-1], data.iloc[:, 5:6]
print(X, y)

                  Open        High         Low       Close   Adj Close
Date                                                                  
2019-12-11   86.260002   86.870003   85.849998   86.589996   84.145752
2019-12-12   88.000000   88.889999   87.540001   88.209999   85.720032
2019-12-13   88.019997   88.790001   87.580002   88.669998   86.167046
2019-12-16   89.139999   89.300003   88.430000   88.779999   86.273941
2019-12-17   88.870003   88.970001   87.470001   88.129997   85.642288
...                ...         ...         ...         ...         ...
2020-12-04  101.349998  102.940002  101.070000  102.279999  101.442787
2020-12-07  102.010002  102.220001  100.690002  101.410004  100.579918
2020-12-08  100.370003  101.570000  100.010002  101.209999  100.381554
2020-12-09  101.940002  102.209999  100.099998  100.400002   99.578186
2020-12-10  103.510002  106.089996  102.750000  105.389999  104.527336

[253 rows x 5 columns]                 Volume
Date                  
2019-12

In [6]:
# Chronological hold-out: the first TRAIN_SIZE rows are used for training,
# the remaining rows for testing.
TRAIN_SIZE = 200

ms = MinMaxScaler()    # scaler for the target column
ss = StandardScaler()  # scaler for the feature columns

# Fit both scalers on the training rows only. Fitting on the full frame would
# let test-period statistics (mean/std, min/max) leak into the training data.
ss.fit(X.iloc[:TRAIN_SIZE])
ms.fit(y.iloc[:TRAIN_SIZE])

# Apply the train-fitted scaling to every row, then cut at the same boundary.
# Scaled test targets may fall outside [0, 1] because the min/max come from
# the training rows.
X_ss = ss.transform(X)
y_ms = ms.transform(y)

X_train = X_ss[:TRAIN_SIZE, :]
X_test = X_ss[TRAIN_SIZE:, :]

y_train = y_ms[:TRAIN_SIZE, :]
y_test = y_ms[TRAIN_SIZE:, :]

In [7]:
# Shapes of the four splits, printed on a single line.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)))

(200, 5) (53, 5) (200, 1) (53, 1)


In [8]:
# X_train, X_test, y_train, y_test = train_test_split (X_ss, y_ms)

## 파이토치 관련
- 데이터를 텐서로 변경
- 모델 설계
- 모델 생성 후 학습
- 예측
- 검증

In [9]:
# Convert the NumPy splits to float32 tensors. The deprecated
# torch.autograd.Variable wrapper is dropped: plain tensors already carry
# autograd support, so wrapping them adds nothing.
X_train_tensors = torch.tensor(X_train, dtype=torch.float32)
X_test_tensors = torch.tensor(X_test, dtype=torch.float32)
y_train_tensors = torch.tensor(y_train, dtype=torch.float32)
y_test_tensors = torch.tensor(y_test, dtype=torch.float32)

In [10]:
# Caution: the feature tensors are turned into 3-D tensors here by inserting
# a length-1 axis at position 1: (rows, features) -> (rows, 1, features).
X_train_tesors_f = X_train_tensors.reshape(
    X_train_tensors.size(0), 1, X_train_tensors.size(1)
)
X_test_tesors_f = X_test_tensors.reshape(
    X_test_tensors.size(0), 1, X_test_tensors.size(1)
)
# The reshape procedure and the reason for it are explained further down.
print(X_train_tensors.shape, X_train_tesors_f.shape)

torch.Size([200, 5]) torch.Size([200, 1, 5])


In [17]:
class LSTM(nn.Module):
    """LSTM followed by a two-layer fully connected head.

    How the model is written: the constructor comes first (it receives the
    hyperparameters and builds the layers), then the forward pass. The
    backward pass is not written by hand.

    Args:
        num_classes: Number of output values produced per sample.
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Sequence length; stored on the instance but not used by
            the layers or by ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super().__init__()
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_length = seq_length

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc_1 = nn.Linear(hidden_size, 128)
        self.fc = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Input tensor of shape (batch, seq, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Zero initial hidden/cell states, created on the input's device and
        # with its dtype so the module also works once moved off the CPU.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        _, (hn, _) = self.lstm(x, (h_0, c_0))
        # Use only the top layer's final hidden state. Flattening hn with
        # view(-1, hidden_size) would stack every layer's state and yield
        # num_layers * batch rows whenever num_layers > 1.
        last_hidden = hn[-1]
        out = self.relu(last_hidden)
        out = self.fc_1(out)
        return self.fc(out)

In [18]:
# Optimisation settings.
num_epochs = 1000
learning_rate = 1e-4

# Network dimensions: 5 input features, a 2-unit hidden state, one LSTM layer
# and a single output value.
input_size = 5
hidden_size = 2
num_layers = 1
num_classes = 1

# The sequence length is read from axis 1 of the reshaped training tensor.
model = LSTM(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=X_train_tesors_f.shape[1],
)

# Mean-squared-error loss, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [23]:
# Full-batch training: each epoch runs one forward/backward pass over the
# whole training tensor followed by a single optimizer step.
for epoch in range(num_epochs):
    # Call the module itself rather than .forward() so nn.Module.__call__
    # runs; that is the path that dispatches any registered hooks.
    outputs = model(X_train_tesors_f)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss = criterion(outputs, y_train_tensors)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:  # report every 100th epoch
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

Epoch: 0, loss: 0.02960
Epoch: 100, loss: 0.02878
Epoch: 200, loss: 0.02788
Epoch: 300, loss: 0.02691
Epoch: 400, loss: 0.02592
Epoch: 500, loss: 0.02494
Epoch: 600, loss: 0.02404
Epoch: 700, loss: 0.02322
Epoch: 800, loss: 0.02251
Epoch: 900, loss: 0.02190
