# 주식데이터를 통한 RNN 구현

In [1]:
import numpy as np
import pandas as pd
from sklearn import svm, preprocessing
import matplotlib.pyplot as plt

In [2]:
# Load the raw price history, then convert the Date column to datetimes.
stock = pd.read_csv('Google.csv')
stock['Date'] = pd.to_datetime(stock['Date'])

In [3]:
# Preview the first rows of the raw frame to check which columns were loaded.
stock.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
0,2004-08-19,100.01,104.06,95.96,100.335,44659000.0,0.0,1.0,50.159839,52.191109,48.128568,50.322842,44659000.0
1,2004-08-20,101.01,109.08,100.5,108.31,22834300.0,0.0,1.0,50.661387,54.708881,50.405597,54.322689,22834300.0
2,2004-08-23,110.76,113.48,109.05,109.4,18256100.0,0.0,1.0,55.551482,56.915693,54.693835,54.869377,18256100.0
3,2004-08-24,111.24,111.6,103.57,104.87,15247300.0,0.0,1.0,55.792225,55.972783,51.94535,52.597363,15247300.0
4,2004-08-25,104.76,108.0,103.88,106.0,9188600.0,0.0,1.0,52.542193,54.167209,52.10083,53.164113,9188600.0


In [4]:
# Quick visual check: line plot of the 'Adj. Close' column against the frame's index.
stock['Adj. Close'].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1a18e0ddd8>

In [5]:
# Names of the columns used as model inputs (in this order).
Features = ['Open', 'High', 'Low', 'Close', 'Volume']

In [6]:
# Map short column names onto the corresponding 'Adj. *' columns of the raw
# frame; Date is carried over unchanged.
data = {
    'Date': stock['Date'],
    'Open': stock['Adj. Open'],
    'High': stock['Adj. High'],
    'Low': stock['Adj. Low'],
    'Close': stock['Adj. Close'],
    'Volume': stock['Adj. Volume'],
}

In [7]:
# Build the working frame from the selected series, fixing the column order.
Stock = pd.DataFrame(
    data,
    columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'],
)

In [8]:
# Preview the first rows of the working frame built from the 'Adj. *' columns.
Stock.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2004-08-19,50.159839,52.191109,48.128568,50.322842,44659000.0
1,2004-08-20,50.661387,54.708881,50.405597,54.322689,22834300.0
2,2004-08-23,55.551482,56.915693,54.693835,54.869377,18256100.0
3,2004-08-24,55.792225,55.972783,51.94535,52.597363,15247300.0
4,2004-08-25,52.542193,54.167209,52.10083,53.164113,9188600.0


In [9]:
# Inputs: every row except the last one, which has no next-day target.
new_x = np.array(Stock[Features])[:-1]
# Standardize the input columns with sklearn's preprocessing.scale.
x_data = preprocessing.scale(new_x)
# Targets: the Close series shifted up by one position, so that row i is paired
# with the closing price of row i + 1 (the next day).
y_data = np.array(Stock['Close'].tolist())[1:]

In [10]:
# Inspect the shape of the unscaled input matrix: (rows, feature columns).
new_x.shape

(3124, 5)

In [11]:
# Turn the flat target vector into a single-column matrix. The row count is
# inferred (-1) instead of hard-coded, so this keeps working when the input
# CSV has a different number of rows.
y_data = y_data.reshape(-1, 1)

In [12]:
# Randomly initialise the network weights. Each matrix is drawn uniformly
# from a range that is symmetric around zero, +/- sqrt(1 / n).
# U: input-to-hidden weights, shape (5, 100); n = 5 input features.
# The upper bound previously used sqrt(1/100), which made the range
# asymmetric and gave U a non-zero mean.
U = np.random.uniform(-np.sqrt(1./5), np.sqrt(1./5), (5, 100))
# V: hidden-to-output weights, shape (100,); n = 100 hidden units.
V = np.random.uniform(-np.sqrt(1./100), np.sqrt(1./100), (100,))
# W: hidden-to-hidden (recurrent) weights, shape (100, 100); n = 100 hidden units.
W = np.random.uniform(-np.sqrt(1./100), np.sqrt(1./100), (100, 100))

In [13]:
# Number of time steps: one per row of the scaled inputs.
T = x_data.shape[0]
# Hidden states for every step are kept because they are needed later.
# One extra row is allocated: index -1 (row T) holds the initial hidden
# state, so the lookup h[t-1] at t = 0 reads zeros.
h = np.zeros((T + 1, 100))
h[-1, :] = 0.0
# Output at each time step, also kept for later use.
o = np.zeros((T, 1))

In [14]:
def softmax(x):
    """Return the softmax of *x*, normalised over all of its elements.

    The maximum of *x* is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = np.sum(weights)
    return weights / total

In [24]:
# Forward pass: unroll the recurrence over all T time steps.
for t in range(T):
    # New hidden state from the current input row and the previous hidden
    # state. At t = 0, h[t-1] is h[-1], the spare last row that holds the
    # initial (all-zero) state.
    h[t] = np.tanh(np.dot(x_data[t], U) + np.dot(h[t-1], W))
    # V.dot(h[t]) is a single scalar. A softmax over one value is always
    # exactly 1.0, which made every output constant regardless of the
    # weights. The target is a price (regression), so the output is the
    # linear read-out itself.
    o[t] = V.dot(h[t])

In [25]:
# Sanity check: shape of one input row projected through U (input-to-hidden term).
np.dot(x_data[1], U).shape

(100,)

In [26]:
# Sanity check: shape of one hidden state projected through W (recurrent term).
np.dot(h[0], W).shape

(100,)