# Stock 'Adj Close' value prediction

In [5]:
# Author credit for this notebook; a plain string that no other visible cell reads.
___Author___='LumberJack Jyss'

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
from pandas import datetime
import math, time
import itertools
from sklearn import preprocessing
import datetime
from operator import itemgetter
from sklearn.metrics import mean_squared_error
from math import sqrt
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
import seaborn as sns
sns.set(palette='bright',style='whitegrid')
%matplotlib inline

## Stock data function configured to only keep 'Open','High' and 'Adj Close'

In [8]:
def get_stock_data(stock_name,data_source,start,end):
    """Download a price history and keep only 'High', 'Open' and 'Adj Close'.

    Parameters
    ----------
    stock_name : ticker symbol passed through to ``web.DataReader``.
    data_source : data-source name passed through to ``web.DataReader``.
    start, end : bounds of the date range passed through to ``web.DataReader``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the columns 'High', 'Open', 'Adj Close', in that
        order (the order matters downstream: the last column is the target).

    Raises
    ------
    KeyError
        If the downloaded frame does not contain all three columns.
    """
    df = web.DataReader(stock_name,data_source,start,end)
    # Select by name instead of dropping positions [1, 3, 4]: positional drops
    # silently keep the wrong columns if the source ever reorders or adds one.
    # .copy() hands the caller an independent frame it can safely modify.
    return df[['High', 'Open', 'Adj Close']].copy()

## Loading the stock's price history from Yahoo Finance

In [9]:
# Download parameters: data provider, ticker, and the date range to fetch.
data_source, stock_name = 'yahoo', 'GOOGL'
start, end = '2016-03-01', '2019-03-20'

# Fetch the trimmed price frame and preview its last rows.
df = get_stock_data(stock_name, data_source, start=start, end=end)
df.tail()

Unnamed: 0_level_0,High,Open,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-14,1204.699951,1199.02002,1192.530029
2019-03-15,1201.719971,1198.0,1190.300049
2019-03-18,1194.949951,1189.689941,1188.550049
2019-03-19,1203.410034,1191.719971,1202.459961
2019-03-20,1229.98999,1201.400024,1226.430054


## Saving the data to a file for future use

In [10]:
# Tag the export with today's date so the file name records when it was written.
today = datetime.date.today()
file_name = '%s_stock_%s.csv' % (stock_name, today)
df.to_csv(file_name)

In [11]:
# Divide every price column by 1000 so the values fed to the network are of order 1.
# NOTE: this overwrites df, so running the cell a second time divides again.
for column in ('High', 'Open', 'Adj Close'):
    df[column] = df[column] / 1000
df.head()

Unnamed: 0_level_0,High,Open,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-03-01,0.74233,0.7213,0.74217
2016-03-02,0.74312,0.74287,0.73948
2016-03-03,0.741,0.73948,0.73159
2016-03-04,0.735,0.7348,0.73022
2016-03-07,0.72721,0.72515,0.7128


## Updated load_data function from lstm.py, configured to accept any number of features.
## The last feature column is used as the prediction target.

In [12]:
def load_data(stock, seq_len):
    """Cut a frame into sliding windows and split them 90/10 into train/test sets.

    Every sample consists of ``seq_len`` consecutive rows of ``stock`` (all
    columns); its target is the value of the LAST column on the row that
    immediately follows the window.

    Parameters
    ----------
    stock : pandas.DataFrame
        Feature frame, rows in the order the windows should run through them.
        The last column is the one being predicted.
    seq_len : int
        Number of rows in each input window.

    Returns
    -------
    list
        ``[x_train, y_train, x_test, y_test]``. The x arrays have shape
        ``(samples, seq_len, n_features)``, the y arrays ``(samples,)``.
        The first 90% of the windows (rounded) go to the training set.

    Raises
    ------
    ValueError
        If ``stock`` has too few rows to build a single window.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on old and new pandas alike.
    data = stock.values
    sequence_length = seq_len + 1  # seq_len input rows + 1 target row

    # NOTE: the window count is kept exactly as in the original code, which
    # stops one short of the last possible window (len - sequence_length
    # instead of len - sequence_length + 1) so existing shapes do not change.
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "need more than %d rows to build a window, got %d"
            % (sequence_length, len(data)))

    # Stacking equally sized 2-D slices yields (n_windows, sequence_length, n_features).
    result = np.array([data[i: i + sequence_length] for i in range(n_windows)])

    row = int(round(0.9 * result.shape[0]))  # index where the test set starts
    train, test = result[:row], result[row:]

    # Inputs: every row of the window but the last. Targets: last column of the last row.
    # The slices are already (samples, seq_len, n_features), so no reshape is needed.
    x_train, y_train = train[:, :-1], train[:, -1][:, -1]
    x_test, y_test = test[:, :-1], test[:, -1][:, -1]

    return [x_train, y_train, x_test, y_test]

## Building model functions

In [13]:
def build_model(layers):
    """Build and compile a two-layer stacked LSTM followed by a linear Dense layer.

    Parameters
    ----------
    layers : sequence of int
        ``layers[0]`` - number of input features per time step;
        ``layers[1]`` - units of the first LSTM layer;
        ``layers[2]`` - units of the second LSTM layer and of the final Dense layer.

    Returns
    -------
    The compiled ``Sequential`` model (loss 'mse', optimizer 'rmsprop').
    Also prints how long compilation took.
    """
    model = Sequential()

    # Keras 2 spelling of the legacy input_dim/output_dim arguments:
    # units replaces output_dim, and input_dim becomes input_shape with an
    # unspecified (None) number of time steps.
    model.add(LSTM(
        units=layers[1],
        input_shape=(None, layers[0]),
        return_sequences=True))
    model.add(Dropout(0.2))

    # Second LSTM returns only its final state, which feeds the Dense layer.
    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        units=layers[2]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop",metrics=['accuracy'])
    print("Compilation Time : ", time.time() - start)
    return model

def build_model2(layers):
    """Build and compile a 128/64-unit stacked LSTM with a 16 -> 1 Dense head.

    Parameters
    ----------
    layers : sequence of int
        ``layers[0]`` - number of input features per time step;
        ``layers[1]`` - number of time steps per input window.
        Further entries are not read by this function.

    Returns
    -------
    The compiled ``Sequential`` model (loss 'mse', optimizer 'adam').
    """
    dropout_rate = 0.2
    model = Sequential()
    # Only the first layer needs the input shape: (time steps, features).
    model.add(LSTM(128, input_shape=(layers[1], layers[0]), return_sequences=True))
    model.add(Dropout(dropout_rate))
    # Later layers infer their input shape from the previous layer's output.
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(dropout_rate))
    # kernel_initializer is the Keras 2 name of the legacy init argument.
    model.add(Dense(16, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1, kernel_initializer='uniform', activation='relu'))
    # metrics is kept so that evaluate() keeps returning a [loss, metric] list.
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
    return model

## Setting X and Y for training and testing

In [14]:
window = 5  # number of consecutive rows in each input sequence

# The frame is already in ascending date order (see the head()/tail() previews),
# so it is passed as-is: every window then runs forward in time and its target
# is the 'Adj Close' of the row that follows. Reversing the frame (df[::-1])
# would make each target OLDER than its inputs, i.e. train the model to
# "predict" the past from the future.
X_train, y_train, X_test, y_test = load_data(df, window)

for name, array in (("X_train", X_train), ("y_train", y_train),
                    ("X_test", X_test), ("y_test", y_test)):
    print(name, array.shape)

X_train (687, 5, 3)
y_train (687,)
X_test (76, 5, 3)
y_test (76,)


  This is separate from the ipykernel package so we can avoid doing imports until


## Loading the model sequence structure

In [15]:
# Layer spec: [features per time step, window length, output units].
# build_model2 reads only the first two entries; build_model(layer_spec)
# would be the alternative architecture.
layer_spec = [3, window, 1]
model = build_model2(layer_spec)



## Training the model & MSE/RMSE results

In [None]:
# Training settings: silent run (verbose=0), with a 0.1 validation split.
fit_options = dict(batch_size=512, epochs=500, validation_split=0.1, verbose=0)
model.fit(X_train, y_train, **fit_options)

  import sys


In [None]:
# evaluate() returns [loss, metric]; the loss (index 0) is the MSE.
trainScore = model.evaluate(X_train, y_train, verbose=0)
testScore = model.evaluate(X_test, y_test, verbose=0)

# Prices were divided by 1000, so the MSE is far below 0.01 and '%.2f' would
# print it as 0.00; six decimals keep the report meaningful.
for label, score in (('Train', trainScore), ('Test', testScore)):
    mse = score[0]
    print('%s Score: %.6f MSE (%.6f RMSE)' % (label, mse, math.sqrt(mse)))

In [None]:
# Predict the whole test set; each row of p holds one prediction in column 0.
p = model.predict(X_test)
sample_ids = range(len(y_test))

# Relative deviation of the true value from the prediction (actual / predicted - 1).
ratio = [(y_test[i] / p[i][0]) - 1 for i in sample_ids]
# Absolute error between the true value and the prediction.
diff = [abs(y_test[i] - p[i][0]) for i in sample_ids]

## Predictions vs Real results

In [None]:
# figsize is a property of the figure, not of a plotted line: passing it to
# plt.plot() raises an error, so the figure is sized explicitly first.
plt.figure(figsize=(16,6))
plt.plot(p,color='red', label='prediction')
plt.plot(y_test,color='blue', label='y_test')
plt.title('Predicted vs. actual values on the test set')
plt.xlabel('Test sample index')
plt.ylabel('Adj Close / 1000')
plt.legend(loc='upper left')