In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import LSTM, GRU
from keras.layers import Convolution1D, MaxPooling1D, AtrousConvolution1D, RepeatVector, AveragePooling1D, Conv1D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from keras.layers.wrappers import Bidirectional
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import *
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from sklearn.model_selection import train_test_split
import keras
import talib

In [13]:
data_df = pd.read_csv('./225537_daily.csv', index_col=[0])
print (data_df.shape)
data_df.head()

(2710, 21)


Unnamed: 0_level_0,open,high,low,close,volume,sma_5,sma_10,ema_20,mtm6_mtm12,fastk,...,roc_10,bband_upper,bband_middle,bband_lower,macd,macdsignal,macdhist,adosc,cci_14,atr_14
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-10-28,449.0,459.0,413.05,421.4,26950.0,447.5,457.475,471.137197,0.909154,0.0,...,-7.506585,492.770388,447.5,402.229612,-24.489724,-23.933145,-0.556578,186580.861422,-84.887688,42.672054
2008-10-29,430.0,438.5,404.0,416.2,1254388.0,434.7,454.805,465.905083,0.977321,0.0,...,-6.028449,471.064378,434.7,398.335622,-25.928546,-24.332225,-1.596321,25451.757073,-109.706281,42.088336
2008-10-31,425.0,464.9,419.0,429.9,562630.0,427.56,452.655,462.476028,1.077911,89.362669,...,-4.76296,446.874078,427.56,408.245922,-25.667469,-24.599274,-1.068195,-136773.025365,-45.957285,42.560598
2008-11-03,436.0,436.0,398.15,403.0,853239.0,419.24,448.11,456.811644,1.025402,0.0,...,-10.134909,437.852082,419.24,400.627918,-27.316284,-25.142676,-2.173608,-392614.143495,-129.301056,42.224126
2008-11-04,407.0,419.0,395.0,401.25,899546.0,414.35,439.765,451.520059,1.086997,0.0,...,-17.216835,436.174023,414.35,392.525977,-28.436395,-25.80142,-2.634975,-598730.915519,-129.921943,40.922403


In [14]:
#data_df['date'] = data_df['date'].map(lambda x: dt.strptime(x, '%m/%d/%Y %I:%M:%S %p').date())
data_df['volume'] = data_df['volume'].astype(float)
#data_df = data_df.set_index('date')

# Simple Moving Average
data_df['sma_5'] = talib.SMA(data_df['close'].values, timeperiod=5)
data_df['sma_10'] = talib.SMA(data_df['close'].values, timeperiod=10)
# Exponential Moving Average
data_df['ema_20'] = talib.EMA(data_df['close'].values, timeperiod=20)
# Momentum 6 Month / Momentum 12 Month
data_df['mtm6_mtm12'] = talib.MOM(data_df['close'].values, timeperiod=126)/talib.MOM(data_df['close'].values, 
                                  timeperiod=252)
# Stochastic Relative Strength Index
data_df['fastk'], data_df['fastd'] = talib.STOCHRSI(data_df['close'].values, timeperiod=14, fastk_period=5,
                                                    fastd_period=3, fastd_matype=0)
# Rate Of Change
data_df['roc_10'] = talib.ROC(data_df['close'].values, timeperiod=10)
# Bollinger Bands
data_df['bband_upper'], data_df['bband_middle'], data_df['bband_lower'] = talib.BBANDS(data_df['close'].values,
                                                                                     timeperiod=5, nbdevup=2, nbdevdn=2,
                                                                                     matype=0)
# Moving Average Convergence Divergence
data_df['macd'], data_df['macdsignal'], data_df['macdhist'] = talib.MACD(data_df['close'].values, fastperiod=12, 
                                                                         slowperiod=26, signalperiod=9)
# Chaikin A/D Oscillator
data_df['adosc'] = talib.ADOSC(data_df['high'], data_df['low'], data_df['close'], data_df['volume'], fastperiod=3,
                             slowperiod=10)
# Commodity Channel Index
data_df['cci_14'] = talib.CCI(data_df['high'].values, data_df['low'].values, data_df['close'].values, timeperiod=14)
# Average True Range
data_df['atr_14'] = talib.ATR(data_df['high'].values, data_df['low'].values, data_df['close'].values, timeperiod=14)



# Target
data_df['target'] = np.append(data_df['close'][1:].values, [np.nan])
# Drop Rows With NA Values In Any Column
data_df = data_df.dropna(axis=0, how='any')

#### add another column with the class 0 (for downward trajectory) and 1(for positive trajectory)
data_df["prev"] = np.append([np.nan], data_df['close'][:-1],)
data_df = data_df.dropna()
data_df['class'] = np.where((data_df['prev'] < data_df['close']),1,0)

data_df.drop('prev',axis = 1, inplace = True)

# Popping The Target Column
target = data_df.pop('target').values

## Traget classes
target_class = data_df.pop('class').values

In [15]:
target

array([1110.75, 1107.65, 1098.9 , ..., 2633.5 , 2651.4 , 2651.9 ])

In [16]:
Y_train.shape

(1964,)

In [17]:
X_train, X_test, Y_train, Y_test = train_test_split(data_df,target,test_size=0.2,shuffle = False)

In [18]:
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [19]:
X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [20]:
model = Sequential()
model.add(Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=(21, 1)))
model.add(BatchNormalization())
model.add(AveragePooling1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(AveragePooling1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam', loss='mae')

In [21]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 19, 128)           512       
_________________________________________________________________
average_pooling1d_1 (Average (None, 9, 128)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 7, 128)            49280     
_________________________________________________________________
average_pooling1d_2 (Average (None, 3, 128)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 384)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 385       
Total params: 50,177
Trainable params: 50,177
Non-trainable params: 0
__________________________________________________

In [22]:
history = model.fit(X_train, Y_train, 
          nb_epoch = 100, 
          batch_size = 32, 
          verbose=1, 
          validation_data=(X_test, Y_test),
          callbacks=[reduce_lr, checkpointer],
          shuffle=True)

NameError: name 'reduce_lr' is not defined

In [23]:
pd.DataFrame(model.predict(X_test),Y_test)

Unnamed: 0,0
2354.45,-2959.166992
2403.50,-2455.436523
2381.35,-2759.058105
2385.40,-2219.013916
2373.05,-2395.424805
...,...
2616.45,-3168.625977
2663.40,-3676.716797
2633.50,-3317.006348
2651.40,-2106.254883


In [24]:
print (X_test.shape)
print (X_train.shape)
print (Y_train.shape)
print (Y_test.shape)

(492, 21, 1)
(1964, 21, 1)
(1964,)
(492,)
