# Predicting direction of stock price index movement using artificial neural networks
### Por Yakup Kara, Melek Acar Boyacioglu y Omer Kaan Baykan
#### Replicación del estudio por Julio Gutiérrez

In [2]:
import numpy as np
import pandas as pd
import talib

Because the daily data for Istambul Stock Exchange National Index 100 couldn't be found, the study is replicated with the SP&5000 daily data from 2007 to 2017.

In [3]:
spy = pd.read_csv('data/SPY10.csv', index_col='Date', parse_dates=True)
spy.columns = ['open', 'high', 'low', 'close', 'adjclose', 'volume']
spy.tail()

Unnamed: 0_level_0,open,high,low,close,adjclose,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-24,256.600006,256.829987,256.149994,256.559998,256.559998,66935900
2017-10-25,256.179993,256.309998,254.0,255.289993,255.289993,103715300
2017-10-26,255.990005,256.299988,255.479996,255.619995,255.619995,69798000
2017-10-27,256.470001,257.890015,255.630005,257.709991,257.709991,85562500
2017-10-30,256.470001,257.600006,256.410004,256.75,256.75,53199200


The direction of daily change in the S&P500 is categorized as `0` or `1`. If the S&P500 Index at time `t` is higher than that at time `t-1`, direction `t` is `1`. If the S&P500 Index at time `t` is lower than that at time `t-1`, direction `t` is `0`.

In [4]:
spy['prior_close'] = spy.close.shift(1)
spy['direction'] = np.where(spy['close'] > spy['prior_close'], 1, 0)
spy['future_direction'] = spy.direction.shift(-1)
spy = spy.iloc[:-1]
spy['future_direction'] = spy.future_direction.apply(int)
spy.tail(10)

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-10-16,255.210007,255.509995,254.820007,255.289993,255.289993,38221700,254.949997,1,1
2017-10-17,255.229996,255.520004,254.979996,255.470001,255.470001,31561000,255.289993,1,1
2017-10-18,255.899994,255.949997,255.5,255.720001,255.720001,40888300,255.470001,1,1
2017-10-19,254.830002,255.830002,254.350006,255.789993,255.789993,61903800,255.720001,1,1
2017-10-20,256.700012,257.140015,255.770004,257.109985,257.109985,89176400,255.789993,1,0
2017-10-23,257.480011,257.51001,256.019989,256.109985,256.109985,63915300,257.109985,0,1
2017-10-24,256.600006,256.829987,256.149994,256.559998,256.559998,66935900,256.109985,1,0
2017-10-25,256.179993,256.309998,254.0,255.289993,255.289993,103715300,256.559998,0,1
2017-10-26,255.990005,256.299988,255.479996,255.619995,255.619995,69798000,255.289993,1,1
2017-10-27,256.470001,257.890015,255.630005,257.709991,257.709991,85562500,255.619995,1,0


The sample size by year varies in size

In [5]:
year_samples = spy.groupby(spy.index.year).count()
year_samples

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007,251,251,251,251,251,251,250,251,251
2008,253,253,253,253,253,253,253,253,253
2009,252,252,252,252,252,252,252,252,252
2010,252,252,252,252,252,252,252,252,252
2011,252,252,252,252,252,252,252,252,252
2012,250,250,250,250,250,250,250,250,250
2013,252,252,252,252,252,252,252,252,252
2014,252,252,252,252,252,252,252,252,252
2015,252,252,252,252,252,252,252,252,252
2016,252,252,252,252,252,252,252,252,252


## Separation of the data set for training, testing and validation

We need to take an 80% of the samples of each year to build a training set, a 10% of the samples of each year to build a testing set and finally another 10% to build the validation set.

In [6]:
training_set_frac = 0.8
testing_set_frac = 0.1
validation_set_frac = 0.1

training_set, testing_set, validation_set = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

In [7]:
for ith in range(len(year_samples)):
    year = year_samples.index[ith]
    set_size = year_samples.iloc[ith].close

    training_size = int(set_size * training_set_frac)
    testing_size = int(set_size * testing_set_frac)
    validation_size = int(set_size * validation_set_frac)
    
    year_set = spy[spy.index.year == year]

    training_set = training_set.append(year_set[:training_size])
    testing_set = testing_set.append(year_set[training_size:training_size + testing_size + 1])
    validation_set = validation_set.append(year_set[training_size + testing_size + 1:])

The training set consists of the first 80% of samples of each year:

In [8]:
training_set.groupby(training_set.index.year).count()

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007,200,200,200,200,200,200,199,200,200
2008,202,202,202,202,202,202,202,202,202
2009,201,201,201,201,201,201,201,201,201
2010,201,201,201,201,201,201,201,201,201
2011,201,201,201,201,201,201,201,201,201
2012,200,200,200,200,200,200,200,200,200
2013,201,201,201,201,201,201,201,201,201
2014,201,201,201,201,201,201,201,201,201
2015,201,201,201,201,201,201,201,201,201
2016,201,201,201,201,201,201,201,201,201


The testing set consists of 10% of samples of each year:

In [9]:
testing_set.groupby(testing_set.index.year).count()

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007,26,26,26,26,26,26,26,26,26
2008,26,26,26,26,26,26,26,26,26
2009,26,26,26,26,26,26,26,26,26
2010,26,26,26,26,26,26,26,26,26
2011,26,26,26,26,26,26,26,26,26
2012,26,26,26,26,26,26,26,26,26
2013,26,26,26,26,26,26,26,26,26
2014,26,26,26,26,26,26,26,26,26
2015,26,26,26,26,26,26,26,26,26
2016,26,26,26,26,26,26,26,26,26


Finally the validation testing set consists of the last 10% of samples of each year:

In [10]:
validation_set.groupby(validation_set.index.year).count()

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007,25,25,25,25,25,25,25,25,25
2008,25,25,25,25,25,25,25,25,25
2009,25,25,25,25,25,25,25,25,25
2010,25,25,25,25,25,25,25,25,25
2011,25,25,25,25,25,25,25,25,25
2012,24,24,24,24,24,24,24,24,24
2013,25,25,25,25,25,25,25,25,25
2014,25,25,25,25,25,25,25,25,25
2015,25,25,25,25,25,25,25,25,25
2016,25,25,25,25,25,25,25,25,25


## Computation of the indicators data

The input to the network is computed data of 10 technical indicators:

- SMA10
- WMA10
- Momentum
- Stochastic K%
- Stochastic D%
- RSI
- MACD
- Larry William's R%
- A/D Oscilator
- CCI

In [11]:
open_ = spy.open.values
high = spy.high.values
low = spy.low.values
close = spy.close.values
volume = spy.close.values
sample_size = 10

In [12]:
indicators = pd.DataFrame(index=spy.index)
indicators.tail()

2017-10-23
2017-10-24
2017-10-25
2017-10-26
2017-10-27


In [13]:
indicators['sma'] = talib.SMA(close, timeperiod=sample_size)

In [14]:
indicators['wma'] = talib.WMA(close, timeperiod=sample_size)

In [15]:
indicators['momentum'] = talib.MOM(close, timeperiod=sample_size)

In [16]:
# Revisar configuración, este produce los dos valores %K y %D
slowk, slowd = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
indicators['stochk'] = slowk
indicators['stochd'] = slowd

In [17]:
indicators['rsi'] = talib.RSI(close, timeperiod=sample_size)

In [18]:
macd, macdsignal, macdhist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
indicators['macd'] = macd
indicators['macdsig'] = macdsignal
indicators['macdhist'] = macdhist
macd

array([        nan,         nan,         nan, ...,  1.71919997,
        1.61562553,  1.68278911])

In [19]:
indicators['williamsr'] = talib.WILLR(high, low, close, timeperiod=sample_size)

In [20]:
indicators['ad'] = talib.AD(high, low, close, volume)

In [21]:
indicators['cci'] = talib.CCI(high, low, close, timeperiod=sample_size)

Now is necessary to merge the indicators columns in to the splitted sets (training, testing, validation).

In [22]:
indicators.tail()

Unnamed: 0_level_0,sma,wma,momentum,stochk,stochd,rsi,macd,macdsig,macdhist,williamsr,ad,cci
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-10-23,255.471995,255.801811,2.159988,81.539607,87.80684,69.398467,1.88333,1.843897,0.039434,-39.660608,56032.256499,138.654546
2017-10-24,255.665996,255.99963,1.940003,74.851831,82.615664,71.863007,1.859992,1.847116,0.012877,-29.780912,56085.083879,99.064502
2017-10-25,255.692994,255.931266,0.269989,54.127889,70.173109,57.373812,1.7192,1.821533,-0.102333,-63.248167,56114.921668,-46.220243
2017-10-26,255.790994,255.917993,0.979996,50.947257,59.975659,59.718643,1.615626,1.780351,-0.164726,-53.846428,55946.586778,9.34325
2017-10-27,256.066993,256.266902,2.759994,59.425852,54.833666,70.959988,1.682789,1.760839,-0.07805,-4.627848,56163.240328,124.15508


In [23]:
training_set = training_set.join(indicators)
testing_set = testing_set.join(indicators)
validation_set = validation_set.join(indicators)

In [24]:
training_set = training_set.iloc[33:]  # We discard first rows because some indicators don't have values
training_set.head()

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,prior_close,direction,future_direction,sma,...,momentum,stochk,stochd,rsi,macd,macdsig,macdhist,williamsr,ad,cci
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-02-21,145.610001,146.070007,145.350006,145.979996,116.886948,63971600,146.039993,0,0,145.144,...,1.089997,89.992783,91.767628,67.286692,1.023571,0.914752,0.108819,-7.309015,1437.598145,81.889256
2007-02-22,146.050003,146.419998,145.169998,145.869995,116.798935,79067400,145.979996,0,0,145.209999,...,0.659988,79.738918,87.692436,65.530479,1.014854,0.934773,0.080082,-17.027978,1455.101844,68.911731
2007-02-23,145.740005,145.789993,145.029999,145.300003,116.342499,71966200,145.869995,0,0,145.237999,...,0.279999,55.633764,75.121822,56.969556,0.95099,0.938016,0.012974,-34.674811,1413.043661,20.721379
2007-02-26,145.830002,145.949997,144.75,145.169998,116.238396,69192800,145.300003,0,0,145.360999,...,1.229996,35.847984,57.073555,55.143882,0.879746,0.926362,-0.046616,-38.699738,1369.492432,1.828678
2007-02-27,143.880005,144.199997,139.0,139.5,111.69841,274466500,145.169998,0,1,144.965999,...,-3.949997,17.67172,36.384489,21.599962,0.361595,0.813409,-0.451814,-93.261454,1256.819371,-292.735006


In [25]:
# macdsig y macdhist no estan en el paper pero igual se auto-calcularon (no se agregan)
cols = ['sma', 'wma', 'momentum', 'stochk', 'stochd', 'rsi', 'macd', 'williamsr', 'ad', 'cci']

Xtrain = training_set[cols].values
Ytrain = training_set['future_direction'].values

Xtest = testing_set[cols].values
Ytest = testing_set['future_direction'].values

Xval = validation_set[cols].values
Yval = validation_set['future_direction'].values

print('Training shape:', Xtrain.shape, Ytrain.shape)
print('Testing shape:', Xtest.shape, Ytest.shape)
print('Validation shape:', Xval.shape, Yval.shape)

Training shape: (2142, 10) (2142,)
Testing shape: (281, 10) (281,)
Validation shape: (270, 10) (270,)


## Normalization of the data

The original data was scaled into the range of `[-1, 1]`

In [26]:
from sklearn.preprocessing import MinMaxScaler

np.set_printoptions(precision=4)
scaler = MinMaxScaler(feature_range=(-1, 1))

Xtrain_norm = scaler.fit_transform(Xtrain)
Xtest_norm = scaler.fit_transform(Xtest)
Xval_norm = scaler.fit_transform(Xval)

print('Xtrain_norm:\n', Xtrain_norm[:5, :])
print('Xtest_norm:\n', Xtest_norm[:5, :])
print('Xval_norm:\n', Xval_norm[:5, :])

Xtrain_norm:
 [[-0.1604 -0.1529  0.4021  0.801   0.8725  0.5052  0.6364  0.8538 -0.9846
   0.3832]
 [-0.1596 -0.1514  0.3841  0.5919  0.7867  0.4577  0.6348  0.6594 -0.9839
   0.336 ]
 [-0.1593 -0.1512  0.3681  0.1004  0.522   0.226   0.6231  0.3065 -0.9856
   0.1609]
 [-0.1579 -0.1513  0.408  -0.3031  0.1419  0.1766  0.6101  0.226  -0.9872
   0.0922]
 [-0.1624 -0.1634  0.1908 -0.6738 -0.2938 -0.7309  0.5156 -0.8652 -0.9916
  -0.9782]]
Xtest_norm:
 [[-0.1458 -0.1389  0.1161 -0.5807 -0.5894 -0.1522  0.6403 -0.5165 -0.9941
  -0.606 ]
 [-0.1533 -0.151  -0.2664 -0.6343 -0.6676 -0.682   0.5526 -0.9975 -1.
  -0.7918]
 [-0.1587 -0.1599 -0.1553 -0.7142 -0.7113 -0.5205  0.4949 -0.5754 -0.9953
  -0.8519]
 [-0.1644 -0.1651 -0.171  -0.6551 -0.7378 -0.3121  0.4667 -0.2998 -0.9907
  -0.489 ]
 [-0.1702 -0.1699 -0.1723 -0.399  -0.6542 -0.3488  0.4398 -0.363  -0.9857
  -0.4648]]
Xval_norm:
 [[-0.2962 -0.3003 -0.51   -0.6721 -0.6988 -0.6768 -0.4367 -0.9428 -0.991
  -0.3954]
 [-0.2975 -0.3045 -0.2328 -0.

Compute distribution of Y classes in the datasets:

In [27]:
Ytrain1 = Ytrain.sum()
Ytrain0 = len(Ytrain) - Ytrain1
Ytrain1p = Ytrain1 / len(Ytrain) * 100
Ytrain0p = Ytrain0 / len(Ytrain) * 100

Ytest1 = Ytest.sum()
Ytest0 = len(Ytest) - Ytest1
Ytest1p = Ytest1 / len(Ytest) * 100
Ytest0p = Ytest0 / len(Ytest) * 100

Yval1 = Yval.sum()
Yval0 = len(Yval) - Yval1
Yval1p = Yval1 / len(Yval) * 100
Yval0p = Yval0 / len(Yval) * 100

print('Train examples: {}     | Y = 1: {} ({:.2f}%)| Y = 0: {} ({:.2f}%)'.format(len(Xtrain_norm), Ytrain1, Ytrain1p, Ytrain0, Ytrain0p))
print('Test examples: {}       | Y = 1: {} ({:.2f}%) | Y = 0: {} ({:.2f}%)'.format(len(Xtest_norm), Ytest1, Ytest1p, Ytest0, Ytest0p))
print('Validation examples: {} | Y = 1: {} ({:.2f}%) | Y = 0: {} ({:.2f}%)'.format(len(Xval_norm), Yval1, Yval1p, Yval0, Yval0p))
print('Total observations:', len(Xtrain) + len(Xtest) + len(Xval))

Train examples: 2142     | Y = 1: 1157 (54.01%)| Y = 0: 985 (45.99%)
Test examples: 281       | Y = 1: 153 (54.45%) | Y = 0: 128 (45.55%)
Validation examples: 270 | Y = 1: 153 (56.67%) | Y = 0: 117 (43.33%)
Total observations: 2693


Tested parameters:
- Number of neurons [10, 20, ..., 100]
- Epochs (max_iter): [1000, 2000, 10000]
- Momentum constant: [0.1, 0.2, ..., 0.9]
- Learning rate: 0.1

# SVM (RBF)

## Training the model (single)

In [28]:
from sklearn.svm import SVC

clf = SVC(C=100, gamma=2.5)
clf.fit(Xtrain_norm, Ytrain)

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2.5, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [29]:
Xtrain_norm.shape

(2142, 10)

In [30]:
svm_predictions = clf.predict(Xtest_norm)

In [31]:
print('Score: {:.2f}%'.format(clf.score(Xtest_norm, Ytest) * 100))

Score: 53.74%


In [32]:
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
pr = confusion_matrix(Ytest, svm_predictions)
pr

array([[74, 54],
       [76, 77]])

In [33]:
from IPython.display import Markdown

true_negatives = pr[0][0]
false_negatives = pr[1][0]
false_positives = pr[0][1]
true_positives = pr[1][1]

Markdown("""
- **True negatives** (_were 0 and predicted as 0_): **{tn}**
- **False negatives** (_were 1 but predicted them as 0_): **{fn}**
- **False positives** (_were 0 but predicted them as 1_): **{fp}**
- **True positives** (_were 1 and predicted as 1_): **{tp}**
""".format(tn=true_negatives, fn=false_negatives, fp=false_positives, tp=true_positives))


- **True negatives** (_were 0 and predicted as 0_): **74**
- **False negatives** (_were 1 but predicted them as 0_): **76**
- **False positives** (_were 0 but predicted them as 1_): **54**
- **True positives** (_were 1 and predicted as 1_): **77**


In [34]:
print(classification_report(Ytest, svm_predictions))

             precision    recall  f1-score   support

          0       0.49      0.58      0.53       128
          1       0.59      0.50      0.54       153

avg / total       0.54      0.54      0.54       281



In [35]:
results = precision_recall_fscore_support(Ytest, svm_predictions)
results

(array([ 0.4933,  0.5878]),
 array([ 0.5781,  0.5033]),
 array([ 0.5324,  0.5423]),
 array([128, 153]))

In [36]:
correctly_predicted = true_positives + true_negatives
badly_predicted = false_positives + false_negatives
print('Accuracy: {:.2f}%'.format(correctly_predicted / (correctly_predicted + badly_predicted) * 100))

Accuracy: 53.74%


## Training the model (multi)

In [37]:
clf = SVC()

gamma = np.linspace(0, 5, 51)
c = np.array([1, 10, 100])

In [38]:
from itertools import product
comparative = {}

for params in product(gamma, c):
    cgamma, cc = params
    
    clf.set_params(C=cc, gamma=cgamma)
    clf.fit(Xtrain_norm, Ytrain)
    comparative[params] = clf.score(Xtest_norm, Ytest)

In [39]:
print('Lowest score in the 153 parameter combinations: {:.2f}%'.format(min(comparative.values()) * 100))
print('Max score in the 153 parameter combinations: {:.2f}%'.format(max(comparative.values()) * 100))

Lowest score in the 153 parameter combinations: 45.91%
Max score in the 153 parameter combinations: 57.65%


In [40]:
import heapq
key_params = heapq.nlargest(5, comparative, key=comparative.get) # Get top 5 by best score

for key in key_params:
    print('Gamma: {:.1f}  |  C: {}  |  Score: {:.4f}%'.format(*key, comparative[key] * 100))

Gamma: 2.8  |  C: 10  |  Score: 57.6512%
Gamma: 2.9  |  C: 10  |  Score: 56.9395%
Gamma: 3.0  |  C: 10  |  Score: 56.9395%
Gamma: 3.7  |  C: 100  |  Score: 56.9395%
Gamma: 2.7  |  C: 10  |  Score: 56.5836%


# SVM (Polynomial)

## Training the model (multi)

In [41]:
clf = SVC(kernel='poly')

degree = np.array([1, 2, 3, 4])
gamma = np.linspace(0, 5, 51)
c = np.array([1, 10, 100])

In [42]:
from itertools import product
comparative = {}

for params in product(degree, gamma, c):
    d, cgamma, cc = params
    
    clf.set_params(degree=d, C=cc, gamma=cgamma)
    clf.fit(Xtrain_norm, Ytrain)
    comparative[params] = clf.score(Xtest_norm, Ytest)

KeyboardInterrupt: 

In [None]:
print('Lowest score in the 612 parameter combinations: {:.2f}%'.format(min(comparative.values()) * 100))
print('Max score in the 612 parameter combinations: {:.2f}%'.format(max(comparative.values()) * 100))

In [None]:
import heapq
key_params = heapq.nlargest(5, comparative, key=comparative.get) # Get top 5 by best score

for key in key_params:
    print('Degree: {} | Gamma: {:.1f}  |  C: {}  |  Score: {:.4f}%'.format(*key, comparative[key] * 100))

# ANN

## Training the model (single)

In [None]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(
    hidden_layer_sizes=(30),
    max_iter=5000,
    momentum=0.7,
    learning_rate_init=0.1,
    activation='logistic')

mlp.fit(Xtrain_norm, Ytrain)

In [None]:
predictions = mlp.predict(Xtest_norm)

In [None]:
print('Score: {:.2f}%'.format(mlp.score(Xtest_norm, Ytest) * 100))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
pr = confusion_matrix(Ytest, predictions)
pr

In [None]:
from IPython.display import Markdown

true_negatives = pr[0][0]
false_negatives = pr[1][0]
false_positives = pr[0][1]
true_positives = pr[1][1]

Markdown("""
- **True negatives** (_were 0 and predicted as 0_): **{tn}**
- **False negatives** (_were 1 but predicted them as 0_): **{fn}**
- **False positives** (_were 0 but predicted them as 1_): **{fp}**
- **True positives** (_were 1 and predicted as 1_): **{tp}**
""".format(tn=true_negatives, fn=false_negatives, fp=false_positives, tp=true_positives))

- _Precision = tp / (tp + fp)_: From all predicted values which ratio was actually well predicted.
- _Recal = tp / (tp + fn)_: From all true values which ratio was actually well predicted.
- _F1 score_: The higher the value the better use of the precision & recall tradeof.
- _Support_: No. of observations of each class in the testing set.

In [None]:
print(classification_report(Ytest, predictions))

In [None]:
results = precision_recall_fscore_support(Ytest, predictions)
results

In [None]:
correctly_predicted = true_positives + true_negatives
badly_predicted = false_positives + false_negatives
print('Accuracy: {:.2f}%'.format(correctly_predicted / (correctly_predicted + badly_predicted) * 100))

## Training the model (multi)

In [None]:
mlp = MLPClassifier(learning_rate_init=0.1, activation='logistic')

epoch = np.linspace(1000, 10000, 10, dtype=int)
neurons = np.linspace(10, 100, 10, dtype=int)
momentum = np.linspace(.1, .9, 9)

In [None]:
from itertools import product
comparative = {}

for params in product(epoch, neurons, momentum):
    ep, n, mc = params
    
    mlp.set_params(max_iter=ep, hidden_layer_sizes=(n,), momentum=mc)
    mlp.fit(Xtrain_norm, Ytrain)
    comparative[params] = mlp.score(Xtest_norm, Ytest)

In [None]:
print('Lowest score in the 900 parameter combinations: {:.2f}%'.format(min(comparative.values()) * 100))
print('Max score in the 900 parameter combinations: {:.2f}%'.format(max(comparative.values()) * 100))

In [None]:
import heapq
key_params = heapq.nlargest(5, comparative, key=comparative.get) # Get top 5 by best score

for key in key_params:
    print('Epoch: {}  |  Neurons: {}  |  MC: {:.1f}  |  Score: {:.4f}%'.format(*key, comparative[key] * 100))

## Analyzing performance in sets of best score parameters

### Calculating yearly scores for the best 1st set of parameters

In [None]:
ep, n, mc = key_params[0]  # 1st set of best parameters

mlp = MLPClassifier(
    learning_rate_init=0.1,
    activation='logistic',
    max_iter=ep,
    hidden_layer_sizes=(n,),
    momentum=mc)

mlp.fit(Xtrain_norm, Ytrain)
Ypred_train = mlp.predict(Xtrain_norm)  # First analysis predicts same training set
Ypred_test = mlp.predict(Xtest_norm)  # Second analysis predicts testing set

In [None]:
train_classification = pd.DataFrame({'Ytrain': Ytrain, 'Ypred_train': Ypred_train}, index=training_set.index)
train_classification.head()

In [None]:
test_classification = pd.DataFrame({'Ytest': Ytest, 'Ypred_test': Ypred_test}, index=testing_set.index)
test_classification.head()

In [None]:
train_benchmark = train_classification['Ytrain'] == train_classification['Ypred_train']
test_benchmark = test_classification['Ytest'] == test_classification['Ypred_test']

train_score = train_benchmark.groupby(train_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('train_score')
test_score = test_benchmark.groupby(test_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('test_score')

In [None]:
benchmark_one = pd.concat([train_score, test_score], axis=1)
benchmark_one

In [None]:
print('Train average: {}\nTest average: {}'.format(
    benchmark_one['train_score'].mean(),
    benchmark_one['test_score'].mean()))

### Calculating yearly scores for the best 2nd set of parameters

In [None]:
ep, n, mc = key_params[1]  # 2nd set of best parameters

mlp = MLPClassifier(
    learning_rate_init=0.1,
    activation='logistic',
    max_iter=ep,
    hidden_layer_sizes=(n,),
    momentum=mc)

mlp.fit(Xtrain_norm, Ytrain)
Ypred_train = mlp.predict(Xtrain_norm)  # First analysis predicts same training set
Ypred_test = mlp.predict(Xtest_norm)  # Second analysis predicts testing set

In [None]:
train_classification = pd.DataFrame({'Ytrain': Ytrain, 'Ypred_train': Ypred_train}, index=training_set.index)
test_classification = pd.DataFrame({'Ytest': Ytest, 'Ypred_test': Ypred_test}, index=testing_set.index)

In [None]:
train_benchmark = train_classification['Ytrain'] == train_classification['Ypred_train']
test_benchmark = test_classification['Ytest'] == test_classification['Ypred_test']

train_score = train_benchmark.groupby(train_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('train_score')
test_score = test_benchmark.groupby(test_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('test_score')

In [None]:
benchmark_two = pd.concat([train_score, test_score], axis=1)
benchmark_two

In [None]:
print('Train average: {}\nTest average: {}'.format(
    benchmark_two['train_score'].mean(),
    benchmark_two['test_score'].mean()))

### Calculating yearly scores for the best 3rd set of parameters

In [None]:
ep, n, mc = key_params[2]  # 3rd set of best parameters

mlp = MLPClassifier(
    learning_rate_init=0.1,
    activation='logistic',
    max_iter=ep,
    hidden_layer_sizes=(n,),
    momentum=mc)

mlp.fit(Xtrain_norm, Ytrain)
Ypred_train = mlp.predict(Xtrain_norm)  # First analysis predicts same training set
Ypred_test = mlp.predict(Xtest_norm)  # Second analysis predicts testing set

In [None]:
train_classification = pd.DataFrame({'Ytrain': Ytrain, 'Ypred_train': Ypred_train}, index=training_set.index)
test_classification = pd.DataFrame({'Ytest': Ytest, 'Ypred_test': Ypred_test}, index=testing_set.index)

In [None]:
train_benchmark = train_classification['Ytrain'] == train_classification['Ypred_train']
test_benchmark = test_classification['Ytest'] == test_classification['Ypred_test']

train_score = train_benchmark.groupby(train_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('train_score')
test_score = test_benchmark.groupby(test_benchmark.index.year).apply(lambda s: s[s == True].count() / s.count()).rename('test_score')

In [None]:
benchmark_three = pd.concat([train_score, test_score], axis=1)
benchmark_three

In [None]:
print('Train average: {:.2f}%\nTest average: {:.2f}%'.format(
    benchmark_three['train_score'].mean() * 100,
    benchmark_three['test_score'].mean() * 100))

In [None]:
# Later: Also test various values of learning_rate, hidden_layers and activation function.
# Later: train the network X times for each parameters to have statistical significance
# Later: train the network with multiple market data to see how it behaves in other markets
# Later: Backtest a strategy using the prediction results to get a set of trade results
# Later: Analyze trade results with CDF and Montecarlo