In [None]:
# https://blog.quantinsti.com/random-forest-algorithm-in-python/

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import pandas_datareader.data as web

In [None]:
# Download the daily AAPL price history from the 'yahoo' data source, starting
# at 12/12/1980, then show the (rows, columns) of the resulting frame.
apple = web.DataReader('aapl', data_source='yahoo', start='12/12/1980')
apple.shape

In [None]:
# Preview the first rows of the downloaded price frame.
apple.head()

In [None]:
# Put the previous row's close next to each row. The first row has no
# predecessor, so it is filled with 0.0 rather than NaN.
apple['Close_Shift1_DOwn'] = apple['Close'].shift(periods=1, fill_value=0.0)
apple.head()

In [None]:
# Function to find the max daily change in the direction of overall daily change
def hl(h, l, o, c):
    """Return the high-low range signed by the open-to-close direction.

    The parameters are named for a day's high, low, open and close. The result
    is ``l - h`` (non-positive) when the open is strictly above the close, and
    ``h - l`` otherwise, including when open equals close.
    """
    closed_lower = o - c > 0
    return (l - h) if closed_lower else (h - l)

In [None]:
# Max daily change in the same direction of daily change.
# The row values are read by column label instead of integer position: integer
# keys on a label-indexed row are deprecated positional access in pandas and
# silently depend on the column order of the downloaded frame.
apple['Max Daily Change'] = apple.apply(
    lambda row: hl(row['High'], row['Low'], row['Open'], row['Close']),
    axis=1,
)
apple.sample(10)

In [None]:
# Negative means the day opened lower than the previous day's close.
# Computed column-wise by label (open minus previous close) instead of a
# row-wise apply with positional keys, which is deprecated in pandas and
# breaks if the column order ever changes.
apple['Interday Change'] = apple['Open'] - apple['Close_Shift1_DOwn']
apple.head()

In [None]:
# Positive 'Close Adjusted Close' means overvalued price at close of day.
# Close minus adjusted close, computed by column label instead of a row-wise
# apply with positional keys (deprecated in pandas and order-dependent).
apple['Close Adjusted Close'] = apple['Close'] - apple['Adj Close']
apple.head()

In [None]:
# Rows where 'Max Daily Change' is positive and 'Interday Change' is negative.
apple[(apple['Max Daily Change'] > 0) & (apple['Interday Change'] < 0)]

In [None]:
# Rows where the open equals the close.
apple[apple['Open'] == apple['Close']]

In [None]:
# Open-to-close move expressed as a percentage of the open.
apple['Open Close % Change'] = (
    apple['Close']
    .sub(apple['Open'])
    .div(apple['Open'])
    .mul(100)
)
apple.head()

In [None]:
# Summary statistics of the open-to-close percentage change.
apple['Open Close % Change'].describe()

In [None]:
# Histogram of the open-to-close % change, with a red vertical reference line
# at 0 and the x-axis clipped to [-10, 10]. The trailing ';' hides the repr.
ax = apple['Open Close % Change'].hist(bins=20)
ax.vlines(0, 0, 4500, colors='r', label='50%')
ax.set_xlim(-10, 10)
ax.legend();

In [None]:
# 84th percentile of the open-to-close percentage change.
apple['Open Close % Change'].quantile([0.84])

In [None]:
# Line plot of the closing price against the frame's index; ';' hides the repr.
apple['Close'].plot();

In [None]:
# Inspect 10 randomly chosen rows (unseeded, so the rows differ on each run).
apple.sample(10)

In [None]:
# Rolling close-price features. The first (window - 1) rows of each rolling
# result are NaN; the SMAs fall back to the close itself and the standard
# deviation to 0.
# The fill is chained onto the expression and assigned once, rather than
# calling `apple[col].fillna(..., inplace=True)`: in-place fills on a selected
# column raise a FutureWarning in pandas 2.x and do not modify the frame at
# all once Copy-on-Write is enabled.
apple['5 Day SMA'] = apple['Close'].rolling(window=5).mean().fillna(apple['Close'])
apple['10 Day SMA'] = apple['Close'].rolling(window=10).mean().fillna(apple['Close'])
apple['5 Day STDev'] = apple['Close'].rolling(window=5).std().fillna(0)
apple.head(30)

In [None]:
# (min, max, mean, median) of the 5-day rolling standard deviation.
apple['5 Day STDev'].min(), apple['5 Day STDev'].max(), apple['5 Day STDev'].mean(), apple['5 Day STDev'].median()

In [None]:
def crossover(five, ten):
    """Return 1 when `five` is strictly greater than `ten`, otherwise 0."""
    return int(five > ten)

In [None]:
# Flag rows where the 5-day SMA sits above the 10-day SMA.
# The two averages are read by column label rather than by integer position:
# positional keys on a label-indexed row are deprecated in pandas and break
# silently whenever a column is added or reordered upstream.
apple['5 10 Day Crossover'] = apple.apply(
    lambda row: crossover(row['5 Day SMA'], row['10 Day SMA']),
    axis=1,
)
apple.head()

In [None]:
# Min Max Normalization of Features
# Each feature is rescaled to [0, 1] as (value - min) / (max - min), using the
# minimum and maximum over the whole frame.
vol_min = apple['Volume'].min()
vol_max = apple['Volume'].max()
apple['Volume Norm'] = apple['Volume'].sub(vol_min).div(vol_max - vol_min)

daily_min = apple['Max Daily Change'].min()
daily_max = apple['Max Daily Change'].max()
apple['Max Daily Change Norm'] = apple['Max Daily Change'].sub(daily_min).div(daily_max - daily_min)

inter_min = apple['Interday Change'].min()
inter_max = apple['Interday Change'].max()
apple['Interday Change Norm'] = apple['Interday Change'].sub(inter_min).div(inter_max - inter_min)

adj_min = apple['Close Adjusted Close'].min()
adj_max = apple['Close Adjusted Close'].max()
apple['Close Adjusted Close Norm'] = apple['Close Adjusted Close'].sub(adj_min).div(adj_max - adj_min)

apple.head()

In [None]:
def decision(p, buy_threshold=1.5, sell_threshold=-0.75):
    """Map a value to a three-way label.

    Parameters
    ----------
    p : number
        Value to classify.
    buy_threshold : number, default 1.5
        Values strictly above this return 1.
    sell_threshold : number, default -0.75
        Values strictly below this return -1.

    Returns
    -------
    int
        1 if ``p > buy_threshold``, -1 if ``p < sell_threshold``, else 0.
        A value equal to either threshold (or NaN, which fails both
        comparisons) returns 0.
    """
    if p > buy_threshold:
        return 1
    if p < sell_threshold:
        return -1
    return 0

In [None]:
# Label each row 1 / 0 / -1 from its open-to-close % change.
# The column is selected by name and mapped element-wise instead of going
# through a row-wise apply with a positional key, which is deprecated in
# pandas and depends on the exact column order of the frame.
apple['Buy Hold Sell'] = apple['Open Close % Change'].apply(decision)

In [None]:
# Pull the following row's label onto each row. The last row has no successor,
# so it is filled with 0.
apple['Buy Hold Sell Shift1 Up'] = (
    apple['Buy Hold Sell']
    .shift(periods=-1, fill_value=0)
    .astype(int)
)
apple.head()

In [None]:
# Select features
# List every column currently in the frame.
apple.columns

In [None]:
# Modelling frame: seven feature columns plus the target column
# 'Buy Hold Sell Shift1 Up', with the original index replaced by 0..n-1.
apple_ = apple[
    [
        '5 Day STDev',
        '5 Day SMA',
        '5 10 Day Crossover',
        'Volume Norm',
        'Max Daily Change Norm',
        'Interday Change Norm',
        'Close Adjusted Close Norm',
        'Buy Hold Sell Shift1 Up',
    ]
].reset_index(drop=True)

In [None]:
# Preview the modelling frame.
apple_.head()

In [None]:
# (rows, columns) of the modelling frame.
apple_.shape

In [None]:
# Number of rows per target class in the modelling frame.
apple_['Buy Hold Sell Shift1 Up'].value_counts()

In [None]:
# Rows per class for a class-balanced 10% test split. The row count is read
# from the frame instead of a hard-coded number so it tracks the loaded data.
len(apple_) * 0.10 / 3

In [None]:
# Hold out roughly 10% of the rows as a class-balanced test set: the same
# number of rows is drawn from each of the three target classes.
# The per-class size is derived from the frame instead of being hard-coded,
# and every draw is seeded so the split is identical after a kernel restart.
n_test_per_class = int(len(apple_) * 0.10 / 3)
buy_idxs = list(apple_[apple_['Buy Hold Sell Shift1 Up'] == 1].sample(n_test_per_class, random_state=42).index)
hold_idxs = list(apple_[apple_['Buy Hold Sell Shift1 Up'] == 0].sample(n_test_per_class, random_state=42).index)
sell_idxs = list(apple_[apple_['Buy Hold Sell Shift1 Up'] == -1].sample(n_test_per_class, random_state=42).index)

In [None]:
# All held-out index labels: buy rows, then hold rows, then sell rows.
test_idxs = [*buy_idxs, *hold_idxs, *sell_idxs]
len(test_idxs)

In [None]:
# `test_idxs` holds index *labels* (taken from `.index`), so the rows are
# selected by label with `.loc`. That matches the label-based `.drop(test_idxs)`
# that builds the training frame, and stays correct even if the frame's index
# is not a plain 0..n-1 range.
apple_test = apple_.loc[test_idxs]
apple_test['Buy Hold Sell Shift1 Up'].value_counts()

In [None]:
# Training frame: every row whose index label was not held out for testing.
apple_train = apple_.drop(index=test_idxs)
apple_train.shape

In [None]:
# Expected number of training rows: all rows minus the held-out test rows.
# Computed from the live objects instead of hard-coded counts.
len(apple_) - len(test_idxs)

In [None]:
# Shuffle the test rows (they were concatenated class by class) and renumber
# the index. The shuffle is seeded so the displayed order is reproducible.
apple_test = apple_test.sample(frac=1, random_state=42).reset_index(drop=True)
apple_test.head()

In [None]:
# Rows per target class in the test frame.
apple_test['Buy Hold Sell Shift1 Up'].value_counts()

In [None]:
# Class labels and their row counts in the training frame, in the order
# value_counts() returns them.
class_counts = apple_train['Buy Hold Sell Shift1 Up'].value_counts()
targets = list(class_counts.index)
targets_counts = list(class_counts.values)

In [None]:
# Class labels found in the training frame.
targets

In [None]:
# Row count for each class label in `targets`, in the same order.
targets_counts

In [None]:
from sklearn.utils import resample

In [None]:
# Balance the training classes by upsampling: rows labelled -1 and 1 are each
# resampled with replacement up to targets_counts[0] rows, then stacked with
# the unchanged rows labelled 0.
train_target = apple_train['Buy Hold Sell Shift1 Up']
X_major = apple_train[train_target == 0]
X1_minor = apple_train[train_target == -1]
X2_minor = apple_train[train_target == 1]

upsample_kwargs = dict(replace=True, n_samples=targets_counts[0], random_state=42)
X1_minor_upsampled = resample(X1_minor, **upsample_kwargs)
X2_minor_upsampled = resample(X2_minor, **upsample_kwargs)

apple_train_upsampled = pd.concat([X_major, X1_minor_upsampled, X2_minor_upsampled])

In [None]:
# Rows per target class after upsampling.
apple_train_upsampled['Buy Hold Sell Shift1 Up'].value_counts()

In [None]:
# Shuffle the upsampled training rows (they were concatenated class by class)
# and renumber the index. The shuffle is seeded: the row order feeds the
# forest's bootstrap sampling, so an unseeded shuffle makes the fitted model
# differ between runs even though the classifier itself uses random_state=42.
apple_train_upsampled = apple_train_upsampled.sample(frac=1, random_state=42).reset_index(drop=True)
apple_train_upsampled.head()

In [None]:
# Split each frame into features (every column except the target) and target.
X_train = apple_train_upsampled.drop(columns='Buy Hold Sell Shift1 Up')
y_train = apple_train_upsampled['Buy Hold Sell Shift1 Up']

X_test = apple_test.drop(columns='Buy Hold Sell Shift1 Up')
y_test = apple_test['Buy Hold Sell Shift1 Up']

In [None]:
# Preview the training features.
X_train.head()

In [None]:
# (rows, columns) of the training features.
X_train.shape

In [None]:
# Preview the test features.
X_test.head()

In [None]:
# (rows, columns) of the test features.
X_test.shape

In [None]:
# Preview the training target.
y_train.head()

In [None]:
# Length of the training target; should match the row count of X_train.
y_train.shape

In [None]:
# Preview the test target.
y_test.head()

In [None]:
# Length of the test target; should match the row count of X_test.
y_test.shape

In [None]:
# https://www.analyticsvidhya.com/blog/2020/06/auc-roc-curve-machine-learning/

In [None]:
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

# First model: a seeded 101-tree random forest fitted on the upsampled
# training set and scored on the held-out test set.
rfc = RandomForestClassifier(n_estimators=101, random_state=42, n_jobs=-1, verbose=0)
rfc.fit(X_train, y_train)

# Hard class predictions and per-class probabilities for the test rows.
y_pred = rfc.predict(X_test)
pred_probs = rfc.predict_proba(X_test)

print(f'Accuracy: {metrics.accuracy_score(y_test, y_pred)}')

In [None]:
from sklearn.model_selection import cross_val_score

# Shuffle the full modelling frame before 10-fold cross-validation.
# The shuffle is seeded so the fold composition, and therefore the reported
# score, is reproducible across kernel restarts.
# NOTE(review): this scores on the full, non-upsampled frame, which includes
# the rows held out as the test set.
apple_ = apple_.sample(frac=1, random_state=42).reset_index(drop=True)
cvals = cross_val_score(
    rfc,
    apple_.drop('Buy Hold Sell Shift1 Up', axis=1),
    apple_['Buy Hold Sell Shift1 Up'],
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
    verbose=0,
)
print(f'10 Fold Cross-Validation Score for First RF Model: {np.mean(cvals)}')

In [None]:
# Accuracy of each individual cross-validation fold.
cvals

In [None]:
# Predicted probability of the first class (rfc.classes_[0]) for each test row.
pred_probs[:,0]

In [None]:
# Predicted probability of the second class (rfc.classes_[1]) for each test row.
pred_probs[:,1]

In [None]:
# Predicted probability of the third class (rfc.classes_[2]) for each test row.
pred_probs[:,2]

In [None]:
# `plot_confusion_matrix` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so importing it makes this cell fail with ImportError on current
# releases. `ConfusionMatrixDisplay` (from_estimator / from_predictions) is
# its replacement.
from sklearn.metrics import (
    roc_auc_score,
    precision_score,
    recall_score,
    roc_curve,
    confusion_matrix,
    ConfusionMatrixDisplay,
    precision_recall_curve,
    auc,
)

In [None]:
# Probability of the second class (rfc.classes_[1]) for every test row.
class_probs = rfc.predict_proba(X_test)
y_probs = class_probs[:, 1]
y_probs

In [None]:
# roc_score1 = roc_auc_score(y_test
#                            , y_probs
#                            , multi_class = 'ovr'
#                            , average = 'macro'
#                           )
# roc_score1

In [None]:
# Importance of each feature in the fitted forest, in training-column order.
rfc.feature_importances_

In [None]:
# Label each importance with the column it belongs to, most important first.
# The index is taken from `X_train`, the frame `rfc` was fitted on. The
# previous `apple_upsampled.columns[:4]` referenced a name that is never
# defined in this notebook, and four labels cannot match one importance per
# feature anyway.
feature_imp = pd.Series(rfc.feature_importances_, index=X_train.columns).sort_values(ascending=False)
feature_imp

In [None]:
import seaborn as sns

# Horizontal bar chart of the feature importances, one bar per feature name.
sns.barplot(x=feature_imp, y=feature_imp.index);

In [None]:
# Drop 'Interday Change Norm' Column
# Built from `apple_train_upsampled`, the upsampled training frame created
# earlier in this notebook. The name `apple_upsampled` used previously is
# never defined, so this cell raised NameError on a fresh kernel.
apple_upsampled_reduced = apple_train_upsampled.drop('Interday Change Norm', axis=1)
apple_upsampled_reduced.head()

In [None]:
# `train_test_split` was never imported in this notebook, so bring it into
# scope here (imports live alongside the cells that use them in this file).
from sklearn.model_selection import train_test_split

# 90/10 split of the reduced frame. The target column created by this notebook
# is 'Buy Hold Sell Shift1 Up'; no 'Buy Hold Sell Shift1 Down' column exists,
# so the previous name raised KeyError.
# NOTE(review): the reduced frame comes from rows resampled with replacement,
# so copies of the same row can land in both train and test and inflate the
# reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    apple_upsampled_reduced.drop('Buy Hold Sell Shift1 Up', axis=1),
    apple_upsampled_reduced['Buy Hold Sell Shift1 Up'],
    test_size=0.1,
    random_state=42,
)

In [None]:
# rfcr for Random Forest Classifier Reduced
# Seeded like the first model (`rfc`) so the reported accuracy is reproducible
# and differences between the two models are not just tree-sampling noise.
rfcr = RandomForestClassifier(n_estimators=100, random_state=42)
rfcr.fit(X_train, y_train)
y_pred = rfcr.predict(X_test)
print(f'Accuracy: {metrics.accuracy_score(y_test, y_pred)}')

In [None]:
# 7-fold cross-validated accuracy of the reduced model.
# The target column created by this notebook is 'Buy Hold Sell Shift1 Up';
# no 'Buy Hold Sell Shift1 Down' column exists, so the previous name raised
# KeyError.
cvalr = cross_val_score(
    rfcr,
    apple_upsampled_reduced.drop('Buy Hold Sell Shift1 Up', axis=1),
    apple_upsampled_reduced['Buy Hold Sell Shift1 Up'],
    scoring='accuracy',
    cv=7,
)
print(f'7 Fold Cross-Validation Score for Reduced RF Model: {np.mean(cvalr)}')