## Import Libraries

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.rcParams['font.size'] = 12
plt.rcParams['figure.figsize'] = (8, 7)
plt.style.use('fivethirtyeight')

import warnings
warnings.filterwarnings('ignore')

import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (roc_auc_score
                             , precision_score
                             , recall_score
                             , roc_curve
                             , confusion_matrix
                             , plot_confusion_matrix
                             , precision_recall_curve
                             , auc
                            )

## Global Variables

In [5]:
interval = '1wk'
history = 'max'
perc_inc = 0.02
symbol = 'IYW'
perc_train = 0.17
load_new = 1
current = 1

In [70]:
stock_df = yf.download(tickers = symbol # symbol of stock 

                   , period = history # length of history back in time

                   , interval = interval # time periods of subinterval e.g. 1m or 1hr

                   # , start = start # start date, time

                   # , end = end # end date, time

                   , prepost = True # pre/post market data
                  ).dropna(axis=0)

stock_df['percent change'] = (stock_df.Close - stock_df.Open) / stock_df.Open
stock_df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,percent change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-06-01,251.25,261.140015,250.339996,260.130005,259.679108,1098400.0,0.035343
2020-06-08,259.880005,269.420013,251.440002,255.570007,255.127014,746100.0,-0.016585
2020-06-15,250.369995,268.450012,250.350006,264.230011,263.772003,517800.0,0.055358
2020-06-22,264.470001,273.350006,261.01001,261.529999,261.529999,780400.0,-0.011117
2020-06-26,266.880005,267.359985,261.01001,261.529999,261.529999,96539.0,-0.020046


In [71]:
last = stock_df.iloc[-1]
last

Open                266.880005
High                267.359985
Low                 261.010010
Close               261.529999
Adj Close           261.529999
Volume            96539.000000
percent change       -0.020046
Name: 2020-06-26 00:00:00, dtype: float64

In [72]:
def direction(last
              ,  interval = interval
              , history = history
              , perc_inc = perc_inc
              , symbol = symbol
              , perc_train = perc_train
              , load_new = load_new 
              , current = current
             ):
    
    import pandas as pd
    import numpy as np

    import warnings
    warnings.filterwarnings('ignore')

    import yfinance as yf

    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import (roc_auc_score
                                 , precision_score
                                 , recall_score
                                 , roc_curve
                                 , confusion_matrix
                                 , plot_confusion_matrix
                                 , precision_recall_curve
                                 , auc
                                )
    
    # Load New From Server
    if load_new == 1:
        stock_df = yf.download(tickers = symbol # symbol of stock 

                           , period = history # length of history back in time

                           , interval = interval # time periods of subinterval e.g. 1m or 1hr

                           # , start = start # start date, time

                           # , end = end # end date, time

                           , prepost = True # pre/post market data
                          ).dropna(axis=0)
        
        # Create Percent Change
        stock_df['percent change'] = (stock_df.Close - stock_df.Open) / stock_df.Open

        #create decision column
        stock_df['decision'] = np.where(stock_df['percent change'] > perc_inc

                                        , 1  # 1 if up % or greater

                                        , 0  # 0 if not up % or greater

                                       )
        # create one offs
        decisions = stock_df.decision.values
        decisions = decisions[1:len(decisions)]
        stock_df_one_off = stock_df.iloc[0:stock_df.shape[0]-1]
        stock_df_one_off['decision'] = decisions

        # Save DF to CSV
        stock_df.to_csv('stock_df.csv')
        
    else:
        if interval == '1d' or interval == '1wk':
            index_col = 'Date'
        else:
            index_col = 'Datetime'
   
        stock_df = pd.read_csv('stock_df.csv', index_col=index_col)
    
        # create one offs
        decisions = stock_df.decision.values
        decisions = decisions[1:len(decisions)]
        stock_df_one_off = stock_df.iloc[0:stock_df.shape[0]-1]
        stock_df_one_off['decision'] = decisions
 
    # balance the data
    ones = stock_df_one_off.decision.value_counts()[1]
    stock_df_bal = stock_df_one_off.groupby('decision').apply(lambda x: x.sample(n=ones)).reset_index(drop=True)
    
    # check for NaN's
    stock_df_bal.dropna(axis=0, inplace=True)
    
    # Train Test Split
    X = stock_df_bal.drop(columns=['decision']) # get columns other than decision
    y = stock_df_bal['decision'] # get decision column
    
    X_train, X_test, y_train, y_test = train_test_split(X
                                                        , y
                                                        , test_size=0.20
                                                        , random_state = 42
                                                       )
    
    # random forest classifier rfc
    rfc = RandomForestClassifier(n_estimators = int(perc_train * len(X_train)) # odd number 
                                 , max_depth = 3
                                 , max_features = 4
                                 , random_state = 42
                                 , verbose=0 # no showing backend work
                                 , n_jobs = -1 # access all of your processor cores Lenovo P50 i7-6820HQ 2.7GHz
                                )
    rfc.fit(X_train, y_train)
    y_pred = rfc.predict(X_test)
    y_probs = rfc.predict_proba(X_test)[:, 1]
    
    # ROC Value
    roc_value = roc_auc_score(y_test
                              , y_probs
                             )
    
    # AUC Score
    rf_prec, rf_recall, _ = precision_recall_curve(y_test, y_probs)
    auc_value = round(auc(rf_recall, rf_prec), 3)
    
    # Feature Importances
    # feature_df = pd.DataFrame({'feature': X_train.columns
    #                            , 'importances': rfc.feature_importances_
    #                           }
    #                          ).sort_values('importances', ascending=False)
    
    # Current = 1 uses most up to date data, = 0 for one pervious current data
    if current == 1:
        loc = -1
    else:
        loc = -2
        
    # Prediction
    # last = stock_df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'percent change']].iloc[loc]
    last = last
    if rfc.predict(np.array(last).reshape(1, -1))[0] == 1:
        # result = f'Buy {symbol} on the {interval} interval for {round(perc_inc *100, 1)}% increase.'
        result = 1
    else:
        # result = f'Sell or hold {symbol} on the {interval} interval.'
        result = 0
        
    return ones, roc_value, auc_value, result

In [73]:
ones, roc_value, auc_value, result = direction(last
                                              ,  interval = '1wk'
                                              , history = 'max'
                                              , perc_inc = 0.05
                                              , symbol = 'IYW'
                                              , perc_train = 0.17
                                              , load_new = 1
                                              , current = 1
                                             )
print(ones)
print(roc_value)
print(auc_value)
print(result)

[*********************100%***********************]  1 of 1 completed
60
0.8111888111888111
0.683
1


In [74]:
stock_df['percent change'].quantile([0.84, 0.97, 0.998])

0.840    0.027864
0.970    0.064772
0.998    0.143371
Name: percent change, dtype: float64

In [75]:
direction(last
          , interval = '1wk'
          , history = 'max'
          , perc_inc = 0.05
          , symbol = 'IYW'
          , perc_train = 0.17
          , load_new = 1
          , current = 1
         )[-1]

[*********************100%***********************]  1 of 1 completed


1

In [124]:
interval = '1wk'
history = 'max'
perc_inc = 0.05
symbol = 'IYW'
perc_train = 0.3
load_new = 0
current = 1

check_df = yf.download(tickers = symbol # symbol of stock 

                   , period = history # length of history back in time

                   , interval = interval # time periods of subinterval e.g. 1m or 1hr

                   # , start = start # start date, time

                   # , end = end # end date, time

                   , prepost = True # pre/post market data
                  ).dropna(axis=0)

check_df['Percent Change'] = (check_df.Close - check_df.Open) / check_df.Open

check_df['Decision'] = np.where(check_df['Percent Change'] > perc_inc
                                , 1
                                , 0
                               )

check_df['Prediction'] = check_df.drop(columns=['Decision'], axis =1).apply(lambda row: np.int64(direction(row)[-1]), axis = 1)

check_df['Success'] = check_df.Decision == check_df.Prediction
check_df.tail(10)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Percent Change,Decision,Prediction,Success
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-04-27,229.809998,235.619995,224.820007,227.550003,227.155579,660700.0,-0.009834,0,0,True
2020-05-04,226.350006,243.25,225.610001,243.220001,242.798416,1199400.0,0.074531,1,0,False
2020-05-11,241.679993,247.009995,231.699997,239.789993,239.374344,624200.0,-0.00782,0,0,True
2020-05-18,244.190002,251.5,243.300003,249.009995,248.578369,1399200.0,0.019739,0,0,True
2020-05-25,254.190002,254.190002,242.160004,251.929993,251.493301,722700.0,-0.008891,0,1,False
2020-06-01,251.25,261.140015,250.339996,260.130005,259.679108,1098400.0,0.035343,0,0,True
2020-06-08,259.880005,269.420013,251.440002,255.570007,255.127014,746100.0,-0.016585,0,1,False
2020-06-15,250.369995,268.450012,250.350006,264.230011,263.772003,517800.0,0.055358,1,0,False
2020-06-22,264.470001,273.350006,261.01001,261.529999,261.529999,780400.0,-0.011117,0,1,False
2020-06-26,266.880005,267.359985,261.01001,261.529999,261.529999,96539.0,-0.020046,0,1,False


In [123]:
check_df.Success.value_counts()

False    988
True      63
Name: Success, dtype: int64

In [121]:
check_df.shape

(1051, 10)

In [126]:
check_df.tail(20)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Percent Change,Decision,Prediction,Success
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-02-17,256.859985,261.25,250.830002,252.029999,250.672363,580000.0,-0.018804,0,1,False
2020-02-24,240.619995,244.610001,214.229996,226.229996,225.011337,2444900.0,-0.059804,0,1,False
2020-03-02,229.169998,240.210007,218.919998,224.679993,223.469681,1201500.0,-0.019592,0,1,False
2020-03-09,207.179993,220.580002,187.470001,210.369995,209.236771,2126400.0,0.015397,0,0,True
2020-03-16,185.179993,197.820007,177.490005,181.919998,180.940033,1594400.0,-0.017604,0,1,False
2020-03-23,181.199997,208.270004,175.25,199.039993,197.967804,1319000.0,0.098455,1,0,False
2020-03-30,201.020004,211.240005,193.070007,195.270004,194.931534,819200.0,-0.028604,0,1,False
2020-04-06,203.639999,219.039993,202.160004,215.919998,215.545731,799500.0,0.060302,1,0,False
2020-04-13,215.070007,229.699997,212.520004,228.360001,227.964172,794200.0,0.061794,1,0,False
2020-04-20,225.279999,228.630005,214.089996,227.160004,226.766251,938400.0,0.008345,0,0,True


In [135]:
check_df.loc[(check_df.Decision ==1) & (check_df.Prediction ==1)].shape

(26, 10)