In [27]:
import os
import talib as ta
import pandas as pd
import numpy as np
import datetime as dt
import alpaca_trade_api as tradeapi
import hvplot.pandas
from dotenv import load_dotenv

load_dotenv()
TICKERS = ["AAPL", "MSFT", "AMZN", "FB", "TSLA"]
START = "today"
YEARS = 5

In [28]:
def get_workday(date):
    """ Convert Date to its latest workday
        Parameter:
            date - Any format usagle with pd.to_datetime
        Return:
            timestamp object of the nearest workday earlier than the given date
    """
    date = pd.to_datetime(date)
    offset = max(1, (date.weekday() + 6) % 7 - 3)
    timedelta = dt.timedelta(offset)
    return pd.Timestamp((date - timedelta).date(),tz="America/New_York").isoformat()

def get_alpaca_data():
    """ Get 2 years OHLCV data from Alpaca API for the global set of tickers
        Parameter:
            None
        Return:
            Pandas Dataframe of 2 years of OHLC data for the global tickers
    """
    # Set Keys for Alpaca Trade API
    alpaca_api_key = os.getenv("ALPACA_API_KEY")
    alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")
    
    # Initiate REST API
    api = tradeapi.REST(
        alpaca_api_key,
        alpaca_secret_key,
        api_version = "v2"
    )
    
    # Set start and end date
    end_date = get_workday(START)
    two_yrs = dt.timedelta(days=(365*YEARS))
    start_date = get_workday((pd.to_datetime("today") - two_yrs).date())
    # Set timeframe to '1D' for Alpaca API
    timeframe = "1D"
    # Get OHLCV Data
    portfolio = api.get_barset(
        TICKERS,
        timeframe,
        start = start_date,
        end = end_date
    ).df
    
    return portfolio

In [29]:
def applyCDL (input_df):
    """
        Add columns of candlestick patterns found in the dataframe
        Parameter:
            input_df - dataframe with date index and columns of OHLCV data
        Returns:
            df - the same dataframe received with additional columns of the candlestick patterns found
    """
    
    df = input_df.copy()
    # Bullish
    df["CDLHAMMER"] =  ta.CDLHAMMER(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLINVERTEDHAMMER "] =  ta.CDLINVERTEDHAMMER(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLPIERCING"] =  ta.CDLPIERCING(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLMORNINGSTAR"] =  ta.CDLMORNINGSTAR(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    df["CDL3WHITESOLDIERS"] =  ta.CDL3WHITESOLDIERS(df["open"], df["high"], df["low"], df["close"])/100
    # Bearish
    df["CDLHANGINGMAN"] =  ta.CDLHANGINGMAN(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLSHOOTINGSTAR"] =  ta.CDLSHOOTINGSTAR(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLEVENINGSTAR"] =  ta.CDLEVENINGSTAR(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    df["CDL3BLACKCROWS"] =  ta.CDL3BLACKCROWS(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLDARKCLOUDCOVER"] =  ta.CDLDARKCLOUDCOVER(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    # Both
    df["CDLENGULFING"] =  ta.CDLENGULFING(df["open"], df["high"], df["low"], df["close"])/100

    return df

In [41]:
port_df = get_alpaca_data()
aapl_df = applyCDL(port_df["AAPL"])
aapl_df["pct_change"] = aapl_df["close"].pct_change()
aapl_df["Return"] = np.where(aapl_df["pct_change"] >= 0, 1.0, -1.0)
FEATURES = ['CDLHAMMER', 'CDLINVERTEDHAMMER ', 'CDLPIERCING', 'CDLMORNINGSTAR',
            'CDL3WHITESOLDIERS', 'CDLHANGINGMAN', 'CDLSHOOTINGSTAR','CDLEVENINGSTAR', 
            'CDL3BLACKCROWS', 'CDLDARKCLOUDCOVER', 'CDLENGULFING']
aapl_df[FEATURES] = aapl_df[FEATURES].shift(1)
aapl_df.drop(columns = ["open", "high", "low", "close", "volume", "pct_change"], inplace = True)
aapl_df.dropna(inplace = True)
aapl_df.tail()

Unnamed: 0_level_0,CDLHAMMER,CDLINVERTEDHAMMER,CDLPIERCING,CDLMORNINGSTAR,CDL3WHITESOLDIERS,CDLHANGINGMAN,CDLSHOOTINGSTAR,CDLEVENINGSTAR,CDL3BLACKCROWS,CDLDARKCLOUDCOVER,CDLENGULFING,Return
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-01-04 00:00:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
2021-01-05 00:00:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2021-01-06 00:00:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
2021-01-07 00:00:00-05:00,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2021-01-08 00:00:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [42]:
# Construct training start and end dates
training_start = aapl_df.index.min().strftime(format= '%Y-%m-%d')
training_end = '2019-12-30'
# Construct testing start and end dates
testing_start =  '2020-01-02'
testing_end = aapl_df.index.max().strftime(format= '%Y-%m-%d')
# Print training and testing start/end dates
print(f"Training Start: {training_start}")
print(f"Training End: {training_end}")
print(f"Testing Start: {testing_start}")
print(f"Testing End: {testing_end}")

Training Start: 2016-01-11
Training End: 2019-12-30
Testing Start: 2020-01-02
Testing End: 2021-01-08


In [43]:
# Construct datasets
X_train = aapl_df[FEATURES][training_start:training_end]
y_train = aapl_df["Return"][training_start:training_end]
X_test = aapl_df[FEATURES][testing_start:testing_end]
y_test = aapl_df["Return"][testing_start:testing_end]

In [44]:
from sklearn.svm import NuSVC
from sklearn.model_selection import train_test_split

In [51]:
model = NuSVC(nu=0.5,kernel="rbf")
model.fit(X_train, y_train)

predictions = model.predict(X_test)

In [52]:
Results = y_test.to_frame()
Results["Predicted Value"] = predictions
Results.head()

Unnamed: 0_level_0,Return,Predicted Value
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02 00:00:00-05:00,1.0,1.0
2020-01-03 00:00:00-05:00,-1.0,1.0
2020-01-06 00:00:00-05:00,1.0,1.0
2020-01-07 00:00:00-05:00,-1.0,1.0
2020-01-08 00:00:00-05:00,1.0,1.0


In [53]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["Up", "Down"]))

              precision    recall  f1-score   support

          Up       0.31      0.03      0.06       117
        Down       0.54      0.94      0.68       141

    accuracy                           0.53       258
   macro avg       0.42      0.49      0.37       258
weighted avg       0.43      0.53      0.40       258

