In [None]:
import pandas as pd
import os
# Load stocks/FINAL_US_STOCKS.csv (path relative to the working directory).
filtered_us_stocks = pd.read_csv('stocks/FINAL_US_STOCKS.csv')

In [None]:
# PACKAGE VERSION REQUIREMENTS - YAHOOQUERY
# Prints the installed versions so they can be compared by eye against the
# minimums noted at the end of each line; nothing is enforced here.
import yahooquery as yq
import requests
import urllib3

print(yq.__version__) # >=2.3.0
print(requests.__version__) # >= 2.28.2
print(urllib3.__version__) # >= 1.26.14

In [None]:
# ROUGH
import subprocess

def read_dir(dirname):
    """Return, for every entry in `dirname`, the part of its name before the first underscore.

    Entries come back in whatever order `os.listdir` yields them; names without
    an underscore are returned unchanged.
    """
    return [entry.split('_')[0] for entry in os.listdir(dirname)]

# Report how many entries each data directory holds (paths are relative to the working directory).
print("len fundamental: ", len(read_dir('fundamental')))
print("len technical", len(read_dir('ohlc')))


In [None]:
# AVERAGE VOLUME (v20)

def get_avg_volume(symbol="META", start="", days=20):
    """Mean of `volume` over up to `days` rows starting at the row dated `start`.

    Reads ``ohlc/{symbol}_ADJUSTED_OHLC.csv`` (relative to the working
    directory) and averages the `volume` column over the row whose `date`
    equals `start` plus the rows that follow it in file order. Fewer rows are
    used when the file ends first.

    Parameters
    ----------
    symbol : str
        Ticker used to build the CSV file name.
    start : str or None
        Value to match against the `date` column.
    days : int
        Maximum number of rows to average.

    Returns
    -------
    None if `start` is None, NaN if `start` is not present in the file (or
    `days` <= 0), otherwise the mean volume.
    """
    if start is None:
        return None
    ohlc_df = pd.read_csv(fr"ohlc/{symbol}_ADJUSTED_OHLC.csv")

    # Locate the start row once instead of re-deriving it on every iteration.
    hits = (ohlc_df['date'] == start).to_numpy().nonzero()[0]
    if len(hits) == 0:
        # Unknown date: same NaN result as before, but reached explicitly
        # rather than by swallowing an IndexError `days` times.
        return float('nan')
    first = int(hits[0])

    # NOTE(review): the window runs forward in file order (start row and the
    # rows after it). If the file is ascending by date this averages sessions
    # on/after `start`, not the ones before it - confirm this is intended.
    window = ohlc_df['volume'].iloc[first:first + max(days, 0)]
    return window.mean()

# Example call; needs ohlc/META_ADJUSTED_OHLC.csv under the working directory.
get_avg_volume("META", "2023-02-01")

In [None]:

def get_52_week_data(date, symbol):
    """
    Return the 52-week (252-row) high and low as of `date`, with their recency.

    Reads ``ohlc/{symbol}_ADJUSTED_OHLC.csv`` and looks at the row whose `date`
    column equals `date` together with up to 251 rows before it in file order.

    Returns a tuple ``(high, rows_since_high, low, rows_since_low)`` where the
    ``rows_since_*`` values count rows back from `date` (0 = the `date` row
    itself); on ties the most recent occurrence is reported.

    Note: when fewer than 252 rows exist up to `date`, the window is truncated
    to the rows that exist. (Previously the negative positions wrapped around
    and pulled rows from the END of the file into the window.)

    Raises KeyError if `date` is not present in the file.
    """
    ohlc_df = pd.read_csv('ohlc/{}_ADJUSTED_OHLC.csv'.format(symbol))

    hits = (ohlc_df['date'] == date).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise KeyError("date {!r} not found in OHLC data for {}".format(date, symbol))
    pos = int(hits[0])

    # Clamp at the first row: a negative start would be read by iloc as an
    # offset from the end of the frame.
    window = ohlc_df.iloc[max(0, pos - 251):pos + 1]

    # Most recent row first, renumbered 0..k, so a row's label is also the
    # number of rows between it and `date`.
    recent_first = window.iloc[::-1].reset_index(drop=True)

    high = recent_first['high'].max()
    low = recent_first['low'].min()
    wh52 = int(recent_first['high'].idxmax())
    wl52 = int(recent_first['low'].idxmin())
    return (high, wh52, low, wl52)

def get_65_day_low(date, symbol):
    """Return the lowest `close` over up to 65 rows starting at the row dated `date`.

    Reads ``ohlc/{symbol}_ADJUSTED_OHLC.csv`` and takes the minimum `close` of
    the row whose `date` column equals `date` plus the rows that follow it in
    file order (fewer when the file ends first).

    Raises KeyError if `date` is not present in the file.
    """
    ohlc_df = pd.read_csv("ohlc/{}_ADJUSTED_OHLC.csv".format(symbol))

    # Resolve the starting row once; a missing date is reported directly
    # instead of surfacing later as KeyError: 'close' on an empty frame.
    hits = (ohlc_df['date'] == date).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise KeyError("date {!r} not found in OHLC data for {}".format(date, symbol))
    pos = int(hits[0])

    # NOTE(review): the window runs forward in file order, whereas
    # get_52_week_data walks backwards from `date`. If the file is ascending
    # by date this is the low of the NEXT 65 rows - confirm which is intended.
    return ohlc_df['close'].iloc[pos:pos + 65].min()
# get_52_week_data('2023-03-03', 'META')

In [None]:
def add_ma(df, n):
    """Add an `n`-row rolling mean of `close` to `df` as column ``DMA<n>``.

    The column is written into `df` itself (in place); the same frame is
    returned for convenience. The first ``n - 1`` rows of the new column are NaN.
    """
    df[f"DMA{n}"] = df["close"].rolling(window=n).mean()
    return df
def find_high(df):
    """Return ``(high, high_date)`` for the first row holding the maximum `high`.

    The date is read from the `date` column when present, otherwise from a
    `Date` column. Only when neither exists does it fall back to the value in
    column position 1 of that row (the original behaviour, which is only a
    date when the date column happens to sit at position 1).

    Raises ValueError if `df` has no rows.
    """
    if len(df) == 0:
        raise ValueError("find_high() received an empty frame")
    high = df['high'].max()
    first_hit = df[df['high'] == high].iloc[0]
    for date_col in ('date', 'Date'):
        if date_col in df.columns:
            return (high, first_hit[date_col])
    # Legacy positional fallback for frames without a named date column.
    return (high, df[df['high'] == high].iloc[0, 1])
def find_low(df):
    """Return ``(low, low_date)`` for the first row holding the minimum `low`.

    The date is read from the `date` column when present, otherwise from a
    `Date` column. Only when neither exists does it fall back to the value in
    column position 1 of that row (the original behaviour, which is only a
    date when the date column happens to sit at position 1).

    Raises ValueError if `df` has no rows.
    """
    if len(df) == 0:
        raise ValueError("find_low() received an empty frame")
    low = df['low'].min()
    first_hit = df[df['low'] == low].iloc[0]
    for date_col in ('date', 'Date'):
        if date_col in df.columns:
            return (low, first_hit[date_col])
    # Legacy positional fallback for frames without a named date column.
    return (low, df[df['low'] == low].iloc[0, 1])
def returns(buy_price, sell_price):
    """Percentage change from `buy_price` to `sell_price`, rounded to two decimals."""
    pct_change = (sell_price / buy_price - 1) * 100
    return round(pct_change, 2)

In [None]:
# FROM YASH
def buy_hold(df, ex_date, next_ex_date = '', hold_days=63):
    """Buy-and-hold outcome from `ex_date` until the next ex-date (or a fixed number of rows).

    Parameters
    ----------
    df : DataFrame
        OHLC rows with `date`, `open`, `high`, `low` and `close` columns.
        Dates are compared as stored, so they must order correctly with
        `ex_date` (e.g. ISO ``YYYY-MM-DD`` strings).
    ex_date : str
        First date of the holding window (inclusive).
    next_ex_date : str, optional
        End of the window (exclusive). When empty or None the window is the
        first `hold_days` rows on/after `ex_date`.
    hold_days : int, optional
        Row cap used when `next_ex_date` is not given (default 63).

    Returns
    -------
    dict with keys ``bh_returns``, ``bh_close``, ``bh_high``, ``bh_dth``,
    ``bh_low``, ``bh_dtl``, ``bh_high_returns``. ``bh_dth`` / ``bh_dtl`` are the
    number of rows after the entry row up to the high / low row (0 = entry row).

    Raises IndexError if no rows fall inside the holding window.
    """
    output = dict()
    if next_ex_date:  # truthiness also tolerates None, unlike len(next_ex_date)
        df_buy_hold = df[(df['date'] >= ex_date) & (df['date'] < next_ex_date)]
    else:
        df_buy_hold = df[df['date'] >= ex_date][:hold_days]
    if len(df_buy_hold) == 0:
        raise IndexError("no OHLC rows in the holding window starting {}".format(ex_date))

    buy_price = df_buy_hold.iloc[0].open
    sell_price = df_buy_hold.iloc[-1].close
    window_dates = df_buy_hold['date']

    output['bh_returns'] = returns(buy_price, sell_price)
    output['bh_close'] = sell_price
    output['bh_high'], x = find_high(df_buy_hold)
    # Count rows by comparing the `date` column rather than label-slicing the
    # index: .loc[ex_date: x] only works when the index itself holds the dates,
    # which is not the case for a frame straight out of pd.read_csv.
    output['bh_dth'] = int((window_dates <= x).sum()) - 1
    output['bh_low'], y = find_low(df_buy_hold)
    output['bh_dtl'] = int((window_dates <= y).sum()) - 1
    output['bh_high_returns'] = returns(buy_price, output['bh_high'])
    return output

In [None]:
def trail(df, SMA):
    """Walk `df` row by row and report where a trailing position would exit.

    The stop level is the `low` of the first row. Rows are visited in order;
    the walk ends at the first row that either

    * trades below the stop  -> method ``'SL'``, price ``min(open, stop)``; or
    * closes below its ``DMA<SMA>`` value -> method ``'DMA cross'``,
      price ``min(open, DMA)``.

    If neither happens the position is still open at the last row
    (method ``'In Position'``, price = last close).

    Parameters
    ----------
    df : DataFrame
        Needs `open`, `low`, `close` and ``DMA<SMA>`` columns. The exit date is
        taken from a `date` column, else a `Date` column, else the row's index
        label. `df` is not modified.
    SMA : int
        Moving-average window; only used to build the ``DMA<SMA>`` column name.

    Returns
    -------
    ``(exit_method, exit_date, exit_price)``, or the sentinel ``-1`` when the
    first row already closes below its DMA (a message is printed in that case).

    Raises IndexError if `df` has no rows.
    """
    dma_col = 'DMA'+str(SMA)
    sl = df.iloc[0].low

    # Keep the above/below flags local instead of writing an 'above_DMA' column
    # into the caller's frame (which is usually a slice of a larger frame).
    above_dma = (df['close'] >= df[dma_col]).tolist()
    if not above_dma[0]:
        print('Below DMA at entry, cannot trail!')
        return -1

    date_col = next((col for col in ('date', 'Date') if col in df.columns), None)

    exit_price = -1
    exit_method = ''
    exit_date = None
    for (label, row), is_above in zip(df.iterrows(), above_dma):
        exit_date = row[date_col] if date_col is not None else label
        if row['low'] < sl:
            # A gap below the stop fills at the open, otherwise at the stop.
            exit_price = min(row['open'], sl)
            exit_method = 'SL'
            break
        if not is_above:
            exit_price = min(row['open'], row[dma_col])
            exit_method = 'DMA cross'
            break
        exit_price = row['close']
        exit_method = 'In Position'
    return (exit_method, exit_date, exit_price)

In [None]:
def trail_strategy(df, ex_date, SMA=10):
    """Trailing-exit outcome for a position opened at the open of `ex_date`.

    Parameters
    ----------
    df : DataFrame
        OHLC rows with `date`, `open`, `high`, `low` and `close` columns.
        A ``DMA<SMA>`` column is added to `df` in place (via `add_ma`).
    ex_date : str
        Entry date; rows with ``date >= ex_date`` are considered.
    SMA : int, optional
        Moving-average window for the trailing exit (default 10).

    Returns
    -------
    dict with keys ``tr_returns``, ``tr_high``, ``tr_dth``, ``tr_low``,
    ``tr_dtl``, ``tr_high_returns``, ``method``; an empty dict when `trail`
    reports that the entry row is already below its DMA. ``tr_dth`` /
    ``tr_dtl`` count rows after the entry row up to the high / low row.

    Raises IndexError if no rows exist on or after `ex_date`.
    """
    output = dict()
    add_ma(df, SMA)
    # Independent copy so nothing downstream writes into a slice of `df`.
    df_trail = df[df['date'] >= ex_date].copy()
    buy_price = df_trail.iloc[0].open

    result = trail(df_trail, SMA)
    if result == -1:
        return output
    exit_method, exit_date, sell_price = result

    df_trail = df_trail[df_trail['date'] <= exit_date]
    window_dates = df_trail['date']

    output['tr_returns'] = returns(buy_price, sell_price)
    output['tr_high'], x = find_high(df_trail)
    # Count rows via the `date` column: .loc[ex_date: x] needs a date index,
    # which a frame read with pd.read_csv does not have.
    output['tr_dth'] = int((window_dates <= x).sum()) - 1
    output['tr_low'], y = find_low(df_trail)
    output['tr_dtl'] = int((window_dates <= y).sum()) - 1
    output['tr_high_returns'] = returns(buy_price, output['tr_high'])
    output['method'] = exit_method
    return output

In [None]:
# Demo: run the trailing strategy on META from the 2022-10-26 ex-date and print the result dict.
df = pd.read_csv('ohlc/META_ADJUSTED_OHLC.csv')
print(trail_strategy(df, ex_date='2022-10-26'))
# Display the frame; trail_strategy calls add_ma(df, SMA), which adds the DMA10 column to it in place.
df

In [None]:
import datetime
import calendar
# Build the earnings "gap table" for one symbol and write it to
# tmp/<symbol>_GAP_TABLE.csv. One row is produced per earnings ex-date found in
# the OHLC data, except the last one: each row needs the FOLLOWING ex-date to
# close its buy-and-hold window.
symbol = "META"
SMA = 10  # moving-average window used by the trailing exit

# --- Inputs that do not change per event are read once, outside the loop ----
ohlc_df = pd.read_csv('ohlc/{}_ADJUSTED_OHLC.csv'.format(symbol))
# NOTE(review): the per-row lookups used to re-read this file via row['symbol'];
# this assumes the OHLC file's `symbol` column equals `symbol` - confirm.
fundamental_df = pd.read_csv('fundamental/{}_finnhub.csv'.format(symbol))
earnings_dates = fundamental_df['Date']
us_stocks = pd.read_csv('stocks/FINAL_US_STOCKS.csv')

sharesOutStanding = pd.read_csv('sharesoutstanding/{}_SHARESOUTSTANDING.csv'.format(symbol))
sharesOutStanding['fiscalDateEnding'] = pd.to_datetime(sharesOutStanding['fiscalDateEnding'])
# Newest filing first, so .iloc[0] of any "on or before" filter is the latest
# one. (The original called sort_values() without keeping its result.)
sharesOutStanding = sharesOutStanding.sort_values(by='fiscalDateEnding', ascending=False)

# Index by date for label lookups, keeping a 'Date' column for comparisons.
ohlc_df['Date'] = ohlc_df['date']
ohlc_df = ohlc_df.set_index('date')
earnings_ohlc = ohlc_df.loc[ohlc_df.index.isin(earnings_dates)]
add_ma(ohlc_df, SMA)  # adds the DMA<SMA> column in place; loop-invariant

os.makedirs('tmp', exist_ok=True)
myrows = []

# 26th AMC OR 27th BMO => Ex-date = 27th
events = list(earnings_ohlc.iterrows())
for (index, row), (next_index, _next_row) in zip(events, events[1:]):
    ticker = row['symbol']

    pos = ohlc_df.index.get_loc(index)
    if pos == 0:
        # No earlier session to take C1 from; iloc[-1] would silently use the
        # LAST row of the file instead, so skip this event.
        continue
    c1 = ohlc_df.iloc[pos - 1]  # NOTE: Change this offset to select dates +-1

    mydict = {}
    mydict['exdate'] = index
    mydict['weekday'] = calendar.day_name[datetime.datetime.strptime(index, "%Y-%m-%d").date().weekday()]
    mydict['O'] = round(row['open'], 2)
    mydict['H'] = round(row['high'], 2)
    mydict['L'] = round(row['low'], 2)
    mydict['C'] = round(row['close'], 2)
    mydict['C1 date'] = c1['Date']
    mydict['C1'] = round(c1['close'], 2)
    mydict['Gap%'] = round((row['open'] / c1['close'] - 1) * 100, 2)

    # Distance of C1 from the 52-week extremes.
    WH_52, WH52_recency, WL_52, WL52_recency = get_52_week_data(c1['Date'], ticker)
    DL_65 = get_65_day_low(c1['Date'], ticker)

    # Formula: (52WH - Close) / 52WH
    mydict['52WH_dist'] = round((WH_52 - c1['close']) / WH_52, 2)
    mydict['52WH_recency'] = WH52_recency
    # Formula: (Close - 52WL) / 52WL  (the original subtracted WH_52 here)
    mydict['52WL_dist'] = round((c1['close'] - WL_52) / WL_52, 2)
    mydict['52WL_recency'] = WL52_recency

    # TODO: check validity
    mydict['65DL_dist'] = round((c1['close'] - DL_65) / DL_65, 2)

    mydict['v0'] = row['volume']
    mydict['v1'] = c1['volume']
    mydict['avg_v20'] = get_avg_volume(symbol=ticker, start=c1['Date'], days=20)

    # Reporting period (same fundamentals row is reused for EPS/revenue below)
    fundamental_row = fundamental_df[fundamental_df['Date'] == index]
    mydict['ReportingPeriod'] = fundamental_row['Reporting period'].values[0]

    # Sector and Industry
    sector = us_stocks[us_stocks['Ticker'] == ticker]
    mydict['Sector'] = sector['Sector'].values[0]
    mydict['Industry'] = sector['Industry'].values[0]

    # Market Cap: latest shares-outstanding filing on or before the ex-date
    ok = sharesOutStanding.loc[sharesOutStanding['fiscalDateEnding'] <= index]
    if len(ok) > 0:
        latest_filing = ok.iloc[0]
        mydict['MktCap'] = round(float(latest_filing['commonStockSharesOutstanding']) * float(row['open']), 2)

    # Number of Earnings released that day
    # TODO

    # POST EP: trailing exit
    df_trail = ohlc_df[ohlc_df['Date'] >= index]
    buy_price = df_trail.iloc[0].open
    # trail() reads the date from a `date` column, which lives in the index
    # here; reset_index() hands it a frame that has that column.
    trail_result = trail(df_trail.reset_index(), SMA)
    if trail_result != -1:
        # trail() returns -1 (and prints why) when the entry row is already
        # below its DMA; the tr_* fields are then left out for this event.
        exit_method, exit_date, sell_price = trail_result
        df_trail = df_trail[df_trail['Date'] <= exit_date]
        df_trail.to_csv('tmp/DFTRAIL.csv')  # debug dump, overwritten per event

        mydict['tr_returns'] = returns(buy_price, sell_price)
        mydict['tr_high'] = df_trail['high'].max()
        x = df_trail['high'].idxmax()  # date label of the first max
        # Inclusive row count from the ex-date to the high (ex-date itself = 1).
        # The original sliced from exit_date, which lies at or after x.
        # NOTE(review): trail_strategy()/buy_hold() subtract 1 from this count.
        mydict['tr_dth'] = df_trail.loc[index: x].shape[0]
        mydict['tr_low'] = df_trail['low'].min()
        y = df_trail['low'].idxmin()
        mydict['tr_dtl'] = df_trail.loc[index: y].shape[0]
        mydict['tr_high_returns'] = returns(buy_price, mydict['tr_high'])
        mydict['method'] = exit_method

    # BUY and HOLD until the day before the next ex-date
    next_ex_date = next_index
    df_buy_hold = ohlc_df[(ohlc_df['Date'] >= index) & (ohlc_df['Date'] < next_ex_date)]
    buy_price = df_buy_hold.iloc[0].open
    sell_price = df_buy_hold.iloc[-1].close
    mydict['bh_returns'] = returns(buy_price, sell_price)
    mydict['bh_high'] = df_buy_hold['high'].max()
    x = df_buy_hold['high'].idxmax()
    mydict['bh_dth'] = df_buy_hold.loc[index: x].shape[0]
    mydict['bh_low'] = df_buy_hold['low'].min()
    y = df_buy_hold['low'].idxmin()
    mydict['bh_dtl'] = df_buy_hold.loc[index: y].shape[0]
    mydict['bh_high_returns'] = returns(buy_price, mydict['bh_high'])

    # fundamentals (row matched on the ex-date) # NOTE: This can be c1['Date']
    eps_actual = fundamental_row['EPS Actual'].values[0]
    eps_estimate = fundamental_row['EPS Estimate'].values[0]
    rev_actual = fundamental_row['Revenue Actual'].values[0]
    rev_estimate = fundamental_row['Revenue Estimate'].values[0]

    mydict['reportedEPS'] = round(eps_actual, 2)
    mydict['estimatedEPS'] = round(eps_estimate, 2)
    mydict['surprise'] = round((eps_actual - eps_estimate), 2)
    mydict['surprise%'] = round((eps_actual - eps_estimate) * 100 / eps_estimate, 2)

    mydict['reportedRev'] = round(rev_actual, 2)
    mydict['estimatedRev'] = round(rev_estimate, 2)
    mydict['revenueSurprise%'] = round((rev_actual - rev_estimate) * 100 / rev_estimate, 2)

    myrows.append(mydict)

g = pd.DataFrame(myrows)
g.to_csv('tmp/{}_GAP_TABLE.csv'.format(symbol))