In [1]:
# Import statements
import yfinance as yf
import pandas as pd
import os
import numpy as np
from datetime import date
from pathlib import Path
import pandas_ta as ta
from tqdm import tqdm
from price_data_pull_yfinance import gethistoricalOHLC, saveHistStockData, loadHistDataFromDisk

from global_vars import dataPathToOHLC, pathToMasterDF, float16Cols, float32Cols, dataPathToTechIndicators

In [2]:
# Allow up to 500 columns to be rendered when a DataFrame is displayed.
pd.options.display.max_columns = 500

In [3]:
# Load the master table and the S&P 500 constituents list, then right-join each
# constituent's sector onto the master table; the helper 'Symbol' key is dropped afterwards.
masterDf = pd.read_csv(pathToMasterDF)
tickerDF = pd.read_csv('../01_data/SnP500_constituents.csv')
masterDf = masterDf.merge(
    tickerDF[['Symbol', 'Sector']],
    how='right',
    left_on='TICKER',
    right_on='Symbol',
).drop(columns='Symbol')

In [4]:
# Use the upper-case column naming of the master table for the merged sector column.
masterDf = masterDf.rename(columns={'Sector': 'SECTOR'})


In [9]:
# Add an (initially empty) column that will hold the path of each ticker's indicator file.
masterDf = masterDf.assign(FILEPATH_TECH_IND_CALC='')

In [10]:
# Inspect the master table after the merge and the new, still empty, file-path column.
masterDf

Unnamed: 0,TICKER,FIRST_DATE_OHLC,LAST_DATE_OHLC,FILEPATH,SECTOR,FILEPATH_TECH_IND_CALC
0,MMM,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials,
1,AOS,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials,
2,ABT,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
3,ABBV,2013-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
4,ABMD,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
...,...,...,...,...,...,...
500,YUM,1997-09-17,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Consumer Discretionary,
501,ZBRA,1991-08-15,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Information Technology,
502,ZBH,2001-07-25,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
503,ZION,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Financials,


### Get the data from the masterDF and calculate the technical indicators

In [11]:
# Keep only the first two rows of the master table as a small working subset.
currMasterDF = masterDf.iloc[:2]

In [32]:
def batchCalculationOfTechnicalIndicators(masterdf, pathToOutpu = dataPathToTechIndicators, strategyList = 'all'):
    """Calculate pandas-ta indicators for every ticker in ``masterdf`` and save them as feather files.

    For each row the price history is loaded with ``loadHistDataFromDisk``, indexed by its
    ``Date`` column and extended with the cumulative log return plus the indicators of every
    requested pandas-ta strategy category. The result is written to
    ``<pathToOutpu>/<TICKER>_tech_ind_calculated`` and, only if that write succeeded, the
    file path is stored in the ``FILEPATH_TECH_IND_CALC`` column of ``masterdf``.

    Parameters
    ----------
    masterdf : pandas.DataFrame
        Table with a ``TICKER`` column. It is modified in place; when a slice such as
        ``masterDf.tail(2)`` is passed, use the returned frame to get at the recorded paths.
    pathToOutpu : pathlib.Path or str, optional
        Directory the feather files are written to (defaults to ``dataPathToTechIndicators``).
    strategyList : str or iterable of str, optional
        ``'all'`` (default) runs every strategy category listed below; a single category
        name or an iterable of category names restricts the calculation.

    Returns
    -------
    pandas.DataFrame
        The same ``masterdf`` object that was passed in, with the file paths filled in.
    """
    # Resolve the strategy selection once; it does not depend on the ticker.
    if isinstance(strategyList, str):
        if strategyList == 'all':
            strategies = ['Candles', 'Cycles', 'Overlap', 'Momentum', 'Statistics','Trend', 'Volatility', 'Volume']
        else:
            # A bare category name would otherwise be iterated character by character.
            strategies = [strategyList]
    else:
        strategies = list(strategyList)

    # Honour the caller-supplied output directory instead of always using the global default.
    outputDir = Path(pathToOutpu)

    for _, row in tqdm(masterdf.iterrows(), total=masterdf.shape[0], leave = False):
        # Look the ticker up by column name: positional access depends on the column order.
        ticker = row['TICKER']

        # load the data from disk and index it by date
        df = loadHistDataFromDisk(ticker)
        df = df.set_index(pd.DatetimeIndex(df['Date'])).drop('Date', axis = 1)

        # calculate the technical indicators
        # (the keyword used to be misspelled as 'cumalative', which pandas-ta silently ignored)
        df.ta.log_return(cumulative=True, append=True)

        # loop through the strategies and calculate the technical indicators
        for strategy in tqdm(strategies, leave = False):
            try:
                df.ta.strategy(strategy)
            except Exception as exc:
                # Best effort: report the failing category and continue with the next one.
                print(f"There has been a problem calculating the techincal indicator for {strategy} for the ticker {ticker}.", repr(exc))

        # save the created dataframe to disk
        filepath = outputDir.joinpath(ticker + '_tech_ind_calculated')
        try:
            # feather needs a default index, so the date index goes back into a column
            df.reset_index().to_feather(filepath)
        except Exception as exc:
            print(f'Could not save the calculated technical indicators for the ticker {ticker}!', repr(exc))
            # Never record a path for a file that was not written.
            continue

        # add a reference to the masterDF (stored as a string, like the FILEPATH column)
        try:
            masterdf.loc[masterdf.TICKER == ticker, 'FILEPATH_TECH_IND_CALC'] = str(filepath)
        except Exception as exc:
            print(f'Did not write the {ticker}-technical indicator filetpath to masterDf!', repr(exc))

    return masterdf

In [31]:
# Trial run on the last two tickers. tail(2) yields a temporary object, so the file paths
# the function writes into it would be discarded; keep an explicit copy under its own name.
tailMasterDF = masterDf.tail(2).copy()
batchCalculationOfTechnicalIndicators(tailMasterDF);

  0%|          | 0/2 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:03<00:00,  3.18s/it][A
 50%|█████     | 1/2 [00:03<00:03,  3.22s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:01<00:00,  1.66s/it][A
                                             [A

## Technical Indicator Strategies
Based on the implementation of https://github.com/twopirllc/pandas-ta

In [16]:
# Load the stored AAPL price history and index it by its 'Date' column.
df = loadHistDataFromDisk('AAPL')
df = df.set_index(pd.DatetimeIndex(df['Date'])).drop(columns='Date')

In [73]:
# Keep only the first 1000 rows for the strategy experiments below.
df = df.head(1000)

#### Candles Technical Indicators

In [75]:
# Compare the column count of df before and after running the 'Candles' strategy.
before = df.shape[1]
df.ta.strategy('Candles')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 0 features added


#### Cycles Technical Indicators

In [78]:
# Compare the column count of df before and after running the 'Cycles' strategy.
before = df.shape[1]
df.ta.strategy('Cycles')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 1 features added


#### Overlap Technical Indicators

In [80]:
# Compare the column count of df before and after running the 'Overlap' strategy.
before = df.shape[1]
df.ta.strategy('Overlap')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 40 features added


#### Momentum Technical Indicators

In [79]:
# Compare the column count of df before and after running the 'Momentum' strategy.
before = df.shape[1]
df.ta.strategy('Momentum')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 60 features added


#### Statistics Technical Indicators

In [82]:
# Compare the column count of df before and after running the 'Statistics' strategy.
before = df.shape[1]
df.ta.strategy('Statistics')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 9 features added


#### Trend Technical Indicators

In [83]:
# Compare the column count of df before and after running the 'Trend' strategy.
before = df.shape[1]
df.ta.strategy('Trend')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 23 features added


#### Volatility Technical Indicators

In [85]:
# Compare the column count of df before and after running the 'Volatility' strategy.
before = df.shape[1]
df.ta.strategy('Volatility')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 31 features added


#### Volume Technical Indicators

In [86]:
# Compare the column count of df before and after running the 'Volume' strategy.
before = df.shape[1]
df.ta.strategy('Volume')
after = df.shape[1]
print(f'There have been {after-before} features added')

There have been 18 features added


Unnamed: 0,TICKER,FIRST_DATE_OHLC,LAST_DATE_OHLC,FILEPATH,SECTOR
0,MMM,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials
1,AOS,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials
2,ABT,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care
3,ABBV,2013-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care
4,ABMD,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care
5,ACN,2001-07-19,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Information Technology
6,ATVI,1993-10-25,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Communication Services
7,ADBE,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Information Technology
8,AAP,2001-11-29,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Consumer Discretionary
9,AMD,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Information Technology


In [43]:
# Run the 'Volume' strategy on dftest.
# NOTE(review): dftest is not defined in any visible cell - presumably left over from a
# deleted cell; this will fail on a fresh kernel. TODO confirm and define it explicitly.
dftest.ta.strategy('Volume')

In [44]:
# Show the last five rows of dftest.
dftest.tail(5)

Unnamed: 0_level_0,open,high,low,close,volume,EFI_13,EOM_14_100000000,MFI_14,NVI_1,OBV,PVI_1,PVOL,PVR,AD,ADOSC_3_10,OBV_min_2,OBV_max_2,OBVe_4,OBVe_12,AOBV_LR_2,AOBV_SR_2,CMF_20,PVT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-03-08,120.9375,121.0,116.1875,116.375,153918592.0,-179975400.0,-4.050747,23.851101,1371.392822,87155730000.0,1310.421265,17912280000.0,3.0,78708970000.0,-97199470.0,87155730000.0,87309650000.0,87248570000.0,87421750000.0,0,1,-0.116858,29832290000.0
2021-03-09,119.0,122.0625,118.8125,121.0625,129159600.0,-67773850.0,-3.022866,30.521265,1375.420166,87284890000.0,1310.421265,15636380000.0,2.0,78758640000.0,-82810990.0,87155730000.0,87284890000.0,87263100000.0,87400700000.0,0,1,-0.117862,30352460000.0
2021-03-10,121.6875,122.1875,119.4375,120.0,111760400.0,-75055510.0,-2.555383,30.390823,1374.542725,87173130000.0,1310.421265,13411250000.0,4.0,78692600000.0,-90410490.0,87173130000.0,87284890000.0,87227110000.0,87365690000.0,0,1,-0.115577,30254400000.0
2021-03-11,122.5625,123.1875,121.25,121.9375,102753600.0,-35892560.0,-2.524725,30.751091,1376.156982,87275880000.0,1310.421265,12529520000.0,2.0,78659450000.0,-95846330.0,87173130000.0,87275880000.0,87246620000.0,87351870000.0,0,1,-0.119369,30420270000.0
2021-03-12,120.375,121.1875,119.1875,121.0,87963400.0,-42545870.0,-2.274888,31.111531,1375.387939,87187910000.0,1310.421265,10643570000.0,4.0,78730920000.0,-66617270.0,87187910000.0,87275880000.0,87223140000.0,87326650000.0,0,1,-0.090416,30352620000.0


In [45]:
# (rows, columns) of dftest
dftest.shape

(5333, 23)

In [2]:
# Read the S&P 500 constituents list. Location noted by the author:
# Google Drive/Code_for_bothComputers/PythonStockMarketEvalTool/PythonStockScreener/01_data/SnP500_constituents.csv
tickerDF = pd.read_csv('../01_data/SnP500_constituents.csv')

In [5]:
# Preview the first five constituents.
tickerDF.head(5)

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ABMD,Abiomed,Health Care


In [9]:
# Shape of the array of distinct sector values, i.e. how many sectors there are.
tickerDF['Sector'].unique().shape

(11,)

In [25]:
# Stored price-data path of the third row of the master table.
masterDf['FILEPATH'].iloc[2]

'C:\\Users\\phili\\Google Drive\\Code_for_bothComputers\\PythonStockMarketEvalTool\\PythonStockScreener\\01_data\\01_price_data\\AAPL1'

In [31]:
# Read the feather file with the AAPL price history directly from disk.
# NOTE(review): hard-coded absolute, machine-specific path - consider taking it from the
# FILEPATH column of masterDf instead. TODO confirm which row holds this file.
pd.read_feather('C:\\Users\\phili\\Google Drive\\Code_for_bothComputers\\PythonStockMarketEvalTool\\PythonStockScreener\\01_data\\01_price_data\\AAPL1')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1999-12-31,0.776367,0.791016,0.765137,0.790527,163811200.0,0.0,0.0
1,2000-01-03,0.806641,0.865234,0.782227,0.860840,535796800.0,0.0,0.0
2,2000-01-04,0.832520,0.850586,0.778320,0.788086,512377600.0,0.0,0.0
3,2000-01-05,0.797852,0.850098,0.791992,0.799805,778321600.0,0.0,0.0
4,2000-01-06,0.816406,0.822754,0.730469,0.730469,767972800.0,0.0,0.0
...,...,...,...,...,...,...,...,...
5332,2021-03-12,120.375000,121.187500,119.187500,121.000000,87963400.0,0.0,0.0
5333,2021-03-15,121.437500,124.000000,120.437500,124.000000,92403800.0,0.0,0.0
5334,2021-03-16,125.687500,127.250000,124.750000,125.562500,114740000.0,0.0,0.0
5335,2021-03-17,124.062500,125.875000,122.312500,124.750000,111437504.0,0.0,0.0
