In [6]:
# Import statements
import yfinance as yf
import pandas as pd
import os
import numpy as np
from datetime import date
from pathlib import Path
import pandas_ta as ta
from tqdm import tqdm
from price_data_pull_yfinance import gethistoricalOHLC, saveHistStockData, loadHistDataFromDisk

from global_vars import dataPathToOHLC, pathToMasterDF, float16Cols, float32Cols, dataPathToTechIndicators

In [7]:
pd.set_option('display.max_columns', 500)

In [8]:
masterDf = pd.read_csv(pathToMasterDF);
tickerDF = pd.read_csv('../01_data/SnP500_constituents.csv');
masterDf = pd.merge(masterDf, tickerDF[['Symbol', 'Sector']], how='right', left_on=['TICKER'], right_on=['Symbol']).drop('Symbol', axis=1)

In [9]:
masterDf.rename(columns = {'Sector': 'SECTOR'}, inplace = True)


In [10]:
masterDf['FILEPATH_TECH_IND_CALC'] = ''

In [11]:
masterDf

Unnamed: 0,TICKER,FIRST_DATE_OHLC,LAST_DATE_OHLC,FILEPATH,SECTOR,FILEPATH_TECH_IND_CALC
0,MMM,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials,
1,AOS,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Industrials,
2,ABT,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
3,ABBV,2013-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
4,ABMD,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
...,...,...,...,...,...,...
500,YUM,1997-09-17,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Consumer Discretionary,
501,ZBRA,1991-08-15,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Information Technology,
502,ZBH,2001-07-25,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Health Care,
503,ZION,1990-01-02,2021-03-19,C:\Users\Philipp\Google Drive\Code_for_bothCom...,Financials,


### Get the data from the masterDF and calculate the technical indicators

In [11]:
currMasterDF = masterDf.head(2)

In [10]:
ticker = 'AAPL'
problemDict = {}
problemDict[ticker] = f'Loading data problem for ticker {ticker}'
problemDict

{'AAPL': 'Loading data problem for ticker AAPL'}

In [17]:
def batchCalculationOfTechnicalIndicators(masterdf, pathToOutpu = dataPathToTechIndicators, strategyList = 'all', ):
    for index, row in tqdm(masterdf.iterrows(), total=masterdf.shape[0], leave = False):
        # get the ticker from the current row
        ticker = row[0]
        
        tickerFilename = ticker + '_tech_ind_calculated'
        filepath = dataPathToTechIndicators.joinpath(
                tickerFilename
            )
        if filepath.is_file() == False:
            problemDict = {}

            try:
                # load the data from disk
                df = loadHistDataFromDisk(ticker)
                df.set_index(pd.DatetimeIndex(df['Date']), inplace = True);
                df = df.drop('Date',axis = 1);
            except:
                print(f'Problem arose loading data from disk for the ticker {ticker}')
                problemDict[ticker] = f'Loading data problem for ticker {ticker}'


            try:
                # calculate the technical indicators
                df.ta.log_return(cumalative=True,append=True);
            except:
                print(f'Calculate the log-returns failed for the ticker {ticker}')
                problemDict[ticker] = f'Problem for ticker {ticker} when caluclating the log-returns'
            # check if a different strategy has been supplied 
            if strategyList == 'all':
                strategyList = ['Cycles', 'Overlap', 'Momentum', 'Statistics','Trend', 'Volatility', 'Volume']

            # loop through the strategies and calculate the technical indicators 
            for strategy in strategyList:#tqdm(strategyList, leave = False):
                try:
                    df.ta.strategy(strategy)
                except:
                    print(f"There has been a problem calculating the techincal indicator for {strategy} for the ticker {ticker}.")
                    problemDict[ticker] = f"There has been a problem calculating the techincal indicator for {strategy} for the ticker {ticker}."


            # save the created dataframe to disk 
            try:

                df = df.reset_index()
                df.to_feather(filepath)
            except:
                print(f'Could not save the calculated technical indicators for the ticker {ticker}!')
                problemDict[ticker] = f'Could not save the calculated technical indicators for the ticker {ticker}!'

        
        # add a reference to the masterDF
        try:
            masterdf.loc[masterdf.TICKER == ticker, 'FILEPATH_TECH_IND_CALC'] = filepath
        except:
            print(f'Did not write the {ticker}-technical indicator filetpath to masterDf!')
            problemDict[ticker] = f'Did not write the {ticker}-technical indicator filetpath to masterDf!'
        return problemDict

In [18]:
batchCalculationOfTechnicalIndicators(masterDf)

  0%|          | 0/505 [00:00<?, ?it/s]

Problem arose loading data from disk for the ticker BRK.B
Calculate the log-returns failed for the ticker BRK.B
There has been a problem calculating the techincal indicator for Candles for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Cycles for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Overlap for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Momentum for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Statistics for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Trend for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Volatility for the ticker BRK.B.
There has been a problem calculating the techincal indicator for Volume for the ticker BRK.B.
Could not save the calculated technical indicators for the ticker BRK.B!


 15%|█▌        | 77/505 [05:07<1:55:56, 16.25s/it]

Problem arose loading data from disk for the ticker BF.B
There has been a problem calculating the techincal indicator for Overlap for the ticker BF.B.


 22%|██▏       | 112/505 [19:14<2:44:12, 25.07s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker KO.


 27%|██▋       | 136/505 [29:57<2:41:09, 26.21s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker DVN.


 31%|███       | 155/505 [37:18<2:40:16, 27.47s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker EMN.


 31%|███▏      | 158/505 [38:35<2:31:38, 26.22s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker ECL.


 44%|████▍     | 222/505 [1:05:57<2:13:20, 28.27s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker HES.


 64%|██████▎   | 321/505 [1:48:15<1:20:23, 26.21s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker MSI.


 68%|██████▊   | 342/505 [1:56:44<1:07:45, 24.94s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker NVDA.


 74%|███████▍  | 373/505 [2:09:57<58:37, 26.65s/it]  

There has been a problem calculating the techincal indicator for Candles for the ticker PGR.


 77%|███████▋  | 390/505 [2:17:22<52:29, 27.39s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker RF.


 86%|████████▌ | 434/505 [2:36:26<29:39, 25.06s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker TXT.


 87%|████████▋ | 439/505 [2:38:47<30:47, 27.99s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker MOS.


 97%|█████████▋| 492/505 [3:01:26<05:19, 24.58s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker WHR.


 98%|█████████▊| 493/505 [3:01:53<05:03, 25.30s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker WMB.


 98%|█████████▊| 497/505 [3:03:33<03:23, 25.44s/it]

There has been a problem calculating the techincal indicator for Candles for the ticker XRX.


                                                   

In [25]:
problematicTickers = ["BRK.B", "KO", "DVN", "EMN", "ECL", "HES", "MSI", "NVDA", "PGR", "RF", "TXT", "MOS", "WHR", "WMB", "XRX"]

In [22]:
masterDf.to_csv(pathToMasterDF, index=False)

### check some of the problematic results


In [34]:
df = loadHistDataFromDisk('PGR')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1990-01-02,0.572266,0.577637,0.568359,0.575684,889200.0,0.0,0.0
1,1990-01-03,0.577637,0.586914,0.577637,0.57959,1198800.0,0.0,0.0
2,1990-01-04,0.575684,0.575684,0.57373,0.575684,1760400.0,0.0,0.0
3,1990-01-05,0.575684,0.57959,0.575684,0.577637,100800.0,0.0,0.0
4,1990-01-08,0.575684,0.57959,0.575684,0.57959,644400.0,0.0,0.0


## Technical Indicator Strategies
Based on the implementation of https://github.com/twopirllc/pandas-ta

In [16]:
df = loadHistDataFromDisk('AAPL')
df.set_index(pd.DatetimeIndex(df['Date']), inplace = True);
df = df.drop('Date',axis = 1);

In [73]:
df = df.iloc[:1000]

#### Candles Technical Indicators

In [75]:
before = len(df.columns)
df.ta.strategy('Candles')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 0 features added


#### Cycles Technical Indicators

In [78]:
before = len(df.columns)
df.ta.strategy('Cycles')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 1 features added


#### Overlap Technical Indicators

In [80]:
before = len(df.columns)
df.ta.strategy('Overlap')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 40 features added


#### Momentum Technical Indicators

In [79]:
before = len(df.columns)
df.ta.strategy('Momentum')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 60 features added


#### Statistics Technical Indicators

In [82]:
before = len(df.columns)
df.ta.strategy('Statistics')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 9 features added


#### Trend Technical Indicators

In [83]:
before = len(df.columns)
df.ta.strategy('Trend')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 23 features added


#### Volatility Technical Indicators

In [85]:
before = len(df.columns)
df.ta.strategy('Volatility')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 31 features added


#### Volume Technical Indicators

In [86]:
before = len(df.columns)
df.ta.strategy('Volume')
after = len(df.columns)
print(f'There have been {after-before} features added')

There have been 18 features added
