In [15]:
import yfinance as yf
import pandas as pd
import numpy as np
from ta.momentum import StochRSIIndicator
from ta.momentum import RSIIndicator
from ta.momentum import ROCIndicator
from ta.volume import ForceIndexIndicator
from ta.volume import EaseOfMovementIndicator
from ta.volume import VolumePriceTrendIndicator
from ta.volume import NegativeVolumeIndexIndicator
from ta.trend import SMAIndicator
from ta.trend import DPOIndicator
from ta.volatility import BollingerBands
from ta.volatility import KeltnerChannel
from ta.volatility import AverageTrueRange
from ta.volatility import DonchianChannel
from ta.others import DailyLogReturnIndicator
from ta.momentum import ROCIndicator
from IPython.display import clear_output

payload=np.array(pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0].iloc[:,0])
payload=payload[payload!='BRK.B']
payload=payload[payload!='BF.B']
#print(payload)


class algo:
    start="2010-01-01"
    end=['2022-01-01','2021-01-01','2020-01-01','2019-01-01','2018-01-01','2017-01-01','2016-01-01','2015-01-01','2014-01-01']
    interval='1mo'
    thr1=1
    thr2=99
    
    def __init__(self,stock):
        self.stock=stock
        
    # get raw stock data       
    def getdata(self):
        df=(yf.download(self.stock,algo.start,algo.end[0],interval=algo.interval).dropna())
        #df['log_current']=np.log(df.Close/df.Open)
        df['log_current']=((df.Close-df.Open)/df.Open)
        df['stock']=self.stock
        df['Year'] = df.index.strftime('%Y-%m-%d').str[:4]
        #output = stock data from start to end
        clear_output(wait=True)
        return(df)

    # apply indicators (offset 1)
    def indicators(self):
        df=self.stock
        #df['log_past']=DailyLogReturnIndicator(close=df.Close.shift(1)).daily_log_return()
        #df['condition']=((df.Close-df.Close.shift(1))/df.Close.shift(1)).shift(1)
        #df['condition']=RSIIndicator(close=df.Close.shift(1)).rsi()
        df['condition']=ROCIndicator(close=df.Close.shift(1)).roc()
        df=df.dropna()
        
        #output = stock data with indicators
        return(df)
    
    # calculate percentiles based on the hustorical period (cutoff)
    def percentiles(self):
        df=self.stock
        dic1={}
        dic2={}
        for t in range (1,len(algo.end)):
            time=algo.end[t]
            df=(df[df.index<time])
            dic1[time]=np.percentile((df.condition),algo.thr1)
            dic2[time]=np.percentile((df.condition),algo.thr2)
            
        #output = 2 dictionaries: year - percentile of preveious year (1,99)
        return(dic1,dic2)
    
    # apply trade conditions
    def trade(self,perc1,perc2):
        df=self.stock
        dff=pd.DataFrame()
        for t in range (1,len(algo.end)):
            origin=algo.end[t]
            end=algo.end[t-1]
            
            #select origin<stock data<end
            dft=(df[(df.index>=origin) & (df.index <end)])
            
            #apply trade conditions based on thresholds
            dft['trade_pass']=np.select([(dft.condition<perc1[origin]),(dft.condition>perc2[origin])],[1,-1],default=0)
            dft['perc1']=perc1[origin]
            dft['perc2']=perc2[origin]
            dff=pd.concat([dff,dft])

        return(dff)
    

    def stockloop():
        l1=0
        l2=500
        boom=pd.DataFrame()
        for i in range (l1,l2):
            try:
                rawdata=algo(payload[i]).getdata()
       
                ta=algo(rawdata).indicators()
                trades=algo(ta).trade(algo(ta).percentiles()[0],algo(ta).percentiles()[1])
                boom=pd.concat([boom,trades])
               
            except Exception as e:
                print(e)
                pass
        return(boom)
    


boom=algo.stockloop()
boom.to_csv('Master_list.csv')
clear_output(wait=True)
print('finish')

finish


In [16]:
def yearloop(boom):

    years=np.array(['2021','2020','2019','2018','2017','2016'])
    for i in range(0,6):
        y=years[i]
        #print(y)
        boomy=boom[boom.Year==y]
        #print(boom)
        long=boomy[boomy['trade_pass']==1]
        short=boomy[boomy['trade_pass']==-1]        
        short['log_current']=-short['log_current']
        group=([boomy,long,short])
        #print(long)
        long.to_csv('looong.csv')
        for i in range(0,3): 
            trade_count=group[i].shape[0]
            win_rate=(((group[i][group[i]['log_current']>0]).shape[0]))/trade_count
            avg_per_trade=np.average(group[i].log_current)/trade_count
            minimum=np.min(group[i].log_current)
            print(trade_count,win_rate,avg_per_trade,minimum)
          
yearloop(boom)


6032 0.508289124668435 1.4333698296201548e-06 -0.30401240200408175
100 0.49 5.3771957379621134e-05 -0.13723428883010677
1188 0.4730639730639731 -3.574290617831575e-06 -0.2645776483956554
5568 0.5510057471264368 3.082840248453079e-06 -0.8360141284475476
848 0.6674528301886793 8.93890007081298e-05 -0.6551518971926646
159 0.4716981132075472 -4.655830032059753e-05 -0.36372153583672207
5568 0.6449353448275862 3.993455946845856e-06 -0.40198016783157425
417 0.7266187050359713 0.00013266322804491052 -0.25498862985389753
111 0.4864864864864865 -4.480753046411125e-06 -0.1387734881089756
5568 0.4951508620689655 -9.816590135301138e-07 -0.43766522985161704
385 0.5298701298701298 -7.802116931105431e-06 -0.2909442285242806
147 0.5510204081632653 4.810818453412402e-05 -0.39143724043727435
5568 0.6090158045977011 2.499101920590304e-06 -0.3613445765385711
291 0.5292096219931272 2.4305489849792143e-05 -0.2393680951310462
291 0.41580756013745707 -3.364920841831448e-05 -0.37817461378283584
5568 0.583692528

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
