In [20]:
import yfinance as yf
from datetime import date, datetime
import pandas as pd 
import numpy as np

#names_list = ['^GSPC', '000001.SS', '^KS11', '^N225']
#real_names_kist = ['SP500', 'SSE Composite Index', 'KOSPI', 'Nikkei225']

In [21]:
start_date = date(2000, 2, 1)
end_date = date(2021, 3, 12)

tickerSymbol = '^GSPC'
tickerData = yf.Ticker(tickerSymbol)

df = tickerData.history(period='1d', start=start_date, end=end_date).dropna()
df.drop(['Stock Splits', 'Dividends'], axis='columns', inplace=True)
df = pd.DataFrame(data = df)
df.index = pd.to_datetime(df.index)
# df["Date"] = df.index

df_copy = tickerData.history(period='1d', start=start_date, end=end_date).dropna()

df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000


In [3]:
df["Percent change 1"] = df["Close"].pct_change(periods=1)
df["Percent change 5"] = df["Close"].pct_change(periods=5)
df["Percent change 30"] = df["Close"].pct_change(periods=30)

df.iloc[100:103, :]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-06-22,1479.13,1479.13,1448.03,1452.18,1022700000,-0.01822,-0.017955,0.049984
2000-06-23,1452.18,1459.94,1438.31,1441.48,847600000,-0.007368,-0.015692,0.023917
2000-06-26,1441.48,1459.66,1441.48,1455.31,889000000,0.009594,-0.020653,0.024174


In [4]:
df["Percent change Open"] = df["Open"].pct_change(periods=1)
df["Percent change High"] = df["High"].pct_change(periods=1)
df["Percent change Low"] = df["Low"].pct_change(periods=1)
df["Percent change Volume"] = df["Volume"].pct_change(periods=1)

df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-03-09,3851.93,3903.76,3851.93,3875.44,5496340000,0.014155,0.001331,0.005208,0.001961,0.005849,0.008557,-0.060814
2021-03-10,3891.99,3917.35,3885.73,3898.81,5827250000,0.00603,0.020706,0.012778,0.0104,0.003481,0.008775,0.060206
2021-03-11,3915.54,3960.27,3915.54,3939.34,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,0.007672,-0.090478


# Simple Moving Average

In [5]:
def SMA(data,n):
    
    #додаємо NaN на початку
	sma_list=[np.nan for k in range(n-1)]
    
	for i in range(len(data)-n+1):
		sum_n = 0
		for j in range(n):
			sum_n = sum_n + data[i+j]
		sma=sum_n/n
		sma_list.append(round(sma, 2))
        
	return sma_list

# Linear Weighted Moving Average

In [6]:
def LWMA(data,n):
    
	lma_list=[np.nan for k in range(n-1)]

	for i in range(len(data)-n+1):
		sum_n, sum_j = 0, 0
		for j in range(n):
			sum_n = sum_n + (j+1)*data[i+j]
			sum_j = sum_j + (j+1)
		lma=sum_n/sum_j
		lma_list.append(lma)

	return lma_list

# Exponential Moving Average

In [7]:
def EMA(data, n, a=0):
	ema_list=[data[0]]
	if a==0:
		a=2/(n+1)

	for i in range (1,len(data)):
		ema = 0
		ema = (1-a)*ema_list[i-1]+a*data[i]
		ema_list.append(ema)

	return ema_list

# Disparsity Index

In [39]:
def DI(data, n, MA_type):
    if MA_type == "EMA":
        MA = EMA(data, n)
    elif MA_type == "LWMA":
        MA = LWMA(data, n)
    else:
        MA = SMA(data, n)
    
    di_list = []
    for i in range(len(MA)):
        if (np.isnan(MA[i])):
            di_list.append(np.nan)
        else:
            di_list.append((data[i]-MA[i])/(100*MA[i]))
        
    return di_list

# CCI

In [8]:
def CCI(data_close, data_high, data_low, n):
	p_typical = []

	for i in range(len(data_close)):
		p_typical.append((data_close[i]+data_low[i]+data_high[i])/3)
	sma = SMA(p_typical,n)

	MAD = [np.nan for k in range(n-1)]
	for j in range(n-1, len(data_close)):
		mad = 0
		for s in range(n):
			mad = mad +  abs(p_typical[j-s]-sma[j])
		MAD.append(mad/n)

	CCI = [np.nan for n in range(n-1)]
	for t in range(n-1, len(data_close)):
		cci = (p_typical[t]-sma[t])/(0.015*MAD[t])
		CCI.append(cci)

	return CCI

# SO

In [9]:
def SO(data_close, data_high, data_low, n):
	K, D = [np.nan for k in range(n-1)], [np.nan for k in range(n-1)]
	highest_high, lowest_low = [np.nan for k in range(n-1)], [np.nan for k in range(n-1)]

	for i in range(len(data_close)-n+1):
		list_n_high, list_n_low = [], []
		for j in range(0, n):
			list_n_high.append(data_high[i+j])
			list_n_low.append(data_low[i+j])
		highest_high.append(max(list_n_high))
		lowest_low.append(min(list_n_low))

	for i in range(n-1, len(highest_high)):
		K.append(100*(data_close[i]-lowest_low[i])/(highest_high[i]-lowest_low[i]))
	D = SMA(K, n)

	return K, D

# Elder-Ray Index

In [42]:
def ERI(data_close, data_high, data_low, n):
    MA = EMA(data_close, n)
    
    BuP, BeP = [], []
    for i in range(len(data_close)):
        BuP.append(data_high[i] - MA[i])
        BeP.append(data_low[i] - MA[i])
    return BuP, BeP

# CMO

In [11]:
def CMO(data, n):
	CMO1, CMO2 = [np.nan], [np.nan]

	for i in range(1,len(data)):
		if data[i]>data[i-1]:
			CMO1.append(data[i]-data[i-1])
			CMO2.append(0)
		elif data[i]<data[i-1]:
			CMO1.append(0)
			CMO2.append(data[i-1]-data[i])
		else:
			CMO1.append(0)
			CMO2.append(0)

	sH, sL = [np.nan for k in range(n)], [np.nan for k in range(n)]
	for i in range(n,len(data)):
		sh, sl = 0, 0
		for t in range(n):
			sh = sh + CMO1[i-t]
			sl = sl + CMO2[i-t]
		sH.append(sh)
		sL.append(sl)

	CMO_list=[np.nan for k in range(n)] 
	for i in range(n,len(sH)):
		if (sH[i]+sL[i]) != 0:
			CMO_list.append(100*(sH[i]-sL[i])/(sH[i]+sL[i]))
		else:
			CMO_list.append(100*(sH[i-1]-sL[i-1])/(sH[i-1]+sL[i-1]))
		

	return CMO_list

# Strategy with moving averages


Сигнали - перетин ціни і плинного середнього

P - list of Close prices

n1 - к-ть періодів для середнього

MA_type - тип середнього ("SMA", "EMA", "LWMA")

In [12]:
def strategy_MA_1(P, n1, MA_type, n2=26, n3=9):
	MA= []
	if MA_type == "SMA":
		MA = SMA(P, n1)
	elif MA_type == "EMA":
		MA = EMA(P, n1)
	elif MA_type == "LWMA":
		MA = LWMA(P, n1)
	elif MA_type == "KAMA":
		MA = KAMA(P, n1, n2, n3)

	if MA_type != "EMA":
		s=[np.nan for k in range(n1)]
		for i in range(n1, len(MA)):
			if (P[i-1]<MA[i-1]) and (P[i]>MA[i]):
				s.append(1)
			elif (P[i-1]>MA[i-1]) and (P[i]<MA[i]):
				s.append(-1)
			else:
				s.append(0)
	elif MA_type == "EMA":
		s=[np.nan]
		for i in range(1,len(MA)):
			if (P[i-1]<MA[i-1]) and (P[i]>MA[i]):
				s.append(1)
			elif (P[i-1]>MA[i-1]) and (P[i]<MA[i]):
				s.append(-1)
			else:
				s.append(0)
	return s

# Strategy SO

In [13]:
def strategy_SO(P_close, P_high, P_low, n):
	K, D = SO(P_close, P_high, P_low, n)
	s=[np.nan for k in range(n)]
	for i in range(n, len(P_close)):
		if (K[i]>D[i]) and (K[i-1]<D[i-1]):
			s.append(1)
		elif (K[i]<D[i]) and (K[i-1]>D[i-1]):
			s.append(-1)
		else:
			s.append(0)
	return s

# Strategy CCI

In [14]:
def strategy_CCI(data_close, data_high, data_low, n):
	cci = CCI(data_close, data_high, data_low, n)
	s=[]
	for i in range(len(data_close)):
		if ((cci[i]>0) and (cci[i-1]<0)):
			s.append(1)
		elif ((cci[i]<0) and (cci[i-1]>0)):
			s.append(-1)
		else:
			s.append(0)
	return s

# Strategy CMO

In [16]:
def strategy_CMO(P, n):
	CMO_list = CMO(P, n)
	s=[np.nan for k in range(n)]
	for i in range(n, len(P)):
		if (CMO_list[i]>0) and (CMO_list[i-1]<0):
			s.append(1)
		elif (CMO_list[i]<0) and (CMO_list[i-1]>0):
			s.append(-1)
		else:
			s.append(0)
	return s

# Strategy MAE

In [17]:
def strategy_MAE(P, upper, lower, n1, MA_type, MAE_type="LL"):
	if n1 >= 0:
		UL, LL, MA = [], [], []
		if MA_type == "SMA":
			MA = SMA(P, n1)
		elif MA_type == "EMA":
			MA = EMA(P, n1)
		elif MA_type == "LWMA":
			MA = LWMA(P, n1)

		for i in range(len(P)):
			UL.append((1+upper)*MA[i])
			LL.append((1-lower)*MA[i])
		if MA_type != "EMA":
			s=[np.nan for k in range(n1)]
			if MAE_type == "UL":
				for i in range(n1, len(MA)):
					if (P[i-1]<UL[i-1]) and (P[i]>UL[i]):
						s.append(1)
					elif (P[i-1]>UL[i-1]) and (P[i]<UL[i]):
						s.append(-1)
					else:
						s.append(0)
			elif MAE_type == "LL":
				for i in range(n1, len(MA)):
					if (P[i-1]<LL[i-1]) and (P[i]>LL[i]):
						s.append(1)
					elif (P[i-1]>LL[i-1]) and (P[i]<LL[i]):
						s.append(-1)
					else:
						s.append(0)
		elif MA_type == "EMA":
			s=[np.nan]
			if MAE_type == "UL":
				for i in range(1, len(MA)):
					if (P[i-1]<UL[i-1]) and (P[i]>UL[i]):
						s.append(1)
					elif (P[i-1]>UL[i-1]) and (P[i]<UL[i]):
						s.append(-1)
					else:
						s.append(0)
			elif MAE_type == "LL":
				for i in range(1, len(MA)):
					if (P[i-1]<LL[i-1]) and (P[i]>LL[i]):
						s.append(1)
					elif (P[i-1]>LL[i-1]) and (P[i]<LL[i]):
						s.append(-1)
					else:
						s.append(0)
		return s
	else:
		return ["nan", "nan"]

# data for recalculating in excel

In [22]:
df_copy.drop(['Stock Splits', 'Dividends', 'Open', 'High', 'Low', 'Volume'], axis='columns', inplace=True)
df_copy.head(3)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2000-01-31,1394.46
2000-02-01,1409.28
2000-02-02,1409.12


In [23]:
df_copy["SMA 10"] = SMA(df_copy["Close"],10)
df_copy["SMA 5"] = SMA(df_copy["Close"],5)
df_copy.tail(3)

Unnamed: 0_level_0,Close,SMA 10,SMA 5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-09,3875.44,3846.5,3825.38
2021-03-10,3898.81,3843.83,3841.2
2021-03-11,3939.34,3854.83,3875.38


In [24]:
df_copy["LWMA 10"] = LWMA(df_copy["Close"],10)
df_copy["LWMA 5"] = LWMA(df_copy["Close"],5)
df_copy.tail(3)

Unnamed: 0_level_0,Close,SMA 10,SMA 5,LWMA 10,LWMA 5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-09,3875.44,3846.5,3825.38,3839.199455,3836.338667
2021-03-10,3898.81,3843.83,3841.2,3848.711273,3860.814
2021-03-11,3939.34,3854.83,3875.38,3866.076182,3893.526667


In [25]:
df_copy["EMA 10"] = EMA(df_copy["Close"],10)
df_copy["EMA 5"] = EMA(df_copy["Close"],5)
df_copy.tail(3)

Unnamed: 0_level_0,Close,SMA 10,SMA 5,LWMA 10,LWMA 5,EMA 10,EMA 5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-03-09,3875.44,3846.5,3825.38,3839.199455,3836.338667,3848.888691,3843.696263
2021-03-10,3898.81,3843.83,3841.2,3848.711273,3860.814,3857.965293,3862.067508
2021-03-11,3939.34,3854.83,3875.38,3866.076182,3893.526667,3872.760694,3887.825006


In [27]:
df_copy["CMO 10"] = CMO(df_copy["Close"],10)
df_copy["CMO 5"] = CMO(df_copy["Close"],5)
df_copy.tail(3)

Unnamed: 0_level_0,Close,SMA 10,SMA 5,LWMA 10,LWMA 5,EMA 10,EMA 5,CMO 10,CMO 5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-03-09,3875.44,3846.5,3825.38,3839.199455,3836.338667,3848.888691,3843.696263,-1.117792,2.060247
2021-03-10,3898.81,3843.83,3841.2,3848.711273,3860.814,3857.965293,3862.067508,-5.221451,35.502985
2021-03-11,3939.34,3854.83,3875.38,3866.076182,3893.526667,3872.760694,3887.825006,24.215207,80.580052


In [40]:
df_copy["DI SMA 5"] = DI(df_copy["Close"],5, "SMA")
df_copy["DI EMA 5"] = DI(df_copy["Close"],5, "EMA")
df_copy["DI LWMA 5"] = DI(df_copy["Close"],5, "LWMA")
df_copy.tail(3)

Unnamed: 0_level_0,Close,SMA 10,SMA 5,LWMA 10,LWMA 5,EMA 10,EMA 5,CMO 10,CMO 5,DI SMA 5,DI EMA 5,DI LWMA 5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-03-09,3875.44,3846.5,3825.38,3839.199455,3836.338667,3848.888691,3843.696263,-1.117792,2.060247,0.000131,8.3e-05,0.000102
2021-03-10,3898.81,3843.83,3841.2,3848.711273,3860.814,3857.965293,3862.067508,-5.221451,35.502985,0.00015,9.5e-05,9.8e-05
2021-03-11,3939.34,3854.83,3875.38,3866.076182,3893.526667,3872.760694,3887.825006,24.215207,80.580052,0.000165,0.000133,0.000118


In [41]:
df_copy.to_excel('d:\Курсова\Курсова 2021\check 2.xlsx')

# additional checks

In [76]:
df_copy["SMA 20"] = SMA(df_copy["Close"],20)
df_copy["SMA 50"] = SMA(df_copy["Close"],50)
df_copy["SMA 100"] = SMA(df_copy["Close"],100)
df_copy["SMA 200"] = SMA(df_copy["Close"],200)
df_copy.head()

Unnamed: 0_level_0,Close,SMA 20,SMA 50,SMA 100,SMA 200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-31,1394.46,,,,
2000-02-01,1409.28,,,,
2000-02-02,1409.12,,,,
2000-02-03,1424.97,,,,
2000-02-04,1424.37,,,,


In [77]:
df_copy.tail()

Unnamed: 0_level_0,Close,SMA 20,SMA 50,SMA 100,SMA 200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-05,3841.94,3884.27,3821.99,3687.15,3471.53
2021-03-08,3821.35,3880.99,3824.67,3690.02,3475.78
2021-03-09,3875.44,3878.99,3828.38,3693.65,3480.41
2021-03-10,3898.81,3878.37,3832.3,3697.75,3485.13
2021-03-11,3939.34,3879.84,3836.38,3702.31,3489.87


In [79]:
df_copy["MA_signals"] = strategy_MA_1(P = df_copy["Close"], n1 = 20, MA_type = "SMA")
df_copy.head()

Unnamed: 0_level_0,Close,SMA 20,SMA 50,SMA 100,SMA 200,MA_signals
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-31,1394.46,,,,,
2000-02-01,1409.28,,,,,
2000-02-02,1409.12,,,,,
2000-02-03,1424.97,,,,,
2000-02-04,1424.37,,,,,


In [80]:
df_copy.tail()

Unnamed: 0_level_0,Close,SMA 20,SMA 50,SMA 100,SMA 200,MA_signals
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-03-05,3841.94,3884.27,3821.99,3687.15,3471.53,0.0
2021-03-08,3821.35,3880.99,3824.67,3690.02,3475.78,0.0
2021-03-09,3875.44,3878.99,3828.38,3693.65,3480.41,0.0
2021-03-10,3898.81,3878.37,3832.3,3697.75,3485.13,1.0
2021-03-11,3939.34,3879.84,3836.38,3702.31,3489.87,0.0


In [81]:
missing_values_count = df_copy.isna().sum()

# how many total missing values do we have?
total_cells = np.product(df_copy.shape)
total_missing = missing_values_count.sum()

# percent of data that is missing
percent_missing = (total_missing/total_cells) * 100
print(total_cells)
print(total_missing)
print(percent_missing)

31872
386
1.2110943775100402
