In [41]:
import yfinance as yf
from datetime import date, datetime
import pandas as pd 
import numpy as np

In [42]:
#names_list = ['^GSPC', '000001.SS', '^KS11', '^N225']
#real_names_list = ['SP500', 'SSE Composite Index', 'KOSPI', 'Nikkei225']

In [43]:
# Date window requested from Yahoo Finance.
start_date = date(2000, 2, 1)
end_date = date(2021, 3, 18)

# '^GSPC' is the S&P 500 index ticker.
tickerSymbol = '^GSPC'
tickerData = yf.Ticker(tickerSymbol)

# Fetch the history, discard rows with missing values, remove the two
# corporate-action columns and re-wrap the result as a DataFrame.
df = pd.DataFrame(
    data=tickerData.history(period='1d', start=start_date, end=end_date)
    .dropna()
    .drop(['Stock Splits', 'Dividends'], axis='columns')
)
df.index = pd.to_datetime(df.index)

df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-31,1360.160034,1394.47998,1350.140015,1394.459961,993800000
2000-02-01,1394.459961,1412.48999,1384.790039,1409.280029,981000000
2000-02-02,1409.280029,1420.609985,1403.48999,1409.119995,1038600000


# Simple Moving Average

In [44]:
def SMA(data, n):
    """Simple moving average of ``data`` over a trailing window of ``n`` points.

    Parameters
    ----------
    data : sequence of numbers (list, NumPy array or pandas Series)
        Values are read by position, so the index labels of a Series do not
        matter.
    n : int
        Window length; must be at least 1.

    Returns
    -------
    list
        One entry per element of ``data``. Entries whose window is incomplete
        (the first ``n - 1``) are NaN; the others are the window mean rounded
        to 2 decimals.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Materialise once so that values[i] is always positional. Indexing a
    # Series with an int is label-based and fails on non-range indexes.
    values = list(data)

    # Pad the start with NaN, but never beyond the length of the input.
    sma_list = [np.nan] * min(n - 1, len(values))

    for start in range(len(values) - n + 1):
        window_sum = 0
        # Plain left-to-right accumulation keeps the floating-point result
        # identical to the previous implementation.
        for value in values[start:start + n]:
            window_sum = window_sum + value
        sma_list.append(round(window_sum / n, 2))

    return sma_list

# Linear Weighted Moving Average

In [45]:
def LWMA(data, n):
    """Linearly weighted moving average over a trailing window of ``n`` points.

    Inside each window the oldest value gets weight 1 and the newest weight
    ``n``; the weighted sum is divided by the sum of the weights.

    Parameters
    ----------
    data : sequence of numbers (list, NumPy array or pandas Series)
        Values are read by position, so the index labels of a Series do not
        matter.
    n : int
        Window length; must be at least 1.

    Returns
    -------
    list
        One entry per element of ``data``; entries whose window is incomplete
        (the first ``n - 1``) are NaN. Values are not rounded.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Positional copy: indexing a Series with an int is label-based.
    values = list(data)

    # NaN padding for the warm-up, capped at the input length.
    lma_list = [np.nan] * min(n - 1, len(values))

    for start in range(len(values) - n + 1):
        weighted_sum, weight_total = 0, 0
        for weight, value in enumerate(values[start:start + n], start=1):
            weighted_sum = weighted_sum + weight * value
            weight_total = weight_total + weight
        lma_list.append(weighted_sum / weight_total)

    return lma_list

# Exponential Moving Average

In [46]:
def EMA(data, n, a=0):
    """Exponential moving average seeded with the first observation.

    Parameters
    ----------
    data : sequence of numbers (list, NumPy array or pandas Series)
        Values are read by position, so the index labels of a Series do not
        matter.
    n : int
        Period used to derive the smoothing factor when ``a`` is left at 0.
    a : float, optional
        Smoothing factor. The default 0 is a sentinel meaning
        "use ``2 / (n + 1)``".

    Returns
    -------
    list
        Same length as ``data``. Element 0 is ``data[0]``; every later element
        is ``(1 - a) * previous_ema + a * value``. Empty input gives ``[]``.
    """
    # Positional copy: indexing a Series with an int is label-based.
    values = list(data)
    if not values:
        return []

    if a == 0:
        a = 2 / (n + 1)

    ema_list = [values[0]]
    for value in values[1:]:
        ema_list.append((1 - a) * ema_list[-1] + a * value)

    return ema_list

# Disparity Index

In [47]:
def DI(data, n, MA_type):
    """Disparity Index: percentage distance of each price from its moving average.

    Computed as ``100 * (price - MA) / MA``. (The previous version divided by
    100 instead of multiplying, which shrank every value by a factor of 10^4;
    the sign, and therefore any zero-cross signal, is unaffected.)

    Parameters
    ----------
    data : sequence of numbers (list, NumPy array or pandas Series)
        Prices, read by position.
    n : int
        Moving-average window / period.
    MA_type : str
        "EMA" or "LWMA"; any other value selects the simple moving average.

    Returns
    -------
    list
        One entry per moving-average value; NaN wherever the average is NaN.
    """
    # Positional copy: indexing a Series with an int is label-based.
    prices = list(data)

    if MA_type == "EMA":
        MA = EMA(prices, n)
    elif MA_type == "LWMA":
        MA = LWMA(prices, n)
    else:
        MA = SMA(prices, n)

    di_list = []
    for i, average in enumerate(MA):
        if np.isnan(average):
            di_list.append(np.nan)
        else:
            di_list.append(100 * (prices[i] - average) / average)

    return di_list

# CCI

In [48]:
def CCI(data_close, data_high, data_low, n):
    """Commodity Channel Index over a trailing window of ``n`` bars.

    For every bar the typical price is ``(close + low + high) / 3``. The CCI is
    ``(typical - SMA(typical, n)) / (0.015 * mean_deviation)``, where the mean
    deviation is the average absolute distance of the last ``n`` typical
    prices from the current SMA value.

    Parameters
    ----------
    data_close, data_high, data_low : sequences of numbers
        Equal-length price series, read by position.
    n : int
        Window length.

    Returns
    -------
    list
        ``n - 1`` leading NaN values followed by one CCI value per remaining
        bar. A bar whose mean deviation is exactly zero yields NaN instead of
        dividing by zero.
    """
    # Positional copies: indexing a Series with an int is label-based.
    closes, highs, lows = list(data_close), list(data_high), list(data_low)

    p_typical = [(closes[i] + lows[i] + highs[i]) / 3 for i in range(len(closes))]
    sma = SMA(p_typical, n)

    cci_list = [np.nan for _ in range(n - 1)]
    for t in range(n - 1, len(closes)):
        # Sum of absolute deviations of the window from the current average,
        # accumulated newest-to-oldest as before.
        deviation_sum = 0
        for back in range(n):
            deviation_sum = deviation_sum + abs(p_typical[t - back] - sma[t])
        mean_deviation = deviation_sum / n

        if mean_deviation == 0:
            # Completely flat window: the index is undefined.
            cci_list.append(np.nan)
        else:
            cci_list.append((p_typical[t] - sma[t]) / (0.015 * mean_deviation))

    return cci_list

# SO

In [49]:
def SO(data_close, data_high, data_low, n):
    """Stochastic oscillator: %K line and its ``n``-period SMA (%D).

    ``%K = 100 * (close - lowest_low) / (highest_high - lowest_low)``, where
    the extremes are taken over the trailing ``n`` bars including the current
    one.

    Parameters
    ----------
    data_close, data_high, data_low : sequences of numbers
        Equal-length price series, read by position.
    n : int
        Look-back window, used both for the extremes and for smoothing %K.

    Returns
    -------
    tuple(list, list)
        ``(K, D)``. ``K`` starts with ``n - 1`` NaN values; ``D`` is
        ``SMA(K, n)``. A window with zero high-low range yields NaN for %K
        instead of dividing by zero.
    """
    # Positional copies: indexing a Series with an int is label-based.
    closes, highs, lows = list(data_close), list(data_high), list(data_low)

    K = [np.nan for _ in range(n - 1)]
    for end in range(n - 1, len(closes)):
        window_start = end - n + 1
        highest_high = max(highs[window_start:end + 1])
        lowest_low = min(lows[window_start:end + 1])
        price_range = highest_high - lowest_low

        if price_range == 0:
            # Flat window: %K is undefined.
            K.append(np.nan)
        else:
            K.append(100 * (closes[end] - lowest_low) / price_range)

    D = SMA(K, n)

    return K, D

# Elder-Ray Index

In [50]:
def ERI(data_close, data_high, data_low, n):
    """Elder-Ray Index: bull power and bear power relative to an EMA of the close.

    Parameters
    ----------
    data_close, data_high, data_low : sequences of numbers
        Equal-length price series, read by position.
    n : int
        EMA period.

    Returns
    -------
    tuple(list, list)
        ``(BuP, BeP)`` where ``BuP[i] = high[i] - EMA[i]`` and
        ``BeP[i] = low[i] - EMA[i]``.
    """
    # Positional copies: indexing a Series with an int is label-based.
    closes, highs, lows = list(data_close), list(data_high), list(data_low)

    MA = EMA(closes, n)

    BuP = [highs[i] - MA[i] for i in range(len(closes))]
    BeP = [lows[i] - MA[i] for i in range(len(closes))]
    return BuP, BeP

# CMO

In [51]:
def CMO(data, n):
    """Chande Momentum Oscillator over the last ``n`` one-step changes.

    ``CMO = 100 * (sum_up - sum_down) / (sum_up + sum_down)``, where ``sum_up``
    and ``sum_down`` are the sums of the positive and (absolute) negative
    changes inside the window.

    Parameters
    ----------
    data : sequence of numbers (list, NumPy array or pandas Series)
        Values are read by position.
    n : int
        Number of changes per window; must be at least 1.

    Returns
    -------
    list
        One entry per element of ``data``; the first ``n`` entries are NaN.
        When a window has no movement at all the previous CMO value is carried
        forward.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Positional copy: indexing a Series with an int is label-based.
    values = list(data)

    # Per-step gains and losses; slot 0 has no predecessor.
    gains, losses = [np.nan], [np.nan]
    for previous, current in zip(values, values[1:]):
        if current > previous:
            gains.append(current - previous)
            losses.append(0)
        elif current < previous:
            gains.append(0)
            losses.append(previous - current)
        else:
            gains.append(0)
            losses.append(0)

    # NaN padding for the warm-up, capped at the input length.
    CMO_list = [np.nan] * min(n, len(values))
    for i in range(n, len(values)):
        up_sum, down_sum = 0, 0
        # Accumulate newest-to-oldest, matching the previous implementation.
        for back in range(n):
            up_sum = up_sum + gains[i - back]
            down_sum = down_sum + losses[i - back]

        total = up_sum + down_sum
        if total != 0:
            CMO_list.append(100 * (up_sum - down_sum) / total)
        else:
            # Flat window: reuse the previous value. Recomputing it from the
            # previous sums (as before) divides by zero when that window was
            # flat as well.
            CMO_list.append(CMO_list[i - 1])

    return CMO_list

# Strategy with moving averages


Сигнали - перетин ціни і плинного середнього

P - list of Close prices

n1 - к-ть періодів для середнього

MA_type - тип середнього ("SMA", "EMA", "LWMA")

In [52]:
def strategy_MA_1(P, n1, MA_type, n2=26, n3=9):
    """Price / moving-average crossover signals.

    Parameters
    ----------
    P : sequence of numbers
        Close prices, read by position.
    n1 : int
        Moving-average period.
    MA_type : str
        "SMA", "EMA", "LWMA" or "KAMA". Any other value leaves the average
        empty, so only the NaN warm-up entries are returned.
    n2, n3 : int, optional
        Extra parameters forwarded to ``KAMA`` only.

    Returns
    -------
    list
        NaN for the warm-up (1 entry for "EMA", ``n1`` entries otherwise), then
        1 when the price crosses above the average, -1 when it crosses below,
        and 0 otherwise.
    """
    # Positional copy: indexing a Series with an int is label-based.
    prices = list(P)

    MA = []
    if MA_type == "SMA":
        MA = SMA(prices, n1)
    elif MA_type == "EMA":
        MA = EMA(prices, n1)
    elif MA_type == "LWMA":
        MA = LWMA(prices, n1)
    elif MA_type == "KAMA":
        # KAMA is not defined in this part of the notebook; it receives the
        # caller's original object unchanged.
        MA = KAMA(P, n1, n2, n3)

    # The EMA is defined from the first bar; the other averages need n1 bars
    # before two consecutive values can be compared.
    warmup = 1 if MA_type == "EMA" else n1

    s = [np.nan for _ in range(warmup)]
    for i in range(warmup, len(MA)):
        if (prices[i - 1] < MA[i - 1]) and (prices[i] > MA[i]):
            s.append(1)
        elif (prices[i - 1] > MA[i - 1]) and (prices[i] < MA[i]):
            s.append(-1)
        else:
            s.append(0)
    return s

# Strategy SO

In [53]:
def strategy_SO(P_close, P_high, P_low, n):
    """Signals from crossings of the stochastic %K and %D lines.

    Returns a list with ``n`` leading NaN values, then -1 when %K moves from
    below %D to above it, 1 when %K moves from above %D to below it, and 0
    otherwise.
    """
    K, D = SO(P_close, P_high, P_low, n)

    s = [np.nan] * n
    for today in range(n, len(P_close)):
        yesterday = today - 1
        if K[today] > D[today] and K[yesterday] < D[yesterday]:
            signal = -1
        elif K[today] < D[today] and K[yesterday] > D[yesterday]:
            signal = 1
        else:
            signal = 0
        s.append(signal)
    return s

# Strategy CCI

In [54]:
def strategy_CCI(data_close, data_high, data_low, n):
    """Zero-line crossing signals of the Commodity Channel Index.

    Parameters
    ----------
    data_close, data_high, data_low : sequences of numbers
        Equal-length price series.
    n : int
        CCI window length.

    Returns
    -------
    list
        One entry per bar: 1 when the CCI crosses from negative to positive,
        -1 when it crosses from positive to negative, 0 otherwise. Bars where
        the CCI is still NaN produce 0.
    """
    cci = CCI(data_close, data_high, data_low, n)

    s = []
    for i in range(len(data_close)):
        if i == 0:
            # The first bar has no predecessor; previously cci[i-1] wrapped
            # around to the LAST element of the series.
            s.append(0)
        elif (cci[i] > 0) and (cci[i - 1] < 0):
            s.append(1)
        elif (cci[i] < 0) and (cci[i - 1] > 0):
            s.append(-1)
        else:
            s.append(0)
    return s

# Strategy CMO

In [55]:
def strategy_CMO(P, n):
    """Zero-line crossing signals of the Chande Momentum Oscillator.

    Returns a list with ``n`` leading NaN values, then 1 when the oscillator
    goes from negative to positive, -1 when it goes from positive to negative,
    and 0 otherwise.
    """
    CMO_list = CMO(P, n)

    s = [np.nan] * n
    for idx in range(n, len(P)):
        current, previous = CMO_list[idx], CMO_list[idx - 1]
        if previous < 0 and current > 0:
            signal = 1
        elif previous > 0 and current < 0:
            signal = -1
        else:
            signal = 0
        s.append(signal)
    return s

# Strategy MAE

In [56]:
def strategy_MAE(P, upper, lower, n1, MA_type, MAE_type="LL"):
    """Moving-average-envelope signals: price crossing one envelope band.

    The upper band is ``(1 + upper) * MA`` and the lower band is
    ``(1 - lower) * MA``.

    Parameters
    ----------
    P : sequence of numbers
        Close prices, read by position.
    upper, lower : float
        Relative offsets of the upper and lower band.
    n1 : int
        Moving-average period.
    MA_type : str
        "SMA", "EMA" or "LWMA".
    MAE_type : str, optional
        "UL" to watch the upper band, "LL" (default) to watch the lower band.
        Any other value returns only the NaN warm-up entries.

    Returns
    -------
    list
        NaN for the warm-up (1 entry for "EMA", ``n1`` otherwise), then 1 when
        the price crosses above the chosen band, -1 when it crosses below, and
        0 otherwise. For a negative ``n1`` the legacy placeholder
        ``["nan", "nan"]`` (two strings) is returned.
    """
    if n1 < 0:
        return ["nan", "nan"]

    # Positional copy: indexing a Series with an int is label-based.
    prices = list(P)

    MA = []
    if MA_type == "SMA":
        MA = SMA(prices, n1)
    elif MA_type == "EMA":
        MA = EMA(prices, n1)
    elif MA_type == "LWMA":
        MA = LWMA(prices, n1)

    UL, LL = [], []
    for i in range(len(prices)):
        UL.append((1 + upper) * MA[i])
        LL.append((1 - lower) * MA[i])

    # The EMA is defined from the first bar; the other averages need n1 bars.
    warmup = 1 if MA_type == "EMA" else n1
    s = [np.nan for _ in range(warmup)]

    if MAE_type == "UL":
        band = UL
    elif MAE_type == "LL":
        band = LL
    else:
        return s

    for i in range(warmup, len(MA)):
        if (prices[i - 1] < band[i - 1]) and (prices[i] > band[i]):
            s.append(1)
        elif (prices[i - 1] > band[i - 1]) and (prices[i] < band[i]):
            s.append(-1)
        else:
            s.append(0)
    return s

# Strategy ERI

In [57]:
def strategy_ERI(P_close, P_high, P_low, n=13):
    """Elder-Ray signals from sign changes of bear power and bull power.

    Parameters
    ----------
    P_close, P_high, P_low : sequences of numbers
        Equal-length price series.
    n : int, optional
        EMA period passed to ``ERI`` (default 13).

    Returns
    -------
    list
        NaN for the first bar, then 1 when bear power turns from negative to
        positive, -1 when bull power turns from positive to negative, and 0
        otherwise.
    """
    # Compute the indicator once; it was previously evaluated twice.
    BuP_, BeP_ = ERI(P_close, P_high, P_low, n)

    s = [np.nan]
    for i in range(1, len(BuP_)):
        if (BeP_[i] > 0) and (BeP_[i - 1] < 0):
            s.append(1)
        elif (BuP_[i] < 0) and (BuP_[i - 1] > 0):
            s.append(-1)
        else:
            s.append(0)

    return s

# Strategy DI

In [58]:
def strategy_DI(P_close, n):
    """Zero-line crossing signals of the SMA-based Disparity Index.

    Returns a list starting with NaN; each later entry is NaN while the index
    is NaN, 1 when it goes from negative to positive, -1 when it goes from
    positive to negative, and 0 otherwise.
    """
    DI_ = DI(P_close, n, MA_type="SMA")

    s = [np.nan]
    for previous, current in zip(DI_, DI_[1:]):
        if np.isnan(current):
            signal = np.nan
        elif current > 0 and previous < 0:
            signal = 1
        elif current < 0 and previous > 0:
            signal = -1
        else:
            signal = 0
        s.append(signal)

    return s

In [59]:
# Relative change of the close over 1, 5 and 30 rows.
for column, horizon in (
    ("Percent change 1", 1),
    ("Percent change 5", 5),
    ("Percent change 30", 30),
):
    df[column] = df["Close"].pct_change(periods=horizon)

In [60]:
# One-row relative change of the remaining price columns and of the volume.
for column, source in (
    ("Percent change Open", "Open"),
    ("Percent change High", "High"),
    ("Percent change Low", "Low"),
    ("Percent change Volume", "Volume"),
):
    df[column] = df[source].pct_change(periods=1)

In [61]:
# Preview the last rows to check the new percent-change columns.
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,0.007672,-0.090478
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,-8.4e-05,-0.156749
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,0.002128,0.092398
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,0.007621,-0.056802
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,-0.004477,-0.013735


In [62]:
# Crossover signals (1 / -1 / 0, NaN during warm-up) from the Disparity Index
# and Elder-Ray strategies.
df["DI"] = strategy_DI(P_close=df.Close, n=14)
df["ERI"] = strategy_ERI(P_close=df.Close, P_high=df.High, P_low=df.Low, n=13)

In [63]:
# Preview the last rows with the DI and ERI signal columns.
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume,DI,ERI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,0.007672,-0.090478,0.0,0.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,-8.4e-05,-0.156749,0.0,0.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,0.002128,0.092398,0.0,0.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,0.007621,-0.056802,0.0,0.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,-0.004477,-0.013735,0.0,0.0


In [64]:
# Price / moving-average crossover signals, one column per average type.
# NOTE(review): the periods 27 / 16 / 23 look like tuned values - confirm their origin.
df["SMA 1"] = strategy_MA_1(P = df.Close, n1 = 27, MA_type = "SMA")
df["EMA 1"] = strategy_MA_1(P = df.Close, n1 = 16, MA_type = "EMA")
df["LWMA 1"] = strategy_MA_1(P = df.Close, n1 = 23, MA_type = "LWMA")
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume,DI,ERI,SMA 1,EMA 1,LWMA 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,0.007672,-0.090478,0.0,0.0,0.0,0.0,0.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,-8.4e-05,-0.156749,0.0,0.0,0.0,0.0,0.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,0.002128,0.092398,0.0,0.0,0.0,0.0,0.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,0.007621,-0.056802,0.0,0.0,0.0,0.0,0.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,-0.004477,-0.013735,0.0,0.0,0.0,0.0,0.0


In [65]:
# Envelope signals on the lower band ("LL"); upper=0 is passed but the upper
# band is not used for "LL" signals.
# NOTE(review): the band widths and periods look like tuned values - confirm their origin.
df["MAE 1"] = strategy_MAE(P=df.Close, upper=0, lower=0.01, n1=5, MA_type="SMA", MAE_type="LL")
df["MAE 2"] = strategy_MAE(P=df.Close, upper=0, lower=0.04, n1=10, MA_type="EMA", MAE_type="LL")
df["MAE 3"] = strategy_MAE(P=df.Close, upper=0, lower=0.01, n1=6, MA_type="LWMA", MAE_type="LL")
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume,DI,ERI,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,0.007672,-0.090478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,-8.4e-05,-0.156749,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,0.002128,0.092398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,0.007621,-0.056802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,-0.004477,-0.013735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [66]:
# Oscillator-based signals: CCI zero-crossings, stochastic %K/%D crossings and
# CMO zero-crossings.
df["CCI 1"] = strategy_CCI(data_close=df.Close, data_high=df.High, data_low=df.Low, n=3)
df["SO 1"] = strategy_SO(P_close=df.Close, P_high=df.High, P_low=df.Low, n=26)
df["CMO 1"] = strategy_CMO(P=df.Close, n=3)
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,...,ERI,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0


In [67]:
def signal_convertation(signals):
    """Turn sparse crossover events into a held position series.

    Each 0 is replaced by the most recent non-zero signal seen so far. Zeros
    that occur before the first non-zero signal stay 0. NaN entries stay NaN
    and do not reset the remembered signal.

    Parameters
    ----------
    signals : sequence of numbers (list, NumPy array or pandas Series)
        Signal values, read by position, so the index labels of a Series do
        not matter.

    Returns
    -------
    list
        Converted signals, same length as ``signals``.
    """
    non_zero_element = 0
    converted_signals = []

    # list() gives positional iteration; indexing a Series with an int is
    # label-based and fails on non-range indexes.
    for signal in list(signals):
        if np.isnan(signal):
            converted_signals.append(np.nan)
        elif signal != 0:
            non_zero_element = signal
            converted_signals.append(signal)
        else:
            converted_signals.append(non_zero_element)

    return converted_signals

In [68]:
# Replace the sparse crossover events in every strategy column by held signals.
for signal_column in (
    "SMA 1",
    "EMA 1",
    "LWMA 1",
    "MAE 1",
    "MAE 2",
    "MAE 3",
    "CCI 1",
    "SO 1",
    "CMO 1",
    "DI",
    "ERI",
):
    df[signal_column] = signal_convertation(df[signal_column])
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,...,ERI,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0


In [69]:
#якщо ціна наступного дня зросла - ставимо 1
to_predict = []
for i in range(0,len(df.Close)-1):
    if df.Close[i+1] > df.Close[i]:
        to_predict.append(1)
    else:
        to_predict.append(0)

to_predict.append(np.nan)

In [70]:
# Attach the next-day direction label as the target column Y.
df["Y"] = to_predict
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,...,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1,Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-11,3915.540039,3960.27002,3915.540039,3939.340088,5300010000,0.010395,0.045342,0.050275,0.006051,0.010956,...,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0,1.0
2021-03-12,3924.52002,3944.98999,3915.209961,3943.340088,4469240000,0.001015,0.026393,0.041179,0.002293,-0.003858,...,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0,1.0
2021-03-15,3942.959961,3970.080078,3923.540039,3968.939941,4882190000,0.006492,0.038622,0.068574,0.004699,0.00636,...,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0,0.0
2021-03-16,3973.590088,3981.040039,3953.439941,3962.709961,4604870000,-0.00157,0.022519,0.050042,0.007768,0.002761,...,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0,1.0
2021-03-17,3949.570068,3983.870117,3935.73999,3974.120117,4541620000,0.002879,0.019316,0.03863,-0.006045,0.000711,...,1.0,1.0,1.0,1.0,1.0,1.0,1,-1.0,1.0,


In [71]:
# (rows, columns) of the full feature table, before NaN rows are removed.
print(df.shape)

(5316, 24)


In [72]:
# Number of NaN cells per column.
missing_values_count = df.isna().sum()

# how many total missing values do we have?
# np.prod replaces np.product, an alias that NumPy 2.0 removed.
total_cells = np.prod(df.shape)
total_missing = missing_values_count.sum()

# percent of data that is missing
percent_missing = (total_missing/total_cells) * 100
print(total_cells)
print(total_missing)
print(percent_missing)
print(len(df.Close))

127584
147
0.11521820917983445
5316


In [73]:
# Drop every row that still contains a NaN: indicator / pct_change warm-up rows
# and the final row, whose target Y is undefined.
# NOTE: this rebinds df, so re-running the cell acts on the already-cleaned frame.
df = df.dropna()

In [74]:
# Re-check after dropna(): number of NaN cells per column.
missing_values_count = df.isna().sum()

# how many total missing values do we have?
# np.prod replaces np.product, an alias that NumPy 2.0 removed.
total_cells = np.prod(df.shape)
total_missing = missing_values_count.sum()

# percent of data that is missing
percent_missing = (total_missing/total_cells) * 100
print(total_cells)
print(total_missing)
print(percent_missing)
print(len(df.Close))

126840
0
0.0
5285


In [75]:
# Preview the first rows that remain after the NaN rows were dropped.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,...,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1,Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-03-14,1383.619995,1395.150024,1359.150024,1359.150024,1094000000,-0.017685,0.002604,-0.025322,-0.008207,-0.002317,...,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1,0.0,-1.0,1.0
2000-03-15,1359.150024,1397.98999,1356.98999,1392.140015,1302800000,0.024273,0.018614,-0.012162,-0.017685,0.002036,...,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,-1.0,1.0
2000-03-16,1392.150024,1458.469971,1392.150024,1458.469971,1482300000,0.047646,0.040508,0.035022,0.02428,0.043262,...,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,1.0
2000-03-17,1458.469971,1477.329956,1453.319946,1464.469971,1295100000,0.004114,0.049747,0.02772,0.047639,0.012931,...,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,0.0
2000-03-20,1464.469971,1470.300049,1448.48999,1456.630005,920800000,-0.005353,0.052767,0.022649,0.004114,-0.004759,...,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,1.0


In [76]:
# Remove the raw price / volume columns; only the derived columns and Y remain.
# NOTE: in-place and not idempotent - running this cell twice raises KeyError.
df.drop(['Open', 'High', 'Close', 'Low', 'Volume'], axis='columns', inplace=True)

In [77]:
# Preview the final table after the raw price / volume columns were removed.
df.head()

Unnamed: 0_level_0,Percent change 1,Percent change 5,Percent change 30,Percent change Open,Percent change High,Percent change Low,Percent change Volume,DI,ERI,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1,Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2000-03-14,-0.017685,0.002604,-0.025322,-0.008207,-0.002317,-0.004169,0.076666,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1,0.0,-1.0,1.0
2000-03-15,0.024273,0.018614,-0.012162,-0.017685,0.002036,-0.001589,0.190859,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,-1.0,1.0
2000-03-16,0.047646,0.040508,0.035022,0.02428,0.043262,0.02591,0.13778,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,1.0
2000-03-17,0.004114,0.049747,0.02772,0.047639,0.012931,0.043939,-0.12629,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,0.0
2000-03-20,-0.005353,0.052767,0.022649,0.004114,-0.004759,-0.003323,-0.289012,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1,0.0,1.0,1.0


In [78]:
# (rows, columns) of the table that is exported below.
print(df.shape)

(5285, 19)


In [79]:
# Export the final table to Excel.
# Raw string: the Windows path contains backslashes ("\К", "\D") that are
# invalid escape sequences in a normal literal and trigger a SyntaxWarning on
# recent Python versions. The resulting path is unchanged.
# NOTE(review): absolute, machine-specific path - consider a configurable output directory.
df.to_excel(r'd:\Курсова\Курсова 2021\Data for NN SP500.xlsx')