In [34]:
import yfinance as yf
from datetime import date, datetime
import pandas as pd 
import numpy as np

#names_list = ['^GSPC', '000001.SS', '^KS11', '^N225']
#real_names_kist = ['SP500', 'SSE Composite Index', 'KOSPI', 'Nikkei225']

In [35]:
start_date = date(2000, 2, 1)
end_date = date(2021, 3, 12)

tickerSymbol = '^GSPC'
tickerData = yf.Ticker(tickerSymbol)

df = tickerData.history(period='1d', start=start_date, end=end_date).dropna()
df.drop(['Stock Splits', 'Dividends'], axis='columns', inplace=True)
df = pd.DataFrame(data = df)
df.index = pd.to_datetime(df.index)
# df["Date"] = df.index

df_copy = tickerData.history(period='1d', start=start_date, end=end_date).dropna()

df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000


# Simple Moving Average

In [6]:
def SMA(data,n):
    
    #додаємо NaN на початку
	sma_list=[np.nan for k in range(n-1)]
    
	for i in range(len(data)-n+1):
		sum_n = 0
		for j in range(n):
			sum_n = sum_n + data[i+j]
		sma=sum_n/n
		sma_list.append(round(sma, 2))
        
	return sma_list

# Linear Weighted Moving Average

In [7]:
def LWMA(data,n):
    
	lma_list=[np.nan for k in range(n-1)]

	for i in range(len(data)-n+1):
		sum_n, sum_j = 0, 0
		for j in range(n):
			sum_n = sum_n + (j+1)*data[i+j]
			sum_j = sum_j + (j+1)
		lma=sum_n/sum_j
		lma_list.append(lma)

	return lma_list

# Exponential Moving Average

In [8]:
def EMA(data, n, a=0):
	ema_list=[data[0]]
	if a==0:
		a=2/(n+1)

	for i in range (1,len(data)):
		ema = 0
		ema = (1-a)*ema_list[i-1]+a*data[i]
		ema_list.append(ema)

	return ema_list

# Disparsity Index

In [9]:
def DI(data, n, MA_type):
    if MA_type == "EMA":
        MA = EMA(data, n)
    elif MA_type == "LWMA":
        MA = LWMA(data, n)
    else:
        MA = SMA(data, n)
    
    di_list = []
    for i in range(len(MA)):
        if (np.isnan(MA[i])):
            di_list.append(np.nan)
        else:
            di_list.append((data[i]-MA[i])/(100*MA[i]))
        
    return di_list

# CCI

In [10]:
def CCI(data_close, data_high, data_low, n):
	p_typical = []

	for i in range(len(data_close)):
		p_typical.append((data_close[i]+data_low[i]+data_high[i])/3)
	sma = SMA(p_typical,n)

	MAD = [np.nan for k in range(n-1)]
	for j in range(n-1, len(data_close)):
		mad = 0
		for s in range(n):
			mad = mad +  abs(p_typical[j-s]-sma[j])
		MAD.append(mad/n)

	CCI = [np.nan for n in range(n-1)]
	for t in range(n-1, len(data_close)):
		cci = (p_typical[t]-sma[t])/(0.015*MAD[t])
		CCI.append(cci)

	return CCI

# SO

In [11]:
def SO(data_close, data_high, data_low, n):
	K, D = [np.nan for k in range(n-1)], [np.nan for k in range(n-1)]
	highest_high, lowest_low = [np.nan for k in range(n-1)], [np.nan for k in range(n-1)]

	for i in range(len(data_close)-n+1):
		list_n_high, list_n_low = [], []
		for j in range(0, n):
			list_n_high.append(data_high[i+j])
			list_n_low.append(data_low[i+j])
		highest_high.append(max(list_n_high))
		lowest_low.append(min(list_n_low))

	for i in range(n-1, len(highest_high)):
		K.append(100*(data_close[i]-lowest_low[i])/(highest_high[i]-lowest_low[i]))
	D = SMA(K, n)

	return K, D

# Elder-Ray Index

In [12]:
def ERI(data_close, data_high, data_low, n):
    MA = EMA(data_close, n)
    
    BuP, BeP = [], []
    for i in range(len(data_close)):
        BuP.append(data_high[i] - MA[i])
        BeP.append(data_low[i] - MA[i])
    return BuP, BeP

# CMO

In [13]:
def CMO(data, n):
	CMO1, CMO2 = [np.nan], [np.nan]

	for i in range(1,len(data)):
		if data[i]>data[i-1]:
			CMO1.append(data[i]-data[i-1])
			CMO2.append(0)
		elif data[i]<data[i-1]:
			CMO1.append(0)
			CMO2.append(data[i-1]-data[i])
		else:
			CMO1.append(0)
			CMO2.append(0)

	sH, sL = [np.nan for k in range(n)], [np.nan for k in range(n)]
	for i in range(n,len(data)):
		sh, sl = 0, 0
		for t in range(n):
			sh = sh + CMO1[i-t]
			sl = sl + CMO2[i-t]
		sH.append(sh)
		sL.append(sl)

	CMO_list=[np.nan for k in range(n)] 
	for i in range(n,len(sH)):
		if (sH[i]+sL[i]) != 0:
			CMO_list.append(100*(sH[i]-sL[i])/(sH[i]+sL[i]))
		else:
			CMO_list.append(100*(sH[i-1]-sL[i-1])/(sH[i-1]+sL[i-1]))
		

	return CMO_list

# Strategy with moving averages


Сигнали - перетин ціни і плинного середнього

P - list of Close prices

n1 - к-ть періодів для середнього

MA_type - тип середнього ("SMA", "EMA", "LWMA")

In [14]:
def strategy_MA_1(P, n1, MA_type, n2=26, n3=9):
	MA= []
	if MA_type == "SMA":
		MA = SMA(P, n1)
	elif MA_type == "EMA":
		MA = EMA(P, n1)
	elif MA_type == "LWMA":
		MA = LWMA(P, n1)
	elif MA_type == "KAMA":
		MA = KAMA(P, n1, n2, n3)

	if MA_type != "EMA":
		s=[np.nan for k in range(n1)]
		for i in range(n1, len(MA)):
			if (P[i-1]<MA[i-1]) and (P[i]>MA[i]):
				s.append(1)
			elif (P[i-1]>MA[i-1]) and (P[i]<MA[i]):
				s.append(-1)
			else:
				s.append(0)
	elif MA_type == "EMA":
		s=[np.nan]
		for i in range(1,len(MA)):
			if (P[i-1]<MA[i-1]) and (P[i]>MA[i]):
				s.append(1)
			elif (P[i-1]>MA[i-1]) and (P[i]<MA[i]):
				s.append(-1)
			else:
				s.append(0)
	return s

# Strategy SO

In [15]:
def strategy_SO(P_close, P_high, P_low, n):
	K, D = SO(P_close, P_high, P_low, n)
	s=[np.nan for k in range(n)]
	for i in range(n, len(P_close)):
		if (K[i]>D[i]) and (K[i-1]<D[i-1]):
			s.append(-1)
		elif (K[i]<D[i]) and (K[i-1]>D[i-1]):
			s.append(1)
		else:
			s.append(0)
	return s

# Strategy CCI

In [16]:
def strategy_CCI(data_close, data_high, data_low, n):
	cci = CCI(data_close, data_high, data_low, n)
	s=[]
	for i in range(len(data_close)):
		if ((cci[i]>0) and (cci[i-1]<0)):
			s.append(1)
		elif ((cci[i]<0) and (cci[i-1]>0)):
			s.append(-1)
		else:
			s.append(0)
	return s

# Strategy CMO

In [17]:
def strategy_CMO(P, n):
	CMO_list = CMO(P, n)
	s=[np.nan for k in range(n)]
	for i in range(n, len(P)):
		if (CMO_list[i]>0) and (CMO_list[i-1]<0):
			s.append(1)
		elif (CMO_list[i]<0) and (CMO_list[i-1]>0):
			s.append(-1)
		else:
			s.append(0)
	return s

# Strategy MAE

In [18]:
def strategy_MAE(P, upper, lower, n1, MA_type, MAE_type="LL"):
	if n1 >= 0:
		UL, LL, MA = [], [], []
		if MA_type == "SMA":
			MA = SMA(P, n1)
		elif MA_type == "EMA":
			MA = EMA(P, n1)
		elif MA_type == "LWMA":
			MA = LWMA(P, n1)

		for i in range(len(P)):
			UL.append((1+upper)*MA[i])
			LL.append((1-lower)*MA[i])
		if MA_type != "EMA":
			s=[np.nan for k in range(n1)]
			if MAE_type == "UL":
				for i in range(n1, len(MA)):
					if (P[i-1]<UL[i-1]) and (P[i]>UL[i]):
						s.append(1)
					elif (P[i-1]>UL[i-1]) and (P[i]<UL[i]):
						s.append(-1)
					else:
						s.append(0)
			elif MAE_type == "LL":
				for i in range(n1, len(MA)):
					if (P[i-1]<LL[i-1]) and (P[i]>LL[i]):
						s.append(1)
					elif (P[i-1]>LL[i-1]) and (P[i]<LL[i]):
						s.append(-1)
					else:
						s.append(0)
		elif MA_type == "EMA":
			s=[np.nan]
			if MAE_type == "UL":
				for i in range(1, len(MA)):
					if (P[i-1]<UL[i-1]) and (P[i]>UL[i]):
						s.append(1)
					elif (P[i-1]>UL[i-1]) and (P[i]<UL[i]):
						s.append(-1)
					else:
						s.append(0)
			elif MAE_type == "LL":
				for i in range(1, len(MA)):
					if (P[i-1]<LL[i-1]) and (P[i]>LL[i]):
						s.append(1)
					elif (P[i-1]>LL[i-1]) and (P[i]<LL[i]):
						s.append(-1)
					else:
						s.append(0)
		return s
	else:
		return ["nan", "nan"]

# checking strategies

In [56]:
df["SMA 1"] = strategy_MA_1(P = df.Close, n1 = 2, MA_type = "SMA")
df["EMA 1"] = strategy_MA_1(P = df.Close, n1 = 6, MA_type = "EMA")
df["LWMA 1"] = strategy_MA_1(P = df.Close, n1 = 2, MA_type = "LWMA")
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0


In [57]:
df["MAE 1"] = strategy_MAE(P=df.Close, upper=0, lower=0.04, n1=4, MA_type="SMA", MAE_type="LL")
df["MAE 2"] = strategy_MAE(P=df.Close, upper=0, lower=0.01, n1=3, MA_type="EMA", MAE_type="LL")
df["MAE 3"] = strategy_MAE(P=df.Close, upper=0, lower=0.02, n1=3, MA_type="LWMA", MAE_type="LL")
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0


In [58]:
df["CCI 1"] = strategy_CCI(data_close=df.Close, data_high=df.High, data_low=df.Low, n=9)
df["SO 1"] = strategy_SO(P_close=df.Close, P_high=df.High, P_low=df.Low, n=25)
df["CMO 1"] = strategy_CMO(P=df.Close, n=3)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0


In [59]:
def signal_convertation(signals):
    non_zero_element = 0
    converted_signals = []
    #елементи на початку ряду можуть бути нулями, ми їх не змінюємо до появи першого сигналу
    
    for i in range(len(signals)):
        if np.isnan(signals[i]):
            converted_signals.append(np.nan)
        else:
            if signals[i] != 0:
                non_zero_element = signals[i]
                converted_signals.append(signals[i])
            else:
                converted_signals.append(non_zero_element)
                
    return converted_signals

In [60]:
df.to_excel('d:\Курсова\Курсова 2021\signal conversion 1.xlsx')
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-03-05,3793.58,3851.69,3730.19,3841.94,6842570000,1.0,1.0,1.0,0.0,1.0,0.0,0,0.0,0.0
2021-03-08,3844.39,3881.06,3819.25,3821.35,5852240000,-1.0,-1.0,-1.0,0.0,0.0,0.0,0,0.0,1.0
2021-03-09,3851.93,3903.76,3851.93,3875.44,5496340000,1.0,1.0,1.0,0.0,0.0,0.0,1,0.0,0.0
2021-03-10,3891.99,3917.35,3885.73,3898.81,5827250000,0.0,0.0,0.0,0.0,0.0,0.0,0,-1.0,0.0
2021-03-11,3915.54,3960.27,3915.54,3939.34,5300010000,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0


In [61]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0


In [62]:
df["SMA 1"] = signal_convertation(df["SMA 1"])
df.head()
# print(len(answer))
# print(len(df["SMA 1"]))

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0


In [63]:
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-03-05,3793.58,3851.69,3730.19,3841.94,6842570000,1.0,1.0,1.0,0.0,1.0,0.0,0,0.0,0.0
2021-03-08,3844.39,3881.06,3819.25,3821.35,5852240000,-1.0,-1.0,-1.0,0.0,0.0,0.0,0,0.0,1.0
2021-03-09,3851.93,3903.76,3851.93,3875.44,5496340000,1.0,1.0,1.0,0.0,0.0,0.0,1,0.0,0.0
2021-03-10,3891.99,3917.35,3885.73,3898.81,5827250000,1.0,0.0,0.0,0.0,0.0,0.0,0,-1.0,0.0
2021-03-11,3915.54,3960.27,3915.54,3939.34,5300010000,1.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0


In [64]:
df["EMA 1"] = signal_convertation(df["EMA 1"])
df["LWMA 1"] = signal_convertation(df["LWMA 1"])
df["MAE 1"] = signal_convertation(df["MAE 1"])
df["MAE 2"] = signal_convertation(df["MAE 2"])
df["MAE 3"] = signal_convertation(df["MAE 3"])
df["CCI 1"] = signal_convertation(df["CCI 1"])
df["SO 1"] = signal_convertation(df["SO 1"])
df["CMO 1"] = signal_convertation(df["CMO 1"])

In [65]:
df.to_excel('d:\Курсова\Курсова 2021\signal conversion 2.xlsx')

In [83]:
#якщо ціна наступного дня зросла - ставимо 1

to_predict = []
for i in range(0,len(df.Close)-1):
    if df.Close[i+1] > df.Close[i]:
        to_predict.append(1)
    else:
        to_predict.append(0)

to_predict.append(np.nan)
# print(len(to_predict))
# print(len(df.Close))

In [87]:
df["Y"] = to_predict

In [88]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA 1,EMA 1,LWMA 1,MAE 1,MAE 2,MAE 3,CCI 1,SO 1,CMO 1,increase,Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000-01-31,1360.16,1394.48,1350.14,1394.46,993800000,,,,,,,0,,,1.0,1.0
2000-02-01,1394.46,1412.49,1384.79,1409.28,981000000,,0.0,,,0.0,,0,,,0.0,0.0
2000-02-02,1409.28,1420.61,1403.49,1409.12,1038600000,-1.0,0.0,-1.0,,0.0,,0,,,1.0,1.0
2000-02-03,1409.12,1425.78,1398.52,1424.97,1146500000,1.0,0.0,1.0,,0.0,0.0,0,,0.0,0.0,0.0
2000-02-04,1424.97,1435.91,1420.63,1424.37,1045100000,-1.0,0.0,-1.0,0.0,0.0,0.0,0,,0.0,0.0,0.0


In [86]:
df.to_excel('d:\Курсова\Курсова 2021\increase.xlsx')