### 데이터 준비 및 환경 설정

In [1]:
import os
import pandas as pd
# Load every per-stock price CSV into a dict keyed by stock name
# (the file name without its ".csv" extension).
sp_data_dict = dict()
data_dir = "../../데이터/주가데이터"
for file_name in os.listdir(data_dir):
    stock_name, ext = os.path.splitext(file_name)
    # os.listdir returns every directory entry; skip anything that is not a
    # CSV file (hidden files, checkpoint folders, ...) instead of handing it
    # to pd.read_csv.
    if ext.lower() != ".csv":
        continue
    sp_data = pd.read_csv(os.path.join(data_dir, file_name),
                          parse_dates = ['Date'])
    sp_data_dict[stock_name] = sp_data

In [2]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
# Apply seaborn's default theme to subsequent matplotlib figures.
sns.set()
# IPython magic: render figures inline in the notebook output.
%matplotlib inline
# Font override is placed after sns.set(); presumably chosen so that Korean
# text in plots renders with a font that has Hangul glyphs — TODO confirm the
# font is installed on the target machine.
rcParams['font.family'] = 'Malgun Gothic'
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus
# sign, which the selected font may not contain.
rcParams['axes.unicode_minus'] = False

### 주가 회복 비율 계산

In [3]:
def count_loss_and_recover(sp_data, n1, n2, m1, m2):
    """Count drawdown events and how many of them recovered afterwards.

    Day ``i`` is a loss event when its close lies at least ``m1`` percent
    below the highest close of the ``n1`` rows ending at ``i``.  The event is
    counted as recovered when the highest close of the following ``n2`` rows
    (``i + 1`` .. ``i + n2``) is at least ``m2`` percent above the close of
    day ``i``.  Only rows ``n1 <= i < len(sp_data) - n2`` are examined, so
    both windows are fully inside the data.

    Args:
        sp_data: DataFrame with a ``"Close"`` column.
        n1: Look-back window length (rows) for the prior high.
        n2: Look-ahead window length (rows) for the recovery check.
        m1: Minimum drop from the prior high, in percent.
        m2: Minimum rebound from the current close, in percent.

    Returns:
        Tuple ``(num_recover, num_loss)`` of Python ints.
    """
    close = sp_data["Close"]
    cur_price_list = close.values
    n1_rmax_list = close.rolling(n1).max().values
    n2_rmax_list = close.rolling(n2).max().values
    pre_loss_list = (n1_rmax_list - cur_price_list) / n1_rmax_list * 100

    # Exclusive upper bound of the examined rows.  Guard explicitly: a
    # negative bound would otherwise be read as "count from the end" by the
    # slices below.
    stop = len(sp_data) - n2
    if stop <= n1:
        return 0, 0

    # Vectorised form of the per-row checks; comparisons involving NaN are
    # False, exactly as in a scalar comparison.
    is_loss = pre_loss_list[n1:stop] >= m1
    target_list = cur_price_list[n1:stop] * (1 + m2 / 100)
    # n2_rmax_list[i + n2] is the max close over rows i+1 .. i+n2.
    future_max_list = n2_rmax_list[n1 + n2:stop + n2]
    is_recover = is_loss & (future_max_list >= target_list)

    return int(is_recover.sum()), int(is_loss.sum())

In [4]:
import numpy as np
def sum_count_loss_and_recover(sp_data_dict, n1, n2, m1, m2):
    """Return the pooled recovery ratio over every stock in ``sp_data_dict``.

    Calls ``count_loss_and_recover`` on each DataFrame, adds up the recovered
    and loss counts, and returns ``recovered / losses``.  When no loss event
    was counted at all the result is ``np.nan``.
    """
    recovered = 0
    lost = 0
    for frame in sp_data_dict.values():
        counts = count_loss_and_recover(frame, n1, n2, m1, m2)
        recovered += counts[0]
        lost += counts[1]

    # Avoid dividing by zero when nothing qualified as a loss.
    if lost > 0:
        return recovered / lost
    return np.nan

### 주가 회복률 시각화

In [5]:
def make_heatmap(n1, n2, m1_range, m2_range):
    """Plot the pooled recovery ratio for every (m1, m2) combination.

    Rows of the heatmap follow ``m1_range`` and columns follow ``m2_range``;
    each cell is ``sum_count_loss_and_recover`` evaluated on the module-level
    ``sp_data_dict`` with the given ``n1`` and ``n2``.  The figure is shown
    immediately; nothing is returned.
    """
    graph_data = [
        [sum_count_loss_and_recover(sp_data_dict, n1, n2, m1, m2)
         for m2 in m2_range]
        for m1 in m1_range
    ]

    sns.heatmap(graph_data, annot = True, fmt = ".2f", linewidth = 1)
    # Centre each tick on its cell and label it with the parameter value.
    x_positions = np.arange(0.5, len(m2_range), 1)
    y_positions = np.arange(0.5, len(m1_range), 1)
    plt.xticks(x_positions, m2_range)
    plt.yticks(y_positions, m1_range)
    plt.show()

In [6]:
# 60-row look-back and look-ahead; m1 from 5 to 30 and m2 from 30 down to 5.
make_heatmap(n1=60, n2=60, m1_range=range(5, 31, 5), m2_range=range(30, 0, -5))

KeyboardInterrupt: 

In [None]:
# 120-row look-back and look-ahead; m1 from 5 to 30 and m2 from 30 down to 5.
make_heatmap(n1=120, n2=120, m1_range=range(5, 31, 5), m2_range=range(30, 0, -5))

In [None]:
# 240-row look-back and look-ahead; m1 from 5 to 30 and m2 from 30 down to 5.
make_heatmap(n1=240, n2=240, m1_range=range(5, 31, 5), m2_range=range(30, 0, -5))

### 평균 회귀 전략 구현 및 검증

#### 전략 구현

In [7]:
def calc_ror_using_mean_reg(sp_data_dict, n1, n2, m1, m2):
    """Back-test a buy-the-dip rule and return the per-trade returns.

    For every stock and every row ``i`` with
    ``n1 <= i <= len(sp_data) - n2``, a trade is opened at the close of row
    ``i`` when that close is at least ``m1`` percent below the highest close
    of the ``n1`` rows ending at ``i``.  The trade is closed at the first
    close in rows ``i`` .. ``i + n2`` that is strictly more than ``m2``
    percent above the buy price, or at the last close of that window when the
    target is never exceeded.  For the final start row the window is cut
    short by the end of the data.

    Args:
        sp_data_dict: Mapping of stock name to a DataFrame with a ``"Close"``
            column.
        n1: Look-back window length (rows) for the prior high.
        n2: Maximum holding period (rows).
        m1: Minimum drop from the prior high, in percent, that triggers a buy.
        m2: Target gain over the buy price, in percent.

    Returns:
        List with one rate of return (percent) per trade, across all stocks.
    """
    ror_list = []
    target_ratio = 1 + m2 / 100
    for sp_data in sp_data_dict.values():
        close = sp_data["Close"]
        cur_price_list = close.values
        n1_rmax_list = close.rolling(n1).max().values
        pre_loss_list = (n1_rmax_list - cur_price_list) / n1_rmax_list * 100
        for i in range(n1, len(sp_data) - n2 + 1):
            if pre_loss_list[i] >= m1:
                bp = cur_price_list[i]  # buy price
                # Sell-price candidates.  Sliced by position on the NumPy
                # array so the window matches `i` whatever the DataFrame's
                # index labels are; the end is inclusive (rows i .. i + n2).
                cand_sp_list = cur_price_list[i:i + n2 + 1]
                target_price = bp * target_ratio
                if cand_sp_list.max() > target_price:
                    # Target exceeded: sell at the first close above it.
                    sp = cand_sp_list[cand_sp_list > target_price][0]
                else:
                    # Target never exceeded: sell at the end of the window.
                    sp = cand_sp_list[-1]
                ror = (sp - bp) / bp * 100
                ror_list.append(ror)
    return ror_list

#### 전 종목에 대한 검증

In [8]:
# All stocks: 20-row look-back, 20-row holding window, buy after a drop of
# at least 5%, sell once the close exceeds +5%.
n1, n2, m1, m2 = 20, 20, 5, 5
ror_list = calc_ror_using_mean_reg(sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    2461008.000
mean           0.740
std           10.995
min          -89.120
25%           -5.294
50%            5.172
75%            6.852
max          900.114
dtype: float64

In [9]:
# All stocks: 60-row look-back, 120-row holding window, buy after a drop of
# at least 20%, sell once the close exceeds +10%.
n1, n2, m1, m2 = 60, 120, 20, 10
profit_list = calc_ror_using_mean_reg(sp_data_dict, n1, n2, m1, m2)
display(pd.Series(profit_list).describe().round(decimals=3))

count    1063384.000
mean           5.408
std           17.906
min          -96.688
25%           10.003
50%           11.303
75%           13.978
max          900.000
dtype: float64

In [10]:
# All stocks: 240-row look-back, 240-row holding window, buy after a drop of
# at least 30%, sell once the close exceeds +30%.
n1, n2, m1, m2 = 240, 240, 30, 30
profit_list = calc_ror_using_mean_reg(sp_data_dict, n1, n2, m1, m2)
display(pd.Series(profit_list).describe().round(decimals=3))

count    1430601.000
mean          10.136
std           32.543
min          -96.419
25%          -16.208
50%           30.315
75%           33.797
max          945.303
dtype: float64

#### 종목 유형에 따른 검증

In [11]:
# Load stock information: a tab-separated, EUC-KR encoded text file and the
# market-capitalisation Excel sheet.
stock_info = pd.read_csv("../../데이터/종목정보.txt", sep = "\t", encoding = "euc-kr")
market_cap = pd.read_excel("../../데이터/211104_시가총액.xlsx")

In [12]:
# Build the stock-name arrays used to split the universe by market segment.
# NOTE(review): this rebinds stock_info to the market-cap sheet, replacing
# whatever the name referred to before this cell ran.
stock_info = pd.read_excel("../../데이터/211104_시가총액.xlsx")
KOSPI_list = stock_info[stock_info["시장구분"] == "KOSPI"]["종목명"].values
KOSDAQ_list = stock_info[stock_info["시장구분"] == "KOSDAQ"]["종목명"].values
# Names of the 100 rows with the largest market capitalisation.
TOP100_list = (
    stock_info.sort_values(by = "시가총액", ascending = False)["종목명"]
    .head(100)
    .values
)

In [13]:
# Split the price data into per-segment dicts.  A stock may land in more than
# one dict (the three checks are independent).
KOSPI_sp_data_dict = dict()
KOSDAQ_sp_data_dict = dict()
TOP100_sp_data_dict = dict()

# The *_list objects are arrays; an `in` test against an array compares with
# every element.  Build sets once so each lookup in the loop is a hash probe.
kospi_names = set(KOSPI_list)
kosdaq_names = set(KOSDAQ_list)
top100_names = set(TOP100_list)

for stock_name in sp_data_dict.keys():
    if stock_name in kospi_names:
        KOSPI_sp_data_dict[stock_name] = sp_data_dict[stock_name]
    if stock_name in kosdaq_names:
        KOSDAQ_sp_data_dict[stock_name] = sp_data_dict[stock_name]
    if stock_name in top100_names:
        TOP100_sp_data_dict[stock_name] = sp_data_dict[stock_name]

#### 코스피 종목에 대한 검증

In [14]:
# KOSPI subset: 20-row look-back, 20-row holding window, buy after a drop of
# at least 5%, sell once the close exceeds +5%.
n1, n2, m1, m2 = 20, 20, 5, 5
ror_list = calc_ror_using_mean_reg(KOSPI_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    902535.000
mean          0.558
std          11.277
min         -77.156
25%          -5.028
50%           5.019
75%           6.468
max         900.114
dtype: float64

In [15]:
# KOSPI subset: 60-row look-back, 120-row holding window, buy after a drop of
# at least 20%, sell once the close exceeds +10%.
n1, n2, m1, m2 = 60, 120, 20, 10
ror_list = calc_ror_using_mean_reg(KOSPI_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    336763.000
mean          5.012
std          18.012
min         -89.824
25%           1.038
50%          11.093
75%          13.433
max         900.000
dtype: float64

In [16]:
# KOSPI subset: 240-row look-back, 240-row holding window, buy after a drop of
# at least 30%, sell once the close exceeds +30%.
n1, n2, m1, m2 = 240, 240, 30, 30
ror_list = calc_ror_using_mean_reg(KOSPI_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    493029.000
mean          8.520
std          33.271
min         -95.737
25%         -16.722
50%          18.033
75%          32.960
max         945.303
dtype: float64

#### 코스닥 종목에 대한 검증

In [17]:
# KOSDAQ subset: 20-row look-back, 20-row holding window, buy after a drop of
# at least 5%, sell once the close exceeds +5%.
n1, n2, m1, m2 = 20, 20, 5, 5
ror_list = calc_ror_using_mean_reg(KOSDAQ_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    1549629.000
mean           0.844
std           10.824
min          -89.120
25%           -5.481
50%            5.269
75%            7.089
max          137.304
dtype: float64

In [18]:
# KOSDAQ subset: 60-row look-back, 120-row holding window, buy after a drop of
# at least 20%, sell once the close exceeds +10%.
n1, n2, m1, m2 = 60, 120, 20, 10
ror_list = calc_ror_using_mean_reg(KOSDAQ_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    721836.000
mean          5.596
std          17.841
min         -96.688
25%          10.047
50%          11.414
75%          14.230
max         200.266
dtype: float64

In [19]:
# KOSDAQ subset: 240-row look-back, 240-row holding window, buy after a drop
# of at least 30%, sell once the close exceeds +30%.
n1, n2, m1, m2 = 240, 240, 30, 30
ror_list = calc_ror_using_mean_reg(KOSDAQ_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

count    931839.000
mean         10.954
std          32.120
min         -96.419
25%         -15.909
50%          30.551
75%          34.234
max         207.030
dtype: float64

#### 우량 종목에 대한 검증

In [None]:
# TOP100 subset: 20-row look-back, 20-row holding window, buy after a drop of
# at least 5%, sell once the close exceeds +5%.
n1, n2, m1, m2 = 20, 20, 5, 5
ror_list = calc_ror_using_mean_reg(TOP100_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

In [None]:
# TOP100 subset: 60-row look-back, 120-row holding window, buy after a drop of
# at least 20%, sell once the close exceeds +10%.
n1, n2, m1, m2 = 60, 120, 20, 10
ror_list = calc_ror_using_mean_reg(TOP100_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))

In [None]:
# TOP100 subset: 240-row look-back, 240-row holding window, buy after a drop
# of at least 30%, sell once the close exceeds +30%.
n1, n2, m1, m2 = 240, 240, 30, 30
ror_list = calc_ror_using_mean_reg(TOP100_sp_data_dict, n1, n2, m1, m2)
display(pd.Series(ror_list).describe().round(decimals=3))