### 그래프 환경설정

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
# Apply seaborn's default theme to the matplotlib figures drawn in this notebook.
sns.set()
# IPython magic: render figures inline in the notebook output.
%matplotlib inline
# Use the 'Malgun Gothic' font family; presumably needed so the Hangul tick
# labels used in later plots render correctly.
# NOTE(review): seaborn's theme also writes font settings into rcParams, so
# this override presumably has to stay after sns.set() - confirm.
rcParams['font.family'] = 'Malgun Gothic'
# Draw negative tick values with an ASCII hyphen instead of the Unicode minus sign.
rcParams['axes.unicode_minus'] = False

### 데이터 정제

In [None]:
import pandas as pd
# Load the analyst-report table: a tab-separated text file encoded as EUC-KR.
data = pd.read_csv("../../데이터/주가리포트.txt", sep = "\t", encoding = "euc-kr")
# Preview the first rows (display is supplied by the IPython/Jupyter session).
display(data.head())

#### 의견 표현 통일

In [None]:
# Show how often each raw opinion label occurs before the labels are normalised.
display(data['의견'].value_counts())

In [None]:
# Drop reports whose opinion is '매도' (sell), the '-' placeholder, or
# 'Market Under Perform'; only the remaining rows are kept for the analysis.
data = data.loc[~data['의견'].isin(['매도', '-', 'Market Under Perform'])]

In [None]:
# Raw labels that are all treated as a "매수" (buy) opinion.
buy_opinions = ['매수(유지)', "Outperform", "StrongBuy", "강력매수", "신규매수", "단기매수", "적극매수"]
# A row becomes "매수" when it already is "매수" or carries one of the synonyms
# above; every other value (including missing ones) becomes "중립" (neutral).
is_buy = data['의견'].isin(buy_opinions + ["매수"])
data['의견'] = is_buy.map({True: "매수", False: "중립"})

In [None]:
# Show the share (fraction of rows) of each normalised opinion label.
display(data['의견'].value_counts(normalize = True))

#### 적정 가격을 숫자로 표현

In [None]:
import numpy as np
def filtering_numeric(value):
    """Extract the decimal digits embedded in ``value`` as a single integer.

    Parameters
    ----------
    value : str, int, float or None
        Raw cell content, e.g. ``'12,345원'``. Values that are already
        numeric (including NumPy scalars and NaN) are passed through.

    Returns
    -------
    int or float
        The input itself when it is already numeric; otherwise the integer
        formed by concatenating every decimal digit found in the text, or
        ``np.nan`` when the text holds no digit or the input is ``None``.

    Notes
    -----
    All non-digit characters are discarded, so separators, signs and
    decimal points are dropped: ``'1.5'`` becomes ``15``.
    """
    if value is None:
        return np.nan
    # isinstance (not an exact type match) also lets ints and NumPy scalars
    # such as np.float64 through instead of trying to iterate over them.
    if isinstance(value, (int, float, np.integer, np.floating)):
        return value
    # str.isdecimal() accepts only characters int() can parse; isnumeric()
    # also accepts e.g. '²' or '½', which would make int() raise ValueError.
    digits = ''.join(ch for ch in value if ch.isdecimal())
    if not digits:
        return np.nan
    return int(digits)

In [None]:
# Spot-check the parser on a plain number, a text without digits, and a
# number followed by the currency unit.
for sample in ('12345', '없음', '12345원'):
    print(filtering_numeric(sample))

In [None]:
# Convert the target-price column to numbers (NaN where no digits were found).
data['적정가격'] = data['적정가격'].map(filtering_numeric)
# Remove incomplete rows in place.
# NOTE(review): this drops rows with a missing value in ANY column, not only
# in '적정가격' - confirm that is intended.
data.dropna(axis = 0, how = "any", inplace = True)

#### 주가 부착

In [None]:
import os
# Map each stock name to its price history; one "<stock name>.csv" per stock.
sp_data_dict = dict()
path = "../../데이터/주가데이터/"
# List the directory once and keep it as a set so each lookup is O(1)
# instead of re-reading the directory for every stock.
available_files = set(os.listdir(path))
missing_stocks = []
for stock_name in data['종목명'].unique():
    file_name = stock_name + ".csv"
    if file_name in available_files:
        sp_data = pd.read_csv(os.path.join(path, file_name), parse_dates = ['Date'])
        sp_data_dict[stock_name] = sp_data
    else:
        missing_stocks.append(stock_name)
# Reports for stocks without a price file cannot be evaluated; drop them all
# in a single pass rather than re-filtering the frame once per missing stock.
if missing_stocks:
    data = data.loc[~data['종목명'].isin(missing_stocks)]

In [None]:
def find_stock_price(stock_name, date, direction = "past"):
    """Return the closing price of ``stock_name`` on the trading day nearest to ``date``.

    Parameters
    ----------
    stock_name : str
        Key into the module-level ``sp_data_dict``; the stored frame must
        provide ``'Date'`` and ``'Close'`` columns.
    date : datetime-like
        Reference date (anything comparable with the ``'Date'`` column,
        e.g. ``pd.Timestamp`` or ``np.datetime64``).
    direction : str, default "past"
        ``"past"`` picks the latest recorded date on or before ``date``;
        any other value (normally ``"future"``) picks the earliest recorded
        date on or after ``date``.

    Returns
    -------
    scalar
        The ``'Close'`` value of the first row recorded for the selected
        date, or ``np.nan`` when no recorded date lies in that direction.

    Raises
    ------
    KeyError
        If ``stock_name`` is not present in ``sp_data_dict``.
    """
    stock_data = sp_data_dict[stock_name]
    dates = stock_data['Date']
    # One bounded lookup replaces the former day-by-day scan, which rescanned
    # the column at every step and could not terminate when ``date`` carried
    # a time-of-day component or the table was empty.
    if direction == "past":
        candidates = dates[dates <= date]
        if candidates.empty:
            return np.nan
        target_date = candidates.max()
    else:
        candidates = dates[dates >= date]
        if candidates.empty:
            return np.nan
        target_date = candidates.min()
    # iloc[0] keeps the first row in file order when a date appears more than once.
    return stock_data.loc[dates == target_date, 'Close'].iloc[0]

In [None]:
# Parse the report dates so they can be compared with the price tables.
data["날짜"] = pd.to_datetime(data["날짜"])
# For every report, look up the closing price on the report date or, when
# there is no record for that day, on the closest earlier recorded day.
sp_col = [
    find_stock_price(name, report_date, direction = "past")
    for name, report_date in zip(data["종목명"], data["날짜"])
]
data["주가"] = sp_col

### 적정 가격과 주가 차이 탐색

In [None]:
# Gap between the target price and the current price, as a percentage of the
# current price.
temp = data['적정가격'].sub(data['주가']).div(data['주가']).mul(100)
# Box plot of the gap, with the y-axis limited to 0-100.
temp.plot.box(ylim = (0, 100))

In [None]:
# Summary statistics of the target-price gap (%), rounded to three decimals.
display(temp.describe().round(3))

In [None]:
# Show the reports with the largest and the smallest gap. argmax/argmin
# return integer positions, so the rows are fetched with iloc; temp was
# computed from data and therefore shares its row order.
display(data.iloc[temp.argmax()])
display(data.iloc[temp.argmin()])

In [None]:
# Keep only reports that are not for the two excluded stocks and that have a
# positive target price.
# NOTE(review): the two stock names are presumably the outliers surfaced by
# the argmax/argmin inspection - confirm.
data = data[~data['종목명'].isin(['삼성전자', '대한해운']) & (data['적정가격'] > 0)]

### 리포트에 따른 투자 시 기대 수익률 계산

In [None]:
def calc_ror_based_on_report(report_data, D):
    """Compute the percentage return of each report ``D`` days after its date.

    Parameters
    ----------
    report_data : pandas.DataFrame
        Reports with the columns ``'종목명'`` (stock name), ``'날짜'``
        (report date) and ``'주가'`` (price at the report date).
    D : int
        Holding period in calendar days.

    Returns
    -------
    numpy.ndarray
        ``(future price - current price) / current price * 100`` for every
        report, with NaN entries removed (e.g. reports for which
        ``find_stock_price`` found no price in the "future" direction).
    """
    holding_period = pd.to_timedelta(D, "D")
    target_dates = (report_data['날짜'] + holding_period).values
    future_prices = np.array([
        find_stock_price(name, target_date, "future")
        for name, target_date in zip(report_data['종목명'], target_dates)
    ])
    current_prices = report_data['주가'].values
    returns = (future_prices - current_prices) / current_prices * 100
    # Keep only the reports whose return could actually be computed.
    return returns[~np.isnan(returns)]

In [None]:
# Split the reports by normalised opinion: "매수" (buy) versus "중립" (neutral).
buy_data = data[data['의견'].eq("매수")]
hold_data = data[data['의견'].eq("중립")]

In [None]:
# Returns after 90, 180 and 365 calendar days, for buy and neutral reports
# respectively (buy is evaluated before neutral for each horizon).
buy_3M_ror_list, hold_3M_ror_list = (
    calc_ror_based_on_report(reports, 90) for reports in (buy_data, hold_data)
)
buy_6M_ror_list, hold_6M_ror_list = (
    calc_ror_based_on_report(reports, 180) for reports in (buy_data, hold_data)
)
buy_1Y_ror_list, hold_1Y_ror_list = (
    calc_ror_based_on_report(reports, 365) for reports in (buy_data, hold_data)
)

#### 시각화

In [None]:
# Side-by-side box plots of the six return distributions.
fig, ax = plt.subplots(figsize = (10, 6))
ax.boxplot([
    buy_3M_ror_list, hold_3M_ror_list,
    buy_6M_ror_list, hold_6M_ror_list,
    buy_1Y_ror_list, hold_1Y_ror_list,
])
# Limit the visible range to -30..100 so extreme values do not flatten the boxes.
ax.set_ylim(-30, 100)
# Box positions are 1-based; label them in the same order as the data above.
ax.set_xticks(range(1, 7))
ax.set_xticklabels(['매수-3개월', '중립-3개월',
                    '매수-6개월', '중립-6개월',
                    '매수-1년', '중립-1년'])
plt.show()

In [None]:
# Summary statistics of each return distribution, one column per
# opinion/horizon pair; the dict keys become the column labels.
result = pd.concat(
    {
        '매수-3개월': pd.Series(buy_3M_ror_list).describe(),
        '중립-3개월': pd.Series(hold_3M_ror_list).describe(),
        '매수-6개월': pd.Series(buy_6M_ror_list).describe(),
        '중립-6개월': pd.Series(hold_6M_ror_list).describe(),
        '매수-1년': pd.Series(buy_1Y_ror_list).describe(),
        '중립-1년': pd.Series(hold_1Y_ror_list).describe(),
    },
    axis = 1,
)
display(result.round(2))

### 적정 가격 관련 분석

#### 적정 가격과 현재 가격의 차이 (%) 계산

In [None]:
# Upside implied by the report: target price relative to the current price, in percent.
data['적정증가율'] = data['적정가격'].sub(data['주가']).div(data['주가']).mul(100)
# Keep only the reports whose target price is above the current price.
data = data[data['적정증가율'].gt(0)]

In [None]:
def assign_group(inc):
    """Map an increase rate (in percent) to its 20-point band label.

    Parameters
    ----------
    inc : float
        Increase rate in percent.

    Returns
    -------
    str
        ``"100%이상"`` for 100 and above, then ``"80-100%"``, ``"60-80%"``,
        ``"40-60%"`` and ``"20-40%"`` (lower bound inclusive); any value
        that reaches none of these bounds falls into ``"0-20%"``.
    """
    # Ordered from the highest lower bound down, so the first match wins.
    bands = (
        (100, "100%이상"),
        (80, "80-100%"),
        (60, "60-80%"),
        (40, "40-60%"),
        (20, "20-40%"),
    )
    for lower_bound, label in bands:
        if inc >= lower_bound:
            return label
    return "0-20%"

In [None]:
# Label every report with the band its implied upside falls into.
data['적정증가율_그룹'] = data['적정증가율'].map(assign_group)
# Number of reports per band.
group_counts = data['적정증가율_그룹'].value_counts()
display(group_counts)

#### 적정 가격 도달까지 걸리는 시간 계산

In [None]:
def cal_time_to_price(stock_name, date, price):
    """Return the number of days until ``stock_name`` first closes at or above ``price``.

    Parameters
    ----------
    stock_name : str
        Key into the module-level ``sp_data_dict``; the stored frame must
        provide ``'Date'`` and ``'Close'`` columns.
    date : pandas.Timestamp
        Start date; only rows dated on or after it are considered.
    price : float
        Price level to reach.

    Returns
    -------
    int or float
        Whole days between ``date`` and the first matching row, or
        ``np.nan`` when no row matches.
        NOTE(review): "first" means first in row order, so this presumably
        relies on the price table being sorted by date - confirm.
    """
    price_table = sp_data_dict[stock_name]
    on_or_after = price_table['Date'] >= date
    at_or_above = price_table['Close'] >= price
    hit_dates = price_table.loc[on_or_after & at_or_above, 'Date']
    if hit_dates.empty:
        return np.nan
    return (hit_dates.iloc[0] - date).days

In [None]:
# For every report, the number of days until the stock first closed at or
# above the target price (NaN when it never did).
value_list = [
    cal_time_to_price(name, report_date, target_price)
    for name, report_date, target_price in zip(data["종목명"], data["날짜"], data["적정가격"])
]
data["적정가격_도달시간"] = value_list

#### 적정 가격까지 도달한 종목의 비율 확인

In [None]:
# Share of reports whose target price was reached, restricted to reports
# issued before each cut-off date.
# B: the target price was reached at some point (a time-to-target exists).
B = data["적정가격_도달시간"].notnull()
for template, cutoff in (("3개월 이전: {}", "2021-06-30"),
                         ("6개월 이전: {}", "2021-03-31"),
                         ("1년 이전: {}", "2020-09-30")):
    # A: the report was issued before the cut-off date.
    A = data["날짜"] < pd.to_datetime(cutoff)
    print(template.format(round(sum(A&B) / sum(A),3)))

#### 적정 가격에 도달하기까지 소요된 시간 분포 확인

In [None]:
# Drop, in place, every row that still has a missing value; this removes the
# reports whose target price was never reached ('적정가격_도달시간' is NaN).
# NOTE(review): rows with a missing value in any other column are dropped as
# well - confirm that is intended.
data.dropna(inplace = True)

In [None]:
# Summary statistics of the days needed to reach the target price;
# astype(int) truncates every statistic to a whole number for display.
display(data['적정가격_도달시간'].describe().astype(int))

In [None]:
# Same summary statistics, broken down by implied-upside band and rounded to
# two decimals.
display(data.groupby(['적정증가율_그룹'])['적정가격_도달시간'].describe().round(2))