<a href="https://colab.research.google.com/github/zzhining/stock_market_analysis/blob/main/3%EC%A3%BC%EC%B0%A8/%EC%9E%AC%EB%AC%B4%EC%A0%9C%ED%91%9C%EA%B8%B0%EB%B0%98_%EC%A3%BC%EA%B0%80%EB%B6%84%EC%84%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 환경 설정

In [None]:
import warnings

# Silence every warning category for the rest of the notebook so library
# deprecation/runtime notices do not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
sns.set()
%matplotlib inline
rcParams['font.family'] = 'Malgun Gothic'
rcParams['axes.unicode_minus'] = False

### 데이터 준비

In [None]:
import pandas as pd

# Read only the columns this analysis uses from the EUC-KR encoded financial
# statement file, then discard every row that has a missing value in any of them.
data = pd.read_csv(
    "../../데이터/재무제표_분석용데이터.csv",
    encoding="euc-kr",
    usecols=['기업', '연도', '현재_주가', '미래_주가', 'ROA', 'ROE', 'PER'],
).dropna()

#### 산업군 추가

In [None]:
# Tab-separated listing that maps a company name ("Name") to its sector.
stock_info_data = pd.read_csv(
    "../../데이터/종목정보.txt",
    sep="\t",
    encoding="euc-kr",
    usecols=["Name", "Sector"],
)

# Left join keeps every financial row; companies without a match get a missing
# Sector. The join key "Name" duplicates "기업", so it is dropped afterwards.
data = data.merge(
    stock_info_data, how="left", left_on="기업", right_on="Name"
).drop(columns="Name")

In [None]:
# Return in percent: (future price - current price) / current price * 100.
data['수익률'] = (
    data['미래_주가']
    .sub(data['현재_주가'])
    .div(data['현재_주가'])
    .mul(100)
)

### 수익률과 투자 지표 간 상관관계 확인

#### PER

In [None]:
# Scatter of PER against the percentage return; the axes are clipped to
# PER in [0, 100] and return in [-100, 100].
plt.scatter(data["PER"], data["수익률"], alpha=0.1)
plt.xlabel("PER")
plt.ylabel("수익률")
plt.xlim(0, 100)
plt.ylim(-100, 100)
plt.show()
# Spearman rank correlation between PER and the return. Only the two numeric
# columns are passed to corr(): `data` also carries text columns such as
# Sector, and DataFrame.corr raises on non-numeric columns in pandas >= 2.0.
print(data[["PER", "수익률"]].corr("spearman").loc["PER", "수익률"])

#### ROA

In [None]:
# Scatter of ROA against the percentage return; the axes are clipped to
# ROA in [0, 100] and return in [-100, 100].
plt.scatter(data["ROA"], data["수익률"], alpha=0.1)
plt.xlabel("ROA")
plt.ylabel("수익률")
plt.xlim(0, 100)
plt.ylim(-100, 100)
plt.show()
# Spearman rank correlation between ROA and the return. Only the two numeric
# columns are passed to corr(): `data` also carries text columns such as
# Sector, and DataFrame.corr raises on non-numeric columns in pandas >= 2.0.
print(data[["ROA", "수익률"]].corr("spearman").loc["ROA", "수익률"])

#### ROE

In [None]:
# Scatter of ROE against the percentage return; the axes are clipped to
# ROE in [0, 100] and return in [-100, 100].
plt.scatter(data["ROE"], data["수익률"], alpha=0.1)
plt.xlabel("ROE")
plt.ylabel("수익률")
plt.xlim(0, 100)
plt.ylim(-100, 100)
plt.show()
# Spearman rank correlation between ROE and the return. Only the two numeric
# columns are passed to corr(): `data` also carries text columns such as
# Sector, and DataFrame.corr raises on non-numeric columns in pandas >= 2.0.
print(data[["ROE", "수익률"]].corr("spearman").loc["ROE", "수익률"])

### 산업군을 고려한 수익률과 투자 지표 간 상관관계 확인

In [None]:
# Number of distinct companies per sector: collapse the yearly rows to one row
# per (company, sector) pair before counting.
display(
    data.drop_duplicates(subset=['기업', 'Sector'])
    .loc[:, 'Sector']
    .value_counts()
)

In [None]:
# Spearman correlation between each indicator and the return, computed
# separately inside each of the ten sectors that contain the most companies.
result = []
# One row per (company, sector) pair so a company is counted once, not once
# per year; value_counts() sorts descending, so the first ten are the largest.
top10_S = data.drop_duplicates(['기업', 'Sector'])['Sector'].value_counts().index[:10]
for col in ["PER", "ROA", "ROE"]:
    record = []
    for sector in top10_S:
        # Two-column frame (indicator, return) for this sector only.
        temp = data.loc[data['Sector'] == sector, [col, '수익률']].dropna()
        # Off-diagonal entry of the 2x2 correlation matrix.
        corr = temp.corr("spearman").iloc[0, 1]
        record.append(corr)
    result.append(record)

plt.figure(figsize = (12, 5))
# Label rows with the indicator and columns with the sector name so each cell
# can be read without mapping positional indices back to top10_S.
sns.heatmap(
    result,
    annot=True,
    fmt=".2f",
    linewidths=0.5,
    xticklabels=list(top10_S),
    yticklabels=["PER", "ROA", "ROE"],
)
plt.show()

### 동일 가중 교체 매매에 따른 수익 비교

#### 전체 산업군

In [None]:
import numpy as np

# Equal-weight yearly rebalancing backtest. For each indicator the stocks of a
# year are split into 10 quantile groups by that indicator; each group starts
# with the same capital, invests it equally across its stocks and rolls the
# proceeds into the next year's group with the same rank.
result = []
for col in ["PER", "ROA", "ROE"]:
    # Initialize the capital of every group. Float dtype is required: with an
    # integer array each `+= profit` would be cast back to int and silently
    # drop the fractional part of the profit every year.
    inve_list = np.array([10**8] * 10, dtype=float)
    for year in range(2014, 2020):
        y_data = data.loc[data['연도'] == year]
        # Group label 0..9 per stock; 0 holds the lowest values of `col`.
        group_list = pd.qcut(y_data[col], 10, labels = range(10)).astype(int)
        for group in range(10):
            ror_list = y_data.loc[group_list == group, '수익률']
            if ror_list.empty:
                # Nothing to buy in this group this year; capital carries over
                # unchanged and the division by zero below is avoided.
                continue
            inve_per_stock = inve_list[group] / len(ror_list)
            # 수익률 is in percent, hence the division by 100.
            profit = (inve_per_stock * ror_list / 100).sum()
            inve_list[group] += profit
    # Cumulative return of each group in percent of the initial capital.
    profit_ratio_list = (inve_list - 10 ** 8) / 10 ** 8 * 100
    result.append(profit_ratio_list)
result = pd.DataFrame(result, columns = range(10), index = ["PER", "ROA", "ROE"])

plt.figure(figsize = (12, 5))
sns.heatmap(result, annot = True, linewidths = 1, fmt = ".2f")
plt.show()

#### 소프트웨어 개발 및 공급업

In [None]:
# Same equal-weight yearly rebalancing backtest, restricted to one sector and
# using 5 quantile groups instead of 10.
s_data = data.loc[data['Sector'] == "소프트웨어 개발 및 공급업"] # filter to the sector
result = []
for col in ["PER", "ROA", "ROE"]:
    # Float dtype so that adding a fractional profit is not truncated to int.
    inve_list = np.array([10**8] * 5, dtype=float)
    for year in range(2014, 2020):
        # Build the year mask from s_data itself so the mask and the frame it
        # selects from share the same index, instead of relying on pandas
        # aligning a mask taken from the larger `data` frame.
        y_data = s_data.loc[s_data['연도'] == year]
        # Group label 0..4 per stock; 0 holds the lowest values of `col`.
        group_list = pd.qcut(y_data[col], 5, labels = range(5)).astype(int)
        for group in range(5):
            ror_list = y_data.loc[group_list == group, '수익률']
            if ror_list.empty:
                # Nothing to buy in this group this year; capital carries over
                # unchanged and the division by zero below is avoided.
                continue
            inve_per_stock = inve_list[group] / len(ror_list)
            # 수익률 is in percent, hence the division by 100.
            profit = (inve_per_stock * ror_list / 100).sum()
            inve_list[group] += profit
    # Cumulative return of each group in percent of the initial capital.
    profit_ratio_list = (inve_list - 10 ** 8) / 10 ** 8 * 100
    result.append(profit_ratio_list)
result = pd.DataFrame(result, columns = range(5), index = ["PER", "ROA", "ROE"])

plt.figure(figsize = (12, 5))
sns.heatmap(result, annot = True, linewidths = 1, fmt = ".2f")
plt.show()