### 환경 설정

In [None]:
import warnings
# Install a global "ignore" filter: every Python warning raised after this
# point (including library deprecation warnings) is suppressed.
warnings.filterwarnings("ignore")

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
# Apply seaborn's default theme to subsequent matplotlib figures.
sns.set()
# IPython magic: draw figures inline in the notebook output.
%matplotlib inline
# Select the 'Malgun Gothic' font family; presumably chosen so the Korean
# titles/labels below render — the font must be installed on the machine.
rcParams['font.family'] = 'Malgun Gothic'
# Disable the Unicode minus glyph so negative tick labels use a plain hyphen.
rcParams['axes.unicode_minus'] = False 

### 데이터 준비

In [None]:
import pandas as pd
# Load the company monthly rating table; the file is decoded as EUC-KR.
data = pd.read_csv("../../데이터/기업월별평점.csv", encoding = "euc-kr")
# Preview the first rows.
display(data.head())

#### 연도와 월 컬럼 생성

In [None]:
# Split '날짜' into integer year ('연도') and month ('월') columns.
# The dot is escaped because pandas treats a multi-character split pattern as
# a regular expression, where a bare "." would match any character.
# assumes every '날짜' value looks like "<year>. <month>" — TODO confirm
data[['연도', '월']] = data['날짜'].str.split(r'\. ', expand = True).astype(int)

#### 주가 데이터와 병합

In [None]:
import numpy as np
import os

path = "../../데이터/주가데이터/"

# List the price directory once instead of once per company.
available_files = set(os.listdir(path))

monthly_frames = []   # one monthly price table per company with a price file
missing_stocks = []   # companies without a price file; removed from `data` below
for stock_name in data['기업'].unique():
    # Load the price data; companies without a file are skipped.
    if stock_name + ".csv" not in available_files:
        missing_stocks.append(stock_name)
        continue
    sp_data = pd.read_csv(path + stock_name + ".csv", parse_dates = ["Date"])

    # Sort chronologically (stable) so the last row of each month really is
    # the month-end close, regardless of the row order in the CSV.
    sp_data = sp_data.sort_values("Date", kind = "mergesort")

    # Extract year and month
    sp_data["연도"] = sp_data["Date"].dt.year
    sp_data["월"] = sp_data["Date"].dt.month

    # Monthly mean close and month-end close (last row of the month)
    M_sp_data = sp_data.groupby(['연도', '월'])['Close'].agg(["mean",
                                                              lambda x:x.iloc[-1]])
    M_sp_data.reset_index(inplace = True)
    M_sp_data.columns = ["연도", "월", "평균가격", "월말가격"]
    # Month-end price of the following row; the last row has no successor and
    # stays NaN. shift() replaces the chained `.iloc` assignment, which does
    # not write through to the frame under pandas Copy-on-Write.
    M_sp_data["차월말가격"] = M_sp_data["월말가격"].shift(-1)
    M_sp_data["기업"] = stock_name

    monthly_frames.append(M_sp_data)

# Drop rating rows of companies for which no price file exists.
if missing_stocks:
    data = data.loc[~data['기업'].isin(missing_stocks)]

# Concatenate once at the end rather than growing a frame inside the loop.
if monthly_frames:
    total_M_sp_data = pd.concat(monthly_frames, axis = 0, ignore_index = True)
else:
    total_M_sp_data = pd.DataFrame()

In [None]:
# Join the monthly price features onto the rating rows; only
# (year, month, company) keys present in both tables are kept (inner join).
data = total_M_sp_data.merge(data, on = ["연도", "월", "기업"])
display(data.head())

### 주가와 평점 간 관계 분석

#### 상관관계 분석

In [None]:
# Per-company correlation between the monthly mean price and each rating.
cols = ["전체평점", "승진기회및가능성", "복지및급여", "업무와삶의균형", "사내문화", "경영진"]
# Correlate only the columns involved: calling DataFrame.corr() on the full
# frame raises on pandas >= 2.0 because of the string columns ('기업', '날짜'),
# and the restricted matrix is also cheaper to compute.
corr_cols = ['평균가격'] + cols
corr_result = []
for corp in data['기업'].unique():
    corp_data = data.loc[data['기업'] == corp, corr_cols]
    record = corp_data.corr().loc['평균가격', cols].values
    corr_result.append(record)
# One row per company (in order of first appearance), one column per rating.
corr_result = pd.DataFrame(corr_result, columns = cols)

In [None]:
# Distribution of the per-company correlations for each rating column,
# rounded to 3 decimals.
display(corr_result.describe().round(3))

In [None]:
# Overlay Samsung Electronics' overall rating and its monthly mean price.
plt.figure(figsize = (10 ,6))
is_samsung = data['기업'] == "삼성전자"
samsung_rows = data.loc[is_samsung]
x = samsung_rows['날짜'].values
y1 = samsung_rows['전체평점'].values
y2 = samsung_rows['평균가격'].values
# Rescale the price so its peak equals 5 and both series share one axis
# (5 is presumably the top of the rating scale — TODO confirm).
y2 = y2 / max(y2) * 5

# Label every 12th observation on the x axis.
xticks_range = np.arange(0, len(x), 12)
plt.xticks(xticks_range, x[xticks_range])
plt.title("삼성전자")
plt.xlabel("날짜")
plt.plot(x, y1, label = "평점")
plt.plot(x, y2, label = "주가")
plt.legend()

### 동일가중 교체 매매에 따른 수익 비교

In [None]:
def make_group(rating):
    """Map a rating to a group number from 1 (lowest) to 5 (highest).

    The inclusive upper bounds of groups 1-4 are 3.0, 3.3, 3.5 and 4.0;
    anything that is not <= 4.0 (including NaN, for which every comparison
    is false) lands in group 5.
    """
    upper_bounds = (3.0, 3.3, 3.5, 4.0)
    for group, bound in enumerate(upper_bounds, start=1):
        if rating <= bound:
            return group
    return len(upper_bounds) + 1

In [None]:
# Return (%) from this month's closing price ('월말가격') to the next one
# ('차월말가격').
data['수익률'] = data['차월말가격'].sub(data['월말가격']).div(data['월말가격']).mul(100)
# Remove every row that has a missing value in any column.
data.dropna(inplace = True)

In [None]:
def calc_ror_using_eqw_str(data, r_col): 
    """Simulate a monthly equal-weight strategy for each rating group.

    Every row of ``data`` is assigned to a group 1-5 by applying
    ``make_group`` to its ``r_col`` value. For each (year, month) in ``data``
    and each group, a fixed capital is split equally across the group's rows
    and the total profit is expressed as a percentage of that capital
    (mathematically the arithmetic mean of the rows' '수익률').

    Args:
        data: DataFrame with '연도', '월', '수익률' and ``r_col`` columns.
        r_col: Name of the rating column used to form the groups.

    Returns:
        DataFrame with columns ['연도', '월', '그룹', '수익률'], one row per
        (year, month, group), ordered by year, month, then group. A group
        with no rows in a month is recorded with a return of 0. Rows whose
        year or month is missing are ignored.
    """
    capital = 10 ** 8  # notional amount invested in each group every month

    # Positional working table: ratings are bucketed once, and each month's
    # subset is filtered on its own '그룹' column, so the group labels never
    # depend on index alignment with the caller's frame.
    work = pd.DataFrame({
        "연도": data["연도"].values,
        "월": data["월"].values,
        "그룹": data[r_col].apply(make_group).values,
        "수익률": data["수익률"].values,
    })

    result = []
    # groupby yields the (year, month) keys in ascending order and splits the
    # table in one pass instead of rescanning it for every month.
    for (year, month), ym_data in work.groupby(["연도", "월"]):
        for group in range(1, 6):
            ror_list = ym_data.loc[ym_data["그룹"] == group, "수익률"]
            if len(ror_list) == 0:
                result.append([year, month, group, 0])
                continue
            inve_per_stock = capital / len(ror_list)
            profit = (inve_per_stock * ror_list / 100).sum()
            profit_ratio = profit / capital * 100
            result.append([year, month, group, profit_ratio])

    return pd.DataFrame(result, columns = ["연도", "월", "그룹", "수익률"])

In [None]:
def visualize_result(result):
    """Plot the return series of groups 1-5 on a single figure.

    ``result`` needs the columns '연도', '월', '그룹' and '수익률'. Each
    group's '수익률' values are drawn in row order against their position,
    and every 12th distinct (year, month) is labelled with its year.
    """
    periods = result[['연도', '월']].drop_duplicates().sort_values(['연도', '월']).values

    plt.figure(figsize = (14, 5))
    for group in range(1, 6):
        in_group = result['그룹'] == group
        plt.plot(result['수익률'][in_group].values, label = group)

    tick_positions = np.arange(0, len(periods), 12)
    tick_labels = periods[tick_positions, 0]
    plt.xticks(tick_positions, tick_labels)
    plt.ylabel("수익률")
    plt.legend()

#### 전체평점

In [None]:
# Monthly equal-weight returns with groups formed on the overall rating.
r_col = "전체평점"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
display(result.groupby('그룹')['수익률'].describe().round(3))

#### 승진 기회 및 가능성

In [None]:
# Monthly equal-weight returns with groups formed on the promotion-opportunity rating.
r_col = "승진기회및가능성"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
display(result.groupby('그룹')['수익률'].describe().round(3))

#### 복지 및 급여

In [None]:
# Monthly equal-weight returns with groups formed on the welfare-and-pay rating.
r_col = "복지및급여"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
display(result.groupby('그룹')['수익률'].describe().round(3))

#### 업무와 삶의 균형

In [None]:
# Monthly equal-weight returns with groups formed on the work-life-balance rating.
r_col = "업무와삶의균형"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
display(result.groupby('그룹')['수익률'].describe().round(3))

#### 사내 문화

In [None]:
# Monthly equal-weight returns with groups formed on the company-culture rating.
r_col = "사내문화"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
display(result.groupby('그룹')['수익률'].describe().round(3))

#### 경영진

In [None]:
# Monthly equal-weight returns with groups formed on the management rating.
r_col = "경영진"
result = calc_ror_using_eqw_str(data, r_col)

In [None]:
# Plot each group's monthly return series.
visualize_result(result)

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
# NOTE(review): dropna() looks redundant — calc_ror_using_eqw_str records 0,
# not NaN, for a group with no rows; the other sections do not call it.
display(result.dropna().groupby('그룹')['수익률'].describe().round(3))

In [None]:
# Summary statistics of the monthly returns per group, rounded to 3 decimals.
# NOTE(review): `result` still holds the "경영진" output here, so this repeats
# the preceding summary without the dropna() call.
display(result.groupby('그룹')['수익률'].describe().round(3))