In [29]:
import pandas as pd

from core.fs import FsLoader
from core.repository.maria.conn import maria_home
from utils.timeutil import YearMonth
from utils.timeutil import YearQtr
import numpy as np

# Range of months to process; `begin` feeds the SQL year filter below and
# `begin.to(end)` produces the month sequence.
begin = YearMonth(2014, 4)
end = YearMonth(2023, 12)

# Load the monthly chart
print("Fetching month chart...")
month_chart = pd.read_sql(
    sql=f"""
    select month_chart.* from month_chart
    where year(date) >= {begin.year}
    """,
    con=maria_home(),
)

# Load financial statements
print("Loading financial data...")
fs_loader = FsLoader()

# Tag every chart row with the YearMonth derived from its date.
month_chart["년월"] = list(map(YearMonth.from_date, month_chart["date"]))
layer1 = pd.DataFrame()
# De-duplicated, sorted series of the values yielded by begin.to(end)
# (presumably one YearMonth per month in the range -- confirm in utils.timeutil).
yms = pd.Series(list(begin.to(end))).drop_duplicates().sort_values()

# TODO: Important!!! NaNs appear en masse around the bottom 1-3% by market cap..
# these are trading halts or stocks heading for delisting..
print("Making layer1...")
# Per-pair frames are collected here and concatenated once after the loop;
# calling pd.concat on the growing layer1 in every iteration re-copies all
# previously accumulated rows each time.
monthly_frames = []
# Walk consecutive (buy month, sell month) pairs of the sorted month series.
for ym1, ym2 in zip(yms.iloc[:-1], yms.iloc[1:]):
    print(f"{ym1} {ym2}", end="\r")
    df1 = month_chart[month_chart["년월"] == ym1].set_index("code")
    # A row whose date is not the month's max date belongs to a stock being
    # delisted. The buy happens at month end, so such a stock cannot be bought.
    df1 = df1[df1["date"] == df1["date"].max()]
    df2 = month_chart[month_chart["년월"] == ym2].set_index("code")
    # A row whose date is not the month's max date belongs to a stock being
    # delisted. It goes through liquidation trading, so no separate handling is
    # needed; every sell-month row is stamped with the month's max date.
    df2["date"] = df2["date"].max()
    df1 = df1[df1["open"] > 0]  # open == 0 means a trading halt, so it cannot be bought
    # Require a minimum "val_last" (assume the buy would fail when trading is thin).
    df1 = df1[df1["val_last"] > 10_000_000]
    df2["close"] = df2["close"].astype(float)
    df2.loc[df2["open"] == 0, "close"] /= 2  # assume the investment is halved on a trading halt

    # Buy-side columns come from df1, sell-side columns from df2; the result is
    # restricted to the codes that survived the buy-side filters.
    df = pd.DataFrame({
        "매수년월": ym1,
        "매도년월": ym2,
        "종목명": df1["name"],
        "매수일": df1["date"],
        "매수가": df1["close"],
        "P": df1["cap"],
        "vol": df1["vol"],
        "val": df1["val"],
        "shares": df1["shares"],
        "avg": df1["avg"],
        "전월수익률": df1["close"] / df1["open"] - 1,
        "매도가": df2["close"],
        "매도일": df2["date"]
    }).reindex(df1.index)

    shares = pd.concat([df1["shares"], df2["shares"]], axis=1)
    # Remove stock splits: drop codes whose share count changed by a factor of
    # 1.5 or more between the two months. The explicit copy keeps the
    # assignments below from writing into a view of the unfiltered frame.
    df = df.loc[shares.max(axis=1) / shares.min(axis=1) < 1.5].copy()
    df["매도가"] = df["매도가"].fillna(0)
    # TODO: Important!!! When the next month's data is a trading halt or a
    # delisting -> return NaN, sell price NaN, open NaN
    df["수익률"] = (df["매도가"] / df["매수가"] - 1).fillna(-1)

    # Join the financial statements for the quarter returned by
    # YearQtr.settled_of for the buy month's last date (presumably the latest
    # quarter already settled on that date -- confirm in utils.timeutil).
    settled_qtr = YearQtr.settled_of(ym1.last_date)
    fn = fs_loader.load(settled_qtr.year, settled_qtr.qtr)
    monthly_frames.append(df.join(fn))

# Terminate the "\r" progress line so later output does not get glued onto it.
print()
if monthly_frames:
    layer1 = pd.concat([layer1, *monthly_frames])


def add_factor(name: str, value):
    """Store ``value`` as column ``name`` of ``layer1`` and register the factor.

    Args:
        name: Column label to create (or overwrite) in the module-level
            ``layer1`` frame.
        value: Anything assignable through ``layer1[name] = value``.

    The name is appended to the module-level ``factors`` list only if it is
    not already there, so registering the same factor again overwrites the
    column without making the per-factor loop process it twice.
    """
    if name not in factors:
        factors.append(name)
    layer1[name] = value


# Columns to be percentile-ranked: a few base columns plus every column whose
# name ends with "QoQ"; add_factor() appends the derived ratios below.
factors = ["P", "val", "수익률"]
factors += [col for col in layer1.columns if col.endswith("QoQ")]

# Price ratios
add_factor("EQ/P", layer1["EQ"] / layer1["P"])

is_cols = ["R", "GP", "O", "EBT", "E"]
for col in is_cols:
    add_factor(f"{col}/P", layer1[f"{col}/Y"] / layer1["P"])
    add_factor(f"{col}/A", layer1[f"{col}/Y"] / layer1["A"])  # ratio to assets
    add_factor(f"{col}/EQ", layer1[f"{col}/Y"] / layer1["EQ"])  # ratio to equity
    if col != "R":
        add_factor(f"{col}/R", layer1[f"{col}/Y"] / layer1["R/Y"])  # margin

add_factor("유동부채비율", layer1["유동부채"] / layer1["유동자산"])

pct_scale = 100
for i, f in enumerate(factors):
    print(f"[{i + 1}/{len(factors)}] {f}")
    # Percentile rank within each sell month, rounded up to an integer bucket
    # in 1..pct_scale. GroupBy.rank returns one value per row in the original
    # row order, so the result is assigned positionally; this avoids relying
    # on the index layout produced by groupby.apply (which depends on the
    # group_keys behaviour of the installed pandas) and on aligning by the
    # "code" index, which repeats across months.
    pct_rank = layer1.groupby("매도년월")[f].rank(pct=True)
    layer1[f"{f}_pct"] = np.ceil(pct_rank * pct_scale).to_numpy()

layer1.reset_index(inplace=True)
layer1.to_csv(".cache/historical_data.csv", index=False)
layer1

Fetching month chart...
Loading financial data...
Making layer1...
[1/39] P2023-12
[2/39] val
[3/39] 수익률
[4/39] R_QoQ
[5/39] R/EQ_QoQ
[6/39] R/A_QoQ
[7/39] GP_QoQ
[8/39] GP/EQ_QoQ
[9/39] GP/A_QoQ
[10/39] O_QoQ
[11/39] O/EQ_QoQ
[12/39] O/A_QoQ
[13/39] EBT_QoQ
[14/39] EBT/EQ_QoQ
[15/39] EBT/A_QoQ
[16/39] E_QoQ
[17/39] E/EQ_QoQ
[18/39] E/A_QoQ
[19/39] EQ/P
[20/39] R/P
[21/39] R/A
[22/39] R/EQ
[23/39] GP/P
[24/39] GP/A
[25/39] GP/EQ
[26/39] GP/R
[27/39] O/P
[28/39] O/A
[29/39] O/EQ
[30/39] O/R
[31/39] EBT/P
[32/39] EBT/A
[33/39] EBT/EQ
[34/39] EBT/R
[35/39] E/P
[36/39] E/A
[37/39] E/EQ
[38/39] E/R
[39/39] 유동부채비율


Unnamed: 0,code,매수년월,매도년월,종목명,매수일,매수가,P,vol,val,shares,...,O/R_pct,EBT/P_pct,EBT/A_pct,EBT/EQ_pct,EBT/R_pct,E/P_pct,E/A_pct,E/EQ_pct,E/R_pct,유동부채비율_pct
0,000020,2014-04,2014-05,동화약품,2014-04-30,6000.0,1.675888e+11,1566607.0,9.163852e+09,27931470.0,...,26.0,30.0,30.0,29.0,30.0,31.0,31.0,30.0,32.0,47.0
1,000040,2014-04,2014-05,KR모터스,2014-04-30,1245.0,1.485800e+11,118231719.0,1.300651e+11,119341379.0,...,14.0,21.0,15.0,16.0,15.0,21.0,15.0,16.0,16.0,60.0
2,000050,2014-04,2014-05,경방,2014-04-30,133500.0,3.659939e+11,35866.0,4.863769e+09,2741527.0,...,83.0,50.0,37.0,37.0,63.0,48.0,38.0,38.0,62.0,89.0
3,000060,2014-04,2014-05,메리츠화재,2014-04-30,12850.0,1.302075e+12,6373693.0,8.812803e+10,101328800.0,...,,,,,,,,,,
4,000070,2014-04,2014-05,삼양홀딩스,2014-04-30,72000.0,5.882735e+11,108108.0,7.798089e+09,8170465.0,...,22.0,26.0,27.0,27.0,27.0,21.0,26.0,27.0,26.0,46.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247214,950170,2023-11,2023-12,JTC,2023-11-30,3900.0,1.961256e+11,1211006.0,4.491614e+09,50288623.0,...,,,,,,,,,,
247215,950190,2023-11,2023-12,고스트스튜디오,2023-11-30,10860.0,1.474776e+11,387124.0,4.078192e+09,13579892.0,...,98.0,88.0,99.0,96.0,97.0,88.0,99.0,95.0,96.0,2.0
247216,950200,2023-11,2023-12,소마젠,2023-11-30,5230.0,1.005523e+11,485755.0,2.529368e+09,19226053.0,...,,,,,,,,,,
247217,950210,2023-11,2023-12,프레스티지바이오파마,2023-11-30,9910.0,5.955529e+11,867473.0,8.811107e+09,60096155.0,...,,,,,,,,,,
