In [1]:
import pandas as pd

from core.fs import FsLoader
from core.repository.maria.conn import maria_home
from utils.timeutil import YearMonth
from utils.timeutil import YearQtr
import numpy as np

# Backtest window: months from `begin` through `end` are iterated further down.
begin = YearMonth(2023, 4)
end = YearMonth(2024, 5)

# Load the monthly chart (every row from the starting calendar year onward).
print("Fetching month chart...")
month_chart_sql = f"""
    select month_chart.* from month_chart
    where year(date) >= {begin.year}
    """
month_chart = pd.read_sql(month_chart_sql, maria_home())
print("Month chart loaded.")

# Load the financial statements.
print("Loading financial data...")
fs_loader = FsLoader()
print("Financial data loaded.")


def calc(ym1, ym2, min_val_last=10_000_000, max_share_ratio=1.5):
    """Build one buy-month / sell-month return table and attach financial data.

    Reads the module-level ``month_chart`` frame (which must already carry the
    ``년월`` column) and the module-level ``fs_loader``.

    Args:
        ym1: Buy month; rows of ``month_chart`` whose ``년월`` equals it form the buy side.
        ym2: Sell month; rows whose ``년월`` equals it form the sell side.
        min_val_last: Buy-side rows need ``val_last`` strictly above this value.
        max_share_ratio: Rows whose max/min share count across the two months
            reaches this ratio are dropped.

    Returns:
        DataFrame indexed by ``code`` with the buy/sell columns, ``수익률``
        (return), and the columns of ``fs_loader.load(year, qtr)`` joined on
        the index for the quarter given by ``YearQtr.settled_of(ym1.last_date)``.
    """
    print(f"{ym1} {ym2}", end="\r")

    # --- buy side -----------------------------------------------------------
    df1 = month_chart[month_chart["년월"] == ym1].set_index("code")
    # Rows not dated on the month's last date belong to stocks being delisted;
    # buying happens at month end, so they cannot be bought.
    df1 = df1[df1["date"] == df1["date"].max()]
    # open == 0 means trading was halted, so the stock cannot be bought.
    df1 = df1[df1["open"] > 0]
    # Require a minimum traded amount (thin trading is assumed to make the buy fail).
    df1 = df1[df1["val_last"] > min_val_last]

    # --- sell side ----------------------------------------------------------
    df2 = month_chart[month_chart["년월"] == ym2].set_index("code")
    # Every sell-side row is stamped with the month's last date. Original note:
    # rows dated earlier are stocks being delisted; they pass through
    # liquidation trading, so no separate handling is needed.
    df2["date"] = df2["date"].max()
    df2["close"] = df2["close"].astype(float)
    # Trading halted in the sell month: assume the investment is worth half.
    df2.loc[df2["open"] == 0, "close"] /= 2

    df = pd.DataFrame({
        "매수년월": ym1,
        "매도년월": ym2,
        "종목명": df1["name"],
        "매수일": df1["date"],
        "매수가": df1["close"],
        "P": df1["cap"],
        "vol": df1["vol"],
        "val": df1["val"],
        "shares": df1["shares"],
        "avg": df1["avg"],
        "전월수익률": df1["close"] / df1["open"] - 1,
        "매도가": df2["close"],
        "매도일": df2["date"]
    }).reindex(df1.index)

    # Drop capital increases / stock splits: share count changed too much
    # between the two months. The ratio is indexed by the union of both months'
    # codes, so align it to df explicitly before using it as a mask.
    shares = pd.concat([df1["shares"], df2["shares"]], axis=1)
    share_ratio = shares.max(axis=1) / shares.min(axis=1)
    keep = share_ratio.reindex(df.index) < max_share_ratio
    # .copy() makes df an independent frame, so the assignments below do not
    # write into a slice (no SettingWithCopyWarning).
    df = df.loc[keep].copy()

    # No sell-side row: the sell price becomes 0, which gives a return of -1.
    df["매도가"] = df["매도가"].fillna(0)
    df["수익률"] = (df["매도가"] / df["매수가"] - 1).fillna(-1)

    settled_qtr = YearQtr.settled_of(ym1.last_date)
    return df.join(fs_loader.load(settled_qtr.year, settled_qtr.qtr))


print("Making historical data...")
# Tag every chart row with its year-month so calc() can select a month by equality.
month_chart["년월"] = month_chart["date"].apply(YearMonth.from_date)

# Each month except the last in the window is a buy month, paired with the
# month that follows it as the sell month.
buy_months = begin.to(end)[:-1]
monthly_frames = [calc(buy_ym, buy_ym.next) for buy_ym in buy_months]
result = pd.concat(monthly_frames)


def add_factor(name, value):
    """Store ``value`` as column ``name`` of the global ``result`` and register the name.

    The column is written first, so a failed assignment never leaves a name in
    the global ``factors`` list that has no column behind it. A name that is
    already registered is not appended again, so repeated calls overwrite the
    column without creating duplicate entries.

    Args:
        name: Column name of the factor.
        value: Column values, anything assignable via ``result[name] = value``.
    """
    result[name] = value
    if name not in factors:
        factors.append(name)


# Columns that receive a per-sell-month percentile rank further down.
# add_factor() appends to this list as derived columns are created.
factors = ["P", "val", "수익률"]
factors += [col for col in result.columns if col.endswith("QoQ")]
add_factor("EQ/P", result["EQ"] / result["P"])

# For each "<item>/Y" column, derive ratios against P, A, EQ and R/Y.
is_cols = ["R", "GP", "O", "EBT", "E"]
for col in is_cols:
    add_factor(f"{col}/P", result[f"{col}/Y"] / result["P"])
    add_factor(f"{col}/A", result[f"{col}/Y"] / result["A"])  # ratio to assets
    add_factor(f"{col}/EQ", result[f"{col}/Y"] / result["EQ"])  # ratio to equity
    if col != "R":
        add_factor(f"{col}/R", result[f"{col}/Y"] / result["R/Y"])  # profit margin

# Divisions by zero above produce +/-inf; treat those as missing.
result = result.replace([np.inf, -np.inf], np.nan)

# Percentile (1..100) of each factor within its sell month.
# GroupBy.rank returns a Series on result's own index in the original row
# order, so the assignment stays correct even though `code` labels repeat
# across months, and it does not depend on how groupby.apply handles group
# keys in a given pandas version.
for factor in factors:
    result[f"{factor}_pct"] = np.ceil(
        result.groupby("매도년월")[factor].rank(pct=True) * 100
    )

result = result.reset_index()
result.to_csv(".cache/historical_data.csv", index=False)
result

Fetching month chart...
Month chart loaded.
Loading financial data...
Loading fs db...
Financial data loaded.
Making historical data...
2024-04 2024-05

Unnamed: 0,code,매수년월,매도년월,종목명,매수일,매수가,P,vol,val,shares,...,O/EQ_pct,O/R_pct,EBT/P_pct,EBT/A_pct,EBT/EQ_pct,EBT/R_pct,E/P_pct,E/A_pct,E/EQ_pct,E/R_pct
0,000020,2023-04,2023-05,동화약품,2023-04-28,8380.0,2.340657e+11,1682244.0,1.441033e+10,27931470.0,...,58.0,75.0,68.0,64.0,53.0,66.0,72.0,67.0,56.0,69.0
1,000040,2023-04,2023-05,KR모터스,2023-04-28,670.0,6.441277e+10,101678340.0,7.080840e+10,96138465.0,...,11.0,18.0,5.0,11.0,5.0,14.0,8.0,14.0,9.0,16.0
2,000050,2023-04,2023-05,경방,2023-04-28,10560.0,2.895053e+11,116735.0,1.246954e+09,27415270.0,...,44.0,72.0,34.0,32.0,32.0,33.0,39.0,34.0,34.0,39.0
3,000070,2023-04,2023-05,삼양홀딩스,2023-04-28,75600.0,6.474589e+11,181738.0,1.380864e+10,8564271.0,...,48.0,51.0,86.0,48.0,48.0,51.0,86.0,49.0,49.0,52.0
4,000080,2023-04,2023-05,하이트진로,2023-04-28,22250.0,1.560473e+12,5078029.0,1.129301e+11,70133611.0,...,85.0,70.0,62.0,54.0,70.0,56.0,57.0,51.0,64.0,53.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32468,950170,2024-04,2024-05,JTC,2024-04-30,5880.0,2.956971e+11,1589964.0,8.349728e+09,50288623.0,...,,,,,,,,,,
32469,950190,2024-04,2024-05,고스트스튜디오,2024-04-30,10680.0,1.450332e+11,258976.0,2.764849e+09,13579892.0,...,,,,,,,,,,
32470,950200,2024-04,2024-05,소마젠,2024-04-30,4885.0,9.396812e+10,518444.0,2.570833e+09,19236053.0,...,,,,,,,,,,
32471,950210,2024-04,2024-05,프레스티지바이오파마,2024-04-30,8060.0,4.843750e+11,1111929.0,9.566181e+09,60096155.0,...,,,,,,,,,,
