In [1]:

import matplotlib.pyplot as plt

# Global matplotlib settings for the notebook (the original repeated this block twice).
# Malgun Gothic is used as the default font, presumably so that the Korean labels
# used in this notebook render instead of showing missing-glyph boxes.
plt.rc("font", family="Malgun Gothic")
# Draw negative tick labels with the ASCII hyphen rather than the Unicode minus sign.
plt.rc('axes', unicode_minus=False)
# Grid line colour as RGBA: mid-gray at 10% opacity.
plt.rcParams['grid.color'] = (0.5, 0.5, 0.5, 0.1)

In [2]:
from __future__ import annotations

import sqlite3
from datetime import date

import pandas as pd

import repository.deepsearch as ds
from repository import get_day_chart, get_bussness_months
from repository.maria import corp

# Build the "layer1" table (one row per stock per month-to-month holding period),
# or load it from the SQLite cache when an earlier run already produced it.
con = sqlite3.connect("../.out/analysis.db")
try:
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' and name = 'layer1'")
    if len(cursor.fetchall()) > 0:
        df = pd.read_sql("select * from layer1", con)
    else:
        # Collect one frame per period and concatenate once at the end; growing a
        # DataFrame with concat inside the loop copies all earlier rows every time.
        monthly_frames = []
        from_date, to_date = date(2001, 4, 1), date(2022, 10, 31)
        prev_date = None
        for this_date in get_bussness_months(from_date, to_date):
            print(this_date)
            if prev_date is None:
                # The first date only serves as the buy date of the first period.
                prev_date = this_date
                continue

            before = get_day_chart(prev_date)
            before = before[before['vol'] != 0]  # exclude stocks with no recorded trading volume
            before = before[before['cap'] != 0]  # exclude stocks with no recorded market cap
            # Fetch price data for the sell date.
            after = get_day_chart(this_date)

            # Stocks present on the buy date but missing on the sell date (delisted?)
            # get no special handling: the left joins below leave their after_* columns
            # empty. The original computed that set here but never used it.

            # Compute factors.
            this_df = before['close'].to_frame("before_close")
            this_df = this_df.join(after['close'].to_frame('after_close'))
            this_df = this_df.join((this_df["after_close"] / this_df["before_close"] - 1).to_frame("revenue"))
            this_df = this_df.join(before['cap'].to_frame('before_cap'))
            this_df = this_df.join(after['cap'].to_frame('after_cap'))
            this_df = this_df.join(pd.Series([prev_date] * len(before.index), index=before.index).to_frame("before_date"))
            # after_date is only set for stocks that still have a quote on the sell date.
            this_df = this_df.join(pd.Series([this_date] * len(after.index), index=after.index).to_frame("after_date"))

            # Fundamentals as of the buy date. The (…, 4, sum) form presumably sums the
            # latest four reports -- TODO confirm against repository.deepsearch.
            this_df = this_df.join(ds.load_and("매출총이익", prev_date.year, prev_date.month, 4, sum))
            this_df = this_df.join(ds.load_and("영업이익", prev_date.year, prev_date.month, 4, sum))
            this_df = this_df.join(ds.load_and("당기순이익", prev_date.year, prev_date.month, 4, sum))
            this_df = this_df.join(ds.load_one("자산", prev_date.year, prev_date.month))
            this_df = this_df.join(ds.load_one("자본", prev_date.year, prev_date.month))
            this_df['name'] = [corp.get_name(code) for code in this_df.index]

            monthly_frames.append(this_df)
            prev_date = this_date

        if monthly_frames:
            # Write the cache once, after every period succeeded. Writing it inside the
            # loop meant an interrupted run left a partial "layer1" table that the next
            # run would load as if it were complete.
            pd.concat(monthly_frames).to_sql("layer1", con, if_exists="replace")
            con.commit()
            # Read the table back so a fresh build has exactly the same layout as a
            # cached load (RangeIndex, former index stored as a regular column).
            df = pd.read_sql("select * from layer1", con)
        else:
            df = pd.DataFrame()
finally:
    # Release the SQLite file even when a step above raises.
    con.close()
df

Unnamed: 0,code,before_close,after_close,revenue,before_cap,after_cap,before_date,after_date,매출총이익,영업이익,당기순이익,자산,자본,name
0,000020,1426,1926.0,0.350631,38668000000,5.222700e+10,2001-04-30,2001-05-31,3.630961e+10,8.167152e+09,1.911550e+09,2.538914e+11,1.283201e+11,동화약품
1,000040,38545,41987.0,0.089298,6013000000,6.549000e+09,2001-04-30,2001-05-31,1.772354e+10,-1.394038e+10,-5.754296e+10,1.224912e+11,-6.835715e+10,KR모터스
2,000050,1720,1817.0,0.056395,38036000000,4.018100e+10,2001-04-30,2001-05-31,5.425668e+10,6.776929e+09,-4.210050e+08,6.484120e+11,3.746775e+11,경방
3,000060,423,753.0,0.780142,34737000000,6.183700e+10,2001-04-30,2001-05-31,,-4.037319e+10,,1.650858e+12,8.583474e+10,메리츠화재
4,000070,11066,14420.0,0.303091,125975000000,1.641570e+11,2001-04-30,2001-05-31,,,,,,삼양홀딩스
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398206,950170,3500,3120.0,-0.108571,122517000000,1.092150e+11,2022-09-30,2022-10-31,-2.450100e+07,-3.807051e+10,-3.313825e+10,2.465567e+11,2.495650e+10,JTC
398207,950190,14250,14100.0,-0.010526,193500000000,1.914630e+11,2022-09-30,2022-10-31,6.398071e+10,2.903573e+10,2.445984e+10,1.298360e+11,1.047950e+11,미투젠
398208,950200,7240,7100.0,-0.019337,138841000000,1.361560e+11,2022-09-30,2022-10-31,7.863329e+09,-6.851537e+09,-6.829142e+09,5.276798e+10,3.466701e+10,소마젠
398209,950210,7900,7590.0,-0.039241,474758000000,4.561280e+11,2022-09-30,2022-10-31,,-5.429810e+10,-2.282415e+11,4.998132e+11,4.299359e+11,프레스티지바이오파마


In [3]:
from typing import *
from analysis.sub import calc_factor
from multiprocessing import Pool

# Korean financial-statement item name -> short code used as the column name.
col_map = {
    # income statement: gross profit, operating income, net income
    "매출총이익": "GP", "영업이익": "OP", "당기순이익": "NP",
    # balance sheet: assets, equity
    "자산": "A", "자본": "E",
    # cash flows from operating / investing / financing activities
    "영업활동으로인한현금흐름": "CFO",
    "투자활동으로인한현금흐름": "CFI",
    "재무활동으로인한현금흐름": "CFF",
}

# Column codes tried as the profit term and as the base term of a factor.
profit_kinds = ["GP", "OP", "NP"]
base_kinds = ["E", "A"]

# Weights tried for each term. The middle value stays the int 1 (not 1.0); the recorded
# output shows labels such as "E^1", so the literal form presumably ends up in the
# factor names -- verify against calc_factor.
profit_weights = [0.8, 1, 1.2]
base_weights = [0.8, 1, 1.2]


def combine(l1, l2) -> Iterable[Tuple[Any, Any]]:
    """Lazily yield every ``(a, b)`` pair with ``a`` taken from ``l1`` and ``b`` from ``l2``.

    Pairs come out in nested-loop order: all of ``l2`` for the first item of ``l1``,
    then all of ``l2`` for the second item, and so on. ``l2`` is iterated once per
    item of ``l1``, so it must be re-iterable (a list or tuple, not a one-shot
    generator).

    The elements are not restricted to ints; the callers in this notebook pass
    column codes (str) paired with weights (int/float).
    """
    for i_l1 in l1:
        for i_l2 in l2:
            yield i_l1, i_l2


# Keep rows that have a sell-date quote, then drop rows with any missing value.
df = df[df["after_date"].notnull()].dropna()
# Replace the Korean statement-item column names with their short codes.
df.columns = [col_map.get(col, col) for col in df.columns]
df2 = df.copy()

# One argument tuple per (profit kind, profit weight, base kind, base weight) combination.
params_set = [
    (df, pk, pw, bk, bw)
    for pk, pw in combine(profit_kinds, profit_weights)
    for bk, bw in combine(base_kinds, base_weights)
]

tasks = []
results = []
with Pool(8) as p:
    # Submit everything first so the workers run in parallel.
    tasks.extend(p.apply_async(calc_factor, params) for params in params_set)

    # Collect in submission order. This must stay inside the `with` block because
    # leaving it terminates the pool.
    for task in tasks:
        result = task.get()
        results.append(result)
        # Widen df2 with the columns of each factor frame, matched on the row index.
        df2 = df2.merge(result, left_index=True, right_index=True)

# Persist the widened table as "layer2".
con = sqlite3.connect("../.out/analysis.db")
df2.to_sql("layer2", con, if_exists="replace")
con.close()
results

[        GP^0.7/E^0.7/P  Normalize(GP^0.7/E^0.7/P)  Rank(GP^0.7/E^0.7/P)
 0         1.068703e-11                  -0.206644                 346.0
 1                  NaN                        NaN                   NaN
 2         6.797702e-12                  -0.239687                 428.0
 5         1.974022e-12                  -0.280667                 534.0
 6         7.840189e-12                  -0.230830                 403.0
 ...                ...                        ...                   ...
 398204    3.142727e-12                  -0.229729                 896.0
 398205    5.171481e-12                   0.162739                 550.0
 398206             NaN                        NaN                   NaN
 398207    3.658600e-12                  -0.129932                 794.0
 398208    2.549561e-12                  -0.344479                1048.0
 
 [344798 rows x 3 columns],
         GP^0.7/E^0.8/P  Normalize(GP^0.7/E^0.8/P)  Rank(GP^0.7/E^0.8/P)
 0         8.279948e-

In [5]:

# Reload df2 from the persisted "layer2" table when it exists; otherwise df2 keeps
# whatever value it already has in the kernel.
con = sqlite3.connect("../.out/analysis.db")
try:
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' and name = 'layer2'")
    if len(cursor.fetchall()) > 0:
        df2 = pd.read_sql("select * from layer2", con)
finally:
    # The original never closed this connection, unlike the other cells.
    con.close()

# Every (profit kind, base kind) combination, profit kind varying slowest.
pairs = []
for pk in profit_kinds:
    for bk in base_kinds:
        pairs.append((pk, bk))


def mean(s: pd.Series, n: int):
    """Return the mean of the leading ``s[:n]`` slice of ``s``, rounded to 4 decimals."""
    leading = s[:n]
    average = leading.mean()
    return round(average, 4)


# For every factor frame in `results`, summarise how the mean return varies by rank.
log_lines = []
# Normalize(...) column name -> correlation of that column with the return.
correlations = {}
# Set to True to draw the per-rank bar charts (raw and 10-rank rolling mean).
show_plots = False
y_label = "revenue"

for result in results:
    # Per the recorded output, each result frame holds three columns:
    # raw factor, Normalize(factor), Rank(factor).
    norm_label = result.columns[1]
    x_label = result.columns[2]

    # The original line called Series.merge (which does not exist) and passed data as
    # DataFrame.corr's `method` argument, so it raised on the first iteration.
    # Series.corr aligns both series on the index and ignores missing pairs.
    # NOTE(review): Pearson correlation over all rows is assumed to be the intent.
    correlations[norm_label] = result[norm_label].corr(df[y_label])

    # Mean return per rank, ordered by rank; rows whose rank is NaN are left out.
    # `df` is used rather than `df2` because `df` shares its index with `result`,
    # whereas a `df2` reloaded from SQLite has a fresh RangeIndex and no longer
    # lines up with a mask built from `result`. One groupby also avoids rescanning
    # every row once per rank.
    rank_returns = df[y_label].groupby(result[x_label]).mean()
    x = list(rank_returns.index)
    y = list(rank_returns)
    y_series = pd.Series(y)

    if show_plots:
        plt.figure(figsize=(20, 8))
        plt.subplot(1, 2, 1)
        plt.grid(True)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.bar(x, y)
        plt.subplot(1, 2, 2)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.bar(x, y_series.rolling(10).mean())
        plt.show()

    # CSV line: rank column name, then the average of the per-rank mean returns over
    # the first 10, 30 and 50 ranks.
    log = ",".join(str(v) for v in [x_label, mean(y_series, 10), mean(y_series, 30), mean(y_series, 50)])
    print(log)
    log_lines.append(log)

print("=" * 80)
for line in log_lines:
    print(line)


Rank(GP^0.7/E^0.7/P),0.0561,0.0455,0.0415
Rank(GP^0.7/E^0.8/P),0.0575,0.046,0.0414
Rank(GP^0.7/E^0.9/P),0.0582,0.0465,0.0411
Rank(GP^0.7/E^1/P),0.0577,0.0456,0.0407
Rank(GP^0.7/E^1.1/P),0.0596,0.0448,0.0409
Rank(GP^0.7/E^1.2/P),0.059,0.0448,0.0409
Rank(GP^0.7/E^1.3/P),0.0574,0.0438,0.0411
Rank(GP^0.7/A^0.7/P),0.0547,0.0466,0.0414
Rank(GP^0.7/A^0.8/P),0.0558,0.0488,0.0407
Rank(GP^0.7/A^0.9/P),0.0558,0.0484,0.0405
Rank(GP^0.7/A^1/P),0.0542,0.0475,0.0408
Rank(GP^0.7/A^1.1/P),0.052,0.0472,0.0395
Rank(GP^0.7/A^1.2/P),0.0518,0.0458,0.0387
Rank(GP^0.7/A^1.3/P),0.0518,0.0453,0.0384
Rank(GP^0.8/E^0.7/P),0.0538,0.0449,0.0401
Rank(GP^0.8/E^0.8/P),0.0556,0.0446,0.0401
Rank(GP^0.8/E^0.9/P),0.058,0.0453,0.0402
Rank(GP^0.8/E^1/P),0.0566,0.0458,0.0398
Rank(GP^0.8/E^1.1/P),0.0592,0.0441,0.0407
Rank(GP^0.8/E^1.2/P),0.0583,0.0443,0.0403
Rank(GP^0.8/E^1.3/P),0.0586,0.0422,0.0403
Rank(GP^0.8/A^0.7/P),0.0536,0.0458,0.0393
Rank(GP^0.8/A^0.8/P),0.0544,0.0463,0.0409
Rank(GP^0.8/A^0.9/P),0.0541,0.0459,0.0403
Ra