In [None]:
# Find days on which volatility is contracting
import datetime
import functools
import time

import matplotlib.pyplot as plt
import polars as pl
import tqdm

import stock

daily_csv_dir = stock.PROJECT_ROOT / "data/daily"

In [None]:
# Single-stock sanity check: load one code and run both screens in debug mode.
# NOTE(review): check_vcp_pattern and check_fundamentals are defined in later
# cells, so this cell raises NameError under Restart Kernel -> Run All.
code = "4107"
#start_date = datetime.date(year=2024, month=1, day=1)
#end_date = datetime.date.today() # datetime.date(year=2024, month=2, day=21)
end_date = datetime.date(year=2024, month=3, day=10)
# Analysis window: the 365 days ending at end_date.
start_date = end_date - datetime.timedelta(days=365)
csv_path = daily_csv_dir / f"{code}.csv"
df = stock.kabutan.read_data_csv(csv_path)
extremal_df = stock.algorithm.extremal.calc_extremal(df, is_exact=False, start_date=start_date, end_date=end_date)
# Prints the (VCP screen, fundamentals screen) results; debug=True makes each
# screen print the reason for a rejection.
print(check_vcp_pattern(end_date, df, debug=True), check_fundamentals(code, end_date, debug=True))

In [None]:
# Display the extremal points returned by calc_extremal in the previous cell.
extremal_df

In [None]:
def polars_map_batch(func):
    """Adapt an element-wise function so it can be passed to ``map_batches``.

    The returned callable iterates over the batch it receives, calls ``func``
    on every element (forwarding any extra positional/keyword arguments) and
    collects the results into a new ``pl.Series``.

    ```python
    @polars_map_batch
    def test(dates, days=30):
        return dates - datetime.timedelta(days=days)

    df = df.with_columns(pl.col("date").map_batches(test).alias("test"))
    ```

    Args:
        func: Callable invoked once per element as ``func(element, *args, **kwargs)``.

    Returns:
        A wrapper taking ``(pl_obj, *args, **kwargs)`` and returning ``pl.Series``.
    """
    # functools.wraps keeps func's __name__/__doc__ on the wrapper so decorated
    # functions stay identifiable in tracebacks and help().
    @functools.wraps(func)
    def _map_batch(pl_obj, *args, **kwargs):
        res = [func(data, *args, **kwargs) for data in pl_obj]
        return pl.Series(res)

    return _map_batch


In [None]:
# Scratch cell: runs the first steps of check_vcp_pattern at top level.
# NOTE(review): this overwrites the kernel-level `df` (adds avg200/diff200 and
# drops rows), so re-running the cell or later cells sees the filtered frame.
target_date = end_date
df = df.with_columns(
    avg200=pl.col("close").rolling_mean(window_size=200),
    diff200=pl.col("close").rolling_mean(window_size=200).diff()
)
start_date = target_date - datetime.timedelta(days=365)

# Trend-template check
# The 200-day moving average must be rising
# Keep only rows that have a diff200 value and are dated on/before target_date.
df = df.filter(pl.col("diff200").is_not_null()).filter(pl.col("date") <= target_date)

# # Volume must not be too small
# if df["volume"] < 3000:
#     return False


In [None]:
# Automatically look for a VCP (volatility contraction pattern) setup.
def check_vcp_pattern(target_date: datetime.date, df: pl.DataFrame, debug: bool = False) -> bool:
    """Return True when the chart up to ``target_date`` passes the VCP screen.

    The checks run in this order; the first one that fails returns False:

    1. The 200-row moving average of ``close`` is not falling on the last row
       and the last close is not below it.
    2. The mean ``volume`` of the last 30 rows is at least 3000.
    3. ``calc_extremal`` (365 days ending at ``target_date``) returns rows, and
       at least two of them are highs (``high == rolling_high``).
    4. The highest of those highs, excluding the most recent one, is at least
       21 days older than ``target_date`` (the base has had time to form).
    5. The last close is within 5% of that highest high.
    6. The last ``rs`` value is at least 1.0.
    7. The most recent high is not more than 2% above the highest high.
    8. Walking the extremal rows from newest to oldest, at least two highs
       above 90% of the highest high are seen before the first low that is
       below 65% of it.

    Args:
        target_date: Evaluation date; rows after it are ignored.
        df: Daily data with at least ``date``, ``close``, ``high``, ``low``,
            ``volume`` and ``rs`` columns.
        debug: When True, print the reason for a rejection.

    Returns:
        True if every check passes, otherwise False.
    """

    def _reject(*message) -> bool:
        # Report the reason only in debug mode; always signal "no match".
        if debug:
            print(*message)
        return False

    start_date = target_date - datetime.timedelta(days=365)
    avg200 = pl.col("close").rolling_mean(window_size=200)
    df = (
        df.with_columns(avg200=avg200, diff200=avg200.diff())
        .filter(pl.col("diff200").is_not_null())
        .filter(pl.col("date") <= target_date)
    )
    if len(df) == 0:
        return False

    # Trend template: the 200-row average must not be falling and the price
    # must not be below it.
    last_close = df["close"][-1]
    if df["diff200"][-1] < 0:
        return _reject("avg200 is downward.")
    if df["avg200"][-1] > last_close:
        return _reject("lower than avg200")

    # Volume must not be too small. mean() yields None when every value is
    # null; treat that as "too little volume" instead of raising TypeError.
    recent_volume = df["volume"][-30:].mean()
    if recent_volume is None or recent_volume < 3000:
        return _reject("Too few volume.", recent_volume)

    extremal_df = stock.algorithm.extremal.calc_extremal(
        df, window_size=4, is_exact=False, use_future=False, start_date=start_date, end_date=target_date)
    if len(extremal_df) == 0:
        return _reject("No extremal")

    # At least two highs are required.
    high_extremal_df = extremal_df.filter(pl.col("high") == pl.col("rolling_high"))
    if len(high_extremal_df) < 2:
        return _reject("Too few high extremal")

    # The base top is the highest high excluding the most recent one.
    highest_idx = high_extremal_df["high"][:-1].arg_max()
    # At least three weeks must have passed since the base started forming.
    if high_extremal_df["date"][highest_idx] + datetime.timedelta(days=21) > target_date:
        return _reject("Too few days from starting base")

    highest = high_extremal_df["high"][highest_idx]

    # The current price must be close to the base top.
    if highest * 0.95 > last_close:
        return _reject("lower than highest")

    # Relative strength must be high. A missing value is a rejection rather
    # than a TypeError.
    latest_rs = df["rs"][-1]
    if latest_rs is None or latest_rs < 1.0:
        return _reject("rs < 1.0")

    if high_extremal_df["high"][-1] > highest * 1.02:
        return _reject("higher than base top")

    # Count highs near the base top, newest first, stopping at the first low
    # that falls far enough below it.
    thresh_high = highest * 0.90
    thresh_low = highest * 0.65
    i_cnt_high = 0
    for i in range(len(extremal_df)):
        row = extremal_df[-1 - i]
        if row["high"][0] == row["rolling_high"][0]:
            if row["high"][0] > thresh_high:
                i_cnt_high += 1

        if row["low"][0] == row["rolling_low"][0]:
            if row["low"][0] < thresh_low:
                break

    return i_cnt_high > 1

In [None]:
import requests
from bs4 import BeautifulSoup


def get_market_capitalization(
    code: str, base_url: str = "https://kabutan.jp/stock/?code={}", timeout: float = 10.0
) -> int:
    """Scrape the market capitalization of ``code`` from its stock page.

    The page is fetched from ``base_url.format(code)``. Inside the
    ``div#stockinfo_i3`` table, the row whose header cell reads "時価総額" is
    located and its value cell is converted with
    ``stock.util.convert_to_number``.

    Args:
        code: Stock code substituted into ``base_url``.
        base_url: URL template with one ``{}`` placeholder for the code.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        The converted value, or 0 when the div, the table, the row or its
        value cell cannot be found.
        NOTE(review): the unit is whatever ``convert_to_number`` yields;
        confirm against its implementation.
    """
    res = requests.get(base_url.format(code), timeout=timeout)
    soup = BeautifulSoup(res.text, features="lxml")

    market_cap_div = soup.find("div", {"id": "stockinfo_i3"})
    if market_cap_div is None:
        return 0
    market_cap_table = market_cap_div.find("table")
    if market_cap_table is None:
        return 0

    for table_row in market_cap_table.find_all("tr"):
        th = table_row.find("th")
        # Strip so surrounding whitespace in the markup does not hide the row.
        if th is None or th.text.strip() != "時価総額":
            continue
        td = table_row.find("td")
        if td is None:
            # Header without a value cell: report "unknown" like the other
            # not-found paths instead of raising AttributeError.
            return 0
        return stock.util.convert_to_number(td.text)

    return 0


In [None]:
def check_fundamentals(code, current_date=None, debug: bool = False):
    """Return True when ``code`` passes the fundamentals screen.

    Reads ``data/financial/<code>.csv`` and keeps rows announced on or before
    ``current_date``. It then requires, among the 12-month rows:

    * the latest actual result has non-negative net and operating income;
    * the latest forecast has non-negative net and operating income;
    * the value returned by ``get_market_capitalization`` lies in [100, 5000].

    Args:
        code: Stock code; used for the CSV file name and the market-cap lookup.
        current_date: Evaluation date. ``None`` (default) means today,
            resolved when the function is called.
        debug: When True, print the reason for a rejection.

    Returns:
        True if every check passes, otherwise False.
    """
    # A default of datetime.date.today() in the signature is evaluated once,
    # at definition time, and goes stale in a long-running kernel.
    if current_date is None:
        current_date = datetime.date.today()

    def _reject(message):
        # Report the reason only in debug mode; always signal "no match".
        if debug:
            print(message)
        return False

    csv_path = stock.PROJECT_ROOT / "data/financial" / f"{code}.csv"
    df = stock.kabutan.read_financial_csv(csv_path)
    df = df.filter(pl.col("annoounce_date") <= current_date)

    # Latest actual full-year result must be non-negative.
    yearly_df = df.filter(
        (pl.col("duration") == 12) & (pl.col("is_prediction") == False)  # noqa: E712
    ).sort(pl.col("annoounce_date"))
    if len(yearly_df) == 0:
        # No actual result announced yet: nothing to evaluate.
        return _reject("no yearly result")
    latest_year = yearly_df[-1]
    net_income = latest_year["net_income"][0]
    operating_income = latest_year["operating_income"][0]
    if net_income is None or net_income < 0 or operating_income is None or operating_income < 0:
        return _reject("net_income < 0 or operating_income < 0")

    # Latest full-year forecast must be non-negative.
    pred_df = df.filter(
        (pl.col("duration") == 12) & (pl.col("is_prediction") == True)  # noqa: E712
    ).sort(pl.col("annoounce_date"))
    if len(pred_df) == 0:
        return _reject("no yearly prediction")
    latest_pred = pred_df[-1]
    pred_net_income = latest_pred["net_income"][0]
    pred_operating_income = latest_pred["operating_income"][0]
    if pred_net_income is None or pred_net_income < 0 or pred_operating_income is None or pred_operating_income < 0:
        return _reject("pred net income < 0 or pred operating_income < 0")

    # Disabled check: forecast must not be below the actual result.
    # if pred_net_income < net_income  or pred_operating_income < operating_income:
    #     not debug or print("pred < actual")
    #     return False

    # Market capitalization must be in a suitable range.
    market_capitalization = get_market_capitalization(code)
    # Throttle after every request, including the ones that end in a rejection.
    time.sleep(0.05)
    if market_capitalization < 100 or 5000 < market_capitalization:
        return _reject("invalid market capitalization")
    return True

In [None]:
def get_watch_list(current_date=None):
    """Return the codes that pass both screens as of ``current_date``.

    Reads ``data/data_j.csv``, keeps the rows whose "市場・商品区分" contains
    "内国株式", and for each code loads ``<daily_csv_dir>/<code>.csv`` and runs
    ``check_vcp_pattern`` followed by ``check_fundamentals``.

    Args:
        current_date: Evaluation date. ``None`` (default) means today,
            resolved when the function is called.

    Returns:
        List of codes, in file order, for which both screens return True.
    """
    # A default of datetime.date.today() in the signature is evaluated once,
    # at definition time, and goes stale in a long-running kernel.
    if current_date is None:
        current_date = datetime.date.today()

    code_csv_path = stock.PROJECT_ROOT / "data/data_j.csv"
    code_df = pl.read_csv(code_csv_path)
    code_df = code_df.filter(pl.col("市場・商品区分").str.contains("内国株式"))

    watch_list = []
    for code in tqdm.tqdm(code_df["コード"]):
        csv_path = daily_csv_dir / f"{code}.csv"
        df = stock.kabutan.read_data_csv(csv_path)
        # The chart screen runs first so the fundamentals screen (which makes
        # a network request) is only reached for chart matches.
        if check_vcp_pattern(current_date, df) and check_fundamentals(code, current_date):
            watch_list.append(code)
    return watch_list

In [None]:
# Run the full screen over every listed domestic stock as of today.
watch_list_cur = get_watch_list(datetime.date.today())
#watch_list_prev = get_watch_list(datetime.date.today() - datetime.timedelta(days=1))

In [None]:
# Number of codes that passed both screens.
len(watch_list_cur)

In [None]:
# NOTE(review): same expression as the previous cell; one of the two can go.
len(watch_list_cur)

In [None]:
# Display the codes that passed both screens.
watch_list_cur

In [None]:
# Evaluate check_vcp_pattern for every date in `df` and store the result as the
# boolean column "is_vcp". polars_map_batch calls check_vcp_pattern(date, df)
# once per element of the "date" column, so this is one full screen per row.
# NOTE(review): `df` here is whatever the kernel-level `df` currently holds.
df = df.with_columns(
    pl.col("date").map_batches(lambda data : polars_map_batch(check_vcp_pattern)(data, df)).alias("is_vcp")
)

In [None]:
# Print every row flagged as VCP. The table row limit is changed for the print
# (-1 presumably means "no limit" - see polars Config docs) and then set to 10.
pl.Config.set_tbl_rows(-1)
print(df.filter(pl.col("is_vcp")))
pl.Config.set_tbl_rows(10)

In [None]:
# Row count of the current `df`.
len(df)

In [None]:
# Hand-rolled extrema detection for a single code (replaces the kernel-level
# `code`, `csv_path`, `df` and `extremal_df`).
code = "3696"
csv_path = daily_csv_dir / f"{code}.csv"
df = stock.kabutan.read_data_csv(csv_path)
# Compute the local extrema on the chart
df = df.with_columns(
    # Candidate high: `high` rose from the previous row and falls on the next.
    high_extremal_cand=((pl.col("high").diff() > 0) & (pl.col("high").diff().shift(-1) < 0)),
    # Candidate low: `low` fell from the previous row and rises on the next.
    low_extremal_cand=((pl.col("low").diff() < 0) & (pl.col("low").diff().shift(-1) > 0)),
    # Centered 7-row max/min; null results are filled forward, then backward.
    rolling_high=pl.col("high").rolling_max(window_size=7, center=True).fill_null(strategy="forward").fill_null(strategy="backward"),
    rolling_low=pl.col("low").rolling_min(window_size=7, center=True).fill_null(strategy="forward").fill_null(strategy="backward"),
)
# Keep candidates that are also the max (min) of their centered 7-row window.
extremal_df = df.filter(
    (pl.col("high_extremal_cand") & (pl.col("rolling_high") == pl.col("high"))) |
    (pl.col("low_extremal_cand") & (pl.col("rolling_low") == pl.col("low")))
)

In [None]:
# Display the daily frame with the extrema helper columns.
df

In [None]:
# Display the rows kept as extrema.
extremal_df

In [None]:
# Reduce the extrema so that highs and lows alternate.
# Flag each row as a confirmed high and/or low (same conditions as the filter
# that built extremal_df).
extremal_df = extremal_df.with_columns(
    high_extremal=(pl.col("high_extremal_cand") & (pl.col("rolling_high") == pl.col("high"))),
    low_extremal=(pl.col("low_extremal_cand") & (pl.col("rolling_low") == pl.col("low"))),
)

# `rows` holds one-row DataFrames; rows[-1] is the most recently kept extremum.
# NOTE(review): extremal_df[0] assumes the frame is non-empty.
rows = [extremal_df[0]]
for i in range(1, len(extremal_df)):
    row = extremal_df[i]
    if (rows[-1]["high_extremal"][0] and rows[-1]["low_extremal"][0]) or (
        row["high_extremal"][0] and row["low_extremal"][0]
    ):
        rows.append(row)  # Keep it when a high and a low occur on the same day
    elif (rows[-1]["high_extremal"][0] and row["low_extremal"][0]) or (
        rows[-1]["low_extremal"][0] and row["high_extremal"][0]
    ):
        # The type alternates (high -> low or low -> high): keep it.
        rows.append(row)
    else:
        # When highs (lows) follow each other, keep the higher (lower) one
        if rows[-1]["high_extremal"][0] and rows[-1]["high"][0] < row["high"][0]:
            rows[-1] = row
        elif rows[-1]["low_extremal"][0] and rows[-1]["low"][0] > row["low"][0]:
            rows[-1] = row
extremal_df = pl.concat(rows)


In [None]:
# Print the extrema between 2023-11-29 and 2024-01-15 with the columns needed
# to inspect them. The table row limit is changed for the print and then set
# to 10.
pl.Config.set_tbl_rows(-1)
print(extremal_df
      .filter(pl.col("date").is_between(datetime.date(year=2023, month=11, day=29), datetime.date(year=2024, month=1, day=15)))
      .select(pl.col("date"), pl.col("high"), pl.col("high_extremal_cand"), pl.col("rolling_high"), pl.col("low"), pl.col("low_extremal_cand"), pl.col("rolling_low"))
)
pl.Config.set_tbl_rows(10)

In [None]:
# NOTE(review): identical to the previous cell (same filter, same columns).
# Print the extrema between 2023-11-29 and 2024-01-15; the table row limit is
# changed for the print and then set to 10.
pl.Config.set_tbl_rows(-1)
print(extremal_df
      .filter(pl.col("date").is_between(datetime.date(year=2023, month=11, day=29), datetime.date(year=2024, month=1, day=15)))
      .select(pl.col("date"), pl.col("high"), pl.col("high_extremal_cand"), pl.col("rolling_high"), pl.col("low"), pl.col("low_extremal_cand"), pl.col("rolling_low"))
)
pl.Config.set_tbl_rows(10)

In [None]:
# Load the code master table and keep only the rows whose market segment
# contains "内国株式".
code_csv_path = stock.PROJECT_ROOT / "data/data_j.csv"
is_domestic_stock = pl.col("市場・商品区分").str.contains("内国株式")
code_df = pl.read_csv(code_csv_path).filter(is_domestic_stock)
code_df

In [None]:
daily_data_dir = stock.PROJECT_ROOT / "data/daily"


def get_pivot_df(code: str) -> pl.DataFrame:
    """Return the rows of ``code``'s daily data that pass the quiet-day screen.

    The daily CSV is read from ``daily_data_dir`` and extended with helper
    columns (candle body/range, next-row open and return, previous-row volume,
    20-row rolling statistics, and row-to-row differences). A row is kept when
    all of the following hold:

    * the next row exists (``next_open`` is not null);
    * its high is below and its low is above the previous row's (inside day);
    * its range is below the 20-row median range;
    * its close is below the 20-row average close;
    * its volume is within 5% of the 20-row minimum volume;
    * its volume is less than half the 20-row average volume;
    * the previous row's volume is below the 20-row average volume;
    * its volume is above 1000.

    Args:
        code: Stock code; the file ``<code>.csv`` is read.

    Returns:
        The filtered frame, including every helper column.
    """
    open_, high, low = pl.col("open"), pl.col("high"), pl.col("low")
    close, volume = pl.col("close"), pl.col("volume")
    body = close - open_
    day_range = high - low

    daily = stock.kabutan.read_data_csv(daily_data_dir / f"{code}.csv")
    enriched = daily.with_columns(
        co=body,
        hl=day_range,
        next_open=open_.shift(-1),
        next_co=(body / open_).shift(-1),
        prev_volume=volume.shift(1),
        close_avg=close.rolling_mean(window_size=20),
        hl_median=day_range.rolling_median(window_size=20),
        volume_avg=volume.rolling_mean(window_size=20),  # average volume, last 20 rows
        volume_min=volume.rolling_min(window_size=20),  # minimum volume, last 20 rows
    ).with_columns(
        diff_co=pl.col("co").diff(n=1),
        diff_hl=pl.col("hl").diff(n=1),
        diff_high=high.diff(n=1),
        diff_low=low.diff(n=1),
    )

    has_next_row = pl.col("next_open").is_not_null()
    # The day's range sits inside the previous day's range.
    inside_day = (pl.col("diff_high") < 0) & (pl.col("diff_low") > 0)
    # The day's range is small.
    narrow_range = pl.col("hl") < pl.col("hl_median")
    # NOTE(review): the original comment said "price above the moving
    # average", but the condition keeps rows whose close is BELOW it.
    below_close_avg = close < pl.col("close_avg")
    # Volume is close to the lowest of the last 20 rows.
    volume_near_min = volume - pl.col("volume_min") * 1.05 < 0
    # Volume is well below the 20-row average.
    volume_dried_up = volume * 2 < pl.col("volume_avg")
    prev_volume_below_avg = pl.col("prev_volume") < pl.col("volume_avg")
    volume_floor = volume > 1000

    # Disabled experiments kept for reference:
    #   (pl.col("co").abs() * 2 < pl.col("hl"))         # open and close nearly equal
    #   (pl.col("co") < 0)
    #   (pl.col("low") > pl.col("next_open"))
    #   (pl.col("low") * 0.95 < pl.col("next_open"))
    return enriched.filter(
        has_next_row
        & inside_day
        & narrow_range
        & below_close_avg
        & volume_near_min
        & volume_dried_up
        & prev_volume_below_avg
        & volume_floor
    )

In [None]:
# Run the quiet-day screen on every daily CSV. Results for codes present in
# code_df go to `dfs`, everything else to `others`; each frame is tagged with
# its code as a string column.
dfs = []
others = []
for csv_path in daily_data_dir.glob("*.csv"):
    code = csv_path.stem
    df = get_pivot_df(code)
    destination = dfs if code in code_df["コード"] else others
    destination.append(df.with_columns(code=pl.lit(code).cast(str)))

In [None]:
# Stack the per-code results in date order, then keep the rows whose next open
# is above the day's high.
all_df = pl.concat(dfs).sort(pl.col("date"))
#df = all_df.filter(pl.col("low") > pl.col("next_open"))
opens_above_high = pl.col("high") < pl.col("next_open")
df = all_df.filter(opens_above_high)

In [None]:
# Total next-day open-to-close return and the share of winning days among the
# days that moved at all.
num_up = (df["next_co"] > 0).sum()
num_down = (df["next_co"] < 0).sum()
df["next_co"].sum(), num_up / (num_up + num_down)

In [None]:
# Sum of next-day returns per calendar year, 2014 through 2024.
for year in range(2014, 2025):
    first_day = datetime.date(year=year, month=1, day=1)
    last_day = datetime.date(year=year, month=12, day=31)
    df_year = df.filter(pl.col("date").is_between(first_day, last_day))
    total_return = df_year["next_co"].sum()
    print("year = {}, total return = {}".format(year, total_return))

In [None]:
# Group the next-day returns by signal date: {date: [next_co, ...]}.
profits = {}
for date, next_co in zip(df["date"], df["next_co"]):
    profits.setdefault(date, []).append(next_co)

In [None]:
# Average the returns within each day, then add the daily averages up per year.
profits_per_day = {
    day: sum(day_returns) / len(day_returns) for day, day_returns in profits.items()
}
daily_means_by_year = {}
for day, mean_return in profits_per_day.items():
    daily_means_by_year.setdefault(day.year, []).append(mean_return)
profits_per_year = {
    year_key: sum(daily_means) for year_key, daily_means in daily_means_by_year.items()
}

In [None]:
# Display the per-year sum of daily average returns.
profits_per_year

In [None]:
# Print every 2024 signal (date, next-day return, code). The table row limit
# is changed for the print and then set to 10.
year = 2024
df_year = df.filter(pl.col("date").is_between(datetime.date(year=year, month=1, day=1), datetime.date(year=year, month=12, day=31)))
pl.Config.set_tbl_rows(-1)
print(df_year.select(pl.col("date"), pl.col("next_co"), pl.col("code")))
pl.Config.set_tbl_rows(10)