In [None]:
import datetime
import pprint
from typing import Any

import polars as pl
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from pydantic import ConfigDict

import stock
from stock.kabutan import read_data_csv, read_financial_csv
from stock.algorithm.market import is_limit_high

In [None]:
def calc_for_watch_list(code, start_date=None, end_date=None):
    """Flag the rows on which `code` qualifies as a watch-list candidate.

    Reads the price history via `read_data_csv` and appends the helper columns
    `avg10`, `stddev10`, `breakpoint`, `min_close`, `price_range`,
    `high_count`, `max_volume`, `volume_increase` and the final boolean
    `watch_list`. All rolling windows are counted in rows of the frame.

    Args:
        code: security code passed through to `read_data_csv` / `read_financial_csv`.
        start_date: forwarded to `read_data_csv` unchanged.
        end_date: last date to consider; defaults to today's date, resolved at
            call time.

    Returns:
        The price frame with the columns listed above added.
    """
    if end_date is None:
        # A `datetime.date.today()` default in the signature would be evaluated
        # once at definition time and go stale in a long-running kernel.
        end_date = datetime.date.today()

    df = read_data_csv(code, start_date=start_date, end_date=end_date)

    # Size of recent price movement: 10-row rolling mean and standard deviation of the close.
    window_size = 10
    avg_key = "avg{}".format(window_size)
    stddev_key = "stddev{}".format(window_size)
    df = df.with_columns(
        pl.col("close").rolling_mean(window_size=window_size).alias(avg_key),
        pl.col("close").rolling_std(window_size=window_size).alias(stddev_key),
    )

    # Breakout: the close is more than one rolling standard deviation above the rolling mean.
    df = df.with_columns(
        (pl.col("close") > pl.col(avg_key) + pl.col(stddev_key)).alias("breakpoint")
    )

    # The recent low is not too far below the close (> 70%) and the range is
    # not too narrow (< 95%).
    df = df.with_columns(
        (pl.col("close").rolling_min(window_size=window_size)).alias("min_close")
    ).with_columns(
        ((pl.col("min_close") > pl.col("close") * 0.7) & (pl.col("min_close") < pl.col("close") * 0.95)).alias("price_range")
    )

    # Not too many higher closes: count the closes in the last 30 rows that
    # exceed the current one.
    df = df.with_columns(pl.col("close").rolling_map(
        function=lambda d : sum(d > d[-1]),
        window_size=30,
    ).alias("high_count"))

    # Volume surge. `shift()` makes the rolling maxima exclude the current row.
    df = df.with_columns(
        pl.col("volume").rolling_max(window_size=window_size).shift().alias("max_volume")
    )
    df = df.with_columns((
        (pl.col("volume") > pl.col("max_volume") * 2) &
        (pl.col("volume") * pl.col("close") > 20000 * 100) &
        (pl.col("volume").rolling_max(window_size=30).shift() * 0.9 < pl.col("volume"))
    ).alias("volume_increase"))

    # Combine the individual conditions. The last term requires an up day
    # (close >= open) unless volume is more than 20x the prior 10-row maximum.
    df = df.with_columns(
        (
            pl.col("breakpoint")
            & pl.col("price_range")
            & pl.col("volume_increase")
            & (pl.col("high_count") < 7)
            & ((pl.col("close") >= pl.col("open")) | (pl.col("volume") > pl.col("max_volume") * 20))
        ).alias("watch_list")
    )

    # Drop a candidate when any of the preceding 5 rows was already a candidate.
    df = df.with_columns(
        ((pl.col("watch_list").cast(int).rolling_max(window_size=5).shift() == 0) & pl.col("watch_list")).alias("watch_list")
    )

    # Drop candidates within 7 days either side of an earnings announcement.
    # The column name "annoounce_date" (sic) is the spelling used by the data source.
    fdf = (
        read_financial_csv(code)
        .filter(pl.col("annoounce_date") <= end_date)
        .sort(pl.col("annoounce_date"))
    )
    for announce_date in fdf["annoounce_date"]:
        df = df.with_columns(
            (
                pl.col("watch_list")
                & (
                    ~pl.col("date").is_between(
                        announce_date - datetime.timedelta(7), announce_date + datetime.timedelta(7)
                    )
                )
            ).alias("watch_list")
        )
    return df

In [None]:
# Scan every listed code and collect the (code, date) pairs flagged as
# watch-list candidates into one frame.
stacked = []
codes = stock.kabutan.get_code_list()
for code in tqdm(codes):
    capt = stock.kabutan.data.calc_estimated_capitalization(code)
    # Codes whose estimated market capitalisation exceeds 100 billion yen are skipped.
    if not capt > 100_000_000_000:
        df = calc_for_watch_list(code)
        stacked.append(
            df.filter(pl.col("watch_list"))
            .with_columns(pl.lit(code).alias("code"))
            .select("code", "date")
        )
stacked_df = pl.concat(stacked)

In [None]:
# Number of (code, date) watch-list candidates collected in stacked_df.
len(stacked_df)

In [None]:
def plot_chart(df: pl.DataFrame, normalize=False, before_days=-1):
    """Build a two-row Plotly figure: candlesticks on top, volume bars below.

    Args:
        df: frame providing the "date", "open", "high", "low", "close" and
            "volume" columns; it is sorted by "date" before plotting.
        normalize: when true, prices are divided by a reference price and the
            price axis is fixed to [0.7, 1.5]. The reference is the open of row
            `before_days`, or the close of the last row when `before_days`
            equals the number of rows.
        before_days: row index of the entry row in the sorted frame; a "buy"
            marker is drawn there when it is positive and inside the frame.

    Returns:
        The assembled Plotly figure.
    """
    ordered = df.sort("date")
    n_rows = len(ordered)
    fig = make_subplots(
        rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.0, row_heights=[0.7, 0.3]
    )

    # Reference price used to scale every price series.
    if not normalize:
        base = 1
    elif before_days == n_rows:
        base = ordered["close"][before_days - 1]
    else:
        base = ordered["open"][before_days]

    candles = go.Candlestick(
        x=ordered["date"],
        open=ordered["open"] / base,
        high=ordered["high"] / base,
        low=ordered["low"] / base,
        close=ordered["close"] / base,
        name="candle",
    )
    fig.add_trace(candles, row=1, col=1)

    # Entry marker at the open of the entry row.
    if before_days > 0 and n_rows > before_days:
        entry_row = ordered[before_days]
        buy_marker = go.Scatter(
            x=entry_row["date"],
            y=entry_row["open"] / base,
            mode="markers",
            name="buy",
            marker={"size": 10, "color": "blue"},
        )
        fig.add_trace(buy_marker, row=1, col=1)

    # Trading volume on the lower panel.
    fig.add_trace(go.Bar(x=ordered["date"], y=ordered["volume"], name="volume"), row=2, col=1)

    # Figure-wide settings.
    fig.update_layout(
        xaxis_rangeslider_visible=False,
        margin=go.layout.Margin(l=5, r=5, t=5, b=5, autoexpand=True),
        hovermode="x unified",
    )
    # Hide weekends on the date axis.
    fig.update_xaxes(rangebreaks=[{"bounds": ["sat", "mon"]}])
    if normalize:
        fig.update_layout(yaxis_range=[0.7, 1.5])
    return fig

In [None]:
def plot(code, date, prev_days=30, after_days=20, normalize=False):
    """Chart `code` around `date`: up to `prev_days` rows on/before it and `after_days` rows after it.

    Args:
        code: security code passed to `stock.kabutan.read_data_csv`.
        date: reference date; rows with "date" <= `date` form the leading part.
        prev_days: maximum number of rows to show on or before `date`.
        after_days: maximum number of rows to show after `date`.
        normalize: forwarded to `plot_chart`.

    Returns:
        The figure from `plot_chart`, or None when the history holds fewer than
        `prev_days + after_days` rows or the selected window is empty.
    """
    df = stock.kabutan.read_data_csv(code)
    if len(df) < prev_days + after_days:
        return None

    # Slice on a date-ordered frame so "last N before" / "first N after" are
    # well defined whatever order the reader returns.
    df = df.sort("date")
    # tail/head instead of [-n:] / [:n]: `[-0:]` would select every row.
    prev_df = df.filter(pl.col("date") <= date).tail(prev_days)
    after_df = df.filter(pl.col("date") > date).head(after_days)
    target_df = pl.concat([prev_df, after_df])
    if len(target_df) == 0:
        return None

    # Pass the number of leading rows actually present. Passing `prev_days`
    # misplaces the normalisation base and the buy marker when fewer than
    # `prev_days` rows exist on or before `date`.
    return plot_chart(target_df, before_days=len(prev_df), normalize=normalize)

In [None]:
# Smoke test of plot_chart: chart code 6254 for January 2024 without normalisation.
df = stock.kabutan.read_data_csv("6254", start_date=datetime.date(2024, 1, 1), end_date=datetime.date(2024, 1, 31))
plot_chart(df).show()

In [None]:
# Inspect one watch-list candidate: first the chart up to the signal date only
# (after_days=0), then the same chart including the following rows.
# NOTE(review): plot() returns None when the history is too short, in which case .show() fails.
idx = 49
plot(stacked_df["code"][idx], stacked_df["date"][idx], normalize=True, after_days=0).show()
plot(stacked_df["code"][idx], stacked_df["date"][idx], normalize=True).show()

In [None]:
class CustomStopCondition(stock.simulation.simulate.BaseCondition):
    """Entry and exit rules: buy at an open, exit by stop-loss, profit target plus trailing stop, or time limit.

    `set_start` decides whether to enter and records the entry.
    `run_simulation` evaluates one row of `self.df` per call and returns the
    selling price once the position is closed, or -1.0 to continue.

    NOTE(review): how `BaseCondition` / `simulate.run` drive these two methods
    is not visible here — presumably `run_simulation` is called repeatedly
    until it returns a non-negative price; confirm against the framework.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Input parameters
    max_loss_rate: float = 0.1  # initial stop-loss distance below the buying price (fraction)
    trailling_stop_rate: float = 0.2  # trailing-stop distance below a row's high (fraction)
    sell_rate: float = 0.5  # gain over the buying price at which half the position is sold (fraction)
    max_days: int = 7  # calendar days to hold while the profit target has not been reached
    total_max_days: int = 7 * 4  # hard cap on the holding period, in calendar days
    # Result variables
    buying_price: float = -1
    buying_date: datetime.date = datetime.date.today()
    selling_price: float = -1
    selling_date: datetime.date = datetime.date.today()
    # Internal working state
    loss_cut_price: float = -1  # current stop level; raised by the trailing stop
    profit_fixed_price: float = -1  # price at which the profit target is hit
    reach_target_price: bool = False  # True once the profit target has been hit
    target_selling_price: float = -1  # price credited to the half sold at the target
    highest_updated: bool = False  # only referenced by the disabled rule in run_simulation
    index: int = -1  # row of `df` evaluated by the next run_simulation call
    df: pl.DataFrame = pl.DataFrame()
    src_df: pl.DataFrame = pl.DataFrame()

    def reset_results(self):
        """Restore every result and working field to its initial value."""
        self.buying_price = -1
        self.buying_date = datetime.date.today()
        self.selling_price = -1
        self.selling_date = datetime.date.today()
        self.loss_cut_price = -1
        self.profit_fixed_price = -1
        self.reach_target_price = False
        self.target_selling_price = -1
        self.highest_updated = False
        self.index = -1
        self.df = pl.DataFrame()
        self.src_df = pl.DataFrame()

    def set_start(self, src_df: pl.DataFrame, start_date: datetime.date) -> float:
        """Decide whether to enter a position for a signal on `start_date`.

        The entry is the open of the second row on or after `start_date`
        (`df["open"][1]`). On entry, the buying price/date, the initial stop
        level and the profit-target level are stored on the instance.

        Args:
            src_df: price history with "date", "open", "high", "low", "close" columns.
            start_date: signal date.

        Returns:
            The buying price, or -1 when the trade is skipped.
        """
        self.reset_results()
        df = src_df.filter(pl.col("date") >= start_date).sort(pl.col("date"))
        prev_df = src_df.filter(pl.col("date") < start_date).sort(pl.col("date"))
        # Require more than 30 rows from the signal date onwards.
        if len(df) <= 30:
            return -1

        # The checks below need the last close before the signal date; without
        # any earlier row `prev_df["close"][-1]` would raise IndexError.
        if len(prev_df) == 0:
            return -1

        # Skip when the first available row is more than 10 days after the signal date.
        if df["date"][0] - start_date > datetime.timedelta(days=10):
            return -1

        # Avoid limit-up situations.
        # NOTE(review): presumably is_limit_high(prev_close, price) tests whether
        # the entry open is at the daily upper limit — confirm in stock.algorithm.market.
        if is_limit_high(df["close"][0], df["open"][1]):
            return -1

        # Skip when the entry open is more than 40% above the last close before the signal date.
        if prev_df["close"][-1] * 1.4 < df["open"][1]:
            return -1

        # Do not buy when the entry open is below the last pre-signal close, or
        # more than 10% below the signal row's close.
        if df["open"][1] < prev_df["close"][-1] or df["open"][1] < df["close"][0] * 0.9:
            return -1

        self.buying_price = df["open"][1]
        self.buying_date = df["date"][1]

        self.loss_cut_price = self.buying_price * (1 - self.max_loss_rate)
        self.profit_fixed_price = self.buying_price * (1 + self.sell_rate)
        self.index = 1
        self.df = df
        self.src_df = src_df
        return self.buying_price

    def run_simulation(self) -> float:
        """Evaluate row `self.index` of `self.df` and advance by one row.

        Returns:
            The selling price when an exit rule fires on this row (also stored
            in `selling_price` / `selling_date`), otherwise -1.0. Once the
            profit target has been reached, the selling price is the average
            of the target-half price and the exit price of the remaining half.
        """
        # NOTE(review): `index` is not bounds-checked; this relies on
        # set_start's ">30 rows" requirement covering `total_max_days` —
        # re-check if `total_max_days` is raised.
        df = self.df
        index = self.index
        # Hard time limit: sell once the total holding period is exceeded.
        if df["date"][index] - self.buying_date > datetime.timedelta(days=self.total_max_days):
            self.selling_date = df["date"][index]
            if self.reach_target_price:
                # NOTE(review): the remaining half is priced at
                # min(loss_cut_price, open) exactly as in the stop-loss branch
                # below, although the stop has not been hit here — confirm this
                # is intended rather than a copy of that branch.
                self.selling_price = (self.target_selling_price + min(self.loss_cut_price, df["open"][index])) * 0.5
            else:
                self.selling_price = df["open"][index]
            return self.selling_price

        # Neither target nor stop hit within `max_days`: sell at the open.
        if not self.reach_target_price and df["date"][index] - self.buying_date > datetime.timedelta(days=self.max_days):
            self.selling_price = df["open"][index]
            self.selling_date = df["date"][index]
            return self.selling_price

        # Stop hit: the row's low went below the current stop level. A gap
        # below the stop sells at the open instead.
        if df["low"][index] < self.loss_cut_price:
            self.selling_date = df["date"][index]
            if self.reach_target_price:
                self.selling_price = (self.target_selling_price + min(self.loss_cut_price, df["open"][index])) * 0.5
            else:
                self.selling_price = min(self.loss_cut_price, df["open"][index])
            return self.selling_price

        # Profit target reached for the first time: half is sold at the target
        # (or at the open when the row gaps above it).
        if df["high"][index] > self.profit_fixed_price and not self.reach_target_price:
            self.reach_target_price = True
            self.target_selling_price = max(self.profit_fixed_price, df["open"][index])

        # After the target, ratchet the stop up behind each row's high.
        if self.reach_target_price:
            self.loss_cut_price = max(
                self.loss_cut_price, df["high"][index] * (1 - self.trailling_stop_rate)
            )

        # Disabled rule: tighten the stop when no new high is made early on.
        # if not self.highest_updated:
        #     if df["high"][index] > df["high"][0]:
        #         self.highest_updated = True
        #     if self.index == 3: # no new high in the first 3 rows -> raise the stop
        #         self.loss_cut_price = self.buying_price * 0.96

        self.index += 1
        return -1.0

In [None]:
# Single back-test run for code 1382 with a signal date of 2023-09-01,
# using an 8% stop, 10% trailing stop and a 40% profit target.
conds = CustomStopCondition(
        max_loss_rate=0.08,
        trailling_stop_rate=0.1,
        sell_rate=0.4,
        max_days=14,
        total_max_days=28
    )
stock.simulation.simulate.run("1382", datetime.date(2023, 9, 1), conds)

In [None]:
# Back-test every watch-list candidate with one parameter set and collect the
# result dicts, tagging each with its code.
all_results = []
for idx in tqdm(range(len(stacked_df))):
    # A fresh condition object is built for every candidate.
    conds = CustomStopCondition(
        max_loss_rate=0.08,
        trailling_stop_rate=0.1,
        sell_rate=0.2,
        max_days=14,
        total_max_days=28
    )
    result = stock.simulation.simulate.run(stacked_df["code"][idx], stacked_df["date"][idx], conds).dict()
    # NOTE(review): this stops the whole loop at the first result with
    # profit > 10, so that result and every later candidate are never added to
    # all_results. It looks like a debugging stop for inspecting an outlier —
    # confirm whether `continue` (or no check) is intended.
    if result["profit"] > 10:
        break
    result["code"] = stacked_df["code"][idx]
    all_results.append(result)

In [None]:
# Keep only runs where a position was actually opened (set_start returns -1 when it skips a trade).
results = [res for res in all_results if res["buying_price"] > 0]

In [None]:
# Average profit over trades with profit < 1.0; larger values are left out of the average.
# NOTE(review): raises ZeroDivisionError when no trade passes the filter.
profits = [res["profit"] for res in results if res["profit"] < 1.0]
print("Average profit : {}".format(sum(profits) / len(profits)))

In [None]:
# Per-year breakdown (by buying date), restricted to trades with profit < 1.0.
profits_per_year = {}
for res in results:
    # Create a year's bucket only when a trade is actually added to it. A year
    # containing nothing but excluded outliers would otherwise leave an empty
    # list and make the average below divide by zero.
    if res["profit"] < 1.0:
        profits_per_year.setdefault(res["buying_date"].year, []).append(res["profit"])

for year in sorted(profits_per_year):
    print("Year {} : Average profit : {}".format(year, sum(profits_per_year[year]) / len(profits_per_year[year])))

In [None]:
# Per-sector breakdown
profits_per_class33 = {}
profits_per_class17 = {}

# Preparation: map each code to its 33-sector and 17-sector classification.
code_df = stock.kabutan.get_code_df()
code_class33_dict = dict(zip(code_df["コード"], code_df["33業種区分"]))
code_class17_dict = dict(zip(code_df["コード"], code_df["17業種区分"]))

# Aggregation: group each trade's profit under both classifications.
for res in results:
    cls33 = code_class33_dict[res["code"]]
    cls17 = code_class17_dict[res["code"]]
    profits_per_class33.setdefault(cls33, []).append(res["profit"])
    profits_per_class17.setdefault(cls17, []).append(res["profit"])

# Summary for the 33-sector classification, in ascending order of average profit.
for cls, res_list in sorted(
    profits_per_class33.items(),
    key=lambda item: sum(item[1]) / len(item[1]),
):
    if res_list:
        print("class = {:<20}, Average profit = {}, Number of data = {}".format(cls, sum(res_list) / len(res_list), len(res_list)))

# The 17-sector summary is currently disabled:
# for cls, res_list in profits_per_class17.items():
#     if len(res_list) > 0:
#         print("class = {:<20}, Average profit = {}".format(cls, sum(res_list) / len(res_list)))

In [None]:
def _mean_or_nan(values):
    """Return the arithmetic mean of `values`, or NaN for an empty list."""
    return sum(values) / len(values) if values else float("nan")


# By weekday of the buying date (0 = Monday ... 4 = Friday, per date.weekday()).
profits_per_weekday = [[] for _ in range(5)]
for res in results:
    profits_per_weekday[res["buying_date"].weekday()].append(res["profit"])

for idx, plist in enumerate(profits_per_weekday):
    # An empty bucket reports NaN instead of raising ZeroDivisionError.
    print("weekday = {}, Average profits = {}, Number of data = {}".format(
        idx, _mean_or_nan(plist), len(plist)
    ))

# By calendar month of the buying date.
profits_per_month = [[] for _ in range(12)]
for res in results:
    profits_per_month[res["buying_date"].month - 1].append(res["profit"])

# Label buckets 1..12 so the printed number is the calendar month.
for month, plist in enumerate(profits_per_month, start=1):
    print("month = {}, Average profits = {}, Number of data = {}".format(
        month, _mean_or_nan(plist), len(plist)
    ))

# By day of month of the buying date.
profits_per_day = [[] for _ in range(31)]
for res in results:
    profits_per_day[res["buying_date"].day - 1].append(res["profit"])
# The x axis runs 1..31 to match the day of month; empty days plot as gaps (NaN).
plt.plot(range(1, 32), [_mean_or_nan(plist) for plist in profits_per_day])

In [None]:
# (number of winning trades, number of losing trades) in `profits`.
# In top-to-bottom execution `profits` is the list built for the average-profit
# cell (profit < 1.0); a later cell rebinds the name without that filter.
sum(np.array(profits) > 0), sum(np.array(profits) < 0)

In [None]:
# Holding period in calendar days against profit, for every trade.
# Note: this rebinds `profits` to all results, without the < 1.0 filter used for the average.
days = [(res["selling_date"] - res["buying_date"]).days for  res in results]
profits = [res["profit"] for res in results]
plt.scatter(days, profits)

In [None]:
# Distribution of trade profits in 20 bins.
plt.hist(profits, bins=20)

In [None]:
# Groups for manual inspection:
#   super_neg - losses worse than -10%
#   super_pos - gains above 20%
#   loss_cuts - profits between -9% and -7%, i.e. around the 8% max_loss_rate used in the back-test
super_neg = [res for res in results if res["profit"] < -0.1]
super_pos = [res for res in results if res["profit"] > 0.2]
loss_cuts = [res for res in results if -0.09 < res["profit"] < -0.07]

In [None]:
# Group sizes, plus the share of all trades that fall in super_pos.
len(super_pos), len(super_neg),len(loss_cuts), len(super_pos) / len(results) 

In [None]:
# Inspect one large loser: print its result dict, then chart it around the
# buying date, raw and normalised.
# NOTE(review): the hard-coded idx fails with IndexError if super_neg has fewer entries.
idx = 9
pprint.pprint(super_neg[idx])
plot(super_neg[idx]["code"], super_neg[idx]["buying_date"]).show()
plot(super_neg[idx]["code"], super_neg[idx]["buying_date"], normalize=True).show()

In [None]:
# Inspect one large winner: print its result dict, then chart it around the
# buying date, raw and normalised.
# NOTE(review): the hard-coded idx fails with IndexError if super_pos has fewer entries.
idx = 8
pprint.pprint(super_pos[idx])
plot(super_pos[idx]["code"], super_pos[idx]["buying_date"]).show()
plot(super_pos[idx]["code"], super_pos[idx]["buying_date"], normalize=True).show()

In [None]:
# Inspect one trade from the loss_cuts group: print its result dict, then chart
# it around the buying date, raw and normalised.
# NOTE(review): the hard-coded idx fails with IndexError if loss_cuts has fewer entries.
idx = 16
pprint.pprint(loss_cuts[idx])
plot(loss_cuts[idx]["code"], loss_cuts[idx]["buying_date"]).show()
plot(loss_cuts[idx]["code"], loss_cuts[idx]["buying_date"], normalize=True).show()