In [None]:
import datetime
from pathlib import Path
import xml.etree.ElementTree as ET
import re

from tqdm import tqdm
import yfinance as yf
import polars as pl
import matplotlib.pyplot as plt
import numpy as np

import stock
from stock.kabutan import get_code_list, read_data_csv, read_financial_csv
from stock.kabutan.data import calc_estimated_capitalization

In [None]:
# Try applying Minervini's method to Japanese stocks.
# Load the "jump-up" events that the cells below use as the reference set.
# NOTE(review): later cells index this as jumpups[code]["date"], so it is presumably
# a dict of stock code -> DataFrame with a "date" column — confirm against stock.data.
jumpups = stock.data.get_jumpups(
    window_size=30, min_growing_rate=0.4, exclude_duplicate=True
)

In [None]:
# Total number of jump-up events summed over every stock code.
total_jumpups = sum(len(events) for events in jumpups.values())
print("Number of jumpups : {}".format(total_jumpups))

In [None]:
def plot(code, date):
    """Show a price chart of `code` for the 30 days before and after `date`.

    Prices are normalized by the open of the first row after `date`, so the
    chart reads as a ratio relative to that open.

    Args:
        code: Stock code passed to read_data_csv.
        date: Reference date (datetime.date) the chart is centered on.
    """
    margin = datetime.timedelta(days=30)
    df = read_data_csv(code, start_date=date - margin, end_date=date + margin)
    # Count of rows up to and including `date`; used as an index it points at
    # the first row AFTER `date`.
    idx = len(df.filter(pl.col("date") <= date))
    if idx == len(df):
        # No row exists after `date` (e.g. `date` is the latest available day):
        # fall back to the last row instead of indexing out of range.
        idx -= 1
    base_open = df["open"][idx]
    df = df.select(
        pl.col("date"),
        pl.col("open") / base_open,
        pl.col("high") / base_open,
        pl.col("low") / base_open,
        pl.col("close") / base_open,
        pl.col("volume"),
    )
    fig = stock.visualize.plot_chart(df, before_days=idx)
    fig.update_layout(yaxis_range=[0.7, 1.5])
    fig.show()

In [None]:
def get_watch_list_all() -> pl.DataFrame:
    """Collect watch-list hits for every code that has at least one jump-up entry.

    Codes missing from the global `jumpups` and codes whose estimated
    capitalization exceeds 100 billion yen are skipped. For the rest, the scan
    starts 30 days after the code's first jump-up date.

    Returns:
        A DataFrame with the columns "code" and "date", one row per hit.
    """
    max_capitalization = 100000000000  # 100 billion yen
    hits: list[pl.DataFrame] = []
    for code in tqdm(get_code_list()):
        if code not in jumpups:
            continue

        first_jumpup = jumpups[code]["date"][0]
        if calc_estimated_capitalization(code) > max_capitalization:
            continue

        scanned = calc_for_watch_list(
            code, start_date=first_jumpup + datetime.timedelta(30)
        )
        flagged = scanned.filter(pl.col("watch_list"))
        hits.append(flagged.select(pl.lit(code).alias("code"), pl.col("date")))
    return pl.concat(hits)


def calc_for_watch_list(
    code: str,
    start_date: datetime.date | None = None,
    end_date: datetime.date | None = None,
):
    """Compute the indicator columns and the boolean "watch_list" flag for one stock.

    Args:
        code: Stock code passed to read_data_csv.
        start_date: First date to load; passed through to read_data_csv.
        end_date: Last date to load. Defaults to today's date, resolved at call
            time (a `datetime.date.today()` default in the signature would be
            frozen at the moment the cell defining this function was run).

    Returns:
        The price DataFrame with the added columns avg10, stddev10, breakpoint,
        max_volume, volume_increase, ma50, ma150, ma200, max150, min150, ma_up,
        high_low_dist and watch_list.
    """
    if end_date is None:
        end_date = datetime.date.today()

    df = read_data_csv(code, start_date=start_date, end_date=end_date)

    # Size of the recent price movement: rolling mean / std of the close over 10 rows.
    window_size = 10
    avg_key = "avg{}".format(window_size)
    stddev_key = "stddev{}".format(window_size)
    df = df.with_columns(
        pl.col("close").rolling_mean(window_size=window_size).alias(avg_key),
        pl.col("close").rolling_std(window_size=window_size).alias(stddev_key),
    )

    # Breakout: the close is more than one rolling std above the rolling mean.
    df = df.with_columns(
        (pl.col("close") > pl.col(avg_key) + pl.col(stddev_key)).alias("breakpoint")
    )

    # Volume surge, measured against the max volume of the previous 10 rows
    # (shift() excludes the current row).
    window_size = 10
    df = df.with_columns(
        pl.col("volume").rolling_max(window_size=window_size).shift().alias("max_volume")
    )
    df = df.with_columns(
        (
            # more than double the previous 10-row max
            (pl.col("volume") > pl.col("max_volume") * 2)
            # minimum traded value (volume * close)
            & (pl.col("volume") * pl.col("close") > 20000 * 100)
            # above 90% of the previous 30-row max
            & (pl.col("volume").rolling_max(window_size=30).shift() * 0.9 < pl.col("volume"))
        ).alias("volume_increase")
    )

    # Moving averages and the 150-row high / low of the close.
    df = df.with_columns(
        pl.col("close").rolling_mean(window_size=50).alias("ma50"),
        pl.col("close").rolling_mean(window_size=150).alias("ma150"),
        pl.col("close").rolling_mean(window_size=200).alias("ma200"),
        pl.col("close").rolling_max(window_size=150).alias("max150"),
        pl.col("close").rolling_min(window_size=150).alias("min150"),
    )
    df = df.with_columns(
        # ma150 rose on more than 20 of the last 30 rows. The previous expression
        # (shift(1) - ma150 > 0) counted the rows where it FELL.
        (
            ((pl.col("ma150") - pl.col("ma150").shift(1)) > 0)
            .cast(pl.Int32)
            .rolling_sum(window_size=30)
            > 20
        ).alias("ma_up"),
        # close is more than 30% above the 150-row low
        ((pl.col("close") > pl.col("min150") * 1.3)).alias("high_low_dist"),
    )

    # Watch-list condition. ma_up and high_low_dist are computed but deliberately
    # left out of the conjunction.
    df = df.with_columns(
        (
            pl.col("breakpoint")
            & pl.col("volume_increase")
            & (pl.col("close") > pl.col("ma150"))
            # & pl.col("ma_up")
            # & pl.col("high_low_dist")
            & (pl.col("close") > 50)
        ).alias("watch_list")
    )

    # Drop a hit when any of the previous 5 rows was already a hit.
    df = df.with_columns(
        (
            (pl.col("watch_list").cast(int).rolling_max(window_size=5).shift() == 0)
            & pl.col("watch_list")
        ).alias("watch_list")
    )

    # Disabled: exclude the days around an earnings announcement from the watch list.
    # fdf = (
    #     read_financial_csv(code)
    #     .filter(pl.col("annoounce_date") <= end_date)
    #     .sort(pl.col("annoounce_date"))
    # )
    # for announce_date in fdf["annoounce_date"]:
    #     df = df.with_columns(
    #         (
    #             pl.col("watch_list")
    #             & (
    #                 ~pl.col("date").is_between(
    #                     announce_date - datetime.timedelta(7), announce_date + datetime.timedelta(7)
    #                 )
    #             )
    #         ).alias("watch_list")
    #     )

    return df

def check_fundamentals(code: str, df: pl.DataFrame):
    """Keep only the rows of `df` whose fundamentals pass the growth screen.

    For each row, the financial reports with duration == 3 announced on or
    before the row's date are examined. A row is kept when:
      * at least 4 such reports exist and the latest one is at most 100 days old,
      * operating income and revenue of the latest report are both more than
        1.2x those of the fourth-latest report,
      * the latest EPS is positive and close / (EPS * 4) is above 30.

    Rows with a missing (None) operating income, revenue or EPS are skipped;
    a missing EPS used to raise TypeError in the `eps > 0` comparison.

    Args:
        code: Stock code passed to read_financial_csv.
        df: DataFrame with at least the columns "date" and "close".

    Returns:
        A list of [code, date] pairs for the rows that pass.
    """
    fdf = read_financial_csv(code)

    watch_list = []
    for idx in range(len(df)):
        target_date = df["date"][idx]
        ffdf = fdf.filter(
            (pl.col("annoounce_date") <= target_date) & (pl.col("duration") == 3)
        ).sort(pl.col("annoounce_date"))
        if len(ffdf) < 4 or target_date - ffdf["annoounce_date"][-1] > datetime.timedelta(100):
            continue

        latest_income = ffdf["operating_income"][-1]
        latest_revenue = ffdf["total_revenue"][-1]
        # NOTE(review): [-4] is the fourth-latest report; if the reports are
        # quarterly, a year-over-year comparison would need [-5] — confirm intent.
        base_income = ffdf["operating_income"][-4]
        base_revenue = ffdf["total_revenue"][-4]
        latest_eps = ffdf["eps"][-1]
        required = (latest_income, latest_revenue, base_income, base_revenue, latest_eps)
        if any(value is None for value in required):
            continue

        has_growth = latest_income > base_income * 1.2 and latest_revenue > base_revenue * 1.2
        if has_growth and latest_eps > 0 and df["close"][idx] / (latest_eps * 4) > 30:
            watch_list.append([code, target_date])

    return watch_list


In [None]:
# Build the watch list (a DataFrame with "code" and "date" columns) for all codes.
watch_list = get_watch_list_all()

In [None]:
# Check how many of the jump-up events are captured by the watch list.
tp = 0
tp_sum = 0
jps = jumpups.copy()
tp_list = []
lookback = datetime.timedelta(days=10)
for idx in range(len(watch_list)):
    code, date = watch_list["code"][idx], watch_list["date"][idx]
    if code not in jumpups:
        continue

    # A watch-list entry is a true positive when a jump-up occurred within the
    # 10 days up to and including its date.
    recent_jumpups = jumpups[code].filter(
        pl.col("date").is_between(date - lookback, date)
    )
    if len(recent_jumpups) == 0:
        continue

    tp += 1
    # Count the not-yet-counted jump-ups within +-10 days, then remove them from
    # `jps` so that a later entry cannot count them again.
    near_entry = pl.col("date").is_between(date - lookback, date + lookback)
    tp_sum += len(jps[code].filter(near_entry))
    jps[code] = jps[code].filter(near_entry.not_())
    tp_list.append((code, date))

print("Number of tp : {}, recall ={}, precision = {}".format(
    tp, tp_sum / total_jumpups, tp / len(watch_list)
))

In [None]:
# Simulate a trade for every watch-list entry and report the mean profit.
simulation_results = []
for idx in tqdm(range(len(watch_list))):
    code = watch_list["code"][idx]
    date = watch_list["date"][idx]

    cond = stock.simulation.CustomStopCondition(sell_rate=0.2)
    outcome = stock.simulation.simulate.run(code, date, condition=cond)
    simulation_results.append(outcome)

profits = [outcome.profit for outcome in simulation_results]
print("Average profits : {}".format(np.mean(profits)))

In [None]:
# Order the simulation results from the most to the least profitable.
sorted_results = sorted(simulation_results, key=lambda x: x.profit, reverse=True)

In [None]:
# Directory that receives the exported chart images; created if missing.
output_dir = Path("./tmp")
output_dir.mkdir(exist_ok=True)

In [None]:
# Pick random watch-list entries and export their charts for manual annotation.
# The sample size is capped at the number of rows because sampling without
# replacement raises when asked for more rows than exist; the seed is fixed so a
# re-run exports the same set of charts.
sampled_df = watch_list.sample(min(100, len(watch_list)), seed=0)
for i in tqdm(range(len(sampled_df))):
    code, date = sampled_df["code"][i], sampled_df["date"][i]
    df = read_data_csv(code, start_date=date - datetime.timedelta(days=30), end_date=date)
    if len(df) == 0:
        # No price rows in the window: nothing to normalize or draw.
        continue

    # Normalize prices by the last close in the window (the watch-list date).
    last_close = df["close"][-1]
    df = df.select(
        pl.col("date"),
        pl.col("open") / last_close,
        pl.col("high") / last_close,
        pl.col("low") / last_close,
        pl.col("close") / last_close,
        pl.col("volume"),
    )
    fig = stock.visualize.plot_chart(df)

    fig.update_layout(yaxis_range=[0.7, 1.5])
    # The "<code>_<date>.jpg" file name is parsed back by the annotation cells.
    output_path = output_dir / "{}_{}.jpg".format(code, date)
    fig.write_image(str(output_path))

In [None]:
# アノテーション結果を読み込んで検証
xml_path = Path("./tmp/annotations.xml")
tree = ET.parse(xml_path)
root = tree.getroot()

target_list = []
non_target_list = []
all_list = []
for child in root:
    if child.tag == "image":
        filename = child.attrib["name"]
        all_list.append(filename)
        for tag in child:
            if tag.tag == "tag" and tag.attrib["label"] == "good":
                target_list.append(filename)
                break
        else:
            non_target_list.append(filename)


def simulation(filename_list):
    """Run the trade simulation for every chart file name in `filename_list`.

    Each name must contain "<digits>_<YYYY-MM-DD>" followed by "jpg"; the digits
    are used as the stock code and the date as the simulation start date.

    Args:
        filename_list: Iterable of chart image file names.

    Returns:
        A list with one simulation result per file name, in input order.

    Raises:
        RuntimeError: If a file name does not match the expected pattern.
    """
    pattern = re.compile(r"(\d+)_(\d+-\d+-\d+).jpg")

    def _simulate_one(filename):
        # Recover (code, date) from the file name and simulate that single trade.
        match = pattern.search(filename)
        if match is None:
            raise RuntimeError("Invalid filename : {}".format(filename))
        code = match.group(1)
        date = datetime.datetime.strptime(match.group(2), "%Y-%m-%d").date()

        cond = stock.simulation.CustomStopCondition(sell_rate=0.2)
        return stock.simulation.simulate.run(code, date, condition=cond)

    return [_simulate_one(filename) for filename in filename_list]

# Mean simulated profit of the images labelled "good".
target_results = simulation(target_list)
target_profits = [res.profit for res in target_results]
print("Average profits : {}".format(np.mean(target_profits)))

# Mean simulated profit of the images without the "good" label.
non_target_results = simulation(non_target_list)
non_target_profits = [res.profit for res in non_target_results]
print("Average profits : {}".format(np.mean(non_target_profits)))

# Mean simulated profit over every annotated image (baseline for the two above).
all_results = simulation(all_list)
all_profits = [res.profit for res in all_results]
print("Average profits : {}".format(np.mean(all_profits)))

In [None]:
# Rank the annotated images by simulated profit, best first.
# Sorting positions instead of file names avoids the quadratic list.index()
# lookup (which also picks the wrong profit when a file name appears twice), and
# reusing all_results avoids re-running every simulation just to reorder them.
# Ascending sort followed by a reversal keeps the original order among ties.
profit_order = sorted(range(len(all_list)), key=lambda i: all_profits[i])[::-1]
sorted_list = [all_list[i] for i in profit_order]
sorted_results = [all_results[i] for i in profit_order]
sorted_profits = [all_profits[i] for i in profit_order]

In [None]:
# Inspect one annotated trade; didx is its rank in the profit-sorted list.
didx = 0

filename = sorted_list[didx]
profit = sorted_profits[didx]
summary = sorted_results[didx].model_dump_json(indent=4)

# Parse (code, date) from the file name BEFORE printing them; printing first
# showed whatever values an earlier cell had left in `code` and `date`.
regex = re.compile(r"(\d+)_(\d+-\d+-\d+).jpg")
res = regex.search(filename)
if res is None:
    raise RuntimeError("Invalid filename : {}".format(filename))
code, date = res.group(1), datetime.datetime.strptime(res.group(2), "%Y-%m-%d").date()

print(code, date, profit)
print(summary)

plot(code, date)

In [None]:
# Check how many of the jump-up events are captured by the watch list.
# (Repeats the recall / precision check of the earlier cell that builds tp_list.)
tp = 0
tp_sum = 0
jps = jumpups.copy()
tp_list = []
for idx in range(len(watch_list)):
    # watch_list is read by column; the row-style access below is the old form.
    #code, date = watch_list[idx][0], watch_list[idx][1]
    code, date = watch_list["code"][idx], watch_list["date"][idx]
    if code in jumpups:
        # True positive: a jump-up within the 10 days up to and including `date`
        # (the exact-date comparison below is the old criterion).
        #if (jumpups[code]["date"] == date).any():
        if len(jumpups[code].filter(pl.col("date").is_between(date - datetime.timedelta(days=10), date))) > 0:
            tp += 1
            # Count the jump-ups within +-10 days that are still present in jps ...
            tp_sum += len(jps[code].filter(pl.col("date").is_between(
                date - datetime.timedelta(days=10),
                date + datetime.timedelta(days=10)
            )))
            # ... and remove them so a later entry cannot count them again.
            jps[code] = jps[code].filter(pl.col("date").is_between(
                date - datetime.timedelta(days=10),
                date + datetime.timedelta(days=10)
            ).not_())
            tp_list.append((code, date))

# recall = counted jump-ups / all jump-ups; precision = true positives / watch-list size
print("Number of tp : {}, recall ={}, precision = {}".format(
    tp, tp_sum / total_jumpups, tp / len(watch_list)
))

In [None]:
# Check how many of the jump-up events are captured by the watch list
# (exact-date variant: a hit requires a jump-up on the watch-list date itself).
tp = 0
tp_sum = 0
jps = jumpups.copy()
tp_list = []
for idx in range(len(watch_list)):
    # watch_list is a DataFrame with "code" / "date" columns, so read the values
    # by column; watch_list[idx][0] would yield a DataFrame, not a code string.
    code, date = watch_list["code"][idx], watch_list["date"][idx]
    if code in jumpups:
        if (jumpups[code]["date"] == date).any():
            tp += 1
            # Count the jump-ups within +-10 days that are still present in jps ...
            tp_sum += len(jps[code].filter(pl.col("date").is_between(
                date - datetime.timedelta(days=10),
                date + datetime.timedelta(days=10)
            )))
            # ... and remove them so a later entry cannot count them again.
            jps[code] = jps[code].filter(pl.col("date").is_between(
                date - datetime.timedelta(days=10),
                date + datetime.timedelta(days=10)
            ).not_())
            tp_list.append((code, date))

print("Number of tp : {}, recall ={}, precision = {}".format(
    tp, tp_sum / total_jumpups, tp / len(watch_list)
))

In [None]:
# Simulate every watch-list entry again, this time with a stop condition that
# sets max_loss_rate, sell_rate and trailling_stop_rate.
results = []
for idx in tqdm(range(len(watch_list))):
    code, date = watch_list["code"][idx], watch_list["date"][idx]
    cond = stock.simulation.CustomStopCondition(max_loss_rate=0.08, sell_rate=0.4, trailling_stop_rate=0.15)
    results.append(stock.simulation.simulate.run(
        code, start_date=date, condition=cond
    ))

In [None]:
# minerviniの売買をtrend templateに当てはまるかチェック
# minerviniが実際に買った銘柄を探索する
# https://wallstreettrader.substack.com/p/how-mark-minervini-won-us-investing

In [None]:
# [ticker, buy date] pairs examined by the relative-strength cell below.
# Note: this rebinds `target_list`, which an earlier cell used for the annotated
# chart file names.
target_list = [
    ["ANF", datetime.date(2021, 1, 4)],
    ["GM", datetime.date(2021, 1, 11)],
    ["STAA", datetime.date(2021, 1, 12)],
    ["NNOX", datetime.date(2021, 1, 20)],
    ["UAVS", datetime.date(2021, 2, 9)],
    ["MP", datetime.date(2021, 2, 9)],
    ["YETI", datetime.date(2021, 4, 6)],
    ["ZIM", datetime.date(2021, 4, 8)],
    ["BNTX", datetime.date(2021, 6, 2)],
    ["AAPL", datetime.date(2021, 6, 17)],
    ["MRNA", datetime.date(2021, 6, 25)],
    ["SKY", datetime.date(2021, 6, 30)],
    ["NUE", datetime.date(2021, 8, 9)],
    ["PAG", datetime.date(2021, 9, 1)],
    ["TSLA", datetime.date(2021, 9, 24)],
    ["OLN", datetime.date(2021, 10, 11)],
    ["ASYS", datetime.date(2021, 10, 26)],
    ["UPST", datetime.date(2021, 10, 12)],
]

# [ticker, date] pairs kept separately from the list above; not used by any
# cell visible in this notebook.
target_list_short = [
    ["NVDA", datetime.date(2021, 12, 3)],
]

In [None]:
# Price history of the two index tickers used as relative-strength references.
nasdaq_df = stock.util.get_history_data("^IXIC")
sp500_df = stock.util.get_history_data("^GSPC")

In [None]:
# Relative strength of each listed trade against the S&P 500 at its buy date.
rss = []
for code, buy_date in target_list:
    one_year = datetime.timedelta(days=365)
    around_buy = pl.col("date").is_between(buy_date - one_year, buy_date + one_year)
    df = stock.util.get_history_data(code)
    df = df.filter(around_buy)
    # The reference is sp500_df; nasdaq_df is the alternative that is not used here.
    ndf = sp500_df.filter(around_buy)

    # Keep only the `duration` calendar days leading up to the buy date.
    duration = 260
    before_buy = pl.col("date").is_between(
        buy_date - datetime.timedelta(days=duration), buy_date
    )
    prev_df = df.filter(before_buy)
    prev_nasdaq_df = ndf.filter(before_buy)

    rs = stock.algorithm.relative_strength.relative_strength(
        prev_df["close"].to_numpy(),
        prev_nasdaq_df["close"].to_numpy(),
        num_division=duration // 10,
        division_factor=1.02,
    )
    rss.append(rs)
    # Report the trades whose relative strength is below 100.
    if rs < 100:
        print(code, buy_date)

In [None]:
# relative strengthの計算
def relative_strength_df(target_df, ref_df, offset=20, num_division=3, division_factor=1.02):
    """Add a relative-strength column "rs" of `target_df` against `ref_df`.

    "strength" is close / close shifted by `offset` rows, computed for both
    frames. "rs" is a weighted rolling sum, over num_division * offset rows, of
    strength / ref_strength.

    Args:
        target_df: Price DataFrame of the stock, with "date" and "close" columns.
        ref_df: Price DataFrame of the reference index, with "date" and "close".
        offset: Row distance used for the strength ratio and the weight spacing.
        num_division: Number of non-zero weights inside the rolling window.
        division_factor: Base of the geometric weights (division_factor ** k).

    Returns:
        `target_df` restricted to the rows where both strengths are available,
        with the added columns "strength", "ref_strength" and "rs".
    """
    window_size = num_division * offset

    # Drop reference rows, from the target's first date onward, whose date does
    # not exist in the target. A set keeps the membership test linear.
    dates = target_df["date"].to_list()
    target_dates = set(dates)
    ref_dates = ref_df.filter(pl.col("date") >= dates[0])["date"].to_list()
    excludes = [d for d in ref_dates if d not in target_dates]
    ref_df = ref_df.filter(pl.col("date").is_in(excludes).not_())

    ref_df = ref_df.with_columns(
        (pl.col("close") / pl.col("close").shift(offset)).alias("strength")
    )

    # Restrict the target to the reference's date range, then take the reference
    # strengths over the target's own date range. This used to read the
    # notebook-global `df`, which only worked while that global happened to hold
    # the same stock as `target_df`.
    target_df = target_df.filter(pl.col("date").is_between(ref_df["date"][0], ref_df["date"][-1]))
    first_date, last_date = target_df["date"][0], target_df["date"][-1]
    ref_strength = ref_df.filter(pl.col("date").is_between(first_date, last_date))["strength"]
    target_df = target_df.with_columns(
        (pl.col("close") / pl.col("close").shift(offset)).alias("strength"),
        pl.Series(name="ref_strength", values=ref_strength),
    )

    # Weights: division_factor ** k for k in range(num_division), normalized to
    # sum to 1, placed on every `offset`-th position of the window (zeros in
    # between); the list is then reversed.
    weights = np.array([division_factor**i for i in range(num_division)], dtype=float)
    weights /= np.linalg.norm(weights, ord=1)
    weights = [weights[i // offset] if (i % offset) == 0 else 0 for i in range(window_size)][::-1]

    # Relative strength over the rows where both strengths are defined.
    target_df = target_df.filter(
        pl.col("strength").is_not_null(),
        pl.col("ref_strength").is_not_null()
    ).with_columns(
        (pl.col("strength") / pl.col("ref_strength")).rolling_sum(window_size, weights=weights).alias("rs")
    )
    return target_df

In [None]:
def plot(df, date, before=30, after=30, yrange=[0.7, 1.4]):
    """Show a normalized price chart of `df` around `date`.

    This redefines the earlier `plot(code, date)`: it takes an already loaded
    DataFrame instead of a stock code.

    Args:
        df: Price DataFrame with date / open / high / low / close / volume columns.
        date: Reference date the chart is centered on.
        before: Calendar days shown before `date`.
        after: Calendar days shown after `date`.
        yrange: y-axis range applied to the figure.
    """
    window_start = date - datetime.timedelta(days=before)
    window_end = date + datetime.timedelta(days=after)
    window_df = df.filter(pl.col("date").is_between(window_start, window_end))

    # Index of the first row after `date`, clamped to the last row when none exists.
    rows_until_date = len(window_df.filter(pl.col("date") <= date))
    idx = min(rows_until_date, len(window_df) - 1)

    # Every price column is expressed relative to the open of that row.
    base_open = window_df["open"][idx]
    normalized_prices = [
        pl.col(name) / base_open for name in ("open", "high", "low", "close")
    ]
    chart_df = window_df.select(pl.col("date"), *normalized_prices, pl.col("volume"))

    fig = stock.visualize.plot_chart(chart_df, before_days=idx)
    fig.update_layout(yaxis_range=yrange)
    fig.show()

In [None]:
# Check whether a stock fits the trend template.
# Reference price data used by check_trendtemplate for the relative strength.
# NOTE(review): code "0000" presumably denotes the Nikkei index, going by the
# variable name — confirm against the kabutan data layout.
nikkei_df = read_data_csv("0000", exclude_none=False)

def check_trendtemplate(df):
    """Flag the rows of a price DataFrame that satisfy the trend template.

    Conditions, all required for "trend_template":
      * ma_order: close > ma50, ma50 >= ma150 and ma150 >= ma200 (1e-5 tolerance),
      * ma50 / ma150 / ma200 each rose on at least 4 of the last 5 rows,
      * high_low_dist: close is above 75% of the 260-row high and more than 30%
        above the 260-row low,
      * rs (relative strength against the global `nikkei_df`) is above 0.98.

    Args:
        df: Price DataFrame with at least "date" and "close" columns.

    Returns:
        The DataFrame returned by relative_strength_df with the indicator columns
        and the boolean "trend_template" column added.
    """
    df = df.with_columns(
        pl.col("close").rolling_mean(window_size=50, min_periods=10).alias("ma50"),
        pl.col("close").rolling_mean(window_size=150, min_periods=10).alias("ma150"),
        pl.col("close").rolling_mean(window_size=200, min_periods=10).alias("ma200"),
        pl.col("close").rolling_max(window_size=260, min_periods=10).alias("max260"),
        pl.col("close").rolling_min(window_size=260, min_periods=10).alias("min260"),
    )

    df = df.with_columns(
        # The moving averages are stacked in order: close > ma50 >= ma150 >= ma200.
        ((pl.col("close") > pl.col("ma50")) & (pl.col("ma50") + 1e-5 > pl.col("ma150")) & (pl.col("ma150") + 1e-5 > pl.col("ma200"))).alias("ma_order"),
        # Each moving average is trending upward.
        *[((pl.col(ma) > pl.col(ma).shift()).cast(pl.Int32).rolling_sum(window_size=5) >= 4).alias(f"{ma}_uptrend") for ma in ["ma50", "ma150", "ma200"]],
        # Distance from the 260-row high and low. The high side uses max260; the
        # same day's "high" column made the first comparison pass on practically
        # every row and left max260 unused.
        ((pl.col("close") > pl.col("max260") * 0.75) & (pl.col("close") > pl.col("min260") * 1.3)).alias("high_low_dist"),
    )
    # Relative strength against the reference index.
    df = relative_strength_df(df, nikkei_df, offset=20, num_division=10, division_factor=1.02)

    df = df.with_columns(
        (pl.col("ma_order") & pl.col("ma50_uptrend") & pl.col("ma150_uptrend") & pl.col("ma200_uptrend") & pl.col("high_low_dist") & (pl.col("rs") > 0.98)).alias("trend_template")
    )
    return df

In [None]:
# trend templateを日本株に適用
code_list = get_code_list()
trend_rate = []
for code in tqdm(code_list):
    df = read_data_csv(code)
    if len(df) < 100:
        continue
    df = check_trendtemplate(df)
    trend_rate.append(len(df.filter(pl.col("trend_template"))) / len(df))

In [None]:
# Per code: mean 30-row-ahead growth of the rows inside vs. outside the template.
growing_rates = []

for code in tqdm(code_list):
    df = read_data_csv(code)
    if len(df) < 100:
        continue

    df = check_trendtemplate(df)
    # growing_rate = close 30 rows later / close now; rows without a future
    # close are dropped.
    forward_growth = pl.col("close").shift(-30) / pl.col("close")
    df = df.with_columns(forward_growth.alias("growing_rate"))
    df = df.filter(pl.col("growing_rate").is_not_null())

    in_template = df.filter(pl.col("trend_template"))
    out_of_template = df.filter(pl.col("trend_template").not_())
    # Require more than 10 rows on both sides so that each mean is meaningful.
    if len(in_template) > 10 and len(out_of_template) > 10:
        growing_rates.append([
            in_template["growing_rate"].mean(),
            out_of_template["growing_rate"].mean(),
        ])

In [None]:
# Try applying Minervini's method to Japanese stocks.
# Reloads the jump-up events with the same parameters as the cell near the top
# of this notebook.
jumpups = stock.data.get_jumpups(
    window_size=30, min_growing_rate=0.4, exclude_duplicate=True
)

In [None]:
# Flatten the jump-ups into a list of [code, date] pairs.
rows = []
for code, events in jumpups.items():
    rows.extend([code, d] for d in events["date"].to_list())

In [None]:
# Inspect the characteristics of a stock that jumped up.
index = 161  # position in `rows` of the [code, date] pair to inspect
code, target_date = rows[index]
df = read_data_csv(code)

print(code, target_date)
# Chart from 150 days before to 50 days after the jump-up date.
plot(df, target_date, before=150, after=50, yrange=[0.7, 1.4])
#plot(df, target_date, before=150, after=10, yrange=[0.8, 1.2])

In [None]:
import csv

# Export the [code, date] pairs to CSV.
output_path = Path("./tmp/jumpups.csv")
# Make sure the target directory exists even when this cell is run on its own.
output_path.parent.mkdir(parents=True, exist_ok=True)
# The csv module requires newline="" on the file object; without it extra
# blank lines appear between rows on platforms that translate "\n".
with open(output_path, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["code", "date"])
    writer.writerows(rows)


In [None]:
# Display whatever DataFrame is currently bound to `df`.
df

In [None]:
from datetime import date


from polars import DataFrame


class StopCondition(stock.simulation.CustomStopCondition):
    """CustomStopCondition with an extra entry filter and a tighter loss-cut price."""

    def set_start(self, src_df: DataFrame, start_date: date) -> float:
        """Run the parent set_start, then apply the extra entry checks.

        Args:
            src_df: Price DataFrame with date / open / close / low columns.
            start_date: Date handed to the parent set_start; only rows on or
                before it are used by the checks below.

        Returns:
            The parent's return value, or -1 when the entry is rejected here.
        """
        res = super().set_start(src_df, start_date)
        if res != -1:
            # Rows up to and including start_date, oldest first.
            df = src_df.filter(pl.col("date") <= start_date).sort(pl.col("date"))
            # Do not buy when the price moved too far at the open.
            limit_range = stock.algorithm.market.get_limit_range(df["close"][-1])
            # NOTE(review): df is sorted ascending, so indices 0 and 1 are the two
            # OLDEST rows, while limit_range above uses the latest close ([-1]).
            # Confirm whether the gap was meant to use the most recent rows; this
            # also raises IndexError when df has fewer than 2 rows.
            if abs(df["open"][1] - df["close"][0]) > limit_range * 0.3:
                self.reset_results()
                return -1
            # Loss cut: the higher of the current loss_cut_price and the lowest
            # low of the last 15 rows.
            self.loss_cut_price = max(df[-15:]["low"].min(), self.loss_cut_price)
        return res

In [None]:

# Scan every code for breakouts out of a low-volatility range.
code_list = get_code_list()

window_size = 15
value_range = 0.05
low_vola_rates = []
bp_dfs = []
for code in tqdm(code_list):
    df = read_data_csv(code)

    # Low volatility: the high-low range of the window is below value_range
    # relative to the close.
    window_high = pl.col("high").rolling_max(window_size=window_size)
    window_low = pl.col("low").rolling_min(window_size=window_size)
    df = df.with_columns(
        (((window_high - window_low) / pl.col("close")) < value_range).alias("low_volatility")
    )

    # Breakout: the previous row was calm, this one is not, the close exceeds
    # the previous window high, and the volume passes the absolute and relative
    # bounds below.
    volume = pl.col("volume")
    prev_volume_min5 = volume.rolling_min(window_size=5).shift()
    prev_volume_max5 = volume.rolling_max(window_size=5).shift()
    prev_volume_max20 = volume.rolling_max(window_size=20).shift()
    df = df.with_columns(
        (
            pl.col("low_volatility").shift()
            & pl.col("low_volatility").not_()
            & (pl.col("close") > window_high.shift())
            & (volume > 10000)
            & (volume < 100000)
            & (volume > prev_volume_min5 * 2)
            & (volume > prev_volume_max5 * 0.8)
            & (volume < prev_volume_max20 * 0.8)
        ).alias("breakpoint")
    )

    # Highest high / lowest low of the following 30 rows, relative to the next
    # row's open.
    next_open = pl.col("open").shift(-1)
    df = df.with_columns(
        (pl.col("high").rolling_max(window_size=30).shift(-30) / next_open).alias("max30"),
        (pl.col("low").rolling_min(window_size=30).shift(-30) / next_open).alias("min30"),
    )

    breakouts = df.filter(pl.col("breakpoint"))
    if len(breakouts) > 0:
        low_vola_rates.append(len(breakouts) / len(df))
        bp_dfs.append(breakouts.with_columns(pl.lit(code).alias("code")))

bp_df = pl.concat(bp_dfs)

In [None]:
# Simulate every breakout with a 14-day holding limit and summarize the outcome.
results = []
for i in tqdm(range(len(bp_df))):
    code, date = bp_df["code"][i], bp_df["date"][i]
    res = stock.simulation.run(
        code,
        date,
        condition=StopCondition(sell_rate=0.2, max_loss_rate=0.08, max_days=14),
    )
    # A zero-day result with (almost) zero profit is dropped from the statistics.
    is_empty_trade = res.duration.days == 0 and abs(res.profit) < 1e-5
    if not is_empty_trade:
        results.append(res)

profits = [trade.profit for trade in results]
days = [trade.duration.days for trade in results]
dates = [trade.buying_date for trade in results]
# Calendar span converted with a 5-out-of-7 ratio.
span_in_days = (max(dates) - min(dates)).days
cand_per_day = len(results) / (span_in_days / 7 * 5)
print("Average profits : {}, Average days : {}".format(np.mean(profits), np.mean(days)))
print("Yearly estimated profits : {}".format(52 * 5 / np.mean(days) * np.mean(profits)))
print("Candidates per day : {:.3f}, per average duration : {:.3f}".format(cand_per_day, cand_per_day * np.mean(days)))

In [None]:
# Same evaluation as the previous cell, but without the max_days limit on the
# stop condition.
results = []
for i in tqdm(range(len(bp_df))):
    code, date = bp_df["code"][i], bp_df["date"][i]
    res = stock.simulation.run(code, date, condition=StopCondition(sell_rate=0.2, max_loss_rate=0.08))
    # Drop results with a zero-day duration and (almost) zero profit.
    if res.duration.days == 0 and abs(res.profit) < 1e-5:
        continue
    results.append(res)

profits = [res.profit for res in results]
days = [res.duration.days for res in results]
dates = [res.buying_date for res in results]
# Candidates per day: calendar span between the first and last buying date,
# scaled by 5/7.
cand_per_day = len(results) / ((max(dates) - min(dates)).days / 7 * 5)
print("Average profits : {}, Average days : {}".format(np.mean(profits), np.mean(days)))
# Scales the mean profit by (52 * 5) / mean holding days.
print("Yearly estimated profits : {}".format(52 * 5 / np.mean(days) * np.mean(profits)))
print("Candidates per day : {:.3f}, per average duration : {:.3f}".format(cand_per_day, cand_per_day * np.mean(days)))

In [None]:
# Holding period vs. profit of every simulated trade, with labelled axes so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(days, profits)
ax.set(xlabel="Holding period (days)", ylabel="Profit", title="Profit vs. holding period")
plt.show()

In [None]:
# Number of simulated trades kept in `results`.
len(results)

In [None]:
# Sort the trades best-first, then chart one of them.
results = sorted(results, key=lambda x: x.profit, reverse=True)

# Negative index counts from the end of the sorted list, i.e. from the worst
# trade; raises IndexError when fewer than 7 results exist.
idx = -7
code, date = results[idx].code, results[idx].buying_date
print(code, date, results[idx].profit)
df = read_data_csv(code)
# First chart stops at the buying date; second one adds the following 50 days.
plot(df, date, before=150, after=0, yrange=[0.7, 1.4])
plot(df, date, before=150, after=50, yrange=[0.7, 1.4])

In [None]:
# Peek at the first entry of the code list.
code_list[0]