In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import stock

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Input locations, both resolved relative to the project root exposed by `stock`.
financial_data_dir = stock.PROJECT_ROOT / "data" / "financial"
daily_data_dir = stock.PROJECT_ROOT / "data" / "daily"

# Stop right away when the financial data directory does not exist.
assert financial_data_dir.exists()

# Collect every financial CSV in sorted path order; the count is the cell output.
csv_list = list(financial_data_dir.glob("*.csv"))
csv_list.sort()
len(csv_list)

In [None]:
def add_growth_rate_columns(df):
    """Add year-over-year ratio columns to ``df`` and return it.

    For every row, the reference row is the first row (in ``df`` order) that
    is not a prediction and has ``year == row.year - 1`` together with the
    same ``month`` and ``duration``.  For each of ``total_revenue``,
    ``operating_income``, ``ordinary_profit`` and ``eps`` the column
    ``<metric>_yoy`` receives ``row.<metric> / (reference.<metric> + 1e-5)``.
    Rows without a reference row keep ``NaN``.

    Args:
        df: DataFrame with the columns ``year``, ``month``, ``duration``,
            ``is_prediction`` and the four metric columns named above.

    Returns:
        The same ``df`` object, with the four ``*_yoy`` columns added
        (or overwritten) in place.
    """
    metrics = ("total_revenue", "operating_income", "ordinary_profit", "eps")
    # Small offset added to every denominator so a zero reference value does
    # not cause a division by zero.
    epsilon = 1e-5

    # Index the first non-prediction row of every (year, month, duration) so
    # each lookup is a dict access instead of a second pass over the frame.
    reference_rows = {}
    for ref_row in df.itertuples(index=False):
        if ref_row.is_prediction:
            continue
        reference_rows.setdefault(
            (ref_row.year, ref_row.month, ref_row.duration), ref_row
        )

    # Collect results positionally and assign whole columns afterwards.  This
    # avoids chained assignment (``df.col[idx] = value``), which is unreliable
    # on filtered frames and writes nothing under pandas Copy-on-Write.
    yoy = {metric: np.full(len(df), np.nan) for metric in metrics}
    for pos, row in enumerate(df.itertuples(index=False)):
        ref_row = reference_rows.get((row.year - 1, row.month, row.duration))
        if ref_row is None:
            continue
        for metric in metrics:
            yoy[metric][pos] = getattr(row, metric) / (getattr(ref_row, metric) + epsilon)

    for metric in metrics:
        df[f"{metric}_yoy"] = yoy[metric]
    return df


def get_growth_rate(csv_path: Path, eps_threshold: float = 1.1, revenue_threshold: float = 1.05):
    """Print the price range that followed each sustained-growth quarterly report.

    Reads the financial CSV at ``csv_path`` and the daily CSV with the same
    file stem under ``daily_data_dir``.  A quarterly row (``duration == 3``)
    qualifies when it and the two rows before it (ordered by
    ``annoounce_date``) all have ``eps_yoy`` above ``eps_threshold`` and
    ``total_revenue_yoy`` above ``revenue_threshold``.  For each qualifying
    row one line is printed: the code, the announce date, the next announce
    date, and the maximum and minimum daily ``close`` between the two
    announcements, each divided by the ``open`` of the first daily row whose
    date sorts after the announce date.

    Args:
        csv_path: Path to the financial CSV; its stem is used as the code.
        eps_threshold: Lower bound (exclusive) on ``eps_yoy``.
        revenue_threshold: Lower bound (exclusive) on ``total_revenue_yoy``.

    Returns:
        None.  Results are printed.  Returns early, printing nothing, when
        no daily CSV exists for the code.
    """
    code = csv_path.stem
    daily_csv_path = daily_data_dir / f"{code}.csv"
    # Check for the daily file first so nothing is computed for codes that
    # cannot be evaluated anyway.
    if not daily_csv_path.exists():
        return

    df = pd.read_csv(csv_path)
    # sort_values returns a new frame, so the growth columns are added to an
    # independent object rather than to a filtered view of `df`.
    # NOTE(review): "annoounce_date" looks misspelled but is kept because it
    # must match the column name in the CSV files - confirm.
    quarterly_df = df[df.duration == 3].sort_values("annoounce_date")
    quarterly_df = add_growth_rate_columns(quarterly_df)

    daily_df = pd.read_csv(daily_csv_path)
    # searchsorted below requires ascending dates, so the sorted frame must
    # actually be kept (sort_values is not in-place by default).
    daily_df = daily_df[daily_df.close.notna()].sort_values("date")

    daily_dates = daily_df.date
    open_values = daily_df.open.to_numpy()
    close_values = daily_df.close.to_numpy()
    tr_yoy = quarterly_df.total_revenue_yoy.to_numpy()
    eps_yoy = quarterly_df.eps_yoy.to_numpy()
    announce_date = quarterly_df.annoounce_date.to_numpy()

    # The last row is excluded because the holding window ends at the
    # following announcement.
    for i in range(3, len(quarterly_df) - 1):
        window = slice(i - 2, i + 1)
        # NaN ratios compare as False, so rows without a prior-year reference
        # never qualify.
        if not (eps_yoy[window] > eps_threshold).all():
            continue
        if not (tr_yoy[window] > revenue_threshold).all():
            continue

        date = announce_date[i]
        next_date = announce_date[i + 1]
        # NOTE(review): the "20" prefix presumably expands a two-digit year to
        # match the daily CSV date format - confirm against the data files.
        idx = daily_dates.searchsorted(f"20{date}", side="right")
        next_idx = daily_dates.searchsorted(f"20{next_date}", side="right")

        # Skip when there is no daily row after the announcement or the
        # window between the two announcements is empty.
        if idx >= len(close_values) or next_idx <= idx:
            continue

        start_val = open_values[idx]
        max_val = close_values[idx:next_idx].max()
        min_val = close_values[idx:next_idx].min()
        print(code, date, next_date, max_val / start_val, min_val / start_val)

In [None]:
# Run the growth screen on every financial CSV; get_growth_rate prints one
# line per qualifying quarter and returns nothing.
for csv_path in csv_list:
    get_growth_rate(csv_path)

In [None]:
# `quarterly_df` is only ever bound inside get_growth_rate, so it does not
# exist at notebook level after a fresh "Restart & Run All".  Rebuild the
# quarterly frame for the first financial CSV to preview its columns.
assert csv_list, "no financial CSV files found"
preview_source_df = pd.read_csv(csv_list[0])
quarterly_df = add_growth_rate_columns(
    preview_source_df[preview_source_df.duration == 3].sort_values("annoounce_date")
)
quarterly_df.head()