In [None]:
from pathlib import Path
import csv
import math
import multiprocessing as mp
from datetime import datetime

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import stock

In [None]:
def calc_max_growth_rate(csv_path: Path):
    """Find the steepest large price run-up in one daily-price CSV.

    The CSV must have a ``close`` column. Rows whose ``close`` is missing are
    dropped. For every lag ``i`` of at least 10 rows, the best ratio
    ``close[t + i] / close[t]`` is located; windows whose ratio exceeds 2.0
    are scored by ``ratio / i`` and the highest-scoring window wins.

    Args:
        csv_path: Path of the CSV file to analyse.

    Returns:
        A tuple ``(growth_rate, min_idx, max_idx)`` where ``min_idx`` and
        ``max_idx`` are positions in the NaN-filtered ``close`` array and
        ``growth_rate`` is the per-row geometric growth factor between them,
        ``(close[max_idx] / close[min_idx]) ** (1 / (max_idx - min_idx))``.
        ``(0, 0, 0)`` is returned when the file is empty, has fewer than 100
        usable rows, or contains no window whose ratio exceeds 2.0.
    """
    try:
        daily_df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file raises instead of yielding an empty frame; treat it
        # like any other file without usable data.
        return 0, 0, 0
    if len(daily_df) == 0:
        return 0, 0, 0
    daily_df = daily_df[daily_df.close.notna()]
    if len(daily_df) < 100:
        return 0, 0, 0
    close_values = daily_df.close.to_numpy()
    num_data = len(close_values)

    min_idx = 0
    max_idx = 0
    max_growth_rate = 0.0
    for i in range(10, num_data):
        # Ratio of each close to the close i rows earlier.
        vals = close_values[i:] / close_values[:num_data - i]
        idx = vals.argmax()
        if vals[idx] > 2.0:
            # Score the window by ratio per row of lag. This must stay a
            # number: storing the result of a comparison here would freeze the
            # search on the first qualifying lag.
            growth_rate = vals[idx] / i
            if growth_rate > max_growth_rate:
                max_growth_rate = growth_rate
                min_idx = idx
                max_idx = idx + i

    if max_idx - min_idx > 0:
        growth_rate = (close_values[max_idx] / close_values[min_idx]) ** (1.0 / (max_idx - min_idx))
        return growth_rate, min_idx, max_idx
    return 0, 0, 0

In [None]:
# Input locations under the project root.
daily_data_dir = stock.PROJECT_ROOT / "data" / "daily"
financial_data_dir = stock.PROJECT_ROOT / "data" / "financial"

# Sorted so that positions in `growth_rates` line up with a stable file order.
daily_csv_lists = sorted(daily_data_dir.glob("*.csv"))

# Submit one task per CSV to 8 worker processes, then collect the results in
# submission order. Only the first element of each result tuple is kept.
with mp.Pool(8) as pool:
    ques = [
        pool.apply_async(calc_max_growth_rate, (daily_csv,))
        for daily_csv in daily_csv_lists
    ]
    growth_rates = np.array([que.get()[0] for que in tqdm(ques)])

In [None]:
# Positions into `daily_csv_lists`, ordered from highest to lowest growth rate.
# The ascending sort is stable and then fully reversed, so among equal rates
# the later file comes first.
index_arr = np.arange(len(daily_csv_lists))
sorted_arr = list(reversed(sorted(index_arr, key=growth_rates.__getitem__)))