In [None]:
import numpy as np
import pandas as pd
import tqdm
from pathlib import Path
import multiprocessing as mp
from src.selv.monte_carlo import STRATEGIES, simulate_and_run_strategy  # your strategy definitions
from src.selv.vis import plot_synthetic_paths, compare_strategies, analyze_monte_carlo_results

CSV_PATH = Path("/Users/timschultz/repos/ai-hedge-fund/btc_data.csv")
N_PATHS = 5_000  # simulations
SEED = 42

original_df = pd.read_csv(CSV_PATH, parse_dates=["datetime"], index_col="datetime")


In [5]:


np.random.seed(SEED)

tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name, funcs["long_entry_fun"], funcs["short_entry_fun"], SEED, original_df)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x122c64e00>, 'short_entry_fun': <function short_ema_10_30_cross at 0x122c7fb00>}}
Number of tasks: 5000


100%|██████████| 5000/5000 [17:30<00:00,  4.76it/s]


            equity       sharpe       max_dd      path_id
count  5000.000000  5000.000000  5000.000000  5000.000000
mean      1.034297    -0.407674     0.471711  2499.500000
std       0.569342     2.063525     0.131279  1443.520003
min       0.161007    -7.453837     0.166815     0.000000
5%        0.389110    -3.826326     0.277303   249.950000
50%       0.904466    -0.405264     0.463326  2499.500000
95%       2.097457     2.951880     0.701231  4749.050000
max       6.771377     7.563630     0.858385  4999.000000


In [None]:


np.random.seed(SEED)

tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name, funcs["long_entry_fun"], funcs["short_entry_fun"], SEED, original_df)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x122c64e00>, 'short_entry_fun': <function short_ema_10_30_cross at 0x122c7fb00>}}
Number of tasks: 5000


 97%|█████████▋| 4865/5000 [17:10<00:26,  5.05it/s]

In [None]:
# Questions:
#  1. Is def simulate_path correct?
#  2. Build model?? On historical data then run on recent data? Is that even correct term that this is a model?
# 3. Compare each aspect of https://media.licdn.com/dms/document/media/v2/D561FAQFmtQMpnT5XUg/feedshare-document-pdf-analyzed/feedshare-document-pdf-analyzed/0/1722857702819?e=1747267200&v=beta&t=5No7UJQwEZxhjN_O7qN5dBhFXclHIf_Fe6W2jQKPkUY to the implementation and try to explain each
# 4. Give report a grade and be able to defend the grade. For this will at least need to be able to explain sharpe ratio



In [4]:


np.random.seed(SEED)

tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name, funcs["long_entry_fun"], funcs["short_entry_fun"], SEED, original_df)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x122c64e00>, 'short_entry_fun': <function short_ema_10_30_cross at 0x122c7fb00>}}
Number of tasks: 5000


100%|██████████| 5000/5000 [17:37<00:00,  4.73it/s]

            equity       sharpe       max_dd      path_id
count  5000.000000  5000.000000  5000.000000  5000.000000
mean      1.034297    -0.407674     0.471711  2499.500000
std       0.569342     2.063525     0.131279  1443.520003
min       0.161007    -7.453837     0.166815     0.000000
5%        0.389110    -3.826326     0.277303   249.950000
50%       0.904466    -0.405264     0.463326  2499.500000
95%       2.097457     2.951880     0.701231  4749.050000
max       6.771377     7.563630     0.858385  4999.000000





In [7]:
fig, paths_df = plot_synthetic_paths(debug_dir_path="src/selv/debug")

# Compare strategies (average performance)
compare_fig = compare_strategies(paths_df)

# Analyze Monte Carlo results
summary, boxplots = analyze_monte_carlo_results("mc_results.csv")
print(summary)


FileNotFoundError: No sim_path_*.parquet files found in src/selv/debug