In [3]:
import numpy as np
import pandas as pd
import tqdm
from pathlib import Path
import multiprocessing as mp
from src.selv.monte_carlo import simulate_and_run_strategy  # your strategy definitions
from src.selv.vis import plot_synthetic_paths, compare_strategies, analyze_monte_carlo_results
from src.selv.strategies import STRATEGIES

ONE_YR_1M_BTC = Path("/Users/timschultz/repos/ai-hedge-fund/btc_data.csv")
FIVE_YR_1M_BTC = Path("/Users/timschultz/repos/ai-hedge-fund/btc_1m_5yr.csv")
N_PATHS = 5_000  # simulations
SEED = 42

five_year_1m_btc = pd.read_csv(FIVE_YR_1M_BTC, parse_dates=["datetime"], index_col="datetime")


In [4]:
STRATEGIES

{'EMA_10_30_Cross': {'long_entry_fun': <function src.selv.strategies.long_ema_10_30_cross(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'short_entry_fun': <function src.selv.strategies.short_ema_10_30_cross(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'tp': 0.015,
  'sl': 0.0075,
  'max_minutes': 720},
 'SMA_50_200_Cross': {'long_entry_fun': <function src.selv.strategies.long_sma_50_200_cross(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'short_entry_fun': <function src.selv.strategies.short_sma_50_200_cross(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'tp': 0.04,
  'sl': 0.02,
  'max_minutes': 4320},
 'RSI_30_70': {'long_entry_fun': <function src.selv.strategies.long_rsi_30_70(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'short_entry_fun': <function src.selv.strategies.short_rsi_30_70(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series>,
  'tp': 0.025,
  'sl': 0.0125,
  'ma

### Incorrect and Missing Data:
- Coinbase should be a valid source
- Script below checks for missing values


In [5]:
# ---------- 1. Boolean mask of bad rows ----------
# True  -> this row contains at least one NaN
# False -> row is fully populated
bad_rows = five_year_1m_btc.isna().any(axis=1)

# ---------- 2a. Simple pass/fail check ----------
if not bad_rows.any():
    print("✅ No missing values — every row is complete.")
else:
    print(f"❌ Found {bad_rows.sum()} incomplete rows out of {len(df)}.")
    # optional preview
    print(five_year_1m_btc[bad_rows].head())

✅ No missing values — every row is complete.


Data Representativeness
Implement  Min-Track-Record-Length to determine sufficient level of observations

In [None]:
five_year_1m_btc = pd.read_csv(FIVE_YR_1M_BTC, parse_dates=["datetime"], index_col="datetime")
N_PATHS = 5_0
tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name,
            funcs["long_entry_fun"], funcs["short_entry_fun"],
            funcs["tp"], funcs["sl"], funcs["max_minutes"],
            SEED, five_year_1m_btc)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x16a94dc60>, 'short_entry_fun': <function short_ema_10_30_cross at 0x16a94dd00>, 'tp': 0.015, 'sl': 0.0075, 'max_minutes': 720}, 'SMA_50_200_Cross': {'long_entry_fun': <function long_sma_50_200_cross at 0x16a94dda0>, 'short_entry_fun': <function short_sma_50_200_cross at 0x16a94de40>, 'tp': 0.04, 'sl': 0.02, 'max_minutes': 4320}, 'RSI_30_70': {'long_entry_fun': <function long_rsi_30_70 at 0x16a94dee0>, 'short_entry_fun': <function short_rsi_30_70 at 0x16a94df80>, 'tp': 0.025, 'sl': 0.0125, 'max_minutes': 1440}, 'MACD_Cross': {'long_entry_fun': <function long_macd_cross at 0x16a94e020>, 'short_entry_fun': <function short_macd_cross at 0x16a94e0c0>, 'tp': 0.03, 'sl': 0.015, 'max_minutes': 2880}, 'MACD_RSI_Confirm': {'long_entry_fun': <function long_macd_rsi_confirm at 0x16a94e160>, 'short_entry_fun': <function short_macd_rsi_confirm at 0x16a94e200>, 'tp': 0.02, 'sl': 0.01, 'max_minutes': 1440}}
Number o

  2%|▏         | 515/25000 [07:11<3:29:38,  1.95it/s]Process SpawnPoolWorker-1:
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-3:
Process SpawnPoolWorker-2:
Process SpawnPoolWorker-10:
Process SpawnPoolWorker-8:
Process SpawnPoolWorker-7:
  2%|▏         | 515/25000 [07:11<5:42:11,  1.19it/s]Process SpawnPoolWorker-5:

Process SpawnPoolWorker-6:
Process SpawnPoolWorker-9:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/timschultz/.pyenv/versions/3.12.8/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/timschultz/.pyenv/versions/3.12.8/lib/python3.12/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/timschultz/.pyenv/versions/3.12.8/lib/python3.12/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
                    ^^^^^^^^^^^^^^^^^^^
  File "/Users/timschultz/repos/ai-hedge-fund/src/selv/monte_carlo.py", l

KeyboardInterrupt: 

In [None]:
sig = mc_df["MinTRL"] <= len(five_year_1m_btc)    # statistically significant
print("Share of paths with significant Sharpe:", sig.mean())

Share of paths with significant Sharpe: 1.0


In [None]:
# 1. Is def simulate_path correct?
# TODO


In [None]:
# 3. Compare each aspect of https://media.licdn.com/dms/document/media/v2/D561FAQFmtQMpnT5XUg/feedshare-document-pdf-analyzed/feedshare-document-pdf-analyzed/0/1722857702819?e=1747267200&v=beta&t=5No7UJQwEZxhjN_O7qN5dBhFXclHIf_Fe6W2jQKPkUY to the implementation and try to explain each

In [None]:
# Slippage, TX Fees, Liquidity


In [None]:
# 4. Give report a grade and be able to defend the grade. For this will at least need to be able to explain sharpe ratio

In [None]:
# 5. Improve the report. Add graphs. Fix up actual strategy

In [None]:
# 6. Run on live data. Finish up the live data script????

In [None]:
tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name, funcs["long_entry_fun"], funcs["short_entry_fun"], SEED, original_df)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x122c64e00>, 'short_entry_fun': <function short_ema_10_30_cross at 0x122c7fb00>}}
Number of tasks: 5000


100%|██████████| 5000/5000 [17:37<00:00,  4.73it/s]


       equity_ratio          cash       sharpe       max_dd      path_id
count   5000.000000   5000.000000  5000.000000  5000.000000  5000.000000
mean       1.034297  10342.967655    -0.407674     0.471711  2499.500000
std        0.569342   5693.422866     2.063525     0.131279  1443.520003
min        0.161007   1610.073643    -7.453837     0.166815     0.000000
5%         0.389110   3891.099979    -3.826326     0.277303   249.950000
50%        0.904466   9044.659734    -0.405264     0.463326  2499.500000
95%        2.097457  20974.565951     2.951880     0.701231  4749.050000
max        6.771377  67713.765237     7.563630     0.858385  4999.000000


In [None]:
tasks = []
print("strategies:", STRATEGIES)
for strategy_name, funcs in STRATEGIES.items():
    for i in range(N_PATHS):
        # Each task: (unique_id_for_rng_and_path, strategy_name, long_func, short_func)
        # To ensure unique paths for each (strategy, path_num) combination,
        # we can use a global path counter for the seed or combine strategy index and path index.
        # Here, (i) will be the path_id for a given strategy.
        # The RNG seed will be SEED + i, meaning path i for strategy A is same as path i for strategy B.
        # If truly independent paths are needed for each strategy-path combo, adjust seeding.
        tasks.append(
            (i, strategy_name, funcs["long_entry_fun"], funcs["short_entry_fun"], SEED, original_df)
        )
print(f"Number of tasks: {len(tasks)}")

with mp.Pool() as pool:
    stats = list(
        tqdm.tqdm(
            pool.imap_unordered(simulate_and_run_strategy, tasks), total=len(tasks)
        )
    )

mc_df = pd.DataFrame(stats)
mc_df.to_csv("mc_results.csv", index=False)
print(mc_df.describe(percentiles=[0.05, 0.5, 0.95]))

strategies: {'EMA_10_30_Cross': {'long_entry_fun': <function long_ema_10_30_cross at 0x122c64e00>, 'short_entry_fun': <function short_ema_10_30_cross at 0x122c7fb00>}}
Number of tasks: 5000


100%|██████████| 5000/5000 [17:37<00:00,  4.73it/s]

            equity       sharpe       max_dd      path_id
count  5000.000000  5000.000000  5000.000000  5000.000000
mean      1.034297    -0.407674     0.471711  2499.500000
std       0.569342     2.063525     0.131279  1443.520003
min       0.161007    -7.453837     0.166815     0.000000
5%        0.389110    -3.826326     0.277303   249.950000
50%       0.904466    -0.405264     0.463326  2499.500000
95%       2.097457     2.951880     0.701231  4749.050000
max       6.771377     7.563630     0.858385  4999.000000





In [None]:
fig, paths_df = plot_synthetic_paths(debug_dir_path="src/selv/debug")

# Compare strategies (average performance)
compare_fig = compare_strategies(paths_df)

# Analyze Monte Carlo results
summary, boxplots = analyze_monte_carlo_results("mc_results.csv")
print(summary)


FileNotFoundError: No sim_path_*.parquet files found in src/selv/debug