In [None]:
# 🧾 QEPC Project Backup (Fast / Filtered)
#
# This backup:
#   - Detects the project root (the nearest folder, walking up from the
#     current working directory, that has both data/ and notebooks/)
#   - Creates backups/qepc_backup_<YYYYMMDD_HHMMSS>.zip
#   - INCLUDES: code, notebooks, configs, light data
#   - SKIPS:
#       * data/raw (huge CSVs you can regenerate)
#       * notebooks/02_utilities/data/raw
#       * backups/ (older backups)
#       * .git, __pycache__, .ipynb_checkpoints
#       * top-level venv/, .venv/, env/, .env
#       * .zip / .tar / .gz / .bz2 archives outside data/
#       * any single file larger than MAX_FILE_MB

from pathlib import Path
import datetime
import zipfile
import os

# ----------------------------
# 1. Detect project root
# ----------------------------
here = Path.cwd().resolve()

# Walk upward from the working directory: the first folder that contains both
# data/ and notebooks/ is treated as the project root (None if there is none).
project_root = next(
    (
        candidate
        for candidate in (here, *here.parents)
        if (candidate / "data").exists() and (candidate / "notebooks").exists()
    ),
    None,
)

# Without a root nothing below can work, so stop right away.
if project_root is None:
    raise RuntimeError(
        "Could not detect project root. "
        "Open this notebook from inside C:/Users/wdors/qepc_project or a subfolder."
    )

print(f"‚úÖ Project root detected: {project_root}")

# ----------------------------
# 2. Backup config
# ----------------------------

# All archives live in <project_root>/backups (created on first use).
backups_dir = project_root / "backups"
backups_dir.mkdir(exist_ok=True)
print(f"üìÅ Backups directory: {backups_dir}")

# Archive name carries a second-resolution timestamp so runs never collide.
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
backup_name = f"qepc_backup_{timestamp}.zip"
backup_path = backups_dir / backup_name

# Size ceiling for any single file in the archive (in MB, and in bytes)
MAX_FILE_MB = 60
MAX_FILE_BYTES = MAX_FILE_MB * 1024 ** 2

print(f"üóÇÔ∏è Backup file will be created as: {backup_path}")
print(f"‚öôÔ∏è Max single-file size: {MAX_FILE_MB} MB\n")

# ----------------------------
# 3. Include / exclude rules
# ----------------------------

def should_exclude(path: Path, root: "Path | None" = None) -> bool:
    """
    Return True if this path should be excluded from the backup.

    Decisions are based on the path *relative* to the project root.

    Parameters
    ----------
    path : Path
        A file or folder located under the root.
    root : Path, optional
        Folder the path is made relative to. Defaults to the notebook-level
        ``project_root`` so existing single-argument calls keep working.

    Returns
    -------
    bool
        True when the path matches one of the skip rules below, else False.

    Raises
    ------
    ValueError
        Propagated from ``Path.relative_to`` when ``path`` is not under the root.
    """
    base = project_root if root is None else root
    rel = path.relative_to(base)
    parts = rel.parts

    # The root itself has no relative parts; guard so parts[0] cannot raise.
    if not parts:
        return False

    top = parts[0]

    # Skip backup folder itself
    if top == "backups":
        return True

    # Skip git metadata
    if top == ".git":
        return True

    # Skip bytecode caches and notebook checkpoints at ANY depth
    # (packages keep their own __pycache__ next to the sources).
    if "__pycache__" in parts or ".ipynb_checkpoints" in parts:
        return True

    # Skip virtualenvs if any live inside the project
    if top in {"venv", ".venv", "env", ".env"}:
        return True

    # Skip heavy raw data - these are regenerable
    #  - data/raw/...
    if parts[:2] == ("data", "raw"):
        return True

    #  - notebooks/02_utilities/data/raw/...
    if parts[:4] == ("notebooks", "02_utilities", "data", "raw"):
        return True

    # Skip existing zip/tar archives inside project (other than the one we're making).
    # Suffix is compared case-insensitively so ".ZIP" is treated like ".zip".
    # Archives under data/ are kept; adjust this rule if needed.
    if rel.suffix.lower() in {".zip", ".tar", ".gz", ".bz2"} and top != "data":
        return True

    return False

# ----------------------------
# 4. Collect files to back up
# ----------------------------

files_to_add = []        # (path, size_in_bytes) pairs that will be archived
skipped_large = []       # (path, size_in_bytes) pairs above MAX_FILE_BYTES
skipped_rules = []       # paths rejected by should_exclude()
skipped_unreadable = []  # paths whose size could not be read (stat() failed)

for path in project_root.rglob("*"):
    # Folders are implied by the files inside them; only files are archived.
    if not path.is_file():
        continue

    if should_exclude(path):
        skipped_rules.append(path)
        continue

    try:
        size = path.stat().st_size
    except OSError:
        # Best effort: keep scanning, but remember the file so it is reported
        # below instead of silently disappearing from the backup.
        skipped_unreadable.append(path)
        continue

    if size > MAX_FILE_BYTES:
        skipped_large.append((path, size))
        continue

    files_to_add.append((path, size))

# Sum of the on-disk sizes, i.e. the input size BEFORE compression.
total_size_bytes = sum(size for _, size in files_to_add)
total_size_mb = total_size_bytes / (1024 * 1024)

print(f"üì¶ Files to include: {len(files_to_add)}")
print(f"üì¶ Approx uncompressed input size: {total_size_mb:.2f} MB")
print(f"üö´ Skipped by rules: {len(skipped_rules)} files")
if skipped_unreadable:
    print(f"Skipped as unreadable (stat failed): {len(skipped_unreadable)} files")
print(f"üö´ Skipped as too large (> {MAX_FILE_MB} MB): {len(skipped_large)} files\n")

# ----------------------------
# 5. Create the ZIP
# ----------------------------

# Write every collected file into the archive under its project-relative name,
# reporting progress after each batch of 100 files.
with zipfile.ZipFile(backup_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
    added = 0
    for src, _ in files_to_add:
        archive.write(src, arcname=src.relative_to(project_root))
        added += 1
        if added % 100 == 0:
            print(f"  ‚Üí Added {added} files so far...")

print("\n‚úÖ Backup complete!")
# Size of the finished archive on disk, in MB.
final_size_mb = backup_path.stat().st_size / (1024 * 1024)
print(f"üßæ Backup file: {backup_path.name} ({final_size_mb:.2f} MB)")

# ----------------------------
# 6. Show what was skipped (summary)
# ----------------------------

# List each oversized file with its size so the user knows what is missing
# from the archive.
if skipped_large:
    print(f"\n‚ö†Ô∏è The following large files were skipped (size > {MAX_FILE_MB} MB):")
    for big_path, big_size in skipped_large:
        size_mb = big_size / (1024 * 1024)
        print(f"  - {big_path.relative_to(project_root)}  ({size_mb:.2f} MB)")

print("\nDone.")


In [None]:
import sys

# Make the project importable. Guarded so that re-running this cell does not
# stack duplicate entries on sys.path.
project_path = r"C:\Users\wdors\qepc_project"
if project_path not in sys.path:
    sys.path.insert(0, project_path)

from qepc.backtest.backtest_engine import run_daily_backtest

# Try backtest with verbose=True to see what's happening
result = run_daily_backtest("2025-01-15", num_trials=100, verbose=True)

# `result` is checked with .empty / .head(), so it is expected to be
# DataFrame-like; an empty result means no rows came back for that date.
if result.empty:
    print("‚ùå No data for that date")
else:
    print("\n‚úÖ Backtest successful!")
    print(result.head())

In [None]:
# Bootstrap the notebook environment first. The backtest import below is left
# after this call on purpose.
# NOTE(review): presumably the setup prepares paths/config the engine relies
# on - confirm before reordering these statements.
from qepc.notebook_header import qepc_notebook_setup
env = qepc_notebook_setup(run_diagnostics=False)

from qepc.backtest.backtest_engine import run_daily_backtest

# Try a backtest for a date you have data for
result = run_daily_backtest("2025-11-22", num_trials=1000, verbose=True)
# Prints the whole result object (not just a head) as plain text.
print(result)

In [None]:
import sys
import os

# Add your project to Python path (only once, even if the cell is re-run)
project_path = r"C:\Users\wdors\qepc_project"
if project_path not in sys.path:
    sys.path.insert(0, project_path)

# Locations this diagnostic reports on
qepc_path = os.path.join(project_path, "qepc")
init_path = os.path.join(qepc_path, "__init__.py")
# NOTE(review): another cell imports `qepc.notebook_header`, which would live
# under qepc/ rather than at the project root - confirm which location matters.
nb_header_path = os.path.join(project_path, "notebook_header.py")

# Check if files exist
print("Checking file structure...")
for label, location in (
    ("Project root", project_path),
    ("qepc folder", qepc_path),
    ("qepc/__init__.py", init_path),
    ("notebook_header.py", nb_header_path),
):
    print(f"\n{label}: {location}")
    print(f"Exists: {os.path.exists(location)}")

# List what's in the project root. Guarded: when the root is missing, which is
# exactly the situation this cell is meant to diagnose, os.listdir would raise
# and hide the report above behind a traceback.
print("\nFiles in project root:")
if os.path.isdir(project_path):
    for item in os.listdir(project_path):
        item_path = os.path.join(project_path, item)
        if os.path.isdir(item_path):
            print(f"  üìÅ {item}/")
        else:
            print(f"  üìÑ {item}")
else:
    print("  (project root is missing or not a directory; nothing to list)")

In [None]:
import pandas as pd
from pathlib import Path

# Try to get the true project root from QEPC's autoload paths module
try:
    from qepc.autoload.paths import get_project_root
    # Coerce to Path: the code below calls project_root.rglob() and
    # .relative_to(), and the fallback branch already yields a Path.
    # Path() leaves an existing Path unchanged and converts a plain string.
    project_root = Path(get_project_root())
except Exception:
    # Fallback if the import (or the lookup itself) fails for some reason
    project_root = Path.cwd()
    print("‚ö†Ô∏è Falling back to cwd as project root")

print("Project root:", project_root)

# Helper: pick the "best" match for a file name among many
def pick_best_match(matches):
    """
    Return the highest-scoring path in `matches`, or None when it is empty.

    Scoring looks at the lower-cased path components:
      +2 for "data", +1 each for "raw", "props" and "results",
      -2 for "notebooks", and -5 when ".ipynb_checkpoints" appears anywhere
      in the path string. Ties go to the candidate listed first.
    """
    if not matches:
        return None

    weights = {"data": 2, "raw": 1, "props": 1, "results": 1, "notebooks": -2}

    def _score(candidate):
        folders = {str(piece).lower() for piece in candidate.parts}
        total = sum(weight for name, weight in weights.items() if name in folders)
        # Checkpoint copies are matched on the raw path string (case-sensitive).
        if ".ipynb_checkpoints" in str(candidate):
            total -= 5
        return total

    # sorted() is stable even with reverse=True, so equal scores keep input order.
    ranked = sorted(matches, key=_score, reverse=True)
    return ranked[0]

# (label, filename) pairs: the label is only used as a heading in the preview
# output, the filename is what gets searched for under the project root.
targets = [
    # Core game/team data
    ("TeamStatistics (team game logs)", "TeamStatistics.csv"),
    ("Team_Stats (team season stats)", "Team_Stats.csv"),
    ("PlayerStatistics (player logs)", "PlayerStatistics.csv"),
    ("Canonical Games (schedule)", "Games.csv"),
    ("GameResults_2025 (results)", "GameResults_2025.csv"),
    ("Schedule_with_Rest", "Schedule_with_Rest.csv"),
    ("TeamForm", "TeamForm.csv"),
    # Roster / players
    ("Players", "Players.csv"),
    ("Players_Processed", "Players_Processed.csv"),
    # Injuries
    ("Injury_Overrides", "Injury_Overrides.csv"),
    ("Injury_Overrides_MASTER", "Injury_Overrides_MASTER.csv"),
    ("Injury_Overrides_live_espn", "Injury_Overrides_live_espn.csv"),
    # Props / aggregates
    ("Player_Season_Averages", "Player_Season_Averages.csv"),
    ("Player_Averages_With_CI", "Player_Averages_With_CI.csv"),
    ("Player_Recent_Form_L5", "Player_Recent_Form_L5.csv"),
    ("Player_Recent_Form_L10", "Player_Recent_Form_L10.csv"),
    ("Player_Recent_Form_L15", "Player_Recent_Form_L15.csv"),
    ("Player_Home_Away_Splits", "Player_Home_Away_Splits.csv"),
]

def preview_by_filename(label: str, filename: str, n: int = 3):
    """
    Find every file named `filename` under project_root and preview one of them.

    Prints a banner with `label`, lists all matches, lets pick_best_match()
    choose one, then reads only the first `n` rows of it with pandas and
    displays them. Read errors are caught and printed. Returns None.
    """

    def _shown(location):
        # Path relative to the project root when possible, otherwise unchanged.
        try:
            return location.relative_to(project_root)
        except ValueError:
            return location

    print("\n" + "=" * 80)
    print(f"üìÑ {label}")
    print(f"Looking for filename: {filename}")

    # Find all matches anywhere under project_root
    matches = list(project_root.rglob(filename))
    if not matches:
        print("‚ö†Ô∏è No matches found in project.")
        return

    print("Found matches:")
    for hit in matches:
        print("   ‚Ä¢", _shown(hit))

    best = pick_best_match(matches)
    if best is None:
        print("‚ö†Ô∏è Could not choose a best match.")
        return

    print(f"\n‚úÖ Using best match: {_shown(best)}")

    # Load only the first n rows so very large CSVs are never read in full
    try:
        df_sample = pd.read_csv(best, nrows=n)
        print(f"Sample shape: {df_sample.shape}")
        print("Columns:", list(df_sample.columns))
        print("\nSample rows:")
        display(df_sample)
    except Exception as err:
        print(f"‚ùå Error reading CSV sample: {err}")

# Preview each target in the order it is listed in `targets`.
for target in targets:
    preview_by_filename(*target)



In [None]:
import sys

# Put the project on the import path once; re-running the cell must not stack
# duplicate entries.
project_path = r"C:\Users\wdors\qepc_project"
if project_path not in sys.path:
    sys.path.insert(0, project_path)

# Clear cached qepc modules so the import below reloads them.
# Match the package itself and its submodules only: a plain substring test
# would also evict unrelated modules whose names merely contain "qepc".
for mod in list(sys.modules):
    if mod == "qepc" or mod.startswith("qepc."):
        del sys.modules[mod]

from qepc.backtest.backtest_engine import run_daily_backtest

# Try a date from your data - based on your samples, you have games around 2025-11-17
result = run_daily_backtest("2025-11-17", num_trials=100, verbose=True)

# An empty result means no rows came back for that date.
if result.empty:
    print("No data found - might need a different date")
else:
    print("\n‚úÖ Success!")
    print(result.head())

In [None]:
# Probe another date from the data. Relies on run_daily_backtest having been
# imported by an earlier cell in this kernel session.
result = run_daily_backtest("2025-11-16", num_trials=100, verbose=True)
games_found = len(result)
print(f"Games found: {games_found}")

In [None]:
# NOTE(review): run_season_backtest is not imported in any visible cell, so
# this line raises NameError on a fresh kernel. Presumably it lives next to
# run_daily_backtest in qepc.backtest.backtest_engine - confirm and import it.
# The two arguments look like start and end dates (YYYY-MM-DD) - TODO confirm.
season_result = run_season_backtest("2024-11-15", "2025-11-30")