# 🗂️ QEPC v2 - Setup Data Library

**Purpose:** One-time setup of your data folder structure

**What this does:**
1. Creates all necessary folders
2. Copies your existing 254k player logs
3. Copies your 12k team games
4. Validates everything is ready

**Run this ONCE, then never again!**

---

## Step 1: Create Folder Structure

In [None]:
from pathlib import Path
import shutil
import pandas as pd

print("Setting up QEPC v2 Data Library")
print("=" * 70)

# Define base directory. Every other path in this cell is derived from it.
BASE_DIR = Path(r"C:\Users\wdors\qepc_project\experimental\CLAUDE_REWRITE")
DATA_DIR = BASE_DIR / "data"

# Step 1: Create all folders
print("\n1. Creating folder structure...")
# Sub-folders of DATA_DIR, written relative to it.
folders = [
    "raw/player_logs",
    "raw/team_stats",
    "raw/schedule",
    "live",
    "processed/player_states",
    "processed/matchups",
    "processed/predictions",
    "cache/api_responses",
    "cache/temp",
    "logs/daily_predictions",
    "logs/backtest_results",
]

for folder in folders:
    folder_path = DATA_DIR / folder
    # parents=True also builds DATA_DIR and any intermediate level;
    # exist_ok=True makes re-running this cell harmless.
    folder_path.mkdir(parents=True, exist_ok=True)

print(f"   ✓ Created {len(folders)} folders")

# Step 2: Copy Player Logs
print("\n2. Copying player game logs...")
YOUR_PLAYER_LOGS = Path(r"C:\Users\wdors\qepc_project\notebooks\data\comprehensive\Player_Game_Logs_All_Seasons.csv")

if YOUR_PLAYER_LOGS.exists():
    # The file is parsed only to report a record count. Malformed rows are
    # skipped so that a few bad lines do not abort the setup.
    df_player = pd.read_csv(YOUR_PLAYER_LOGS, on_bad_lines='skip')
    dest_player = DATA_DIR / "raw" / "player_logs" / "all_seasons.csv"
    # The copy is verbatim: rows skipped while counting are still present in
    # the destination file.
    shutil.copy2(YOUR_PLAYER_LOGS, dest_player)
    print(f"   ✓ Copied {len(df_player):,} player records ({dest_player.stat().st_size / 1024 / 1024:.1f} MB)")
else:
    print(f"   ⚠ File not found: {YOUR_PLAYER_LOGS}")

# Step 3: Copy Team Games
print("\n3. Copying team game data...")
YOUR_TEAM_GAMES = Path(r"C:\Users\wdors\qepc_project\data\historical\NBA_API_QEPC_Format.csv")

if YOUR_TEAM_GAMES.exists():
    # The file is parsed only to report a record count alongside the copy.
    df_team = pd.read_csv(YOUR_TEAM_GAMES)
    dest_team = DATA_DIR / "raw" / "team_stats" / "team_games.csv"
    shutil.copy2(YOUR_TEAM_GAMES, dest_team)
    print(f"   ✓ Copied {len(df_team):,} team records ({dest_team.stat().st_size / 1024 / 1024:.1f} MB)")
else:
    # Team games are optional, so a missing source is only a warning.
    print(f"   ⚠ File not found (optional): {YOUR_TEAM_GAMES}")

# Step 4: Validate
print("\n4. Validating data library...")
checks = {
    "Player Logs": DATA_DIR / "raw" / "player_logs" / "all_seasons.csv",
    "Team Games": DATA_DIR / "raw" / "team_stats" / "team_games.csv",
}

# Per-file read options. The player logs are copied verbatim in step 2, where
# they are parsed with on_bad_lines='skip'; validation must read them the same
# way or a single malformed row would abort the whole setup here.
read_options = {
    "Player Logs": {"on_bad_lines": "skip"},
}

# Number of library files that exist and could be parsed.
files_found = 0
for name, path in checks.items():
    if not path.exists():
        print(f"   ✗ {name}: Not found")
        continue

    try:
        df = pd.read_csv(path, **read_options.get(name, {}))
    except (pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
        # Report an unreadable file as a failed check instead of crashing.
        print(f"   ✗ {name}: Could not be read ({exc})")
        continue

    print(f"   ✓ {name}: {len(df):,} records")
    files_found += 1

# Step 5: Create data loader helper
print("\n5. Creating data loader helper...")

# Source of the generated module. __BASE_DIR__ is substituted below with the
# BASE_DIR used by this setup, so the loader always points at the library that
# was just created instead of carrying a second hard-coded copy of the path.
loader_template = '''"""Data Loader for QEPC v2"""
from pathlib import Path
import pandas as pd

BASE_DIR = Path(__BASE_DIR__)
DATA_DIR = BASE_DIR / "data"

def load_player_logs():
    """Load all player game logs (malformed rows are skipped)"""
    path = DATA_DIR / "raw" / "player_logs" / "all_seasons.csv"
    return pd.read_csv(path, on_bad_lines="skip")

def load_team_games():
    """Load team game data"""
    path = DATA_DIR / "raw" / "team_stats" / "team_games.csv"
    return pd.read_csv(path)

def save_predictions(df, filename):
    """Save predictions to logs"""
    from datetime import datetime
    timestamp = datetime.now().strftime("%Y%m%d")
    path = DATA_DIR / "logs" / "daily_predictions" / f"{timestamp}_{filename}"
    df.to_csv(path, index=False)
    return path
'''

# repr() of the path string is a valid Python string literal, with the
# backslashes of a Windows path already escaped.
loader_code = loader_template.replace("__BASE_DIR__", repr(str(BASE_DIR)))

loader_path = BASE_DIR / "qepc_v2" / "qepc" / "data" / "loader.py"
loader_path.parent.mkdir(parents=True, exist_ok=True)

# Write with an explicit encoding rather than the platform default.
loader_path.write_text(loader_code, encoding="utf-8")

print(f"   ✓ Created loader: {loader_path}")

# Summary
# The player logs are the one required input (team games are optional), so
# the setup is only reported as complete when they are in place.
player_logs_ready = checks["Player Logs"].exists()

print("\n" + "=" * 70)
if player_logs_ready:
    print("✓ DATA LIBRARY SETUP COMPLETE!")
else:
    print("⚠ DATA LIBRARY SETUP INCOMPLETE - player logs are missing")
print("=" * 70)
print(f"\nFiles ready: {files_found}/{len(checks)}")
print(f"Data directory: {DATA_DIR}")
if player_logs_ready:
    print("\nYou can now use in any notebook:")
    print("  from qepc.data.loader import load_player_logs")
    print("  df = load_player_logs()")
else:
    print("\nFix the player logs path in Step 2 and run this cell again.")

---