In [None]:
import sys
from pathlib import Path

# Make the project root importable. The root is taken to be the parent of the
# current working directory, i.e. the notebook is expected to be launched from
# a directory one level below the project root.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Project-local imports; these must come after the sys.path tweak above.
from src.database import Database
from src.pipeline.data_validator import DataValidator

# Single source of truth for the database location (used for both the
# diagnostics below and the Database constructor).
db_path = project_root / "data" / "trading.db"

# Inspect the file BEFORE constructing Database, so the report reflects what
# was on disk beforehand.
# NOTE(review): if Database() creates the file on construction (not visible
# from this notebook), checking afterwards would always report True.
db_exists = db_path.exists()
print(f"Database path: {db_path}")
print(f"Database exists: {db_exists}")
if db_exists:
    print(f"Database size: {db_path.stat().st_size / 1024 / 1024:.2f} MB")
else:
    # Keep the label so the line is still self-explanatory when the file is missing.
    print("Database size: N/A")

# Initialize database with the resolved path
db = Database(db_path=str(db_path))

# Load the trader overview table
traders = db.get_all_traders()
print(f"\nFound {len(traders)} traders")

if len(traders) > 0:
    print("\nTraders overview:")
    print(traders[['account_id', 'trader_name', 'trading_days', 'total_pnl']].head(10))

In [None]:
# Spot-check the first three traders: pull their totals/fills from the
# database and run the combined validation on each.
validator = DataValidator()

for _, trader in traders.head(3).iterrows():
    # Header for this trader
    print(f"\n{'='*60}")
    account_id = trader['account_id']
    print(f"Analyzing {trader['trader_name']} (ID: {account_id})")
    print(f"Trading days: {trader['trading_days']}, Total P&L: ${trader['total_pnl']:,.2f}")

    # Fetch the per-trader detail tables and report their sizes
    totals, fills = db.get_trader_data(account_id)
    print(f"Totals records: {len(totals)}")
    print(f"Fills records: {len(fills)}")

    # Run validation and summarise the outcome
    result = validator.validate_combined(totals, fills)
    if result.is_valid:
        status = '✓ PASSED'
    else:
        status = '✗ FAILED'
    print(f"Validation: {status}")

    # Show at most the first two entries of each non-empty issue list
    for label, issues in (("Errors:", result.errors), ("Warnings:", result.warnings)):
        if issues:
            print(label, issues[:2])

In [None]:
import matplotlib.pyplot as plt

# Pick a trader with good data
trader_id = "3957"
totals, fills = db.get_trader_data(trader_id)

display(totals.tail())


if not totals.empty:
    # Resolve a human-readable name for the figure title from the `traders`
    # overview loaded earlier. IDs are compared as strings because trader_id
    # is a string here and the dtype of the account_id column is not visible
    # from this notebook. Falls back to the raw ID when there is no match.
    name_matches = traders.loc[
        traders['account_id'].astype(str) == trader_id, 'trader_name'
    ]
    trader_name = name_matches.iloc[0] if not name_matches.empty else f"Trader {trader_id}"

    # Plot P&L over time: daily on top, running total below
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Daily P&L
    # NOTE(review): assumes totals is already ordered by 'date' — confirm,
    # otherwise both the line plot and the cumulative sum will be misleading.
    ax1.plot(totals['date'], totals['net_pnl'])
    ax1.set_title(f"Daily P&L - {trader_name} (ID: {trader_id})")
    ax1.set_ylabel("Net P&L ($)")

    # Cumulative P&L
    ax2.plot(totals['date'], totals['net_pnl'].cumsum())
    ax2.set_title("Cumulative P&L")
    ax2.set_ylabel("Cumulative P&L ($)")
    ax2.set_xlabel("Date")

    plt.tight_layout()
    plt.show()
else:
    # Make the missing plot explicit instead of silently producing nothing.
    print(f"No totals data found for trader {trader_id}; nothing to plot.")
    