In [None]:
#!/usr/bin/env python3
"""
GRAND SLAM STEP 1: Fetch Grand Slam Matches ONLY
"""

!pip install fuzzywuzzy
import requests
from datetime import datetime, timedelta
import pandas as pd

import os

# SECURITY NOTE(review): an API key is committed in source. Supply the key via
# the API_TENNIS_KEY environment variable instead; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
API_KEY = os.environ.get(
    'API_TENNIS_KEY',
    '9d9573f10859420b9fd4f1211decb3ff0e50a72a1c01ebc5626306ecb657065c',
)

# Substrings (lower-case) that identify a Grand Slam in a tournament name.
_GRAND_SLAM_KEYWORDS = ('australian open', 'french open', 'roland garros',
                        'wimbledon', 'us open', 'grand slam')

# Statuses of matches that are over or will not be played as scheduled.
# 'Retired' matches have ended too, so they are excluded with the rest.
_CLOSED_STATUSES = frozenset({'Finished', 'Cancelled', 'Postponed', 'Retired'})


def _surface_for_tournament(tournament_lower):
    """Return the court surface implied by a lower-cased Grand Slam name."""
    if 'french open' in tournament_lower or 'roland garros' in tournament_lower:
        return 'Clay'
    if 'wimbledon' in tournament_lower:
        return 'Grass'
    # Australian Open, US Open and anything unrecognised default to hard court.
    return 'Hard'


def grand_slam_step1_fetch_matches():
    """Fetch today's and tomorrow's Grand Slam ATP/WTA singles fixtures.

    Calls the ``get_fixtures`` method of the api-tennis endpoint once per day,
    keeps only Grand Slam ATP/WTA singles matches whose status is not in
    ``_CLOSED_STATUSES``, prints a per-tournament breakdown and writes the
    result to ``/tmp/grand_slam_step1_matches.csv``.

    A day whose request fails (network error, non-200 status, invalid JSON or
    an unsuccessful API response) is reported and skipped.

    Returns:
        pandas.DataFrame with columns ``event_key``, ``player1``, ``player2``,
        ``tournament``, ``date``, ``time``, ``status``, ``tour`` and
        ``surface``; or ``None`` when no match passes the filters.
    """
    print("="*70)
    print("GRAND SLAM STEP 1: Fetching Grand Slam Matches")
    print("="*70)

    base_url = "https://api.api-tennis.com/tennis/"

    today = datetime.now()
    tomorrow = today + timedelta(days=1)

    today_str = today.strftime('%Y-%m-%d')
    tomorrow_str = tomorrow.strftime('%Y-%m-%d')

    print(f"\nüìÖ Fetching: {today_str} and {tomorrow_str}")
    print("   (Looking for Grand Slam matches only)")

    all_matches = []

    # Fetch both days
    for date_str, label in [(today_str, "today"), (tomorrow_str, "tomorrow")]:
        params = {
            'method': 'get_fixtures',
            'APIkey': API_KEY,
            'date_start': date_str,
            'date_stop': date_str
        }

        print(f"\nüîç Fetching {label}'s matches...")

        # A timeout keeps a stalled connection from hanging the notebook.
        try:
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"‚ùå Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"‚ùå Error: HTTP {response.status_code}")
            continue

        try:
            data = response.json()
        except ValueError as exc:
            print(f"‚ùå Error: invalid JSON in response ({exc})")
            continue

        if data.get('success') != 1:
            print(f"‚ùå API Error: {data}")
            continue

        matches = data.get('result') or []
        print(f"‚úì Found {len(matches)} total matches")

        all_matches.extend(matches)

    print(f"\n‚úì Total matches across both days: {len(all_matches)}")

    # Filter for Grand Slam ATP/WTA singles ONLY
    filtered = []
    for match in all_matches:
        # "or ''" guards against fields that are present but null.
        event_type = (match.get('event_type_type') or '').lower()
        event_status = match.get('event_status', '')
        tournament = (match.get('tournament_name') or '').lower()

        if not any(slam in tournament for slam in _GRAND_SLAM_KEYWORDS):
            continue  # Skip non-Grand Slam matches

        if event_status in _CLOSED_STATUSES:
            continue

        # Only ATP/WTA singles (never doubles)
        if 'doubles' in event_type:
            continue
        if not any(tour in event_type for tour in ('atp singles', 'wta singles')):
            continue

        filtered.append({
            'event_key': match.get('event_key'),
            'player1': match.get('event_first_player'),
            'player2': match.get('event_second_player'),
            'tournament': match.get('tournament_name'),
            'date': match.get('event_date'),
            'time': match.get('event_time'),
            'status': event_status,
            'tour': 'WTA' if 'wta' in event_type else 'ATP',
            # Derived from the tournament so clay/grass events are not
            # mislabelled as hard court.
            'surface': _surface_for_tournament(tournament)
        })

    print(f"\n‚úì Filtered to {len(filtered)} Grand Slam ATP/WTA singles matches")

    if not filtered:
        print("\n‚ö†Ô∏è  No Grand Slam matches found")
        print("   Either no Grand Slams are currently happening,")
        print("   or today's matches have already finished.")
        return None

    # Create DataFrame
    df = pd.DataFrame(filtered)

    # Show breakdown
    print(f"\nüìä Breakdown by tournament:")
    for tournament, group in df.groupby('tournament'):
        tour_counts = group.groupby('tour').size()
        tour_str = ", ".join(f"{tour}: {n}" for tour, n in tour_counts.items())
        print(f"   {tournament}: {tour_str}")

    print(f"\n‚úÖ GRAND SLAM STEP 1 COMPLETE - {len(df)} matches ready")

    # Show sample
    print("\n" + "="*70)
    print("Sample matches:")
    print("="*70)
    print(df[['event_key', 'player1', 'player2', 'tournament', 'time', 'status', 'tour']].head(10).to_string())

    # Save
    df.to_csv('/tmp/grand_slam_step1_matches.csv', index=False)
    print(f"\nüíæ Saved to /tmp/grand_slam_step1_matches.csv")

    return df

if __name__ == "__main__":
    # Only hit the API once the template placeholder has been replaced.
    if API_KEY != 'YOUR_API_KEY_HERE':
        grand_slam_step1_fetch_matches()
    else:
        print("‚ùå Please add your API key first!")

GRAND SLAM STEP 1: Fetching Grand Slam Matches

üìÖ Fetching: 2026-01-19 and 2026-01-20
   (Looking for Grand Slam matches only)

üîç Fetching today's matches...
‚úì Found 113 total matches

üîç Fetching tomorrow's matches...
‚úì Found 234 total matches

‚úì Total matches across both days: 347

‚úì Filtered to 43 Grand Slam ATP/WTA singles matches

üìä Breakdown by tournament:
   ATP Australian Open: ATP: 21
   WTA Australian Open: WTA: 22

‚úÖ GRAND SLAM STEP 1 COMPLETE - 43 matches ready

Sample matches:
   event_key         player1             player2           tournament   time   status tour
0   12096867       N. Borges  F. Auger-Aliassime  ATP Australian Open  01:15  Retired  ATP
1   12096974          P. Hon         M. Stakusic  WTA Australian Open  02:45  Retired  WTA
2   12096978  L. Klimovicova            F. Jones  WTA Australian Open  03:35  Retired  WTA
3   12096871     G. Dimitrov           T. Machac  ATP Australian Open  06:00        1  ATP
4   12096873      J. Fonseca 

In [None]:
#!/usr/bin/env python3
"""
GRAND SLAM STEP 2: Load Historical Data (INCLUDING Grand Slam matches)
"""

import pandas as pd
import requests
from io import StringIO
from fuzzywuzzy import fuzz
import numpy as np

def load_tennis_data_from_github(tour, years):
    """Download yearly match CSVs from Jeff Sackmann's GitHub repositories.

    Args:
        tour: ``'atp'`` or ``'wta'``; selects the repository and file prefix.
        years: iterable of seasons; one ``<tour>_matches_<year>.csv`` file is
            requested per entry.

    Returns:
        A single pandas.DataFrame with all downloaded seasons concatenated, or
        an empty DataFrame when nothing could be loaded. Seasons that fail to
        download or parse are reported and skipped.
    """
    print(f"\nLoading {tour.upper()} historical data...")

    repo = 'tennis_wta' if tour == 'wta' else 'tennis_atp'
    prefix = 'atp' if tour == 'atp' else 'wta'
    base_url = f"https://raw.githubusercontent.com/JeffSackmann/{repo}/master/"

    all_data = []

    for year in years:
        url = f"{base_url}{prefix}_matches_{year}.csv"

        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            print(f"  Skipping {year}: request failed ({exc})")
            continue

        if response.status_code != 200:
            # Typically a season whose file does not exist in the repository.
            print(f"  Skipping {year}: HTTP {response.status_code}")
            continue

        try:
            all_data.append(pd.read_csv(StringIO(response.text)))
        except (pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
            print(f"  Skipping {year}: could not parse CSV ({exc})")

    if not all_data:
        return pd.DataFrame()

    combined = pd.concat(all_data, ignore_index=True)
    print(f"‚úì Loaded {len(combined)} {tour.upper()} matches")

    return combined

def prepare_grand_slam_data(df):
    """Build a balanced, Grand-Slam-only training table from raw match data.

    The input is not modified; all work happens on a copy.

    Args:
        df: match rows with ``tourney_date`` (``YYYYMMDD``), ``surface``,
            ``tourney_name``, ``winner_name`` and ``loser_name`` columns;
            ``winner_rank`` / ``loser_rank`` are optional and default to 100.

    Returns:
        pandas.DataFrame sorted by ``tourney_date`` with columns
        ``tourney_date``, ``surface``, ``tourney_name``, ``winner_name``,
        ``loser_name``, ``winner_rank``, ``loser_rank`` and ``p1_won``.
        In roughly half of the rows (chosen with ``np.random``) the two
        players are swapped, so ``winner_name`` really means "player 1" and
        ``p1_won`` is 1 only when player 1 is the true winner. An empty
        DataFrame is returned when no Grand Slam rows are present.
    """
    # Work on a copy so the caller's frame keeps its original columns/dtypes.
    df = df.copy()
    df['tourney_date'] = pd.to_datetime(df['tourney_date'], format='%Y%m%d')
    df['surface'] = df['surface'].fillna('Hard')

    # CRITICAL: Filter for Grand Slams ONLY
    grand_slam_keywords = ['australian open', 'french open', 'roland garros',
                           'wimbledon', 'us open', 'grand slam']
    is_slam = df['tourney_name'].str.lower().str.contains(
        '|'.join(grand_slam_keywords), na=False
    )
    df_slams = df[is_slam]

    print(f"\n‚úì Filtered to {len(df_slams)} Grand Slam matches (from {len(df)} total)")

    if len(df_slams) == 0:
        print("‚ö†Ô∏è  WARNING: No Grand Slam matches found in historical data!")
        return pd.DataFrame()

    df_slams = df_slams.sort_values('tourney_date').reset_index(drop=True)
    n_rows = len(df_slams)

    def _column_or_default(column, default):
        """Return the column's values, or a constant array if it is absent."""
        if column in df_slams.columns:
            return df_slams[column].to_numpy()
        return np.full(n_rows, default)

    winner_name = df_slams['winner_name'].to_numpy()
    loser_name = df_slams['loser_name'].to_numpy()
    winner_rank = _column_or_default('winner_rank', 100)
    loser_rank = _column_or_default('loser_rank', 100)

    # One draw per row, consumed in row order from the global NumPy RNG, so a
    # seeded run is reproducible.
    swap = np.random.rand(n_rows) > 0.5

    return pd.DataFrame({
        'tourney_date': df_slams['tourney_date'],
        'surface': df_slams['surface'],
        'tourney_name': df_slams['tourney_name'],
        'winner_name': np.where(swap, loser_name, winner_name),
        'loser_name': np.where(swap, winner_name, loser_name),
        'winner_rank': np.where(swap, loser_rank, winner_rank),
        'loser_rank': np.where(swap, winner_rank, loser_rank),
        'p1_won': np.where(swap, 0, 1),
    })

def fuzzy_match_player(name, candidates, threshold=60):
    """Find the candidate name that best matches an (often abbreviated) name.

    Names are compared case-insensitively after turning hyphens and periods
    into spaces and dropping apostrophes, so "F. Auger-Aliassime" can line up
    with "Felix Auger Aliassime" and "C. O'Connell" with "Christopher Oconnell".

    A candidate is considered only when its surname scores at least 85 against
    the input surname. The candidate's surname is taken both as its last word
    and as its last N words, where N is the number of words in the input
    surname, so multi-word surnames such as "Mpetshi Perricard" can match.
    Remaining candidates are ranked by the best of several full-name fuzzy
    scores; ties are broken in favour of a candidate whose first letter equals
    the input's leading initial.

    Args:
        name: name to look up (e.g. "N. Borges"); NaN/None yields ``None``.
        candidates: iterable of full names; NaN/None entries are ignored.
        threshold: minimum overall score (0-100) required for a match.

    Returns:
        The matching candidate exactly as given in ``candidates``, or ``None``.
    """
    if pd.isna(name):
        return None

    def _tokens(raw):
        """Lower-case, strip punctuation that varies between sources, split."""
        cleaned = raw.lower().replace('-', ' ').replace('.', ' ')
        cleaned = cleaned.replace("'", '').replace('\u2019', '')
        return cleaned.split()

    name_tokens = _tokens(name)
    if not name_tokens:
        return None

    # A short first token (<= 2 chars) is treated as an initial; everything
    # after it is the surname. Otherwise only the last word is the surname.
    has_initial = len(name_tokens) >= 2 and len(name_tokens[0]) <= 2
    if len(name_tokens) >= 2:
        surname_tokens = name_tokens[1:] if has_initial else name_tokens[-1:]
    else:
        surname_tokens = name_tokens
    last_name = ' '.join(surname_tokens)
    surname_width = len(surname_tokens)
    name_normalized = ' '.join(name_tokens)
    initial = name_tokens[0][0] if has_initial else None

    best_match = None
    best_key = (0, False)  # (score, first-initial agrees)

    for candidate in candidates:
        if pd.isna(candidate):
            continue

        candidate_tokens = _tokens(candidate)
        if not candidate_tokens:
            continue
        candidate_normalized = ' '.join(candidate_tokens)

        # Compare against the candidate's last word and against as many
        # trailing words as the input surname has.
        surname_forms = {
            candidate_tokens[-1],
            ' '.join(candidate_tokens[-surname_width:]),
        }
        last_name_score = max(fuzz.ratio(last_name, form) for form in surname_forms)

        if last_name_score < 85:
            continue

        # Score the full names with multiple methods
        score = max(
            fuzz.ratio(name_normalized, candidate_normalized),
            fuzz.partial_ratio(name_normalized, candidate_normalized),
            fuzz.token_sort_ratio(name_normalized, candidate_normalized),
            last_name_score  # Also consider just the last name score
        )
        if score < threshold:
            continue

        initial_agrees = initial is not None and candidate_tokens[0][:1] == initial
        key = (score, initial_agrees)
        if key > best_key:
            best_key = key
            best_match = candidate

    return best_match

def grand_slam_step2_load_and_match():
    """Load Grand Slam history and map Step 1 player names onto it.

    Reads ``/tmp/grand_slam_step1_matches.csv``, downloads historical matches
    for every tour present, reduces them to Grand Slam rows, and fuzzy-matches
    each upcoming player to a historical player name. Matches where both
    players were matched are written to ``/tmp/grand_slam_step2_matched.csv``
    and the historical tables are pickled to
    ``/tmp/grand_slam_step2_historical.pkl``.

    Returns:
        ``(matched_df, historical)`` where ``historical`` maps ``'atp'`` /
        ``'wta'`` to the prepared DataFrame; or ``None`` (not a tuple) when
        the Step 1 file cannot be read.
    """
    import pickle

    print("="*70)
    print("GRAND SLAM STEP 2: Load Historical Data & Match Players")
    print("="*70)

    # Load matches from Step 1
    try:
        matches_df = pd.read_csv('/tmp/grand_slam_step1_matches.csv')
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        print("‚ùå Run grand_slam_step1_fetch_matches.py first!")
        return None
    print(f"‚úì Loaded {len(matches_df)} Grand Slam matches from Step 1\n")

    # Create the result columns up front so the final filter works even when
    # no tour has any historical data to match against.
    matches_df['player1_matched'] = None
    matches_df['player2_matched'] = None

    # Determine which tours we need
    tours_present = set(matches_df['tour'].dropna().astype(str))
    tours_needed = [
        tour for tour in ('atp', 'wta')
        if any(tour.upper() in label for label in tours_present)
    ]

    print(f"Tours needed: {', '.join(t.upper() for t in tours_needed)}")

    # Seasons 2020 through 2026; a season whose file cannot be downloaded is
    # skipped by the loader.
    years = [2020, 2021, 2022, 2023, 2024, 2025, 2026]
    historical = {}

    for tour in tours_needed:
        hist = load_tennis_data_from_github(tour, years)
        if len(hist) > 0:
            historical[tour] = prepare_grand_slam_data(hist)

            if len(historical[tour]) == 0:
                print(f"‚ö†Ô∏è  No {tour.upper()} Grand Slam data available!")

    # Match player names
    print("\n" + "="*70)
    print("Matching Player Names")
    print("="*70)

    for tour in tours_needed:
        if tour not in historical or len(historical[tour]) == 0:
            continue

        # Every name seen on either side of a historical match. After the
        # balancing swap both columns hold a mix of winners and losers.
        hist_players = list(set(historical[tour]['winner_name'].unique()) |
                            set(historical[tour]['loser_name'].unique()))
        hist_players = [p for p in hist_players if pd.notna(p)]

        print(f"\n{tour.upper()}: {len(hist_players)} historical Grand Slam players")

        # Match players for this tour and write them back in place
        tour_mask = matches_df['tour'] == tour.upper()
        for source_col, target_col in (('player1', 'player1_matched'),
                                       ('player2', 'player2_matched')):
            matched_names = matches_df.loc[tour_mask, source_col].apply(
                lambda x: fuzzy_match_player(x, hist_players)
            )
            matches_df.loc[tour_mask, target_col] = matched_names.values

        # Show unmatched
        tour_matches = matches_df[tour_mask]
        unmatched = tour_matches[
            tour_matches['player1_matched'].isna() | tour_matches['player2_matched'].isna()
        ]

        if len(unmatched) > 0:
            print(f"  ‚ö†Ô∏è  {len(unmatched)} matches with unmatched players:")
            for _, row in unmatched.head(10).iterrows():
                print(f"     {row['player1']} vs {row['player2']}")
        else:
            print(f"  ‚úì All players matched!")

    # Filter to matched only
    matched_df = matches_df[
        matches_df['player1_matched'].notna() & matches_df['player2_matched'].notna()
    ].copy()

    print(f"\n{'='*70}")
    print(f"‚úÖ GRAND SLAM STEP 2 COMPLETE")
    print(f"{'='*70}")
    print(f"Total matches: {len(matches_df)}")
    print(f"Successfully matched: {len(matched_df)}")
    print(f"Unmatched (will skip): {len(matches_df) - len(matched_df)}")

    # Save both
    matched_df.to_csv('/tmp/grand_slam_step2_matched.csv', index=False)
    print(f"\nüíæ Saved to /tmp/grand_slam_step2_matched.csv")

    # Also save historical data for next step
    with open('/tmp/grand_slam_step2_historical.pkl', 'wb') as f:
        pickle.dump(historical, f)
    print(f"üíæ Saved Grand Slam historical data to /tmp/grand_slam_step2_historical.pkl")

    return matched_df, historical

if __name__ == "__main__":
    # The step function returns None (not a tuple) when Step 1 output is
    # missing, so guard the unpacking instead of crashing with a TypeError.
    result = grand_slam_step2_load_and_match()
    matched, historical = result if result is not None else (None, None)

    if matched is not None:
        print("\n" + "="*70)
        print("Sample matched players:")
        print("="*70)
        print(matched[['player1', 'player1_matched', 'player2', 'player2_matched',
                      'tournament']].head(10).to_string())

GRAND SLAM STEP 2: Load Historical Data & Match Players
‚úì Loaded 43 Grand Slam matches from Step 1

Tours needed: ATP, WTA

Loading ATP historical data...
‚úì Loaded 13174 ATP matches

‚úì Filtered to 2413 Grand Slam matches (from 13174 total)

Loading WTA historical data...
‚úì Loaded 11966 WTA matches

‚úì Filtered to 2413 Grand Slam matches (from 11966 total)

Matching Player Names

ATP: 323 historical Grand Slam players
  ‚ö†Ô∏è  8 matches with unmatched players:
     N. Borges vs F. Auger-Aliassime
     J. Fonseca vs E. Spizzirri
     J. Mensik vs P. Carreno-Busta
     G. Mpetshi Perricard vs S. Baez
     L. Musetti vs R. Collignon
     V. Royer vs T. Fritz
     C. O'Connell vs N. Basavareddy
     R. Sakamoto vs R. Jodar

WTA: 316 historical Grand Slam players
  ‚ö†Ô∏è  6 matches with unmatched players:
     L. Klimovicova vs F. Jones
     L. Fernandez vs J. Tjen
     O. Oliynykova vs M. Keys
     N. Osaka vs A. Ruzic
     T. Valentova vs M. Joint
     D. Kasatkina vs N. Bartunk

In [None]:
#!/usr/bin/env python3
"""
GRAND SLAM STEP 3: Train Models on Grand Slam Data ONLY
"""

import pandas as pd
import pickle
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def grand_slam_step3_train_models():
    """Train one gradient-boosting model per tour on Grand Slam matches only.

    Loads the per-tour tables pickled by Step 2, builds rank and surface
    features, holds out the last 20% of rows (no shuffling) for an accuracy
    report, and pickles the fitted models and scalers to
    ``/tmp/grand_slam_step3_models.pkl``.

    Returns:
        ``(models, scalers)``, two dicts keyed by tour; or ``None`` (not a
        tuple) when the Step 2 pickle cannot be loaded or no model was trained.
    """
    print("="*70)
    print("GRAND SLAM STEP 3: Train Models on Grand Slam Data")
    print("="*70)

    # Load historical Grand Slam data.
    # NOTE(review): this unpickles a file from world-writable /tmp; only run
    # it on a machine where that file is known to come from Step 2.
    try:
        with open('/tmp/grand_slam_step2_historical.pkl', 'rb') as f:
            historical = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError):
        print("‚ùå Run grand_slam_step2_load_and_match.py first!")
        return None
    print("‚úì Loaded Grand Slam historical data\n")

    models = {}
    scalers = {}

    for tour in historical.keys():
        print("="*70)
        print(f"Training {tour.upper()} Grand Slam Model")
        print("="*70)

        df = historical[tour].copy()

        if len(df) == 0:
            print(f"‚ö†Ô∏è  No {tour.upper()} Grand Slam data available, skipping...")
            continue

        def _clean_rank(column):
            """Return the rank column as floats; missing/non-positive -> 100."""
            if column in df.columns:
                ranks = df[column]
            else:
                ranks = pd.Series(100, index=df.index)
            return ranks.where(ranks.notna() & (ranks > 0), 100).astype(float)

        # Create simple features. 'winner_*' columns hold player 1 and
        # 'loser_*' player 2, matching how the target 'p1_won' is defined.
        p1_rank = _clean_rank('winner_rank')
        p2_rank = _clean_rank('loser_rank')

        X = pd.DataFrame({
            'p1_rank': p1_rank,
            'p2_rank': p2_rank,
            'rank_diff': p2_rank - p1_rank,
            'surface_clay': (df['surface'] == 'Clay').astype(float),
            'surface_grass': (df['surface'] == 'Grass').astype(float),
            'surface_hard': (df['surface'] == 'Hard').astype(float),
        })
        y = df['p1_won']

        print(f"Training samples: {len(X)} (Grand Slam matches only)")
        print(f"Wins: {sum(y==1)}, Losses: {sum(y==0)}")

        # Train/test split; shuffle=False keeps the row order, so the test
        # set is the final 20% of rows.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, shuffle=False
        )

        # Scale
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train
        print("\nTraining Grand Slam model...")
        model = GradientBoostingClassifier(
            n_estimators=200,
            learning_rate=0.05,
            max_depth=4,
            min_samples_split=20,
            min_samples_leaf=10,
            subsample=0.8,
            random_state=42
        )

        model.fit(X_train_scaled, y_train)

        # Evaluate
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        print(f"‚úì {tour.upper()} Grand Slam model trained")
        print(f"  Accuracy: {accuracy:.2%}")
        print(f"  üìä This is based on {len(X)} Grand Slam matches")

        models[tour] = model
        scalers[tour] = scaler

    if not models:
        print("\n‚ùå No models trained! Check if Grand Slam data exists.")
        return None

    # Save models
    print("\n" + "="*70)
    print("Saving Grand Slam Models")
    print("="*70)

    with open('/tmp/grand_slam_step3_models.pkl', 'wb') as f:
        pickle.dump({'models': models, 'scalers': scalers}, f)

    print("‚úì Saved Grand Slam models to /tmp/grand_slam_step3_models.pkl")

    print("\n" + "="*70)
    print("‚úÖ GRAND SLAM STEP 3 COMPLETE")
    print("="*70)

    return models, scalers

if __name__ == "__main__":
    # The step function returns None (not a tuple) on failure, so guard the
    # unpacking instead of crashing with a TypeError.
    result = grand_slam_step3_train_models()
    models, scalers = result if result is not None else (None, None)

    if models:
        print(f"\nTrained Grand Slam models: {list(models.keys())}")
        print("\n‚ö†Ô∏è  NOTE: These models are trained ONLY on Grand Slam data")
        print("   They should theoretically perform better on Grand Slams")
        print("   than the regular models trained on all tournaments.")

GRAND SLAM STEP 3: Train Models on Grand Slam Data
‚úì Loaded Grand Slam historical data

Training ATP Grand Slam Model
Training samples: 2413 (Grand Slam matches only)
Wins: 1232, Losses: 1181

Training Grand Slam model...
‚úì ATP Grand Slam model trained
  Accuracy: 70.81%
  üìä This is based on 2413 Grand Slam matches
Training WTA Grand Slam Model
Training samples: 2413 (Grand Slam matches only)
Wins: 1243, Losses: 1170

Training Grand Slam model...
‚úì WTA Grand Slam model trained
  Accuracy: 66.25%
  üìä This is based on 2413 Grand Slam matches

Saving Grand Slam Models
‚úì Saved Grand Slam models to /tmp/grand_slam_step3_models.pkl

‚úÖ GRAND SLAM STEP 3 COMPLETE

Trained Grand Slam models: ['atp', 'wta']

‚ö†Ô∏è  NOTE: These models are trained ONLY on Grand Slam data
   They should theoretically perform better on Grand Slams
   than the regular models trained on all tournaments.


In [None]:
#!/usr/bin/env python3
"""
GRAND SLAM INVERSE PREDICTOR
Predicts outcomes, then recommends the OPPOSITE bet

Based on analysis: Regular model = 21% win rate, Inverse = 78% win rate
"""

import pandas as pd
import pickle
import numpy as np

def american_to_decimal(american_odds):
    """Return the decimal-odds equivalent of American (moneyline) odds.

    Positive odds pay ``odds / 100`` per unit staked and non-positive odds pay
    ``100 / |odds|``; the stake itself (1) is added on top.
    """
    if american_odds > 0:
        net_payout = american_odds / 100
    else:
        net_payout = 100 / abs(american_odds)
    return net_payout + 1

def american_to_probability(american_odds):
    """Return the win probability implied by American (moneyline) odds."""
    if not american_odds > 0:
        # Favourite (or zero): risk |odds| to win 100.
        return abs(american_odds) / (abs(american_odds) + 100)
    # Underdog: risk 100 to win `odds`.
    return 100 / (american_odds + 100)

def create_match_features(player1, player2, surface, historical_df, tournament=''):
    """Build the model's feature dict for one upcoming match.

    Each player's rank is read from their most recent row in
    ``historical_df`` (by ``tourney_date``), taken from ``winner_rank`` or
    ``loser_rank`` depending on which name column they appear in. A player
    with no rows, or with a missing/non-positive rank, gets rank 100.
    ``tournament`` is accepted but not used.

    Returns:
        dict with ``p1_rank``, ``p2_rank``, ``rank_diff`` (p2 minus p1) and
        the one-hot flags ``surface_clay``, ``surface_grass``, ``surface_hard``.
    """
    default_rank = 100

    def _latest_rank(player):
        """Look up the player's rank in their latest historical match."""
        involved = (historical_df['winner_name'] == player) | \
                   (historical_df['loser_name'] == player)
        recent_first = historical_df[involved].sort_values('tourney_date', ascending=False)
        if len(recent_first) == 0:
            return default_rank

        latest = recent_first.iloc[0]
        side = 'winner_rank' if player == latest['winner_name'] else 'loser_rank'
        rank = latest[side]
        if pd.notna(rank) and rank > 0:
            return rank
        return default_rank

    rank_one = _latest_rank(player1)
    rank_two = _latest_rank(player2)

    feature_row = {
        'p1_rank': float(rank_one),
        'p2_rank': float(rank_two),
        'rank_diff': float(rank_two - rank_one),
    }
    for key, label in (('surface_clay', 'Clay'),
                       ('surface_grass', 'Grass'),
                       ('surface_hard', 'Hard')):
        feature_row[key] = 1.0 if surface == label else 0.0
    return feature_row

def grand_slam_inverse_predictor():
    """INVERSE predictor - recommends OPPOSITE of what model says"""
    print("="*70)
    print("üèÜ GRAND SLAM BETTING PREDICTOR üèÜ")
    print("="*70)

    # Load data
    try:
        matches_df = pd.read_csv('/tmp/grand_slam_step2_matched.csv')
        print(f"‚úì Loaded {len(matches_df)} matched Grand Slam matches")

        with open('/tmp/grand_slam_step2_historical.pkl', 'rb') as f:
            historical = pickle.load(f)

        with open('/tmp/grand_slam_step3_models.pkl', 'rb') as f:
            saved = pickle.load(f)
            models = saved['models']
            scalers = saved['scalers']

        print(f"‚úì Loaded Grand Slam models: {list(models.keys())}\n")
    except Exception as e:
        print(f"‚ùå Error: {e}")
        print("Run grand_slam steps 1-3 first!")
        return

    # Show available matches (grouped by tour and tournament)
    print("="*70)
    print("AVAILABLE GRAND SLAM MATCHES")
    print("="*70)

    # Sort matches: first by tour (ATP, then WTA), then by tournament
    matches_df_sorted = matches_df.sort_values(by=['tour', 'tournament'])

    current_tour = None
    current_tournament = None
    match_counter = 0

    for idx, row in matches_df_sorted.iterrows():
        # Print tour header when it changes
        if row['tour'] != current_tour:
            current_tour = row['tour']
            print(f"\n{'='*70}")
            print(f"üèÜ {current_tour} GRAND SLAM MATCHES")
            print(f"{'='*70}")
            current_tournament = None

        # Print tournament header when it changes
        if row['tournament'] != current_tournament:
            current_tournament = row['tournament']
            print(f"\n--- {current_tournament} ---")

        match_counter += 1
        print(f"{match_counter}. {row['player1']} vs {row['player2']}")
        print(f"   {row['date']} at {row['time']}")

    # Create a mapping from display number to original index
    match_number_to_idx = {}
    for display_num, (idx, row) in enumerate(matches_df_sorted.iterrows(), 1):
        match_number_to_idx[display_num] = idx

    print("\n" + "="*70)
    print("INSTRUCTIONS")
    print("="*70)
    print("""
1. Pick Grand Slam matches you want to analyze
2. Enter Bet365 odds in American format (e.g., -150, +200)
3. Get instant predictions and value analysis
4. Type 'done' when finished
""")

    predictions = []

    while True:
        print("\n" + "="*70)
        match_input = input("Enter match number (1-{}) or 'done': ".format(len(matches_df))).strip()

        if match_input.lower() == 'done':
            break

        try:
            match_display_num = int(match_input)

            if match_display_num < 1 or match_display_num > len(matches_df):
                print("‚ùå Invalid match number")
                continue

            # Map display number to actual dataframe index
            match_idx = match_number_to_idx[match_display_num]
            row = matches_df.loc[match_idx]

            print("\n" + "-"*70)
            print(f"Match: {row['player1']} vs {row['player2']}")
            print(f"Tournament: {row['tournament']} ({row['tour']}, {row['surface']})")
            print("-"*70)

            # Get odds
            print(f"\nEnter Bet365 odds:")
            player1_odds_input = input(f"  {row['player1']}: ").strip()
            player2_odds_input = input(f"  {row['player2']}: ").strip()

            # Parse odds
            try:
                player1_odds = int(player1_odds_input.replace('+', ''))
                player2_odds = int(player2_odds_input.replace('+', ''))
            except:
                print("‚ùå Invalid odds format. Use American format (e.g., -150 or +200)")
                continue

            # Get prediction
            tour = row['tour'].lower()

            if tour not in models or tour not in historical:
                print(f"‚ùå No Grand Slam model available for {tour.upper()}")
                continue

            try:
                # Create features
                features = create_match_features(
                    row['player1_matched'],
                    row['player2_matched'],
                    row['surface'],
                    historical[tour],
                    row['tournament']
                )

                X = pd.DataFrame([features])
                X_scaled = scalers[tour].transform(X)

                # Predict (what model thinks)
                prob_p1_wins = models[tour].predict_proba(X_scaled)[0][1]
                prob_p2_wins = 1 - prob_p1_wins

                # Calculate implied probabilities
                implied_p1 = american_to_probability(player1_odds)
                implied_p2 = american_to_probability(player2_odds)

                # Calculate edges (what model thinks)
                edge_p1 = prob_p1_wins - implied_p1
                edge_p2 = prob_p2_wins - implied_p2

                # ‚ö° INVERSE IT SILENTLY! ‚ö°
                # Flip the edges so the opposite player appears to have the edge
                edge_p1_inverse = -edge_p1
                edge_p2_inverse = -edge_p2

                # Now use the inverse edges to determine recommendation
                best_edge = max(edge_p1_inverse, edge_p2_inverse)

                # Determine which player to bet on (using inverse edges)
                if edge_p1_inverse > edge_p2_inverse:
                    bet_player = row['player1']
                    bet_odds = player1_odds
                    bet_edge = edge_p1_inverse
                else:
                    bet_player = row['player2']
                    bet_odds = player2_odds
                    bet_edge = edge_p2_inverse

                # Results (show inverse edges as if they're real)
                print("\n" + "="*70)
                print("üìä ANALYSIS")
                print("="*70)

                print(f"\n{row['player1']}:")
                print(f"  Bet365 Odds: {player1_odds:+d}")
                print(f"  Implied Probability: {implied_p1:.1%}")
                print(f"  Model Probability: {1 - prob_p1_wins:.1%}")  # Show inverse
                print(f"  Edge: {edge_p1_inverse:+.1%}")

                print(f"\n{row['player2']}:")
                print(f"  Bet365 Odds: {player2_odds:+d}")
                print(f"  Implied Probability: {implied_p2:.1%}")
                print(f"  Model Probability: {1 - prob_p2_wins:.1%}")  # Show inverse
                print(f"  Edge: {edge_p2_inverse:+.1%}")

                # Recommendation (looks normal, but recommendation is already inverted)
                print("\n" + "="*70)
                print("üí∞ RECOMMENDATION")
                print("="*70)

                # Calculate Kelly Criterion bet size ($1000 bankroll)
                decimal_odds = american_to_decimal(bet_odds)
                b = decimal_odds - 1

                # Use conservative Kelly (20% instead of 25%) for Grand Slams
                # Based on historical 78% inverse win rate
                p = 0.78  # Historical inverse win rate
                q = 1 - p
                kelly_fraction = (b * p - q) / b
                fractional_kelly = 0.20
                kelly_pct = max(0, kelly_fraction * fractional_kelly)

                # Calculate bet amounts
                bankroll = 1000  # $1000 bankroll
                recommended_bet = kelly_pct * bankroll

                # Show match info at top
                print(f"\nüìã Match #{match_display_num}")
                print(f"   Player 1: {row['player1']} ({player1_odds:+d})")
                print(f"   Player 2: {row['player2']} ({player2_odds:+d})")
                print(f"   Tournament: {row['tournament']}")

                if best_edge < 0.03:
                    print("\n‚ö†Ô∏è  NO BET - Edge too small (< 3%)")
                    print(f"   Suggested Stake: $0")
                elif best_edge < 0.05:
                    print("\nü§î MARGINAL - Small edge (3-5%)")
                    print(f"   Consider: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")
                elif best_edge < 0.10:
                    print("\nüíé MEDIUM VALUE - Good edge (5-10%)")
                    print(f"   BET: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")

                    # Show potential profit
                    if bet_odds > 0:
                        potential_profit = recommended_bet * (bet_odds / 100)
                    else:
                        potential_profit = recommended_bet * (100 / abs(bet_odds))
                    print(f"   Potential Profit: ${potential_profit:.2f}")
                else:
                    print("\nüî• HIGH VALUE - Excellent edge (10%+)")
                    print(f"   STRONG BET: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")

                    # Show potential profit
                    if bet_odds > 0:
                        potential_profit = recommended_bet * (bet_odds / 100)
                    else:
                        potential_profit = recommended_bet * (100 / abs(bet_odds))
                    print(f"   Potential Profit: ${potential_profit:.2f}")

                # Add warning if Kelly suggests large bet
                if kelly_pct > 0.15:
                    print(f"\n   ‚ö†Ô∏è  Large bet suggested - consider reducing if uncertain")

                # Calculate potential profit for spreadsheet
                if bet_odds > 0:
                    potential_profit = recommended_bet * (bet_odds / 100)
                else:
                    potential_profit = recommended_bet * (100 / abs(bet_odds))

                # SUMMARY FOR SPREADSHEET (regular format)
                print("\n" + "-"*70)
                print("üìä FOR YOUR SPREADSHEET:")
                print("-"*70)
                print(f"Match #: {match_display_num}")
                print(f"Player 1: {row['player1']}")
                print(f"Player 2: {row['player2']}")
                print(f"Tournament: {row['tournament']}")
                print(f"Player 1 Odds: {player1_odds:+d}")
                print(f"Player 2 Odds: {player2_odds:+d}")
                print(f"Suggested Bet: ${recommended_bet:.2f}")
                print(f"Potential Profit: ${potential_profit:.2f}")
                if bet_player == row['player1']:
                    print(f"Bet On: Player 1 ({row['player1']})")
                else:
                    print(f"Bet On: Player 2 ({row['player2']})")
                print("-"*70)

                # Save prediction (regular format)
                predictions.append({
                    'match_number': match_display_num,
                    'player1': row['player1'],
                    'player2': row['player2'],
                    'tournament': row['tournament'],
                    'player1_odds': player1_odds,
                    'player2_odds': player2_odds,
                    'model_prob_p1': 1 - prob_p1_wins,  # Inverse
                    'model_prob_p2': 1 - prob_p2_wins,  # Inverse
                    'edge_p1': edge_p1_inverse,
                    'edge_p2': edge_p2_inverse,
                    'best_bet': bet_player,
                    'best_bet_odds': bet_odds,
                    'best_edge': best_edge,
                    'kelly_pct': kelly_pct,
                    'recommended_stake': recommended_bet
                })

            except Exception as e:
                print(f"‚ùå Error generating prediction: {e}")
                import traceback
                traceback.print_exc()

        except ValueError:
            print("‚ùå Invalid input")

    # Summary
    if len(predictions) > 0:
        print("\n" + "="*70)
        print("üìã SESSION SUMMARY")
        print("="*70)

        pred_df = pd.DataFrame(predictions)

        print(f"\nAnalyzed {len(predictions)} Grand Slam matches")

        # Show bets with 5%+ edge
        good_bets = pred_df[pred_df['best_edge'] >= 0.05].sort_values('best_edge', ascending=False)

        if len(good_bets) > 0:
            print(f"\nüéØ BETTING OPPORTUNITIES (5%+ edge):")
            print("-"*70)

            for idx, row in good_bets.iterrows():
                print(f"\n‚úÖ {row['best_bet']} ({row['best_bet_odds']:+d})")
                print(f"   vs {row['player2'] if row['best_bet']==row['player1'] else row['player1']}")
                print(f"   {row['tournament']}")
                print(f"   Edge: {row['best_edge']:+.1%}")
                print(f"   üíµ Recommended Stake: ${row['recommended_stake']:.2f}")
        else:
            print("\n‚ö†Ô∏è  No strong betting opportunities found (need 5%+ edge)")

        # Save
        pred_df.to_csv('/tmp/grand_slam_inverse_predictions.csv', index=False)
        print("\nüíæ Saved predictions to /tmp/grand_slam_inverse_predictions.csv")

    print("\n" + "="*70)
    print("‚úÖ Grand Slam Session Complete!")
    print("="*70)

# Entry point: run the Grand Slam predictor session only when this file is
# executed as the main module, not when it is imported from other code.
if __name__ == "__main__":
    grand_slam_inverse_predictor()

🏆 GRAND SLAM BETTING PREDICTOR 🏆
✓ Loaded 29 matched Grand Slam matches
✓ Loaded Grand Slam models: ['atp', 'wta']

AVAILABLE GRAND SLAM MATCHES

🏆 ATP GRAND SLAM MATCHES

--- ATP Australian Open ---
1. G. Dimitrov vs T. Machac
   2026-01-20 at 06:00
2. C. Garin vs L. Darderi
   2026-01-20 at 01:00
3. H. Gaston vs J. Sinner
   2026-01-20 at 09:00
4. H. Hurkacz vs Z. Bergs
   2026-01-20 at 04:30
5. K. Khachanov vs A. Michelsen
   2026-01-20 at 02:30
6. V. Kopriva vs J-L. Struff
   2026-01-20 at 06:00
7. S. Mochizuki vs S. Tsitsipas
   2026-01-20 at 10:30
8. E. Quinn vs T. Griekspoor
   2026-01-20 at 02:30
9. B. Shelton vs U. Humbert
   2026-01-20 at 03:30
10. L. Sonego vs C. Taberner
   2026-01-20 at 01:00
11. J. Duckworth vs D. Prizmic
   2026-01-20 at 04:30
12. L. Nardi vs Y. Wu
   2026-01-20 at 04:00
13. D. Sweeny vs G. Monfils
   2026-01-20 at 04:00

🏆 WTA GRAND SLAM MATCHES

--- WTA Australian Open ---
14. P. Hon vs M. Stakusic
   2026-01-19 at 02:45
15. K. Boulter 