In [None]:
#!/usr/bin/env python3
"""
STEP 1: Fetch Today's Matches from API-Tennis
"""

import requests
from datetime import datetime
import pandas as pd

API_KEY = 'YOUR_API_KEY_HERE'

def step1_fetch_matches():
    """Fetch today's and tomorrow's fixtures and keep open ATP/WTA singles.

    Calls the API-Tennis ``get_fixtures`` method once per day (today and
    tomorrow, based on the local clock). A day whose request fails, returns
    a non-200 status, is not valid JSON, or is flagged unsuccessful by the
    API is reported and skipped instead of aborting the whole step.

    The combined fixtures are then filtered:
      * tournaments whose name contains a Grand Slam keyword are dropped;
      * matches with status Finished/Cancelled/Postponed are dropped;
      * only events typed "atp singles" / "wta singles" are kept, excluding
        doubles, Challenger and ITF events.

    Returns:
        pandas.DataFrame: one row per kept match with the columns
        ``event_key``, ``player1``, ``player2``, ``tournament``, ``date``,
        ``time``, ``status``, ``tour`` and ``surface``. The columns are
        present even when no match survives the filters.
    """
    from datetime import timedelta

    print("="*70)
    print("STEP 1: Fetching Upcoming Matches")
    print("="*70)

    base_url = "https://api.api-tennis.com/tennis/"

    # Declared up front so an empty result still yields a well-formed frame.
    columns = ['event_key', 'player1', 'player2', 'tournament', 'date',
               'time', 'status', 'tour', 'surface']
    grand_slam_keywords = ('australian open', 'french open', 'roland garros',
                           'wimbledon', 'us open', 'grand slam')
    # Kept: empty status ("") and status "1" (live/starting soon)
    closed_statuses = ('Finished', 'Cancelled', 'Postponed')

    today = datetime.now()
    tomorrow = today + timedelta(days=1)

    today_str = today.strftime('%Y-%m-%d')
    tomorrow_str = tomorrow.strftime('%Y-%m-%d')

    print(f"\nüìÖ Fetching: {today_str} and {tomorrow_str}")
    print("   (Bet365 often shows next 24-48 hours)")

    all_matches = []

    # Fetch both days
    for date_str, label in [(today_str, "today"), (tomorrow_str, "tomorrow")]:
        params = {
            'method': 'get_fixtures',
            'APIkey': API_KEY,
            'date_start': date_str,
            'date_stop': date_str
        }

        print(f"\nüîç Fetching {label}'s matches...")

        # Same 10 s timeout as the odds lookup in step 2; without it a
        # stalled connection would hang the step indefinitely.
        try:
            response = requests.get(base_url, params=params, timeout=10)
        except requests.RequestException as exc:
            print(f"‚ùå Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"‚ùå Error: HTTP {response.status_code}")
            continue

        try:
            data = response.json()
        except ValueError as exc:
            print(f"‚ùå Error: invalid JSON response ({exc})")
            continue

        if not isinstance(data, dict) or data.get('success') != 1:
            print(f"‚ùå API Error: {data}")
            continue

        # "or []" also covers an explicit null result.
        matches = data.get('result') or []
        print(f"‚úì Found {len(matches)} total matches")

        all_matches.extend(matches)

    print(f"\n‚úì Total matches across both days: {len(all_matches)}")

    # Filter for ATP/WTA singles only
    filtered = []
    for match in all_matches:
        # "or ''" guards against fields that are present but null.
        event_type = (match.get('event_type_type') or '').lower()
        event_status = match.get('event_status', '')
        tournament = (match.get('tournament_name') or '').lower()

        # Skip Grand Slams entirely
        if any(slam in tournament for slam in grand_slam_keywords):
            continue

        # Skip finished or cancelled matches
        if event_status in closed_statuses:
            continue

        # Only main-tour singles (no doubles, Challengers or ITF)
        is_tour_singles = any(
            tour in event_type for tour in ('atp singles', 'wta singles')
        )
        if not is_tour_singles:
            continue
        if any(word in event_type for word in ('doubles', 'challenger', 'itf')):
            continue

        filtered.append({
            'event_key': match.get('event_key'),
            'player1': match.get('event_first_player'),
            'player2': match.get('event_second_player'),
            'tournament': match.get('tournament_name'),
            'date': match.get('event_date'),
            'time': match.get('event_time'),
            'status': event_status,
            'tour': 'WTA' if 'wta' in event_type else 'ATP',
            'surface': 'Hard'  # Default, will detect from tournament later
        })

    # Passing columns explicitly keeps df['tournament'] valid when
    # `filtered` is empty (previously a KeyError).
    df = pd.DataFrame(filtered, columns=columns)

    print(f"‚úì Filtered to {len(df)} ATP/WTA singles matches")

    # Show breakdown
    print("\nüìä Breakdown by tournament:")
    for tournament in df['tournament'].unique():
        in_tournament = df[df['tournament'] == tournament]
        # A null tournament name compares unequal to itself; skip it rather
        # than indexing into an empty selection.
        if in_tournament.empty:
            continue
        count = len(in_tournament)
        tour = in_tournament['tour'].iloc[0]
        print(f"   {tournament} ({tour}): {count} matches")

    print(f"\n‚úÖ STEP 1 COMPLETE - {len(df)} matches ready")

    return df

if __name__ == "__main__":
    # Only contact the API once the placeholder key has been replaced.
    if API_KEY != 'YOUR_API_KEY_HERE':
        matches_df = step1_fetch_matches()

        if matches_df is not None:
            # Banner, then a preview of at most ten rows.
            print("\n" + "="*70, "Sample matches:", "="*70, sep="\n")
            print(matches_df.head(10).to_string())

            # Persist the result so Step 2 can pick it up.
            matches_df.to_csv('/tmp/step1_matches.csv', index=False)
            print("\nüíæ Saved to /tmp/step1_matches.csv")
    else:
        print("‚ùå Please add your API key first!")

STEP 1: Fetching Upcoming Matches

üìÖ Fetching: 2026-01-16 and 2026-01-17
   (Bet365 often shows next 24-48 hours)

üîç Fetching today's matches...
‚úì Found 132 total matches

üîç Fetching tomorrow's matches...
‚úì Found 13 total matches

‚úì Total matches across both days: 145
‚úì Filtered to 3 ATP/WTA singles matches

üìä Breakdown by tournament:
   ATP Auckland (ATP): 1 matches
   WTA Adelaide (WTA): 1 matches
   WTA Hobart (WTA): 1 matches

‚úÖ STEP 1 COMPLETE - 3 matches ready

Sample matches:
   event_key      player1         player2    tournament        date   time status tour surface
0   12097104      S. Baez       J. Mensik  ATP Auckland  2026-01-17  02:00         ATP    Hard
1   12097108  M. Andreeva        V. Mboko  WTA Adelaide  2026-01-17  03:00         WTA    Hard
2   12097109     I. Jovic  E. Cocciaretto    WTA Hobart  2026-01-17  03:00         WTA    Hard

üíæ Saved to /tmp/step1_matches.csv


In [None]:
#!/usr/bin/env python3
"""
STEP 2: Check Bet365 Odds for Matches
"""

import requests
import pandas as pd

API_KEY = 'YOUR_API_KEY_HERE'

def step2_check_odds():
    """Look up Bet365 odds for every match saved by Step 1.

    Reads ``/tmp/step1_matches.csv``, calls the API-Tennis ``get_odds``
    method once per ``event_key`` and keeps the matches for which both the
    "Home" and "Away" entries of the "Home/Away" market carry a ``bet365``
    price. A match whose request or payload is unusable is reported and
    skipped; it never aborts the whole step.

    Returns:
        pandas.DataFrame | None: the Step 1 rows inner-joined with
        ``player1_odds``, ``player2_odds`` and ``bookmaker``. ``None`` when
        the Step 1 file cannot be read or no Bet365 odds were found.
    """
    print("="*70)
    print("STEP 2: Checking Bet365 Odds")
    print("="*70)

    # Load matches from Step 1
    try:
        matches_df = pd.read_csv('/tmp/step1_matches.csv')
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        print("‚ùå Run step1_fetch_matches.py first!")
        return None
    print(f"‚úì Loaded {len(matches_df)} matches from Step 1\n")

    base_url = "https://api.api-tennis.com/tennis/"

    print("üí∞ Checking Bet365 odds for each match...")

    odds_list = []
    total = len(matches_df)

    # enumerate() keeps the progress counter independent of index labels.
    for position, event_key in enumerate(matches_df['event_key'], start=1):
        if position % 5 == 0:
            print(f"   Checked {position}/{total}...")

        params = {
            'method': 'get_odds',
            'APIkey': API_KEY,
            'match_key': event_key
        }

        try:
            response = requests.get(base_url, params=params, timeout=10)
            if response.status_code != 200:
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure or a body that is not JSON.
            print(f"   ‚ö†Ô∏è  Skipping match {event_key}: {exc}")
            continue

        try:
            if data.get('success') != 1:
                continue

            result = data.get('result', {}).get(str(event_key), {})
            if 'Home/Away' not in result:
                continue

            home_away = result['Home/Away']
            bet365_home = home_away.get('Home', {}).get('bet365')
            bet365_away = home_away.get('Away', {}).get('bet365')

            if bet365_home and bet365_away:
                odds_list.append({
                    'event_key': event_key,
                    'player1_odds': float(bet365_home),
                    'player2_odds': float(bet365_away),
                    'bookmaker': 'Bet365'
                })
        except (AttributeError, TypeError, ValueError) as exc:
            # Payload shaped differently than expected (e.g. a list where a
            # dict is assumed, or a non-numeric price).
            print(f"   ‚ö†Ô∏è  Skipping match {event_key}: {exc}")

    if not odds_list:
        print("\n‚ùå No Bet365 odds found")
        return None

    odds_df = pd.DataFrame(odds_list)

    # Merge with matches
    result_df = matches_df.merge(odds_df, on='event_key', how='inner')

    print(f"\n‚úì Found Bet365 odds for {len(result_df)} matches")

    # Show breakdown
    print("\nüìä Matches with Bet365 odds by tournament:")
    for tournament in result_df['tournament'].unique():
        in_tournament = result_df[result_df['tournament'] == tournament]
        # A missing tournament name (NaN) never equals itself; skip it.
        if in_tournament.empty:
            continue
        count = len(in_tournament)
        tour = in_tournament['tour'].iloc[0]
        print(f"   {tournament} ({tour}): {count} matches")

    print(f"\n‚úÖ STEP 2 COMPLETE - {len(result_df)} matches with Bet365 odds")

    return result_df

if __name__ == "__main__":
    # Only contact the API once the placeholder key has been replaced.
    if API_KEY != 'YOUR_API_KEY_HERE':
        matches_with_odds = step2_check_odds()

        if matches_with_odds is not None:
            # Banner, then the players, tournament and both prices for at
            # most ten rows.
            print("\n" + "="*70, "Sample matches with Bet365 odds:", "="*70, sep="\n")
            print(
                matches_with_odds[
                    ['player1', 'player2', 'tournament', 'player1_odds', 'player2_odds']
                ].head(10).to_string()
            )

            # Persist the result so Step 3 can pick it up.
            matches_with_odds.to_csv('/tmp/step2_matches_with_odds.csv', index=False)
            print("\nüíæ Saved to /tmp/step2_matches_with_odds.csv")
    else:
        print("‚ùå Please add your API key first!")

STEP 2: Checking Bet365 Odds
‚úì Loaded 3 matches from Step 1

üí∞ Checking Bet365 odds for each match...

‚úì Found Bet365 odds for 3 matches

üìä Matches with Bet365 odds by tournament:
   ATP Auckland (ATP): 1 matches
   WTA Adelaide (WTA): 1 matches
   WTA Hobart (WTA): 1 matches

‚úÖ STEP 2 COMPLETE - 3 matches with Bet365 odds

Sample matches with Bet365 odds:
       player1         player2    tournament  player1_odds  player2_odds
0      S. Baez       J. Mensik  ATP Auckland          3.00          1.40
1  M. Andreeva        V. Mboko  WTA Adelaide          1.44          2.75
2     I. Jovic  E. Cocciaretto    WTA Hobart          1.36          3.20

üíæ Saved to /tmp/step2_matches_with_odds.csv


In [None]:
#!/usr/bin/env python3
"""
STEP 3: Load Historical Data & Match Players
"""
!pip install fuzzywuzzy
import pandas as pd
import requests
from io import StringIO
from fuzzywuzzy import fuzz
import numpy as np

def load_tennis_data_from_github(tour, years):
    """Download yearly match CSVs from Jeff Sackmann's GitHub repositories.

    Args:
        tour: ``'atp'`` or ``'wta'``. ``'wta'`` selects the ``tennis_wta``
            repository, anything else the ``tennis_atp`` repository; the
            file prefix is ``atp_matches_`` for ``'atp'`` and
            ``wta_matches_`` otherwise.
        years: iterable of years; one ``<prefix><year>.csv`` file is
            requested per entry.

    Returns:
        pandas.DataFrame: all successfully downloaded years concatenated
        with a fresh index, or an empty DataFrame when nothing loaded.
        Years answering with a non-200 status are skipped silently (a season
        file may simply not exist yet); network or parse failures are
        reported and skipped.
    """
    print(f"\nLoading {tour.upper()} historical data...")

    base_url = "https://raw.githubusercontent.com/JeffSackmann/tennis_atp/master/"
    if tour == 'wta':
        base_url = "https://raw.githubusercontent.com/JeffSackmann/tennis_wta/master/"

    file_prefix = 'atp_matches_' if tour == 'atp' else 'wta_matches_'

    all_data = []

    for year in years:
        url = f"{base_url}{file_prefix}{year}.csv"

        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                continue
            all_data.append(pd.read_csv(StringIO(response.text)))
        except (requests.RequestException,
                pd.errors.EmptyDataError,
                pd.errors.ParserError) as exc:
            # Best effort: keep going with the remaining years, but say so.
            print(f"  ‚ö†Ô∏è  Could not load {tour.upper()} {year}: {exc}")

    if not all_data:
        return pd.DataFrame()

    combined = pd.concat(all_data, ignore_index=True)
    print(f"‚úì Loaded {len(combined)} {tour.upper()} matches")

    return combined

def prepare_tennis_data(df):
    """Clean historical matches and build a label-balanced dataset.

    The raw data always lists the winner first, so roughly half of the rows
    (chosen with ``np.random.rand() > 0.5`` per row) have their two players
    swapped. After the swap the ``winner_*`` columns hold "player 1" and the
    ``loser_*`` columns "player 2"; ``p1_won`` is 1 when player 1 is the
    actual winner and 0 when the row was swapped.

    The input frame is not modified. Random draws come from NumPy's global
    generator, so ``np.random.seed`` makes the result reproducible.

    Args:
        df: raw matches with ``tourney_date`` (``YYYYMMDD``), ``surface``,
            ``winner_name`` and ``loser_name``; ``winner_rank`` /
            ``loser_rank`` are optional and default to 100 when the column
            is absent.

    Returns:
        pandas.DataFrame: rows sorted by ``tourney_date`` with the columns
        ``tourney_date``, ``surface``, ``winner_name``, ``loser_name``,
        ``winner_rank``, ``loser_rank`` and ``p1_won``.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    df['tourney_date'] = pd.to_datetime(df['tourney_date'], format='%Y%m%d')
    df['surface'] = df['surface'].fillna('Hard')
    df = df.sort_values('tourney_date').reset_index(drop=True)

    def rank_column(name):
        # A missing rank column falls back to a constant rank of 100.
        if name in df.columns:
            return df[name]
        return pd.Series(100, index=df.index)

    winner_rank = rank_column('winner_rank')
    loser_rank = rank_column('loser_rank')

    # One draw per row, taken in row order from the global generator.
    swap = np.random.rand(len(df)) > 0.5

    return pd.DataFrame({
        'tourney_date': df['tourney_date'],
        'surface': df['surface'],
        'winner_name': np.where(swap, df['loser_name'], df['winner_name']),
        'loser_name': np.where(swap, df['winner_name'], df['loser_name']),
        'winner_rank': np.where(swap, loser_rank, winner_rank),
        'loser_rank': np.where(swap, winner_rank, loser_rank),
        'p1_won': np.where(swap, 0, 1),
    })

def fuzzy_match_player(name, candidates, threshold=70):
    """Return the candidate that best matches a (possibly abbreviated) name.

    A candidate is considered only if its last whitespace-separated token is
    at least 80% similar (``fuzz.ratio``) to the last token of ``name``
    (dots in ``name`` are treated as spaces for this split). Surviving
    candidates are scored by the best of ``ratio``, ``partial_ratio`` and
    ``token_sort_ratio`` on the lower-cased, stripped full names.

    Args:
        name: name to look up; a missing value yields ``None``.
        candidates: iterable of known names; missing entries are ignored.
        threshold: minimum score a candidate needs to be accepted.

    Returns:
        The first candidate with the highest score that is at or above
        ``threshold``, or ``None`` when there is none.
    """
    if pd.isna(name):
        return None

    query = name.lower().strip()
    query_tokens = query.replace('.', ' ').split()
    query_surname = query_tokens[-1] if query_tokens else query

    winner, top_score = None, 0

    for option in candidates:
        if pd.isna(option):
            continue

        option_clean = option.lower().strip()
        option_tokens = option_clean.split()
        option_surname = option_tokens[-1] if option_tokens else option_clean

        # Only compare full names once the surnames are close enough.
        if fuzz.ratio(query_surname, option_surname) >= 80:
            score = max(
                fuzz.ratio(query, option_clean),
                fuzz.partial_ratio(query, option_clean),
                fuzz.token_sort_ratio(query, option_clean),
            )

            # Strict ">" keeps the earliest candidate on ties.
            if score >= threshold and score > top_score:
                winner, top_score = option, score

    return winner

def step3_load_and_match():
    """Load historical results and map API player names onto them.

    Reads ``/tmp/step2_matches_with_odds.csv``, downloads and prepares the
    historical data for every tour present in it, then fuzzy-matches
    ``player1`` / ``player2`` against the historical player names of the
    same tour. Matches with both players resolved are written to
    ``/tmp/step3_matched.csv``; the prepared historical frames are pickled
    to ``/tmp/step3_historical.pkl``.

    Returns:
        tuple | None: ``(matched_df, historical)`` where ``historical`` maps
        ``'atp'`` / ``'wta'`` to the prepared frame, or a bare ``None`` when
        the Step 2 file cannot be read.
    """
    print("="*70)
    print("STEP 3: Load Historical Data & Match Players")
    print("="*70)

    # Load matches from Step 2
    try:
        matches_df = pd.read_csv('/tmp/step2_matches_with_odds.csv')
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        print("‚ùå Run step2_check_odds.py first!")
        return None
    print(f"‚úì Loaded {len(matches_df)} matches with odds from Step 2\n")

    # Determine which tours we need (missing tour values are ignored)
    tour_labels = matches_df['tour'].dropna().astype(str)
    tours_needed = [
        code for code in ('atp', 'wta')
        if tour_labels.str.contains(code.upper(), regex=False).any()
    ]

    print(f"Tours needed: {', '.join(t.upper() for t in tours_needed)}")

    # Load historical data
    years = [2021, 2022, 2023, 2024, 2025, 2026]
    historical = {}

    for tour in tours_needed:
        hist = load_tennis_data_from_github(tour, years)
        if len(hist) > 0:
            historical[tour] = prepare_tennis_data(hist)

    # Match player names
    print("\n" + "="*70)
    print("Matching Player Names")
    print("="*70)

    # Create the result columns up front: if no historical data could be
    # loaded for any tour, the filter below must still find them.
    matches_df['player1_matched'] = None
    matches_df['player2_matched'] = None

    for tour in tours_needed:
        if tour not in historical:
            continue

        # Get all player names from historical data. Sorting makes the
        # candidate order (and thus tie-breaking in the fuzzy match)
        # independent of set/hash ordering.
        hist_players = sorted(
            p for p in (set(historical[tour]['winner_name'].unique()) |
                        set(historical[tour]['loser_name'].unique()))
            if pd.notna(p)
        )

        print(f"\n{tour.upper()}: {len(hist_players)} historical players")

        # Match players for this tour
        tour_mask = matches_df['tour'] == tour.upper()
        tour_matches = matches_df[tour_mask].copy()

        tour_matches['player1_matched'] = tour_matches['player1'].apply(
            lambda x: fuzzy_match_player(x, hist_players)
        )
        tour_matches['player2_matched'] = tour_matches['player2'].apply(
            lambda x: fuzzy_match_player(x, hist_players)
        )

        # Update main dataframe
        matches_df.loc[tour_mask, 'player1_matched'] = \
            tour_matches['player1_matched'].values
        matches_df.loc[tour_mask, 'player2_matched'] = \
            tour_matches['player2_matched'].values

        # Show unmatched
        unmatched = tour_matches[
            tour_matches['player1_matched'].isna() | tour_matches['player2_matched'].isna()
        ]

        if len(unmatched) > 0:
            print(f"  ‚ö†Ô∏è  {len(unmatched)} matches with unmatched players:")
            for idx, row in unmatched.iterrows():
                print(f"     {row['player1']} vs {row['player2']}")
        else:
            print(f"  ‚úì All players matched!")

    # Filter to matched only
    matched_df = matches_df[
        matches_df['player1_matched'].notna() & matches_df['player2_matched'].notna()
    ].copy()

    print(f"\n{'='*70}")
    print(f"‚úÖ STEP 3 COMPLETE")
    print(f"{'='*70}")
    print(f"Total matches: {len(matches_df)}")
    print(f"Successfully matched: {len(matched_df)}")
    print(f"Unmatched (will skip): {len(matches_df) - len(matched_df)}")

    # Save both
    matched_df.to_csv('/tmp/step3_matched.csv', index=False)
    print(f"\nüíæ Saved to /tmp/step3_matched.csv")

    # Also save historical data for next step
    import pickle
    with open('/tmp/step3_historical.pkl', 'wb') as f:
        pickle.dump(historical, f)
    print(f"üíæ Saved historical data to /tmp/step3_historical.pkl")

    return matched_df, historical

if __name__ == "__main__":
    # step3_load_and_match() returns a bare None (not a 2-tuple) when the
    # Step 2 file is missing, so unpack only a real result.
    step3_result = step3_load_and_match()
    matched, historical = step3_result if step3_result is not None else (None, None)

    if matched is not None:
        print("\n" + "="*70)
        print("Sample matched players:")
        print("="*70)
        print(matched[['player1', 'player1_matched', 'player2', 'player2_matched',
                      'tournament']].head(10).to_string())

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0




STEP 3: Load Historical Data & Match Players
‚úì Loaded 3 matches with odds from Step 2

Tours needed: ATP, WTA

Loading ATP historical data...
‚úì Loaded 11712 ATP matches

Loading WTA historical data...
‚úì Loaded 10690 WTA matches

Matching Player Names

ATP: 727 historical players
  ‚úì All players matched!

WTA: 647 historical players
  ‚úì All players matched!

‚úÖ STEP 3 COMPLETE
Total matches: 3
Successfully matched: 3
Unmatched (will skip): 0

üíæ Saved to /tmp/step3_matched.csv
üíæ Saved historical data to /tmp/step3_historical.pkl

Sample matched players:
       player1 player1_matched         player2         player2_matched    tournament
0      S. Baez  Sebastian Baez       J. Mensik            Jakub Mensik  ATP Auckland
1  M. Andreeva  Mirra Andreeva        V. Mboko          Victoria Mboko  WTA Adelaide
2     I. Jovic       Iva Jovic  E. Cocciaretto  Elisabetta Cocciaretto    WTA Hobart


In [None]:
#!/usr/bin/env python3
"""
STEP 4: Train Models
"""

import pandas as pd
import pickle
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def _clean_rank(df, column, default=100.0):
    """Return ``df[column]`` as floats, replacing missing/non-positive ranks."""
    if column not in df.columns:
        return pd.Series(default, index=df.index, dtype=float)
    ranks = pd.to_numeric(df[column], errors='coerce')
    # NaN > 0 is False, so missing ranks also fall back to the default.
    return ranks.where(ranks > 0, default).astype(float)


def _build_feature_frame(df):
    """Build the rank and surface feature columns for every historical row."""
    p1_rank = _clean_rank(df, 'winner_rank')
    p2_rank = _clean_rank(df, 'loser_rank')
    surface = df['surface']

    # Column order matters: the scaler and model are later applied to
    # features built in this same order.
    return pd.DataFrame({
        'p1_rank': p1_rank,
        'p2_rank': p2_rank,
        'rank_diff': p2_rank - p1_rank,
        'surface_clay': (surface == 'Clay').astype(float),
        'surface_grass': (surface == 'Grass').astype(float),
        'surface_hard': (surface == 'Hard').astype(float),
    })


def step4_train_models():
    """Train one gradient-boosting classifier per tour on the Step 3 data.

    Loads ``/tmp/step3_historical.pkl``, builds rank and surface features
    for each tour, fits a ``StandardScaler`` + ``GradientBoostingClassifier``
    on the first 80% of the rows (unshuffled), reports accuracy on the
    remaining 20%, and pickles all models and scalers to
    ``/tmp/step4_models.pkl``.

    Returns:
        tuple | None: ``(models, scalers)``, two dicts keyed by tour, or a
        bare ``None`` when the Step 3 pickle cannot be loaded.
    """
    print("="*70)
    print("STEP 4: Train Models")
    print("="*70)

    # Load historical data
    # NOTE(review): pickle.load executes arbitrary code from the file; /tmp
    # is typically writable by other users - confirm this path is trusted.
    try:
        with open('/tmp/step3_historical.pkl', 'rb') as f:
            historical = pickle.load(f)
    except (OSError, pickle.UnpicklingError, AttributeError, EOFError,
            ImportError, IndexError):
        print("‚ùå Run step3_load_and_match.py first!")
        return None
    print("‚úì Loaded historical data\n")

    models = {}
    scalers = {}

    for tour in historical:
        print("="*70)
        print(f"Training {tour.upper()} Model")
        print("="*70)

        df = historical[tour].copy()

        # Create simple features (already free of NaN, see _clean_rank)
        X = _build_feature_frame(df)
        y = df['p1_won']

        print(f"Training samples: {len(X)}")
        print(f"Wins: {int((y == 1).sum())}, Losses: {int((y == 0).sum())}")

        # Train/test split. shuffle=False keeps the row order, so the last
        # 20% of rows form the test set (random_state has no effect here).
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )

        # Scale
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train
        print("\nTraining model...")
        model = GradientBoostingClassifier(
            n_estimators=200,
            learning_rate=0.05,
            max_depth=4,
            min_samples_split=20,
            min_samples_leaf=10,
            subsample=0.8,
            random_state=42
        )

        model.fit(X_train_scaled, y_train)

        # Evaluate
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        print(f"‚úì {tour.upper()} model trained")
        print(f"  Accuracy: {accuracy:.2%}")

        models[tour] = model
        scalers[tour] = scaler

    # Save models
    print("\n" + "="*70)
    print("Saving Models")
    print("="*70)

    with open('/tmp/step4_models.pkl', 'wb') as f:
        pickle.dump({'models': models, 'scalers': scalers}, f)

    print("‚úì Saved models to /tmp/step4_models.pkl")

    print("\n" + "="*70)
    print("‚úÖ STEP 4 COMPLETE")
    print("="*70)

    return models, scalers

if __name__ == "__main__":
    # step4_train_models() returns a bare None (not a 2-tuple) when the
    # Step 3 pickle is missing, so unpack only a real result.
    step4_result = step4_train_models()
    models, scalers = step4_result if step4_result is not None else (None, None)

    if models:
        print(f"\nTrained models: {list(models.keys())}")

STEP 4: Train Models
‚úì Loaded historical data

Training ATP Model
Training samples: 11712
Wins: 5821, Losses: 5891

Training model...
‚úì ATP model trained
  Accuracy: 62.61%
Training WTA Model
Training samples: 10690
Wins: 5348, Losses: 5342

Training model...
‚úì WTA model trained
  Accuracy: 62.68%

Saving Models
‚úì Saved models to /tmp/step4_models.pkl

‚úÖ STEP 4 COMPLETE

Trained models: ['atp', 'wta']


In [None]:
#!/usr/bin/env python3
"""
Manual Odds Entry - Input Bet365 odds yourself and get predictions
"""

import pandas as pd
import pickle
import numpy as np

def american_to_decimal(american_odds):
    """Translate American (moneyline) odds into decimal odds.

    Positive odds pay ``odds / 100`` per unit staked; non-positive odds pay
    ``100 / abs(odds)``. The stake (1) is added back to give decimal odds.
    """
    profit_per_unit = (
        american_odds / 100 if american_odds > 0 else 100 / abs(american_odds)
    )
    return profit_per_unit + 1

def american_to_probability(american_odds):
    """Translate American (moneyline) odds into the implied win probability.

    Positive odds give ``100 / (odds + 100)``; non-positive odds give
    ``abs(odds) / (abs(odds) + 100)``.
    """
    if american_odds > 0:
        numerator = 100
        denominator = american_odds + 100
    else:
        numerator = abs(american_odds)
        denominator = numerator + 100
    return numerator / denominator

def _latest_rank(player, historical_df):
    """Rank recorded for ``player`` in their most recent row, else 100."""
    involved = (
        (historical_df['winner_name'] == player) | (historical_df['loser_name'] == player)
    )
    recent_first = historical_df[involved].sort_values('tourney_date', ascending=False)

    if len(recent_first) == 0:
        return 100

    latest = recent_first.iloc[0]
    # Read the rank from whichever side of the row the player appears on.
    rank = latest['winner_rank'] if player == latest['winner_name'] else latest['loser_rank']
    return rank if pd.notna(rank) and rank > 0 else 100


def create_match_features(player1, player2, surface, historical_df, tournament=''):
    """Build the model's feature dict for one upcoming match.

    Each player's rank is taken from the row with the latest
    ``tourney_date`` in which they appear (as ``winner_name`` or
    ``loser_name``); players without history, or with a missing or
    non-positive rank, get 100. ``tournament`` is accepted but not used.

    Returns:
        dict: ``p1_rank``, ``p2_rank``, ``rank_diff`` (p2 minus p1) and the
        one-hot flags ``surface_clay``, ``surface_grass``, ``surface_hard``,
        all as floats.
    """
    rank_one = _latest_rank(player1, historical_df)
    rank_two = _latest_rank(player2, historical_df)

    features = {
        'p1_rank': float(rank_one),
        'p2_rank': float(rank_two),
        'rank_diff': float(rank_two - rank_one),
    }
    for key, label in (('surface_clay', 'Clay'),
                       ('surface_grass', 'Grass'),
                       ('surface_hard', 'Hard')):
        features[key] = 1.0 if surface == label else 0.0

    return features

def manual_odds_entry():
    """Interactive odds entry and prediction"""
    print("="*70)
    print("üéæ MANUAL BET365 ODDS ENTRY üéæ")
    print("="*70)

    # Load data
    try:
        matches_df = pd.read_csv('/tmp/step3_matched.csv')
        print(f"‚úì Loaded {len(matches_df)} matched matches")

        with open('/tmp/step3_historical.pkl', 'rb') as f:
            historical = pickle.load(f)

        with open('/tmp/step4_models.pkl', 'rb') as f:
            saved = pickle.load(f)
            models = saved['models']
            scalers = saved['scalers']

        print("‚úì Loaded models and historical data\n")
    except Exception as e:
        print(f"‚ùå Error: {e}")
        print("Run steps 3 and 4 first!")
        return

    # Show available matches
    print("="*70)
    print("AVAILABLE MATCHES")
    print("="*70)

    # Sort matches: first by tour (ATP, then WTA), then by tournament
    matches_df_sorted = matches_df.sort_values(by=['tour', 'tournament'])

    current_tour = None
    current_tournament = None
    match_counter = 0

    for idx, row in matches_df_sorted.iterrows():
        # Print tour header when it changes
        if row['tour'] != current_tour:
            current_tour = row['tour']
            print(f"\n{'='*70}")
            print(f"üéæ {current_tour} MATCHES")
            print(f"{'='*70}")
            current_tournament = None  # Reset tournament when tour changes

        # Print tournament header when it changes
        if row['tournament'] != current_tournament:
            current_tournament = row['tournament']
            print(f"\n--- {current_tournament} ---")

        match_counter += 1
        print(f"{match_counter}. {row['player1']} vs {row['player2']}")
        print(f"   {row['date']} at {row['time']}")

    # Create a mapping from display number to original index
    match_number_to_idx = {}
    for display_num, (idx, row) in enumerate(matches_df_sorted.iterrows(), 1):
        match_number_to_idx[display_num] = idx

    print("\n" + "="*70)
    print("INSTRUCTIONS")
    print("="*70)
    print("""
1. Pick matches you want to analyze
2. Enter Bet365 odds in American format (e.g., -150, +200)
3. Get instant predictions and value analysis
4. Type 'done' when finished
""")

    predictions = []

    while True:
        print("\n" + "="*70)
        match_input = input("Enter match number (1-{}) or 'done': ".format(len(matches_df))).strip()

        if match_input.lower() == 'done':
            break

        try:
            match_display_num = int(match_input)

            if match_display_num < 1 or match_display_num > len(matches_df):
                print("‚ùå Invalid match number")
                continue

            # Map display number to actual dataframe index
            match_idx = match_number_to_idx[match_display_num]
            row = matches_df.loc[match_idx]

            print("\n" + "-"*70)
            print(f"Match: {row['player1']} vs {row['player2']}")
            print(f"Tournament: {row['tournament']} ({row['tour']}, {row['surface']})")
            print("-"*70)

            # Get odds
            print(f"\nEnter Bet365 odds:")
            player1_odds_input = input(f"  {row['player1']}: ").strip()
            player2_odds_input = input(f"  {row['player2']}: ").strip()

            # Parse odds
            try:
                player1_odds = int(player1_odds_input.replace('+', ''))
                player2_odds = int(player2_odds_input.replace('+', ''))
            except:
                print("‚ùå Invalid odds format. Use American format (e.g., -150 or +200)")
                continue

            # Get prediction
            tour = row['tour'].lower()

            if tour not in models or tour not in historical:
                print(f"‚ùå No model available for {tour.upper()}")
                continue

            try:
                # Create features
                features = create_match_features(
                    row['player1_matched'],
                    row['player2_matched'],
                    row['surface'],
                    historical[tour],
                    row['tournament']
                )

                X = pd.DataFrame([features])
                X_scaled = scalers[tour].transform(X)

                # Predict
                prob_p1_wins = models[tour].predict_proba(X_scaled)[0][1]
                prob_p2_wins = 1 - prob_p1_wins

                # Calculate implied probabilities
                implied_p1 = american_to_probability(player1_odds)
                implied_p2 = american_to_probability(player2_odds)

                # Calculate edges
                edge_p1 = prob_p1_wins - implied_p1
                edge_p2 = prob_p2_wins - implied_p2

                # Results
                print("\n" + "="*70)
                print("üìä ANALYSIS")
                print("="*70)

                print(f"\n{row['player1']}:")
                print(f"  Bet365 Odds: {player1_odds:+d}")
                print(f"  Implied Probability: {implied_p1:.1%}")
                print(f"  Model Probability: {prob_p1_wins:.1%}")
                print(f"  Edge: {edge_p1:+.1%}")

                print(f"\n{row['player2']}:")
                print(f"  Bet365 Odds: {player2_odds:+d}")
                print(f"  Implied Probability: {implied_p2:.1%}")
                print(f"  Model Probability: {prob_p2_wins:.1%}")
                print(f"  Edge: {edge_p2:+.1%}")

                # Recommendation
                print("\n" + "="*70)
                print("üí∞ RECOMMENDATION")
                print("="*70)

                best_edge = max(edge_p1, edge_p2)

                # Determine which player to bet on
                if edge_p1 > edge_p2:
                    bet_player = row['player1']
                    bet_odds = player1_odds
                    bet_prob = prob_p1_wins
                    bet_edge = edge_p1
                else:
                    bet_player = row['player2']
                    bet_odds = player2_odds
                    bet_prob = prob_p2_wins
                    bet_edge = edge_p2

                # Calculate Kelly Criterion bet size
                decimal_odds = american_to_decimal(bet_odds)
                b = decimal_odds - 1
                p = bet_prob
                q = 1 - p
                kelly_fraction = (b * p - q) / b

                # Use 25% fractional Kelly for safety
                fractional_kelly = 0.25
                kelly_pct = max(0, kelly_fraction * fractional_kelly)

                # Calculate bet amounts
                bankroll = 1000  # $1000 max
                recommended_bet = kelly_pct * bankroll

                # Show match info at top
                print(f"\nüìã Match #{match_display_num}")
                print(f"   Player 1: {row['player1']} ({player1_odds:+d})")
                print(f"   Player 2: {row['player2']} ({player2_odds:+d})")
                print(f"   Tournament: {row['tournament']}")

                if best_edge < 0.03:
                    print("\n‚ö†Ô∏è  NO BET - Edge too small (< 3%)")
                    print(f"   Suggested Stake: $0")
                elif best_edge < 0.05:
                    print("\nü§î MARGINAL - Small edge (3-5%)")
                    print(f"   Consider: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")
                elif best_edge < 0.10:
                    print("\nüíé MEDIUM VALUE - Good edge (5-10%)")
                    print(f"   BET: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")

                    # Show potential profit
                    if bet_odds > 0:
                        potential_profit = recommended_bet * (bet_odds / 100)
                    else:
                        potential_profit = recommended_bet * (100 / abs(bet_odds))
                    print(f"   Potential Profit: ${potential_profit:.2f}")
                else:
                    print("\nüî• HIGH VALUE - Excellent edge (10%+)")
                    print(f"   STRONG BET: {bet_player} ({bet_odds:+d})")
                    print(f"   Suggested Stake: ${recommended_bet:.2f}")
                    print(f"   (Kelly %: {kelly_pct*100:.1f}% of bankroll)")

                    # Show potential profit
                    if bet_odds > 0:
                        potential_profit = recommended_bet * (bet_odds / 100)
                    else:
                        potential_profit = recommended_bet * (100 / abs(bet_odds))
                    print(f"   Potential Profit: ${potential_profit:.2f}")

                # Add warning if Kelly suggests large bet
                if kelly_pct > 0.15:
                    print(f"\n   ‚ö†Ô∏è  Large bet suggested - consider reducing if uncertain")

                # Calculate potential profit for spreadsheet
                if bet_odds > 0:
                    potential_profit = recommended_bet * (bet_odds / 100)
                else:
                    potential_profit = recommended_bet * (100 / abs(bet_odds))

                # SUMMARY FOR SPREADSHEET
                print("\n" + "-"*70)
                print("üìä FOR YOUR SPREADSHEET:")
                print("-"*70)
                print(f"Match #: {match_display_num}")
                print(f"Player 1: {row['player1']}")
                print(f"Player 2: {row['player2']}")
                print(f"Tournament: {row['tournament']}")
                print(f"Player 1 Odds: {player1_odds:+d}")
                print(f"Player 2 Odds: {player2_odds:+d}")
                print(f"Suggested Bet: ${recommended_bet:.2f}")
                print(f"Potential Profit: ${potential_profit:.2f}")
                if bet_player == row['player1']:
                    print(f"Bet On: Player 1 ({row['player1']})")
                else:
                    print(f"Bet On: Player 2 ({row['player2']})")
                print("-"*70)

                # Save prediction
                bet_player = row['player1'] if edge_p1 > edge_p2 else row['player2']
                bet_odds = player1_odds if edge_p1 > edge_p2 else player2_odds
                bet_prob = prob_p1_wins if edge_p1 > edge_p2 else prob_p2_wins

                # Calculate Kelly with Grand Slam adjustment
                decimal_odds = american_to_decimal(bet_odds)
                b = decimal_odds - 1
                p = bet_prob
                q = 1 - p
                kelly_fraction = (b * p - q) / b

                # Calculate Kelly (always 25% fractional)
                kelly_pct = max(0, kelly_fraction * 0.25)
                recommended_stake = kelly_pct * 1000  # $1000 bankroll

                predictions.append({
                    'match_number': match_display_num,
                    'player1': row['player1'],
                    'player2': row['player2'],
                    'tournament': row['tournament'],
                    'player1_odds': player1_odds,
                    'player2_odds': player2_odds,
                    'model_prob_p1': prob_p1_wins,
                    'model_prob_p2': prob_p2_wins,
                    'edge_p1': edge_p1,
                    'edge_p2': edge_p2,
                    'best_bet': bet_player,
                    'best_bet_odds': bet_odds,
                    'best_edge': best_edge,
                    'kelly_pct': kelly_pct,
                    'recommended_stake': recommended_stake
                })

            except Exception as e:
                print(f"‚ùå Error generating prediction: {e}")

        except ValueError:
            print("‚ùå Invalid input")

    # Summary
    if len(predictions) > 0:
        print("\n" + "="*70)
        print("üìã SESSION SUMMARY")
        print("="*70)

        pred_df = pd.DataFrame(predictions)

        print(f"\nAnalyzed {len(predictions)} matches")

        # Show bets with 5%+ edge
        good_bets = pred_df[pred_df['best_edge'] >= 0.05].sort_values('best_edge', ascending=False)

        if len(good_bets) > 0:
            print(f"\nüéØ BETTING OPPORTUNITIES (5%+ edge):")
            print("-"*70)

            for idx, row in good_bets.iterrows():
                print(f"\n‚úÖ {row['best_bet']} ({row['best_bet_odds']:+d})")
                print(f"   vs {row['player2'] if row['best_bet']==row['player1'] else row['player1']}")
                print(f"   {row['tournament']}")
                print(f"   Edge: {row['best_edge']:+.1%}")
                print(f"   üíµ Recommended Stake: ${row['recommended_stake']:.2f}")
        else:
            print("\n‚ö†Ô∏è  No strong betting opportunities found (need 5%+ edge)")

        # Save
        pred_df.to_csv('/tmp/manual_predictions.csv', index=False)
        print("\nüíæ Saved predictions to /tmp/manual_predictions.csv")

    print("\n" + "="*70)
    print("‚úÖ Session Complete!")
    print("="*70)

# Script entry point: start the manual odds-entry session only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    manual_odds_entry()

🎾 MANUAL BET365 ODDS ENTRY 🎾
✓ Loaded 3 matched matches
✓ Loaded models and historical data

AVAILABLE MATCHES

🎾 ATP MATCHES

--- ATP Auckland ---
1. S. Baez vs J. Mensik
   2026-01-17 at 02:00

🎾 WTA MATCHES

--- WTA Adelaide ---
2. M. Andreeva vs V. Mboko
   2026-01-17 at 03:00

--- WTA Hobart ---
3. I. Jovic vs E. Cocciaretto
   2026-01-17 at 03:00

INSTRUCTIONS

1. Pick matches you want to analyze
2. Enter Bet365 odds in American format (e.g., -150, +200)
3. Get instant predictions and value analysis
4. Type 'done' when finished



----------------------------------------------------------------------
Match: S. Baez vs J. Mensik
Tournament: ATP Auckland (ATP, Hard)
----------------------------------------------------------------------

Enter Bet365 odds:

📊 ANALYSIS

S. Baez:
  Bet365 Odds: +200
  Implied Probability: 33.3%
  Model Probability: 57.7%
  Edge: +24.4%

J. Mensik:
  Bet365 Odds: -250
  Implied Probability: 71.4%
  Model Probability: 42.3%
  Edge: -2