In [1]:
import joblib
import numpy as np
import pandas as pd


In [2]:
# Load trained calibrated model
model = joblib.load("../models/calibrated_logistic_v2.joblib")

# Load feature columns (v2)
feature_cols = joblib.load("../models/feature_columns_v2.joblib")

# Load reference datasets
career = pd.read_csv("../data/processed/player_career_stats.csv")
recent = pd.read_csv("../data/processed/player_recent_form.csv")

career["player"] = career["player"].str.strip()
recent["player"] = recent["player"].str.strip()

print("Model + data loaded")


Model + data loaded


In [3]:
PLAYER_NAME_MAP = {
    "Lasith Malinga": "SL Malinga",
    "Nathan Coulter-Nile": "NM Coulter-Nile",
    "Krunal Pandya": "KH Pandya",
    "Hardik Pandya": "HH Pandya",
    "Quinton de Kock": "Q de Kock",
    "Rohit Sharma": "RG Sharma",
    "Jasprit Bumrah": "JJ Bumrah",
    "Suryakumar Yadav": "SA Yadav",
    "Kieron Pollard": "KA Pollard",
}


In [4]:
def compute_team_features(xi, career_df, recent_df):
    c = career_df[career_df["player"].isin(xi)]
    r = recent_df[recent_df["player"].isin(xi)]

    if not r.empty:
        r = r.groupby("player").tail(1)

    def safe_mean(series, default=0.0):
        if series is None or series.empty or series.isna().all():
            return default
        return float(series.mean())

    return {
        "batting": safe_mean(c.get("batting_strength")),
        "bowling": safe_mean(c.get("bowling_strength")),
        "overall": safe_mean(c.get("overall_strength")),
        "recent_runs": safe_mean(r.get("recent_runs")),
        "recent_wickets": safe_mean(r.get("recent_wickets")),
        "recent_econ": safe_mean(r.get("recent_economy")),
    }


In [5]:
TEAM_CITY = {
    "Chennai Super Kings": "Chennai",
    "Mumbai Indians": "Mumbai",
    "Royal Challengers Bangalore": "Bangalore",
    "Kolkata Knight Riders": "Kolkata",
    "Delhi Capitals": "Delhi",
    "Rajasthan Royals": "Jaipur",
    "Sunrisers Hyderabad": "Hyderabad",
    "Punjab Kings": "Chandigarh",
}


In [6]:
def home_advantage(team1, team2, venue):
    city1 = TEAM_CITY.get(team1, "")
    city2 = TEAM_CITY.get(team2, "")

    v = venue.lower()

    if city1.lower() in v:
        return 1
    elif city2.lower() in v:
        return -1
    else:
        return 0   # neutral venue


In [7]:
def build_feature_row(team1, team2, xi1, xi2, venue, toss):
    t1 = compute_team_features(xi1, career, recent)
    t2 = compute_team_features(xi2, career, recent)

    row = {
        # PURE DIFFERENCES (anti team1 bias)
        "batting_diff": t1["batting"] - t2["batting"],
        "bowling_diff": t1["bowling"] - t2["bowling"],
        "overall_diff": t1["overall"] - t2["overall"],
        "recent_runs_diff": t1["recent_runs"] - t2["recent_runs"],
        "recent_wickets_diff": t1["recent_wickets"] - t2["recent_wickets"],
        "recent_econ_diff": t1["recent_econ"] - t2["recent_econ"],

        # Context
        "toss_adv": (
            1 if toss["winner"] == team1 else
           -1 if toss["winner"] == team2 else 0
        ),

        "home_adv": home_advantage(team1, team2, venue),

        # neutral default (can be improved later)
        "venue_win_rate_diff": 0.0
    }

    return row, t1, t2


In [8]:
def normalize_xi(xi):
    return [PLAYER_NAME_MAP.get(p, p) for p in xi]


In [9]:
def predict_match(team1, team2, xi1, xi2, venue, toss):
    xi1 = normalize_xi(xi1)
    xi2 = normalize_xi(xi2)
    feature_row, t1, t2 = build_feature_row(
        team1, team2, xi1, xi2, venue, toss
    )

    X = pd.DataFrame([feature_row], columns=feature_cols)

    p1 = model.predict_proba(X)[0, 1]
    p2 = 1 - p1

    return {
        "team1": team1,
        "team2": team2,
        "team1_win_prob": round(float(p1), 3),
        "team2_win_prob": round(float(p2), 3),
        "team1_features": t1,
        "team2_features": t2,
        "model_features": feature_row
    }


In [10]:
rr_xi = [
    "Steve Smith","Jos Buttler","Sanju Samson","Ben Stokes",
    "Robin Uthappa","Rahul Tewatia","Shreyas Gopal",
    "Jofra Archer","Kartik Tyagi","Jaydev Unadkat","Varun Aaron"
]

kkr_xi = [
    "Shubman Gill","Sunil Narine","Nitish Rana","Eoin Morgan",
    "Andre Russell","Dinesh Karthik","Pat Cummins",
    "Lockie Ferguson","Shivam Mavi","Varun Chakravarthy","Kamlesh Nagarkoti"
]

result = predict_match(
    team1="Rajasthan Royals",
    team2="Kolkata Knight Riders",
    xi1=rr_xi,
    xi2=kkr_xi,
    venue="Eden Gardens",
    toss={"winner": "KKR", "decision": "field"}
)

result


{'team1': 'Rajasthan Royals',
 'team2': 'Kolkata Knight Riders',
 'team1_win_prob': 0.267,
 'team2_win_prob': 0.733,
 'team1_features': {'batting': 28.525,
  'bowling': 4.993488372093023,
  'overall': 33.518488372093024,
  'recent_runs': 0.0,
  'recent_wickets': 0.4,
  'recent_econ': 12.4},
 'team2_features': {'batting': 48.27434279416826,
  'bowling': 7.038702290076336,
  'overall': 55.31304508424459,
  'recent_runs': 18.8,
  'recent_wickets': 0.5,
  'recent_econ': 5.339449541284403},
 'model_features': {'batting_diff': -19.749342794168264,
  'bowling_diff': -2.045213917983313,
  'overall_diff': -21.794556712151568,
  'recent_runs_diff': -18.8,
  'recent_wickets_diff': -0.09999999999999998,
  'recent_econ_diff': 7.060550458715597,
  'toss_adv': 0,
  'home_adv': 0,
  'venue_win_rate_diff': 0.0}}

In [9]:
swap = predict_match(
    team1="Kolkata Knight Riders",
    team2="Rajasthan Royals",
    xi1=kkr_xi,
    xi2=rr_xi,
    venue="Eden Gardens",
    toss={"winner": "KKR", "decision": "field"}
)

swap


{'team1': 'Kolkata Knight Riders',
 'team2': 'Rajasthan Royals',
 'team1_win_prob': 0.634,
 'team2_win_prob': 0.366,
 'team1_features': {'batting': 48.27434279416826,
  'bowling': 7.038702290076336,
  'overall': 55.31304508424459,
  'recent_runs': 18.8,
  'recent_wickets': 0.5,
  'recent_econ': 5.339449541284403},
 'team2_features': {'batting': 28.525,
  'bowling': 4.993488372093023,
  'overall': 33.518488372093024,
  'recent_runs': 0.0,
  'recent_wickets': 0.4,
  'recent_econ': 12.4},
 'model_features': {'batting_diff': 19.749342794168264,
  'bowling_diff': 2.045213917983313,
  'overall_diff': 21.794556712151568,
  'recent_runs_diff': 18.8,
  'recent_wickets_diff': 0.09999999999999998,
  'recent_econ_diff': -7.060550458715597,
  'toss_adv': 0,
  'home_adv': 0,
  'venue_win_rate_diff': 0.0}}

For MI vs CSK

In [24]:
mi_xi = [
    "Rohit Sharma",
    "Quinton de Kock",
    "Suryakumar Yadav",
    "Kieron Pollard",
    "Hardik Pandya",
    "Krunal Pandya",
    "Rahul Chahar",
    "Jasprit Bumrah",
    "Trent Boult",
    "Nathan Coulter-Nile",
    "Lasith Malinga"
]

csk_xi = [
    "MS Dhoni",
    "Shane Watson",
    "Faf du Plessis",
    "Suresh Raina",
    "Ambati Rayudu",
    "Ravindra Jadeja",
    "Dwayne Bravo",
    "Deepak Chahar",
    "Shardul Thakur",
    "Imran Tahir",
    "Harbhajan Singh"
]


In [25]:
result = predict_match(
    team1="Mumbai Indians",
    team2="Chennai Super Kings",
    xi1=mi_xi,
    xi2=csk_xi,
    venue="Wankhede Stadium",
    toss={"winner": "Mumbai Indians", "decision": "field"},
)

result


{'team1': 'Mumbai Indians',
 'team2': 'Chennai Super Kings',
 'team1_win_prob': 0.237,
 'team2_win_prob': 0.763,
 'team1_features': {'batting': 55.392082401454296,
  'bowling': 37.943745921160584,
  'overall': 93.33582832261487,
  'recent_runs': 12.511111111111113,
  'recent_wickets': 0.6000000000000001,
  'recent_econ': 5.8039703957545035},
 'team2_features': {'batting': 56.79908515406853,
  'bowling': 39.733652062324886,
  'overall': 96.53273721639341,
  'recent_runs': 5.266666666666667,
  'recent_wickets': 0.5333333333333333,
  'recent_econ': 4.826007326007326},
 'model_features': {'batting_diff': -1.4070027526142326,
  'bowling_diff': -1.7899061411643018,
  'overall_diff': -3.1969088937785415,
  'recent_runs_diff': 7.244444444444446,
  'recent_wickets_diff': 0.06666666666666676,
  'recent_econ_diff': 0.9779630697471777,
  'toss_adv': 1,
  'home_adv': 0,
  'venue_win_rate_diff': 0.0}}

In [26]:
result_swap = predict_match(
    team1="Chennai Super Kings",
    team2="Mumbai Indians",
    xi1=csk_xi,
    xi2=mi_xi,
    venue="Wankhede Stadium",
    toss={"winner": "Mumbai Indians", "decision": "field"},
)

result_swap


{'team1': 'Chennai Super Kings',
 'team2': 'Mumbai Indians',
 'team1_win_prob': 0.663,
 'team2_win_prob': 0.337,
 'team1_features': {'batting': 56.79908515406853,
  'bowling': 39.733652062324886,
  'overall': 96.53273721639341,
  'recent_runs': 5.266666666666667,
  'recent_wickets': 0.5333333333333333,
  'recent_econ': 4.826007326007326},
 'team2_features': {'batting': 55.392082401454296,
  'bowling': 37.943745921160584,
  'overall': 93.33582832261487,
  'recent_runs': 12.511111111111113,
  'recent_wickets': 0.6000000000000001,
  'recent_econ': 5.8039703957545035},
 'model_features': {'batting_diff': 1.4070027526142326,
  'bowling_diff': 1.7899061411643018,
  'overall_diff': 3.1969088937785415,
  'recent_runs_diff': -7.244444444444446,
  'recent_wickets_diff': -0.06666666666666676,
  'recent_econ_diff': -0.9779630697471777,
  'toss_adv': -1,
  'home_adv': 0,
  'venue_win_rate_diff': 0.0}}

In [14]:
# RCB vs KKR (Chinnaswamy Stadium)

rcb_xi = [
    "Virat Kohli",
    "AB de Villiers",
    "Devdutt Padikkal",
    "Aaron Finch",
    "Shivam Dube",
    "Washington Sundar",
    "Chris Morris",
    "Yuzvendra Chahal",
    "Navdeep Saini",
    "Umesh Yadav",
    "Mohammed Siraj"
]

kkr_xi = [
    "Shubman Gill",
    "Sunil Narine",
    "Nitish Rana",
    "Andre Russell",
    "Eoin Morgan",
    "Dinesh Karthik",
    "Pat Cummins",
    "Varun Chakravarthy",
    "Shivam Mavi",
    "Kamlesh Nagarkoti",
    "Lockie Ferguson"
]

result = predict_match(
    team1="Kolkata Knight Riders",
    team2="Royal Challengers Bangalore",
    xi1=kkr_xi,
    xi2=rcb_xi,
    venue="M Chinnaswamy Stadium",
    toss={"winner": "Royal Challengers Bangalore", "decision": "bat"}
)

result


{'team1': 'Kolkata Knight Riders',
 'team2': 'Royal Challengers Bangalore',
 'team1_win_prob': 0.218,
 'team2_win_prob': 0.782,
 'team1_features': {'batting': 48.27434279416826,
  'bowling': 7.038702290076336,
  'overall': 55.31304508424459,
  'recent_runs': 18.8,
  'recent_wickets': 0.5,
  'recent_econ': 5.339449541284403},
 'team2_features': {'batting': 48.77523996215375,
  'bowling': 17.82610931473684,
  'overall': 66.60134927689057,
  'recent_runs': 7.675000000000001,
  'recent_wickets': 0.575,
  'recent_econ': 7.901306083966501},
 'model_features': {'batting_diff': -0.5008971679854852,
  'bowling_diff': -10.787407024660503,
  'overall_diff': -11.288304192645981,
  'recent_runs_diff': 11.125,
  'recent_wickets_diff': -0.07499999999999996,
  'recent_econ_diff': -2.561856542682098,
  'toss_adv': -1,
  'home_adv': 0,
  'venue_win_rate_diff': 0.0}}