In [1]:
import warnings

import joblib
import numpy as np
import pandas as pd

# Trained classifier and the list of feature columns saved alongside it.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
model = joblib.load("../models/calibrated_logistic.joblib")
feature_cols = joblib.load("../models/feature_columns.joblib")

# Player reference tables; surrounding whitespace is stripped from player names.
career = pd.read_csv("../data/processed/player_career_stats.csv").assign(
    player=lambda frame: frame["player"].str.strip()
)
recent = pd.read_csv(
    "../data/processed/player_recent_form.csv", dtype={"match_id": str}
).assign(player=lambda frame: frame["player"].str.strip())


In [7]:
def compute_team_features(xi, career_df, recent_df):
    """
    Aggregate career and recent-form statistics for one playing XI.

    Parameters
    ----------
    xi : list-like of str
        Player names. They are matched exactly against the ``player`` column
        of both frames; names that are not found are silently ignored.
    career_df : pandas.DataFrame
        Needs a ``player`` column. ``batting_strength``, ``bowling_strength``
        and ``overall_strength`` are averaged over the matched rows.
    recent_df : pandas.DataFrame
        Needs a ``player`` column. Only the last row per player (in the
        frame's existing row order) is used before averaging
        ``recent_runs``, ``recent_wickets`` and ``recent_economy``.
        NOTE(review): this presumes rows are in chronological order - confirm.

    Returns
    -------
    dict
        Keys ``batting``, ``bowling``, ``overall``, ``recent_runs``,
        ``recent_wickets``, ``recent_econ``. Each value is a float mean, or
        0.0 when the column is absent, no player matched, or every value is NaN.
    """
    # Career stats
    c = career_df[career_df["player"].isin(xi)]

    # Recent stats: keep one (the last) row per matched player
    r = recent_df[recent_df["player"].isin(xi)]
    if not r.empty:
        r = r.groupby("player").tail(1)

    def safe_mean(series, default=0.0):
        # DataFrame.get returns None when the column does not exist; treat
        # that like "no data" instead of failing on None.empty.
        if series is None or series.empty or series.isna().all():
            return default
        return float(series.mean())

    return {
        "batting": safe_mean(c.get("batting_strength")),
        "bowling": safe_mean(c.get("bowling_strength")),
        "overall": safe_mean(c.get("overall_strength")),

        "recent_runs": safe_mean(r.get("recent_runs")),
        "recent_wickets": safe_mean(r.get("recent_wickets")),
        "recent_econ": safe_mean(r.get("recent_economy")),
    }


In [3]:
def is_home_team(team, venue):
    """
    Return 1 if ``venue`` mentions the home city of ``team``, else 0.

    The check is a case-insensitive substring test of the team's mapped city
    against the venue text, so a venue given only by stadium name (without
    the city) is not recognised as a home ground.

    Parameters
    ----------
    team : str
        Full franchise name, e.g. ``"Mumbai Indians"``. Teams that are not in
        the mapping always yield 0.
    venue : str
        Free-text venue description. A missing or non-string venue
        (``None``, NaN) yields 0 instead of raising.

    Returns
    -------
    int
        1 for a home match, 0 otherwise.
    """
    team_city_map = {
        "Chennai Super Kings": "Chennai",
        "Mumbai Indians": "Mumbai",
        "Royal Challengers Bangalore": "Bangalore",
        "Kolkata Knight Riders": "Kolkata",
        "Delhi Capitals": "Delhi",
        "Rajasthan Royals": "Jaipur",
        "Sunrisers Hyderabad": "Hyderabad",
        "Punjab Kings": "Chandigarh",
    }
    city = team_city_map.get(team)
    # Guard against None/NaN venues, which have no .lower()
    if city is None or not isinstance(venue, str):
        return 0
    return int(city.lower() in venue.lower())


In [10]:
def predict_match(team1, team2, xi1, xi2, venue, toss):
    """
    Score a fixture with the notebook-level ``model``.

    Relies on the notebook-level ``career``, ``recent``, ``model`` and
    ``feature_cols`` objects as well as ``compute_team_features`` and
    ``is_home_team``.

    Parameters
    ----------
    team1, team2 : str
        Full team names.
    xi1, xi2 : list of str
        Playing XIs of team1 and team2.
    venue : str
        Venue text passed to ``is_home_team``.
    toss : dict
        ``{"winner": <team name>, "decision": "bat" | "field"}``. The winner
        is compared to ``team1`` by exact string equality, so it must be
        spelled exactly like ``team1``/``team2`` (not an abbreviation).

    Returns
    -------
    float
        ``model.predict_proba(X)[0, 1]``, which this notebook reads as
        team1's win probability.
    """
    t1 = compute_team_features(xi1, career, recent)
    t2 = compute_team_features(xi2, career, recent)

    toss_winner = toss["winner"]
    if toss_winner not in (team1, team2):
        # An unrecognised winner would otherwise silently become toss_team1 = 0.
        warnings.warn(
            f"toss winner {toss_winner!r} matches neither team1 {team1!r} "
            f"nor team2 {team2!r}; toss_team1 will be 0",
            stacklevel=2,
        )

    feature_row = {
        "t1_batting": t1["batting"],
        "t1_bowling": t1["bowling"],
        "t1_overall": t1["overall"],
        "t1_recent_runs": t1["recent_runs"],
        "t1_recent_wickets": t1["recent_wickets"],
        "t1_recent_econ": t1["recent_econ"],

        "t2_batting": t2["batting"],
        "t2_bowling": t2["bowling"],
        "t2_overall": t2["overall"],
        "t2_recent_runs": t2["recent_runs"],
        "t2_recent_wickets": t2["recent_wickets"],
        "t2_recent_econ": t2["recent_econ"],

        "toss_team1": int(toss_winner == team1),
        "toss_field": int(toss["decision"] == "field"),
        "is_home_team1": is_home_team(team1, venue),
        "venue_team1_win_rate": 0.5,  # neutral default
    }

    # Lay the values out in the column order stored in feature_cols; a column
    # missing from feature_row (or holding NaN) is filled with 0.0.
    ordered = {}
    for col in feature_cols:
        value = feature_row.get(col)
        ordered[col] = 0.0 if pd.isna(value) else value
    X = pd.DataFrame([ordered])

    return model.predict_proba(X)[0, 1]


In [34]:
# Playing XIs used by the prediction cells below
rr_xi = [
    "Steve Smith",
    "Jos Buttler",
    "Sanju Samson",
    "Ben Stokes",
    "Robin Uthappa",
    "Rahul Tewatia",
    "Shreyas Gopal",
    "Jofra Archer",
    "Kartik Tyagi",
    "Jaydev Unadkat",
    "Varun Aaron"
]

kkr_xi = [
    "Shubman Gill",
    "Sunil Narine",
    "Nitish Rana",
    "Eoin Morgan",
    "Andre Russell",
    "Dinesh Karthik",
    "Pat Cummins",
    "Lockie Ferguson",
    "Shivam Mavi",
    "Varun Chakravarthy",
    "Kamlesh Nagarkoti"
]

# The toss winner is compared to team1 by exact string equality inside
# predict_match, so it must use the same full name as team1/team2.
prob = predict_match(
    team1="Rajasthan Royals",
    team2="Kolkata Knight Riders",
    xi1=rr_xi,
    xi2=kkr_xi,
    venue="Kolkata",
    toss={"winner": "Kolkata Knight Riders", "decision": "field"},
)

print(f"RR Win Probability: {prob:.3f}")


RR Win Probability: 0.876


In [17]:
# Swap teams: KKR is now team1, so the returned probability is KKR's.
# The toss winner must be spelled exactly like team1/team2 because
# predict_match compares it to team1 by string equality.
prob_swap = predict_match(
    team1="Kolkata Knight Riders",
    team2="Rajasthan Royals",
    xi1=kkr_xi,
    xi2=rr_xi,
    venue="Sawai Mansingh stadium",
    toss={"winner": "Rajasthan Royals", "decision": "bat"},
)

print("KKR Win Probability:", prob_swap)
print("RR Win Probability:", 1 - prob_swap)


KKR Win Probability: 0.7805242311741516
RR Win Probability: 0.2194757688258484


Win probability together with the feature values used to compute it
 

In [12]:
def predict_match_2(team1, team2, xi1, xi2, venue, toss,
                  career_df, recent_df):
    """
    Predict the win probability for team1 and return the features behind it.

    Uses the notebook-level ``model`` and ``feature_cols`` loaded in the first
    cell (instead of re-reading both artifacts from disk on every call), plus
    ``compute_team_features`` and ``is_home_team``.

    Parameters
    ----------
    team1, team2 : str
        Full team names.
    xi1, xi2 : list of str
        Playing XIs of team1 and team2.
    venue : str
        Venue text passed to ``is_home_team``.
    toss : dict
        ``{"winner": <team name>, "decision": "bat" | "field"}``. The winner
        is compared to ``team1`` by exact string equality, so it must be
        spelled exactly like ``team1``/``team2`` (not an abbreviation).
    career_df, recent_df : pandas.DataFrame
        Player tables forwarded to ``compute_team_features``.

    Returns
    -------
    dict
        ``win_probability`` (``predict_proba(X)[0, 1]``), ``team1_features``,
        ``team2_features`` and ``model_features`` (the assembled feature row).
    """
    # Compute team features
    t1 = compute_team_features(xi1, career_df, recent_df)
    t2 = compute_team_features(xi2, career_df, recent_df)

    toss_winner = toss["winner"]
    if toss_winner not in (team1, team2):
        # An unrecognised winner would otherwise silently become toss_team1 = 0.
        warnings.warn(
            f"toss winner {toss_winner!r} matches neither team1 {team1!r} "
            f"nor team2 {team2!r}; toss_team1 will be 0",
            stacklevel=2,
        )

    # Build model feature row
    feature_row = {
        "t1_batting": t1["batting"],
        "t1_bowling": t1["bowling"],
        "t1_overall": t1["overall"],
        "t1_recent_runs": t1["recent_runs"],
        "t1_recent_wickets": t1["recent_wickets"],
        "t1_recent_econ": t1["recent_econ"],

        "t2_batting": t2["batting"],
        "t2_bowling": t2["bowling"],
        "t2_overall": t2["overall"],
        "t2_recent_runs": t2["recent_runs"],
        "t2_recent_wickets": t2["recent_wickets"],
        "t2_recent_econ": t2["recent_econ"],

        "toss_team1": int(toss_winner == team1),
        "toss_field": int(toss["decision"] == "field"),

        # Same home-ground rule as predict_match; looking for the whole
        # franchise name inside the venue text would practically never match.
        "is_home_team1": is_home_team(team1, venue),
        "venue_team1_win_rate": 0.5   # neutral fallback
    }

    # One-row frame with columns in the order stored in feature_cols
    X = pd.DataFrame([feature_row], columns=feature_cols)

    # Predict probability
    prob = model.predict_proba(X)[0, 1]

    # Return the probability together with every intermediate feature set
    return {
        "win_probability": prob,
        "team1_features": t1,
        "team2_features": t2,
        "model_features": feature_row
    }


In [37]:
# The toss winner must be spelled exactly like team1/team2: predict_match_2
# compares it to team1 by string equality.
result = predict_match_2(
    team1="Rajasthan Royals",
    team2="Kolkata Knight Riders",
    xi1=rr_xi,
    xi2=kkr_xi,
    venue="Eden Gardens",
    toss={"winner": "Kolkata Knight Riders", "decision": "field"},
    career_df=career,
    recent_df=recent
)

print(f"RR Win Probability: {result['win_probability']:.3f}")
print("\nRR features:", result["team1_features"])
print("\nKKR features:", result["team2_features"])


RR Win Probability: 0.876

RR features: {'batting': 28.525, 'bowling': 4.993488372093023, 'overall': 33.518488372093024, 'recent_runs': 0.0, 'recent_wickets': 0.4, 'recent_econ': 12.4}

KKR features: {'batting': 48.27434279416826, 'bowling': 7.038702290076336, 'overall': 55.31304508424459, 'recent_runs': 18.8, 'recent_wickets': 0.5, 'recent_econ': 5.339449541284403}
