In [None]:
import pandas as pd

# Load match-level and ball-by-ball data from the working directory.
matches = pd.read_csv('matches_stats.csv')
deliveries = pd.read_csv('ipl_stats.csv')

# Attach each match's winner to every delivery row.
# validate='many_to_one' makes pandas raise a MergeError if `matches` contains
# a duplicated id; without it such duplicates would silently multiply delivery
# rows and inflate every run/ball aggregate computed from `data`.
data = pd.merge(
    deliveries,
    matches[['id', 'winner']],
    left_on='match_id',
    right_on='id',
    how='left',
    validate='many_to_one',
)

# One row per (match, batsman): a player is treated as having taken part in a
# match only if they appear in the `batsman` column for it.
# Rows without a winner (no value recorded in `matches`, or a match_id missing
# from `matches` after the left merge) are dropped: comparing a team name to
# NaN is always False, so keeping them would count those matches as losses.
player_match_outcomes = (
    data[['match_id', 'batsman', 'winner', 'batting_team']]
    .drop_duplicates()
    .dropna(subset=['winner'])
    # won = 1 when the player's batting team is the recorded winner, else 0
    .assign(won=lambda df: (df['batting_team'] == df['winner']).astype(int))
)

# Per-player totals over matches with a known winner, plus the win rate
# (wins divided by the number of such matches).
player_stats = (
    player_match_outcomes
    .groupby('batsman')
    .agg(
        total_matches=('match_id', 'nunique'),
        total_wins=('won', 'sum'),
    )
    .reset_index()
    .assign(win_rate=lambda df: df['total_wins'] / df['total_matches'])
)


In [None]:
# Per-batsman batting aggregates computed over every delivery row in `data`:
#   total_runs     - sum of batsman_runs
#   total_balls    - number of rows with a non-null `ball` value
#   matches_played - number of distinct match_ids the batsman appears in
perf_aggregations = {
    'total_runs': ('batsman_runs', 'sum'),
    'total_balls': ('ball', 'count'),
    'matches_played': ('match_id', 'nunique'),
}
player_perf = data.groupby('batsman').agg(**perf_aggregations).reset_index()

# Join win statistics with batting aggregates (inner join on batsman), then
# derive the rate features from the joined totals.
player_df = player_stats.merge(player_perf, on='batsman').assign(
    # runs scored per 100 counted balls
    strike_rate=lambda df: 100 * df['total_runs'] / df['total_balls'],
    # runs scored per distinct match
    avg_runs_per_match=lambda df: df['total_runs'] / df['matches_played'],
)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Predictor columns fed to the model; the regression target is win_rate.
features = [
    'total_matches',
    'total_runs',
    'total_balls',
    'strike_rate',
    'avg_runs_per_match',
]
X, y = player_df[features], player_df['win_rate']

# Hold out 20% of the players for evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a random forest with default hyperparameters (seeded for repeatability).
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Score the held-out players and report R² to four decimal places.
y_pred = model.predict(X_test)
test_r2 = r2_score(y_test, y_pred)
print(f"R² Score: {test_r2:.4f}")
