In [2]:
# Import necessary libraries
import pandas as pd
from google.colab import files

In [3]:
# Upload CSV files
uploaded = files.upload()

Saving deliveries.csv to deliveries.csv
Saving matches.csv to matches.csv


In [4]:
# Load the CSV files
matches_df = pd.read_csv('matches.csv')
deliveries_df = pd.read_csv('deliveries.csv')

In [5]:
# Display the first few rows of each dataframe to understand their structure
print(matches_df.head())
print(deliveries_df.head())

       id   season        city        date match_type player_of_match  \
0  335982  2007/08   Bangalore  2008-04-18     League     BB McCullum   
1  335983  2007/08  Chandigarh  2008-04-19     League      MEK Hussey   
2  335984  2007/08       Delhi  2008-04-19     League     MF Maharoof   
3  335985  2007/08      Mumbai  2008-04-20     League      MV Boucher   
4  335986  2007/08     Kolkata  2008-04-20     League       DJ Hussey   

                                        venue                        team1  \
0                       M Chinnaswamy Stadium  Royal Challengers Bangalore   
1  Punjab Cricket Association Stadium, Mohali              Kings XI Punjab   
2                            Feroz Shah Kotla             Delhi Daredevils   
3                            Wankhede Stadium               Mumbai Indians   
4                                Eden Gardens        Kolkata Knight Riders   

                         team2                  toss_winner toss_decision  \
0        Kolkat

In [6]:
# Aggregate player performance metrics from the deliveries dataframe

# Calculate total runs scored by each batsman
batsman_runs = deliveries_df.groupby('batter')['batsman_runs'].sum().reset_index()

In [7]:
batsman_runs.sample(10)

Unnamed: 0,batter,batsman_runs
15,A Raghuvanshi,163
453,R Dhawan,210
215,HR Shokeen,66
101,BAW Mendis,3
155,DJ Harris,111
571,STR Binny,880
533,SB Jakati,28
17,A Symonds,974
470,RA Jadeja,2959
498,RT Ponting,91


In [8]:
# Calculate total wickets taken by each bowler
bowler_wickets = deliveries_df[deliveries_df['dismissal_kind'].notnull()].groupby('bowler').size().reset_index(name='wickets')

In [9]:
# Merge batsman runs and bowler wickets into a single dataframe
player_performance = pd.merge(batsman_runs, bowler_wickets, left_on='batter', right_on='bowler', how='outer')

In [10]:
# Fill NaN values with 0 (for players who are either batsmen or bowlers but not both)
player_performance['batsman_runs'] = player_performance['batsman_runs'].fillna(0)
player_performance['wickets'] = player_performance['wickets'].fillna(0)

In [11]:
# Drop the extra bowler column as we have combined performance metrics
player_performance = player_performance[['batter', 'batsman_runs', 'wickets']]

In [12]:
# Rename columns for clarity
player_performance.columns = ['player', 'total_runs', 'total_wickets']

In [13]:
# Display the aggregated player performance metrics
print(player_performance.head())

           player  total_runs  total_wickets
0  A Ashish Reddy       280.0           19.0
1        A Badoni       634.0            2.0
2      A Chandila         4.0           11.0
3        A Chopra        53.0            0.0
4     A Choudhary        25.0            5.0


In [14]:
# Define a function to select the best 11 players from a given list of 22 players
def select_best_11_players(player_list, player_performance_df):
    # Filter the player performance dataframe to include only the given player list
    selected_players = player_performance_df[player_performance_df['player'].isin(player_list)]

    # Handle missing player names by replacing NaN with a placeholder
    selected_players['player'] = selected_players['player'].fillna('Unknown Player')

    # Define a performance score (simple sum of runs and wickets for now)
    selected_players['performance_score'] = selected_players['total_runs'] + selected_players['total_wickets'] * 20

    # Sort players based on the performance score in descending order
    selected_players = selected_players.sort_values(by='performance_score', ascending=False)

    # Select the top 11 players
    best_11_players = selected_players.head(11)

    return best_11_players

In [29]:
# Example player list (replace this with the actual input player list)
example_player_list = player_performance['player'].sample(22, random_state=10).tolist()

In [30]:
# Get the best 11 players from the example list
best_11_players = select_best_11_players(example_player_list, player_performance)
print(best_11_players)

               player  total_runs  total_wickets  performance_score
39         AD Russell      2488.0          125.0             4988.0
449         Q de Kock      3160.0            0.0             3160.0
566           SS Iyer      3131.0            0.0             3131.0
579    Sandeep Sharma        54.0          153.0             3114.0
300     KS Williamson      2132.0            0.0             2132.0
223          I Sharma        57.0           96.0             1977.0
441           PP Shaw      1892.0            0.0             1892.0
537        SC Ganguly      1349.0           12.0             1589.0
616        TM Dilshan      1153.0            5.0             1253.0
341           M Vohra      1083.0            0.0             1083.0
486  RN ten Doeschate       326.0            3.0              386.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_players['player'] = selected_players['player'].fillna('Unknown Player')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_players['performance_score'] = selected_players['total_runs'] + selected_players['total_wickets'] * 20


In [31]:
# Enter the list of 22 player names
example_player_list = [
    'SN Thakur', ' RA Jadeja', 'MS Dhoni', 'S Dube', 'D Brevis', 'C Green',
    'TH David', 'R Shepherd', 'M Pathirana', 'SA Yadav', 'C Green',
    'Ishan Kishan', 'TU Deshpande', 'Mustafizur Rahman', 'TH David',
    'RG Sharma', 'MM Ali', 'AM Rahane', 'RD Gaikwad', 'G Coetzee',
    ' HH Pandya', 'JJ Bumrah'
]


In [32]:
best_11_players = select_best_11_players(example_player_list, player_performance)
print(best_11_players)

                player  total_runs  total_wickets  performance_score
477          RG Sharma      6630.0           16.0             6950.0
374           MS Dhoni      5243.0            0.0             5243.0
50           AM Rahane      4642.0            1.0             4662.0
251          JJ Bumrah        68.0          182.0             3708.0
530           SA Yadav      3594.0            0.0             3594.0
231       Ishan Kishan      2644.0            0.0             2644.0
473         RD Gaikwad      2380.0            0.0             2380.0
556          SN Thakur       307.0          100.0             2307.0
366             MM Ali      1162.0           40.0             1962.0
513             S Dube      1502.0            6.0             1622.0
394  Mustafizur Rahman        13.0           70.0             1413.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_players['player'] = selected_players['player'].fillna('Unknown Player')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_players['performance_score'] = selected_players['total_runs'] + selected_players['total_wickets'] * 20
