In [1]:
import pandas as pd
import altair as alt

# Read the match-level CSV into a DataFrame and print its column summary
# (column names, non-null counts, dtypes).
matches = pd.read_csv(filepath_or_buffer='archive (1)/matches.csv')

matches.info(verbose=None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1095 entries, 0 to 1094
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               1095 non-null   int64  
 1   season           1095 non-null   object 
 2   city             1044 non-null   object 
 3   date             1095 non-null   object 
 4   match_type       1095 non-null   object 
 5   player_of_match  1090 non-null   object 
 6   venue            1095 non-null   object 
 7   team1            1095 non-null   object 
 8   team2            1095 non-null   object 
 9   toss_winner      1095 non-null   object 
 10  toss_decision    1095 non-null   object 
 11  winner           1090 non-null   object 
 12  result           1095 non-null   object 
 13  result_margin    1076 non-null   float64
 14  target_runs      1092 non-null   float64
 15  target_overs     1092 non-null   float64
 16  super_over       1095 non-null   object 
 17  method        

In [2]:
import pandas as pd
import altair as alt

# Load match-level data.
matches = pd.read_csv('archive (1)/matches.csv')

# `season` is read as text, so keep the leading run of digits and cast to int.
# The pattern is a raw string so `\d` is not parsed as an (invalid) string
# escape, and expand=False makes extract() return a Series instead of a
# one-column DataFrame.
# NOTE(review): a label such as "YYYY/YY" is reduced to its first year and could
# collide with a plain "YYYY" label — confirm with matches['season'].unique().
matches['season'] = matches['season'].str.extract(r'(\d+)', expand=False).astype(int)

# Wins per team per season. groupby() already drops rows whose key is missing,
# so matches without a winner never reach this table.
team_wins = matches.groupby(['season', 'winner']).size().reset_index(name='wins')

# Slider over the observed season range. The bounds are cast to built-in int so
# the chart spec only carries plain JSON-serialisable numbers.
first_season = int(team_wins['season'].min())
last_season = int(team_wins['season'].max())
season_slider = alt.binding_range(min=first_season, max=last_season, step=1)
season_select = alt.param('Season', bind=season_slider, value=first_season)

# Bar chart of wins for the season currently selected on the slider.
race_chart = alt.Chart(team_wins).transform_filter(
    alt.datum.season == season_select
).mark_bar().encode(
    x=alt.X('wins:Q', title='Number of Wins'),
    y=alt.Y('winner:N', sort='-x', title='Teams'),
    color='winner:N',
    tooltip=['winner:N', 'wins:Q']
).add_params(
    season_select
).properties(
    title='🏆 IPL Team Wins Over Seasons (Race Chart)',
    width=700,
    height=400
)

race_chart

  matches['season'] = matches['season'].str.extract('(\d+)')
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [1]:
import networkx as nx
from pyvis.network import Network
import pandas as pd

# Read the ball-by-ball CSV into a DataFrame and print its column summary
# (column names, non-null counts, dtypes).
deliveries = pd.read_csv(filepath_or_buffer='archive (1)/deliveries.csv')

deliveries.info(verbose=None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260920 entries, 0 to 260919
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   match_id          260920 non-null  int64 
 1   inning            260920 non-null  int64 
 2   batting_team      260920 non-null  object
 3   bowling_team      260920 non-null  object
 4   over              260920 non-null  int64 
 5   ball              260920 non-null  int64 
 6   batter            260920 non-null  object
 7   bowler            260920 non-null  object
 8   non_striker       260920 non-null  object
 9   batsman_runs      260920 non-null  int64 
 10  extra_runs        260920 non-null  int64 
 11  total_runs        260920 non-null  int64 
 12  extras_type       14125 non-null   object
 13  is_wicket         260920 non-null  int64 
 14  player_dismissed  12950 non-null   object
 15  dismissal_kind    12950 non-null   object
 16  fielder           9354 non-null    obj

In [7]:
# Runs scored by each batter while a given partner was the non-striker.
# This table is directional: (A, B) and (B, A) are separate rows.
partnerships = deliveries.groupby(['batter', 'non_striker'])['batsman_runs'].sum().reset_index()

# Minimum combined runs for a pair to appear in the graph.
MIN_PARTNERSHIP_RUNS = 200

# The graph below is undirected, so fold both directions of a pair into one row
# (names put in a canonical order) and add their runs together. Without this,
# the second direction would overwrite the first direction's edge weight.
in_order = partnerships['batter'] <= partnerships['non_striker']
pair_totals = (
    partnerships.assign(
        batter=partnerships['batter'].where(in_order, partnerships['non_striker']),
        non_striker=partnerships['non_striker'].where(in_order, partnerships['batter']),
    )
    .groupby(['batter', 'non_striker'], as_index=False)['batsman_runs']
    .sum()
)

# Filter: only strong partnerships (more than MIN_PARTNERSHIP_RUNS runs together).
# The column names match `partnerships`, but each row is now an unordered pair.
strong_partnerships = pair_totals[pair_totals['batsman_runs'] > MIN_PARTNERSHIP_RUNS]

# Build Graph: one node per player, one weighted edge per strong pair.
G = nx.Graph()

for pair in strong_partnerships.itertuples(index=False):
    # int() keeps the edge attribute a plain Python number.
    G.add_edge(pair.batter, pair.non_striker, weight=int(pair.batsman_runs))



In [12]:
# Print the column summary of the partnership table (row count, non-null counts, dtypes).
partnerships.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9120 entries, 0 to 9119
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   batter        9120 non-null   object
 1   non_striker   9120 non-null   object
 2   batsman_runs  9120 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 213.9+ KB


In [14]:
# Create PyVis Network from the partnership graph.
# Width is 100% of the container; a value above 100% overflows horizontally.
net = Network(height="1000px", width="100%", bgcolor="#1A1A1A", font_color="white")
net.from_nx(G)

# Write the interactive network to a standalone HTML file.
output_html = 'batsman_partnerships.html'
net.write_html(output_html)

# Open it in the default browser. An absolute file:// URI is passed because
# handing webbrowser.open() a bare relative filename is not portable.
import webbrowser
from pathlib import Path
webbrowser.open(Path(output_html).resolve().as_uri())


True

In [30]:
import altair as alt

# Per-batter totals: runs scored and number of delivery rows in which the
# player is the batter.
# NOTE(review): `balls` counts every row for the batter; if the data contains
# deliveries that should not count as balls faced, they are included — confirm.
batsman_stats = deliveries.groupby('batter', as_index=False).agg(
    runs=('batsman_runs', 'sum'),
    balls=('ball', 'count'),
)

# Strike rate = runs per 100 balls; keep only batters with at least 200 balls.
batsman_stats['strike_rate'] = batsman_stats['runs'].div(batsman_stats['balls']).mul(100)
batsman_stats = batsman_stats.loc[batsman_stats['balls'] >= 200]

# Bubble chart: x = balls, y = strike rate, bubble size = runs, one colour per
# batter (legend hidden), pan/zoom enabled.
bubble_chart = alt.Chart(batsman_stats).mark_circle()
bubble_chart = bubble_chart.encode(
    tooltip=['batter:N', 'runs:Q', 'balls:Q', 'strike_rate:Q'],
    color=alt.Color('batter:N', legend=None),
    size=alt.Size('runs:Q', scale=alt.Scale(range=[20, 1000])),
    y=alt.Y('strike_rate:Q', title='Strike Rate'),
    x=alt.X('balls:Q', title='Balls Faced'),
)
bubble_chart = bubble_chart.properties(
    height=500,
    width=700,
    title="💥 Batsman Strike Rate vs Balls Faced (Bubble Chart)",
).interactive()

bubble_chart


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [31]:
import plotly.express as px

# Count how often each dismissal kind occurs (missing values are not counted)
# and shape the result into a two-column table for plotting.
dismissals = (
    deliveries['dismissal_kind']
    .value_counts()
    .rename_axis('Dismissal Type')
    .reset_index(name='Count')
)

# Donut-style pie chart of the dismissal counts.
dismissal_pie = px.pie(
    data_frame=dismissals,
    values='Count',
    names='Dismissal Type',
    hole=0.4,
    title='Wicket Dismissal Types in IPL',
)

dismissal_pie.show()


In [32]:
# Per-bowler totals over all delivery rows: runs conceded (sum of total_runs),
# rows bowled, and rows with a non-null player_dismissed.
# NOTE(review): `wickets` counts every dismissal recorded on the bowler's
# deliveries, whatever its kind — confirm that is the intended definition.
bowler_stats = deliveries.groupby('bowler', as_index=False).agg(
    runs_conceded=('total_runs', 'sum'),
    balls_bowled=('ball', 'count'),
    wickets=('player_dismissed', 'count'),
)

# Overs = rows bowled / 6; economy = runs conceded per over.
bowler_stats = bowler_stats.assign(overs=bowler_stats['balls_bowled'] / 6)
bowler_stats = bowler_stats.assign(
    economy=bowler_stats['runs_conceded'] / bowler_stats['overs']
)

# Keep bowlers with at least 100 overs.
bowler_stats = bowler_stats.loc[bowler_stats['overs'] >= 100]

# Bubble chart: x = economy, y = wickets, bubble size = overs, one colour per
# bowler (legend hidden), pan/zoom enabled.
bowler_bubble = alt.Chart(bowler_stats).mark_circle()
bowler_bubble = bowler_bubble.encode(
    tooltip=['bowler:N', 'economy:Q', 'wickets:Q', 'overs:Q'],
    color=alt.Color('bowler:N', legend=None),
    size=alt.Size('overs:Q', scale=alt.Scale(range=[20, 800])),
    y=alt.Y('wickets:Q', title='Wickets Taken'),
    x=alt.X('economy:Q', title='Economy Rate'),
)
bowler_bubble = bowler_bubble.properties(
    height=500,
    width=700,
    title="🎯 Bowler Economy Rate vs Wickets Taken (Bubble Chart)",
).interactive()

bowler_bubble



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [33]:
# Per-bowler totals over all delivery rows: runs conceded (sum of total_runs),
# rows bowled, and rows with a non-null player_dismissed.
# NOTE(review): `wickets` counts every dismissal recorded on the bowler's
# deliveries, whatever its kind — confirm that is the intended definition.
bowler_stats = deliveries.groupby('bowler').agg(
    runs_conceded=('total_runs', 'sum'),
    balls_bowled=('ball', 'count'),
    wickets=('player_dismissed', 'count')
).reset_index()

bowler_stats['overs'] = bowler_stats['balls_bowled'] / 6
bowler_stats['economy'] = bowler_stats['runs_conceded'] / bowler_stats['overs']

# Filter serious bowlers. .copy() makes the result an independent frame so the
# column added below is not written onto a slice of the unfiltered table.
bowler_stats = bowler_stats[bowler_stats['overs'] >= 100].copy()

# Create Wickets Groups.
# The last bin is open-ended and the first bin includes 0, so no bowler falls
# outside the bins and ends up with a missing group.
WICKET_GROUP_LABELS = ['0-50', '51-100', '101-150', '151-200', '201+']
bowler_stats['wickets_group'] = pd.cut(bowler_stats['wickets'],
                                       bins=[0, 50, 100, 150, 200, float('inf')],
                                       labels=WICKET_GROUP_LABELS,
                                       include_lowest=True)

# Bubble Chart
bubble_chart = alt.Chart(bowler_stats).mark_circle(opacity=0.7).encode(
    x=alt.X('economy:Q', title='Economy Rate', scale=alt.Scale(zero=False)),
    y=alt.Y('wickets:Q', title='Wickets Taken'),
    size=alt.Size('overs:Q', scale=alt.Scale(range=[30, 1000])), # Bigger bubbles
    color=alt.Color('wickets_group:N',
                    scale=alt.Scale(scheme='set2'),
                    # Explicit order: a nominal field would otherwise sort
                    # lexically and put '101-150' before '51-100'.
                    sort=WICKET_GROUP_LABELS,
                    title='Wickets Range'),
    tooltip=[
        alt.Tooltip('bowler:N', title='Bowler'),
        alt.Tooltip('wickets:Q', title='Total Wickets'),
        alt.Tooltip('economy:Q', title='Economy'),
        alt.Tooltip('overs:Q', title='Overs Bowled')
    ]
).properties(
    title="🎯 Creative Bubble Chart: Bowler Economy Rate vs Wickets Taken",
    width=800,
    height=600
).configure_axis(
    grid=True,
    gridOpacity=0.3
).configure_view(
    stroke=None
).interactive()

bubble_chart


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



## Combined analysis: matches and deliveries

In [35]:
import pandas as pd

# Load both datasets
matches = pd.read_csv('archive (1)/matches.csv')
deliveries = pd.read_csv('archive (1)/deliveries.csv')

# Clean 'season' to integer: keep the leading run of digits and cast to int.
# The pattern is a raw string so `\d` is not parsed as an (invalid) string
# escape, and expand=False makes extract() return a Series instead of a
# one-column DataFrame.
# NOTE(review): a label such as "YYYY/YY" is reduced to its first year and could
# collide with a plain "YYYY" label — confirm with matches['season'].unique().
matches['season'] = matches['season'].str.extract(r'(\d+)', expand=False).astype(int)

# Merge on match_id: every delivery row gets the columns of its match.
# validate='many_to_one' raises if `id` is not unique in `matches`, which would
# otherwise silently duplicate delivery rows.
combined_df = deliveries.merge(
    matches,
    how='left',
    left_on='match_id',
    right_on='id',
    validate='many_to_one',
)

# Let's see final combined structure
print(combined_df.shape)
print(combined_df.columns)



invalid escape sequence '\d'


invalid escape sequence '\d'


invalid escape sequence '\d'



(260920, 37)
Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')


In [37]:
import altair as alt

# Keep only deliveries where the batting team is also the match winner, then
# total each batter's runs over those deliveries.
batsman_wins = combined_df.loc[combined_df['winner'].eq(combined_df['batting_team'])]
batsman_performance = batsman_wins.groupby('batter', as_index=False)['batsman_runs'].sum()

# Ten highest run totals, largest first.
top_batsmen = batsman_performance.sort_values('batsman_runs', ascending=False).iloc[:10]

# Horizontal bar chart, bars ordered by descending run total.
chart = alt.Chart(top_batsmen).mark_bar()
chart = chart.encode(
    tooltip=['batter:N', 'batsman_runs:Q'],
    color='batter:N',
    y=alt.Y('batter:N', sort='-x', title='Batsman'),
    x=alt.X('batsman_runs:Q', title='Total Runs in Winning Matches'),
)
chart = chart.properties(
    height=400,
    width=600,
    title="🏆 Top 10 Batsmen by Runs in Winning Matches",
)

chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [38]:
# Keep only deliveries where the bowling team is also the match winner, then
# total runs and count delivery rows per bowler.
bowler_wins = combined_df.loc[combined_df['winner'].eq(combined_df['bowling_team'])]
bowler_stats = bowler_wins.groupby('bowler', as_index=False).agg(
    total_runs=('total_runs', 'sum'),
    balls_bowled=('ball', 'count'),
)

# Overs = rows bowled / 6; economy = runs per over.
bowler_stats = bowler_stats.assign(overs=bowler_stats['balls_bowled'] / 6)
bowler_stats = bowler_stats.assign(
    economy=bowler_stats['total_runs'] / bowler_stats['overs']
)

# Keep bowlers with at least 30 overs in wins.
bowler_stats = bowler_stats.loc[bowler_stats['overs'] >= 30]

# Ten lowest economy rates, lowest first.
best_economy = bowler_stats.sort_values(by='economy').iloc[:10]

# Horizontal bar chart, bars ordered by ascending economy.
chart2 = alt.Chart(best_economy).mark_bar()
chart2 = chart2.encode(
    tooltip=['bowler:N', 'economy:Q', 'overs:Q'],
    color='bowler:N',
    y=alt.Y('bowler:N', sort='x', title='Bowler'),
    x=alt.X('economy:Q', title='Economy Rate in Wins'),
)
chart2 = chart2.properties(
    height=400,
    width=600,
    title="🎯 Top 10 Best Economy Bowlers in Winning Matches",
)

chart2



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [39]:
import plotly.express as px

# Toss winner vs match winner, computed column-wise instead of with a
# row-by-row apply() over every delivery.
toss_winner_won = combined_df['toss_winner'] == combined_df['winner']
combined_df['toss_match_result'] = (
    toss_winner_won
    .map({True: 'Won Toss and Match', False: 'Lost After Toss'})
    # A match with no recorded winner was not lost by the toss winner;
    # label it separately instead of counting it as 'Lost After Toss'.
    .mask(combined_df['winner'].isna(), 'No Result')
)

# One row per match: the label is constant within a match, so dropping
# duplicate (match_id, label) pairs collapses the per-delivery rows.
toss_outcomes = combined_df[['match_id', 'toss_match_result']].drop_duplicates()

fig = px.pie(toss_outcomes, names='toss_match_result', title='🧠 Toss Impact on Winning Matches')

fig.show()
