In [58]:
import json
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
from collections import Counter
# Directory that holds the match files read by the cells below.
mypath = './ipl_json/'
# Keep every regular file except the README. Filtering (rather than calling
# list.remove) does not raise when README.txt is absent, and sorting makes the
# processing order independent of the order the filesystem returns names in.
onlyfiles = sorted(
    f for f in listdir(mypath)
    if f != 'README.txt' and isfile(join(mypath, f))
)
len(onlyfiles)



1095

In [2]:

def pvp_data(onlyfiles, batsman, bowlman, mypath):
    """Collect ball-by-ball records of one batter facing one bowler.

    Each name in ``onlyfiles`` is read from ``mypath`` as a JSON match record.
    A file is skipped when it cannot be read or parsed, when its
    ``info.players`` squads do not list both names, or when it has no entry in
    ``info.dates`` (a message is printed for unreadable and undated files).

    Args:
        onlyfiles: iterable of file names relative to ``mypath``.
        batsman: batter name, compared with each delivery's ``batter``.
        bowlman: bowler name, compared with each delivery's ``bowler``.
        mypath: directory containing the match files.

    Returns:
        dict keyed by the first entry of ``info.dates``. Each value is a dict
        with ``teams``, ``city``, ``batting`` (one record per delivery bowled
        by ``bowlman`` to ``batsman``) and ``dismissals`` (one record per
        wicket of ``batsman`` that is credited to ``bowlman``). A later file
        with the same first date replaces the earlier entry.
    """
    # Ways of getting out that are not credited to the bowler. Without this
    # filter a run out (possibly at the non-striker's end) or a retirement
    # while ``bowlman`` happened to be bowling was counted as his wicket.
    not_bowler_credited = frozenset({
        'run out',
        'retired hurt',
        'retired out',
        'retired not out',
        'obstructing the field',
        'timed out',
        'handled the ball',
        'hit the ball twice',
    })

    all_data = {}

    for f in onlyfiles:
        file_path = os.path.join(mypath, f)

        # Load JSON data; an explicit encoding avoids depending on the
        # platform default. ValueError covers both JSON and decoding errors.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (OSError, ValueError) as e:
            print(f"Error loading {f}: {e}")
            continue

        # Tolerate files without the expected structure instead of raising
        # KeyError/AttributeError and aborting the whole scan.
        info = data.get('info', {}) if isinstance(data, dict) else {}
        squads = info.get('players', {})

        # Flatten both squads and require that both players took part.
        flat_list = {player for players in squads.values() for player in players}
        if batsman not in flat_list or bowlman not in flat_list:
            continue

        dates = info.get('dates') or []
        if not dates:
            print(f"Skipping {f}: no match date")
            continue

        player_data = {
            'teams': info.get('teams', []),
            'city': info.get('city', 'Unknown'),
            'batting': [],
            'dismissals': []
        }

        for inning in data.get('innings', []):
            for over_data in inning.get('overs', []):
                # ball_number is the 1-based position within the over's
                # delivery list, so it counts every listed delivery.
                for count, delivery in enumerate(over_data.get('deliveries', []), start=1):
                    bowler = delivery.get('bowler')

                    # Capture batting data
                    if delivery.get('batter') == batsman and bowler == bowlman:
                        runs = delivery.get('runs', {})
                        player_data['batting'].append({
                            'over': over_data.get('over'),
                            'ball_number': count,
                            'bowler': bowler,
                            'runs': runs.get('batter', 0),
                            'total_runs': runs.get('total', 0)
                        })

                    # Capture dismissal data
                    for wicket in delivery.get('wickets', []):
                        if wicket.get('player_out') != batsman:
                            continue
                        # Fall back to the delivery's bowler when the wicket
                        # entry does not name one.
                        if wicket.get('bowler', bowler) != bowlman:
                            continue
                        if wicket.get('kind') in not_bowler_credited:
                            continue
                        player_data['dismissals'].append({
                            'over': over_data.get('over'),
                            'ball_number': count,
                            'bowler': bowler,
                            'kind': wicket.get('kind'),
                            'player_out': wicket.get('player_out')
                        })

        all_data[dates[0]] = player_data

    return all_data


In [160]:
def pretty_print(all_data, batsman, bowlman):
    """Print an overall and a match-by-match summary of a head-to-head record.

    Args:
        all_data: dict mapping a ``YYYY-MM-DD`` date string to a dict with a
            ``batting`` list (records carrying ``runs`` and ``total_runs``)
            and a ``dismissals`` list.
        batsman: batter name, used only in the heading.
        bowlman: bowler name, used only in the heading.

    Returns:
        None. Everything is written to stdout: the heading, a one-row summary
        table, and a per-match table indexed by date, newest first.
    """
    # Scoring shots that get their own column. Any other value (e.g. 5) still
    # counts toward Runs and Balls but has no column of its own.
    run_buckets = (0, 1, 2, 3, 4, 6)
    columns = ['Date', 'Runs', 'Balls', '0s', '1s', '2s', '3s', '4s', '6s', 'extras', 'dismissals']

    print(f"{batsman} vs {bowlman}")

    # Collect one plain dict per match and build the frame once at the end;
    # appending rows to a DataFrame one at a time re-allocates on every row
    # and leaves the columns with object dtype.
    match_rows = []
    for date, match in all_data.items():
        balls = match['batting']
        bat_runs = sum(ball['runs'] for ball in balls)
        conceded = sum(ball['total_runs'] for ball in balls)
        run_counts = Counter(ball['runs'] for ball in balls)

        row = {'Date': date, 'Runs': bat_runs, 'Balls': len(balls)}
        for bucket in run_buckets:
            row[f'{bucket}s'] = run_counts.get(bucket, 0)
        # Extras are whatever the deliveries yielded beyond the batter's runs.
        row['extras'] = conceded - bat_runs
        row['dismissals'] = len(match['dismissals'])
        match_rows.append(row)

    # Aggregate Data: every numeric column is a simple sum over the matches.
    d1 = {col: sum(row[col] for row in match_rows) for col in columns[1:]}

    print("Summary\n")
    df_d1 = pd.DataFrame([d1])
    print(df_d1)

    print("\nMatch by Match\n")
    # Passing ``columns`` keeps the expected layout even when there are no rows.
    df_all = pd.DataFrame(match_rows, columns=columns)
    df_all['Date'] = pd.to_datetime(df_all['Date'], format='%Y-%m-%d')
    df_all = df_all.set_index('Date').sort_index(ascending=False)
    print(df_all)


In [161]:
# Choose the head-to-head pairing and gather its ball-by-ball records.
batsman, bowlman = 'RG Sharma', 'RA Jadeja'
result = pvp_data(onlyfiles, batsman, bowlman, mypath)
# Uncomment to inspect the raw records:
# print(json.dumps(result, indent=2))

In [162]:
# Print the overall and per-match tables for the selected pairing.
pretty_print(result, batsman, bowlman)

RG Sharma vs RA Jadeja
Summary

   Runs  Balls  0s  1s  2s  3s  4s  6s  extras  dismissals
0    93     87  26  49   5   0   4   3       2           3

Match by Match

            Runs  Balls  0s  1s  2s  3s  4s  6s  extras  dismissals
Date                                                               
2024-04-14    22     13   5   4   0   0   3   1       0           0
2023-05-06     0      0   0   0   0   0   0   0       0           0
2023-04-08     0      0   0   0   0   0   0   0       0           0
2022-04-21     0      0   0   0   0   0   0   0       0           0
2021-05-01     0      0   0   0   0   0   0   0       0           0
2020-09-19     0      0   0   0   0   0   0   0       0           0
2019-05-12     0      0   0   0   0   0   0   0       0           0
2019-05-07     0      0   0   0   0   0   0   0       0           0
2019-04-03     0      1   1   0   0   0   0   0       0           1
2018-04-28     0      0   0   0   0   0   0   0       0           0
2018-04-07     0 