In [6]:
import pandas as pd 
from loading import innings_lookup
import yaml

In [7]:
# Maps the innings label used as a key in the match YAML to the key used in
# the processed innings summary.
innings_lookup = dict([
    ('1st innings', 'first_innings'),
    ('2nd innings', 'second_innings'),
    ('3rd innings', 'third_innings'),
    ('4th innings', 'fourth_innings'),
])


def load_yaml_file(filepath):
    """Load a match YAML file and return the deliveries of its LAST innings.

    Prints the file's ``info`` section as a side effect.

    Args:
        filepath: Path to a YAML file whose top level has ``info`` and
            ``innings`` keys. ``innings`` is read as a list of single-key
            mappings, each of the form
            ``{<innings label>: {'team': ..., 'deliveries': [...]}}``.

    Returns:
        The ``deliveries`` value of the final entry in ``innings``; an empty
        list when ``innings`` is empty; ``None`` when the YAML cannot be
        parsed (the parser error is printed).

    Raises:
        KeyError: if ``info`` or ``innings`` is missing, if an innings label
            is not a key of ``innings_lookup``, or if an innings has no
            ``team`` (or the last one has no ``deliveries``).
    """
    with open(filepath, 'r') as stream:
        # Only the parse itself is guarded, so errors in the processing below
        # are not mistaken for (or hidden behind) YAML problems.
        try:
            raw_data = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            return None

    info = raw_data['info']
    innings = raw_data['innings']
    print(f'Match Information: {info}')

    # NOTE(review): this summary is built but not returned to the caller;
    # it is kept because the lookup validates the innings labels.
    innings_processed = {'n_innings': len(innings)}

    last_innings = None
    for inning in innings:
        original_key = list(inning.keys())[0]
        innings_processed[innings_lookup[original_key]] = inning[original_key]['team']
        last_innings = inning[original_key]

    # A file with no innings previously crashed on unbound loop variables.
    if last_innings is None:
        return []
    return last_innings['deliveries']

In [8]:
# Load one match file; `deliveries` holds whatever load_yaml_file returns for it.
filepath = "files/64071.yaml"
deliveries = load_yaml_file(filepath)

Match Information: {'city': 'Galle', 'dates': ['2004-03-08', '2004-03-09', '2004-03-10', '2004-03-11', '2004-03-12'], 'gender': 'male', 'match_type': 'Test', 'match_type_number': 1685, 'outcome': {'winner': 'Australia', 'by': {'runs': 197}}, 'player_of_match': ['ML Hayden'], 'teams': ['Australia', 'Sri Lanka'], 'toss': {'decision': 'bat', 'winner': 'Australia'}, 'umpires': ['DR Shepherd', 'RE Koertzen'], 'venue': 'Galle International Stadium'}


In [9]:
# Peek at the details stored under the first delivery's (only) key.
list(deliveries[0].values())[0]

{'non_striker': 'KC Sangakkara',
 'bowler': 'SK Warne',
 'runs': {'extras': 0, 'total': 0, 'batsman': 0},
 'batsman': 'MS Atapattu'}

In [10]:
def merge_two_dicts(x, y):
    """Return a new dict holding the entries of ``x`` overlaid with those of ``y``.

    The result is a shallow copy: neither input is modified, and on key
    clashes the value from ``y`` wins.
    """
    merged = x.copy()
    merged.update(y)
    return merged

# Tabulate the wickets for this innings: one row per delivery that carries a
# 'wicket' entry, with the wicket details expanded into their own columns.
wickets = [
    merge_two_dicts({'delivery': ball}, delivery[ball])
    for delivery in deliveries
    for ball in [list(delivery)[0]]
    if 'wicket' in delivery[ball]
]
df = pd.DataFrame(wickets)
wicket_columns = df['wicket'].apply(pd.Series)
df = pd.concat([df, wicket_columns], axis=1)
df = df.drop(['runs', 'wicket'], axis=1)
df


Unnamed: 0,delivery,non_striker,bowler,batsman,player_out,kind,fielders
0,5.4,MS Atapattu,MS Kasprowicz,KC Sangakkara,KC Sangakkara,lbw,
1,18.3,DPMD Jayawardene,SK Warne,MS Atapattu,MS Atapattu,caught,[ML Hayden]
2,20.1,DPMD Jayawardene,SK Warne,TM Dilshan,TM Dilshan,lbw,
3,22.5,ST Jayasuriya,SK Warne,DPMD Jayawardene,DPMD Jayawardene,caught,[ML Hayden]
4,23.1,HP Tillakaratne,SCG MacGill,ST Jayasuriya,ST Jayasuriya,caught,[ML Hayden]
5,33.5,HP Tillakaratne,SCG MacGill,TT Samaraweera,TT Samaraweera,bowled,
6,38.2,UDU Chandana,SK Warne,HP Tillakaratne,HP Tillakaratne,caught,[A Symonds]
7,43.6,WPUJC Vaas,SCG MacGill,UDU Chandana,UDU Chandana,caught,[JL Langer]
8,44.6,WPUJC Vaas,SK Warne,HDPK Dharmasena,HDPK Dharmasena,caught,[ML Hayden]
9,45.2,WPUJC Vaas,SCG MacGill,M Muralitharan,M Muralitharan,stumped,[AC Gilchrist]


In [11]:
# Delivery identifiers (the single key of each delivery mapping) on which
# extras were recorded.
extras_balls = [
    ball
    for delivery in deliveries
    for ball in [list(delivery)[0]]
    if 'extras' in delivery[ball]
]
extras_balls

[8.6, 26.3, 28.2]

In [12]:
# Full delivery records that include an 'extras' entry
[
    delivery
    for delivery in deliveries
    if 'extras' in list(delivery.values())[0]
]

[{8.6: {'non_striker': 'MS Atapattu',
   'bowler': 'JN Gillespie',
   'runs': {'extras': 1, 'total': 1, 'batsman': 0},
   'extras': {'wides': 1},
   'batsman': 'DPMD Jayawardene'}},
 {26.3: {'non_striker': 'TT Samaraweera',
   'bowler': 'SK Warne',
   'runs': {'extras': 4, 'total': 4, 'batsman': 0},
   'extras': {'byes': 4},
   'batsman': 'HP Tillakaratne'}},
 {28.2: {'non_striker': 'HP Tillakaratne',
   'bowler': 'SK Warne',
   'runs': {'extras': 1, 'total': 1, 'batsman': 0},
   'extras': {'noballs': 1},
   'batsman': 'TT Samaraweera'}}]

In [13]:
# Same records, selected by their delivery identifier instead.
[
    delivery
    for delivery in deliveries
    if list(delivery)[0] in (8.6, 26.3, 28.2)
]

[{8.6: {'non_striker': 'MS Atapattu',
   'bowler': 'JN Gillespie',
   'runs': {'extras': 1, 'total': 1, 'batsman': 0},
   'extras': {'wides': 1},
   'batsman': 'DPMD Jayawardene'}},
 {26.3: {'non_striker': 'TT Samaraweera',
   'bowler': 'SK Warne',
   'runs': {'extras': 4, 'total': 4, 'batsman': 0},
   'extras': {'byes': 4},
   'batsman': 'HP Tillakaratne'}},
 {28.2: {'non_striker': 'HP Tillakaratne',
   'bowler': 'SK Warne',
   'runs': {'extras': 1, 'total': 1, 'batsman': 0},
   'extras': {'noballs': 1},
   'batsman': 'TT Samaraweera'}}]

In [14]:
def update_aggregates(delivery_json, innings_details):
    """Run the per-delivery aggregate updaters for one delivery.

    Args:
        delivery_json: Single-key mapping ``{delivery_id: details}``.
            NOTE(review): assumed to be the same shape ``process_delivery``
            receives, with ``bowler``, ``batsman`` and ``runs['batsman']``
            inside ``details`` — confirm against callers.
        innings_details: Running innings aggregates; mutated in place.

    Returns:
        None.
    """
    delivery = delivery_json[list(delivery_json)[0]]

    process_over(delivery_json, innings_details)
    process_wicket(delivery_json, innings_details)
    # process_head_to_head takes (bowler, batsman, runs, innings_details);
    # passing the raw delivery mapping raised TypeError on every call.
    process_head_to_head(
        delivery['bowler'],
        delivery['batsman'],
        delivery['runs']['batsman'],
        innings_details,
    )

def display_score(delivery_display, innings_details):
    """Format the running score as ``"<delivery>: <total_score>-<wickets>"``.

    Reads ``total_score`` and ``wickets`` from ``innings_details``.
    """
    score = innings_details['total_score']
    wickets_down = innings_details['wickets']
    return f"{delivery_display}: {score}-{wickets_down}"

def process_over(delivery_json, innings_details):
    """Placeholder for the per-over aggregates; currently does nothing.

    Planned behaviour (not yet implemented):
      * regex on the delivery display and update the overs aggregates;
      * if the over hasn't started, create the over aggregate;
      * otherwise update the existing one with the delivery's details.
    """
    return None

def process_partnership():
    """Placeholder for partnership tracking; currently does nothing.

    Planned behaviour (not yet implemented):
      * on a wicket, end the current partnership and start a new one;
      * otherwise update the current partnership's details.
    """
    return None

def process_wicket(delivery_json, innings_details):
    """Placeholder for wicket handling; currently does nothing.

    Planned behaviour (not yet implemented): find the batsman and record
    the method of dismissal.
    """
    return None

def process_head_to_head(bowler, batsman, runs, innings_details):
    """Record one delivery in the bowler-vs-batsman tallies.

    ``innings_details['h2h_bowl']`` is a nested mapping
    ``{bowler: {batsman: {'deliveries': int, 'total_runs': number}}}``;
    it is updated in place and nothing is returned.
    """
    matchups = innings_details['h2h_bowl'].setdefault(bowler, {})

    if batsman in matchups:
        tally = matchups[batsman]
        tally['deliveries'] += 1
        tally['total_runs'] += runs
    else:
        # First time this bowler has bowled to this batsman.
        matchups[batsman] = {'deliveries': 1, 'total_runs': runs}

def process_delivery(delivery_json, innings_details):
    """Fold one delivery into the running innings aggregates.

    Args:
        delivery_json: Single-key mapping ``{delivery_id: details}`` where
            ``details`` has ``bowler``, ``batsman``, ``runs`` (with
            ``total`` and ``batsman``) and optionally ``wicket`` and
            ``extras`` (a mapping of extras type to runs).
        innings_details: Running aggregates, mutated in place. Keys used:
            ``total_score``, ``wickets``, ``balls``, ``extras``,
            ``bowling_figures`` (plus ``h2h_bowl`` via
            ``process_head_to_head``).

    Returns:
        None. Prints the score line whenever the delivery takes a wicket.
    """
    delivery_display = list(delivery_json)[0]
    delivery = delivery_json[delivery_display]
    runs = delivery['runs']
    bowler = delivery['bowler']
    batsman = delivery['batsman']
    extras = delivery.get('extras', {})
    is_wicket = 1 if 'wicket' in delivery else 0

    innings_details['total_score'] += runs['total']

    process_over(delivery, innings_details)
    process_wicket(delivery_json, innings_details)
    process_head_to_head(bowler, batsman, runs['batsman'], innings_details)

    # Per-ball run log, kept so 'last N balls' style queries are possible.
    innings_details['balls'].append(runs['total'])

    if is_wicket:
        innings_details['wickets'] += 1
        print(display_score(delivery_display, innings_details))

    # Tally every extras type on the delivery; a single ball can carry more
    # than one (e.g. a no-ball that also goes for byes).
    extras_totals = innings_details['extras']
    for extras_type, amount in extras.items():
        extras_totals[extras_type] = extras_totals.get(extras_type, 0) + amount

    wides = extras.get('wides', 0)
    noballs = extras.get('noballs', 0)

    # Only runs off the bat, wides and no-balls count against the bowler;
    # byes, leg byes and penalty runs do not.
    runs_off_delivery = runs['batsman'] + wides + noballs

    # Start from zeroed figures so a wide or no-ball on the bowler's first
    # recorded delivery is counted rather than dropped.
    figures = innings_details['bowling_figures'].setdefault(bowler, {
        'deliveries': 0,
        'runs': 0,
        'wickets': 0,
        'wides': 0,
        'noballs': 0,
    })
    figures['deliveries'] += 1
    figures['runs'] += runs_off_delivery
    figures['wickets'] += is_wicket
    figures['wides'] += wides
    figures['noballs'] += noballs

### Head to head match ups
Below is some basic logic to work out the keys required for creating head-to-head match-ups, and for building on them iteratively as each delivery is processed.

In [15]:
# Toy run of the head-to-head bookkeeping: every bowler "bowls" once to each
# listed batter ('Bat 2' appears twice to exercise the existing-pair branch).
head_to_head_bowl = {}
bowlers = ['Bowl 1', 'Bowl 2']
batsmen = ['Bat 1', 'Bat 2', 'Bat 2', 'Bat 3']

for bowler in bowlers:
    for batter in batsmen:
        print(bowler, batter)
        if bowler not in head_to_head_bowl:
            print(f"New bowler: {(bowler, batter)}")
            head_to_head_bowl[bowler] = {batter: {'deliveries': 1, 'total_runs': 2}}
        elif batter not in head_to_head_bowl[bowler]:
            print(f"Existing bowler, new batter: {(bowler, batter)}")
            head_to_head_bowl[bowler][batter] = {'deliveries': 1, 'total_runs': 2}
        else:
            print(f"Existing combination: {(bowler, batter)}")
            head_to_head_bowl[bowler][batter]['deliveries'] += 1
            head_to_head_bowl[bowler][batter]['total_runs'] += 4

head_to_head_bowl['Bowl 2']

Bowl 1 Bat 1
New bowler: ('Bowl 1', 'Bat 1')
Bowl 1 Bat 2
Existing bowler, new batter: ('Bowl 1', 'Bat 2')
Bowl 1 Bat 2
Existing combination: ('Bowl 1', 'Bat 2')
Bowl 1 Bat 3
Existing bowler, new batter: ('Bowl 1', 'Bat 3')
Bowl 2 Bat 1
New bowler: ('Bowl 2', 'Bat 1')
Bowl 2 Bat 2
Existing bowler, new batter: ('Bowl 2', 'Bat 2')
Bowl 2 Bat 2
Existing combination: ('Bowl 2', 'Bat 2')
Bowl 2 Bat 3
Existing bowler, new batter: ('Bowl 2', 'Bat 3')


{'Bat 1': {'deliveries': 1, 'total_runs': 2},
 'Bat 2': {'deliveries': 2, 'total_runs': 6},
 'Bat 3': {'deliveries': 1, 'total_runs': 2}}

In [16]:
# Fresh, empty aggregates for one innings.
innings_details = dict(
    total_score=0,
    wickets=0,
    legal_deliveries=0,
    run_rate=0,
    extras={},
    partnerships=[],
    balls=[],
    overs=[],
    bowling_figures={},
    scorecard={},
    h2h_bowl={},
)

# Push every delivery through the processor, updating the innings summary.
for delivery_json in deliveries:
    process_delivery(delivery_json, innings_details)

# View the final bowling figures for the innings
innings_details['bowling_figures']

5.4: 14-1
18.3: 41-2
20.1: 49-3
22.5: 56-4
23.1: 56-5
33.5: 89-6
38.2: 119-7
43.6: 153-8
44.6: 153-9
45.2: 154-10


{'SK Warne': {'deliveries': 91,
  'runs': 43,
  'wickets': 5,
  'wides': 0,
  'noballs': 1},
 'JN Gillespie': {'deliveries': 55,
  'runs': 20,
  'wickets': 0,
  'wides': 1,
  'noballs': 0},
 'MS Kasprowicz': {'deliveries': 30,
  'runs': 13,
  'wickets': 1,
  'wides': 0,
  'noballs': 0},
 'SCG MacGill': {'deliveries': 98,
  'runs': 74,
  'wickets': 4,
  'wides': 0,
  'noballs': 0}}

In [17]:
# Bowling figures as a table: one column per bowler, one row per statistic.
pd.DataFrame.from_dict(innings_details['bowling_figures'])

Unnamed: 0,SK Warne,JN Gillespie,MS Kasprowicz,SCG MacGill
deliveries,91,55,30,98
runs,43,20,13,74
wickets,5,0,1,4
wides,0,1,0,0
noballs,1,0,0,0
