In [3]:
import re
from collections import defaultdict

def extract_runs_wickets(text):
    runs = 0
    wicket = False
    
    # Check for runs scored
    run_patterns = [r'\b(\d+)\s+run', r'\b(\d+)\s+runs', r'(FOUR)\s+runs', r'(SIX)\s+runs', r'\b(\d+)\s+leg\s+bye']
    for pattern in run_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            if match.lastindex == 3 and match.group(3).lower() == 'four':
                runs += 4
            elif match.lastindex == 4 and match.group(4).lower() == 'six':
                runs += 6
            elif match.group(1) and match.group(1).lower() not in ['four', 'six']:
                runs += int(match.group(1))
            elif match.group(1) and match.group(1).lower() not in ['four', 'six']:
                runs += int(match.group(1))
    
    # Check for wickets
    wicket_patterns = [r'\b(OUT)\b', r'\bcaught\s+and\s+bowled\b', r'\bcaught\s+behind\b', r'\bbowled\b', r'\blbw\b', r'\bstumped\b']
    for pattern in wicket_patterns:
        if re.search(pattern, text, re.IGNORECASE):
            wicket = True
            break
    
    return runs, wicket

# Define a function to extract batsman and bowler from the text
def extract_batsman_bowler(text):
    parts = text.split(',')
    bowler_batsman = parts[0].split('to')
    bowler = bowler_batsman[0].strip()
    batsman = bowler_batsman[1].strip()
    return bowler, batsman

def calculate_shots(data, batting_terms, bowling_terms):
    batsman_shots = defaultdict(list)
    bowler_shots = defaultdict(list)
    
    for _, row in data.iterrows():
        long_text = str(row['long_text']).lower()  # Convert to string first, then to lowercase
        for term in batting_terms:
            if term in long_text:
                bowler, batsman = extract_batsman_bowler(row['short_text'])
                batsman_shots[batsman].append(term)
        for term in bowling_terms:
            if term in long_text:
                bowler, batsman = extract_batsman_bowler(row['short_text'])
                bowler_shots[bowler].append(term)
    
    return batsman_shots, bowler_shots

# Main code
import pandas as pd

# Load data
data = pd.read_csv('/Users/shiwaditya./Desktop/CCNLP/IPL2018.csv')

# Define a knowledge base of cricket terms related to batting
batting_terms = ['drives', 'cuts', 'pulls', 'nudges', 'squeezes', 'flicks', 'glances', 'sweeps', 'slog', 'lofts', 'edges', 'defends', 'across the line', 'agricultural shot', 'anchor', 'attacking shot', 'back foot shot', 'biffer', 'block', 'blocker', 'carry the bat', 'cartwheel', 'caught', 'caught and bowled', 'caught behind', 'centurion', 'century', 'charge', 'chest on', 'chop on', 'club', 'come to the crease', 'contrived circumstances', 'cow shot', 'cross-bat shot', 'cross the rope', 'reverse sweep', 'cut', 'cut shot', 'drive', 'full blooded', 'hook', 'late cut', 'pull', 'pull shot', 'square-cut', 'sweep', 'on-drive', 'off-drive', 'cover drive', 'dilscoop', 'dolly', 'duck', 'golden duck', 'leg glance', 'lolly', 'out', 'textbook shot', 'ton', 'top edge', 'uppish', 'upper cut', 'waft']

# Define a knowledge base of cricket terms related to bowling
bowling_terms = ['bowled', 'shaping', 'beats', 'yorker', 'bouncer', 'action', 'all out', 'arm ball', 'around the wicket', 'away swing', 'back foot', 'back foot contact', 'back spin', 'badger', 'ball tampering', 'ball tracking', 'bang (it) in', 'bat-pad', 'bend the back', 'bite', 'bodyline', 'bounce out', 'bowl-out', 'bowling action', 'bowling analysis', 'bowling at the death', 'bowling average', 'brace', 'bumper', 'cafeteria bowling', 'carrom ball', 'castled', 'caught and bowled', 'chin music', 'chinaman', 'chinese cut', 'clean bowled', 'creeper', 'daisy cutter', 'declaration', 'dibbly-dobbly', 'doosra', 'edge', 'fast bowler', 'ferret', 'finger spinner', 'flipper', 'follow-on', 'full toss', 'googly', 'hit wicket', 'inswinger', 'jaffa', 'leg break', 'leg cutter', 'leg theory', 'leg before wicket (lbw)', 'long hop', 'maiden over', 'no ball', 'off break', 'off side', 'off spinner', 'overpitched', 'pace bowler', 'reverse swing', 'seam', 'seamer', 'shooter', 'short leg', 'silly mid-off', 'silly mid-on', 'sticky wicket', 'stump', 'tape ball', 'teesra', 'third man', 'throwing', 'top spin', 'toe-crusher', 'twelfth man', 'unplayable delivery', 'very fine leg', 'wag', 'wagon wheel', 'wearing wicket', 'white ball', 'wicket maiden', 'wicket-to-wicket', 'worm', 'wrist spin', 'wrong foot', 'wrong footed', 'wrong', 'yorker']

# Calculate runs and wickets
batsman_runs = defaultdict(int)
bowler_wickets = defaultdict(int)

for _, row in data.iterrows():
    runs, wicket = extract_runs_wickets(str(row['long_text']))
    bowler, batsman = extract_batsman_bowler(row['short_text'])
    batsman_runs[batsman] += runs
    if wicket:
        bowler_wickets[bowler] += 1

# Print runs and wickets
print("Batsman Runs:")
for batsman, runs in sorted(batsman_runs.items()):
    print(f"{batsman}: {runs}")

print("\nBowler Wickets:")
for bowler, wickets in sorted(bowler_wickets.items()):
    print(f"{bowler}: {wickets}")

# Calculate shots played
batsman_shots, bowler_shots = calculate_shots(data, batting_terms, bowling_terms)

# Print shots played
print("\nBatsman Shots:")
for batsman, shots in sorted(batsman_shots.items()):
    print(f"{batsman}: {', '.join(set(shots))}")

print("\nBowler Shots:")
for bowler, shots in sorted(bowler_shots.items()):
    print(f"{bowler}: {', '.join(set(shots))}")

Batsman Runs:
AB: 0
Abhishek: 0
Agarwal: 0
Ali: 24
Anderson: 0
Anureet: 0
Archer: 0
Ashwin: 0
Axar Patel: 0
Barinder Sran: 0
Basil: 0
Bhui: 0
Bhuvneshwar: 0
Billings: 0
Binny: 0
Boult: 0
Brathwaite: 0
Bravo: 0
Bumrah: 0
Buttler: 144
Chahal: 0
Chahar: 0
Chawla: 0
Chopra: 0
Christian: 0
Cutting: 28
Dananjaya: 0
Dhoni: 0
Duminy: 0
Finch: 0
Gambhir: 0
Gayle: 0
Gill: 32
Gopal: 0
Goswami: 0
Gowtham: 56
Grandhomme: 0
Hales: 0
Harbhajan: 0
Hooda: 0
Ishan: 0
Iyer: 0
JPR Scantlebury-Searles: 0
Jadeja: 0
Jadhav: 0
Johnson: 110
Karthik: 26
Kaul: 0
Khan: 0
Klaasen: 0
Kohli: 92
Krunal: 0
Kuldeep: 0
Kulkarni: 0
Laughlin: 128
Lewis: 0
Lomror: 0
Lynn: 0
M Nabi: 0
Mandeep: 70
Markande: 0
Maxwell: 94
McClenaghan: 0
McCullum: 26
Miller: 0
Mishra: 0
Morris: 20
Mujeeb: 0
Munro: 0
Mustafizur: 0
Nadeem: 0
Nair: 0
Narine: 0
Nath: 0
Negi: 0
Ojha: 0
Pandey: 0
Pandya: 34
Pant: 316
Patel: 0
Pathan: 0
Plunkett: 0
Pollard: 0
Prasidh: 0
Rahane: 0
Rahul: 1030
Raina: 0
Rajpoot: 0
Rana: 0
Rashid Khan: 0
Rayudu: 800
Rohi