In [None]:
%pip install selenium webdriver-manager

import time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


In [8]:
def get_player_url(player_name: str):
    """Find a player's ESPNcricinfo profile URL and ID from their name.

    Loads the ESPNcricinfo player-search page in headless Chrome and reads the
    link of the first search result.

    Args:
        player_name: Name to search for, e.g. "Virat Kohli".

    Returns:
        A ``(player_url, player_id)`` tuple, where ``player_url`` is the href of
        the first result and ``player_id`` is the text after the last ``-`` in
        that href. ``(None, None)`` if the result link cannot be located or read.

    Raises:
        Whatever selenium/webdriver-manager raise while starting Chrome or
        loading the search page; only the result lookup is handled here.
    """
    print("getting player details....")
    start_time = time.time()

    # Percent-encode the whole name (not just spaces) so characters such as
    # '&', '#', ';' or non-ASCII letters cannot corrupt the query string.
    search_url = (
        "https://search.espncricinfo.com/ci/content/site/search.html"
        f"?search={quote(player_name, safe='')};type=player"
    )

    # Headless Chrome; the matching ChromeDriver is fetched by webdriver-manager.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        driver.get(search_url)

        try:
            # The first player hit on the results page.
            player_link_element = driver.find_element(By.CSS_SELECTOR, "h3.name.link-cta a")
            player_url = player_link_element.get_attribute("href")
            player_id = player_url.split('-')[-1]
        except Exception as e:
            print(f"Error in extracting {player_name}'s url:", e)
            return None, None
    finally:
        # Always release the browser, including when navigation itself fails.
        driver.quit()

    end_time = time.time()
    print(f"player url extracted in {end_time - start_time:.2f} seconds")
    return player_url, player_id

In [9]:
def extract_inns_data(player_id, player_name, record_type):
    """Scrape a player's innings-by-innings table from ESPNcricinfo Statsguru.

    Args:
        player_id: ESPNcricinfo player ID, interpolated into the Statsguru URL.
        player_name: Used only in the progress messages.
        record_type: Value for the ``type`` query parameter, e.g. 'batting',
            'bowling', 'fielding' or 'allround'.

    Returns:
        pandas.DataFrame with one row per ``<tr>`` found in the fourth
        ``<tbody>`` of the page. Columns are the non-empty header texts
        followed by ``'match_id'``. Empty cells are dropped from each row
        before the frame is built.

    Raises:
        Errors from selenium while loading or reading the page, and the error
        pandas raises (expected to be ValueError) when the scraped rows do not
        match the number of headers. The browser is closed in every case.
    """
    start_time = time.time()
    print(f"Starting extraction of {player_name}'s {record_type} stats....")

    search_url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html?class=11;template=results;type={record_type};view=innings"

    # Headless Chrome for silent execution.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        driver.get(search_url)

        # Every `.text` access is a round-trip to the browser, so read each
        # element's text exactly once and filter the cached strings afterwards.
        header_texts = [
            header.text
            for header in driver.find_elements(By.CSS_SELECTOR, "thead tr.headlinks th")
        ]
        # The last data column has no header text on the page; name it match_id.
        header_names = [text for text in header_texts if text != ''] + ['match_id']

        rows = driver.find_elements(By.XPATH, "(//tbody)[4]//tr")

        player_data = []
        for row in rows:
            cell_texts = [cell.text for cell in row.find_elements(By.TAG_NAME, "td")]
            player_data.append([text for text in cell_texts if text != ''])

        innings_data = pd.DataFrame(player_data, columns=header_names)
        end_time = time.time()
    finally:
        # Release the browser even if scraping or frame construction fails.
        driver.quit()

    print(f"Extracted {innings_data.shape[0]} records in {end_time - start_time:.2f} seconds")

    return innings_data


In [6]:
def extract_player_info(player_id, player_name):
    """Scrape the profile facts shown on a player's ESPNcricinfo page.

    Args:
        player_id: ESPNcricinfo player ID, used as the URL suffix.
        player_name: Player name; lower-cased with spaces replaced by hyphens
            to form the URL slug, and used in the progress messages.

    Returns:
        A single-row pandas.DataFrame whose columns are the label texts found
        in the profile grid and whose values are the corresponding value texts.

    Raises:
        Errors from selenium while loading or reading the page, and the error
        pandas raises when the number of labels and values differ. The browser
        is closed in every case.
    """
    start_time = time.time()
    print(f"Starting extraction of {player_name}'s info....")

    search_url = f"https://www.espncricinfo.com/cricketers/{player_name.replace(' ', '-').lower()}-{player_id}"

    # Container holding the label/value pairs; both lookups are scoped to it.
    info_grid = "//div[@class='ds-grid lg:ds-grid-cols-3 ds-grid-cols-2 ds-gap-4 ds-mb-8']"
    label_xpath = info_grid + "//p[@class='ds-text-tight-m ds-font-regular ds-uppercase ds-text-typo-mid3']"
    value_xpath = info_grid + "//span[@class='ds-text-title-s ds-font-bold ds-text-typo']"

    # Headless Chrome for silent execution.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        driver.get(search_url)

        header_names = [header.text for header in driver.find_elements(By.XPATH, label_xpath)]
        value_texts = [value.text for value in driver.find_elements(By.XPATH, value_xpath)]

        player_info = pd.DataFrame([value_texts], columns=header_names)
        end_time = time.time()
    finally:
        # Release the browser even if scraping or frame construction fails.
        driver.quit()

    print(f"Extracted player info in {end_time - start_time:.2f} seconds")

    return player_info

In [10]:
player_name = "Virat Kohli"
record_type = 'bowling'  # Can be 'batting', 'bowling', 'fielding', 'allround'

player_url, player_id = get_player_url(player_name)

# get_player_url returns (None, None) when the lookup fails. Stop here instead
# of launching another browser for a profile URL that would end in "-None".
if player_id is None:
    raise RuntimeError(f"Could not resolve an ESPNcricinfo player id for {player_name!r}")

player_info = extract_player_info(player_id, player_name)

# Last expression of the cell: rendered as the cell's rich output.
player_info

getting player details....
player url extracted in 10.09 seconds
Starting extraction of Virat Kohli's info....
Extracted player info in 6.86 seconds


Unnamed: 0,FULL NAME,BORN,AGE,BATTING STYLE,BOWLING STYLE,PLAYING ROLE
0,Virat Kohli,"November 05, 1988, Delhi",36y 81d,Right hand Bat,Right arm Medium,Top order Batter


In [22]:
# Innings-level fielding records for the player resolved in the earlier cell.
fielding = extract_inns_data(
    player_id=player_id,
    player_name=player_name,
    record_type='fielding',
)
fielding

Starting extraction of Virat Kohli's fielding stats....
Extracted 656 records in 105.41 seconds


Unnamed: 0,Dis,Ct,St,Ct Wk,Ct Fi,Inns,Opposition,Ground,Start Date,match_id
0,0,0,0,0,0,2,ODI v Sri Lanka,Dambulla,18 Aug 2008,ODI # 2742
1,3,3,0,0,3,1,ODI v Sri Lanka,Dambulla,20 Aug 2008,ODI # 2745
2,0,0,0,0,0,2,ODI v Sri Lanka,Colombo (RPS),24 Aug 2008,ODI # 2750
3,0,0,0,0,0,2,ODI v Sri Lanka,Colombo (RPS),27 Aug 2008,ODI # 2755
4,0,0,0,0,0,1,ODI v Sri Lanka,Colombo (RPS),29 Aug 2008,ODI # 2756
...,...,...,...,...,...,...,...,...,...,...
651,0,0,0,0,0,3,Test v Australia,Brisbane,14 Dec 2024,Test # 2570
652,1,1,0,0,1,1,Test v Australia,Melbourne,26 Dec 2024,Test # 2571
653,0,0,0,0,0,3,Test v Australia,Melbourne,26 Dec 2024,Test # 2571
654,1,1,0,0,1,2,Test v Australia,Sydney,3 Jan 2025,Test # 2575


In [14]:
# Use a known player id directly, skipping the name search.
player_id, player_name = "45789", "Jacques Kallis"

# Innings-level all-round records, then the profile facts for the same player.
data = extract_inns_data(
    player_id=player_id,
    player_name=player_name,
    record_type='allround',
)
player_info = extract_player_info(player_id=player_id, player_name=player_name)

Starting extraction of Jacques Kallis's allround stats....
Extracted 1314 records in 276.87 seconds
Starting extraction of Jacques Kallis's info....
Extracted player info in 7.01 seconds


In [13]:
# Display the all-round innings table assigned to `data` in the cell above.
# NOTE(review): this cell's execution count (13) is lower than that of the cell
# which assigns `data` (14), so the cells were run out of order; confirm the
# notebook still works under Restart & Run All.
data

Unnamed: 0,Inns,Score,Overs,Conc,Wkts,Ct,St,Opposition,Ground,Start Date,match_id
0,1,1,-,-,-,-,-,Test v England,Durban,14 Dec 1995,Test # 1318
1,2,-,DNB,-,-,0,0,Test v England,Durban,14 Dec 1995,Test # 1318
2,1,-,4.0,2,0,0,0,Test v England,Cape Town,2 Jan 1996,Test # 1321
3,2,7,-,-,-,-,-,Test v England,Cape Town,2 Jan 1996,Test # 1321
4,3,-,DNB,-,-,1,0,Test v England,Cape Town,2 Jan 1996,Test # 1321
...,...,...,...,...,...,...,...,...,...,...,...
1309,2,-,DNB,-,-,1,0,ODI v Sri Lanka,Colombo (RPS),6 Jul 2014,ODI # 3500
1310,1,-,DNB,-,-,0,0,ODI v Sri Lanka,Pallekele,9 Jul 2014,ODI # 3501
1311,2,1,-,-,-,-,-,ODI v Sri Lanka,Pallekele,9 Jul 2014,ODI # 3501
1312,1,4,-,-,-,-,-,ODI v Sri Lanka,Hambantota,12 Jul 2014,ODI # 3502


In [6]:
%run ../scripts/scraper/scraper.py


In [7]:
# Cricketer_Stats_Scraper is not defined in this notebook; presumably it comes
# from the scraper.py script loaded with %run above (confirm).
# Per the captured output, constructing it sets up a WebDriver and resolves the
# player's URL and player ID.
player = Cricketer_Stats_Scraper("Virat Kohli")

Setting up WebDriver...
Extracting Virat Kohli's player URL and Player ID....
Extraction Successful for Virat Kohli.
Time taken to extract URL: 2.92 seconds


In [11]:
# Scrape the batting records; the result is read from `player.battingstats`
# in a later cell. The captured run took about 200 seconds for 639 records.
player.get_player_stats("batting")

Starting extraction of Virat Kohli's batting stats....
Extracted 639 records in 200.16 seconds


In [13]:
# Pull the scraped batting table off the scraper object and display it.
# Requires player.get_player_stats("batting") to have been run first
# (presumably that call populates `battingstats`; confirm in scraper.py).
batting_data = player.battingstats
batting_data

Unnamed: 0,Runs,Mins,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,Match id
0,12,33,22,1,0,54.54,2,lbw,1,ODI v Sri Lanka,Dambulla,18 Aug 2008,ODI # 2742
1,37,82,67,6,0,55.22,2,caught,2,ODI v Sri Lanka,Dambulla,20 Aug 2008,ODI # 2745
2,25,40,38,4,0,65.78,1,run out,1,ODI v Sri Lanka,Colombo (RPS),24 Aug 2008,ODI # 2750
3,54,87,66,7,0,81.81,1,bowled,1,ODI v Sri Lanka,Colombo (RPS),27 Aug 2008,ODI # 2755
4,31,45,46,3,1,67.39,1,lbw,2,ODI v Sri Lanka,Colombo (RPS),29 Aug 2008,ODI # 2756
...,...,...,...,...,...,...,...,...,...,...,...,...,...
634,DNB,-,-,-,-,-,-,-,4,Test v Australia,Brisbane,14 Dec 2024,Test # 2570
635,36,125,86,4,0,41.86,4,caught,2,Test v Australia,Melbourne,26 Dec 2024,Test # 2571
636,5,40,29,0,0,17.24,4,caught,4,Test v Australia,Melbourne,26 Dec 2024,Test # 2571
637,17,101,69,0,0,24.63,4,caught,1,Test v Australia,Sydney,3 Jan 2025,Test # 2575
