In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import numpy as np
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [10]:
# Column layout shared by the populated and the empty result, so every CSV
# written from this function carries the same header.
_FIXTURE_COLUMNS = [
    'Round', 'Date', 'Time', 'Home_Team', 'Home_Score', 'Away_Score', 'Away_Team',
    'Home_Score_AET', 'Away_Score_AET', 'Home_Penalties', 'Away_Penalties',
    'Home_Points', 'Away_Points',
]


def _element_texts(driver, by, selector, skip=0):
    """Return the ``.text`` of every element matching ``selector``, dropping the first ``skip``."""
    return [element.text for element in driver.find_elements(by, selector)[skip:]]


def _fill_down_headers(sequence, headers):
    """Label each non-header entry of ``sequence`` with the header that precedes it.

    ``sequence`` interleaves header texts (dates or round names) with one entry
    per game, in page order. Entries that appear before the first header are
    skipped, and an input without any header yields an empty list.
    """
    header_texts = set(headers)
    labels = []
    current_header = None
    header_seen = False
    for text in sequence:
        if text in header_texts:
            current_header = text
            header_seen = True
        elif header_seen:
            labels.append(current_header)
    return labels


def scrape_xscores_upcoming_fixtures(country, league_name, season='2022-2023',
                                     chromedriver_path='/Users/willfitzhugh/Desktop/chromedriver',
                                     timeout=15):
    """Scrape the upcoming fixtures of one competition from xscores.com.

    Parameters
    ----------
    country : str
        Country segment of the xscores URL, e.g. ``'spain'``.
    league_name : str
        Competition segment of the xscores URL, e.g. ``'primera-division'``.
    season : str, optional
        Season segment appended after ``/fixtures/``.
    chromedriver_path : str, optional
        Path handed to ``webdriver.Chrome``.
    timeout : int or float, optional
        Seconds to wait for the first ``div.match_home_txt`` element.

    Returns
    -------
    pandas.DataFrame
        One row per fixture with the columns listed in ``_FIXTURE_COLUMNS``.
        The score, extra-time, penalty and points columns are filled with NaN;
        they exist so the frame matches the structure used for completed games.
        If the fixture elements do not appear within ``timeout`` seconds, an
        empty frame with the same columns is returned.

    Raises
    ------
    Exception
        Errors raised while opening the page or while assembling the frame
        (for example columns of unequal length) propagate to the caller. The
        browser is closed first in every case.
    """
    url = 'https://www.xscores.com/soccer/' + country + '/' + league_name + '/fixtures/' + season

    # NOTE(review): recent Selenium 4 releases expect a Service object rather
    # than a positional driver path - confirm against the installed version.
    driver = webdriver.Chrome(chromedriver_path)
    try:
        driver.get(url)

        # Wait for the fixtures table; if it never shows up, nothing is scraped.
        # `Exception` (not a bare except) keeps this best-effort without
        # swallowing KeyboardInterrupt / SystemExit.
        try:
            WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'div.match_home_txt'))
            )
        except Exception:
            return pd.DataFrame(columns=_FIXTURE_COLUMNS)

        # The first match of each per-game selector is skipped; presumably it is
        # the table's header row - TODO confirm against the live page.
        home_teams = _element_texts(driver, By.CSS_SELECTOR, 'div.match_home_txt', skip=1)
        away_teams = _element_texts(driver, By.CSS_SELECTOR, 'div.match_away_txt', skip=1)
        times = _element_texts(driver, By.CSS_SELECTOR, 'div.match_ko', skip=1)

        # Header texts on their own, used to recognise them in the mixed lists.
        dates = _element_texts(driver, By.CSS_SELECTOR, 'div.round_date')
        rounds = _element_texts(driver, By.CSS_SELECTOR, 'div.round_name')

        # Headers and kick-off times in a single query each, so the position of
        # every header relative to the games it covers is preserved.
        dates_times = _element_texts(
            driver, By.XPATH,
            "//div[contains(@class, 'round_date') or contains(@class, 'match_ko')]",
            skip=1,
        )
        rounds_times = _element_texts(
            driver, By.XPATH,
            "//div[contains(@class, 'match_ko') or contains(@class, 'round_name')]",
            skip=1,
        )

        # Score columns are included to match the data structure of completed games.
        return pd.DataFrame({
            'Round': _fill_down_headers(rounds_times, rounds),
            'Date': _fill_down_headers(dates_times, dates),
            'Time': times,
            'Home_Team': home_teams,
            'Home_Score': np.nan,
            'Away_Score': np.nan,
            'Away_Team': away_teams,
            'Home_Score_AET': np.nan,
            'Away_Score_AET': np.nan,
            'Home_Penalties': np.nan,
            'Away_Penalties': np.nan,
            'Home_Points': np.nan,
            'Away_Points': np.nan,
        })
    finally:
        # Always release the browser, whether scraping succeeded or not.
        driver.quit()

---
#### Scrape For Upcoming Fixtures

In [7]:
# Domestic leagues to scrape, one (country, league_name) URL-slug pair per row.
league_names = pd.DataFrame(
    [
        ('spain', 'primera-division'),
        ('england', 'premier-league'),
        ('germany', 'bundesliga'),
        ('france', 'ligue-1'),
        ('italy', 'serie-a'),
    ],
    columns=['country', 'league_name'],
)

# Cup competitions, in the same (country, league_name) layout.
# NOTE(review): every domestic cup row uses the 'fa-cup' slug - confirm that
# this is the slug the site uses for the non-English cups.
cup_names = pd.DataFrame(
    [
        ('europe-uefa', 'uefa-champions-league'),
        ('europe-uefa', 'uefa-europa-league'),
        ('spain', 'fa-cup'),
        ('england', 'fa-cup'),
        ('germany', 'fa-cup'),
        ('france', 'fa-cup'),
        ('italy', 'fa-cup'),
    ],
    columns=['country', 'league_name'],
)

# Stack leagues on top of cups; each frame keeps its own row labels.
leagues_cups = pd.concat((league_names, cup_names), axis=0)
leagues_cups

Unnamed: 0,country,league_name
0,spain,primera-division
1,england,premier-league
2,germany,bundesliga
3,france,ligue-1
4,italy,serie-a
0,europe-uefa,uefa-champions-league
1,europe-uefa,uefa-europa-league
2,spain,fa-cup
3,england,fa-cup
4,germany,fa-cup


In [17]:
# Scrape every competition in turn and write its fixtures to its own CSV.
# Columns are read by position: 0 is the country slug, 1 the competition slug.
for country, league_name in zip(leagues_cups.iloc[:, 0], leagues_cups.iloc[:, 1]):
    data = scrape_xscores_upcoming_fixtures(country, league_name)
    data['season'] = 2022

    label = f'Upcoming Fixtures/{country}_{league_name}_upcoming_fixtures.csv'
    data.to_csv(label, index=False)

  driver = webdriver.Chrome(path)


KeyboardInterrupt: 