In [1]:
import datetime
import os
import re
from csv import writer
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup
from requests import get

In [2]:
# Seconds to wait on pdga.com before giving up, so a stalled connection
# cannot hang the notebook indefinitely.
_REQUEST_TIMEOUT = 30

# Matches links such as "/tour/event/12345"; the group captures the whole
# event number, however many digits it has.
_EVENT_HREF = re.compile(r'/t\w*/e\w*/(\d{5,})')


def _fetch_soup(page_url):
    """Download ``page_url`` and return it parsed by BeautifulSoup (lxml)."""
    response = get(page_url, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def _find_event_ids(search_soup):
    """Return the event numbers linked from a search page, unique, in page order."""
    event_ids = []
    for link in search_soup.find_all('a', {'href': _EVENT_HREF}):
        event_id = _EVENT_HREF.search(link.get('href')).group(1)
        if event_id not in event_ids:
            event_ids.append(event_id)
    return event_ids


def _write_table_csv(table, filename):
    """Write the header and data rows of one HTML ``table`` to ``filename``."""
    # newline='' is what the csv module requires; without it Windows gets
    # doubled line endings.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        csv_writer = writer(f)
        for row in table.find_all('tr'):
            headers = row.find_all('th')
            if headers:
                csv_writer.writerow([header.text.strip() for header in headers])
            columns = row.find_all('td')
            # Header-only rows have no <td> cells; skip them rather than
            # emitting a blank line.
            if columns:
                csv_writer.writerow([column.text.strip() for column in columns])


def _scrape_event(event_id, output_path, year_tag):
    """Save every table of one event page as CSV and return the merged-results path.

    Each table is written to ``{title}{year_tag}_{n}.csv``. Tables whose first
    column is 'Place' are concatenated into ``{title}{year_tag}_merge.csv``.
    """
    soup = _fetch_soup(f'https://www.pdga.com/tour/event/{event_id}')

    # Build a filename stem from the part of the <title> before the first '|'.
    titular = str(soup.find('title'))
    title = titular.split('|')[0].replace('<title>', '')
    # Strip characters that are not allowed in Windows filenames.
    title = re.sub(r'[\\/:*?"<>]', '', title)
    title = title.replace(' ', '_')

    result_frames = []
    for num, table in enumerate(soup.find_all('table'), start=1):
        filename = os.path.join(output_path, f'{title}{year_tag}_{num}.csv')
        _write_table_csv(table, filename)
        try:
            df = pd.read_csv(filename)
        except pd.errors.EmptyDataError:
            # The table had no rows at all; nothing to merge.
            continue
        if df.columns[0] == 'Place':
            result_frames.append(df)

    # Concatenate once; pd.concat rejects an empty list, so fall back to an
    # empty frame when no results table was found.
    merged = pd.concat(result_frames) if result_frames else pd.DataFrame()
    merged_path = os.path.join(output_path, f'{title}{year_tag}_merge.csv')
    merged.to_csv(merged_path, index=False)
    return merged_path


def get_scores(output_path):
    """Interactively look up a PDGA event and save its result tables as CSV.

    Prompts for an event name, a starting month and a starting year, searches
    pdga.com for events from the first day of that month up to today, and
    scrapes the first matching event on the search page. Only one event is
    scraped per call; if several match, a message reports which one was used.

    Parameters
    ----------
    output_path : str
        Existing directory that receives the per-table CSVs and the merged CSV.

    Returns
    -------
    tuple of (str, str)
        Path of the merged results CSV, and the event name as typed with
        spaces replaced by underscores.

    Raises
    ------
    ValueError
        If the search page links to no events.
    requests.HTTPError
        If pdga.com answers with an error status.
    """
    event_name = input("Name of Event: ")
    # Zero-pad so that "1" and "01" both produce a valid ISO date.
    month = input("Beginning Month of Search (01, 02...): ").strip().zfill(2)
    year = input("Year to Search (2022, 2023...): ").strip()

    today = datetime.date.today()
    encoded_name = quote(event_name, safe='')
    search = (
        'https://www.pdga.com/tour/search'
        f'?OfficialName={encoded_name}'
        f'&date_filter[min][date]={year}-{month}-01'
        f'&date_filter[max][date]={today.isoformat()}'
    )

    event_ids = _find_event_ids(_fetch_soup(search))
    if not event_ids:
        raise ValueError(
            f'No PDGA events found for {event_name!r} starting {year}-{month}-01'
        )
    if len(event_ids) > 1:
        print(f'{len(event_ids)} events matched; using event {event_ids[0]}')

    # The current year is embedded in every output filename.
    path_to_bdf = _scrape_event(event_ids[0], output_path, str(today.year))
    return path_to_bdf, event_name.replace(' ', '_')

## fix PDGA# corresponding to 'drafted' vs 'current roster'

In [26]:
def update_tables(path_to_roster, scores_df_path):
    """Join an event's results onto the roster, keeping only rostered players.

    Both CSVs are read with every column as a string and matched on 'PDGA#'
    (inner join, so only players present in both files are kept). The result
    is sorted by 'Team' then 'Current Roster #', and the score-detail columns
    from the results table are removed.

    Parameters
    ----------
    path_to_roster : str
        Roster CSV; must contain 'PDGA#', 'Team' and 'Current Roster #'.
    scores_df_path : str
        Results CSV; must contain 'PDGA#'.

    Returns
    -------
    pandas.DataFrame
        Roster rows for players found in the results, plus whatever results
        columns are not in the drop list below.
    """
    # Score-detail columns to remove. Events with fewer rounds lack some of
    # these (e.g. 'Rd3' / 'Unnamed: 11'), so missing ones are ignored below.
    score_columns = [
        'Points', 'Name', 'Rating', 'Par',
        'Rd1', 'Unnamed: 7',
        'Rd2', 'Unnamed: 9',
        'Rd3', 'Unnamed: 11',
        'Total', 'Prize',
    ]

    roster = pd.read_csv(path_to_roster, dtype=str)
    scores_df = pd.read_csv(scores_df_path, dtype=str)
    found_players = scores_df[scores_df['PDGA#'].isin(roster['PDGA#'])]

    # Results columns that clash with a roster column get the '_r' suffix.
    merge = (
        roster.set_index('PDGA#')
        .join(found_players.set_index('PDGA#'), how='inner', rsuffix='_r')
        .reset_index()
        # NOTE(review): columns are strings, so 'Current Roster #' sorts
        # lexicographically ("10" before "2") - confirm this is acceptable.
        .sort_values(by=['Team', 'Current Roster #'])
    )
    return merge.drop(columns=score_columns, errors='ignore')

In [27]:
def main():
    """Run the interactive workflow: scrape an event, then update the roster.

    Prompts for an output directory and a roster CSV path, calls
    ``get_scores`` to download the event's results, joins them to the roster
    with ``update_tables``, and writes the result to
    ``<output dir>/<event>_roster_updated.csv``.
    """
    path = input("Output Path? > ")
    merged_scores_path, event_label = get_scores(path)
    roster_path = input("Path to Roster? > ")
    new_table = update_tables(roster_path, merged_scores_path)
    # os.path.join picks the right separator for the host OS instead of a
    # hard-coded backslash.
    out = os.path.join(path, f'{event_label}_roster_updated.csv')
    new_table.to_csv(out)

In [25]:
# Launch the interactive workflow; main() prompts for every input it needs.
main()

Output Path? > C:\Users\Z\Documents\Disc Golf\Test CSVs
Name of Event: Waco Annual Charity
Beginning Month of Search (01, 02...): 01
Year to Search (2022, 2023...): 2022
Path to Roster? > C:\Users\Z\Documents\Team_Rosters.csv
