In [5]:
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd

"""
This script processes a ranked list of Borda Counts in decreasing order generated from AP College Poll Top 25 voting for every week
in the seasons from 2014 to 2024. It iteratively gathers poll data for all combinations of weeks & seasons and generates
separate files for each week & season.

Input: URLs corresponding to a ranked list of Borda Counts in decreasing order for each week from 2014 to 2024.

Output: 
1. Separate CSV files for the Borda Count data for the corresponding week and season
"""


def org_borda_count_dictionary(Week, url, short_szn=True):
    """Scrape one poll page and collect each listed team's Borda points.

    Args:
        Week: URL slug of the poll week (e.g. 'pre-season', 'week-2',
            'final-rankings').
        url: Address of the poll page to download.
        short_szn: When True the season has no 'week-16' poll, so that week
            yields an empty result without downloading anything.

    Returns:
        A dict mapping each team slug to ``[team slug, points text]`` in the
        order the teams appear on the page. The dict is empty when the week
        does not exist for a short season, when the download fails, or when
        the page contains no 'teamBar' rows.
    """
    teams = {}

    #Short seasons do not have a 'week-16'; bail out before paying for a network round trip
    if short_szn and Week == 'week-16':
        return teams

    #Download the page; a timeout keeps a stalled server from hanging the whole scrape
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"bad: request for {url} failed ({exc})")
        return teams

    if response.status_code != 200:
        print(f"bad: HTTP {response.status_code} for {url}")
        return teams

    soup = BeautifulSoup(response.content, 'html.parser')

    #Extracts teams and their corresponding Borda Count by iterating through div elements that contain the desired information
    for row in soup.find_all('div', class_='teamBar'):
        try:
            points = row.find('div', class_='secondRow').find('span', class_='teamPoints').find('b').text
            #The team slug is the fourth '/'-separated piece of the team link's href
            team = row.find('span', class_='teamName').find('a')['href'].split("/")[3]
        except (AttributeError, TypeError, KeyError, IndexError):
            #find() returns None for a missing tag; skip the malformed row instead of aborting the page
            print(f"skipping malformed team row on {url}")
            continue
        print(team, points)
        teams[team] = [team, points]

    return teams


def csv_data_writer_by_year_and_week(year, Week, num_week):
    """Fetch one week's Borda counts and save them as a CSV file.

    Args:
        year: Season as a string (e.g. "2014"); used in the URL and in the
            output path.
        Week: URL slug of the poll week (e.g. 'week-2', 'final-rankings').
        num_week: 1-based position of the week, used in the output file name.

    Nothing is written when the scrape returns no teams.
    """
    url = "https://collegepolltracker.com/football/" + year + "/" + Week

    #Seasons listed here have a 'week-16' poll; every other season is treated as short
    seventeen_week_seasons = ("2014", "2019", "2020")
    is_short = year not in seventeen_week_seasons

    borda_by_team = org_borda_count_dictionary(Week, url, short_szn=is_short)

    #No data means no folder and no file
    if not borda_by_team:
        return

    #A short season skips 'week-16', so its final rankings sit one slot earlier
    if is_short and Week == 'final-rankings':
        num_week -= 1

    #Make sure the per-season folder exists, then write this week's table into it
    season_dir = f"./original_borda_top25/season_{year}"
    os.makedirs(season_dir, exist_ok=True)
    output_file = f'./original_borda_top25/season_{year}/{year}_week{num_week}_top25.csv'
    table = pd.DataFrame.from_dict(borda_by_team, orient='index', columns=['Teams', 'Borda Points'])
    table.to_csv(output_file, mode='w', index=False, header=True)


def csv_creation():
    """Write a CSV for every season/week combination from 2014 through 2024.

    Weeks are numbered from 1 in list order. The 2024 season stops before
    'week-6'.
    """
    seasons = ("2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023", "2024")
    week_slugs = (
        'pre-season', 'week-2', 'week-3', 'week-4', 'week-5', 'week-6',
        'week-7', 'week-8', 'week-9', 'week-10', 'week-11', 'week-12',
        'week-13', 'week-14', 'week-15', 'week-16', 'final-rankings',
    )

    #Walk every season, numbering its weeks from 1 as we go
    for season in seasons:
        for position, slug in enumerate(week_slugs, start=1):
            #Hard-codes a specific end week for 2024, in order to match data in results folder
            if season == "2024" and slug == "week-6":
                break
            csv_data_writer_by_year_and_week(season, slug, position)

#Run the full scrape only when executed directly (script or notebook cell), not when imported
if __name__ == "__main__":
    csv_creation()

florida-state-seminoles 1496
alabama-crimson-tide 1361
oregon-ducks 1334
oklahoma-sooners 1324
ohio-state-buckeyes 1207
auburn-tigers 1198
ucla-bruins 1106
michigan-state-spartans 1080
south-carolina-gamecocks 1015
baylor-bears 966
stanford-cardinal 885
georgia-bulldogs 843
lsu-tigers 776
wisconsin-badgers 637
usc-trojans 626
clemson-tigers 536
notre-dame-fighting-irish 445
ole-miss-rebels 424
arizona-state-sun-devils 357
kansas-state-wildcats 242
texas-am-aggies 238
nebraska-cornhuskers 226
north-carolina-tar-heels 194
missouri-tigers 134
washington-huskies 130
ucf-knights 94
florida-gators 87
texas-longhorns 86
duke-blue-devils 71
iowa-hawkeyes 68
louisville-cardinals 48
marshall-thundering-herd 41
oklahoma-state-cowboys 37
virginia-tech-hokies 26
tcu-horned-frogs 23
mississippi-state-bulldogs 22
michigan-wolverines 19
texas-tech-red-raiders 19
miami-fl-hurricanes 16
cincinnati-bearcats 15
boise-state-broncos 10
oregon-state-beavers 10
byu-cougars 8
northwestern-wildcats 8
penn-state