In [10]:
import pandas as pd

pts_err_cnt = 0
raw_key_only_cnt = 0
processed_key_only_cnt = 0

def write_to_err_txt(pts_errs, raw_key_only, processed_key_only, year, week):
    #Checks for the presence of any errors and writes a corresponding header
    if pts_errs or raw_key_only or processed_key_only:
        with open("errors.txt", "a") as file:
            file.write(f"ERRORS FOR SEASON {year} AND WEEK {week}\n")
    
    #Checks for any errors in point totals and writes output relaying that info to the 'errors.txt' file
    if pts_errs:
        with open("errors.txt", "a") as file:
            file.write("TEAMS WITH DIFFERENT PTS/RANKINGS BETWEEN RAW AND PROCESSED FILE\n")
            for i in range(0, len(pts_errs)):
                file.write(pts_errs[i])

    #Checks for teams only in the raw borda_count and writes output relaying that info to the 'errors.txt' file
    if raw_key_only:
        with open("errors.txt", "a") as file:
            file.write("TEAMS THAT ONLY SHOW UP IN RAW FILE\n")
            for i in range(len(raw_key_only)):
                file.write(raw_key_only[i])

    #Checks for teams only in the processed borda_count and writes output relaying that info to the 'errors.txt' file
    if processed_key_only:
        with open("errors.txt", "a") as file:
            file.write("\nTEAMS THAT ONLY SHOW UP IN PROCESSED FILE\n")
            for i in range(len(processed_key_only)):
                file.write(processed_key_only[i])

def compare_borda_count(raw_filename, raw_dict, processed_filename, processed_dict, year, week):
    """
    Compares the borda count that was programmatically calculated with the borda count that was scraped from poll tracker website 
    (for a specific the ballot of a specific week and year)

    Args:
        raw_filename: The name of the CSV file containing the scraped borda count for a particular week within a year.
        raw_dict: A dictionary that contains the borda count data in raw_filename, where the keys are teams and 
        values are borda count totals for that team.
        processed_filename: The name of the CSV file containing the programmatically calculated borda count for a particular
        week within a year.
        processed_dict: A dictionary that contains the borda count data in processed_filename, where the keys are teams and 
        values are borda count totals for that team.
        year: The season/year the borda count data is associated with.
        week: The week within the year/season that borda count data is associated with.

    Returns:
        This function does not actually return anything. What it does is instantiate & populate the parameters that are
        passed to 'write_to_err_txt', which will write data that parameters contain to 'errors.txt' file
    """
    raw_df = pd.read_csv(raw_filename)
    raw_rankings = raw_df["Teams"].tolist()

    processed_df = pd.read_csv(processed_filename)
    processed_rankings = processed_df["Teams"].tolist()

    keys_only_in_raw = [key for key in raw_dict if key not in processed_dict]
    keys_only_in_processed = [key for key in processed_dict if key not in raw_dict]    

    pts_errs = []
    raw_key_only = []
    processed_key_only = []

    global pts_err_cnt, processed_key_only_cnt, raw_key_only_cnt

    for key in raw_dict:
        if key in processed_dict and raw_dict[key] != processed_dict[key]:
            pts_errs.append(f"RAW FILE: {key} has ranking position of {raw_rankings.index(key) + 1} and has a total of {raw_dict[key]} pts.\n")
            pts_errs.append(f"PROCESSED FILE: {key} has ranking position of {processed_rankings.index(key) + 1} and has a total of {processed_dict[key]} pts.\n\n")
            pts_err_cnt += 1

    for key in keys_only_in_raw:
        raw_key_only.append(f"{key} accounted for in raw data BUT not in processed data.\n")
        raw_key_only_cnt += 1

    for key in keys_only_in_processed:
        processed_key_only.append(f"{key} accounted for in processed data BUT not in raw data.\n")
        processed_key_only_cnt += 1
    # print(pts_err_cnt, raw_key_only_cnt, processed_key_only_cnt)
    write_to_err_txt(pts_errs, raw_key_only, processed_key_only, year, week)

def csv_to_dict(csv_file):
    """Converts a two-column CSV file into a dictionary.

    Args:
        csv_file (str): Path to the CSV file.

    Returns:
        dict: Dictionary containing key-value pairs from the CSV columns.
    """
    result = {}

    #Checks if the file path provided is valid, if the filename is invalid print the filename and return an empty dictionary
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"File not found: {csv_file}")
        return result

    for index, row in df.iterrows():
        if len(row) != 2:
            raise ValueError("CSV file must have exactly two columns.")
        result[row[0]] = row[1]
        
    return result

def err_txt_creation():
    #Clears the output text file 'errors.txt' because the file write mode is set to append, ensuring that
    #  no undesired content is in the file
    f = open("errors.txt", "w")
    f.truncate()
    f.close()

    # pts_err_cnt = 0
    # raw_key_only_cnt = 0
    # processed_key_only_cnt = 0

    years = ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023", "2024"]
    Weeks = ['week1', 'week2', 'week3', 'week4', 'week5', 'week6', 'week7', 'week8', 'week9', 'week10', 'week11', 'week12', 'week13', 'week14', 'week15', 'week16', 'week17']

    #Iterate through all combinations of years and weeks in order to make all the csv files we need
    for year in years:
        for week in Weeks:
            raw_csv_file = f"./original_borda_count/original_borda_top25/season_{year}/{year}_{week}_top25.csv"
            processed_csv_file = f"./results/borda_top25/season_{year}/{year}_{week}_top25.csv"
            raw_dict = csv_to_dict(raw_csv_file)
            processed_dict = csv_to_dict(processed_csv_file)

            #Only calls the 'compare_borda_count' if raw_dict and processed_dict are non-empty dictionaries
            if raw_dict and processed_dict:
                compare_borda_count(raw_csv_file, raw_dict, processed_csv_file, processed_dict, year, week)

    with open("errors.txt", "a") as file:
        file.write(f"\nTotal number of point errors: {pts_err_cnt}\n")
        file.write(f"Total number of teams only in RAW: {raw_key_only_cnt}\n")
        file.write(f"Total number of teams only in PROCESSED: {processed_key_only_cnt}\n")

err_txt_creation()



  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row

File not found: ./original_borda_count/original_borda_top25/season_2015/2015_week2_top25.csv
File not found: ./results/borda_top25/season_2015/2015_week2_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]


File not found: ./original_borda_count/original_borda_top25/season_2015/2015_week17_top25.csv
File not found: ./results/borda_top25/season_2015/2015_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]


File not found: ./original_borda_count/original_borda_top25/season_2016/2016_week17_top25.csv
File not found: ./results/borda_top25/season_2016/2016_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row

File not found: ./original_borda_count/original_borda_top25/season_2017/2017_week17_top25.csv
File not found: ./results/borda_top25/season_2017/2017_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]


File not found: ./original_borda_count/original_borda_top25/season_2018/2018_week17_top25.csv
File not found: ./results/borda_top25/season_2018/2018_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row

File not found: ./original_borda_count/original_borda_top25/season_2021/2021_week17_top25.csv
File not found: ./results/borda_top25/season_2021/2021_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]


File not found: ./original_borda_count/original_borda_top25/season_2022/2022_week17_top25.csv
File not found: ./results/borda_top25/season_2022/2022_week17_top25.csv


  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]


File not found: ./original_borda_count/original_borda_top25/season_2023/2023_week17_top25.csv
File not found: ./results/borda_top25/season_2023/2023_week17_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week6_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week7_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week8_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week9_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week10_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week11_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week12_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week13_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week14_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week15_top25.csv
File not found: ./results/borda_top25/season_2024/2024_week16_top25.csv
File not found: ./original_borda_count/origina

  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
  result[row[0]] = row[1]
