In [1]:
# For working with files
import os
import re
import csv

# For manipulating data
import pandas as pd

# For manipulating dates
import datetime

# For getting geographic locations and distances
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim

In [2]:
# Folder holding the dataset CSV files. The load cells build each path as DATA_DIR + '<name>.csv',
# so the trailing slash is required.
DATA_DIR = 'march-machine-learning-mania-2023/'

#### **Load Functions**

In [3]:
def store_cities(csvfile):
    """
    Builds a lookup of "City, State" strings keyed by CityID from a cities CSV file.

    Parameters:
    csvfile (str): The path and name of a CSV file whose header includes CityID, City and State.

    Returns:
    Returns a dictionary mapping each CityID (as read from the file, i.e. a string) to the
    text "City, State".

    Use:
    Use to look up the City and State for a game from its CityID (eg. cities['4030'])
    """
    with open(csvfile, 'r', encoding='latin-1') as source:
        # One entry per data row; if a CityID repeats, the last row seen wins
        return {
            record['CityID']: f"{record['City']}, {record['State']}"
            for record in csv.DictReader(source)
        }

In [4]:
def get_distance(city1, city2):
    """
    Returns the geodesic distance in miles between two locations.

    Parameters:
    city1 (str): First location, passed unchanged to get_coordinates (eg. "Birmingham, AL").
    city2 (str): Second location, passed unchanged to get_coordinates.

    Returns:
    Returns the `.miles` value of geopy's geodesic distance between the two coordinate pairs.

    Use:
    Use to measure how far apart two cities are (eg. get_distance("Birmingham, AL", "Abilene, TX"))
    """
    # NOTE(review): get_coordinates returns (None, None) when a lookup fails or times out and
    # nothing here checks for that -- confirm how geodesic treats such input before trusting
    # the result for unresolved cities.
    origin, destination = get_coordinates(city1), get_coordinates(city2)
    return geodesic(origin, destination).miles

In [5]:
def get_coordinates(city):
    """
    Looks up the latitude and longitude of a single location with the Nominatim geocoder.

    Parameters:
    city (str): The location to geocode (eg. "Birmingham, AL").

    Returns:
    Returns a (latitude, longitude) tuple, or (None, None) when the lookup times out or
    yields nothing with latitude/longitude attributes.

    Use:
    Use to resolve a "City, State" string before measuring distances.
    """
    # A new geocoder client is built on every call
    geocoder = Nominatim(user_agent='fake_useragent')

    try:
        match = geocoder.geocode(city, timeout=10)
        # If geocode returned None, the attribute access raises AttributeError, handled below
        return (match.latitude, match.longitude)
    except (AttributeError, GeocoderTimedOut):
        # Signal "not found" instead of raising
        return (None, None)

In [6]:
def store_seasons(csvfile):
    """
    Converts a CSV file with six columns (Season, DayZero, RegionW, RegionX, RegionY, RegionZ) to 
    TWO dictionaries: 
    (1) with Season as the key and DayZero as the value; and 
    (2) with Season as the key and the four regions names as the value (in tuple)

    Parameters:
    csvfile (str): The path and name of the CSV file to convert. The file must start with a header
    row naming the six columns above.

    Returns:
    Returns two dictionaries for DayZero and (RegionW, RegionX, RegionY, RegionZ) using Season as the key.
    Keys and values are strings exactly as they appear in the file.

    Raises:
    KeyError if one of the six expected column names is missing from the header.

    Use:
    (1) Use to get the date for any game using Season as key eg. mens_day0['2022'] outputs 2021-11-01
    (2) Use to get the four regions for any season using Season as key eg. mens_regions['2022'] outputs 
    ('East', 'West', 'Midwest', 'South')
    """

    # Create two empty dictionaries: one for storing day zero and another for storing regions
    dict_day_zero = {}
    dict_regions = {}

    with open(csvfile, 'r', encoding='latin-1') as csv_file:
        # DictReader consumes the header row, so it is never stored as a bogus
        # {'Season': 'DayZero'} entry, and it skips blank lines instead of failing on them
        for row in csv.DictReader(csv_file):
            season = row['Season']

            # Populate the dictionaries
            dict_day_zero[season] = row['DayZero']
            dict_regions[season] = (row['RegionW'], row['RegionX'], row['RegionY'], row['RegionZ'])

    # Return the two dictionaries
    return dict_day_zero, dict_regions

In [7]:
def store_spellings(csvfile):
    """
    Builds a lookup from alternate team-name spellings to TeamID from a two-column CSV file
    (TeamNameSpelling, TeamID).

    Parameters:
    csvfile (str): The path and name of the CSV file to convert.

    Returns:
    Returns a dictionary with TeamNameSpelling as the key and TeamID (as a string) as the value.

    Use:
    Use to resolve a team's ID from any known spelling of its name (eg. mnames['mt-st-marys'])
    """
    lookup = {}

    with open(csvfile, 'r', encoding='latin-1') as source:
        for record in csv.DictReader(source):
            spelling = record['TeamNameSpelling']
            team_id = record['TeamID']
            # A spelling that appears twice keeps the TeamID from its last row
            lookup[spelling] = team_id

    return lookup

In [8]:
def store_team_conference(csvfile):
    """
    Builds a two-level lookup (Season, then TeamID) of conference abbreviations from a CSV file
    with three columns (Season, TeamID, ConfAbbrev).

    Parameters:
    csvfile (str): The path and name of the CSV file to convert.

    Returns:
    Returns a nested dictionary: the outer key is Season, the inner key is TeamID and the value is
    ConfAbbrev. All keys and values are strings as read from the file.

    Use:
    Use to get the conference abbreviation for a team in a specific season (eg. mens_conferences['1985']['1449'])
    """
    by_season = {}

    with open(csvfile, 'r', encoding='latin-1') as source:
        for record in csv.DictReader(source):
            season, team, conference = record['Season'], record['TeamID'], record['ConfAbbrev']

            # setdefault creates the per-season dictionary the first time a season is seen
            by_season.setdefault(season, {})[team] = conference

    return by_season

In [9]:
def store_tourney_seeds(csvfile):
    """
    Builds a two-level lookup (Season, then TeamID) of tournament seeds from a CSV file with
    three columns (Season, Seed, TeamID).

    Parameters:
    csvfile (str): The path and name of the CSV file to convert.

    Returns:
    Returns a nested dictionary: the outer key is Season, the inner key is TeamID and the value is
    Seed. All keys and values are strings as read from the file.

    Use:
    Use to get the seed for a team in a specific season (eg. mens_seeds['1985']['1449'])
    """
    table = {}

    with open(csvfile, 'r', encoding='latin-1') as source:
        for record in csv.DictReader(source):
            season = record['Season']
            seed = record['Seed']
            team = record['TeamID']

            # Fetch this season's dictionary, creating it on first sight of the season
            per_team = table.get(season)
            if per_team is None:
                per_team = table[season] = {}

            per_team[team] = seed

    return table

In [10]:
def store_conferences(csvfile):
    """
    Builds a lookup from conference abbreviation to its description from a two-column CSV file
    (ConfAbbrev, Description).

    Parameters:
    csvfile (str): The path and name of the CSV file to convert.

    Returns:
    Returns a dictionary with ConfAbbrev as the key and Description as the value.

    Use:
    Use to get the Description (full name) for a conference using its abbreviation (eg. conf_abrv['a_sun'])
    """
    with open(csvfile, 'r', encoding='latin-1') as source:
        # The generator is consumed by dict() while the file is still open
        pairs = (
            (record['ConfAbbrev'], record['Description'])
            for record in csv.DictReader(source)
        )
        return dict(pairs)

In [11]:
def store_sec_tourney_teams(csvfile):
    """
    Builds a two-level lookup (Season, then TeamID) of secondary tournaments from a CSV file with
    three columns (Season, SecondaryTourney, TeamID).

    Parameters:
    csvfile (str): The path and name of the CSV file to convert.

    Returns:
    Returns a nested dictionary: the outer key is Season, the inner key is TeamID and the value is
    SecondaryTourney. All keys and values are strings as read from the file.

    Use:
    Use to get the secondary tournament a team played in for a specific season
    (eg. sec_tourney['2019']['1400'])
    """
    table = {}

    with open(csvfile, 'r', encoding='latin-1') as source:
        for record in csv.DictReader(source):
            season = record['Season']
            tourney = record['SecondaryTourney']
            team = record['TeamID']

            # Reuse the season's dictionary when it exists, otherwise start a new one
            try:
                per_team = table[season]
            except KeyError:
                per_team = table[season] = {}

            per_team[team] = tourney

    return table

In [12]:
import datetime

def set_date(df, gender='mens'):
    """
    Converts one game's DayNum into a calendar date by adding it to the DayZero of that game's Season.

    Parameters:
    df (pandas Series or dict-like): A single game row providing 'DayNum' and 'Season'.
    gender (str): 'mens' or 'Mens' looks DayZero up in mens_day0; any other value uses womens_day0.
        Defaults to 'mens'.

    Returns:
    Returns the game date as a string in YYYY-MM-DD format.

    Raises:
    KeyError if the row's Season (converted to a string) is not a key of the chosen dictionary.

    Use:
    Use row by row, eg. games['Date'] = games.apply(set_date, axis=1) for mens games or
    games.apply(set_date, axis=1, gender='womens') for womens games.
    Relies on the module-level mens_day0 / womens_day0 dictionaries (string Season keys,
    DayZero values in YYYY-MM-DD format).
    """

    # int() also turns a numpy integer from pandas into a plain int, which timedelta requires
    num_days = int(df['DayNum'])

    # Pull starting date DayZero in YYYY-MM-DD format.
    # A membership test replaces `gender == 'mens' | gender == 'Mens'`: `|` binds tighter than `==`,
    # so that expression tried to evaluate 'mens' | gender and raised a TypeError.
    if gender in ('mens', 'Mens'):
        start_date = mens_day0[str(df['Season'])]
    else:
        start_date = womens_day0[str(df['Season'])]

    # Convert starting date to a datetime object
    start_datetime = datetime.datetime.strptime(start_date, "%Y-%m-%d")

    # Add number of days to starting datetime
    end_datetime = start_datetime + datetime.timedelta(days=num_days)

    # Returns new calculated date formatted as YYYY-MM-DD
    return end_datetime.strftime("%Y-%m-%d")

In [13]:
def set_date(df, gender='m'):
    """
    Converts one game's DayNum into a calendar date by adding it to the DayZero of that game's Season.

    Parameters:
    df (pandas Series or dict-like): A single game row providing 'DayNum' and 'Season'.
    gender (str): 'm' or 'M' looks DayZero up in mens_day0; any other value uses womens_day0.
        Defaults to 'm'.

    Returns:
    Returns the game date as a string in YYYY-MM-DD format.

    Raises:
    KeyError if the row's Season (converted to a string) is not a key of the chosen dictionary.

    Use:
    Use to get a specific date from a DayNum field (see example below):
    df['Date'] = df.apply(set_date, axis=1)                # mens games
    df['Date'] = df.apply(set_date, axis=1, gender='w')    # womens games
    Relies on the module-level mens_day0 / womens_day0 dictionaries (string Season keys,
    DayZero values in YYYY-MM-DD format).
    """
    # Gender is now an explicit argument: the previous `df[0]` lookup returned row data, not a gender.
    # int() turns a numpy integer from pandas into a plain int, which timedelta requires.
    day_num = int(df['DayNum'])

    # Pull starting date DayZero in YYYY-MM-DD format.
    # A membership test replaces `gender == 'm' | gender == 'M'`: `|` binds tighter than `==`,
    # so that expression tried to evaluate 'm' | gender and raised a TypeError.
    if gender in ('m', 'M'):
        start_date = mens_day0[str(df['Season'])]
    else:
        start_date = womens_day0[str(df['Season'])]

    # Convert starting date to a datetime object
    start_datetime = datetime.datetime.strptime(start_date, "%Y-%m-%d")

    # Add the elapsed days to day zero
    new_date = start_datetime + datetime.timedelta(days=day_num)

    return new_date.strftime("%Y-%m-%d")

In [14]:
# assume 'mens_day0' and 'womens_day0' are dictionaries with season start dates,
# and 'df' is a pandas DataFrame with 'Season' and 'DayNum' columns

def set_date(df, start_dates):
    """
    Converts the DayNum field in dataframe into a date using the 'Season' field from the current dataframe 
    and DayZero; and creates a new Date field.

    Parameters:
    df (pandas DataFrame): A pandas DataFrame containing 'DayNum' and 'Season' columns.
    start_dates (dict): A dictionary containing the start date (a date string pandas can parse,
        eg. '2021-11-01') for each season. Keys may be strings or integers.

    Returns:
    Returns a copy of the DataFrame with two additional columns: 'DayZero' (datetime) and 'Date'
    (the calculated game date as a YYYY-MM-DD string). Rows whose Season is missing from
    start_dates get missing values in both columns. The input DataFrame is not modified.
    
    Use:
    Use to get a specific date from a DayNum field (see example below):
    new_df = set_date(df, mens_day0) or womens_day0 depending on the DataFrame gender
    """
    # create a copy of the DataFrame to avoid modifying the original
    new_df = df.copy()

    # Compare seasons as strings on both sides: the CSV loaders produce string keys ('2022')
    # while a Season column read with pd.read_csv is integer, and mapping int -> str keys
    # would silently match nothing.
    day_zero_by_season = {str(season): day_zero for season, day_zero in start_dates.items()}
    new_df['DayZero'] = new_df['Season'].astype(str).map(day_zero_by_season)

    # convert start date to datetime object
    new_df['DayZero'] = pd.to_datetime(new_df['DayZero'])

    # add number of days to start date
    new_df['Date'] = new_df['DayZero'] + pd.to_timedelta(new_df['DayNum'], unit='D')

    # convert new date to string in format YYYY-MM-DD
    new_df['Date'] = new_df['Date'].dt.strftime('%Y-%m-%d')

    return new_df

In [15]:
# assume 'mens_day0' and 'womens_day0' are dictionaries with season start dates,
# and 'df' is a pandas DataFrame with 'Season' and 'DayNum' columns

def set_date(df, start_dates):
    """
    Converts the DayNum field in dataframe into a date using the 'Season' field from the current dataframe 
    and DayZero; and creates a new Date field.

    Parameters:
    df (pandas DataFrame): A pandas DataFrame containing 'DayNum' and 'Season' columns.
    start_dates (dict): A dictionary containing the start date (a date string pandas can parse,
        eg. '2021-11-01') for each season. Keys may be strings or integers.

    Returns:
    Returns a copy of the DataFrame with two additional columns: 'DayZero' (datetime) and 'Date'
    (the calculated game date as a YYYY-MM-DD string). The input DataFrame is not modified.
    
    Use:
    Use to get a specific date from a DayNum field (see example below):
    new_df = set_date(df, mens_day0) or womens_day0 depending on the DataFrame gender
    """
    # This definition previously had no body and returned None.
    dated = df.copy()

    # Look seasons up as strings so an integer Season column still matches string dictionary keys
    day_zero_by_season = {str(season): day_zero for season, day_zero in start_dates.items()}
    dated['DayZero'] = pd.to_datetime(dated['Season'].astype(str).map(day_zero_by_season))

    # Day zero plus the elapsed days, rendered as YYYY-MM-DD text
    game_dates = dated['DayZero'] + pd.to_timedelta(dated['DayNum'], unit='D')
    dated['Date'] = game_dates.dt.strftime('%Y-%m-%d')

    return dated
# Sample dictionaries (season -> day zero written as YYYY-Mon-DD).
# NOTE(review): these reuse the names mens_day0 / womens_day0 that the load cells further down
# assign from the real season files; running this cell afterwards replaces the real data.
mens_day0 = {'2022': '2022-Jan-01', '2023': '2023-Jan-01'}
womens_day0 = {'2022': '2022-Jan-07', '2023': '2023-Jan-07'}

# Sample DataFrame
df = pd.DataFrame({'Season': ['2022', '2022', '2023', '2023'],
                   'DayNum': [10, 20, 30, 40]})

# Function to calculate date
def calculate_date(row):
    """
    Returns the datetime of one sample game: the season's day zero plus DayNum days.

    Parameters:
    row (pandas Series or dict-like): Provides 'Season' (a key of the sample dictionaries) and 'DayNum'.

    Returns:
    Returns a datetime.datetime. mens_day0 is used whenever it contains the season; womens_day0 is
    only consulted for seasons missing from mens_day0.
    """
    season = row['Season']
    day0 = mens_day0[season] if season in mens_day0 else womens_day0[season]
    day0_date = datetime.datetime.strptime(day0, '%Y-%b-%d')
    # int() is required: row-wise apply can hand over a numpy integer, which timedelta rejects
    days_to_add = int(row['DayNum'])
    return day0_date + datetime.timedelta(days=days_to_add)

# Apply function to DataFrame to create new column
df['NewDate'] = df.apply(calculate_date, axis=1)

#### **Load Data**

In [16]:
# Load the men's and women's team tables (MTeams.csv / WTeams.csv) as DataFrames
Mens = pd.read_csv(DATA_DIR + 'MTeams.csv')
Womens = pd.read_csv(DATA_DIR + 'WTeams.csv')
print("Done. Loaded  team tables as 'Mens' and 'Womens'.")

Done. Loaded  team tables as 'Mens' and 'Womens'.


In [17]:
# Creates dictionaries mapping an alternate team-name spelling to its TeamID.
# Values are TeamID strings, because store_spellings reads them with the csv module.
mnames = store_spellings(DATA_DIR + 'MTeamSpellings.csv') # keyed by spelling, e.g. mnames['mt-st-marys']
wnames = store_spellings(DATA_DIR + 'WTeamSpellings.csv') # keyed by spelling, e.g. wnames['mt-st-marys']
print('e.g. output - mens:', mnames['mt-st-marys'], 'womens:', wnames['mt-st-marys'])

e.g. output - mens: 1291 womens: 3291


In [18]:
# Creates the Team Coaches table (MTeamCoaches.csv, mens only)
MCoaches = pd.read_csv(DATA_DIR + 'MTeamCoaches.csv')
print("Done. Loaded team coaches table as 'MCoaches', mens only.")

Done. Loaded team coaches table as 'MCoaches', mens only.


In [19]:
# Creates two dictionaries per gender: day zero for each season, and the four region names for each season.
# Both are keyed by Season as a string (e.g. '2022'), and the day-zero values are date strings.
mens_day0, mens_regions = store_seasons(DATA_DIR + 'MSeasons.csv'); # use with Season as key, e.g. mens_day0['2022'] or mens_regions['2022']
womens_day0, womens_regions = store_seasons(DATA_DIR + 'WSeasons.csv'); # use with Season as key, e.g. womens_day0['2022'] or womens_regions['2022']
print('e.g. output - mens:', mens_day0['2022'], mens_regions['2022'], 'womens:', womens_day0['2022'], womens_regions['2022'])

e.g. output - mens: 2021-11-01 ('East', 'West', 'Midwest', 'South') womens: 2021-11-01 ('Bridgeport', 'Spokane', 'Greensboro', 'Wichita')


In [20]:
# Creates Season tables as DataFrames.
# These are the same MSeasons.csv / WSeasons.csv files that store_seasons reads into dictionaries.
MSeason = pd.read_csv(DATA_DIR + 'MSeasons.csv')
WSeason = pd.read_csv(DATA_DIR + 'WSeasons.csv')
print("Done. Loaded season start date and regions tables as 'MSeason' and 'WSeason'.")

Done. Loaded season start date and regions tables as 'MSeason' and 'WSeason'.


In [21]:
# Creates nested dictionaries holding each team's conference abbreviation per season.
# Both keys are strings: Season first, then TeamID.
mens_conferences = store_team_conference(DATA_DIR + 'MTeamConferences.csv') # use as mens_conferences[year][team_id]
womens_conferences = store_team_conference(DATA_DIR + 'WTeamConferences.csv') # use as womens_conferences[year][team_id]
print('e.g. output - mens:', mens_conferences['2022']['1449'], 'womens:', womens_conferences['2022']['3449'])

e.g. output - mens: pac_twelve womens: pac_twelve


In [22]:
# Creates Detailed Regular Season tables (one DataFrame per gender)
MRegular = pd.read_csv(DATA_DIR + 'MRegularSeasonDetailedResults.csv')
WRegular = pd.read_csv(DATA_DIR + 'WRegularSeasonDetailedResults.csv')
print("Done. Loaded detailed regular season tables as 'MRegular' and 'WRegular'.")

Done. Loaded detailed regular season tables as 'MRegular' and 'WRegular'.


In [23]:
# Creates Compact Regular Season tables (Optional).
# Despite the similar names, these hold regular-season results; the season tables are
# 'MSeason' / 'WSeason'.
MSeason_compact = pd.read_csv(DATA_DIR + 'MRegularSeasonCompactResults.csv')
WSeason_compact = pd.read_csv(DATA_DIR + 'WRegularSeasonCompactResults.csv')
print("Done. Loaded compact regular season tables as 'MSeason_compact' and 'WSeason_compact'.")

Done. Loaded compact regular season tables as 'MSeason_compact' and 'WSeason_compact'.


In [24]:
# Convert DayZero to datetime field called 'StartDate'.
# This adds a column to the season tables in place; DayZero itself is left unchanged.
MSeason['StartDate'] = pd.to_datetime(MSeason['DayZero'])
WSeason['StartDate'] = pd.to_datetime(WSeason['DayZero'])

# Show sample output (mens table only)
print(MSeason.shape)
MSeason.head(2)

(39, 7)


Unnamed: 0,Season,DayZero,RegionW,RegionX,RegionY,RegionZ,StartDate
0,1985,1984-10-29,East,West,Midwest,Southeast,1984-10-29
1,1986,1985-10-28,East,Midwest,Southeast,West,1985-10-28


In [25]:
# Creates Game Cities tables (one DataFrame per gender)
MGCities = pd.read_csv(DATA_DIR + 'MGameCities.csv')
WGCities = pd.read_csv(DATA_DIR + 'WGameCities.csv')
print("Done. Loaded game cities tables as 'MGCities' and 'WGCities'.")

Done. Loaded game cities tables as 'MGCities' and 'WGCities'.


In [26]:
# Creates nested dictionaries holding each team's tournament seed per season.
# Both keys are strings: Season first, then TeamID.
mens_seeds = store_tourney_seeds(DATA_DIR + 'MNCAATourneySeeds.csv') # use as mens_seeds[year][team_id]
womens_seeds = store_tourney_seeds(DATA_DIR + 'WNCAATourneySeeds.csv') # use as womens_seeds[year][team_id]
print('e.g. output - mens:', mens_seeds['1985']['1449'], 'womens:', womens_seeds['2001']['3449'])

e.g. output - mens: X05 womens: Z06


In [27]:
# Creates a Tourney Seeding by Round table (read from the mens file MNCAATourneySeedRoundSlots.csv)
Tourney_rounds = pd.read_csv(DATA_DIR + 'MNCAATourneySeedRoundSlots.csv')
print("Done. Loaded tournament seeding for each round in table as 'Tourney_rounds'.")

Done. Loaded tournament seeding for each round in table as 'Tourney_rounds'.


In [28]:
# Creates Tournament Seed Matchup tables from the tourney slots files.
# These DataFrames are separate from the 'mens_seeds' / 'womens_seeds' dictionaries.
MTourney_seeds = pd.read_csv(DATA_DIR + 'MNCAATourneySlots.csv')
WTourney_seeds = pd.read_csv(DATA_DIR + 'WNCAATourneySlots.csv')
print("Done. Loaded tournament seed matchup tables as 'MTourney_seeds' and 'WTourney_seeds'.")

Done. Loaded tournament seed matchup tables as 'MTourney_seeds' and 'WTourney_seeds'.


In [29]:
# Creates Detailed Tournament Results tables (one DataFrame per gender)
MTourney = pd.read_csv(DATA_DIR + 'MNCAATourneyDetailedResults.csv')
WTourney = pd.read_csv(DATA_DIR + 'WNCAATourneyDetailedResults.csv')
print("Done. Loaded detailed tournament tables as 'MTourney' and 'WTourney'.")

Done. Loaded detailed tournament tables as 'MTourney' and 'WTourney'.


In [30]:
# Creates Compact Tournament Results tables (Optional), one DataFrame per gender
MTourney_compact = pd.read_csv(DATA_DIR + 'MNCAATourneyCompactResults.csv')
WTourney_compact = pd.read_csv(DATA_DIR + 'WNCAATourneyCompactResults.csv')
print("Done. Loaded compact tournament tables an 'MTourney_compact' and 'WTourney_compact'.")

Done. Loaded compact tournament tables an 'MTourney_compact' and 'WTourney_compact'.


In [31]:
# Creates a Compact Secondary Tournament Results table (only a mens file is loaded)
MSecondary = pd.read_csv(DATA_DIR + 'MSecondaryTourneyCompactResults.csv')
print("Done. Loaded secondary tournament table as 'MSecondary'.")

Done. Loaded secondary tournament table as 'MSecondary'.


In [32]:
# Creates a dictionary mapping CityID (string) to "City, State" text
cities = store_cities(DATA_DIR + 'Cities.csv'); # use with CityID as key, e.g. cities['4030']
print('e.g. output -', cities['4030'])

e.g. output - Birmingham, AL


In [33]:
# Creates a dictionary mapping a conference abbreviation to its full name (Description column)
conf_abrv = store_conferences(DATA_DIR + 'Conferences.csv'); # use with conference abbrv as key, e.g. conf_abrv['a_sun']
print('e.g. output - ', conf_abrv['a_sun'])

e.g. output -  Atlantic Sun Conference


In [34]:
# Creates a Conference Tourney Games table (read from the mens file MConferenceTourneyGames.csv)
CTourney = pd.read_csv(DATA_DIR + 'MConferenceTourneyGames.csv')
print("Done. Loaded conference tournament games table as 'CTourney'.")

Done. Loaded conference tournament games table as 'CTourney'.


In [35]:
# Creates a nested dictionary holding the secondary tournament each team played in, per season.
# Both keys are strings: Season first, then TeamID. Only a mens file is loaded.
sec_tourney = store_sec_tourney_teams(DATA_DIR + 'MSecondaryTourneyTeams.csv') # use as sec_tourney[year][team_id]
print('e.g. output -', sec_tourney['2019']['1400'])

e.g. output - NIT


#### **Build Tables**

In [36]:
# Start the GAMES table as a copy of the mens detailed regular-season results,
# so columns added to GAMES do not alter MRegular
GAMES = MRegular.copy()

# Show sample output
print(GAMES.shape)
GAMES.head(2)

(106834, 34)


Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,...,24,9,20,20,25,7,12,8,6,16


In [37]:
# Merge dataframes on 'Season' column.
# A left join keeps exactly one output row per GAMES row, in GAMES order, even when a season is
# missing from MSeason (its date is then NaT instead of the row disappearing).
# validate='many_to_one' raises if MSeason lists a season twice, which would duplicate games.
merged_df = pd.merge(GAMES, MSeason, on='Season', how='left', validate='many_to_one')

# Calculate date using vectorized operations
merged_df['Date'] = merged_df['StartDate'] + pd.to_timedelta(merged_df['DayNum'], unit='D')

# Update Games with the calculated dates.
# Assign by position: merge() returns a fresh 0..n-1 index, so label-based assignment would only
# line up if GAMES happened to carry that same index.
GAMES['Date'] = merged_df['Date'].to_numpy()

# Show sample output
print(GAMES.shape)
GAMES.tail(2)

(106834, 35)


Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF,Date
106832,2023,113,1433,88,1386,63,A,0,31,59,...,13,18,5,20,10,11,3,3,15,2023-02-21
106833,2023,113,1437,64,1462,63,A,0,25,56,...,9,12,6,27,17,14,4,1,10,2023-02-21


#### **Custom Functions**

In [38]:
def store_day_zero(file):
    """
    Converts a CSV file with six columns (Season, DayZero, RegionW, RegionX, RegionY, RegionZ) to FIVE
    dictionaries, each with Season as the key:
    (1) DayZero; (2) RegionW; (3) RegionX; (4) RegionY; (5) RegionZ.
    Every value is a list holding one entry per row of that season.

    Parameters:
    file (str): The path and name of the CSV file to convert. The file must start with a header
    row naming the six columns above.

    Returns:
    Returns a tuple of five dictionaries (dict_zero, dict_w, dict_x, dict_y, dict_z) using Season
    (as a string) as the key.

    Raises:
    KeyError if one of the six expected column names is missing from the header.

    Use:
    dayz, reg_w, reg_x, reg_y, reg_z = store_day_zero(path)
    (1) Use to get the date for any season eg. dayz['2022'] outputs ['2021-11-01']
    (2) Use to get one region for any season eg. reg_w['2022'] outputs ['East']
    """

    # Create five dictionaries with Season as the key and the other columns as the value
    dict_zero = {}
    dict_w = {}
    dict_x = {}
    dict_y = {}
    dict_z = {}

    with open(file, 'r', encoding='latin-1') as csvfile:
        # DictReader consumes the header row, so the column names are never stored as a
        # 'Season' entry, and blank lines are skipped instead of failing
        for row in csv.DictReader(csvfile):
            season = row['Season']

            # Populate the dictionaries
            dict_zero.setdefault(season, []).append(row['DayZero'])
            dict_w.setdefault(season, []).append(row['RegionW'])
            dict_x.setdefault(season, []).append(row['RegionX'])
            dict_y.setdefault(season, []).append(row['RegionY'])
            dict_z.setdefault(season, []).append(row['RegionZ'])

    # Return the dictionaries as a tuple
    return dict_zero, dict_w, dict_x, dict_y, dict_z