In [1]:
# Folder holding the input CSV files. Keep the trailing slash: later cells build
# file paths by plain string concatenation (e.g. DATA_DIR + 'MTeams.csv').
DATA_DIR = 'march-machine-learning-mania-2023/' # input('Enter file path for data files: ')

In [2]:
# Import Libraries
import pandas as pd
import numpy as np
import warnings

In [3]:
# Read the men's and women's Seasons tables from CSV files addressed by relative path.
# NOTE(review): unlike the Teams / Spellings inputs, these paths do not go through
# DATA_DIR - confirm that is intended.
seasonsm, seasonsw = map(pd.read_csv, ('seasons-mens.csv', 'seasons-womens.csv'))

In [4]:
# Distinct seasons, taken from the men's Seasons table.
seasons = seasonsm['Season'].unique()

# Distinct TeamIDs per table, with one hard-coded TeamID removed from each via a boolean mask.
# NOTE(review): the reason 1445 / 3445 are excluded is not stated in this notebook - confirm.
all_men_ids = seasonsm['TeamID'].unique()
all_wom_ids = seasonsw['TeamID'].unique()
men_ids = all_men_ids[all_men_ids != 1445]
wom_ids = all_wom_ids[all_wom_ids != 3445]

##### **Data Section 2 - TEAMS**

In [5]:
# Sanity check: how many men's and women's TeamIDs remain after the exclusion.
print(men_ids.size, wom_ids.size)

365 363


In [6]:
# Load the men's and women's Teams tables.
MTeams = pd.read_csv(DATA_DIR + 'MTeams.csv')
WTeams = pd.read_csv(DATA_DIR + 'WTeams.csv')

# Keep only the rows whose TeamID appears in the ID arrays built earlier.
keep_men = MTeams['TeamID'].isin(men_ids)
keep_wom = WTeams['TeamID'].isin(wom_ids)
teamsm = MTeams.loc[keep_men]
teamsw = WTeams.loc[keep_wom]

# Report the filtered shapes and the men's column names.
print('MTeams:', teamsm.shape, 'WTeams:', teamsw.shape)
print('MTeams:', teamsm.columns)

MTeams: (365, 4) WTeams: (363, 2)
MTeams: Index(['TeamID', 'TeamName', 'FirstD1Season', 'LastD1Season'], dtype='object')


In [7]:
# Sanity check: row counts of the filtered Teams frames.
print(len(teamsm), len(teamsw))

365 363


In [8]:
# Load the alternate team-name spellings tables (the files are read as latin-1).
MSpellings = pd.read_csv(DATA_DIR + 'MTeamSpellings.csv', encoding='latin-1')
WSpellings = pd.read_csv(DATA_DIR + 'WTeamSpellings.csv', encoding='latin-1')

# Keep only the rows whose TeamID appears in the ID arrays built earlier.
keep_men = MSpellings['TeamID'].isin(men_ids)
keep_wom = WSpellings['TeamID'].isin(wom_ids)
spellm = MSpellings.loc[keep_men]
spellw = WSpellings.loc[keep_wom]

# Report the filtered shapes and the men's column names.
print('MSpellings:', spellm.shape, 'WSpellings:', spellw.shape)
print('MSpellings:', spellm.columns)

MSpellings: (1140, 2) WSpellings: (1133, 2)
MSpellings: Index(['TeamNameSpelling', 'TeamID'], dtype='object')


In [9]:
# NOTE(review): this silences every warning for the rest of the kernel session.
# Nothing in this cell needs it any more (the deprecated DataFrame.append calls are
# gone); it is kept only because later cells may rely on it - confirm and remove.
warnings.filterwarnings("ignore")


def _collect_spellings(spell_df):
    """Return a Series indexed by TeamID whose values are lists of that team's unique spellings."""
    return spell_df.groupby('TeamID')['TeamNameSpelling'].unique().map(list)


def _max_spellings(grouped):
    """Return the largest number of spellings held by any team (0 when there are no teams)."""
    return int(grouped.map(len).max()) if len(grouped) else 0


def _widen_spellings(grouped, columns):
    """Build one row per TeamID with its spellings spread over the Name1..NameN columns.

    Teams with fewer spellings than the widest team get NaN in the unused columns.
    All rows are collected first and the frame is built once, instead of growing it
    row by row with DataFrame.append (removed in pandas 2.0, and quadratic in the
    number of rows).
    """
    records = []
    for team_id, names in grouped.items():
        row = dict(zip(columns, names))
        row['TeamID'] = team_id
        records.append(row)
    return pd.DataFrame(records, columns=columns)


# Group the long-format spellings by TeamID: one list of unique spellings per team
grouped_men = _collect_spellings(spellm)
grouped_wom = _collect_spellings(spellw)

# Determine the maximum number of alternate spellings for a TeamID
max_men = _max_spellings(grouped_men)
max_wom = _max_spellings(grouped_wom)

# Column layout of the wide frames: Name1..NameN followed by TeamID
columns_spellm = ['Name{}'.format(i) for i in range(1, max_men + 1)] + ['TeamID']
columns_spellw = ['Name{}'.format(i) for i in range(1, max_wom + 1)] + ['TeamID']

# Build the wide frames (one row per TeamID)
new_mspell = _widen_spellings(grouped_men, columns_spellm)
new_wspell = _widen_spellings(grouped_wom, columns_spellw)

In [10]:
# Sanity check: one row per TeamID in each wide spellings frame.
print(len(new_mspell), len(new_wspell))

365 363


In [11]:
# Work on independent copies so the wide spellings frames built above are not modified
# when extra columns are added below.
spellm, spellw = new_mspell.copy(), new_wspell.copy()

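## Hard-coded alternate team names, one list per new column (Name0, Sports, BetExp).
## Each list is assigned to its frame by position, so it needs exactly one entry per row,
## in the same row order as spellm / spellw (NOTE(review): presumably ascending TeamID - confirm).
## Empty strings in the BetExp lists presumably mark teams with no name in that source - confirm.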
extend_men = ['Abilene Christian','Air Force','Akron','Alabama','Alabama A&M','Alabama State','Albany (NY)','Alcorn State','American','Appalachian State','Arizona','Arizona State','Little Rock','Pine Bluff','Arkansas','Arkansas State','Army','Auburn','Austin Peay','Ball State','Baylor','Belmont','Bethune-Cookman','Binghamton','Boise State','Boston College','Boston University','Bowling Green','Bradley','Brown','Bryant','Bucknell','Buffalo','Butler','Brigham Young','Central Michigan','Cal Poly','California','Campbell','Canisius','Central Arkansas','Centenary','Central Connecticut','Charleston Southern','Charlotte','Chattanooga','Chicago State','Cincinnati','Citadel','Clemson','Cleveland State','Coastal Carolina','College of Charleston','Colgate','Colorado','Colorado State','Columbia','Connecticut','Coppin State','Cornell','Creighton','Cal State Bakersfield','Cal State Fullerton','Cal State Northridge','Sacramento State','Dartmouth','Davidson','Dayton','Delaware','Delaware State','Denver','DePaul','Detroit Mercy','Drake','Drexel','Duke','Duquesne','Eastern Illinois','Eastern Kentucky','Eastern Michigan','Eastern Washington','East Carolina','SIU Edwardsville','Elon','East Tennessee State','Evansville','Fairleigh Dickinson','Fairfield','Florida Atlantic','Florida Gulf Coast','Florida','Florida A&M','Florida International','Florida State','Fordham','Fresno State','Furman','George Washington','Georgia Southern','Gardner-Webb','George Mason','Georgetown','Georgia','Georgia State','Georgia Tech','Gonzaga','Grambling','Grand Canyon','Hampton','Hartford','Harvard','Hawaii','High Point','Hofstra','Holy Cross','Houston','Houston Christian','Howard','Idaho','Idaho State','Illinois-Chicago','Illinois','Illinois State','Incarnate Word','Indiana','Indiana State','Iona','Iowa','Iowa State','Purdue-Fort Wayne','IUPUI','Jackson State','Jacksonville','Jacksonville State','James Madison','Kansas','Kansas State','Kennesaw State','Kent State','Kentucky','La 
Salle','Lafayette','Lamar','Lehigh','Liberty','Lipscomb','Long Beach State','Long Island University','Longwood','Louisiana Tech','Louisville','Loyola Marymount','Loyola MD','Loyola IL','Louisiana State','MA-Lowell','Maine','Manhattan','Marist','Marquette','Marshall','Maryland','Massachusetts','McNeese State','MD-Eastern Shore','Memphis','Mercer','Miami FL','Miami OH','Michigan','Michigan State','Minnesota','Mississippi','Mississippi State','Missouri','Kansas City','Missouri State','Monmouth','Montana','Montana State','Morehead State','Morgan State','Mississippi Valley State','Mount St Marys','Middle Tennessee','Murray State','Northern Colorado','North Dakota State','Northern Illinois','Northern Kentucky','Navy','North Carolina A&T','North Carolina Central','NC State','Omaha','Nebraska','Nevada','New Hampshire','New Mexico','New Mexico State','New Orleans','Niagara','Nicholls State','NJIT','Norfolk State','North Carolina','North Dakota','North Florida','North Texas','Northeastern','Northern Arizona','Northern Iowa','Northwestern','Northwestern State','Notre Dame','Oakland','Ohio','Ohio State','Oklahoma','Oklahoma State','Old Dominion','Oral Roberts','Oregon','Oregon State','Pacific','Penn','Penn State','Pepperdine','Pittsburgh','Portland','Portland State','Prairie View','Presbyterian','Princeton','Providence','Purdue','Quinnipiac','Radford','Rhode Island','Rice','Richmond','Rider','Robert Morris','Rutgers','South Carolina State','South Dakota State','Southern Illinois','Sacred Heart','Sam Houston State','Samford','San Diego','San Diego State','San Francisco','San Jose State','Santa Barbara','Santa Clara','Savannah State','South Carolina Upstate','Southeastern Louisiana','Southeast Missouri State','Seattle','Seton Hall','Stephen F Austin','Siena','Southern Methodist','South Alabama','South Carolina','South Dakota','South Florida','Southern Mississippi','Southern','Southern Utah','St Bonaventure','St Francis NY','St Francis PA','St Johns NY','St Josephs','Saint 
Louis','St Marys CA','St Peters','Stanford','Stetson','Stony Brook','Syracuse','TAM Corpus Christi','TCU','Temple','Tennessee','Tennessee State','Tennessee Tech','Texas','Texas A&M','Texas State','Texas Tech','Tennessee-Martin','Toledo','Towson','Troy','Tulane','Tulsa','Texas-Rio Grande Valley','Texas Southern','UAB','UC Davis','UC Irvine','UC Riverside','Central Florida','UCLA','Louisiana','Louisiana-Monroe','UMBC','NC Asheville','NC Greensboro','NC Wilmington','UNLV','Southern California','UT Arlington','UTSA','Utah','Utah State','Utah Valley','UTEP','Virginia Commonwealth','Valparaiso','Vanderbilt','Vermont','Villanova','Virginia','Virginia Tech','VMI','Western Carolina','Western Illinois','Western Kentucky','Western Michigan','Wagner','Wake Forest','Washington','Washington State','Weber State','West Virginia','WI-Green Bay','WI-Milwaukee','Wichita State','William & Mary','Winthrop','Wisconsin','Wofford','Wright State','Wyoming','Xavier','Yale','Youngstown State','California Baptist','North Alabama','Merrimack','Bellarmine','Utah Tech','Tarleton State','UC San Diego','St Thomas','Lindenwood','Queens NC','Southern Indiana','Stonehill','TAM Commerce']
sports_men = ["Abilene Christian","Air Force","Akron","Alabama","Alabama A&M","Alabama State","Albany (NY)","Alcorn State","American","Appalachian State","Arizona","Arizona State","Little Rock","Arkansas-Pine Bluff","Arkansas","Arkansas State","Army","Auburn","Austin Peay","Ball State","Baylor","Belmont","Bethune-Cookman","Binghamton","Boise State","Boston College","Boston University","Bowling Green State","Bradley","Brown","Bryant","Bucknell","Buffalo","Butler","Brigham Young","Central Michigan","Cal Poly","California","Campbell","Canisius","Central Arkansas","Centenary (LA)","Central Connecticut State","Charleston Southern","Charlotte","Chattanooga","Chicago State","Cincinnati","The Citadel","Clemson","Cleveland State","Coastal Carolina","College of Charleston","Colgate","Colorado","Colorado State","Columbia","Connecticut","Coppin State","Cornell","Creighton","Cal State Bakersfield","Cal State Fullerton","Cal State Northridge","Sacramento State","Dartmouth","Davidson","Dayton","Delaware","Delaware State","Denver","DePaul","Detroit Mercy","Drake","Drexel","Duke","Duquesne","Eastern Illinois","Eastern Kentucky","Eastern Michigan","Eastern Washington","East Carolina","SIU Edwardsville","Elon","East Tennessee State","Evansville","Fairleigh Dickinson","Fairfield","Florida Atlantic","Florida Gulf Coast","Florida","Florida A&M","Florida International","Florida State","Fordham","Fresno State","Furman","George Washington","Georgia Southern","Gardner-Webb","George Mason","Georgetown","Georgia","Georgia State","Georgia Tech","Gonzaga","Grambling","Grand Canyon","Hampton","Hartford","Harvard","Hawaii","High Point","Hofstra","Holy Cross","Houston","Houston Christian","Howard","Idaho","Idaho State","Illinois-Chicago","Illinois","Illinois State","Incarnate Word","Indiana","Indiana State","Iona","Iowa","Iowa State","Purdue-Fort Wayne","IUPUI","Jackson State","Jacksonville","Jacksonville State","James Madison","Kansas","Kansas State","Kennesaw State","Kent State","Kentucky","La 
Salle","Lafayette","Lamar","Lehigh","Liberty","Lipscomb","Long Beach State","Long Island University","Longwood","Louisiana Tech","Louisville","Loyola Marymount","Loyola (MD)","Loyola (IL)","Louisiana State","Massachusetts-Lowell","Maine","Manhattan","Marist","Marquette","Marshall","Maryland","Massachusetts","McNeese State","Maryland-Eastern Shore","Memphis","Mercer","Miami (FL)","Miami (OH)","Michigan","Michigan State","Minnesota","Mississippi","Mississippi State","Missouri","Kansas City","Missouri State","Monmouth","Montana","Montana State","Morehead State","Morgan State","Mississippi Valley State","Mount St. Mary's","Middle Tennessee","Murray State","Northern Colorado","North Dakota State","Northern Illinois","Northern Kentucky","Navy","North Carolina A&T","North Carolina Central","NC State","Omaha","Nebraska","Nevada","New Hampshire","New Mexico","New Mexico State","New Orleans","Niagara","Nicholls State","NJIT","Norfolk State","North Carolina","North Dakota","North Florida","North Texas","Northeastern","Northern Arizona","Northern Iowa","Northwestern","Northwestern State","Notre Dame","Oakland","Ohio","Ohio State","Oklahoma","Oklahoma State","Old Dominion","Oral Roberts","Oregon","Oregon State","Pacific","Pennsylvania","Penn State","Pepperdine","Pittsburgh","Portland","Portland State","Prairie View","Presbyterian","Princeton","Providence","Purdue","Quinnipiac","Radford","Rhode Island","Rice","Richmond","Rider","Robert Morris","Rutgers","South Carolina State","South Dakota State","Southern Illinois","Sacred Heart","Sam Houston State","Samford","San Diego","San Diego State","San Francisco","San Jose State","UC Santa Barbara","Santa Clara","Savannah State","South Carolina Upstate","Southeastern Louisiana","Southeast Missouri State","Seattle","Seton Hall","Stephen F. Austin","Siena","Southern Methodist","South Alabama","South Carolina","South Dakota","South Florida","Southern Mississippi","Southern","Southern Utah","St. Bonaventure","St. 
Francis (NY)","Saint Francis (PA)","St. John's (NY)","Saint Joseph's","Saint Louis","Saint Mary's (CA)","Saint Peter's","Stanford","Stetson","Stony Brook","Syracuse","Texas A&M-Corpus Christi","TCU","Temple","Tennessee","Tennessee State","Tennessee Tech","Texas","Texas A&M","Texas State","Texas Tech","Tennessee-Martin","Toledo","Towson","Troy","Tulane","Tulsa","Texas-Rio Grande Valley","Texas Southern","UAB","UC Davis","UC Irvine","UC Riverside","Central Florida","UCLA","Louisiana","Louisiana-Monroe","Maryland-Baltimore County","UNC Asheville","UNC Greensboro","UNC Wilmington","Nevada-Las Vegas","Southern California","UT Arlington","UTSA","Utah","Utah State","Utah Valley","UTEP","Virginia Commonwealth","Valparaiso","Vanderbilt","Vermont","Villanova","Virginia","Virginia Tech","VMI","Western Carolina","Western Illinois","Western Kentucky","Western Michigan","Wagner","Wake Forest","Washington","Washington State","Weber State","West Virginia","Green Bay","Milwaukee","Wichita State","William & Mary","Winthrop","Wisconsin","Wofford","Wright State","Wyoming","Xavier","Yale","Youngstown State","California Baptist","North Alabama","Merrimack","Bellarmine","Utah Tech","Tarleton State","UC San Diego","St. Thomas","Lindenwood","Queens (NC)","Southern Indiana","Stonehill","Texas A&M-Commerce"]
betexp_men = ["Abilene Christian","Air Force","Akron","Alabama","Alabama A&M","Alabama State","Albany","Alcorn State","American University","Appalachian State","Arizona","Arizona State","UALR","Arkansas-Pine Bluff","Arkansas","Arkansas State","Army","Auburn","Austin Peay","Ball State","Baylor","Belmont","Bethune-Cookman","Binghamton","Boise State","Boston College","Boston University","Bowling Green","Bradley","Brown","Bryant University","Bucknell","Buffalo","Butler","Brigham Young","Central Michigan","Cal Poly","California","Campbell","Canisius","Central Arkansas","","Central Connecticut State","Charleston Southern","Charlotte","Chattanooga Mocs","Chicago State","Cincinnati","Citadel","Clemson","Cleveland State","Coastal Carolina","Charleston","Colgate","Colorado","Colorado State","Columbia","UConn","Coppin State","Cornell","Creighton","CSU Bakersfield","CS Fullerton","CS Northridge","Sacramento State","Dartmouth","Davidson","Dayton","Delaware","Delaware State","Denver","DePaul","Detroit","Drake","Drexel","Duke","Duquesne","Eastern Illinois","Eastern Kentucky","Eastern Michigan","East. 
Washington","East Carolina","Siu Edwardsville","Elon","East Tennessee St","Evansville","Fairleigh Dickinson","Fairfield","Florida Atlantic","Florida Gulf Coast","Florida","Florida A&M","Florida International","Florida State","Fordham","Fresno State","Furman","George Washington","Georgia Southern","Gardner Webb","George Mason","Georgetown","Georgia","Georgia State","Georgia Tech","Gonzaga","Grambling St.","Grand Canyon","Hampton","Hartford","Harvard","Hawaii","High Point","Hofstra","Holy Cross","Houston","Houston Christian","Howard","Idaho","Idaho State","Illinois (Chi.)","Illinois","Illinois State","Incarnate Word","Indiana","Indiana State","Iona","Iowa","Iowa State","IPFW","IUPUI","Jackson State","Jacksonville","Jacksonville State","James Madison","Kansas","Kansas State","Kennesaw State","Kent State","Kentucky","La Salle","Lafayette","Lamar","Lehigh","Liberty","Lipscomb","Long Beach State","LIU Sharks","Longwood","Louisiana Tech","Louisville","Loyola Marymount","Loyola Maryland","Loyola Chicago","LSU","UMass Lowell","Maine Black Bears","Manhattan","Marist","Marquette","Marshall","Maryland","UMass","McNeese State","Md.-East. Shore","Memphis","Mercer","Miami (FL)","Miami (Ohio)","Michigan","Michigan State","Minnesota","Ole Miss","Mississippi St.","Missouri","UMKC","Missouri State","Monmouth","Montana","Montana State","Morehead State","Morgan State","Miss. Valley St.","Mount St. Mary's","Middle Tenn. St.","Murray State","Northern Colorado","North Dakota St","Northern Illinois","Northern Kentucky","Navy","N. Carolina A&T","N. 
Carolina Central","NC State","Nebraska O.","Nebraska","Nevada","New Hampshire","New Mexico","New Mexico State","New Orleans","Niagara","Nicholls State","NJIT","Norfolk State","North Carolina","North Dakota","North Florida","North Texas","Northeastern","Northern Arizona","Northern Iowa","Northwestern","Northwestern St.","Notre Dame","Oakland","Ohio","Ohio State","Oklahoma","Oklahoma State","Old Dominion","Oral Roberts","Oregon","Oregon State","Pacific","Penn","Penn State","Pepperdine","Pittsburgh","Portland","Portland State","Prairie View A&M","Presbyterian","Princeton","Providence","Purdue","Quinnipiac","Radford","Rhode Island","Rice","Richmond","Rider","Robert Morris","Rutgers","South Carolina St","South Dakota St.","Southern Illinois","Sacred Heart","Sam Houston St.","Samford","San Diego Toreros","San Diego State","San Francisco","San Jose State","UC Santa Barbara","Santa Clara","","USC Upstate","SE Louisiana","Southeast Missouri State","Seattle","Seton Hall","Stephen F. Austin","Siena","SMU Mustangs","South Alabama","South Carolina","South Dakota Coyotes","South Florida","Southern Miss","Southern Univ.","Southern Utah","St. Bonaventure","St. Francis Brooklyn","St. Francis (PA)","St. John's (N.Y.)","Saint Josephs","St. Louis","St. Marys (CA)","St. 
Peters","Stanford","Stetson","Stony Brook","Syracuse","Texas A&M-CC","TCU","Temple","Tennessee","Tennessee State","Tennessee Tech","Texas","Texas A&M","Texas State","Texas Tech","UT Martin","Toledo","Towson","Troy","Tulane","Tulsa","UTRGV","Texas Southern","UAB","UC Davis","UC Irvine","UC Riverside","UCF Knights","UCLA","Louisiana Lafayette","Louisiana Monroe","UMBC Retrievers","UNC Asheville","NC Greensboro","NC Wilmington","UNLV","USC","UT Arlington","UTSA Roadrunners","Utah Utes","Utah State","Utah Valley State","UTEP","VCU Rams","Valparaiso","Vanderbilt","Vermont","Villanova","Virginia","Virginia Tech","VMI","Western Carolina","Western Illinois","Western Kentucky","Western Michigan","Wagner","Wake Forest","Washington","Washington State","Weber State","West Virginia","Wisc. Green Bay","Wisc. Milwaukee","Wichita State","William & Mary","Winthrop","Wisconsin","Wofford","Wright State","Wyoming","Xavier","Yale","Youngstown State","California Baptist","North Alabama","Merrimack Warriors","Bellarmine","Utah Tech","Tarleton","UC San Diego","St. Thomas (Minn.)","","","","",""]

extend_wom = ['Abilene Christian','Air Force','Akron','Alabama','Alabama A&M','Alabama State','Albany (NY)','Alcorn State','American','Appalachian State','Arizona','Arizona State','Little Rock','Pine Bluff','Arkansas','Arkansas State','Army','Auburn','Austin Peay','Ball State','Baylor','Belmont','Bethune-Cookman','Binghamton','Boise State','Boston College','Boston University','Bowling Green','Bradley','Brown','Bryant','Bucknell','Buffalo','Butler','Brigham Young','Central Michigan','Cal Poly','California','Campbell','Canisius','Central Arkansas','Centenary','Central Connecticut','Charleston Southern','Charlotte','Chattanooga','Chicago State','Cincinnati','Clemson','Cleveland State','Coastal Carolina','College of Charleston','Colgate','Colorado','Colorado State','Columbia','Connecticut','Coppin State','Cornell','Creighton','Cal State Bakersfield','Cal State Fullerton','Cal State Northridge','Sacramento State','Dartmouth','Davidson','Dayton','Delaware','Delaware State','Denver','DePaul','Detroit Mercy','Drake','Drexel','Duke','Duquesne','Eastern Illinois','Eastern Kentucky','Eastern Michigan','Eastern Washington','East Carolina','SIU Edwardsville','Elon','East Tennessee State','Evansville','Fairleigh Dickinson','Fairfield','Florida Atlantic','Florida Gulf Coast','Florida','Florida A&M','Florida International','Florida State','Fordham','Fresno State','Furman','George Washington','Georgia Southern','Gardner-Webb','George Mason','Georgetown','Georgia','Georgia State','Georgia Tech','Gonzaga','Grambling','Grand Canyon','Hampton','Hartford','Harvard','Hawaii','High Point','Hofstra','Holy Cross','Houston','Houston Christian','Howard','Idaho','Idaho State','Illinois-Chicago','Illinois','Illinois State','Incarnate Word','Indiana','Indiana State','Iona','Iowa','Iowa State','Purdue-Fort Wayne','IUPUI','Jackson State','Jacksonville','Jacksonville State','James Madison','Kansas','Kansas State','Kennesaw State','Kent State','Kentucky','La 
Salle','Lafayette','Lamar','Lehigh','Liberty','Lipscomb','Long Beach State','Long Island University','Longwood','Louisiana Tech','Louisville','Loyola Marymount','Loyola MD','Loyola IL','Louisiana State','MA-Lowell','Maine','Manhattan','Marist','Marquette','Marshall','Maryland','Massachusetts','McNeese State','MD-Eastern Shore','Memphis','Mercer','Miami FL','Miami OH','Michigan','Michigan State','Minnesota','Mississippi','Mississippi State','Missouri','Kansas City','Missouri State','Monmouth','Montana','Montana State','Morehead State','Morgan State','Mississippi Valley State','Mount St Marys','Middle Tennessee','Murray State','Northern Colorado','North Dakota State','Northern Illinois','Northern Kentucky','Navy','North Carolina A&T','North Carolina Central','NC State','Omaha','Nebraska','Nevada','New Hampshire','New Mexico','New Mexico State','New Orleans','Niagara','Nicholls State','NJIT','Norfolk State','North Carolina','North Dakota','North Florida','North Texas','Northeastern','Northern Arizona','Northern Iowa','Northwestern','Northwestern State','Notre Dame','Oakland','Ohio','Ohio State','Oklahoma','Oklahoma State','Old Dominion','Oral Roberts','Oregon','Oregon State','Pacific','Penn','Penn State','Pepperdine','Pittsburgh','Portland','Portland State','Prairie View','Presbyterian','Princeton','Providence','Purdue','Quinnipiac','Radford','Rhode Island','Rice','Richmond','Rider','Robert Morris','Rutgers','South Carolina State','South Dakota State','Southern Illinois','Sacred Heart','Sam Houston State','Samford','San Diego','San Diego State','San Francisco','San Jose State','Santa Barbara','Santa Clara','Savannah State','South Carolina Upstate','Southeastern Louisiana','Southeast Missouri State','Seattle','Seton Hall','Stephen F Austin','Siena','Southern Methodist','South Alabama','South Carolina','South Dakota','South Florida','Southern Mississippi','Southern','Southern Utah','St Bonaventure','St Francis NY','St Francis PA','St Johns NY','St Josephs','Saint 
Louis','St Marys CA','St Peters','Stanford','Stetson','Stony Brook','Syracuse','TAM Corpus Christi','TCU','Temple','Tennessee','Tennessee State','Tennessee Tech','Texas','Texas A&M','Texas State','Texas Tech','Tennessee-Martin','Toledo','Towson','Troy','Tulane','Tulsa','Texas-Rio Grande Valley','Texas Southern','UAB','UC Davis','UC Irvine','UC Riverside','Central Florida','UCLA','Louisiana','Louisiana-Monroe','UMBC','NC Asheville','NC Greensboro','NC Wilmington','UNLV','Southern California','UT Arlington','UTSA','Utah','Utah State','Utah Valley','UTEP','Virginia Commonwealth','Valparaiso','Vanderbilt','Vermont','Villanova','Virginia','Virginia Tech','Western Carolina','Western Illinois','Western Kentucky','Western Michigan','Wagner','Wake Forest','Washington','Washington State','Weber State','West Virginia','WI-Green Bay','WI-Milwaukee','Wichita State','William & Mary','Winthrop','Wisconsin','Wofford','Wright State','Wyoming','Xavier','Yale','Youngstown State','California Baptist','North Alabama','Merrimack','Bellarmine','Utah Tech','Tarleton State','UC San Diego','St Thomas','Lindenwood','Queens NC','Southern Indiana','Stonehill','TAM Commerce']
sports_wom = ["Abilene Christian","Air Force","Akron","Alabama","Alabama A&M","Alabama State","Albany (NY)","Alcorn State","American","Appalachian State","Arizona","Arizona State","Little Rock","Arkansas-Pine Bluff","Arkansas","Arkansas State","Army","Auburn","Austin Peay","Ball State","Baylor","Belmont","Bethune-Cookman","Binghamton","Boise State","Boston College","Boston University","Bowling Green State","Bradley","Brown","Bryant","Bucknell","Buffalo","Butler","Brigham Young","Central Michigan","Cal Poly","California","Campbell","Canisius","Central Arkansas","Centenary (LA)","Central Connecticut State","Charleston Southern","Charlotte","Chattanooga","Chicago State","Cincinnati","Clemson","Cleveland State","Coastal Carolina","College of Charleston","Colgate","Colorado","Colorado State","Columbia","Connecticut","Coppin State","Cornell","Creighton","Cal State Bakersfield","Cal State Fullerton","Cal State Northridge","Sacramento State","Dartmouth","Davidson","Dayton","Delaware","Delaware State","Denver","DePaul","Detroit Mercy","Drake","Drexel","Duke","Duquesne","Eastern Illinois","Eastern Kentucky","Eastern Michigan","Eastern Washington","East Carolina","SIU Edwardsville","Elon","East Tennessee State","Evansville","Fairleigh Dickinson","Fairfield","Florida Atlantic","Florida Gulf Coast","Florida","Florida A&M","Florida International","Florida State","Fordham","Fresno State","Furman","George Washington","Georgia Southern","Gardner-Webb","George Mason","Georgetown","Georgia","Georgia State","Georgia Tech","Gonzaga","Grambling","Grand Canyon","Hampton","Hartford","Harvard","Hawaii","High Point","Hofstra","Holy Cross","Houston","Houston Christian","Howard","Idaho","Idaho State","Illinois-Chicago","Illinois","Illinois State","Incarnate Word","Indiana","Indiana State","Iona","Iowa","Iowa State","Purdue-Fort Wayne","IUPUI","Jackson State","Jacksonville","Jacksonville State","James Madison","Kansas","Kansas State","Kennesaw State","Kent State","Kentucky","La 
Salle","Lafayette","Lamar","Lehigh","Liberty","Lipscomb","Long Beach State","Long Island University","Longwood","Louisiana Tech","Louisville","Loyola Marymount","Loyola (MD)","Loyola (IL)","Louisiana State","Massachusetts-Lowell","Maine","Manhattan","Marist","Marquette","Marshall","Maryland","Massachusetts","McNeese State","Maryland-Eastern Shore","Memphis","Mercer","Miami (FL)","Miami (OH)","Michigan","Michigan State","Minnesota","Mississippi","Mississippi State","Missouri","Kansas City","Missouri State","Monmouth","Montana","Montana State","Morehead State","Morgan State","Mississippi Valley State","Mount St. Mary's","Middle Tennessee","Murray State","Northern Colorado","North Dakota State","Northern Illinois","Northern Kentucky","Navy","North Carolina A&T","North Carolina Central","NC State","Omaha","Nebraska","Nevada","New Hampshire","New Mexico","New Mexico State","New Orleans","Niagara","Nicholls State","NJIT","Norfolk State","North Carolina","North Dakota","North Florida","North Texas","Northeastern","Northern Arizona","Northern Iowa","Northwestern","Northwestern State","Notre Dame","Oakland","Ohio","Ohio State","Oklahoma","Oklahoma State","Old Dominion","Oral Roberts","Oregon","Oregon State","Pacific","Pennsylvania","Penn State","Pepperdine","Pittsburgh","Portland","Portland State","Prairie View","Presbyterian","Princeton","Providence","Purdue","Quinnipiac","Radford","Rhode Island","Rice","Richmond","Rider","Robert Morris","Rutgers","South Carolina State","South Dakota State","Southern Illinois","Sacred Heart","Sam Houston State","Samford","San Diego","San Diego State","San Francisco","San Jose State","UC Santa Barbara","Santa Clara","Savannah State","South Carolina Upstate","Southeastern Louisiana","Southeast Missouri State","Seattle","Seton Hall","Stephen F. Austin","Siena","Southern Methodist","South Alabama","South Carolina","South Dakota","South Florida","Southern Mississippi","Southern","Southern Utah","St. Bonaventure","St. 
Francis (NY)","Saint Francis (PA)","St. John's (NY)","Saint Joseph's","Saint Louis","Saint Mary's (CA)","Saint Peter's","Stanford","Stetson","Stony Brook","Syracuse","Texas A&M-Corpus Christi","TCU","Temple","Tennessee","Tennessee State","Tennessee Tech","Texas","Texas A&M","Texas State","Texas Tech","Tennessee-Martin","Toledo","Towson","Troy","Tulane","Tulsa","Texas-Rio Grande Valley","Texas Southern","UAB","UC Davis","UC Irvine","UC Riverside","Central Florida","UCLA","Louisiana","Louisiana-Monroe","Maryland-Baltimore County","UNC Asheville","UNC Greensboro","UNC Wilmington","Nevada-Las Vegas","Southern California","UT Arlington","UTSA","Utah","Utah State","Utah Valley","UTEP","Virginia Commonwealth","Valparaiso","Vanderbilt","Vermont","Villanova","Virginia","Virginia Tech","Western Carolina","Western Illinois","Western Kentucky","Western Michigan","Wagner","Wake Forest","Washington","Washington State","Weber State","West Virginia","Green Bay","Milwaukee","Wichita State","William & Mary","Winthrop","Wisconsin","Wofford","Wright State","Wyoming","Xavier","Yale","Youngstown State","California Baptist","North Alabama","Merrimack","Bellarmine","Utah Tech","Tarleton State","UC San Diego","St. Thomas","Lindenwood","Queens (NC)","Southern Indiana","Stonehill","Texas A&M-Commerce"]
betexp_wom = ["Abilene Christian","Air Force","Akron","Alabama","Alabama A&M","Alabama State","Albany","Alcorn State","American University","Appalachian State","Arizona","Arizona State","UALR","Arkansas-Pine Bluff","Arkansas","Arkansas State","Army","Auburn","Austin Peay","Ball State","Baylor","Belmont","Bethune-Cookman","Binghamton","Boise State","Boston College","Boston University","Bowling Green","Bradley","Brown","Bryant University","Bucknell","Buffalo","Butler","Brigham Young","Central Michigan","Cal Poly","California","Campbell","Canisius","Central Arkansas","","Central Connecticut State","Charleston Southern","Charlotte","Chattanooga Mocs","Chicago State","Cincinnati","Clemson","Cleveland State","Coastal Carolina","Charleston","Colgate","Colorado","Colorado State","Columbia","UConn","Coppin State","Cornell","Creighton","CSU Bakersfield","CS Fullerton","CS Northridge","Sacramento State","Dartmouth","Davidson","Dayton","Delaware","Delaware State","Denver","DePaul","Detroit","Drake","Drexel","Duke","Duquesne","Eastern Illinois","Eastern Kentucky","Eastern Michigan","East. 
Washington","East Carolina","Siu Edwardsville","Elon","East Tennessee St","Evansville","Fairleigh Dickinson","Fairfield","Florida Atlantic","Florida Gulf Coast","Florida","Florida A&M","Florida International","Florida State","Fordham","Fresno State","Furman","George Washington","Georgia Southern","Gardner Webb","George Mason","Georgetown","Georgia","Georgia State","Georgia Tech","Gonzaga","Grambling St.","Grand Canyon","Hampton","Hartford","Harvard","Hawaii","High Point","Hofstra","Holy Cross","Houston","Houston Christian","Howard","Idaho","Idaho State","Illinois (Chi.)","Illinois","Illinois State","Incarnate Word","Indiana","Indiana State","Iona","Iowa","Iowa State","IPFW","IUPUI","Jackson State","Jacksonville","Jacksonville State","James Madison","Kansas","Kansas State","Kennesaw State","Kent State","Kentucky","La Salle","Lafayette","Lamar","Lehigh","Liberty","Lipscomb","Long Beach State","LIU Sharks","Longwood","Louisiana Tech","Louisville","Loyola Marymount","Loyola Maryland","Loyola Chicago","LSU","UMass Lowell","Maine Black Bears","Manhattan","Marist","Marquette","Marshall","Maryland","UMass","McNeese State","Md.-East. Shore","Memphis","Mercer","Miami (FL)","Miami (Ohio)","Michigan","Michigan State","Minnesota","Ole Miss","Mississippi St.","Missouri","UMKC","Missouri State","Monmouth","Montana","Montana State","Morehead State","Morgan State","Miss. Valley St.","Mount St. Mary's","Middle Tenn. St.","Murray State","Northern Colorado","North Dakota St","Northern Illinois","Northern Kentucky","Navy","N. Carolina A&T","N. 
Carolina Central","NC State","Nebraska O.","Nebraska","Nevada","New Hampshire","New Mexico","New Mexico State","New Orleans","Niagara","Nicholls State","NJIT","Norfolk State","North Carolina","North Dakota","North Florida","North Texas","Northeastern","Northern Arizona","Northern Iowa","Northwestern","Northwestern St.","Notre Dame","Oakland","Ohio","Ohio State","Oklahoma","Oklahoma State","Old Dominion","Oral Roberts","Oregon","Oregon State","Pacific","Penn","Penn State","Pepperdine","Pittsburgh","Portland","Portland State","Prairie View A&M","Presbyterian","Princeton","Providence","Purdue","Quinnipiac","Radford","Rhode Island","Rice","Richmond","Rider","Robert Morris","Rutgers","South Carolina St","South Dakota St.","Southern Illinois","Sacred Heart","Sam Houston St.","Samford","San Diego Toreros","San Diego State","San Francisco","San Jose State","UC Santa Barbara","Santa Clara","","USC Upstate","SE Louisiana","Southeast Missouri State","Seattle","Seton Hall","Stephen F. Austin","Siena","SMU Mustangs","South Alabama","South Carolina","South Dakota Coyotes","South Florida","Southern Miss","Southern Univ.","Southern Utah","St. Bonaventure","St. Francis Brooklyn","St. Francis (PA)","St. John's (N.Y.)","Saint Josephs","St. Louis","St. Marys (CA)","St. 
Peters","Stanford","Stetson","Stony Brook","Syracuse","Texas A&M-CC","TCU","Temple","Tennessee","Tennessee State","Tennessee Tech","Texas","Texas A&M","Texas State","Texas Tech","UT Martin","Toledo","Towson","Troy","Tulane","Tulsa","UTRGV","Texas Southern","UAB","UC Davis","UC Irvine","UC Riverside","UCF Knights","UCLA","Louisiana Lafayette","Louisiana Monroe","UMBC Retrievers","UNC Asheville","NC Greensboro","NC Wilmington","UNLV","USC","UT Arlington","UTSA Roadrunners","Utah Utes","Utah State","Utah Valley State","UTEP","VCU Rams","Valparaiso","Vanderbilt","Vermont","Villanova","Virginia","Virginia Tech","Western Carolina","Western Illinois","Western Kentucky","Western Michigan","Wagner","Wake Forest","Washington","Washington State","Weber State","West Virginia","Wisc. Green Bay","Wisc. Milwaukee","Wichita State","William & Mary","Winthrop","Wisconsin","Wofford","Wright State","Wyoming","Xavier","Yale","Youngstown State","California Baptist","North Alabama","Merrimack Warriors","Bellarmine","Utah Tech","Tarleton","UC San Diego","St. Thomas (Minn.)","","","","",""]

# Create new columns for additional alternative names.
# Each frame receives three extra columns, appended in the order
# Name0 -> Sports -> BetExp; the value sequences come from earlier in the notebook.
men_extras = {'Name0': extend_men, 'Sports': sports_men, 'BetExp': betexp_men}
wom_extras = {'Name0': extend_wom, 'Sports': sports_wom, 'BetExp': betexp_wom}

for spell_frame, extras in ((spellm, men_extras), (spellw, wom_extras)):
    for column, values in extras.items():
        # Whole-column assignment of the supplied sequence
        spell_frame[column] = values

# Show sample output
print('MSpellings:', spellm.shape, 'WSpellings:', spellw.shape)
print('MSpellings:', spellm.columns)

MSpellings: (365, 14) WSpellings: (363, 14)
MSpellings: Index(['Name1', 'Name2', 'Name3', 'Name4', 'Name5', 'Name6', 'Name7', 'Name8',
       'Name9', 'Name10', 'TeamID', 'Name0', 'Sports', 'BetExp'],
      dtype='object')


In [12]:
# Left-join each 'Teams' table with its 'Team Spellings' table on the shared
# 'TeamID' key, so every team row is kept and gains the spelling columns.
teamsm = teamsm.merge(spellm, how='left', on='TeamID')
teamsw = teamsw.merge(spellw, how='left', on='TeamID')

In [13]:
# Show sample output: shapes of both merged frames, then each frame's columns.
# NOTE: the printed labels read 'MSpellings'/'WSpellings', but the frames shown
# here are the merged teams tables (teamsm / teamsw).
print('MSpellings:', teamsm.shape, 'WSpellings:', teamsw.shape)
for merged_label, merged_frame in (('MSpellings:', teamsm), ('WSpellings:', teamsw)):
    print(merged_label, merged_frame.columns)

MSpellings: (365, 17) WSpellings: (363, 15)
MSpellings: Index(['TeamID', 'TeamName', 'FirstD1Season', 'LastD1Season', 'Name1', 'Name2',
       'Name3', 'Name4', 'Name5', 'Name6', 'Name7', 'Name8', 'Name9', 'Name10',
       'Name0', 'Sports', 'BetExp'],
      dtype='object')
WSpellings: Index(['TeamID', 'TeamName', 'Name1', 'Name2', 'Name3', 'Name4', 'Name5',
       'Name6', 'Name7', 'Name8', 'Name9', 'Name10', 'Name0', 'Sports',
       'BetExp'],
      dtype='object')


In [14]:
# Reorder and drop unwanted columns from dataframe.
# The index lists are positions in the merged frames: TeamID first, then
# Name0 / Sports / BetExp, then the original TeamName, then Name1..Name10.
# For the men's frame the two D1-season columns (positions 2 and 3) go last
# and are dropped right after the reorder.
reo_teamsm = [0, 14, 15, 16, 1] + list(range(4, 14)) + [2, 3]
reo_teamsw = [0, 12, 13, 14] + list(range(1, 12))

# Rename map: swaps 'Name0' <-> 'TeamName'; every other entry maps a column to itself.
ren_teams = {
    'TeamID': 'TeamID',
    'Name0': 'TeamName',
    'Sports': 'Sports',
    'BetExp': 'BetExp',
    'TeamName': 'Name0',
}
ren_teams.update({f'Name{i}': f'Name{i}' for i in range(1, 11)})

teamsm = (
    teamsm.iloc[:, reo_teamsm]
    .drop(columns=['FirstD1Season', 'LastD1Season'])
    .rename(columns=ren_teams)
)
teamsw = teamsw.iloc[:, reo_teamsw].rename(columns=ren_teams)

# Show sample output
print('MTeams:', teamsm.shape, 'WTeams:', teamsw.shape)
print('MTeams:', teamsm.columns)

MTeams: (365, 15) WTeams: (363, 15)
MTeams: Index(['TeamID', 'TeamName', 'Sports', 'BetExp', 'Name0', 'Name1', 'Name2',
       'Name3', 'Name4', 'Name5', 'Name6', 'Name7', 'Name8', 'Name9',
       'Name10'],
      dtype='object')


In [15]:
# Write the final team tables to CSV without the row index.
# DataFrame.to_csv returns None when given a file path, so the result is
# deliberately NOT assigned back: teamsm / teamsw remain DataFrames and this
# cell can be re-run or followed by further use of the frames.
teamsm.to_csv('teams-mens.csv', index=False)
teamsw.to_csv('teams-womens.csv', index=False)