In [22]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import selenium
import json

In [23]:
from dataclasses import dataclass
@dataclass
class Team:
    """A team's identity, win/loss record and current Elo rating."""
    name: str  # full team name (taken from the event's 'name' field by the loader)
    abbrev: str  # short abbreviation; also used as the key into team_dict
    wins: int = 0  # games won so far
    losses: int = 0  # games lost so far
    elo: float = 1500  # rating every team starts from before any game is processed

In [24]:
from datetime import datetime, timedelta
'''
    Get game results and elo from schedule
'''

# Maps team abbreviation -> Team; filled in while the scoreboard events are processed.
team_dict = {}

SCOREBOARD_URL = "https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/scoreboard?limit=1000&dates=20231106-20240315"

# A timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() fails loudly on an HTTP error instead of letting an error
# payload be parsed as if it were game data.
# NOTE(review): the query caps the response at limit=1000 events -- confirm that
# this covers every game in the requested date range.
res = requests.get(SCOREBOARD_URL, timeout=30)
res.raise_for_status()
espn_games_dict = res.json()

def K_VALUE(team: Team):
    """Return the Elo K-factor (rating-update step size) for ``team``.

    Tiers, checked in order:
      * 40 -- fewer than 10 games played and a rating below 2000
      * 20 -- rating below 2400
      * 10 -- everything else
    """
    games_played = team.wins + team.losses
    is_provisional = games_played < 10 and team.elo < 2000

    if is_provisional:
        return 40
    return 20 if team.elo < 2400 else 10

# Walk every scoreboard event and apply one Elo update per decided game.
# NOTE(review): Elo is order dependent and events are processed in the order the
# API returned them -- confirm that order is chronological.
for event in espn_games_dict['events']:
    # shortName looks like "AAA @ BBB" or "AAA VS BBB"; name looks like "Away at Home".
    away_team, home_team = re.split(r' (?:@|VS) ', event['shortName'])
    away_name, home_name = event['name'].split(" at ")

    if away_team not in team_dict:
        team_dict[away_team] = Team(away_name, away_team)
    if home_team not in team_dict:
        team_dict[home_team] = Team(home_name, home_team)

    # TODO create some kind of travel penalty here

    competitors = event['competitions'][0]['competitors']

    # A game where nobody is flagged as the winner has no result to learn from.
    # Without this guard it would be scored as an away win and move both ratings.
    if not any(side.get('winner') for side in competitors):
        continue

    home = team_dict[home_team]
    away = team_dict[away_team]

    # assumes competitors[0] is the home side -- TODO confirm against the API payload
    score_home = 1 if competitors[0].get('winner') else 0
    score_away = 1 - score_home

    # Standard Elo expected scores computed from the pre-game ratings.
    q_home = 10**(home.elo/400)
    q_away = 10**(away.elo/400)

    e_home = q_home/(q_home+q_away)
    e_away = q_away/(q_home+q_away)

    # K is evaluated from each team's record/rating *before* this game is counted.
    home.elo += K_VALUE(home) * (score_home - e_home)
    away.elo += K_VALUE(away) * (score_away - e_away)

    if score_home == 1:  # home team won
        home.wins += 1
        away.losses += 1
    else:
        home.losses += 1
        away.wins += 1

# One row per team, rating rounded to one decimal, written out best-first.
team_df = pd.DataFrame(list(team_dict.values()))
team_df['elo'] = team_df['elo'].round(1)
team_df.sort_values(by=['elo'], ascending=False, ignore_index=True).to_csv('ncaa_teams_elo.csv')


# SCHEDULE_URL = "https://www.espn.com/mens-college-basketball/schedule/_/date/20231106"


In [122]:
import random

@dataclass
class Match:
    """A single bracket game between two teams.

    Either side may be ``None`` to represent a bye; the other side then
    advances without a game being simulated.
    """
    left: Team  # may be None (bye)
    right: Team  # may be None (bye)
    winner: Team = None  # set by get_winner()

    def get_winner(self):
        """Decide the match, store the result on ``self.winner`` and return it.

        With both teams present the left team wins with its Elo expected
        score, ``1 / (1 + 10 ** ((right.elo - left.elo) / 400))``, sampled with
        the module-level ``random`` generator.

        Raises:
            AssertionError: if both sides are missing.
        """
        assert self.left is not None or self.right is not None, \
            "a match needs at least one team"

        if self.left is None:
            self.winner = self.right
        elif self.right is None:
            self.winner = self.left
        else:
            # expected chance that left will win
            expected_score = (1+10**((self.right.elo - self.left.elo)/400))**-1
            self.winner = self.left if random.random() <= expected_score else self.right

        return self.winner

    def __repr__(self):
        # A bye has no team object, so guard the attribute access instead of
        # crashing when a bye match is printed.
        left = self.left.abbrev if self.left is not None else "BYE"
        right = self.right.abbrev if self.right is not None else "BYE"
        return f"{left} vs {right}"
    
class Tournament:
    """Single-elimination bracket that is simulated as soon as it is constructed.

    Adjacent entries of ``team_list`` are paired (0 vs 1, 2 vs 3, ...), each
    pair is played as a ``Match``, and the winners form the next round until
    one team remains.

    Attributes:
        _matches: every Match played, in play order (round 1 first).
        _size: number of entries in the original ``team_list``.
        _winner: the team left standing after the last round.

    NOTE(review): when a round has an odd number of teams the trailing,
    unpaired team is dropped without playing -- confirm this is intended.
    """
    def __init__(self, team_list, options=None):
        """Simulate the bracket.

        Args:
            team_list: sequence of at least two teams, in bracket order.
            options: optional mapping; a truthy ``'verbose'`` entry prints each
                round and result. Defaults to no options. (A ``None`` default
                is used so that no mutable object is shared between calls and
                nothing has to be imported before the class is defined.)
        """
        assert len(team_list) > 1
        verbose = bool(options.get('verbose', False)) if options is not None else False

        self._matches = []
        self._size = len(team_list)
        round_no = 1

        while len(team_list) > 1:
            if verbose:
                print(f" -------------------------- ROUND {round_no} -------------------------- ")

            next_round = []
            # zip() stops at the shorter slice, so an odd trailing team is not paired.
            for pair in zip(team_list[::2], team_list[1::2]):
                match = Match(*pair)
                next_round.append(match.get_winner())
                self._matches.append(match)
                if verbose:
                    print(f"{match} ==> {match.winner}")
            team_list = next_round
            round_no += 1

        self._winner = team_list[0]
        if verbose:
            print(f"------------------------- WINNER: {self._winner} -------------------------")
        
        
    # def print_rounds(self):
    #     round = 1
    #     q = self._matches[:]
    #     while len(q) > 1:
            
    #         curr_size = len(q)
    #         for _ in range(curr_size//2):
    #             match = q.pop(0)
    #             print(f"{match.left} vs {match.right} ==> {match.winner}")
    #         round += 1
                
from collections import defaultdict
from math import log2, ceil
def monte_carlo_simulator(n, conferences):
    """Simulate the first bracket ``n`` times and report per-round win rates.

    Args:
        n: number of tournaments to simulate; must be positive.
        conferences: list of team lists. Only ``conferences[0]`` is simulated;
            teams from the other lists still get an (all-zero) entry.

    Returns:
        dict mapping team abbreviation -> list of percentages. Entry ``i`` is
        how often the team won its round ``i + 1`` game; the final entry is how
        often it won the whole tournament (so it repeats the last-round value).

    Raises:
        ValueError: if ``n`` is not positive.
        RuntimeError: if a tournament did not play the expected number of
            matches, which would make the per-round split meaningless.
    """
    if n <= 0:
        raise ValueError("n must be a positive number of simulations")

    bracket = conferences[0]

    # Number of matches in each round. Tournament pairs teams two at a time and
    # drops an unpaired trailing team, so every round halves (rounding down).
    # Deriving this from the bracket size replaces a hard-coded 4 rounds that
    # only fit a 16-team bracket.
    round_sizes = []
    remaining = len(bracket)
    while remaining > 1:
        remaining //= 2
        round_sizes.append(remaining)
    n_rounds = len(round_sizes)

    freq_list = {team.abbrev: [0] * (n_rounds + 1) for teams in conferences for team in teams}

    for _ in range(n):
        tourney = Tournament(bracket)
        matches = tourney._matches
        if len(matches) != sum(round_sizes):
            raise RuntimeError(
                f"expected {sum(round_sizes)} matches for {len(bracket)} teams, got {len(matches)}"
            )

        # _matches is in play order, so consecutive slices are consecutive rounds.
        start = 0
        for round_idx, size in enumerate(round_sizes):
            for match in matches[start:start + size]:
                freq_list[match.winner.abbrev][round_idx] += 1
            start += size
        freq_list[tourney._winner.abbrev][-1] += 1

    return {k: [cnt/n*100 for cnt in v] for k, v in freq_list.items()}
    
    
# Bracket order matters: adjacent entries meet in round 1.
# NOTE(review): this list has 17 entries. Tournament pairs teams two at a time,
# so the trailing unpaired entry ('NDSU') never plays and ends up with 0% in
# every column -- confirm the intended field.
tournament_entries = [
    'PUR', 'MONT', 'BOIS', 'TEX', 'MSST',
    'SDSU', 'AMCC', 'UK', 'NJIT',
    'WSU', 'UVA', 'CREI', 'MORE',
    'USU', 'TA&M', 'ISU', 'NDSU'
]

# Seed the generator Match draws from so the exported odds are reproducible.
random.seed(2024)

freq = monte_carlo_simulator(100_000, [[team_dict[team] for team in tournament_entries]])

# Length of each per-team list: one entry per round plus the trailing
# tournament-winner entry, so the last 'round N' column is the title odds.
n_rounds = len(next(iter(freq.values())))
print(n_rounds)

df = team_df[team_df['abbrev'].isin(freq.keys())].sort_values(by=["abbrev"])
for r in range(n_rounds):
    # map() keeps df's index intact and fills the whole column at once.
    df[f'round {r+1}'] = df['abbrev'].map(lambda abbrev, r=r: round(freq[abbrev][r], 1))

df.sort_values(by=[f'round {n_rounds}'], ascending=False).to_csv('midwest_odds.csv')
# final_four = Tournament()
# final_four.print_rounds()

5


In [None]:

# # computer 
# bpi_tables = pd.read_html("https://www.espn.com/mens-college-basketball/bpi")
# # print(bpi_tables[0][''])
# bpi_tables[1]['Team'] = bpi_tables[0]['Team']
# bpi_tables[1]['CONF'] = bpi_tables[0]['CONF']

# bpi_tables[1].to_csv('bpi.csv')
# # human rankings
# # sagarin_table = pd.read_html("http://www.usatoday.com/sports/ncaab/sagarin/")
# # ncaa_table = pd.read_html("")


In [None]:
# # from bs4 import BeautifulSoup

# pom_table = pd.read_html("pomeroy.html")
# print(pom_table.head(20))

In [None]:
# lrmc_table = pd.read_html("lrmc.html")
# print(lrmc_table)

In [None]:
# moore_table = pd.read_csv("moore.csv")
# print(moore_table)