In [1]:
import random
from datetime import datetime

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

from espn_data.get_espn_data import weeks_since_start_season



def split(df, col):
    '''Break ``df`` into a list of sub-frames, one per distinct value of ``col``.

    The sub-frames come back in the order ``df.groupby(col)`` yields them.
    '''
    pieces = []
    for _group_key, group_frame in df.groupby(col):
        pieces.append(group_frame)
    return pieces

# Load the 2020 and 2021 fantasy data CSVs; the paths are relative to the
# notebook's working directory.
ff2020 = pd.read_csv("fantasy/fantasy_data_2020.csv")
ff2021 = pd.read_csv("fantasy/fantasy_data_2021.csv")

In [6]:
def create_top6_dict(ffdata):
    '''Create dictionary of team names and their per-week top-6 win flags.

    Parameters
    ----------
    ffdata : pandas.DataFrame
        Must contain ``team_name``, ``week`` and ``top6_win`` columns. Rows
        that repeat the same (team_name, week, top6_win) triple are collapsed.

    Returns
    -------
    dict
        ``{team_name: [flag, ...]}`` with the flags cast to integers and kept
        in the row order of ``ffdata``. The input frame is not modified.
    '''
    # ``assign`` builds a fresh column instead of setting an attribute on a
    # frame derived from slicing + drop_duplicates, which can raise
    # SettingWithCopyWarning.
    top6_wins = (
        ffdata[['team_name', 'week', 'top6_win']]
        .drop_duplicates()
        .assign(top6_win=lambda frame: frame['top6_win'].astype('int'))
    )
    return {
        team: list(team_rows['top6_win'].values)
        for team, team_rows in top6_wins.groupby('team_name')
    }
    
def create_team_dict(raw_scores):
    '''Map every team name to the array of that team's ``points`` values.

    ``raw_scores`` needs ``team_name`` and ``points`` columns; the arrays keep
    the row order of ``raw_scores``.
    '''
    return {
        team_frame.team_name.drop_duplicates().values[0]: team_frame.points.values
        for team_frame in split(raw_scores, 'team_name')
    }

def simulate_season(team_name, team_dict, top6_dict, n_simulations=10000):
    '''Simulate a team's win total against randomly drawn schedules.

    For every simulated season the team's weekly scores are matched against a
    random ordering of opponents; a win is counted when the team outscores that
    week's opponent, and the team's top-6 flag for the week is added on top.

    Parameters
    ----------
    team_name : hashable
        Key of the team to simulate in ``team_dict`` / ``top6_dict``.
    team_dict : dict
        ``{team: sequence of weekly points}``.
    top6_dict : dict
        ``{team: sequence of weekly top-6 flags (0/1)}``.
    n_simulations : int, optional
        Number of random schedules to draw (default 10000).

    Returns
    -------
    list
        One win total per simulated season.

    Raises
    ------
    ValueError
        If the team has played games but there are no opponents.
    '''
    opponents = [name for name in team_dict if name != team_name]

    # Number of games the team has played so far
    games_count = len(team_dict[team_name])
    if games_count and not opponents:
        raise ValueError("Cannot simulate a season without any opponents")

    all_wins = []
    for _ in range(n_simulations):
        # Randomize order of opponents. One pass through the opponents is a
        # plain random.sample; once more games have been played than there are
        # opponents (which made random.sample raise ValueError), further
        # shuffled passes are appended until the schedule is full.
        schedule = []
        while len(schedule) < games_count:
            still_needed = games_count - len(schedule)
            schedule.extend(random.sample(opponents, min(still_needed, len(opponents))))

        wins = 0
        for week, opp in enumerate(schedule):
            if team_dict[team_name][week] > team_dict[opp][week]:
                wins += 1
            wins += top6_dict[team_name][week]
        all_wins.append(wins)
    return all_wins

def build_probability_distribution(ffdata):
    '''Build the simulated win-total distribution for every team.

    Parameters
    ----------
    ffdata : pandas.DataFrame
        Must contain ``team_name``, ``week``, ``points`` and ``top6_win``.

    Returns
    -------
    pandas.DataFrame
        Indexed by simulated win total, one column per team (named after the
        team, in sorted order); each cell is the fraction of simulations that
        ended on that win total, with 0 where a total never occurred.
    '''
    raw_scores = ffdata[['team_name', 'week', 'points']].drop_duplicates()

    team_dict = create_team_dict(raw_scores)
    top6_dict = create_top6_dict(ffdata)

    win_distributions = []
    for name in sorted(set(raw_scores.team_name.values)):
        simulated = pd.Series(simulate_season(name, team_dict, top6_dict))
        # Name the result explicitly: from pandas 2.0 on, value_counts() labels
        # its result "count"/"proportion" rather than reusing the Series name,
        # which would leave every concatenated column with the same label.
        win_distributions.append(simulated.value_counts(normalize=True).rename(name))

    return pd.concat(win_distributions, axis=1).fillna(0)

# Turn the per-team win-total distribution into "at least N wins" probabilities:
# sort the win totals descending, accumulate down the rows, then restore
# ascending order.
liklihood_table = build_probability_distribution(ff2021).sort_index(ascending=False).cumsum(axis=0).sort_index(ascending=True)
# One percentage formatter (one decimal place) per column of the table.
format_dict = {col:'{:,.1%}'.format for col in liklihood_table.columns }

# NOTE(review): `cusum_wins` is not defined in any cell visible here, so this
# chart only renders if it is still in the kernel from an earlier session.
# The encoding expects `index` and `probability` columns after reset_index().
alt.Chart(cusum_wins.reset_index()).mark_line().encode(x='index', y='probability', tooltip=[alt.Tooltip('index', title='Total H2H Wins'), 'probability']).interactive()

In [9]:
from espn_data.get_espn_data import our_league
our_league

League(443750, 2021)

In [146]:
def player_df_from_line(lineup, first_matchup, week, home_team):
    '''Given a played lineup, return the player information

    Parameters
    ----------
    lineup : iterable
        Player objects exposing ``name``, ``position``, ``slot_position``,
        ``projected_points`` and ``points``.
    first_matchup : object
        Matchup exposing ``home_team.owner`` and ``away_team.owner``.
    week : int
        Week number stored on every row.
    home_team : bool
        True when ``lineup`` belongs to the home team, False for the away team.

    Returns
    -------
    pandas.DataFrame
        One row per player, sorted by ``player_points`` descending with a fresh
        0..n-1 index. An empty lineup gives an empty frame with the same
        columns.
    '''
    player_df_cols = ['team_name', 'week', 'player_name', 'player_pos','player_slot', 'player_proj_points', 'player_points', 'opponent']

    players = list(lineup)
    if not players:
        # Assigning eight column names to a column-less frame raises, so an
        # empty lineup gets an explicitly shaped empty frame instead.
        return pd.DataFrame(columns=player_df_cols)

    # Owner and opponent are the same for every player in the lineup.
    if home_team:
        owner = first_matchup.home_team.owner
        opponent = first_matchup.away_team.owner
    else:
        owner = first_matchup.away_team.owner
        opponent = first_matchup.home_team.owner

    team_info_list = [
        [
            owner,
            week,
            player.name,
            player.position,
            player.slot_position,
            player.projected_points,
            player.points,
            opponent,
        ]
        for player in players
    ]

    return (
        pd.DataFrame(team_info_list, columns=player_df_cols)
        .sort_values('player_points', ascending=False)
        .reset_index(drop=True)
    )

# Calculate the maximum points given the following allowed positions

def position_sorter(column):
    """Sort key that ranks positions in the order ESPN displays them.

    Positions missing from the display order map to NaN.
    """
    display_order = ('QB', 'RB', 'WR', 'TE', 'FLEX', 'D/ST')
    rank_by_position = dict(zip(display_order, range(len(display_order))))
    return column.map(rank_by_position)


def add_ideal_to_player_df(player_df):
    '''Add a tag to the player to determine if they were an ideal pick for that week

    Rows are visited in their existing order (callers pass them sorted by
    points, highest first) and each player greedily takes the first open slot
    they fit: QB, RB1, RB2, WR1, WR2, TE, FLEX (WR/RB/TE) or D/ST.

    Parameters
    ----------
    player_df : pandas.DataFrame
        Needs ``team_name``, ``player_name``, ``player_pos`` and
        ``player_slot`` columns. Not modified.

    Returns
    -------
    pandas.DataFrame
        ``player_df`` plus two boolean columns: ``ideal_player`` (the player
        took a slot in the ideal lineup) and ``played`` (``player_slot`` is not
        ``'BE'``). Missing values in the other columns are left untouched.
    '''
    open_slots = ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'TE', 'FLEX', 'D/ST']
    flex_eligible = ('WR', 'RB', 'TE')

    ideal_row_numbers = []
    for row_number, (_, player) in enumerate(player_df.iterrows()):
        for slot in open_slots:
            # Substring match so that 'RB' fills 'RB1'/'RB2' and 'WR' fills
            # 'WR1'/'WR2'.
            fits_slot = player.player_pos in slot
            fits_flex = slot == 'FLEX' and player.player_pos in flex_eligible
            if fits_slot or fits_flex:
                ideal_row_numbers.append(row_number)
                # Safe to mutate: we stop iterating over open_slots right away.
                open_slots.remove(slot)
                break

    # Positional selection also copes with no player fitting any slot (an
    # empty selection), where concatenating an empty list of rows would raise.
    ideal_lineup = (
        player_df.iloc[ideal_row_numbers][['team_name', 'player_name']]
        .assign(ideal_player=True)
    )
    comb_player_ideal = player_df.merge(ideal_lineup, how='left', on=['team_name', 'player_name'])
    # Only the new flag is filled in; a frame-wide fillna(False) would also
    # overwrite missing projections/points with False.
    comb_player_ideal['ideal_player'] = comb_player_ideal['ideal_player'].eq(True)
    comb_player_ideal['played'] = comb_player_ideal.player_slot != 'BE'
    return comb_player_ideal

def build_matchup_player_dfs(matchup, week):
    '''Stack the home and away player tables (with ideal-lineup tags) for one matchup'''
    # (lineup, is_home) for both sides; home rows come first in the result.
    sides = (
        (matchup.home_lineup, True),
        (matchup.away_lineup, False),
    )
    tagged_frames = [
        add_ideal_to_player_df(player_df_from_line(side_lineup, matchup, week, is_home))
        for side_lineup, is_home in sides
    ]
    return pd.concat(tagged_frames)

## Build the player DF
@st.experimental_memo
def build_full_player_df(our_league):
    '''Concatenate the per-matchup player tables for every week played so far.

    Weeks run from 1 through ``weeks_since_start_season()`` inclusive; each
    week's matchups come from ``our_league.box_scores(week)``.
    '''
    last_week = weeks_since_start_season()
    matchup_frames = [
        build_matchup_player_dfs(box_score, week_number)
        for week_number in range(1, last_week + 1)
        for box_score in our_league.box_scores(week_number)
    ]
    return pd.concat(matchup_frames)

@st.experimental_memo
def waiver_table(our_league):
    '''Create a table of teams, transaction, and player_names

    Parameters
    ----------
    our_league : object
        League exposing ``recent_activity(n)``; each activity has a ``date``
        and a list of ``actions``, where an action is indexed as
        ``(team, action_name, player)``.

    Returns
    -------
    pandas.DataFrame
        One row per 'FA ADDED' / 'WAIVER ADDED' action with columns ``date``,
        ``team_name``, ``action`` and ``player_name``.
    '''
    add_actions = ('FA ADDED', 'WAIVER ADDED')

    fa_adds = []
    for activity in our_league.recent_activity(1000):
        # presumably `activity.date` is epoch milliseconds - TODO confirm.
        # fromtimestamp() without a tz renders the machine's local time.
        transaction_date = datetime.fromtimestamp(activity.date/1000).strftime('%Y-%m-%d %H:%M:%S.%f')
        for step in activity.actions:
            if any(action_name in step for action_name in add_actions):
                # A fresh row per action: sharing one list across the whole
                # activity made a second add in the same activity extend the
                # first row to eight values and append it twice.
                fa_adds.append([transaction_date, step[0].owner, step[1], step[2].name])

    return pd.DataFrame(fa_adds, columns=['date','team_name', 'action', 'player_name'])

# Average waiver points by team
def calc_avg_waiver_points_by_team(our_league):
    '''Create a dataframe of team_name and average points for waiver addition

    Only weeks in which the picked-up player was actually played (not on the
    bench) count toward the average.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_name`` and ``average_waiver_points``; teams without a
        played waiver / free-agent pickup are absent.
    '''
    full_player_table = build_full_player_df(our_league)
    transaction_table = waiver_table(our_league)

    # One row per (team, player): a player added more than once by the same
    # team would otherwise duplicate that player's weekly rows in the merge
    # and be over-weighted in the mean.
    pickups = (
        transaction_table[['team_name', 'player_name', 'action']]
        .drop_duplicates(subset=['team_name', 'player_name'])
    )
    full_player_df_waivers = full_player_table.merge(
        pickups, how='left', on=['team_name', 'player_name']
    )
    # Label only the action column; a frame-wide fillna('DRAFTED') would also
    # write the string into missing numeric values.
    full_player_df_waivers['action'] = full_player_df_waivers['action'].fillna('DRAFTED')

    played_pickups = full_player_df_waivers.query(
        '(action == "WAIVER ADDED" | action == "FA ADDED") & played == True'
    )
    # Average just the points column: a bare groupby().mean() also tries to
    # average the string columns, which raises on pandas >= 2.0.
    avg_waiver_points = played_pickups.groupby('team_name', as_index=False)['player_points'].mean()
    avg_waiver_points = avg_waiver_points.rename(columns={'player_points':'average_waiver_points'})
    return avg_waiver_points

# Calculate win vs loss point differential
def win_loss_marings(ffdata):
    '''Create a dataframe of team, avg win margin, and avg loss margin

    Parameters
    ----------
    ffdata : pandas.DataFrame
        Needs ``team_name``, ``h2h_win``, ``points`` and ``points_against``.
        Not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_name``, ``avg_margin_of_loss`` and
        ``avg_margin_of_victor``; a team with no game of one kind gets '-'.
        Assumes both outcomes of ``h2h_win`` occur somewhere in the data
        (the two pivoted columns are renamed positionally, loss first).
    '''
    # Work on a derived frame so the caller's data does not gain a column.
    with_diff = ffdata.assign(point_diff=ffdata.points - ffdata.points_against)
    win_loss_diff_table = with_diff.groupby(['team_name', 'h2h_win'], as_index=False)['point_diff'].mean()
    # pivot() arguments are keyword-only from pandas 2.0 on.
    win_loss_pivot = (
        win_loss_diff_table
        .pivot(index='team_name', columns='h2h_win', values='point_diff')
        .fillna('-')
        .reset_index()
        .rename_axis(None, axis=1)
    )
    win_loss_pivot.columns = ['team_name', 'avg_margin_of_loss', 'avg_margin_of_victor']
    return win_loss_pivot

# Joined teams, margins, and waiver points
def calc_margins_waivers(fantasy_data, our_league):
    '''Join each team's average win/loss margins with its average waiver points.

    Teams present on only one side of the join are kept; gaps are filled with 0.
    '''
    margins_by_team = win_loss_marings(fantasy_data)
    waiver_points_by_team = calc_avg_waiver_points_by_team(our_league)
    joined = margins_by_team.merge(waiver_points_by_team, how = 'outer')
    return joined.fillna(0)


In [7]:
from espn_api.football import League
import pickle
import streamlit as st
from espn_data.get_espn_data import get_2021_season_data

# Refresh the local data files for each listed season: the season CSV and a
# pickle of the league's recent activity.
for year in [2021]:
    filename = f"./fantasy/waiver_data/wd_{year}.pickle"
    # Credentials come from Streamlit secrets rather than being hard-coded.
    our_league = League(
            league_id=st.secrets["league_id"], year=year, espn_s2=st.secrets["espn_s2"], swid=st.secrets["swid"]
        )
    # NOTE(review): a later cell calls get_2021_season_data(year, our_league);
    # confirm which signature the helper currently has.
    get_2021_season_data(year).to_csv(f"./fantasy/fantasy_data_{year}.csv", index=False)
    # Request up to 2000 recent activity entries.
    ra = our_league.recent_activity(2000)

    with open(filename, 'wb') as handle:
        pickle.dump(ra, handle, protocol=pickle.HIGHEST_PROTOCOL)



In [1]:
from espn_data.get_espn_data import get_2021_season_data
from espn_api.football import League
import streamlit as st

# Re-download the 2021 season data and overwrite the local CSV. This repeats
# the CSV half of the refresh loop in the previous cell.
year = 2021
# Credentials come from Streamlit secrets rather than being hard-coded.
our_league = League(
            league_id=st.secrets["league_id"], year=year, espn_s2=st.secrets["espn_s2"], swid=st.secrets["swid"]
        )
# NOTE(review): a later cell calls get_2021_season_data(year, our_league);
# confirm which signature the helper currently has.
get_2021_season_data(year).to_csv(f"./fantasy/fantasy_data_{year}.csv", index=False)

In [6]:
from espn_api.football import League
import pickle
import streamlit as st
from espn_data.get_espn_data import get_2021_season_data
from google.cloud import storage
import pandas as pd
import gcsfs
import logging
import os
import yaml


# Read YAML file and export its top-level entries as environment variables
with open(".env_vars.yaml", 'r') as stream:
    data_loaded = yaml.safe_load(stream)
# An empty file parses to None, hence the `or {}`.
for key, value in (data_loaded or {}).items():
    if value is None:
        # Leave unset so os.environ.get() returns None instead of "None".
        continue
    # os.environ only accepts strings, while YAML scalars such as a numeric
    # league id parse to int.
    os.environ[str(key)] = str(value)

# Download each listed season from ESPN and store both the season CSV and the
# pickled recent-activity list in the GCS bucket.
for year in [2021]:

    BUCKET_NAME = "fantasy-football-palo-alto-data"
    PROJECT_NAME = "fantasy-football-palo-alto"

    # Initialize league (credentials come from the environment variables
    # loaded from the YAML file)
    our_league = League(
        league_id=os.environ.get("league_id"),
        year=year,
        espn_s2=os.environ.get("espn_s2"),
        swid=os.environ.get("swid"),
    )
    logging.info("League connected")

    # Download data
    season_data_2021 = get_2021_season_data(year, our_league)
    logging.info("Data downloaded")

    # Connect to the client
    client = storage.Client()
    bucket = client.get_bucket(BUCKET_NAME)

    # Save the data
    bucket.blob(f"fantasy_data_{year}.csv").upload_from_string(season_data_2021.to_csv(index=False), "text/csv")
    logging.info("Data saved to GCS")

    # Download the waiver data
    ra = our_league.recent_activity(2000)
    # `project=` was left without a value (a SyntaxError); PROJECT_NAME is the
    # project constant defined above.
    fs = gcsfs.GCSFileSystem(project=PROJECT_NAME)

    logging.info("Waiver data downloaded")

    # The path must be "<bucket>/<object>" so gcsfs opens an object in the bucket.
    filename = f"{BUCKET_NAME}/wd_{year}.pickle"
    with fs.open(filename, "wb") as handle:
        pickle.dump(ra, handle, protocol=pickle.HIGHEST_PROTOCOL)
    logging.info("Waiver data saved")

2021-10-04 22:41:31.585 INFO    root: League connected
2021-10-04 22:42:23.706 INFO    root: Data downloaded
2021-10-04 22:42:26.809 INFO    root: Data saved to GCS
2021-10-04 22:42:39.902 INFO    root: Waiver data downloaded


OSError: Attempt to open a bucket