# AUC and Fairlay odds benchmarking

Benchmarks different values of the logistic scale parameter and of the time scale parameter (used by the covariance function).

## Inputs

### Input files

In [8]:
# Path to the gzipped JSON dump of matches. Despite the name this is a file
# path, not a DataFrame; it is opened with gzip and parsed as JSON below.
MATCHES_DF = 'data/raw/premium_matches.2019-08-31.json.gz'
# Path to the pickled Fairlay markets data, read with pd.read_pickle below.
FAIRLAY_PICKLE = 'data/external/dota2_markets.20190831.pkl'

In [10]:
from collections import OrderedDict

# Each entry maps a display label to a 3-tuple:
#   (logistic scale, win-probability file, player-skills file).
files = OrderedDict()

# Baseline fit; the label records its time scale (1.25) and logistic scale (3).
files['ts = 1.25, ls = 3'] = (
    3.0,  # logistic scale
    'fitted/ti9_full_new_params_by_map_numba.win_probs.gz',
    'fitted/ti9_full_new_params_by_map_numba.player_skills.gz',
)

# Fits that vary the logistic scale; the value is part of the file name.
for logistic_scale in [0.5, 1, 2, 4, 6, 8]:
    ls_stem = f'fitted/ti9_full_new_params_by_map_ls-{logistic_scale}'
    files[f'ls = {logistic_scale}'] = (
        logistic_scale,
        ls_stem + '.win_probs.gz',
        ls_stem + '.player_skills.gz',
    )

# Fits that vary the time scale; all of them use a logistic scale of 3.0.
for time_scale in [0.5, 0.75, 1.0, 1.5, 1.75, 2.0]:
    if time_scale == 1 or time_scale == 2:
        # Whole-number time scales are rendered with one decimal place
        # ("1.0", "2.0") in both the label and the file name.
        time_scale = f"{time_scale:.1f}"
    ts_stem = f'fitted/ti9_full_new_params_by_map_ts-{time_scale}'
    files[f'ts = {time_scale}'] = (
        3.0,
        ts_stem + '.win_probs.gz',
        ts_stem + '.player_skills.gz',
    )
files

OrderedDict([('ts = 1.25, ls = 3',
              (3.0,
               'fitted/ti9_full_new_params_by_map_numba.win_probs.gz',
               'fitted/ti9_full_new_params_by_map_numba.player_skills.gz')),
             ('ls = 0.5',
              (0.5,
               'fitted/ti9_full_new_params_by_map_ls-0.5.win_probs.gz',
               'fitted/ti9_full_new_params_by_map_ls-0.5.player_skills.gz')),
             ('ls = 1',
              (1,
               'fitted/ti9_full_new_params_by_map_ls-1.win_probs.gz',
               'fitted/ti9_full_new_params_by_map_ls-1.player_skills.gz')),
             ('ls = 2',
              (2,
               'fitted/ti9_full_new_params_by_map_ls-2.win_probs.gz',
               'fitted/ti9_full_new_params_by_map_ls-2.player_skills.gz')),
             ('ls = 4',
              (4,
               'fitted/ti9_full_new_params_by_map_ls-4.win_probs.gz',
               'fitted/ti9_full_new_params_by_map_ls-4.player_skills.gz')),
             ('ls = 6',
             

### Parameters

In [16]:
# Scaling factor for logistic regression win probability.
# Left commented out: in this notebook the logistic scale is not a single
# global value but is given per fit as the first element of each `files` entry.
# LOGISTIC_SCALE = 3.0

# Maximum difference in minutes between the Fairlay closing datetime and the
# Datdota map start datetime (1440 minutes = 24 hours). Passed to
# `merge_fairlay_df` when merging Fairlay odds with the predictions.
MAX_TIMEDIFF_MINS = 1440

## Setup.

### Load libraries.

In [6]:
import os

# Make the repository root (the nearest ancestor directory that contains
# 'src') the working directory, so that `import src....` and the relative
# data paths used in this notebook resolve.
#
# The search is done on paths first and the working directory is changed only
# once the root is found. Stepping up with os.chdir("..") never terminates
# when no ancestor contains 'src', because ".." of the filesystem root is the
# root itself; here that case raises instead.
_repo_root = os.getcwd()
while 'src' not in os.listdir(_repo_root):
    _parent_dir = os.path.dirname(_repo_root)
    if _parent_dir == _repo_root:
        # Reached the filesystem root without finding 'src'.
        raise FileNotFoundError(
            "No directory containing 'src' found at or above "
            + os.getcwd())
    _repo_root = _parent_dir
os.chdir(_repo_root)

In [7]:
import gzip
import json
import pickle

import numpy as np
import pandas as pd
import scipy

import src.fairlay
import src.load
import src.statics
import src.stats

### Load data.

#### Matches and predictions

In [11]:
# Parse the gzipped JSON dump and keep only its 'data' entry; the file is
# closed before the match table is built from the parsed records.
with gzip.open(MATCHES_DF, mode='rb') as fh:
    match_records = json.load(fh)['data']
matches = src.load.MatchDF.from_json(match_records)

In [12]:
# Build one MatchPred object per fit listed in `files`, keyed by the same
# label and kept in the same order.
match_pred_objs = OrderedDict()

# Both fitted outputs are tab-separated, with a header row and the first
# column used as the index.
tsv_options = dict(header=0, sep='\t', index_col=0)

for label, (logistic_scale, pred_file, skills_file) in files.items():
    pred_df = pd.read_csv(pred_file, **tsv_options)
    skills_mat = pd.read_csv(skills_file, **tsv_options)
    # read_csv yields string column labels; convert them to integers
    # (presumably player IDs - TODO confirm against the fitting code).
    skills_mat.columns = skills_mat.columns.astype(int)
    match_pred_objs[label] = src.stats.MatchPred(
        matches, pred_df, logistic_scale, skills_mat)

#### Fairlay odds

1. Load the Fairlay data frame.
2. Merge Fairlay odds with match predictions.
3. Add columns for TI9 group stages and playoffs.

In [13]:
# Load the pickled Fairlay markets data.
# NOTE(review): unpickling executes arbitrary code from the file, so
# FAIRLAY_PICKLE must come from a trusted source.
fairlay_df = pd.read_pickle(FAIRLAY_PICKLE)

In [18]:
# For every fit, merge the Fairlay odds with its predictions and flag which
# rows belong to the TI9 group stage and which to the TI9 playoffs.
# fl_preds maps label -> (fl_pred, fl_pred_sel).
fl_preds = OrderedDict()
for label, match_pred in match_pred_objs.items():
    print(label)
    fl_pred, fl_pred_sel = match_pred.merge_fairlay_df(
        fairlay_df, MAX_TIMEDIFF_MINS)

    comp_is_ti9 = fl_pred['Comp'] == 'Dota 2 - The International'
    # Rows starting before this cut-off are treated as group stage; TI9 rows
    # at or after it are treated as playoffs.
    groupstage_date = fl_pred['startDate'] < '2019-08-18 12:00:00-04:00'

    in_group_stage = comp_is_ti9 & groupstage_date
    in_playoffs = comp_is_ti9 & (~groupstage_date)

    # Missing values in either flag are treated as "not in this stage".
    fl_pred_sel['ti9_group_stage'] = in_group_stage.fillna(False)
    fl_pred_sel['ti9_playoffs'] = in_playoffs.fillna(False)

    fl_preds[label] = (fl_pred, fl_pred_sel)

ts = 1.25, ls = 3
ls = 0.5
ls = 1
ls = 2
ls = 4
ls = 6
ls = 8
ts = 0.5
ts = 0.75
ts = 1.0
ts = 1.5
ts = 1.75
ts = 2.0


## AUCs

In [22]:
# ROC AUC of every fit on the TI9 group-stage maps and on the TI9 playoff maps.
aucs = []
for label, match_pred in match_pred_objs.items():
    # Use the Fairlay merge that belongs to *this* fit. Without this lookup
    # the loop would read whatever `fl_pred` / `fl_pred_sel` were left over
    # from the last iteration of an earlier cell, which scores every fit on
    # one fit's map selection and depends on cell execution order.
    fl_pred, fl_pred_sel = fl_preds[label]

    # NOTE(review): the meaning of the first positional argument (False) of
    # `_roc_auc` is not visible here - confirm against src.stats.MatchPred.
    ti9_group_stage_auc = match_pred._roc_auc(
        False,
        fl_pred.map_id[fl_pred_sel.ti9_group_stage])
    ti9_playoffs_auc = match_pred._roc_auc(
        False,
        fl_pred.map_id[fl_pred_sel.ti9_playoffs])
    aucs.append((ti9_group_stage_auc, ti9_playoffs_auc))

#     print("--------")
#     print(label)
#     print("TI9 group stage AUC: {:.3f}".format(ti9_group_stage_auc))
#     print("TI9 playoffs AUC: {:.3f}".format(ti9_playoffs_auc))

# One row per fit, in the same order as match_pred_objs.
pd.DataFrame(aucs, index=list(match_pred_objs),
             columns=['TI9_groups_AUC', 'TI9_playoffs_AUC'])

Unnamed: 0,TI9_groups_AUC,TI9_playoffs_AUC
"ts = 1.25, ls = 3",0.661199,0.592141
ls = 0.5,0.655776,0.565041
ls = 1,0.659392,0.593948
ls = 2,0.66261,0.602078
ls = 4,0.662478,0.587173
ls = 6,0.652072,0.54607
ls = 8,0.646649,0.52981
ts = 0.5,0.652998,0.603884
ts = 0.75,0.661243,0.594851
ts = 1.0,0.663448,0.593948


## Fairlay odds

### Overall TI9, positive EV maps.

In [40]:
# Re-import src.fairlay so that edits made to the module on disk since it was
# first imported take effect without restarting the kernel.
import importlib
importlib.reload(src.fairlay)

<module 'src.fairlay' from '/Users/yl3/github/dota2/src/fairlay.py'>

In [41]:
# Minimum expected value (exclusive) a bet must have to be included. One
# block of Fairlay outcome values is computed per threshold, in this order.
ev_thresholds = (0, 0.05, 0.1, 0.2, 0.3)

# One row per fit: the results of `compute_fairlay_outcomes` for each
# threshold, concatenated in threshold order.
fairlay_outcomes = []
for label, val in fl_preds.items():
    fl_pred, fl_pred_sel = val

    # Selection criteria shared by all thresholds: rows flagged as the correct
    # match and as before the match, restricted to TI9 (group stage or
    # playoffs).
    valid_bet = fl_pred_sel.correct_match & fl_pred_sel.before_match
    at_ti9 = fl_pred_sel.ti9_group_stage | fl_pred_sel.ti9_playoffs

    row = None
    for threshold in ev_thresholds:
        outcome = src.fairlay.compute_fairlay_outcomes(
            fl_pred,
            valid_bet & (fl_pred.ev > threshold) & at_ti9)
        # `+` joins the per-threshold results end to end, exactly as adding
        # them in sequence would.
        row = outcome if row is None else row + outcome

    fairlay_outcomes.append(row)

In [43]:
import itertools

# Column labels: every EV threshold paired with every reported statistic,
# thresholds varying slowest ("ev=0 outcome", "ev=0 ev", ..., "ev=0.3
# n_matches").
ev_labels = ['ev=0', 'ev=0.05', 'ev=0.1', 'ev=0.2', 'ev=0.3']
stat_labels = ['outcome', 'ev', 'ci', 'n_matches']
columns = [' '.join(pair)
           for pair in itertools.product(ev_labels, stat_labels)]

# One row per fit, labelled with the keys of fl_preds.
fairlay_outcomes_df = pd.DataFrame(
    fairlay_outcomes,
    index=fl_preds.keys(),
    columns=columns,
)
fairlay_outcomes_df

Unnamed: 0,ev=0 outcome,ev=0 ev,ev=0 ci,ev=0 n_matches,ev=0.05 outcome,ev=0.05 ev,ev=0.05 ci,ev=0.05 n_matches,ev=0.1 outcome,ev=0.1 ev,ev=0.1 ci,ev=0.1 n_matches,ev=0.2 outcome,ev=0.2 ev,ev=0.2 ci,ev=0.2 n_matches,ev=0.3 outcome,ev=0.3 ev,ev=0.3 ci,ev=0.3 n_matches
"ts = 1.25, ls = 3",18.329,23.424768,"[-7.678174999999994, 51.67407499999998]",153,20.509,22.520265,"[-2.855999999999999, 50.36007499999997]",108,16.986,20.437071,"[-1.1050499999999999, 47.28104999999998]",79,7.255,15.513005,"[-2.3340999999999985, 36.52999999999999]",44,6.774,10.858816,"[-4.235025000000001, 26.90504999999999]",25
ls = 0.5,22.951,61.553933,"[-25.252149999999997, 28.44404999999999]",183,20.259,61.305834,"[-26.248049999999996, 24.708174999999958]",168,21.235,59.877654,"[-24.694024999999996, 24.50004999999999]",149,7.753,54.630581,"[-18.930124999999997, 25.366999999999997]",112,25.111,45.11843,"[-8.902024999999998, 29.897]",73
ls = 1,19.313,37.311335,"[-21.841025000000002, 32.06002499999999]",168,13.613,36.538386,"[-22.21805, 27.123099999999972]",141,18.162,34.891056,"[-14.786049999999996, 30.529024999999994]",113,23.597,28.797049,"[-7.103049999999997, 32.732]",72,10.363,20.693834,"[-4.027025000000001, 28.621]",39
ls = 2,16.68,19.902765,"[-17.490025, 36.075025]",149,22.359,18.76798,"[-10.175199999999995, 35.49007499999999]",99,23.881,17.106557,"[-5.979000000000001, 36.44609999999997]",75,8.467,11.597066,"[-3.7619999999999987, 28.81902499999999]",35,3.34,7.18703,"[-5.0889999999999995, 17.496]",17
ls = 4,12.388,16.532849,"[-2.670025000000003, 56.77009999999997]",144,14.93,15.63123,"[3.137950000000001, 55.58607499999999]",94,16.878,13.619675,"[2.774925000000004, 48.840999999999994]",66,7.748,8.046952,"[-5.092, 27.94112499999997]",27,1.232,4.86869,"[-5.488, 20.633999999999997]",13
ls = 6,5.512,21.096872,"[13.279900000000001, 79.12512499999997]",158,13.901,20.197443,"[7.259000000000005, 65.316]",108,16.702,17.458663,"[4.394925000000002, 55.36204999999999]",72,4.247,13.103039,"[2.5010000000000017, 44.52107499999999]",40,5.057,8.675954,"[-4.443049999999998, 27.485]",21
ls = 8,6.435,26.760326,"[14.305950000000006, 82.27600000000001]",161,7.665,26.029618,"[14.040900000000004, 76.69709999999998]",118,14.66,23.570774,"[11.904000000000003, 68.55504999999998]",85,7.512,18.650545,"[5.3480000000000025, 52.927074999999995]",49,4.887,14.5647,"[3.427000000000001, 44.584]",33
ts = 0.5,18.699,37.510587,"[-5.9571, 56.71307499999998]",167,19.906,36.882598,"[-2.1070499999999996, 57.056]",138,18.685,34.847481,"[0.1339000000000033, 55.26604999999997]",108,11.274,28.315446,"[3.2209500000000015, 49.35402499999999]",63,13.176,24.374717,"[1.2229749999999995, 43.58505]",47
ts = 0.75,16.461,30.755796,"[-6.013124999999997, 55.433024999999994]",162,21.951,30.055458,"[-1.1110500000000014, 56.000999999999976]",127,19.461,27.943384,"[-2.3400999999999956, 49.88907499999998]",97,11.672,22.397855,"[2.5019500000000003, 47.01899999999999]",56,10.224,17.51718,"[-3.1529999999999987, 33.944]",36
ts = 1.0,16.228,26.4544,"[-5.3450750000000005, 55.477074999999985]",161,23.626,25.605473,"[-3.957, 50.754]",115,16.76,23.682799,"[-2.833024999999999, 47.592999999999996]",89,7.455,18.274629,"[-1.22505, 39.65799999999999]",49,8.562,13.876805,"[-2.9780499999999983, 31.80704999999999]",31


In [49]:
# Per-match values: divide each threshold's 'ev' and 'outcome' columns by that
# threshold's 'n_matches' column. The columns come in groups of four per EV
# threshold (outcome, ev, ci, n_matches), so each group starts at a multiple
# of 4.
value_positions = []
count_positions = []
for group_start in range(0, 20, 4):
    value_positions += [group_start + 1, group_start]        # ev, outcome
    count_positions += [group_start + 3, group_start + 3]    # n_matches, twice

# `.values` drops the column labels of the divisor so that the division is
# positional rather than aligned on column names.
(fairlay_outcomes_df.iloc[:, value_positions]
 / fairlay_outcomes_df.iloc[:, count_positions].values)

Unnamed: 0,ev=0 ev,ev=0 outcome,ev=0.05 ev,ev=0.05 outcome,ev=0.1 ev,ev=0.1 outcome,ev=0.2 ev,ev=0.2 outcome,ev=0.3 ev,ev=0.3 outcome
"ts = 1.25, ls = 3",0.153103,0.119797,0.208521,0.189898,0.258697,0.215013,0.352568,0.164886,0.434353,0.27096
ls = 0.5,0.33636,0.125415,0.364916,0.120589,0.401863,0.142517,0.487773,0.069223,0.618061,0.343986
ls = 1,0.222091,0.114958,0.259137,0.096546,0.30877,0.160726,0.399959,0.327736,0.530611,0.265718
ls = 2,0.133576,0.111946,0.189576,0.225848,0.228087,0.318413,0.331345,0.241914,0.422766,0.196471
ls = 4,0.114811,0.086028,0.16629,0.15883,0.206359,0.255727,0.298035,0.286963,0.374515,0.094769
ls = 6,0.133525,0.034886,0.187013,0.128713,0.242481,0.231972,0.327576,0.106175,0.413141,0.24081
ls = 8,0.166213,0.039969,0.22059,0.064958,0.277303,0.172471,0.380623,0.153306,0.441355,0.148091
ts = 0.5,0.224614,0.11197,0.267265,0.144246,0.322662,0.173009,0.449452,0.178952,0.518611,0.28034
ts = 0.75,0.189851,0.101611,0.236657,0.172843,0.288076,0.200629,0.399962,0.208429,0.486588,0.284
ts = 1.0,0.164313,0.100795,0.222656,0.205443,0.266099,0.188315,0.372952,0.152143,0.447639,0.276194
