# Exploring data for fitting professional matches

Fairlay offers bets on lower-tier leagues whose teams are probably categorised as *professional* rather than *premium* in Datdota. The purpose of this notebook is to assess the prospects and requirements of fitting models to, and making predictions for, these non-premium tournament/league matches.

## Setup

In [11]:
import os

# Relative paths used in this notebook (e.g. 'data/...') are resolved against
# the directory that contains `src`. If `src` is not in the current working
# directory, move up exactly one level.
if 'src' not in os.listdir(os.curdir):
    os.chdir(os.pardir)

In [72]:
import gzip
import json
import pickle

import numpy as np
import pandas as pd

from src import load
import src.models.gp

In [73]:
# Re-execute src/models/gp.py in the running kernel so that edits made to the
# module after its first import take effect without a restart.
import importlib
importlib.reload(src.models.gp)

<module 'src.models.gp' from '/Users/yl3/github/dota2/src/models/gp.py'>

In [3]:
# Keep rendered DataFrames compact: show at most 6 rows and 50 columns.
# The fully qualified 'display.*' keys are used on purpose: pandas resolves
# option names by pattern matching, and since pandas 1.4 the bare 'max_rows' /
# 'max_columns' patterns also match 'styler.render.max_rows' /
# 'styler.render.max_columns', which makes set_option raise OptionError.
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 50)

## Load data

### Fairlay odds

In [31]:
# Read the pickled Fairlay Dota 2 markets snapshot (file dated 20190831).
# NOTE: pickle.load executes arbitrary code from the file, so only load
# snapshots that come from a trusted source.
fairlay_pickle_path = 'data/external/dota2_markets.20190831.pkl'
with open(fairlay_pickle_path, mode='rb') as pickle_fh:
    fl = pickle.load(pickle_fh)

In [39]:
def _earliest_odds(market_rows):
    """Return the row of one market with the smallest `LastSoftCh` value."""
    return market_rows.sort_values('LastSoftCh').iloc[0]


# Only keep individual maps.
is_map_market = fl.dota_market_type == 'map'

# For each Fairlay market (grouped by ID), only keep the earliest odds.
fl_f = fl.loc[is_map_market].groupby('ID').apply(_earliest_odds)

# Remove TI9 matches.
not_ti9 = fl_f.Comp != 'Dota 2 - The International'
fl_f = fl_f.loc[not_ti9]

fl_f

Unnamed: 0_level_0,Comp,Title,Descr,dota_market_type,LastSoftCh,ClosD,wager_type,RunnerName,handicap,odds,odds_c,amount,winp,winp_c,RunnerVolMatched,CatID,SettlD,Status,_Type,_Period,SettlT,Comm,Settler,ComRecip,MinVal,MaxVal,LastCh,LogBugs,Pop,Margin,RunnerVisDelay,RunnerRedA,OrdBStr_S,MarketType,MarketPeriod,MarketCat
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
177105530100,Dota 2 - Gold Occupation Invitational,2Be Continued Esports vs. WOLF,1st Map,map,2019-08-12 21:27:11-04:00,2019-08-13 02:58:02-04:00,against,2Be Continued Esports,0.0,1.980,1.978,9.11,0.4950,0.4944,0.0,32,2019-08-13 03:58:02-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-12 15:58:50-04:00,,0.0,106.833,6000,0.0,0,MONEYLINE,UNDEFINED,eSports
177105530185,Dota 2 - Gold Occupation Invitational,2Be Continued Esports vs. WOLF,2nd Map,map,2019-08-12 22:51:18-04:00,2019-08-13 02:58:02-04:00,against,2Be Continued Esports,0.0,2.030,2.027,10.11,0.5074,0.5067,0.0,32,2019-08-13 03:58:02-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-12 15:58:50-04:00,,0.0,107.101,6000,0.0,0,MONEYLINE,UNDEFINED,eSports
177105530270,Dota 2 - Gold Occupation Invitational,2Be Continued Esports vs. WOLF,3rd Map,map,2019-08-12 22:51:18-04:00,2019-08-13 02:58:02-04:00,against,2Be Continued Esports,0.0,1.962,1.959,9.11,0.4902,0.4896,0.0,32,2019-08-13 03:58:02-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-12 15:58:50-04:00,,0.0,106.940,6000,0.0,0,MONEYLINE,UNDEFINED,eSports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178683631504,Dota 2 - Gold Occupation Invitational,Sunrise vs. Team Aspirations,2nd Map,map,2019-08-31 01:54:03-04:00,2019-08-31 02:59:06-04:00,against,Sunrise,0.0,1.781,1.779,11.11,0.4386,0.4380,0.0,32,2019-08-31 03:59:06-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-30 22:20:32-04:00,,0.0,106.902,6000,0.0,0,MONEYLINE,UNDEFINED,eSports
178683649722,Dota 2 - Gold Occupation Invitational,LFS.cn vs. Five Brothers,1st Map,map,2019-08-31 01:53:59-04:00,2019-08-31 04:59:09-04:00,against,LFS.cn,0.0,1.870,1.867,11.11,0.4651,0.4645,0.0,32,2019-08-31 05:59:09-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-30 22:20:50-04:00,,0.0,106.993,6000,0.0,0,MONEYLINE,UNDEFINED,eSports
178683649822,Dota 2 - Gold Occupation Invitational,LFS.cn vs. Five Brothers,2nd Map,map,2019-08-31 01:53:59-04:00,2019-08-31 04:59:09-04:00,against,LFS.cn,0.0,1.870,1.867,11.11,0.4651,0.4645,0.0,32,2019-08-31 05:59:09-04:00,0,0,0,0,0.0025,"{'1': True, '777889': True, '777555': True}","{'1011849': 0.5, '1013506': 0.5}",0.0,0.0,2019-08-30 22:20:50-04:00,,0.0,106.993,6000,0.0,0,MONEYLINE,UNDEFINED,eSports


#### Team frequencies

In [51]:
# Market titles have the form '<team> vs. <team>'; split them into one column
# per team.
fl_f_teams = fl_f.Title.str.split(' vs. ', expand=True)

# Count how often each team name occurs in either column. The counts are
# printed via to_string() so that every team is listed in full.
team_counts = fl_f_teams.unstack().value_counts()
print(team_counts.to_string())

FlyToMoon                18
Vega Squadron            14
Sunrise                  10
Hippomaniacs             10
Five Brothers            10
Neon Esports              8
Team Space                8
Team Aspirations          6
Look For Sponsors         6
Resurgence                6
Team Drinking             5
2Be Continued Esports     5
Pavaga Junior             5
WOLF                      5
FrozenDawn                5
Shutdown Esports Club     5
LFS.cn                    4
WOLF.dota                 4
Hans Pro Gaming           3
PG.Orca                   3
Flashpoint Gaming         3
BOOM ID                   2
ZEN9                      2
Alter Ego                 2
PG.Barracx                2
Prime                     2
Team Oracle               2
EVOS Esports              2
WOLF.cn                   2
Athletico                 2
Antrophy                  2
Dark Sided                1
FlyTomoon                 1
TRIDENT                   1


### Datdota match data

In [133]:
# Re-execute the project modules in the running kernel so that edits made to
# src/models/gp.py and src/load.py since their first import take effect.
import importlib
importlib.reload(src.models.gp)
importlib.reload(load)  # last expression: the reloaded module is the cell output

<module 'src.load' from '/Users/yl3/github/dota2/src/load.py'>

In [134]:
def _read_matches_df(json_gz_path):
    """Parse a gzipped Datdota JSON dump and convert its 'data' entry with
    `load.matches_json_to_df`."""
    with gzip.open(json_gz_path, 'rb') as fh:
        return load.matches_json_to_df(json.load(fh)['data'])


# Premium matches.
premium_matches = _read_matches_df(
    'data/raw/premium_matches.2019-08-31.json.gz')
prm_m = load.MatchDF(premium_matches)

# Professional matches.
professional_matches = _read_matches_df(
    'data/raw/professional_matches.2019-08-31.json.gz')
prof_m = load.MatchDF(professional_matches)

## Side track: testing the classes for querying `MatchDF` matches

In [105]:
# All premium matches in which both sides, radiant and dire, are one of the
# two teams below.
teams_of_interest = ['TNC Predator', 'Vici Gaming']
premium_df = prm_m.df
radiant_matches = premium_df.radiant_name.isin(teams_of_interest)
dire_matches = premium_df.dire_name.isin(teams_of_interest)
premium_df.loc[radiant_matches & dire_matches]

Unnamed: 0_level_0,startDate,league_name,radiant_name,dire_name,radiantVictory,radiant_nicknames,dire_nicknames,seriesId,duration,radiant_valveId,radiant_players,dire_valveId,dire_players,league_id,startTimestamp,series_start_time,match_i_in_series
matchId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3810442630,2018-04-01 04:00:22,Dota 2 Asia Championships 2018,TNC Predator,Vici Gaming,True,"[Raven, TIMS, Armel, Kuku, Sam H]","[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]",-3810442630,1941,2108395,"[132309493, 155494381, 164532005, 184950344, 1...",726228,"[89423756, 107803494, 113800818, 134276083, 13...",9643,1522555222000,1522555222000,0
3815594091,2018-04-04 09:29:44,Dota 2 Asia Championships 2018,TNC Predator,Vici Gaming,True,"[Raven, TIMS, Armel, Kuku, Sam H]","[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]",212287,1926,2108395,"[132309493, 155494381, 164532005, 184950344, 1...",726228,"[89423756, 107803494, 113800818, 134276083, 13...",9643,1522834184000,1522834184000,0
3815664057,2018-04-04 10:31:23,Dota 2 Asia Championships 2018,Vici Gaming,TNC Predator,True,"[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]","[Raven, TIMS, Armel, Kuku, Sam H]",212287,2818,726228,"[89423756, 107803494, 113800818, 134276083, 13...",2108395,"[132309493, 155494381, 164532005, 184950344, 1...",9643,1522837883000,1522834184000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4976922218,2019-08-20 05:33:15,The International 2019,TNC Predator,Vici Gaming,True,"[Gabbi, TIMS, Armel, Eyyou, Kuku]","[Ori, Paparazi, Yang, Dy, Fade]",359608,4602,2108395,"[152545459, 155494381, 164532005, 173476224, 1...",726228,"[107803494, 137193239, 139937922, 143693439, 1...",10749,1566279195000,1566279195000,0
4977018128,2019-08-20 07:20:47,The International 2019,TNC Predator,Vici Gaming,False,"[Gabbi, TIMS, Armel, Eyyou, Kuku]","[Ori, Paparazi, Yang, Dy, Fade]",359608,3673,2108395,"[152545459, 155494381, 164532005, 173476224, 1...",726228,"[107803494, 137193239, 139937922, 143693439, 1...",10749,1566285647000,1566279195000,1
4977114097,2019-08-20 08:50:26,The International 2019,TNC Predator,Vici Gaming,False,"[Gabbi, TIMS, Armel, Eyyou, Kuku]","[Ori, Paparazi, Yang, Dy, Fade]",359608,2082,2108395,"[152545459, 155494381, 164532005, 173476224, 1...",726228,"[107803494, 137193239, 139937922, 143693439, 1...",10749,1566291026000,1566279195000,2


### `MatchupDict.from_match_df()`

In [121]:
# Build a MatchupDict from the premium matches; `temp` is the object queried
# in the cells that follow.
temp = load.MatchupDict.from_match_df(prm_m)

In [122]:
# Query TNC Predator vs. Vici Gaming at 2018-04-01 04:00:22, which is the
# startDate of match 3810442630 (TNC Predator on radiant) in the table above.
# NOTE(review): the third argument is presumably the map index within the
# series, and the second element of the returned tuple presumably flags that
# the queried team order is swapped relative to radiant/dire - confirm against
# MatchupDict.query.
temp.query('TNC Predator', 'Vici Gaming', 0, '2018-04-01 04:00:22')

(3810442630, False)

In [102]:
# Same lookup with the two team names swapped. The recorded output returns the
# same match ID, but with the boolean in the result tuple set to True.
temp.query('Vici Gaming', 'TNC Predator', 0, '2018-04-01 04:00:22')

(3810442630, True)

In [107]:
# Query with timestamps that do not coincide with a match start: one month
# before and one day after 2018-04-01 04:00:22. In the recorded output both
# lookups resolve to match 3810442630.
for qry_time in ('2018-03-01 04:00:22', '2018-04-02 04:00:22'):
    display(temp.query('TNC Predator', 'Vici Gaming', 0, qry_time))

(3810442630, False)

(3810442630, False)

### `MatchDF.query_maps()`

In [191]:
# Re-execute the project modules in the running kernel so that the latest
# edits to src/models/gp.py and src/load.py are used by the cells below.
import importlib
importlib.reload(src.models.gp)
importlib.reload(load)  # last expression: the reloaded module is the cell output

<module 'src.load' from '/Users/yl3/github/dota2/src/load.py'>

In [192]:
# Re-read the premium matches so that `prm_m` is an instance of the freshly
# reloaded `load.MatchDF`. The professional matches
# ('data/raw/professional_matches.2019-08-31.json.gz' -> `prof_m`) are
# intentionally not re-read here; this section only queries `prm_m`.
premium_json_path = 'data/raw/premium_matches.2019-08-31.json.gz'
with gzip.open(premium_json_path, 'rb') as fh:
    premium_payload = json.load(fh)
premium_matches = load.matches_json_to_df(premium_payload['data'])
prm_m = load.MatchDF(premium_matches)

In [193]:
# Single lookup: Vici Gaming vs. TNC Predator, map index 1, queried at
# 2018-04-04 10:31:23.
# Expected match ID: 3815664057
# NOTE(review): in the recorded output qry_flipped is True although the
# queried team order equals the radiant/dire order of the matched row
# (radiant Vici Gaming, dire TNC Predator) - confirm the intended meaning of
# qry_flipped in MatchDF.query_maps.
prm_m.query_maps('Vici Gaming', 'TNC Predator', 1, '2018-04-04 10:31:23')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,matchId,startDate,league_name,radiant_name,dire_name,radiantVictory,radiant_nicknames,dire_nicknames,seriesId,duration,radiant_valveId,radiant_players,dire_valveId,dire_players,league_id,startTimestamp,series_start_time,match_i_in_series,qry_flipped
qry_team1,qry_team2,qry_map_i,qry_time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Vici Gaming,TNC Predator,1,2018-04-04 10:31:23,3815664057,2018-04-04 10:31:23,Dota 2 Asia Championships 2018,Vici Gaming,TNC Predator,True,"[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]","[Raven, TIMS, Armel, Kuku, Sam H]",212287,2818,726228,"[89423756, 107803494, 113800818, 134276083, 13...",2108395,"[132309493, 155494381, 164532005, 184950344, 1...",9643,1522837883000,1522834184000,1,True


In [194]:
# Several lookups for the same pair of teams in one call: one map index and
# one query time per lookup. The second query time (04:00:22) does not equal
# a match start; the recorded output matches it to the map that started at
# 2018-04-04 09:29:44.
# NOTE(review): in the recorded output the third row has qry_flipped False
# although the matched row has Vici Gaming on radiant, i.e. the queried team
# order is reversed - confirm the intended meaning of qry_flipped.
qry_map_indices = [0, 0, 1]
qry_times = [
    '2018-04-01 04:00:22',
    '2018-04-04 04:00:22',
    '2018-04-04 10:31:23',
]
prm_m.query_maps('TNC Predator', 'Vici Gaming', qry_map_indices, qry_times)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,matchId,startDate,league_name,radiant_name,dire_name,radiantVictory,radiant_nicknames,dire_nicknames,seriesId,duration,radiant_valveId,radiant_players,dire_valveId,dire_players,league_id,startTimestamp,series_start_time,match_i_in_series,qry_flipped
qry_team1,qry_team2,qry_map_i,qry_time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
TNC Predator,Vici Gaming,0,2018-04-01 04:00:22,3810442630,2018-04-01 04:00:22,Dota 2 Asia Championships 2018,TNC Predator,Vici Gaming,True,"[Raven, TIMS, Armel, Kuku, Sam H]","[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]",-3810442630,1941,2108395,"[132309493, 155494381, 164532005, 184950344, 1...",726228,"[89423756, 107803494, 113800818, 134276083, 13...",9643,1522555222000,1522555222000,0,False
TNC Predator,Vici Gaming,0,2018-04-04 04:00:22,3815594091,2018-04-04 09:29:44,Dota 2 Asia Championships 2018,TNC Predator,Vici Gaming,True,"[Raven, TIMS, Armel, Kuku, Sam H]","[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]",212287,1926,2108395,"[132309493, 155494381, 164532005, 184950344, 1...",726228,"[89423756, 107803494, 113800818, 134276083, 13...",9643,1522834184000,1522834184000,0,False
TNC Predator,Vici Gaming,1,2018-04-04 10:31:23,3815664057,2018-04-04 10:31:23,Dota 2 Asia Championships 2018,Vici Gaming,TNC Predator,True,"[old LaNm, Ori, Fenrir, old eLeVeN, Paparazi]","[Raven, TIMS, Armel, Kuku, Sam H]",212287,2818,726228,"[89423756, 107803494, 113800818, 134276083, 13...",2108395,"[132309493, 155494381, 164532005, 184950344, 1...",9643,1522837883000,1522834184000,1,False
