In [None]:
# Imports first, then the one-time notebook / database setup.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from pymongo import MongoClient

# Enable inline plotly output for this notebook (connected mode).
init_notebook_mode(connected=True)

# MongoClient() without arguments uses the driver defaults; the alternative
# host/port is kept from the original for reference.
client = MongoClient()  # (host='localhost', port=9999)
usta_db = client['usta']
# Collection holding one document per player.
players = usta_db['trentries']

In [None]:
# Total number of documents in the collection.
# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# count_documents({}) is the exact-count replacement.
players.count_documents({})

In [None]:
from plotly.graph_objs import *
from plotly import tools
import numpy as np
from scipy.optimize import curve_fit

In [None]:
gender = 'Male'

# Select players of the chosen gender whose doubles UTR is above 1.005 and
# whose doubles status is either 'Rated' or 'Projected'.
utr_query = {
    'utr.doubles.rating': {'$gt': 1.005},
    'info.gender': gender,
    'utr.doubles.status': {'$in': ['Rated', 'Projected']}
}
utr_players = list(players.find(utr_query))

# count_documents({}) replaces Collection.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
print(len(utr_players), '/', players.count_documents({}), gender.lower(), 'players with UTR doubles rating')

In [None]:
# Group the selected players into scatter-trace dicts (x = ln(UTR), y = NTRP,
# text = player name). Traces are keyed by UTR status; 'Projected' players are
# further split by progress bin ('Projected-0', 'Projected-1', ...).
data = {}
nr_progress_bins = 3
progress_bin_width = 1./nr_progress_bins
fmt = 'doubles'

for player in utr_players:
    name = player['info']['name']
    ntrp = player['tr']['EstimatedDynamic']
    rating_info = player['utr'][fmt]
    utr = rating_info['rating']
    status = rating_info['status']
    progress = rating_info['progress']

    if status != 'Projected':
        key = status
    else:
        progress_bin = int(np.floor(progress/progress_bin_width))
        if progress_bin < nr_progress_bins:
            key = '{}-{}'.format(status, progress_bin)
        else:
            # a bin index past the last bin is counted as 'Rated'
            key = 'Rated'

    # create the trace on first use, then append this player's point
    trace = data.setdefault(
        key, dict(x=[], y=[], mode='markers', text=[], name=key)
    )
    trace['x'].append(np.log(utr))
    trace['y'].append(ntrp)
    trace['text'].append(name)

# number of points per trace, and the overall total
counts = {k: len(v['x']) for k, v in data.items()}
print(counts)
print(sum(counts.values()))

In [None]:
def func(x, a, b, c, d):
    """Arctan-shaped model ``a * arctan(b * (x - c)) + d``.

    Args:
        x: scalar or numpy array at which to evaluate the model.
        a: amplitude of the arctan term.
        b: slope factor applied to the shifted argument.
        c: horizontal shift.
        d: vertical offset.

    Returns:
        The model value(s), same shape as ``x``.
    """
    shifted = x - c
    return a * np.arctan(b * shifted) + d

def _shifted_rated_model(a, b):
    """Return a two-parameter model ``f(x, c, d)`` with ``a`` and ``b`` frozen.

    A factory is used instead of an inline lambda so that each model keeps the
    ``a``/``b`` values it was created with. A lambda defined in the loop would
    look ``a`` and ``b`` up at call time and silently pick up later
    reassignments. ``a``/``b`` must not become default arguments either:
    curve_fit infers the number of free parameters from the signature.
    """
    def model(x, c, d):
        return func(x, a, b, c, d)
    return model


# Fit results per trace key:
#   pars  - fitted parameters (4 for 'Rated', 2 = (c, d) for projected bins)
#   funcs - the model that was fitted
#   xi    - x grid used to draw the fitted curve
#   lines - model values on that grid
pars, lines, funcs, xi = {}, {}, {}, {}
# maximum allowed |y - fit| in the second pass, per status
cutoffs = {'Rated': 0.6, 'Projected': 0.75}
projected = ['Projected-{}'.format(pb) for pb in range(nr_progress_bins)]

# Pass 0 fits all points; pass 1 drops points further than the cutoff from the
# pass-0 fit (in place) and refits.
for it in [0, 1]:
    for key in ['Rated'] + projected:

        if it:
            cutoff = cutoffs[key.split('-')[0]]
            model, model_pars = funcs[key], pars[key]
            # `not (... > cutoff)` keeps exactly the points the original
            # pop-based loop kept (including any NaN residuals).
            keep = [
                not (abs(y - model(x, *model_pars)) > cutoff)
                for x, y in zip(data[key]['x'], data[key]['y'])
            ]
            for field in ('x', 'y', 'text'):
                data[key][field][:] = [
                    value for value, ok in zip(data[key][field], keep) if ok
                ]

        xvals = np.array(data[key]['x'])
        yvals = np.array(data[key]['y'])

        if 'Projected' in key:
            # Projected bins share a and b with the current 'Rated' fit and
            # only float the shift c and the offset d.
            rated_a, rated_b = pars['Rated'][:2]
            funcs[key] = _shifted_rated_model(rated_a, rated_b)
        else:
            funcs[key] = func

        pars[key], pcov = curve_fit(funcs[key], xvals, yvals)
        xi[key] = np.linspace(0, 2.7, num=100)
        lines[key] = funcs[key](xi[key], *(pars[key]))
        print(it, key, *(pars[key]))

In [None]:
# Histogram the NTRP values in slices of ln(UTR) for every trace and record
# the average per-slice standard deviation ("width") of each trace.
keys = ['Rated'] + projected[::-1]
fig = tools.make_subplots(
    # two columns; enough rows for all keys (2 rows for the default 4 keys)
    rows=(len(keys) + 1) // 2, cols=2, #shared_xaxes=True, shared_yaxes=True,
    subplot_titles=keys
)
nbins_x = 7
# bin width chosen so that the largest x of any trace falls inside the last bin
bw_x = max([max(data[k]['x'])+0.01 for k in data.keys()]) / nbins_x
avg_widths = {}

for ikey, key in enumerate(keys):

    # collect the y values of this trace per x bin
    yvals = [[] for n in range(nbins_x)]
    for x, y in zip(data[key]['x'], data[key]['y']):
        yvals[int(x/bw_x)].append(y)

    # Empty bins are skipped: np.std([]) is NaN and would turn the average
    # width (and everything scaled by it afterwards) into NaN.
    widths = [np.std(ys) for ys in yvals if ys]
    # 1-based subplot row / column for this key
    nx, ny = ikey // 2 + 1, ikey % 2 + 1
    avg_widths[key] = np.average(widths)
    if 'Projected' in key:
        # 'Rated' is first in keys, so its width is already available here
        print(key, avg_widths[key]/avg_widths['Rated'])

    for n in range(nbins_x):
        fig.append_trace(
            Histogram(
                x=yvals[n], opacity=0.75,
                name='{:.2f}-{:.2f}'.format(n*bw_x, (n+1)*bw_x)
            ), nx, ny
        )

fig['layout'].update(title='widths for NTRP distributions', barmode='overlay', showlegend=False)
iplot(fig)

In [None]:
def inv_func(ntrp, p):
    """Invert the arctan model: return ln(UTR) for a given NTRP.

    With ``p = (a, b, c, d)`` and ``ntrp = a * atan(b * (ln(utr) - c)) + d``
    this evaluates ``ln(utr) = tan((ntrp - d) / a) / b + c``.

    Args:
        ntrp: scalar or numpy array of NTRP values.
        p: indexable with at least four entries ``a, b, c, d``.
    """
    a, b, c, d = p[0], p[1], p[2], p[3]
    angle = (ntrp - d) / a
    return np.tan(angle) / b + c

# For every projected progress bin, rescale its points and its fitted curve in
# place: y-residuals w.r.t. the bin's own fit are scaled by the ratio of
# average widths ('Rated' / bin) and shifted by the bin's d-offset, then x is
# multiplied by the ratio of the inverse 'Rated' curve to the inverse bin
# curve evaluated at the new y.
# NOTE(review): data, lines and xi are modified in place, so running this cell
# a second time applies the correction again.
for key in projected:
    # pars[key] holds the two fitted parameters (c, d) of the projected model;
    # xs / ys are their differences to the 'Rated' c and d.
    xs = pars[key][0] - pars['Rated'][2]
    ys = pars[key][1] - pars['Rated'][3]
    # NOTE(review): `l` is not used anywhere below.
    l = zip(data[key]['x'], data[key]['y'])
    # full 4-parameter vector of this bin's curve: 'Rated' a, b + the bin's c, d
    p = list(pars['Rated'][:2]) + list(pars[key])
    print(xs, ys, p)

    # correct the individual data points
    for idx in range(len(data[key]['x'])):
        # fitted value and residual of this point
        y0 = funcs[key](data[key]['x'][idx], *(pars[key]))
        dy = data[key]['y'][idx] - y0
        # residual magnitude rescaled from this bin's width to the 'Rated' width
        dy_corr = abs(dy) / avg_widths[key] * avg_widths['Rated']
        # y is overwritten first; the x factor below uses the corrected y
        data[key]['y'][idx] = y0 + np.sign(dy) * dy_corr - ys
        xfac = inv_func(data[key]['y'][idx], pars['Rated'])
        xfac /= inv_func(data[key]['y'][idx], p)
        data[key]['x'][idx] *= xfac

    # apply the same offset and x scaling to the fitted curve
    for idx, x in enumerate(xi[key]):
        lines[key][idx] -= ys
        xfac = inv_func(lines[key][idx], pars['Rated'])
        xfac /= inv_func(lines[key][idx], p)
        xi[key][idx] *= xfac

In [None]:
# Overlay the points of every trace with the corresponding fitted curves.
point_traces = []
for key in keys:
    points = data[key]
    print(len(points['x']))
    point_traces.append(Scatter(points))

curve_traces = [
    Scatter(x=xi[key], y=lines[key], mode='lines', name=key)
    for key in keys
]

# points first, curves afterwards
plotly_data = point_traces + curve_traces
layout = Layout(
    xaxis=dict(title='ln(UTR)'),
    yaxis=dict(title='NTRP'),
)
figure = Figure(data=plotly_data, layout=layout)
iplot(figure)

In [None]:
def avg_rating(ntrp, utr, progress, projected_key='Projected-0'):
    """Convert a UTR into an NTRP-scale rating, correcting projected UTRs.

    The ln(UTR) distance between the projected curve and the 'Rated' curve at
    the player's NTRP is taken as the maximum correction. It is applied in
    proportion to ``1 - progress`` before the corrected ln(UTR) is mapped
    through the 'Rated' fit.

    Args:
        ntrp: the player's NTRP value.
        utr: the player's (numeric) UTR.
        progress: rating progress; 1 means no correction is applied.
        projected_key: key in ``pars`` of the projected fit that defines the
            maximum correction. Defaults to the lowest progress bin.
            NOTE(review): the original looked up ``pars['Projected']``, which
            the fit never creates (its keys are 'Projected-<bin>'); using the
            lowest bin is an assumption about the intent - confirm.

    Returns:
        The 'Rated' model evaluated at the corrected ln(UTR).

    Raises:
        KeyError: if ``projected_key`` or 'Rated' is missing from ``pars``.
    """
    rated_pars = pars['Rated']
    # Projected fits only hold (c, d); a and b are shared with the 'Rated'
    # fit, so the full 4-parameter vector has to be assembled for inv_func.
    projected_pars = list(rated_pars[:2]) + list(pars[projected_key])
    max_utr_corr = inv_func(ntrp, projected_pars) - inv_func(ntrp, rated_pars)
    print(ntrp, np.log(utr), max_utr_corr)
    utr_corr = np.log(utr) - (1-progress) * max_utr_corr
    utr2ntrp = func(utr_corr, *rated_pars)
    return utr2ntrp

In [None]:
import operator
from pprint import pprint

In [None]:
# Roster of player names; each one is looked up by 'info.name' in the players
# collection when the team table is built.
davie = [
    'Jas Hodzic', 'Jeffrey Wilson', 'Daniel Keolasy', 'Daniel Brandt', 'David Hauser', 'Aaron George', 'Eric Krueger',
    'Patrick Huck', 'Vikas Punna', 'Christopher Belloli', 'Abhinauv Venugopal', 'David Richmond', 'Jaco Raubenheimer',
    'Jan Vejsada', 'Ryan Thompson', 'Santi Soumpholphakdy', 'Mark McKeen', 'Steven K. Wang', 'Jeff Machle', 'Dave Ochoa',
    'Chris Unno', 'Brian Barlay', 'Wayne Hagstrom', 'Lucas Goldstein', 'John Booker', 'Michael Rennels'
]

In [None]:
# Build {player name: rating rounded to 3 decimals} for the roster and print
# it sorted from highest to lowest rating.
davie_data = {}
# wait for UTR to import full combo season
for name in davie:
    player = players.find_one({'info.name': name})
    # NOTE(review): the message says NTRP although the check is for a missing
    # player document or a missing 'utr' field.
    if player is None or 'utr' not in player:
        print('no NTRP for', name)
        continue

    name = player['info']['name']
    ntrp = player['tr']['EstimatedDynamic']
    doubles = player['utr']['doubles']
    utr = doubles['rating']
    progress = doubles['progress']
    # a missing progress is treated as fully progressed
    progress = 1. if progress is None else progress

    if isinstance(utr, str):
        # a string rating means there is no numeric doubles UTR to convert
        print('no doubles UTR for', name, '-> using NTRP only')
        avg = ntrp
    else:
        avg = avg_rating(ntrp, utr, progress)

    davie_data[name] = float('{:.3f}'.format(avg))

ranking = sorted(davie_data.items(), key=lambda item: item[1], reverse=True)
pprint(ranking)