In [None]:
import pymongo, itertools, collections, unittest, time, sys
import networkx as nx
import matplotlib.pyplot as plt
from circos import CircosPlot
from hiveplot import HivePlot
from usta_tennis.crd import crd
from usta_tennis.utils import *
%matplotlib inline

In [None]:
# Connect to the local MongoDB server and bind the two collections of the
# 'usta' database that the rest of the notebook reads from.
client = pymongo.MongoClient('mongodb://localhost:27017')
usta_db = client['usta']
players = usta_db.players
matches = usta_db.matches

# Guard against running on a different data snapshot: both collections must
# hold exactly these many documents.
assert players.count() == 76114
assert matches.count() == 6228

In [None]:
def add_player(player, graph):
    """Insert ``player`` into ``graph`` as a node keyed by its integer id.

    Args:
        player: mapping with the keys ``_id``, ``first_name``, ``last_name``
            and ``rating_level``.
        graph: object exposing ``add_node(node, **attributes)``.

    The node receives three attributes:
        name:   first and last name joined by a single space,
        rating: ``rating_level - 0.25`` (start in the middle),
        level:  ``rating_level`` formatted with one decimal, e.g. ``'4.0'``.
    """
    node_id = int(player['_id'])
    attributes = {
        'name': ' '.join((player['first_name'], player['last_name'])),
        'rating': player['rating_level'] - 0.25,  # start in the middle
        'level': '{0:.1f}'.format(player['rating_level']),
    }
    graph.add_node(node_id, **attributes)

In [None]:
def _find_player_entry(match, player_id, sd_keys, wl_keys):
    """Return the first individual match of ``match`` that involves ``player_id``.

    ``match[sd]`` is scanned for every ``sd`` in ``sd_keys`` (in order); within
    each individual match the sides named by ``wl_keys`` are checked in order.
    A side matches when it equals ``player_id`` or is a list containing it.

    Returns:
        ``(individual_match, side_index)`` for the first hit, where
        ``side_index`` is the position in ``wl_keys`` of the side the player
        was found on, or ``None`` when the player is on no side.
    """
    for sd in sd_keys:
        for individual_match in match[sd]:
            for iwl, wl in enumerate(wl_keys):
                side = individual_match[wl]
                if side == player_id or (
                    isinstance(side, list) and player_id in side
                ):
                    return individual_match, iwl
    return None


def get_player_graph(player, graph, verbose=False):
    """Recursively add the matches of ``player`` and of all opponents to ``graph``.

    The module-level ``matches`` collection is queried for every match in which
    the player is the winner or the loser of a singles line, sorted by
    ``date``. For each of them one edge ``winner -> loser`` carrying ``score``
    and ``date`` attributes is added to ``graph`` (unless an edge with the same
    attributes already exists), the opponent is added as a node via
    ``add_player`` when missing, and the function recurses into the opponent.

    A match is skipped when ``nr_sets_completed(score) < 2``, when
    ``is_bagel(score)`` is true, when the opponent id is ``None`` (defaults)
    or when the opponent is not in the ``players`` collection (a
    ``'<id> not found!'`` line is printed in that case, even if ``verbose`` is
    off). A player whose query yields fewer than two matches is not expanded.

    The module-level counter ``nesting`` holds the current recursion depth; it
    is used for log indentation and, when ``verbose`` is off, rewritten in
    place on stdout as a progress indicator. It must be defined before the
    first call, and it is restored even when a recursive call raises.

    Args:
        player: player document; ``_id``, ``first_name``, ``last_name`` and
            ``rating_level`` are read. The player is expected to be a node of
            ``graph`` already, because its ``graph.edge`` entry is looked up
            directly.
        graph: graph extended in place; accessed through ``graph.node``,
            ``graph.edge``, ``graph[u][v]`` and ``graph.add_edge``.
        verbose: print one line per visited player and per added match.

    Returns:
        None.
    """
    global nesting
    player_id = int(player['_id'])
    indent = ' ' * nesting
    player_full_name = ' '.join([player['first_name'], player['last_name']])
    if verbose:
        print('{}{}: {} ({})'.format(
            indent, player_id, player_full_name, player['rating_level']
        ))

    # Only singles lines are crawled; the doubles variant is kept for reference:
    # sd_keys, wl_keys = ['singles', 'doubles'], ['winner', 'loser']
    # NOTE(review): the opponent lookup below uses a single id, so enabling
    # doubles (where a side appears to be a list of ids) needs more work.
    sd_keys, wl_keys = ['singles'], ['winner', 'loser']
    query = {'$or': [
        {'.'.join(key): player_id} for key in itertools.product(sd_keys, wl_keys)
    ]}
    player_matches = matches.find(query).sort('date')
    if player_matches.count() < 2:
        if verbose:
            print('{}skip {}. Only played 1 match'.format(indent, player_full_name))
        return

    for match in player_matches:
        entry = _find_player_entry(match, player_id, sd_keys, wl_keys)
        if entry is None:
            continue
        individual_match, iwl = entry
        score = individual_match['score']
        # The opponent sits on the other side of the winner/loser pair.
        opponent_id = individual_match[wl_keys[int(not iwl)]]
        win_or_loss = wl_keys[iwl][0].upper()
        if (
            nr_sets_completed(score) < 2 or  # two sets completed
            is_bagel(score) or opponent_id is None  # skip bagels and defaults
        ):
            continue
        opponent = players.find_one({'_id': str(opponent_id)})
        if opponent is None:
            print('{} not found!'.format(opponent_id))
            continue
        if opponent_id not in graph.node:
            add_player(opponent, graph)
        kwargs = dict(score=score, date=match['date'])
        # Edges always point from the winner to the loser.
        if win_or_loss == 'W':
            source, sink = player_id, opponent_id
        else:
            source, sink = opponent_id, player_id
        if sink in graph.edge[source] and kwargs in graph[source][sink].values():
            continue  # match already added (seen from the opponent's side)
        graph.add_edge(source, sink, **kwargs)
        if verbose:
            print(
                '{}{}:'.format(indent, match['_id']), win_or_loss,
                pretty_score(score), 'vs',
                ' '.join([opponent['first_name'], opponent['last_name']]),
                '({})'.format(opponent_id)
            )
        nesting += 1
        try:
            if not verbose:
                sys.stdout.write('\r{}'.format(nesting))
            get_player_graph(opponent, graph=graph, verbose=verbose)
        finally:
            # Keep the shared depth counter consistent even if the crawl is
            # interrupted or a deeper call fails.
            nesting -= 1

In [None]:
# get_player_graph recurses once per opponent hop, so lift the interpreter's
# recursion ceiling before starting the crawl.
sys.setrecursionlimit(5000)

# Recursion-depth counter that get_player_graph reads and updates as a global.
nesting = 0

# Build the match graph outwards from a single root player.
G = nx.MultiDiGraph()
root_query = dict(last_name='Huck', first_name='Patrick')
# root_query = dict(last_name='Huck', first_name='Johanna')
player = players.find_one(root_query)
add_player(player, G)  # root node
get_player_graph(player, G)

In [None]:
# Number of players (nodes) reached by the crawl.
print(G.number_of_nodes())

In [None]:
def _running_average(history, adjusted):
    """Mean of all ratings in ``history`` together with the ``adjusted`` rating."""
    return (sum(history) + adjusted) / (len(history) + 1)


# Replay every match in chronological order and track, per player, the list of
# dynamic rating values (drv) seeded with the node's starting rating.
edges = sorted(G.edges(data=True), key=lambda edge: edge[-1]['date'])
drv, levels = {}, {}  # dynamic rating values, rating level per player
for winner_id, loser_id, d in edges:
    # First appearance of a player: seed history and level from the graph node.
    for pid in (winner_id, loser_id):
        if pid not in drv:
            drv[pid] = [G.node[pid]['rating']]
        if pid not in levels:
            levels[pid] = G.node[pid]['level']

    winner_history, loser_history = drv[winner_id], drv[loser_id]
    prd = winner_history[-1] - loser_history[-1]  # Player Rating Differential
    rdd = crd(d['score']) - prd  # Rating Differential Discrepancy

    awr = winner_history[-1] + rdd / 2  # Adjusted Winner's Rating (before averaging)
    dwr = _running_average(winner_history, awr)  # Dynamic Winner's Rating
    winner_history.append(dwr)

    alr = loser_history[-1] - rdd / 2  # Adjusted Loser's Rating (before averaging)
    dlr = _running_average(loser_history, alr)  # Dynamic Loser's Rating
    loser_history.append(dlr)

In [None]:
# One line per 3.0-level player whose rating history has more than ten entries.
fig, ax = plt.subplots(figsize=(17, 8))
for player_id, history in drv.items():
    if len(history) <= 10 or levels[player_id] != '3.0':
        continue
    ax.plot(history)
plt.show()

In [None]:
# Circos plot of the match graph with nodes coloured by rating level.
# nx.draw(G)
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)

nodes = sorted(G.nodes())
edges = G.edges()
node_cmap = {'4.0': 'blue', '4.5': 'red', '5.0': 'green'}
# The colours are handed to CircosPlot as a list parallel to `nodes`, so they
# must be built from the same sorted sequence (iterating G.nodes() directly
# yields graph order and can attach colours to the wrong players).
# Levels without an entry in node_cmap are drawn in gray instead of raising
# a KeyError.
nodecolors = [node_cmap.get(G.node[n]['level'], 'gray') for n in nodes]

c = CircosPlot(nodes, edges, radius=10, ax=ax, fig=fig, nodecolor=nodecolors)
c.draw()

In [None]:
# Hive plot: node ids grouped by rating level, all edges in one black group.
# The node colours come from `node_cmap`, defined in the Circos plot cell.
nodes = {
    level: [n for n, d in G.nodes(data=True) if d['level'] == level]
    for level in ('4.0', '4.5', '5.0')
}
edges = {'group1': G.edges(data=True)}
edge_cmap = {'group1': 'black'}
h = HivePlot(nodes, edges, node_cmap, edge_cmap)
h.draw()

In [None]:
# Scratch checks kept for reference:
# sorted([(n, G.neighbors(n)) for n in G.nodes()], key=lambda x: len(x[1]), reverse=True)
# print(nx.degree_centrality(G))
# print(nx.has_path(G, 400, 1))

# Histogram of the degree centrality of every player in the graph.
fig = plt.figure(0)
hist_ax = fig.gca()
degree_centralities = list(nx.degree_centrality(G).values())
hist_ax.hist(degree_centralities)
hist_ax.set_title('Degree Centralities')