In [None]:
import pymongo, itertools, collections
import networkx as nx
import matplotlib.pyplot as plt
from circos import CircosPlot
from hiveplot import HivePlot
%matplotlib inline

In [None]:
# Connect to the local MongoDB server and grab the two collections of the
# 'usta' database that the rest of the notebook reads from.
client = pymongo.MongoClient('mongodb://localhost:27017')
players = client['usta'].players
matches = client['usta'].matches
# Sanity check: number of documents in each collection.
# NOTE(review): Collection.count() is deprecated in newer PyMongo releases
# (count_documents({}) is the replacement) -- revisit when upgrading.
print(players.count(), matches.count())

In [None]:
def get_total_games(score):
    """Return the total number of games played over all sets in `score`.

    `score` is a sequence of set scores such as ['76', '64']; the first two
    characters of each entry are read as single-digit game counts.
    """
    total = 0
    for set_score in score:
        total += int(set_score[0]) + int(set_score[1])
    return total

def pretty_score(score):
    """Format a score such as ['76', '64'] as the string '7-6,6-4'.

    The characters of each set entry are joined with '-', and the sets are
    joined with ','.
    """
    formatted_sets = []
    for set_score in score:
        formatted_sets.append('-'.join(set_score))
    return ','.join(formatted_sets)

def is_bagel(score):
    """Return True when the second game count of every set is zero.

    An empty score also yields True. All sets are parsed before the result
    is decided, so a malformed entry raises regardless of its position.
    """
    second_side_games = [int(set_score[1]) for set_score in score]
    return not any(second_side_games)

# Quick sanity checks for the score helpers defined in this cell.
score = ['76', '64']
assert get_total_games(score) == 23
assert pretty_score(score) == '7-6,6-4'
assert not is_bagel(score)
assert is_bagel(['60', '30'])

In [None]:
def _full_name(person):
    """Return 'first_name last_name' for a player document."""
    return ' '.join([person['first_name'], person['last_name']])


def _node_attributes(person):
    """Build the attributes stored on a graph node for a player document."""
    return dict(
        name=_full_name(person),
        rating=person['rating_level'] - 0.25,  # start in the middle
        level='{0:.1f}'.format(person['rating_level'])
    )


def get_player_graph(player, prev_player_id=None, graph=None):
    """Print a player's singles results and optionally record them in `graph`.

    Reads the module-level `matches` and `players` collections. For every
    match document that lists the player as a singles winner or loser, the
    first individual match involving the player is inspected. It is skipped
    when fewer than 6 games were played, when `is_bagel` is true for the
    score, when no opponent is recorded, when the opponent is
    `prev_player_id`, or when the opponent cannot be found in `players`.

    Args:
        player: player document with '_id', 'first_name', 'last_name' and
            'rating_level'.
        prev_player_id: id of the player whose traversal led here. When
            None this is the root call, and each opponent is traversed one
            level further.
        graph: optional graph object providing `add_node` / `add_edge`.
            Nodes carry `name`, `rating` and `level`; edges point from the
            winner to the loser and carry `score` and `date`.

    Returns:
        None. Results are printed and, if given, written into `graph`.
    """
    indent = '\t' if prev_player_id is not None else ''
    player_id = int(player['_id'])
    print('{}{}: {} ({})'.format(
        indent, player_id, _full_name(player), player['rating_level']
    ))
    # Only the root player is added here; opponents are added by the caller
    # right after their own (recursive) traversal returns.
    if prev_player_id is None and graph is not None:
        graph.add_node(player_id, **_node_attributes(player))
    counter = collections.Counter()
    # sd_keys, wl_keys = ['singles', 'doubles'], ['winner', 'loser']
    sd_keys, wl_keys = ['singles'], ['winner', 'loser']
    query = {'$or': [
        {'.'.join(key): player_id}
        for key in itertools.product(sd_keys, wl_keys)
    ]}
    for match in matches.find(query):
        individual_match_found = False
        for sd in sd_keys:
            for individual_match in match[sd]:
                if individual_match_found:
                    break
                for iwl, wl in enumerate(wl_keys):
                    side = individual_match[wl]
                    involved = side == player_id or (
                        isinstance(side, list) and player_id in side
                    )
                    if not involved:
                        continue
                    individual_match_found = True
                    score = individual_match['score']
                    # the other entry of wl_keys is the opponent's side
                    opponent_id = individual_match[wl_keys[1 - iwl]]
                    if (
                        get_total_games(score) < 6 or  # less than 1 set played
                        is_bagel(score) or  # skip bagels
                        opponent_id is None or  # default
                        opponent_id == prev_player_id  # already added
                    ):
                        break
                    opponent = players.find_one({'_id': str(opponent_id)})
                    if opponent is None:
                        # find_one returns None when no document matches;
                        # report and skip instead of failing on opponent[...]
                        print('{}{}: opponent {} not found in players, skipped'.format(
                            indent, match['_id'], opponent_id
                        ))
                        break
                    counter['{}.{}'.format(sd, wl)] += 1
                    win_or_loss = wl[0].upper()
                    opponent_full_name = _full_name(opponent)
                    print(
                        '{}{}:'.format(indent, match['_id']), win_or_loss,
                        pretty_score(score), '\tvs', opponent_full_name, '({})'.format(opponent_id)
                    )
                    if prev_player_id is None:  # only go one level down
                        get_player_graph(opponent, prev_player_id=player_id, graph=graph)
                    if graph is not None:
                        graph.add_node(opponent_id, **_node_attributes(opponent))
                        kwargs = dict(score=score, date=match['date'])
                        # edges always point from the winner to the loser
                        if win_or_loss == 'W':
                            graph.add_edge(player_id, opponent_id, **kwargs)
                        else:
                            graph.add_edge(opponent_id, player_id, **kwargs)
                    break
    #print('{}{}'.format(indent, counter))

In [None]:
# Look up the seed player, then collect their match network (the player,
# their opponents and the opponents' opponents) into a directed graph.
player = players.find_one({'last_name': 'Huck', 'first_name': 'Patrick'})
G = nx.DiGraph()
get_player_graph(player, graph=G)
# print(G.nodes(data=True))
# print(G.edges(data=True))

In [None]:
def crd(score, break_value=0.06, third_set_weight=0.5):
    """
    CRD - Computer Rated Differential (numerical value for the difference in score)

    One way to assign a value to a specific score is to count the number of service breaks
    and scale it to a value appropriate for NTRP ratings. For instance, at-level/true 4.5
    players should populate the core of the 4.5 interval. Defining the core of a 0.5-wide
    interval as its inner 90% yields the range 4.05 - 4.45. An average upper 4.5 player
    would then correspond to a 4.35 rating and a lower 4.5 player to 4.15. A good scale for
    the CRD reflects the fact that an upper 4.5 player routinely beats a lower 4.5 player.
    A sensible choice for a routine but competitive win is a score of 6-3/6-3 or 6-3/6-2. The
    number of service breaks in these cases should hence be equivalent to the difference of
    0.2 between an upper and a lower 4.5 player. A 6-3/6-3 win entails 3 service breaks whereas
    a 6-3/6-2 win could be counted as 3.5 service breaks. Assigning a scaling factor of 0.06
    for each service break is thus a good choice and results in CRDs of 0.18 and 0.21, respectively.

    Args:
        score: sequence of set scores such as ['63', '62']; the first two characters of
            each entry are read as single-digit game counts.
        break_value: rating differential assigned to one service break.
        third_set_weight: factor applied to the breaks of the third set (index 2).

    Returns:
        The weighted number of breaks multiplied by `break_value`, as a float.
    """
    nb_total = 0.  # total number of breaks (weighted)
    for i, s in enumerate(score):
        # half the game difference of a set is counted as its number of breaks
        nb = (int(s[0]) - int(s[1])) / 2.
        if i == 2:  # third set counts with reduced weight
            nb = nb * third_set_weight
        nb_total += nb
    return nb_total * break_value

# Expected CRDs for representative scores. Results are floats, so they are
# compared with a small tolerance instead of exact equality.
assert abs(crd(['75', '57', '10']) - 0.015) < 1e-9
assert abs(crd(['64', '46', '64']) - 0.03) < 1e-9
assert abs(crd(['75', '57', '75']) - 0.03) < 1e-9
assert abs(crd(['75', '57', '63']) - 0.045) < 1e-9
assert abs(crd(['75', '57', '61']) - 0.075) < 1e-9
assert abs(crd(['76', '76']) - 0.06) < 1e-9
assert abs(crd(['76', '64']) - 0.09) < 1e-9
assert abs(crd(['64', '64']) - 0.12) < 1e-9
assert abs(crd(['63', '64']) - 0.15) < 1e-9
assert abs(crd(['63', '63']) - 0.18) < 1e-9
assert abs(crd(['75', '62']) - 0.18) < 1e-9
assert abs(crd(['62', '63']) - 0.21) < 1e-9
assert abs(crd(['62', '62']) - 0.24) < 1e-9
assert abs(crd(['63', '61']) - 0.24) < 1e-9

# Lopsided wins, using the same 0.06-per-break rule as above.
# NOTE(review): targets of 0.265 (6-1/6-2), 0.295 (6-2/6-0 and 6-1/6-1) and
# 0.325 (6-1/6-0) are not what crd() returns; confirm whether a different
# scaling for blowouts was intended before changing crd() itself.
assert abs(crd(['61', '62']) - 0.27) < 1e-9  # 4.5 breaks
assert abs(crd(['63', '60']) - 0.27) < 1e-9  # 4.5 breaks
assert abs(crd(['62', '60']) - 0.30) < 1e-9  # 5 breaks
assert abs(crd(['61', '61']) - 0.30) < 1e-9  # 5 breaks
assert abs(crd(['61', '60']) - 0.33) < 1e-9  # 5.5 breaks

In [None]:
# List every recorded match in chronological order together with the
# ratings stored on the winner and loser nodes.
edges = list(G.edges(data=True))
edges.sort(key=lambda edge: edge[2]['date'])  # edge = (winner, loser, attributes)
for winner_id, loser_id, d in edges:
    winner_rating = G.node[winner_id]['rating']
    loser_rating = G.node[loser_id]['rating']
    print(winner_rating, loser_rating, d['score'])

In [None]:
# nx.draw(G)
# Circos plot of the match network, nodes coloured by rating level.
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)

nodes = sorted(G.nodes())
edges = G.edges()
node_cmap = {'4.0': 'blue', '4.5': 'red', '5.0': 'green'}
# Build the colours in the same (sorted) order as `nodes` so that each
# colour lines up with its node; iterating G.nodes() here could pair
# colours with the wrong players.
nodecolors = [node_cmap[G.node[n]['level']] for n in nodes]

c = CircosPlot(nodes, edges, radius=10, ax=ax, fig=fig, nodecolor=nodecolors)
c.draw()

In [None]:
# Hive plot: one group of nodes per rating level, all edges in a single
# black group. Node colours come from `node_cmap` defined for the Circos plot.
nodes = {
    level: [n for n, d in G.nodes(data=True) if d['level'] == level]
    for level in ['4.0', '4.5', '5.0']
}
edges = {'group1': G.edges(data=True)}
edge_cmap = {'group1': 'black'}
h = HivePlot(nodes, edges, node_cmap, edge_cmap)
h.draw()

In [None]:
# sorted([(n, G.neighbors(n)) for n in G.nodes()], key=lambda x: len(x[1]), reverse=True)
# print(nx.degree_centrality(G))
# print(nx.has_path(G, 400, 1))
# Histogram of the degree centrality of every node in the match network.
fig = plt.figure(num=0)
degree_centralities = list(nx.degree_centrality(G).values())
plt.hist(degree_centralities)
plt.title('Degree Centralities')