## Using plotly and the tutorial for chord diagrams

https://plot.ly/python/filled-chord-diagram/

In [1]:
# import libraries
import pandas as pd
import numpy as np
import codecs, json

In [2]:
# Load the pickled DataFrame of trade counts. The preview in the next cell shows
# rows keyed by (Year, Team_1) and one column per Team_2.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# 'trades_by_year' file that you produced yourself or otherwise trust.
yearly_sq_matrix = pd.read_pickle('trades_by_year')

In [4]:
# Preview the first rows to check the (Year, Team_1) x Team_2 layout.
yearly_sq_matrix.head()

Unnamed: 0_level_0,Team_2,ATL,BKN,BOS,CHA,CHI,CLE,DAL,DEN,DET,GSW,...,OKC,ORL,PHI,PHO,POR,SAC,SAS,TOR,UTA,WAS
Year,Team_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1985,ATL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1985,BKN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1985,BOS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1985,CHA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1985,CHI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# This function just checks that a matrix is a SQUARE matrix
def check_data(data_matrix):
    """Return n for a square (n, n) matrix.

    Parameters
    ----------
    data_matrix : object exposing a 2-tuple ``.shape``

    Raises
    ------
    ValueError
        If the two dimensions differ.
    """
    n_rows, n_cols = data_matrix.shape
    if n_rows == n_cols:
        return n_rows
    raise ValueError('Data array must have (n,n) shape')

In [None]:
import plotly.plotly as py
#import plotly.figure_factory as ff
from plotly.graph_objs import *

In [None]:
# Check that the NBA trade matrix is square and keep its size as L.
# NOTE(review): `matrix` is not assigned in any cell visible above (only
# `yearly_sq_matrix` is loaded), so on a fresh kernel this line raises NameError.
# Presumably `matrix` should be a square NumPy array derived from
# `yearly_sq_matrix` (e.g. one year's slice, or the sum over years) --
# TODO confirm and add the cell that builds it before this one.
L = check_data(matrix)
L

### Ideograms

Need to calculate the IDEOGRAM lengths (i.e. how the OUTER circle will be divided up amongst the teams).

In [None]:
PI = np.pi  # shorthand used by all the angle helpers below

def moduloAB(x, a, b):
    """Map a real number onto the half-open interval [a, b).

    Used to identify the unit circle with an interval of length 2*PI
    (i.e. callers pass b - a = 2*PI), but any a < b works.

    Parameters
    ----------
    x : real number to wrap
    a, b : interval ends, with a < b

    Returns
    -------
    The value congruent to x modulo (b - a), shifted to start at a.

    Raises
    ------
    ValueError
        If a >= b.
    """
    if a >= b:
        raise ValueError('Incorrect interval ends')
    # Python's % with a positive modulus never returns a negative value, so the
    # former `y < 0` branch could not be taken and has been removed.
    return (x - a) % (b - a) + a
    
def test_2PI(x):
    """Tell whether the angle x already lies in [0, 2*PI)."""
    at_least_zero = 0 <= x
    return at_least_zero and x < 2*PI

Calculate the row sums (i.e. all transactions for 1 team).

In [None]:
# One total per row of the matrix.
row_sum = [np.sum(matrix[team, :]) for team in range(L)]

# Angular gap (radians) left between two consecutive ideograms.
gap = 2*PI*0.005

# Each row gets a share of the circle proportional to its row sum, shortened by
# one gap. Note: a row whose sum is 0 ends up with a negative length (-gap).
ideogram_length = (
    2*PI*np.asarray(row_sum)/sum(row_sum)
    - gap*np.ones(L)
)

Now get a list of the angular coordinates for the end-points of each ideogram.

In [None]:
# This returns a list of lists, with each inner list having two values corresponding to the start and end
# points (in radians) of each ideogram around the circle.
def get_ideogram_ends(ideogram_len, gap):
    """Lay the ideograms end to end, starting at angle 0.

    Parameters
    ----------
    ideogram_len : sequence of arc lengths (radians), one per ideogram
    gap : angular space inserted after every ideogram

    Returns
    -------
    list of [start, end] pairs, one per ideogram, in input order.
    """
    ideo_ends = []
    cursor = 0
    for arc_len in ideogram_len:
        arc_end = cursor + arc_len
        ideo_ends.append([cursor, arc_end])
        cursor = arc_end + gap
    return ideo_ends

# [start, end] angle pair (radians) of every ideogram around the circle
ideo_ends = get_ideogram_ends(ideogram_len=ideogram_length, gap=gap)
ideo_ends

The 'make_ideogram_arc()' function below returns equally-spaced points on an ideogram arc, expressed as complex numbers.

The parts of these complex numbers will be used to define the ideogram as a plotly shape bounded by an SVG path.

In [None]:
def make_ideogram_arc(R, phi, a=50):
    """Return equally spaced points on a circular arc as complex numbers.

    Parameters
    ----------
    R : circle radius
    phi : pair of angular coordinates (radians) of the arc ends
    a : controls how many points are evaluated on the arc
        (about ``a`` points per PI radians of arc)

    Returns
    -------
    numpy array of complex numbers ``R*exp(1j*theta)``.
    """
    # Bring both ends into [0, 2*PI) if either one falls outside it.
    if not test_2PI(phi[0]) or not test_2PI(phi[1]):
        phi = [moduloAB(t, 0, 2*PI) for t in phi]

    # `%` and `*` have the same precedence and associate left to right, so the
    # modulus must be parenthesised; `x % 2*PI` evaluates as `(x % 2) * PI`.
    length = (phi[1] - phi[0]) % (2*PI)
    # Short arcs get a fixed 5 points, longer ones scale with their length.
    nr = 5 if length <= PI/4 else int(a*length/PI)

    if phi[0] >= phi[1]:
        # Re-express the ends in [-PI, PI) before interpolating between them.
        phi = [moduloAB(t, -PI, PI) for t in phi]
    theta = np.linspace(phi[0], phi[1], nr)
    return R*np.exp(1j*theta)

Set the ideogram colours and labels.

In [None]:
# Label all 30 teams and set colours
# One (label, (red, green, blue)) entry per team, in ideogram order, so that a
# label and its colour cannot drift apart.
# NOTE(review): the trades DataFrame preview uses 'BKN' where this list says 'BRK'.
_team_palette = [
    ('ATL', (209, 14, 14)),
    ('BRK', (68, 68, 68)),
    ('BOS', (0, 76, 8)),
    ('CHA', (1, 190, 193)),
    ('CHI', (209, 64, 41)),
    ('CLE', (109, 29, 17)),
    ('DAL', (20, 45, 188)),
    ('DEN', (134, 196, 249)),
    ('DET', (44, 55, 119)),
    ('GSW', (214, 204, 17)),
    ('HOU', (216, 31, 15)),
    ('IND', (224, 173, 56)),
    ('LAC', (252, 250, 249)),
    ('LAL', (105, 25, 135)),
    ('MEM', (14, 40, 17)),
    ('MIA', (135, 30, 28)),
    ('MIL', (40, 89, 38)),
    ('MIN', (40, 58, 43)),
    ('NOP', (0, 6, 91)),
    ('NYK', (209, 125, 29)),
    ('OKC', (8, 2, 219)),
    ('ORL', (7, 131, 239)),
    ('PHI', (249, 249, 249)),
    ('PHO', (80, 3, 158)),
    ('POR', (158, 3, 8)),
    ('SAC', (54, 3, 158)),
    ('SAS', (144, 144, 144)),
    ('TOR', (178, 42, 33)),
    ('UTA', (213, 214, 215)),
    ('WAS', (196, 78, 80)),
]

# Team labels, in ideogram order.
labels = [team for team, rgb in _team_palette]

# Matching fill colours; every ideogram uses the same 0.75 opacity.
ideo_colors = ['rgba({}, {}, {}, 0.75)'.format(*rgb) for team, rgb in _team_palette]

del _team_palette

### Ribbons of the chord diagram

The ideograms represent how MANY trades each respective team made, whereas the ribbons that connect ideograms represent the FLOW of players between teams (i.e. the relationships).

To do this we need to 'map' the data in the matrix onto the ideogram locations defined above.

In [None]:
# This function maps every data point onto one of the ideogram intervals defined above
def map_data(data_matrix, row_value, ideogram_length):
    """Scale each matrix entry to the arc length it occupies on its row's ideogram.

    ``mapped[i, j] = ideogram_length[i] * data_matrix[i, j] / row_value[i]``

    Parameters
    ----------
    data_matrix : 2-D array-like of values
    row_value : sequence with one total per row of ``data_matrix``
    ideogram_length : sequence with one arc length (radians) per row

    Returns
    -------
    float numpy array with the same shape as ``data_matrix``. Rows whose
    ``row_value`` is 0 are left at 0 instead of producing NaN.
    """
    # Work from the arguments only; the matrix size comes from its own shape
    # rather than from the notebook-level variable L.
    data = np.asarray(data_matrix, dtype=float)
    totals = np.asarray(row_value, dtype=float).reshape(-1, 1)
    lengths = np.asarray(ideogram_length, dtype=float).reshape(-1, 1)

    mapped = np.zeros(data.shape)
    # Divide only where the row total is non-zero; elsewhere keep the 0 fill.
    np.divide(lengths*data, totals, out=mapped, where=totals != 0)
    return mapped

# Arc length taken by every matrix entry on its row's ideogram
mapped_data = map_data(
    data_matrix=matrix,
    row_value=row_sum,
    ideogram_length=ideogram_length,
)
mapped_data[3]

*How this works:* To each pair of values `(mapped_data[k][j], mapped_data[j][k])`, where `k<=j`, a ribbon is associated (i.e. a curvilinear filled rectangle), having as opposite ends two sub-arcs of the k<sup>th</sup> and j<sup>th</sup> ideograms, and as sides two arcs of quadratic Bezier curves.

Sorting the values along the rows of this `mapped_data` array is recommended for a nicer looking chord chart.

In [None]:
# Get the SORTED array
# Per-row column order that sorts the mapped values in ascending order
idx_sort = mapped_data.argsort(axis=1)
idx_sort[5]

Need to create another arc to connect the end points of each ribbon around the line of the circle -- i.e. create the 'end' of each ribbon.

In [None]:
def make_ribbon_ends(mapped_data, ideo_ends,  idx_sort):
    """Split every ideogram into consecutive sub-arcs, one per matrix column.

    Parameters
    ----------
    mapped_data : (n, n) array of arc lengths per matrix entry
    ideo_ends : list of [start, end] angles, one per ideogram
    idx_sort : per-row column order in which the sub-arcs are laid out

    Returns
    -------
    n lists of n ``(start, end)`` tuples; entry ``[k][p]`` is the sub-arc of
    ideogram k given to column ``idx_sort[k][p]``.
    """
    size = mapped_data.shape[0]
    bounds = np.zeros((size, size + 1))
    for row in range(size):
        # The first boundary is where the ideogram itself starts.
        bounds[row, 0] = ideo_ends[row][0]
        running = ideo_ends[row][0]
        for pos, col in enumerate(idx_sort[row][:size], start=1):
            running = running + mapped_data[row][col]
            bounds[row, pos] = running
            # Continue from the stored value, as the boundary array holds it.
            running = bounds[row, pos]
    return [
        [(bounds[row][pos], bounds[row][pos + 1]) for pos in range(size)]
        for row in range(size)
    ]

# Sub-arc (start, end) of every ribbon end, grouped by ideogram
ribbon_ends = make_ribbon_ends(
    mapped_data=mapped_data,
    ideo_ends=ideo_ends,
    idx_sort=idx_sort,
)
print('ribbon ends starting from the ideogram[2]\n', ribbon_ends[2])

Now we need some functions to define the SIDES of the ribbons in terms of Bezier curves

The function `control_pts()` returns the cartesian coordinates of the control points, b<sub>0</sub>, b<sub>1</sub>, b<sub>2</sub>, supposed as being initially located on the unit circle, and thus defined only by their angular coordinate. The angular coordinate of the point b<sub>1</sub> is the mean of angular coordinates of the points b<sub>0</sub>, b<sub>2</sub>.

Since for a Bezier ribbon side only b<sub>0</sub>, b<sub>2</sub> are placed on the unit circle, one gives `radius` as a parameter that controls position of b<sub>1</sub>. `radius` is the distance of b<sub>1</sub> to the circle center.

In [None]:
def control_pts(angle, radius):
    """Return the cartesian control points b0, b1, b2 of a quadratic Bezier curve.

    Parameters
    ----------
    angle : 3-element sequence with the angular coordinates of b0, b1, b2
    radius : distance from b1 to the origin O(0, 0); b0 and b2 stay on the
        unit circle

    Returns
    -------
    list of three ``(x, y)`` tuples.

    Raises
    ------
    ValueError
        If ``angle`` does not have exactly 3 elements.
    """
    if len(angle) != 3:
        # ValueError replaces an undefined exception name that surfaced as
        # NameError; it also matches ctrl_rib_chords' error style.
        raise ValueError('angle must have len =3')
    b_cplx = np.array([np.exp(1j*angle[k]) for k in range(3)])
    # Only the middle control point is pulled off the unit circle.
    b_cplx[1] = radius*b_cplx[1]
    # zip() is lazy in Python 3, so materialise it for callers that need len().
    return list(zip(b_cplx.real, b_cplx.imag))

In [None]:
def ctrl_rib_chords(l, r, radius):
    """Return the control polygons of the two Bezier sides of a ribbon.

    Parameters
    ----------
    l : 2-element sequence, angular ends of the ribbon's starting arc
    r : 2-element sequence, angular ends of the ribbon's ending arc
    radius : shared ``radius`` argument passed to ``control_pts`` for both sides

    Returns
    -------
    2-element list; item j is ``control_pts`` for the curve from ``l[j]`` to
    ``r[j]`` whose middle angle is their mean.

    Raises
    ------
    ValueError
        If ``l`` or ``r`` does not have exactly 2 elements.
    """
    if not (len(l) == 2 and len(r) == 2):
        raise ValueError('the arc ends must be elements in a list of len 2')
    polygons = []
    for side in (0, 1):
        mid_angle = (l[side] + r[side])/2
        polygons.append(control_pts([l[side], mid_angle, r[side]], radius))
    return polygons

Each ribbon will be coloured by one of the ideograms it connects.

In [None]:
# Row k holds L copies of ideogram k's colour: ribbon (k, j) takes the colour of k.
ribbon_color = [[ideo_colors[k]]*L for k in range(L)]

We can manually modify these colours by accessing the list members and setting an entry to the colour of a particular (e.g. the k<sup>th</sup>) ideogram: `ribbon_color[k][j]=ideo_colors[k]`

Next, we need some functions that return the Plotly SVG path of the ribbon boundaries.

In [None]:
def make_q_bezier(b):
    """Build the Plotly SVG path of a quadratic Bezier curve.

    Parameters
    ----------
    b : iterable of exactly three control points, each indexable as
        ``point[0]`` (x) and ``point[1]`` (y). Iterators without ``len()``,
        such as a ``zip`` object, are accepted.

    Returns
    -------
    str of the form ``'M x0,y0 Q x1, y1 x2, y2'``.

    Raises
    ------
    ValueError
        If ``b`` does not contain exactly 3 points.
    """
    # Materialise once: counting an iterator would exhaust it before the
    # unpacking below, and some iterables have no len().
    points = list(b)
    if len(points) != 3:
        raise ValueError('control poligon must have 3 points')
    A, B, C = points
    return ('M ' + str(A[0]) + ',' + str(A[1]) + ' ' + 'Q ' +
            str(B[0]) + ', ' + str(B[1]) + ' ' +
            str(C[0]) + ', ' + str(C[1]))

# Try with example list entry
b = [
    (1, 4),
    (-0.5, 2.35),
    (3.745, 1.47),
]

# Display the SVG path generated for the sample control polygon
make_q_bezier(b)

In [None]:
# Returns the Plotly SVG path corresponding to an arc represented by its end angular coordinates theta0, theta1.
def make_ribbon_arc(theta0, theta1):
    """Build the Plotly SVG path fragment for a unit-circle arc.

    Parameters
    ----------
    theta0, theta1 : angular coordinates of the arc ends; both must satisfy
        ``test_2PI``

    Returns
    -------
    str made of ``'L x, y '`` segments for the points sampled on the arc.

    Raises
    ------
    ValueError
        If an angle fails ``test_2PI``, or if ``theta0 < theta1`` and the two
        angles, once mapped to [-PI, PI), have a strictly positive product.
    """
    if not (test_2PI(theta0) and test_2PI(theta1)):
        raise ValueError('the angle coordinates for an arc side of a ribbon must be in [0, 2*pi]')

    if theta0 < theta1:
        # Re-express both ends in [-PI, PI) before sampling the arc.
        theta0 = moduloAB(theta0, -PI, PI)
        theta1 = moduloAB(theta1, -PI, PI)
        if theta0*theta1 > 0:
            raise ValueError('incorrect angle coordinates for ribbon')

    # About 40 points per PI radians, never fewer than 3.
    n_points = max(int(40*(theta0 - theta1)/PI), 3)
    arc = np.exp(1j*np.linspace(theta0, theta1, n_points))  # points in polar complex form

    return ''.join(
        'L ' + str(x) + ', ' + str(y) + ' '
        for x, y in zip(arc.real, arc.imag)
    )
        
# Example: arc running from pi/3 down to pi/6
make_ribbon_arc(theta0=np.pi/3, theta1=np.pi/6)

**_Finally, we are ready to define data and layout for the Plotly plot of the chord diagram._**

In [None]:
# Establish layout for plot
def make_layout(title, plot_size):
    """Create the square Plotly layout that hosts the chord diagram.

    Parameters
    ----------
    title : figure title
    plot_size : used for both the width and the height of the figure

    Returns
    -------
    ``Layout`` with both axes hidden and an empty ``shapes`` list; the ribbon
    and ideogram shapes are appended to that list by later cells.
    """
    # Shared settings that hide the axis line, zero line, grid, tick labels and title.
    hidden_axis = {
        'showline': False,
        'zeroline': False,
        'showgrid': False,
        'showticklabels': False,
        'title': '',
    }

    return Layout(
        title=title,
        xaxis=XAxis(hidden_axis),
        yaxis=YAxis(hidden_axis),
        showlegend=False,
        width=plot_size,
        height=plot_size,
        margin=Margin(t=25, b=25, l=25, r=25),
        hovermode='closest',
        shapes=[],
    )

In [None]:
# Function that returns the Plotly shape of an ideogram
def make_ideo_shape(path, line_color, fill_color):
    """Return the Plotly shape dict of one ideogram.

    Parameters
    ----------
    path : SVG path string outlining the ideogram
    line_color : colour of the shape boundary
    fill_color : colour assigned to the ideogram
    """
    outline = Line(color=line_color, width=0.45)
    return {
        'line': outline,
        'path': path,
        'type': 'path',
        'fillcolor': fill_color,
        'layer': 'below',
    }

Typically, you need to generate two types of ribbons: a ribbon connecting two subarcs in different ideograms, and a ribbon from one ideogram to itself.

May not need the latter type for this dataset, as a team cannot trade to itself, but will just leave it in the code as it should have an entry of zero anyway and this way we can re-use the code for other plots that may require this feature.

In [None]:
def make_ribbon(l, r, line_color, fill_color, radius=0.2):
    """Return the Plotly shape dict of a ribbon joining two ideogram sub-arcs.

    Parameters
    ----------
    l : ``[l[0], l[1]]``, angular ends of one arc of the ribbon
    r : ``[r[0], r[1]]``, angular ends of the opposite arc
    line_color : colour of the shape boundary
    fill_color : fill colour of the ribbon shape
    radius : passed to ``ctrl_rib_chords`` for both Bezier sides
    """
    first_side, second_side = ctrl_rib_chords(l, r, radius)
    border = Line(color=line_color, width=0.5)

    # Closed outline: Bezier side, arc on r, reversed Bezier side, arc on l.
    segments = [
        make_q_bezier(first_side),
        make_ribbon_arc(r[0], r[1]),
        make_q_bezier(second_side[::-1]),
        make_ribbon_arc(l[1], l[0]),
    ]

    return {
        'line': border,
        'path': ''.join(segments),
        'type': 'path',
        'fillcolor': fill_color,
        'layer': 'below',
    }

# Remove this if it's buggy for the nba trade dataset
def make_self_rel(l, line_color, fill_color, radius):
    """Return the Plotly shape dict of a ribbon from an ideogram to itself.

    Parameters
    ----------
    l : pair with the angular ends of the sub-arc
    line_color : colour of the shape boundary
    fill_color : fill colour of the shape
    radius : radius of the Bezier control point b_1
    """
    mid_angle = (l[0] + l[1])/2
    control_polygon = control_pts([l[0], mid_angle, l[1]], radius)
    border = Line(color=line_color, width=0.5)

    # One Bezier curve across the sub-arc, closed by the arc itself.
    outline = make_q_bezier(control_polygon) + make_ribbon_arc(l[1], l[0])

    return {
        'line': border,
        'path': outline,
        'type': 'path',
        'fillcolor': fill_color,
        'layer': 'below',
    }

def invPerm(perm):
    """Return the inverse of the permutation ``perm`` as a list.

    ``result[perm[i]] == i`` for every position ``i``.
    """
    size = len(perm)
    inverse = [0] * size
    for position in range(size):
        inverse[perm[position]] = position
    return inverse

# Create 'layout' obj using 'make_layout()' function
# 900 x 900 layout; the shape cells below append to layout['shapes']
layout = make_layout(title='Chord diagram', plot_size=900)

`ribbon_info` is a list of dicts setting the information that is displayed when hovering the mouse over the ribbon ends.

Set the radius of Bezier control point, b<sub>1</sub>, for each ribbon associated to a diagonal data entry (may need to play around with these to get good looking ribbons):

In [None]:
# These values were set after a few trials: one radius for each of the 30 teams.
# Same control-point radius for every team; edit individual entries to tune a ribbon.
radii_sribb = [0.5] * 30

In [None]:
# Will have to modify these dicts to get more detailed 'hover over' info to show (e.g. player name, year, etc.)
# Build the ribbons: for every pair (k, j) with k <= j that has a non-zero entry
# in either direction, append the ribbon shape to layout['shapes'] and add
# near-invisible hover markers (size 0.5) to ribbon_info.
# NOTE: this cell appends to `layout['shapes']`, so running it twice duplicates
# the shapes; re-create `layout` before re-running.
ribbon_info=[]
for k in range(L):
    # sigma is the sorted column order of row k; its inverse gives, for a
    # column j, the position of j's sub-arc inside ribbon_ends[k].
    sigma=idx_sort[k]
    sigma_inv=invPerm(sigma)
    for j in range(k, L):
        # Skip pairs with no value in either direction.
        if matrix[k][j]==0 and matrix[j][k]==0: continue
        eta=idx_sort[j]
        eta_inv=invPerm(eta)
        # Sub-arc of ideogram k that belongs to entry (k, j)
        l=ribbon_ends[k][sigma_inv[j]]  
        
        if j==k:
            # Diagonal entry: ribbon from ideogram k back to itself.
            layout['shapes'].append(make_self_rel(l, 'rgb(175,175,175)' ,
                                    ideo_colors[k], radius=radii_sribb[k])) 
            # Hover marker at radius 0.9, at the mid-angle of the sub-arc
            z=0.9*np.exp(1j*(l[0]+l[1])/2)
            #the text below will be displayed when hovering the mouse over the ribbon
            # NOTE: the trailing comma makes `text` a 1-tuple, not a str.
            text=labels[k]+' traded '+ '{:.0f}'.format(matrix[k][k])+' players '+ 'to themselves?!',
            ribbon_info.append(Scatter(x=[z.real],
                                       y=[z.imag],
                                       mode='markers',
                                       marker=Marker(size=0.5, color=ideo_colors[k]),
                                       text=text,
                                       hoverinfo='text'
                                       )
                              )
        else:
            # Sub-arc of ideogram j that belongs to entry (j, k)
            r=ribbon_ends[j][eta_inv[k]]
            # Hover markers at radius 0.9, at the mid-angle of each ribbon end
            zi=0.9*np.exp(1j*(l[0]+l[1])/2)
            zf=0.9*np.exp(1j*(r[0]+r[1])/2)
            #texti and textf are the strings that will be displayed when hovering the mouse 
            #over the two ribbon ends
            # NOTE: the trailing commas make texti and textf 1-tuples, not str.
            texti=labels[k]+' have traded '+ '{:.0f}'.format(matrix[k][j])+' players to '+\
                  labels[j],
            
            textf=labels[j]+' have traded '+ '{:.0f}'.format(matrix[j][k])+' players to '+\
            labels[k],
            # NOTE: the comma after this append() call only wraps its None result
            # in a throwaway tuple; the append itself still happens.
            ribbon_info.append(Scatter(x=[zi.real],
                                       y=[zi.imag],
                                       mode='markers',
                                       marker=Marker(size=0.5, color=ribbon_color[k][j]),
                                       text=texti,
                                       hoverinfo='text'
                                       )
                              ),
            ribbon_info.append(Scatter(x=[zf.real],
                                       y=[zf.imag],
                                       mode='markers',
                                       marker=Marker(size=0.5, color=ribbon_color[k][j]),
                                       text=textf,
                                       hoverinfo='text'
                                       )
                              )
            r=(r[1], r[0]) #IMPORTANT!!!  Reverse these arc ends because otherwise you get
                          # a twisted ribbon
            #append the ribbon shape
            layout['shapes'].append(make_ribbon(l, r, 'rgb(175,175,175)' , ribbon_color[k][j]))

`ideograms` is a list of dicts that set the position, and color of ideograms, as well as the information associated to each ideogram.

In [None]:
# One hover trace plus one filled shape per ideogram, then assemble the figure.
ideograms = []
for k, arc_ends in enumerate(ideo_ends):
    z = make_ideogram_arc(1.1, arc_ends)    # outer edge, radius 1.1
    zi = make_ideogram_arc(1.0, arc_ends)   # inner edge, radius 1.0
    m = len(z)
    n = len(zi)

    # Thin line along the outer edge carrying the hover text (label + row total)
    outer_edge = Scatter(
        x=z.real,
        y=z.imag,
        mode='lines',
        line=Line(color=ideo_colors[k], shape='spline', width=0.25),
        text=labels[k]+'<br>'+'{:.0f}'.format(row_sum[k]),
        hoverinfo='text',
    )
    ideograms.append(outer_edge)

    # Inner edge walked in reverse so the outline closes on itself
    Zi = np.array(zi.tolist()[::-1])

    outer_leg = ''.join(
        str(z.real[s])+', '+str(z.imag[s])+' L ' for s in range(m)
    )
    inner_leg = ''.join(
        str(Zi.real[s])+', '+str(Zi.imag[s])+' L ' for s in range(m)
    )
    # Finish on the first outer point to close the path
    path = 'M ' + outer_leg + inner_leg + str(z.real[0])+' ,'+str(z.imag[0])

    layout['shapes'].append(make_ideo_shape(path, 'rgb(150,150,150)', ideo_colors[k]))

data = Data(ideograms+ribbon_info)
fig = Figure(data=data, layout=layout)

# Render the figure inline with plotly's offline mode.
# NOTE(review): this import sits at the bottom of the notebook; consider moving
# it to the import cell at the top so all dependencies are visible in one place.
import plotly.offline as off
# Must run before iplot so the notebook is set up for offline plotting.
off.init_notebook_mode()

off.iplot(fig, filename='chord-diagram-nba-trades') 