# Colley ranking

Description: Construct a Colley ranking of teams from game results.
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking - set to 0 to get all teams

In [22]:
# Game results file, read without a header row (one game per row).
gameFilename = 'data/2021games.txt'
# Team file, read without a header row; later cells use (team index - 1) as the row position.
teamFilename = 'data/2021teams.txt'
# How many top-ranked teams to print; 0 prints every team.
k = 10

### Load the team names into an array

In [23]:
import pandas as pd

# The team file has no header row, so pandas labels its columns 0, 1, ...
# (column 1 is what the ranking cell prints as the team name).
teamNames = pd.read_csv(teamFilename, header=None)
numTeams = teamNames.shape[0]  # one row per team

### Load the games

In [24]:
# Game table layout (no header row, so columns are addressed by position):
#   0  days since 1/1/0000
#   1  date in YYYYMMDD format
#   2  team1 index
#   3  team1 homefield (1 = home, -1 = away, 0 = neutral)
#   4  team1 score
#   5  team2 index
#   6  team2 homefield (1 = home, -1 = away, 0 = neutral)
#   7  team2 score
games = pd.read_csv(gameFilename, header=None)
numGames = games.shape[0]  # one row per game

### Create the Colley linear system

In [25]:
import numpy as np

# Colley linear system  C r = b, built from the game list:
#   C[i, i] = 2 + (number of games team i played)
#   C[i, j] = -(number of games between teams i and j)
#   b[i]    = 1 + (wins_i - losses_i) / 2
# A tie still counts as a game in C but leaves b unchanged
# (half a win and half a loss cancel out).
colleyMatrix = 2 * np.eye(numTeams)
b = np.ones(numTeams)

# Team indices in the game file are 1-based; shift them to 0-based positions.
team1IDs = games[2].to_numpy() - 1
team2IDs = games[5].to_numpy() - 1
team1Scores = games[4].to_numpy()
team2Scores = games[7].to_numpy()

# Reject out-of-range indices up front. Without this check an index of 0 in
# the file becomes -1 and is silently credited to the last team.
for teamIDs in (team1IDs, team2IDs):
    if teamIDs.size and (teamIDs.min() < 0 or teamIDs.max() >= numTeams):
        raise ValueError(f'game data contains a team index outside 1..{numTeams}')

# np.add.at applies one update per listed index, so teams that meet several
# times (or play many games) accumulate every game; a plain fancy-indexed
# "-=" would apply only a single update per repeated index.
np.add.at(colleyMatrix, (team1IDs, team2IDs), -1)
np.add.at(colleyMatrix, (team2IDs, team1IDs), -1)
np.add.at(colleyMatrix, (team1IDs, team1IDs), 1)
np.add.at(colleyMatrix, (team2IDs, team2IDs), 1)

# +1/2 for team1 when it wins, -1/2 when it loses, 0 for a tie;
# team2 receives the opposite amount.
team1HalfMargin = np.where(
    team1Scores > team2Scores, 0.5,
    np.where(team1Scores < team2Scores, -0.5, 0.0),
)
np.add.at(b, team1IDs, team1HalfMargin)
np.add.at(b, team2IDs, -team1HalfMargin)

print(colleyMatrix)
print(b)

[[74. -3. -3. -3. -3. -3. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-3. 74. -3. -3. -3. -3. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-3. -3. 74. -3. -3. -3. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-3. -3. -3. 74. -3. -3. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-3. -3. -3. -3. 74. -3. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-3. -3. -3. -3. -3. 74. -2. -2. -3. -2. -2. -3. -2. -2. -2. -3. -3. -2.
  -2. -3. -2. -3. -3. -2. -2. -2. -2. -3. -2. -3.]
 [-2. -2. -2. -2. -2. -2. 74. -3. -2. -3. -3. -2. -3. -3. -3. -2. -2. -3.
  -3. -2. -3. -2. -2. -3. -3. -3. -3. -2. -3. -2.]
 [-2. -2. -2. -2. -2. -2. -3. 74. -2. -3. -3. -2. -3. -3. -3. -2. -2. -3.
  -3. -2. -3. -2. -2. -3. -3. -3. -3. -2. -3. -2.]


### Solve the linear system

In [26]:
# Small hand-built 4-team system, kept for reference (disabled):
#A = np.array([[5,-1,-1,-1],[-1,4,-1,0],[-1,-1,5,-1],[-1,0,-1,4,]])
#B = np.array([3/2, 0, 3/2, 1])

# Ratings are the solution of colleyMatrix @ r = b.
r = np.linalg.solve(colleyMatrix, b)

# Team positions ordered from highest rating to lowest
# (ascending argsort of the negated ratings).
iSort = (-r).argsort()

In [21]:
#teamNames2 = np.array(['Lakers', 'Sixers', 'Warriors', 'Pistons'])
#print('Page         PageRank  \n======================')
#for i in range(4):
    #print(f'{teamNames2[iSort[i]]:10}   {r[iSort[i]]:7.5f}')

Page         PageRank  
Lakers       0.58333
Warriors     0.58333
Pistons      0.54167
Sixers       0.29167


### Print the ranking of the teams

In [27]:
# Print the top-k teams by Colley rating (every team when k == 0).
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 means "print everything". Otherwise clamp k to [0, numTeams] so that
# a k larger than the number of teams no longer indexes past the end of iSort.
if k == 0:
    numberTeamToPrint = numTeams
else:
    numberTeamToPrint = max(0, min(k, numTeams))

# iSort holds team row positions from best to worst rating.
for rank, teamIndex in enumerate(iSort[:numberTeamToPrint], start=1):
    print(f'{rank:4d}   {r[teamIndex]:.5f}  {teamNames.loc[teamIndex, 1]}')

print('')   # extra carriage return



************** COLLEY Rating Method **************

Rank   Rating    Team   
   1   0.71135   Utah
   2   0.69837   Phoenix
   3   0.66527   Philadelphia
   4   0.65228   Brooklyn
   5   0.64642   Denver
   6   0.64642   LA_Clippers
   7   0.62631   Milwaukee
   8   0.58148   Portland
   9   0.58148   Dallas
  10   0.58148   LA_Lakers



### Calculate predictability of method

In [28]:
# Hindsight check: for each game already played, does the team with the
# higher rating match the team with the higher score?
firstIDs = games[2].to_numpy() - 1    # 1-based file indices -> 0-based positions
secondIDs = games[5].to_numpy() - 1
firstScores = games[4].to_numpy()
secondScores = games[7].to_numpy()
firstRatings = r[firstIDs]
secondRatings = r[secondIDs]

# The three cases are mutually exclusive, so OR-ing them counts each game at most once.
firstWinCalled = (firstScores > secondScores) & (firstRatings > secondRatings)
secondWinCalled = (secondScores > firstScores) & (secondRatings > firstRatings)
tieCalled = (firstScores == secondScores) & (firstRatings == secondRatings)

numberCorrectPredictions = int((firstWinCalled | secondWinCalled | tieCalled).sum())

print(f'Predictability: {numberCorrectPredictions/numGames*100:.2f}%') 


Predictability: 65.74%
