# Colley weighted ranking

Description: Construct a Colley ranking of teams from game data, with games optionally weighted by when and where they were played
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking; set to 0 to print all teams

In [1]:
# Input files for this run and the length of the printed ranking.
gameFilename = 'data/2008games.txt'   # game results
teamFilename = 'data/2008teams.txt'   # team table
k = 10                                # teams to print; 0 prints every team

In [2]:
# Master switch for time weighting. When False the segment weights below are
# ignored (every game gets a time weight of 1); the home/away/neutral weights
# are still applied.
useWeighting = True

# Multiplier for a game, chosen by where the winning team played
weightHomeWin, weightAwayWin, weightNeutralWin = 1, 1, 1

# Time weights: the season is cut into len(segmentWeighting) equal spans and
# a game's weight is the entry for the span its day falls in.
segmentWeighting = [0.5, 2]

### Load the team names into an array

In [3]:
import pandas as pd

# Read the team table (the file has no header row); its row count is used as
# the number of teams.
teamNames = pd.read_csv(teamFilename, header=None)
numTeams = teamNames.shape[0]

### Load the games

In [4]:
# Layout of the games table, one row per game:
#   column 0 = days since 1/1/0000
#   column 1 = date in YYYYMMDD format
#   column 2 = team1 index
#   column 3 = team1 homefield (1 = home, -1 = away, 0 = neutral)
#   column 4 = team1 score
#   column 5 = team2 index
#   column 6 = team2 homefield (1 = home, -1 = away, 0 = neutral)
#   column 7 = team2 score
# The file has no header row, so columns are addressed by these integers.
games = pd.read_csv(gameFilename, header=None)
numGames = games.shape[0]

### Create the Colley linear system

In [5]:
import numpy as np
from math import ceil 

# Start from the Colley system with no games played: 2 on the diagonal of the
# matrix and 1 in every entry of the right-hand side.
colleyMatrix = 2*np.eye(numTeams)
b = np.ones(numTeams)

# Take the season bounds from the smallest and largest day values rather than
# the first and last rows, so the time segments stay valid even when the game
# file is not in chronological order (otherwise weightIndex could go negative
# or past the end of segmentWeighting).
dayBeforeSeason = games[0].min() - 1
lastDayOfSeason = games[0].max()
seasonLength = lastDayOfSeason - dayBeforeSeason
numberSegments = len(segmentWeighting)

# Walk the needed columns in lockstep, one tuple per game.
gameColumns = zip(games[0], games[2], games[3], games[4],
                  games[5], games[6], games[7])
for currentDay, team1, team1Loc, team1Score, team2, team2Loc, team2Score in gameColumns:
    # The file numbers teams from 1; Python indexes from 0.
    team1ID = team1 - 1
    team2ID = team2 - 1

    # Time weight: map the game's day onto one of the equal-length segments.
    # The first day of the season lands in segment 0 and the last day in the
    # final segment.
    if useWeighting:
        seasonFraction = (currentDay - dayBeforeSeason)/seasonLength
        weightIndex = ceil(numberSegments*seasonFraction) - 1
        timeWeight = segmentWeighting[weightIndex]
    else:
        timeWeight = 1

    # Location weight depends on where the winner played.
    # NOTE: a tied game has no winner; it is weighted by team 2's location,
    # which is what the previous branch structure did as well.
    winnerLoc = team1Loc if team1Score > team2Score else team2Loc
    if winnerLoc == 1:        # Home win
        locationWeight = weightHomeWin
    elif winnerLoc == -1:     # Away win
        locationWeight = weightAwayWin
    else:                     # Neutral court win
        locationWeight = weightNeutralWin
    gameWeight = locationWeight*timeWeight

    # Update the Colley matrix: each game adds its weight to both teams'
    # diagonal entries and subtracts it from the two off-diagonal entries.
    colleyMatrix[team1ID, team2ID] -= gameWeight
    colleyMatrix[team2ID, team1ID] -= gameWeight
    colleyMatrix[team1ID, team1ID] += gameWeight
    colleyMatrix[team2ID, team2ID] += gameWeight

    # Update the right-hand side: half the weight goes to the winner and is
    # taken from the loser. A tie counts as half a win and half a loss for
    # both teams, which nets to no change.
    if team1Score > team2Score:
        b[team1ID] += gameWeight/2
        b[team2ID] -= gameWeight/2
    elif team1Score < team2Score:
        b[team1ID] -= gameWeight/2
        b[team2ID] += gameWeight/2

### Solve the linear system

In [6]:
# Solve colleyMatrix @ r = b for the rating vector r.
r = np.linalg.solve(colleyMatrix, b)

# Team indices ordered from highest rating to lowest (ascending sort of -r).
iSort = np.argsort(np.negative(r))

### Print the ranking of the teams

In [7]:
# Report header
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 means "print every team". Otherwise cap k at the number of teams so
# that asking for more rows than exist cannot index past the end of iSort.
numberTeamToPrint = numTeams if k == 0 else min(k, numTeams)

# One line per team, best rating first: rank, rating, and the name taken from
# column 1 of the team table.
for i in range(numberTeamToPrint):
    teamIndex = iSort[i]
    print(f'{i+1:4d}   {r[teamIndex]:.5f}  {teamNames.loc[teamIndex,1]}')

print('')   # extra carriage return



************** COLLEY Rating Method **************

Rank   Rating    Team   
   1   1.07328   North_Carolina
   2   1.04528   UCLA
   3   1.03382   Tennessee
   4   1.03086   Memphis
   5   1.00198   Wisconsin
   6   1.00125   Kansas
   7   0.99002   Georgetown
   8   0.98292   Drake
   9   0.98165   Duke
  10   0.97614   Texas



### Calculate the predictability of the method

In [8]:
# Replay every game against the final ratings and count how often the ratings
# agree with the result: the winner must have the strictly higher rating, and
# a tied game counts only when the two ratings are exactly equal.
scoredGames = zip(games[2], games[4], games[5], games[7])
numberCorrectPredictions = 0
for team1, team1Score, team2, team2Score in scoredGames:
    # Team numbers in the file start at 1; r is indexed from 0.
    rating1 = r[team1 - 1]
    rating2 = r[team2 - 1]

    if team1Score > team2Score:
        predictedCorrectly = rating1 > rating2
    elif team2Score > team1Score:
        predictedCorrectly = rating2 > rating1
    else:
        predictedCorrectly = team1Score == team2Score and rating1 == rating2

    if predictedCorrectly:
        numberCorrectPredictions += 1

# Share of games called correctly, as a percentage of all games.
print(f'Predictability: {numberCorrectPredictions/numGames*100:.2f}%')


Predictability: 74.59%
