# Colley weighted ranking

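Description: Construct a Colley ranking of teams from game data, where each game can be weighted by when it was played and by whether the win came at home, away, or on a neutral court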
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

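teamFilename - team data file (no header row; the second column holds the team name, and the team indices used in the game file are 1-based)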

k - number of teams to print in the final ranking - set to 0 to get all teams

In [31]:
# Input files: the game results and the team-name lookup table
gameFilename = "games2007.txt"
teamFilename = "teams2007.txt"

# How many teams to list in the printed ranking (0 means list every team)
k = 15

In [32]:
# Multipliers applied to a game according to where the winner played
weightHomeWin = 1      # winner was the home team
weightAwayWin = 1      # winner was the away team
weightNeutralWin = 1   # game was on a neutral court

# Step-wise time weights, one per equal-length slice of the season
# (earliest slice first)
segmentWeighting = [0.5, 2]

# Smooth time-based weighting built on the inverse tangent
def arctanWeighting(day):
    """Return the time weight for a game played `day` days into the season.

    The weight is arctan(day/5 - 10)/3 + 1: it increases with `day`,
    equals exactly 1 at day 50, and always lies strictly between
    1 - pi/6 and 1 + pi/6, so later games count more than earlier ones.
    """
    shifted = day/5 - 10          # zero at day 50, one unit per 5 days
    return np.arctan(shifted)/3 + 1

# Switch for time-based weighting: when False every game gets a time weight of 1
useWeighting = True

### Load the team names into an array

In [33]:
import pandas as pd

# Team table, read without a header row so its columns are numbered 0, 1, ...
teamNames = pd.read_csv(teamFilename, header=None)

# One row per team
numTeams = teamNames.shape[0]

### Load the games

In [34]:
# Game table layout (no header row, so columns are numbered from 0):
#   0  days since 1/1/0000
#   1  date in YYYYMMDD format
#   2  team1 index
#   3  team1 homefield (1 = home, -1 = away, 0 = neutral)
#   4  team1 score
#   5  team2 index
#   6  team2 homefield (1 = home, -1 = away, 0 = neutral)
#   7  team2 score
allGames = pd.read_csv(gameFilename, header=None)

# Day number (same scale as column 0) on which the tournament starts
beginTourney = 733113

# Games before that day form the regular season; the rest are tournament games
seasonGames = allGames.loc[allGames[0] < beginTourney]
tourneyGames = allGames.loc[allGames[0] >= beginTourney]

numGames = seasonGames.shape[0]
numTourneyGames = tourneyGames.shape[0]

In [35]:
# Display the tournament games (the notebook renders this frame as the cell output)
tourneyGames

Unnamed: 0,0,1,2,3,4,5,6,7
5043,733113,20070313,189,0,77,86,0,69
5044,733113,20070313,182,-1,63,71,1,56
5045,733113,20070313,88,1,77,279,-1,61
5046,733113,20070313,153,1,89,3,-1,87
5047,733113,20070313,160,1,68,302,-1,58
...,...,...,...,...,...,...,...,...
5133,733127,20070327,47,0,68,1,0,67
5134,733129,20070329,323,0,78,47,0,73
5135,733131,20070331,204,0,67,96,0,60
5136,733131,20070331,85,0,76,291,0,66


### Create the Colley linear system

In [36]:
import numpy as np
from math import ceil 

# Build the weighted Colley system  colleyMatrix @ r = b.
# Before any game is counted the matrix is 2*I and b is all ones; each game
# then adds its weight to both teams' diagonal entries, subtracts it from the
# two off-diagonal entries linking them, and moves half its weight from the
# loser's entry of b to the winner's.
colleyMatrix = 2*np.diag(np.ones(numTeams))
b = np.ones(numTeams)

# Season bounds are taken from the day column (column 0) itself rather than
# from the rows labelled 0 and len-1, so they do not depend on the filtered
# frame being sorted or on its index labels running 0..numGames-1.
dayBeforeSeason = seasonGames[0].min() - 1
lastDayOfSeason = seasonGames[0].max()

# Location multiplier keyed by the winner's homefield flag (1 = home,
# -1 = away); any other flag is treated as a neutral court.
locationWeights = {1: weightHomeWin, -1: weightAwayWin}

# Iterate rows positionally (day, team1, loc1, score1, team2, loc2, score2);
# this avoids label lookups that break when index labels have gaps.
seasonRows = seasonGames[[0, 2, 3, 4, 5, 6, 7]].itertuples(index=False, name=None)
for currentDay, team1, team1Loc, team1Score, team2, team2Loc, team2Score in seasonRows:
    team1ID = team1 - 1  # team indices in the file are 1-based
    team2ID = team2 - 1

    # Time component of the weight. Only the arctan weighting is applied;
    # the step-wise segmentWeighting table is not used here.
    if useWeighting:
        timeWeight = arctanWeighting(currentDay - dayBeforeSeason)
    else:
        timeWeight = 1

    # Location component, based on where the winner played. A tied score
    # falls through to team 2's location flag.
    winnerLoc = team1Loc if team1Score > team2Score else team2Loc
    gameWeight = locationWeights.get(winnerLoc, weightNeutralWin)*timeWeight

    # Update the Colley matrix
    colleyMatrix[team1ID, team2ID] -= gameWeight
    colleyMatrix[team2ID, team1ID] -= gameWeight
    colleyMatrix[team1ID, team1ID] += gameWeight
    colleyMatrix[team2ID, team2ID] += gameWeight

    # Update the right-hand side; a tie counts as half a win and half a
    # loss for both teams, which leaves b unchanged.
    if team1Score > team2Score:
        b[team1ID] += 0.5*gameWeight
        b[team2ID] -= 0.5*gameWeight
    elif team1Score < team2Score:
        b[team1ID] -= 0.5*gameWeight
        b[team2ID] += 0.5*gameWeight

### Solve the linear system

In [37]:
# Solve colleyMatrix @ r = b for the vector of team ratings r
r = np.linalg.solve(colleyMatrix,b)
# argsort sorts ascending, so negating r puts the highest-rated team first
iSort = np.argsort(-r) # returns ranked index values

### Print the ranking of the teams

In [38]:
# Print the top of the ranking as a Rank / Rating / Team table
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 means "print every team". Otherwise clamp k to the number of teams
# so a k larger than the team count cannot index past the end of iSort
# (a negative k still prints nothing).
numberTeamToPrint = numTeams if k == 0 else max(0, min(k, numTeams))

# iSort lists team positions from highest rating to lowest
for rank, teamIndex in enumerate(iSort[:numberTeamToPrint], start=1):
    print(f'{rank:4d}   {r[teamIndex]:.5f}  {teamNames.loc[teamIndex,1]}')

print('')   # extra carriage return




************** COLLEY Rating Method **************

Rank   Rating    Team   
   1   1.08575   Ohio_St
   2   1.01518   Kansas
   3   1.00614   Georgetown
   4   1.00383   Florida
   5   0.98694   North_Carolina
   6   0.98568   Wisconsin
   7   0.96570   Pittsburgh
   8   0.96398   Memphis
   9   0.96006   S_Illinois
  10   0.95304   UCLA
  11   0.92731   UNLV
  12   0.91904   Texas_A&M
  13   0.89618   Nevada
  14   0.88025   BYU
  15   0.87758   Louisville



In [39]:
# Write the complete ranking (every team) to rankings2007.txt.
# Mode "w" already empties an existing file, and the with-block closes the
# file even if one of the writes raises.
with open("rankings2007.txt", "w") as f:
    f.write('\n\n************** COLLEY Rating Method **************\n')
    f.write('===========================\n')
    f.write('Rank   Rating    Team   \n')
    f.write('===========================\n')

    # One line per team, from highest rating to lowest
    f.writelines(
        f'{rank:4d}   {r[teamIndex]:.5f}  {teamNames.loc[teamIndex,1]}\n'
        for rank, teamIndex in enumerate(iSort, start=1)
    )

    f.write('\n')   # extra carriage return (writing '' emitted nothing)

### Calculate predictability of method

In [40]:
# Score the ratings against the tournament: a game counts as correctly
# predicted when the team with the higher rating is the team that won
# (or, for a tied score, when both ratings are equal).
numberCorrectPredictions = 0

# Iterate rows positionally (team1, score1, team2, score2) so the loop does
# not depend on the tournament rows having contiguous index labels.
tourneyRows = tourneyGames[[2, 4, 5, 7]].itertuples(index=False, name=None)
for team1, team1Score, team2, team2Score in tourneyRows:
    team1ID = team1 - 1  # team indices in the file are 1-based
    team2ID = team2 - 1

    if team1Score > team2Score and r[team1ID] > r[team2ID]:
        numberCorrectPredictions += 1
    elif team2Score > team1Score and r[team2ID] > r[team1ID]:
        numberCorrectPredictions += 1
    elif team1Score == team2Score and r[team1ID] == r[team2ID]:
        numberCorrectPredictions += 1

# Guard the percentage against an empty tournament set
if numTourneyGames > 0:
    print(f'Predictability: {numberCorrectPredictions/numTourneyGames*100:.2f}%')
else:
    print('Predictability: n/a (no tournament games)')


Predictability: 75.79%
