# Skill estimation using graphical models

Let's see how we can use a simple model to predict a hidden "skill level" of players based on their performance in a collection of games against each other.  We need data (the games and outcomes), and a model of how skill translates into these outcomes (e.g., higher skilled players have a better chance of winning).

In [1]:
import sys
import os

sys.path.append(r'C:\Users\user\OneDrive - personalmicrosoftsoftware.uci.edu\2020Spring\CS179')
import pyGM as gm
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline         

### Processing data

In [2]:
def load_data(dir='data/', pKeep=1.0, nEdge=3, nKeep=5, opt='train', playerid=None):
    """Read match results from ``dir + opt + '.csv'`` and tabulate them per player pair.

    A row is used only if it has exactly 10 comma-separated fields; anything
    else (blank lines, parse errors) is skipped.  Fields 1 and 4 are the two
    player names, fields 2 and 5 equal ``'[winner]'`` for the winning side, and
    field 3 is the score written as two integers around a separator.

    Parameters
    ----------
    dir : str
        Path prefix of the data file.  It is concatenated as-is, so it must
        end with the path separator.  (The name shadows the builtin but is
        kept because callers may pass it by keyword.)
    pKeep : float
        Training only: probability of considering each game at all.
    nEdge : int
        Training only: a game is kept only while at least one of its two
        players has fewer than ``nEdge`` distinct opponents so far.
    nKeep : int
        Training only: keep at most ``nKeep`` games per pair of players.
    opt : str
        File stem.  With ``'valid'`` every game is counted, no sparsification
        is applied and the returned ``games`` list stays empty.
    playerid : dict or None
        Optional name -> id mapping to reuse.  It must map names to
        ``0 .. len(playerid) - 1``; unseen names are appended *in place*.
        Passing the same dict for several files gives them one id space.
        With ``None`` (default) ids are assigned from scratch, in order of
        first appearance in this file.

    Returns
    -------
    nplayers : int
        Number of known players (size of the id mapping).
    nplays, nwins : numpy.ndarray, shape (nplayers, nplayers)
        ``nplays[a, b]`` counts retained games between a and b (symmetric);
        ``nwins[a, b]`` counts those flagged as won by a.
    games : list of (a, b, s)
        Retained training games, ``s`` being a's score minus b's score.
        Games of a player against itself are counted in ``nplays``/``nwins``
        but never listed here.

    Raises
    ------
    ValueError
        If a score field does not contain exactly two integers.
    """
    import re  # local import: the cell stays usable without touching the import cell

    with open(dir + opt + '.csv', encoding='utf-8') as f:
        rows = [line.split(',') for line in f.read().split('\n')]
    rows = [row for row in rows if len(row) == 10]   # drop blank / malformed lines once

    # Assign consecutive integer ids in order of first appearance.
    ids = {} if playerid is None else playerid
    for row in rows:
        for name in (row[1], row[4]):
            if name not in ids:
                ids[name] = len(ids)
    nplayers = len(ids)

    games = []
    nplays = np.zeros((nplayers, nplayers))
    nwins = np.zeros((nplayers, nplayers))
    for row in rows:
        a, b = ids[row[1]], ids[row[4]]
        aw, bw = row[2] == '[winner]', row[5] == '[winner]'

        # Pull out the two integers instead of splitting on one exact dash
        # character: this works for an en dash, a plain hyphen, or a
        # mis-decoded (mojibake) dash alike.
        numbers = re.findall(r'\d+', row[3])
        if len(numbers) != 2:
            raise ValueError('cannot parse score field {!r}'.format(row[3]))
        s = int(numbers[0]) - int(numbers[1])

        if opt != 'valid':
            # Sparsify the training set (validation games are all counted).
            if np.random.rand() >= pKeep:
                continue
            pair_below_cap = nplays[a, b] < nKeep
            wants_opponent = ((nplays[a, :] > 0).sum() < nEdge
                              or (nplays[:, b] > 0).sum() < nEdge)
            if not (pair_below_cap and wants_opponent):
                continue
            if a != b:
                games.append((a, b, s))

        nplays[a, b] += 1
        nplays[b, a] += 1
        nwins[a, b] += aw
        nwins[b, a] += bw

    return nplayers, nplays, nwins, games


In [83]:
# Load the training games.  nEdge=10 / nKeep=10 relax load_data's sparsification
# defaults (3 / 5), so more opponents per player and more games per pair are kept.
nplayers, nplays, nwins, games = load_data(pKeep=1.0, nEdge=10, nKeep=10)

In [84]:
# Quick look at what was loaded: player count, then shape and total of each
# pairwise count matrix, then the number of retained games.
print('summary: ', nplayers)
for counts in (nplays, nwins):
    print(counts.shape, counts.sum())
print('games', len(games))

summary:  999
(999, 999) 40846.0
(999, 999) 20491.0
games 20423


### A simple list of games & outcomes

In [85]:
# Toy example (disabled).  Uncommenting it replaces the loaded `games` with a
# tiny hand-written list of (player i, player j, outcome) tuples, where
# outcome +1 means i won and -1 means i lost.
# games = [
#     (0,2, +1),  # P0 played P2 & won
#     (0,2, +1),  # played again, same outcome
#     (1,2, -1),  # P1 played P2 & lost
#     (0,1, -1),  # P0 played P1 and lost
# ]

### Win probability and graphical model

In [100]:
# nplayers = max( [max(g[0],g[1]) for g in games] )+1
nlevels = 10   # number of discrete skill levels a player can have
scale = 0.8    # how strongly a skill difference shifts the win probability

# One discrete variable per player; its value is that player's skill level
X = [gm.Var(pid, nlevels) for pid in range(nplayers)]

# Pwin[a,b] = Pr[first player wins | first has skill a, second has skill b]:
#    the logistic function of the scaled skill advantage (a - b) ...
Pwin = np.array([[1./(1+np.exp(-scale*(mine-theirs))) for theirs in range(nlevels)]
                 for mine in range(nlevels)])
Pwin += 1e-8   # ... plus a tiny constant offset on every entry

# Prior: before seeing any game, each player's skill level is uniform
factors = [gm.Factor([var], 1./nlevels) for var in X]

# Evidence: one pairwise factor per observed game
for first, second, outcome in games:
    if first > second:
        # keep the lower player id first (ids must be sorted); swapping the
        # players flips the sign of the outcome
        first, second, outcome = second, first, -outcome
    # a positive outcome means the first player won; anything else (including
    # a zero score difference) uses the complementary table
    table = Pwin if outcome > 0 else 1-Pwin
    factors.append(gm.Factor([X[first], X[second]], table))

In [None]:
# additional variables
# NOTE(review): neither name is used in any cell shown here; presumably
# placeholders for extending the model with score and race information -- confirm.
scorelevel = 10
races = 3





In [101]:
# Sanity check: logistic(d) + logistic(-d) = 1, so the nlevels x nlevels table
# sums to nlevels**2 / 2, plus the 1e-8 offset that was added to every entry.
Pwin.sum()

50.000001

In [102]:
# Combine the per-player prior factors and the per-game outcome factors into one model
model = gm.GraphModel(factors)
model.makeMinimal()  # merge any duplicate factors (e.g., repeated games)

In [104]:
# Exact inference builds the full joint table, which has nlevels**nvar entries,
# so it is attempted only when that table is small.  (The previous test,
# `model.nvar < 0`, could never be true, so the exact branch was unreachable.)
MAX_EXACT_STATES = 10**6
if nlevels ** int(model.nvar) <= MAX_EXACT_STATES:   # very small model: brute force
    jt = model.joint()
    lnZ = np.log(jt.table.sum())   # log normalization constant, taken before normalizing
    jt /= jt.sum()       # normalize the distribution and marginalize the table
    bel = [jt.marginal([X[i]]) for i in range(nplayers)]
else:                    # otherwise we need to use some approximate inference:
    from pyGM.messagepass import LBP, NMF
#     lnZ,bel = LBP(model, maxIter=10, verbose=True)   # loopy BP
    lnZ,bel = NMF(model, maxIter=10, verbose=True)  # Mean field

Iter 0: -30741.95964370481
Iter 1: -11744.27047926317
Iter 2: -11026.783348891135
Iter 3: -10891.810791925527
Iter 4: -10852.157504617915
Iter 5: -10846.740128183399
Iter 6: -10845.678628873417
Iter 7: -10845.40338776213
Iter 8: -10845.31983207828
Iter 9: -10845.295733724108
Iter 10: -10845.289015731269


The log of the normalization constant, $\log Z$, is the log-probability of the evidence under our model, i.e., the probability of the observed game outcomes given our parameters. (The values printed above come from approximate inference, so they are estimates of $\log Z$ rather than the exact value.) We could experiment with changing the win probability function or its scaling parameter, using this value to judge how well the model fits the data.

For example, if you play with `scale` on the toy data (the small, commented-out `games` list above), you'll find that `scale=0` (so that every game is a 50-50 chance, independent of skill level) fits the data pretty well, because the few outcomes listed are not really consistent with skill determining the outcome.  But if you change the data so that there is an obvious ordering of skill (e.g., P0 is better than P1 and/or P2), a larger scale parameter will fit the data better.

###  Ranking players by predicted skill

In [94]:
# Posterior mean skill of each player: sum over levels of level * belief(level)
print("Mean skill estimates: ")
print([np.dot(bel[p].table, np.arange(nlevels)) for p in range(nplayers)])

Mean skill estimates: 
[8.734073289911723, 8.934478382803936, 8.992910894827125, 8.999999999997192, 8.947546041636137, 6.94678830031607, 8.957862699113797, 7.23849843944507, 8.999311451833291, 8.99862759139609, 2.4415475865742593, 4.30642738717092, 7.365906450155485, 4.929594848786538, 8.124921303672258, 6.793004786626566, 2.9608952963229553, 2.831093716486644, 5.004389966616964, 7.0389340595794945, 6.518926815208918, 7.983624976990749, 8.513692215608222, 8.271338305509989, 8.99986890191227, 7.038705564762946, 8.999993070429472, 6.083659010564673, 7.761782007901001, 6.834949381479621, 8.313109780891041, 2.935520842535678, 6.449331728084801, 8.816478506653425, 6.709603268333056, 7.175691975935234, 4.819161753721701, 5.075320707683979, 8.999999912015898, 8.56801996966235, 6.257860515465069, 8.475151031295512, 8.545586870487714, 8.972887976688975, 5.130905842832103, 8.813038067414425, 8.961767424057971, 8.740957993172717, 8.434336620949551, 8.274784065537277, 6.042361130353985, 6.89333671

### Predicting match outcomes

In [95]:
# Draw two *distinct* players: a match needs two different people, and the
# pairwise factor below needs two different variables (two independent draws
# could return the same id twice).
i,j = (int(p) for p in np.random.choice(nplayers, size=2, replace=False))
print("Estimated probability P{} beats P{} next time:".format(i,j))
# Expected value (over the skills of Pi, Pj) of Pr[Pi wins | Xi, Xj].
# As when the model was built, the table is laid out with the lower player id
# first, so Pwin gives "lower id wins"; use its complement when i is the higher id.
table = Pwin if i<j else 1-Pwin
print( (bel[i]*bel[j]*gm.Factor([X[i],X[j]],table)).table.sum() )

# Notes: we should probably use the joint belief over Xi and Xj, but for simplicity
#  with approximate inference we'll just use the estimated singleton marginals

Estimated probability P935 beats P99 next time:
0.11042377713181889


In [96]:
# Load the held-out games (opt='valid': every game is counted, none are listed in games_val).
# NOTE(review): load_data numbers players by order of first appearance in the
# file it reads, so these ids match the training ids (which `bel` and `X` are
# indexed by) only if both files introduce players in the same order -- confirm.
nplayers_val, nplays_val, nwins_val, games_val = load_data(opt = 'valid')

In [97]:
def logit(z):
    """Return 1 / (1 + exp(-z)), elementwise for array input.

    Despite the name, this is the logistic (sigmoid) function, i.e. the
    inverse of the logit.
    """
    denominator = 1. + np.exp(-z)
    return 1. / denominator

def prediction_loss(skill, nplayers, nplays, nwins, games):
    """Compare predicted and empirical win rates over every pair that played.

    For each pair ``i < j`` with ``nplays[i, j] > 0`` the empirical win rate
    ``nwins[i, j] / nplays[i, j]`` is compared with the model's probability
    that i beats j.  That probability is computed from the notebook-level
    ``bel``, ``X`` and ``Pwin`` (they are not parameters), so those must be
    defined and indexed by the same player ids as ``nplays``.

    Parameters
    ----------
    skill : unused; kept so existing calls keep working.
    nplayers : int
        Number of players; only ids ``0 .. nplayers - 1`` are considered.
    nplays, nwins : numpy.ndarray
        Pairwise game and win counts.
    games : unused; kept so existing calls keep working.

    Returns
    -------
    (loss, binary_loss) : tuple of float
        ``loss`` is the mean absolute difference between predicted and
        empirical win rate.  ``binary_loss`` is the fraction of pairs where
        the two fall on opposite sides of 0.5.  Both are averaged over the
        pairs actually scored, so entries on the diagonal (a player against
        itself) no longer inflate the denominator.  Both are ``nan`` when no
        pair has played.
    """
    abs_error = 0.
    disagreements = 0
    npairs = 0

    # Counts are non-negative, so the non-zero entries strictly above the
    # diagonal are exactly the pairs i < j that played at least once.
    played = np.triu(nplays[:nplayers, :nplayers], k=1)
    for i, j in zip(*np.nonzero(played)):
        prob = nwins[i, j] / nplays[i, j]
        prob_hat = (bel[i]*bel[j]*gm.Factor([X[i],X[j]],Pwin)).table.sum()
        abs_error += np.abs(prob_hat - prob)
        disagreements += (prob_hat >= 0.5) != (prob >= 0.5)
        npairs += 1

    if npairs == 0:
        # nothing to average over: the losses are undefined
        return float('nan'), float('nan')

    return abs_error / npairs, disagreements / npairs


In [98]:
# Posterior mean skill level of every player (expected level under its belief)
skill = [np.dot(bel[p].table, np.arange(nlevels)) for p in range(nplayers)]

In [99]:
# Score the model on the held-out games; the result is
# (mean absolute win-rate error, fraction of pairs predicted on the wrong side of 0.5)
prediction_loss(skill, nplayers_val, nplays_val, nwins_val, games_val)

(0.40247764646100154, 0.4248236018373822)