
## RankNet as described in [1].
[1] http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf

[2] https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf

In [107]:
import numpy as np
import torch
import torch.nn as nn


In [108]:
#binary case 1--> relevant 0--> no relevant

In [109]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [110]:
def compute_ndcg(y,y_hat,k=10):
    return ndcg_at_k(y[np.argsort(y_hat)[::-1]],k)
def dcg_at_k(r, k=10):
        """Score is discounted cumulative gain (dcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> dcg_at_k(r, 1)
        3.0
        >>> dcg_at_k(r, 1, method=1)
        3.0
        >>> dcg_at_k(r, 2)
        5.0
        >>> dcg_at_k(r, 2, method=1)
        4.2618595071429155
        >>> dcg_at_k(r, 10)
        9.6051177391888114
        >>> dcg_at_k(r, 11)
        9.6051177391888114
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Discounted cumulative gain
        """
        r = np.asfarray(r)[:k]
        if r.size > 0:
            return float(np.sum((2**r-1) / np.log2(np.arange(2, r.size + 2))))
        return 0.
def ndcg_at_k(r, k=10, method=0):
        """Score is normalized discounted cumulative gain (ndcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> ndcg_at_k(r, 1)
        1.0
        >>> r = [2, 1, 2, 0]
        >>> ndcg_at_k(r, 4)
        0.9203032077642922
        >>> ndcg_at_k(r, 4, method=1)
        0.96519546960144276
        >>> ndcg_at_k([0], 1)
        0.0
        >>> ndcg_at_k([1], 2)
        1.0
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Normalized discounted cumulative gain
        """
        dcg_max = dcg_at_k(sorted(r, reverse=True), k)
        if not dcg_max:
            return 0.
        return dcg_at_k(r, k) / dcg_max

In [111]:
# Data.
input_dim = 10
n_docs = 20
n_rel2 = 5
n_rel1 = 5
n_irr = n_docs - n_rel2 - n_rel1
y=np.array([2]*n_rel2+[1]*n_rel1+[0]*n_irr)
y_t=torch.from_numpy(y).float().to(device)
unique, unique_index = np.unique(y, return_index=True)
unique=np.flip(unique,axis=0).copy()
unique = torch.from_numpy(unique)
unique_index=np.flip(unique_index,axis=0).copy()
unique_index = torch.from_numpy(unique_index)
unique = unique.to(device)
unique_index = unique_index.to(device)
doc_features = np.random.randn(n_docs, input_dim)

# Document scores.
docs = torch.from_numpy(np.array(doc_features, dtype = "float32"))
docs = docs.to(device)

In [112]:
import copy
# Model.
L1=128//2
L2=64//2
L3=32//2
k=10
rank_net = torch.nn.Sequential(
    nn.Linear(input_dim, L1),
    nn.ReLU(),
    nn.Linear(L1, L2),
    nn.ReLU(),
    nn.Linear(L2, L3),
    nn.ReLU(),
    nn.Linear(L3, 1))
rank_lambda = copy.deepcopy(rank_net)
rank_net = rank_net.to(device)
rank_lambda = rank_lambda.to(device)
doc_scores_init = rank_net(docs)
print(doc_scores_init.view(-1))
ndcg_before=compute_ndcg(y,doc_scores_init.view(-1).tolist(),k)
print(ndcg_before)

tensor([0.2635, 0.3412, 0.3176, 0.3422, 0.2773, 0.1904, 0.3029, 0.2635, 0.2415,
        0.1859, 0.2241, 0.2341, 0.3001, 0.2208, 0.3259, 0.3043, 0.2335, 0.2753,
        0.2420, 0.2644], device='cuda:0', grad_fn=<ViewBackward>)
0.7171530897897874


In [113]:
n_epoch = 50
loss = torch.zeros(1)
loss = loss.to(device)
for epoch in range(n_epoch):  # loop over the dataset multiple times
    running_loss = 0.0
    # forward
    doc_scores = rank_net(docs)
    loss = torch.zeros(1).to(device)
    for idx, ui in enumerate(unique_index[1:]):
        o_ij=doc_scores[:ui]-doc_scores[ui:].view(-1)
        c_ij=o_ij - torch.log(1.0+torch.exp(o_ij))
        loss+=c_ij.sum()
    # backward
    rank_net.zero_grad()
    loss.backward()

    # Update model weights.
    lr = 0.001
    with torch.no_grad():
        for param in rank_net.parameters():
            param += lr * param.grad

    ##print statistics
    if epoch % (n_epoch//10) ==0:
        print(f"{epoch} : {loss}")
          

print('Finished Training')
doc_scores = rank_net(docs)
print(doc_scores.view(-1))
ndcg_after_net=compute_ndcg(y,doc_scores.view(-1).tolist(),k)
print(ndcg_after_net)


0 : tensor([-118.8558], device='cuda:0', grad_fn=<ThAddBackward>)
5 : tensor([-112.4036], device='cuda:0', grad_fn=<ThAddBackward>)
10 : tensor([-102.3496], device='cuda:0', grad_fn=<ThAddBackward>)
15 : tensor([-83.1775], device='cuda:0', grad_fn=<ThAddBackward>)
20 : tensor([-55.5328], device='cuda:0', grad_fn=<ThAddBackward>)
25 : tensor([-32.5745], device='cuda:0', grad_fn=<ThAddBackward>)
30 : tensor([-19.8857], device='cuda:0', grad_fn=<ThAddBackward>)
35 : tensor([-13.0919], device='cuda:0', grad_fn=<ThAddBackward>)
40 : tensor([-9.1671], device='cuda:0', grad_fn=<ThAddBackward>)
45 : tensor([-6.7600], device='cuda:0', grad_fn=<ThAddBackward>)
Finished Training
tensor([ 6.5765,  6.1348,  5.3881,  5.9810,  4.1113,  3.0329,  2.4669,  2.9753,
         2.5514,  2.9761, -1.0577, -1.0124,  0.2252, -0.5936, -1.0885, -0.5576,
        -1.2160,  1.3547, -0.6664, -0.2327],
       device='cuda:0', grad_fn=<ViewBackward>)
1.0


In [114]:
ds=rank_lambda(docs)
print(ds.view(-1))
ndcg_before=compute_ndcg(y,ds.view(-1).tolist(),k)
print(ndcg_before)

tensor([0.2635, 0.3412, 0.3176, 0.3422, 0.2773, 0.1904, 0.3029, 0.2635, 0.2415,
        0.1859, 0.2241, 0.2341, 0.3001, 0.2208, 0.3259, 0.3043, 0.2335, 0.2753,
        0.2420, 0.2644], device='cuda:0', grad_fn=<ViewBackward>)
0.7171530897897874


## lambdarank
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf

In [115]:
N_max_dcg=dcg_at_k(y)
loss = torch.zeros(1)
loss = loss.to(device)
for epoch in range(n_epoch):  # loop over the dataset multiple times
    running_loss = 0.0
    # forward
    doc_scores = rank_lambda(docs)
    loss = torch.zeros(1).to(device)
    for idx, ui in enumerate(unique_index[1:]):
        doc_scores = rank_lambda(docs)
        o_ij=doc_scores[:ui]-doc_scores[ui:].view(-1)
        first_part=1.0/(1.0+torch.exp(o_ij))
        y_exp_t = 2**y_t[:,None]
        second_part=y_exp_t[:ui]-y_exp_t[ui:].view(-1)
        inv_log=1.0/torch.log2(torch.arange(2,len(doc_scores)+2).float())[:,None].to(device)
        thrid_part=inv_log[:ui]-inv_log[ui:].view(-1)
        whole_lambda=-N_max_dcg*first_part*second_part*thrid_part
        loss+=whole_lambda.sum()
    # backward
    rank_lambda.zero_grad()
    loss.backward()

    # Update model weights.
    lr = 0.001
    with torch.no_grad():
        for param in rank_lambda.parameters():
            param += lr * param.grad

    ##print statistics
    #if epoch % (n_epoch//10) ==0:
    #    print(f"{epoch} : {loss}")
          

print('Finished Training')
doc_scores = rank_lambda(docs)
print(doc_scores.view(-1))
ndcg_after_lambda=compute_ndcg(y,doc_scores.view(-1).tolist(),k)
print(ndcg_after_lambda)


Finished Training
tensor([13.2163, 10.5149,  8.9659,  8.8394,  6.9514,  4.7564,  3.5631,  3.6938,
         3.1804,  3.5187, -0.9816, -0.7624,  0.9364, -0.4240, -1.2307, -0.8579,
        -1.1998,  1.3178, -0.6483, -0.3315],
       device='cuda:0', grad_fn=<ViewBackward>)
1.0


In [116]:
ndcg_after_lambda-ndcg_after_net

0.0