
## RankNet as described in [1].
[1] http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf

[2] https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf

In [1]:
import numpy as np
import torch
import torch.nn as nn


In [2]:
#binary case 1--> relevant 0--> no relevant

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [34]:
def dcg_at_k(r, k=10, method=0):
        """Score is discounted cumulative gain (dcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> dcg_at_k(r, 1)
        3.0
        >>> dcg_at_k(r, 1, method=1)
        3.0
        >>> dcg_at_k(r, 2)
        5.0
        >>> dcg_at_k(r, 2, method=1)
        4.2618595071429155
        >>> dcg_at_k(r, 10)
        9.6051177391888114
        >>> dcg_at_k(r, 11)
        9.6051177391888114
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Discounted cumulative gain
        """
        r = np.asfarray(r)[:k]
        if r.size > 0:
            if method == 0:
                return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
            elif method == 1:
                return float(np.sum(r / np.log2(np.arange(2, r.size + 2))))
            else:
                raise ValueError('method must be 0 or 1.')
        return 0.
def ndcg_at_k(r, k=10, method=0):
        """Score is normalized discounted cumulative gain (ndcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> ndcg_at_k(r, 1)
        1.0
        >>> r = [2, 1, 2, 0]
        >>> ndcg_at_k(r, 4)
        0.9203032077642922
        >>> ndcg_at_k(r, 4, method=1)
        0.96519546960144276
        >>> ndcg_at_k([0], 1)
        0.0
        >>> ndcg_at_k([1], 2)
        1.0
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Normalized discounted cumulative gain
        """
        dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
        if not dcg_max:
            return 0.
        return dcg_at_k(r, k, method) / dcg_max

In [35]:
# Data.
input_dim = 10
n_docs = 20
n_rel2 = 5
n_rel1 = 5
n_irr = n_docs - n_rel2 - n_rel1
y=np.array([2]*n_rel2+[1]*n_rel1+[0]*n_irr)
unique, unique_index = np.unique(y, return_index=True)
unique=np.flip(unique,axis=0).copy()
unique = torch.from_numpy(unique)
unique_index=np.flip(unique_index,axis=0).copy()
unique_index = torch.from_numpy(unique_index)
unique = unique.to(device)
unique_index = unique_index.to(device)
doc_features = np.random.randn(n_docs, input_dim)

# Document scores.
docs = torch.from_numpy(np.array(doc_features, dtype = "float32"))
docs = docs.to(device)


In [40]:
array = np.array([4,2,7,1])
order = array.argsort()
ranks = order.argsort()
ranks

array([2, 1, 3, 0])

In [52]:
# Model.
L1=128//2
L2=64//2
L3=32//2
model = torch.nn.Sequential(
    nn.Linear(input_dim, L1),
    nn.ReLU(),
    nn.Linear(L1, L2),
    nn.ReLU(),
    nn.Linear(L2, L3),
    nn.ReLU(),
    nn.Linear(L3, 1))

model = model.to(device)
doc_scores = model(docs)
norm_doc_scores=doc_scores.clamp(0,1000)
print(doc_scores,norm_doc_scores)
ndcg_before=ndcg_at_k(norm_doc_scores.view(-1).tolist())
print(ndcg_before)

tensor([[-0.2583],
        [-0.2655],
        [-0.2136],
        [-0.1797],
        [-0.1465],
        [-0.2270],
        [-0.2163],
        [-0.1936],
        [-0.2347],
        [-0.2194],
        [-0.2321],
        [-0.2318],
        [-0.2109],
        [-0.1941],
        [-0.2284],
        [-0.2004],
        [-0.2136],
        [-0.1858],
        [-0.2284],
        [-0.2483]], device='cuda:0', grad_fn=<ThAddmmBackward>) tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0', grad_fn=<ClampBackward>)
0.0


In [53]:
n_epoch = 1000
loss = torch.zeros(1)
loss = loss.to(device)
for epoch in range(n_epoch):  # loop over the dataset multiple times
    running_loss = 0.0
    # forward
    doc_scores = model(docs)
    loss = torch.zeros(1).to(device)
    for idx, ui in enumerate(unique_index[1:]):
        o_ij=doc_scores[:n_rel2]-doc_scores[n_rel2:].view(-1)
        c_ij = o_ij - torch.log(1.0+torch.exp(o_ij))
        loss+=c_ij.sum()
    # backward
    model.zero_grad()
    loss.backward()

    # Update model weights.
    lr = 0.001
    with torch.no_grad():
        for param in model.parameters():
            param += lr * param.grad

    # print statistics
    if epoch % (n_epoch//10) ==0:
        print(f"{epoch} : {loss}")
          

print('Finished Training')
doc_scores = model(docs)
print(doc_scores)
norm_doc_scores=doc_scores.clamp(0,1000)
print(doc_scores,norm_doc_scores)
ndcg_after=ndcg_at_k(norm_doc_scores.view(-1).tolist())
print(ndcg_after)


0 : tensor([-103.6479], device='cuda:0', grad_fn=<ThAddBackward>)
100 : tensor([-1.8127], device='cuda:0', grad_fn=<ThAddBackward>)
200 : tensor([-0.3489], device='cuda:0', grad_fn=<ThAddBackward>)
300 : tensor([-0.1657], device='cuda:0', grad_fn=<ThAddBackward>)
400 : tensor([-0.1035], device='cuda:0', grad_fn=<ThAddBackward>)
500 : tensor([-0.0734], device='cuda:0', grad_fn=<ThAddBackward>)
600 : tensor([-0.0561], device='cuda:0', grad_fn=<ThAddBackward>)
700 : tensor([-0.0450], device='cuda:0', grad_fn=<ThAddBackward>)
800 : tensor([-0.0373], device='cuda:0', grad_fn=<ThAddBackward>)
900 : tensor([-0.0317], device='cuda:0', grad_fn=<ThAddBackward>)
Finished Training
tensor([[ 6.5048],
        [ 8.1659],
        [ 7.6849],
        [ 8.7419],
        [ 7.7385],
        [-1.5256],
        [-2.2626],
        [-1.0826],
        [-1.5735],
        [-2.9881],
        [-0.7554],
        [-1.1041],
        [-3.2397],
        [-1.7115],
        [-0.3968],
        [-1.3025],
        [-1.2773],

0.9586438533458532

In [90]:
def dcg_at_k(r, k=5, method=1):
        """Score is discounted cumulative gain (dcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> dcg_at_k(r, 1)
        3.0
        >>> dcg_at_k(r, 1, method=1)
        3.0
        >>> dcg_at_k(r, 2)
        5.0
        >>> dcg_at_k(r, 2, method=1)
        4.2618595071429155
        >>> dcg_at_k(r, 10)
        9.6051177391888114
        >>> dcg_at_k(r, 11)
        9.6051177391888114
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Discounted cumulative gain
        """
        r = np.asfarray(r)[:k]
        if r.size > 0:
            if method == 0:
                return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
            elif method == 1:
                return float(np.sum(r / np.log2(np.arange(2, r.size + 2))))
            else:
                raise ValueError('method must be 0 or 1.')
        return 0.
def ndcg_at_k(r, k=5, method=1):
        """Score is normalized discounted cumulative gain (ndcg)
        Relevance is positive real values.  Can use binary
        as the previous methods.
        Example from
        http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
        >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
        >>> ndcg_at_k(r, 1)
        1.0
        >>> r = [2, 1, 2, 0]
        >>> ndcg_at_k(r, 4)
        0.9203032077642922
        >>> ndcg_at_k(r, 4, method=1)
        0.96519546960144276
        >>> ndcg_at_k([0], 1)
        0.0
        >>> ndcg_at_k([1], 2)
        1.0
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
            k: Number of results to consider
            method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                    If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        Returns:
            Normalized discounted cumulative gain
        """
        dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
        if not dcg_max:
            return 0.
        return dcg_at_k(r, k, method) / dcg_max

In [45]:
doc_scores_arr=np.array(doc_scores.view(-1).tolist())
doc_scores_arr

array([ 3.29433584,  3.48503256,  3.47028828,  4.27507067,  3.61133909,
       -4.2266221 , -9.12169266, -4.79133892, -5.19874191, -7.19841385,
       -6.83355951, -4.91971636, -6.3450489 , -4.39710379, -4.91532278,
       -5.30558538, -5.02252579, -6.02778721, -5.38184261, -6.263484  ])

In [47]:
order = doc_scores_arr.argsort()
ranks = order.argsort()
ranks

array([15, 17, 16, 19, 18, 14,  0, 12,  8,  1,  2, 10,  3, 13, 11,  7,  9,
        5,  6,  4])