In [1]:
import os
import sys

import logging
import math,random
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

from models.Attention import Attention
from models.Interactors.FIM import FIM_Interactor
from models.Interactors.KNRM import KNRM_Interactor
from models.Encoders.CNN import CNN_Encoder
from data.configs.demo import config
from utils.utils import prepare

from models.base_model import BaseModel

In [2]:
# Use the first GPU when CUDA is usable; otherwise fall back to the CPU instead
# of requesting a device that does not exist on this machine.
config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# k is handed to DRM further down, where it is the number of words kept per clicked news.
config.k = 3
config.spadam = False

# prepare() is a project helper; here it yields the vocabulary and the data loaders.
vocab, loaders = prepare(config)
# Keep one batch of the first loader around for a quick forward-pass check.
record = next(iter(loaders[0]))

[2021-07-21 08:27:22,252] INFO (root) Hyper Parameters are
<class 'data.configs.demo.config'>
[2021-07-21 08:27:22,254] INFO (root) preparing dataset...
[2021-07-21 08:27:24,532] INFO (torchtext.vocab) Loading vectors from /home/peitian_zhang/Data/.vector_cache/glove.840B.300d.txt.pt


In [3]:
class DRM(nn.Module):
    """
    Document reducer: keeps the k words of every news whose embedding is most
    similar (cosine) to the user representation, scaled by that similarity.

    Args:
        k: number of words kept per news
        threshold: selected words scoring below this value are zeroed out;
            the default of -inf disables the filter
    """
    def __init__(self, k, threshold = -float('inf')):
        super().__init__()

        self.name = "matching-based"
        self.k = k
        self.threshold = threshold

    def forward(self, news_embedding, user_repr):
        """
        Pick the words of each news that match the user's overall interest best

        Args:
            news_embedding: word-level news embedding, [batch_size, his_size, signal_length, hidden_dim]
            user_repr: user representation, [batch_size, 1, hidden_dim]

        Returns:
            weighted_ps_terms: selected word embeddings scaled by their score, [batch_size, his_size, k, hidden_dim]
        """
        hidden_dim = news_embedding.size(-1)

        # unit-length vectors turn the dot product below into cosine similarity
        unit_words = F.normalize(news_embedding, dim=-1)
        unit_user = F.normalize(user_repr, dim=-1)

        # [bs, 1, hd, 1], broadcast over the his_size axis
        user_column = unit_user.transpose(-2, -1).unsqueeze(1)
        # [bs, hs, sl]
        cosine = unit_words.matmul(user_column).squeeze(-1)

        # [bs, hs, k] each
        top_scores, top_indices = cosine.topk(dim=-1, k=self.k)

        # repeat every index along hidden_dim so gather returns whole word vectors
        gather_index = top_indices.unsqueeze(-1).expand(top_indices.size() + (hidden_dim,))
        # taken from the original (un-normalized) embedding
        selected = news_embedding.gather(dim=-2, index=gather_index)

        # scores under the threshold become 0, which blanks the matching word vector
        gate = top_scores.masked_fill(top_scores < self.threshold, 0).unsqueeze(-1)

        return selected * gate

class CNN_Interactor(nn.Module):
    """
    Builds a word-by-term matching matrix between candidate news and personalized
    terms and compresses it with two Conv2d -> ReLU -> MaxPool2d stages.

    Args:
        signal_length: number of word positions per candidate news fed to forward
        term_num: number of personalized terms fed to forward
        hidden_dim: embedding size, used to scale the dot products
    """
    def __init__(self, signal_length, term_num, hidden_dim):
        super().__init__()
        self.name = '2dcnn'
        self.hidden_dim = hidden_dim
        self.signal_length = signal_length
        self.term_num = term_num

        # The padded 3x3 convolutions keep the spatial size; every 3x3/stride-3
        # pooling divides each side by 3 (rounded down). Conv weights keep
        # PyTorch's default initialisation.
        first_stage = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=[3, 3], padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=[3, 3], stride=[3, 3]),
        ]
        second_stage = [
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=[3, 3], padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=[3, 3], stride=[3, 3]),
        ]
        self.SeqCNN2D = nn.Sequential(*first_stage, *second_stage)

        # side length after both pooling stages
        def pooled_twice(side):
            return int(int(side/3)/3)

        # flattened size of the 16-channel output map
        self.final_dim = pooled_twice(signal_length) * pooled_twice(term_num) * 16

    def forward(self, cdd_news_embedding, ps_terms):
        """
        compute the matching matrix of every candidate news and reduce it to a vector

        Args:
            cdd_news_embedding: word-level representation of candidate news, [batch_size, cdd_size, signal_length, hidden_dim]
            ps_terms: personalized terms, [batch_size, term_num, hidden_dim]

        Returns:
            reduced_tensor: flattened output of the 2D CNN, [batch_size, cdd_size, final_dim]
        """
        batch_size = cdd_news_embedding.size(0)
        cdd_size = cdd_news_embedding.size(1)

        # [bs, 1, hd, tn], broadcast over the candidates
        terms_t = ps_terms.transpose(-2, -1).unsqueeze(1)
        # [bs, cs, sl, tn]
        similarity = cdd_news_embedding.matmul(terms_t)

        # every candidate becomes a one-channel image: [bs * cs, 1, sl, tn]
        images = similarity.view(-1, 1, self.signal_length, self.term_num) / math.sqrt(self.hidden_dim)

        feature_maps = self.SeqCNN2D(images)
        return feature_maps.view(batch_size, cdd_size, self.final_dim)

In [9]:
class ESM(BaseModel):
    """
    Click model that chains a news encoder, a user encoder, a document reducer and
    an interactor, then scores every candidate with a two-layer MLP.

    Args:
        config: configuration object; title_size and k are read here and the object is passed on to BaseModel
        encoderN: news encoder; called on token ids, its result is unpacked into (word-level embedding, news-level representation); must expose hidden_dim and name
        encoderU: user encoder; called on the news-level representations of the clicked news; must expose name
        docReducer: module extracting personalized terms from the clicked news given the user representation; must expose name
        interactor: module matching candidate news against the personalized terms; must expose final_dim and name

    NOTE(review): batch_size, device and cdd_size are read below but never assigned
    in this class -- presumably BaseModel sets them; confirm.
    """
    def __init__(self, config, encoderN, encoderU, docReducer, interactor):
        super().__init__(config)
        self.title_size = config.title_size
        self.k = config.k

        self.encoderN = encoderN
        self.encoderU = encoderU
        self.docReducer = docReducer
        self.interactor = interactor

        self.hidden_dim = encoderN.hidden_dim
        self.final_dim = interactor.final_dim

        # scoring head: final_dim -> final_dim / 2 -> 1
        half_dim = int(self.final_dim/2)
        self.learningToRank = nn.Sequential(
            nn.Linear(self.final_dim, half_dim),
            nn.ReLU(),
            nn.Linear(half_dim, 1)
        )

        # model name is assembled from the names of its components
        component_names = [
            'esm',
            self.encoderN.name,
            self.encoderU.name,
            self.docReducer.name,
            self.interactor.name,
        ]
        self.name = '-'.join(component_names)

    def clickPredictor(self, reduced_tensor):
        """ calculate a batch of click scores

        Args:
            reduced_tensor: [batch_size, cdd_size, final_dim]

        Returns:
            score of each candidate news, [batch_size, cdd_size]
        """
        logits = self.learningToRank(reduced_tensor)
        return logits.squeeze(dim=-1)

    def _forward(self,x):
        """
        Compute the raw score of every candidate news in the batch

        Args:
            x: batch indexed by 'candidate_title' and 'clicked_title'; both entries
                are cast to long and moved to self.device

        Returns:
            output of clickPredictor for the batch
        """
        # follow the size of the incoming batch (e.g. a smaller final batch)
        incoming_batch_size = x['candidate_title'].size(0)
        if incoming_batch_size != self.batch_size:
            self.batch_size = incoming_batch_size

        # candidate news are encoded first, clicked news second, by the same encoder
        cdd_news = x['candidate_title'].long().to(self.device)
        cdd_news_embedding, cdd_news_repr = self.encoderN(cdd_news)

        his_news = x['clicked_title'].long().to(self.device)
        his_news_embedding, his_news_repr = self.encoderN(his_news)

        user_repr = self.encoderU(his_news_repr)

        # flatten the terms picked from all clicked news into a single axis
        selected_terms = self.docReducer(his_news_embedding, user_repr)
        ps_terms = selected_terms.view(self.batch_size, -1, self.hidden_dim)

        # the news-level vector is prepended to the word sequence as one extra position
        cdd_signal = torch.cat([cdd_news_repr.unsqueeze(-2), cdd_news_embedding], dim=-2)
        reduced_tensor = self.interactor(cdd_signal, ps_terms)

        return self.clickPredictor(reduced_tensor)

    def forward(self,x):
        """
        Wrap _forward (which yields unnormalized click scores) with the final
        normalization: log-softmax over dim 1 when self.cdd_size > 1, sigmoid otherwise
        """
        score = self._forward(x)

        if self.cdd_size > 1:
            return nn.functional.log_softmax(score, dim=1)

        return torch.sigmoid(score)

In [10]:
from models.Encoders.CNN import CNN_Encoder
from models.Encoders.RNN import RNN_User_Encoder

# News encoder (ESM unpacks its output into word-level embeddings and a
# news-level vector) and a user encoder built with the same hidden size.
encoderN = CNN_Encoder(config, vocab)
encoderU = RNN_User_Encoder(encoderN.hidden_dim)
# DRM keeps the config.k best-matching words of every clicked news; leaving the
# threshold at its default of -inf means no selected word is zeroed out.
docReducer = DRM(config.k)
# signal_length is title_size + 1 because ESM._forward prepends the news-level
# vector to the candidate's word embeddings; term_num is k * his_size because the
# terms selected from all clicked news are flattened into one axis.
# (assumes his_size is the number of clicked news per sample -- TODO confirm)
interactor = CNN_Interactor(config.title_size+1, config.k * config.his_size, encoderN.hidden_dim)

# Assemble the full model and move it to the configured device.
esm = ESM(config, encoderN, encoderU, docReducer, interactor).to(config.device)

In [6]:
# Forward-pass check on the single batch drawn earlier. ESM.forward applies
# log-softmax over dim 1 when cdd_size > 1, which is why the values shown are negative.
esm(record)

tensor([[-1.6019, -1.6012, -1.6236, -1.5991, -1.6218],
        [-1.6098, -1.6100, -1.6083, -1.6101, -1.6090],
        [-1.6062, -1.6071, -1.6178, -1.6249, -1.5914],
        [-1.6112, -1.6093, -1.6126, -1.6092, -1.6049],
        [-1.6044, -1.6037, -1.6032, -1.6157, -1.6203],
        [-1.6090, -1.6091, -1.6093, -1.6092, -1.6106],
        [-1.6100, -1.6088, -1.6106, -1.6056, -1.6121],
        [-1.6084, -1.6113, -1.6094, -1.6095, -1.6086],
        [-1.5986, -1.6273, -1.6114, -1.6266, -1.5840],
        [-1.6095, -1.6106, -1.6097, -1.6094, -1.6081]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward>)

In [12]:
# Settings for this training run; the config object is passed to esm.tune below.
config.epochs = 8
config.val_freq = 2
# NOTE(review): the recorded run of this cell aborts with
# "RuntimeError: Adam does not support sparse gradients, please consider SparseAdam instead".
# Presumably spadam selects SparseAdam for sparse embedding gradients, so leaving
# it False may be the cause -- confirm against BaseModel.tune and the encoder
# before changing it.
config.spadam = False
# tune() comes from BaseModel, which is defined outside this notebook.
esm.tune(config, loaders)

[2021-07-21 08:30:20,789] INFO (models.base_model) training...
  0%|          | 0/295 [00:00<?, ?it/s]


RuntimeError: Adam does not support sparse gradients, please consider SparseAdam instead