In [1]:
import torch
import warnings
warnings.filterwarnings('ignore')
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Directory (relative to this notebook) that the tokenizer is loaded from.
path = "../model/Reranker"
# Load the tokenizer stored at that location.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=path)

In [2]:
# Encode a (query, passage) pair as one sequence of exactly 64 token ids:
# shorter pairs are padded up to max_length, longer ones are truncated.
inputs = tokenizer(
    text='weather in new york',
    text_pair='it is cold today in new york',
    max_length=64,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)
inputs

{'input_ids': tensor([[ 101, 4633, 1999, 2047, 2259,  102, 2009, 2003, 3147, 2651, 1999, 2047,
         2259,  102,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [3]:
# Shapes of the token ids and the attention mask, in that order.
tuple(inputs[key].shape for key in ("input_ids", "attention_mask"))

(torch.Size([1, 64]), torch.Size([1, 64]))

# Reranker

In [4]:
# Load the sequence-classification model from the same directory as the tokenizer.
reranker = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=path,
)

In [5]:
# Forward pass with gradient tracking disabled; the logits are kept as the
# score for the encoded pair.
with torch.no_grad():
    score = reranker(
        input_ids=inputs['input_ids'],
        token_type_ids=inputs['token_type_ids'],
        attention_mask=inputs['attention_mask'],
    ).logits
score

tensor([[7.0222]])

In [6]:
# Turn the first (only) row of token ids back into text to see the special
# tokens and padding the tokenizer inserted.
tokenizer.decode(token_ids=inputs['input_ids'][0])

'[CLS] weather in new york [SEP] it is cold today in new york [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [7]:
# Same query paired with an unrelated passage. No explicit padding/truncation
# arguments are passed for this call.
inputs = tokenizer(
    text='weather in new york',
    text_pair='I am Linda',
    return_tensors='pt',
)
with torch.no_grad():
    score = reranker(**inputs).logits
# The recorded logit for this pair is strongly negative, unlike the related pair scored earlier.
score

tensor([[-7.3656]])

In [8]:
# Example query used for the ranking comparison below.
query = "What is the capital of China?"

# Candidate passages to score against the query.
passages = [
    "Beijing is the capital of China.",
    "Shanghai is the largest city in China.",
]

In [9]:
# Score each candidate passage against the query, one forward pass per passage.
for passage in passages:
    # Fixed-length encoding: pad or truncate every pair to 64 tokens.
    inputs = tokenizer(
        text=query,
        text_pair=passage,
        max_length=64,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        score = reranker(**inputs).logits
    print(passage, score)

Beijing is the capital of China. tensor([[6.9148]])
Shanghai is the largest city in China. tensor([[2.9081]])


In [10]:
# Show the model's repr to inspect its layer structure.
reranker

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [11]:
# List the names of the model's top-level submodules.
# NOTE(review): `_modules` is a private torch.nn.Module attribute; `named_children()`
# looks like the public way to get the same names — confirm and consider switching.
reranker._modules.keys()

odict_keys(['bert', 'dropout', 'classifier'])

In [12]:
# Print the qualified name of every parameter, one per line; the tensors
# themselves are not needed here.
for name, _ in reranker.named_parameters():
    print(name)

bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attention.self.query.weight
bert.enc

In [13]:
# Inspect the classification head. The recorded output shows a single Linear
# layer with 768 input features and 1 output feature.
reranker.classifier

Linear(in_features=768, out_features=1, bias=True)

# Our model

In [14]:
from model import Rerank

In [15]:
# Build the project's Rerank wrapper from the same `path` used for the tokenizer and reranker.
# NOTE(review): the keyword is spelled `replace_classifer` (sic); it has to match the
# parameter name declared in model.Rerank, which is not visible here — confirm before renaming.
# presumably False keeps the checkpoint's existing classification head — TODO confirm
model = Rerank(path, replace_classifer=False)

In [16]:
# Score all passages against the query through the wrapper; the recorded output
# has one row per passage.
# NOTE(review): the recorded values (2.0757 / 1.3838) differ from the raw reranker
# logits printed earlier for the same pairs (6.9148 / 2.9081) even though
# replace_classifer=False — confirm whether Rerank.score uses different
# preprocessing, pooling, or a different head.
model.score(query, passages)

tensor([[2.0757],
        [1.3838]])