In [1]:
import os

# Expose only the GPU with index 1 to this process. This lives in the first
# cell so the variable is already set when torch is imported in the next one.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

In [2]:
import torch
import datasets
import numpy as np

from torch.utils.data import DataLoader
from tqdm import tqdm

from knn import FaissKNeighbors
from model import RecModel
from metrics import mrr

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Rebuild the network with the same constructor arguments, then restore the
# weights stored in the best checkpoint.
model = RecModel(in_dim=798, out_dim=797, n_head=6)

# map_location="cpu" deserializes every tensor onto the CPU regardless of the
# device it was saved from. Without it, torch.load tries to restore tensors on
# their original CUDA device, which fails when that device index is not
# visible in this process (CUDA_VISIBLE_DEVICES exposes a single GPU here).
# The model is moved to the GPU later, right before inference.
state_dict = torch.load("checkpoints/model.best.pth", map_location="cpu")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [4]:
# Load the saved vacancies dataset and have it return NumPy arrays on access.
vac_dataset = datasets.load_from_disk("vacancies_dataset").with_format("numpy")

In [5]:
# Inputs for the nearest-neighbour index: the vacancy embeddings (X) and the
# matching vacancy ids (y), both read as whole columns.
X, y = vac_dataset["embedding"], vac_dataset["vacancy_id"]

In [6]:
# Nearest-neighbour lookup over the vacancies, configured with k=100 and
# fitted on the vacancy embeddings (X) with their vacancy ids (y).
# NOTE(review): presumably `predict` returns the ids of the k closest
# vacancies per query row; confirm against knn.FaissKNeighbors.
test_knn = FaissKNeighbors(k=100)
test_knn.fit(X, y)

In [7]:
# Load the saved test-users dataset in NumPy format, then display its summary
# (feature names and row count) as the cell output.
test_dataset = datasets.load_from_disk("test_users_dataset").with_format("numpy")
test_dataset

Dataset({
    features: ['data', 'target', 'key'],
    num_rows: 26000
})

In [8]:
# Reference answers: look up the vacancies closest to each test user's
# "target" column and keep the result as plain Python lists.
y_test = test_knn.predict(test_dataset["target"]).tolist()
y_test[:10]

[['v_1881691',
  'v_2167075',
  'v_2165137',
  'v_1828262',
  'v_624289',
  'v_1222585',
  'v_272032',
  'v_287335',
  'v_1556467',
  'v_1848354',
  'v_1643776',
  'v_291015',
  'v_1977346',
  'v_1923417',
  'v_413456',
  'v_1362668',
  'v_1425927',
  'v_762950',
  'v_2486022',
  'v_1960394',
  'v_2169653',
  'v_96057',
  'v_1650079',
  'v_1424744',
  'v_1661017',
  'v_579818',
  'v_1904213',
  'v_1660089',
  'v_364075',
  'v_1019128',
  'v_2197943',
  'v_1070089',
  'v_84889',
  'v_2648995',
  'v_758121',
  'v_62608',
  'v_1039390',
  'v_201540',
  'v_1575563',
  'v_2672834',
  'v_2461070',
  'v_1035415',
  'v_1150260',
  'v_332478',
  'v_2120764',
  'v_1264541',
  'v_1990984',
  'v_1216432',
  'v_2357774',
  'v_2246582',
  'v_1102694',
  'v_668382',
  'v_1371810',
  'v_2638418',
  'v_828457',
  'v_1556243',
  'v_1994453',
  'v_1942152',
  'v_2633435',
  'v_1755797',
  'v_920172',
  'v_447554',
  'v_2227408',
  'v_489281',
  'v_1296502',
  'v_1861479',
  'v_22300',
  'v_316605',
  'v_

In [9]:
# Feed the model from a torch-formatted *view* of the test set.
# `with_format` returns a new dataset object, so `test_dataset` itself keeps
# the NumPy format set when it was loaded. Switching the format in place would
# make earlier cells (e.g. the KNN lookup on the "target" column) receive
# torch tensors if they are re-run after this one.
# Order is preserved (shuffle=False) and no rows are dropped (drop_last=False),
# so the predictions line up row-for-row with the test set.
loader = DataLoader(
    test_dataset.with_format("torch"),
    batch_size=512,
    shuffle=False,
    drop_last=False,
)

In [10]:
# Put the model in evaluation mode and move its weights to the GPU.
model.eval()
model.cuda()

# Run the model over the test users batch by batch and collect the outputs on
# the CPU as NumPy arrays.
# The per-batch result gets its own name on purpose: binding it to `y` would
# overwrite the vacancy ids that the KNN index was fitted with, so re-running
# the fit cell afterwards would silently use a prediction batch as labels.
pred_batches = []
with torch.inference_mode():
    for batch in tqdm(loader):
        batch_pred = model(batch["data"].cuda())
        pred_batches.append(batch_pred.cpu().numpy())

# Stack the per-batch arrays row-wise into a single array and show its shape.
X_pred = np.vstack(pred_batches)
X_pred.shape

  0%|          | 0/51 [00:00<?, ?it/s]

100%|██████████| 51/51 [00:26<00:00,  1.92it/s]


(26000, 797)

In [11]:
# Model answers: look up the vacancies closest to each predicted embedding
# with the same fitted KNN used for the references, as plain Python lists.
# Only the first five rows are displayed.
y_pred = test_knn.predict(X_pred).tolist()
y_pred[:5]

[['v_2547500',
  'v_1187365',
  'v_1238567',
  'v_2041505',
  'v_196755',
  'v_663277',
  'v_85586',
  'v_929291',
  'v_63863',
  'v_278723',
  'v_614133',
  'v_899227',
  'v_864514',
  'v_1810117',
  'v_1287005',
  'v_432669',
  'v_996549',
  'v_1127237',
  'v_1936773',
  'v_1409223',
  'v_27698',
  'v_1322252',
  'v_1017024',
  'v_903739',
  'v_2294953',
  'v_1210745',
  'v_1541784',
  'v_2313726',
  'v_1873090',
  'v_933633',
  'v_1479023',
  'v_1798140',
  'v_2687611',
  'v_991601',
  'v_618756',
  'v_2496006',
  'v_425382',
  'v_100664',
  'v_770835',
  'v_1137374',
  'v_342481',
  'v_1024714',
  'v_906087',
  'v_2500053',
  'v_824058',
  'v_2298310',
  'v_1790660',
  'v_2288299',
  'v_973162',
  'v_1770933',
  'v_556631',
  'v_1111288',
  'v_1337944',
  'v_1087688',
  'v_675265',
  'v_997311',
  'v_1782627',
  'v_458567',
  'v_1093364',
  'v_1684010',
  'v_1908201',
  'v_1762871',
  'v_268947',
  'v_853783',
  'v_926086',
  'v_211558',
  'v_693181',
  'v_2011764',
  'v_297705',
 

In [12]:
# Score the model with the project's `mrr` metric: the neighbours retrieved
# for the target embeddings (y_test) against those retrieved for the model's
# predicted embeddings (y_pred).
# NOTE(review): presumably mean reciprocal rank; confirm the exact definition
# and argument order in metrics.mrr.
mrr(y_test, y_pred)

100%|██████████| 26000/26000 [00:02<00:00, 11587.13it/s]


0.008217494907692894