# Profile Vacancies recommendation engine

## Context

The notebook has been created in the context of a a "Postgraduate Studies in Big Data & Analytics in Business and Management". 

It has been based on the blog post https://towardsdatascience.com/if-you-cant-measure-it-you-can-t-improve-it-5c059014faad, 
I modified it a bit to cater for my own dataset.

## Importing libraries

In [10]:
import pandas as pd
import time

from VacancyData import VacancyData
from VacancyHelper import helper

from lightfm.data import Dataset
from lightfm import LightFM

from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import reciprocal_rank

import numpy as np

In [11]:
# Load data part
qd = VacancyData();

matchings, vacancies, profiles, profilestest = qd.getData()

# Creating a dataset    
dataset = Dataset()
dataset.fit((x['ProfielId'] for x in qd.getMatchings()),
            (x['VacatureId'] for x in qd.getMatchings()))

In [12]:
# Check on items and users in our interactions set
num_users, num_items = dataset.interactions_shape()
print('--- Interaction set : Num users: {}, num_items {}. ---'.format(num_users, num_items))

--- Interaction set : Num users: 3, num_items 6. ---


In [7]:
# Adding the vacancy features in the mix
dataset.fit_partial(items=(x['VacatureId'] for x in qd.getVacancies()),
                    item_features=(x['Naam'] for x in qd.getVacancies()))

dataset.fit_partial(items=(x['VacatureId'] for x in qd.getVacancies()),
                    item_features=(x['Taal'] for x in qd.getVacancies()),
                    )

dataset.fit_partial(items=(x['VacatureId'] for x in qd.getVacancies()),
                    item_features=(x['Functie'] for x in qd.getVacancies()),
                    )
# Adding the user features in the mix. TODO: make a feature list for users (other then just motivation)
dataset.fit_partial(users=(x['Id'] for x in qd.getProfiles()),
                    user_features=(x['Motivatie'] for x in qd.getProfiles())                    
                    )
num_users, num_items = dataset.interactions_shape()
print('--- Total set : Num users: {}, num_items {}. ---'.format(num_users, num_items))


--- Total set : Num users: 11361, num_items 8356. ---


In [8]:
# creating the interaction matrix for the model
(interactions, weights) = dataset.build_interactions(((x['ProfielId'], x['VacatureId'])
                                                      for x in qd.getMatchings()))

In [9]:
# creating the item feature matrix for the model
item_features = dataset.build_item_features(((x['VacatureId'], [x['Naam'],x['Taal'],x['Functie']])
                                              for x in qd.getVacancies()),normalize=False)

In [10]:
# Split the set in train and test
test , train = random_train_test_split(interactions, test_percentage=0.2, random_state=None)


In [40]:
# Start training the model
print("--- Start model training ---")
model=LightFM(no_components=5,learning_rate=0.027,loss='warp')
model.fit(train,item_features=item_features, epochs=100,num_threads=4, verbose=False)
# model.fit(train,epochs=12,num_threads=4)

modelnofeatures=LightFM(no_components=5,learning_rate=0.027,loss='warp')
modelnofeatures.fit(train, epochs=100,num_threads=4, verbose=False)


--- Start model training ---


<lightfm.lightfm.LightFM at 0x25cd57a4da0>

In [23]:
# Start evaluation of the model
print("--- Start model evaluation ---")
# Default k is 10. K is top N in which the precision or recall is measured.
topN = 5
start_time = time.time()

auc_train = auc_score(model, train,item_features=item_features).mean()
auc_test = auc_score(model, test,item_features=item_features).mean()
precision_train = precision_at_k(model, train, k=topN, item_features=item_features).mean()
precision_test = precision_at_k(model, test, k=topN, item_features=item_features).mean()
recall_train = recall_at_k(model, train,k=topN, item_features=item_features).mean()
recall_test = recall_at_k(model, test,k=topN, item_features=item_features).mean()

print("--- End model evaluation. Run time:  {} mins ---".format((time.time() - start_time)/60))

print('Auc: train %.2f, test %.2f.' % (auc_train, auc_test))
print('Precision: train %.2f, test %.2f.' % (precision_train, precision_test))
print('Recall: train %.2f, test %.2f.' % (recall_train, recall_test))

auc_trainnf = auc_score(modelnofeatures, train).mean()
auc_testnf = auc_score(modelnofeatures, test).mean()
precision_trainnf = precision_at_k(modelnofeatures, train, k=topN).mean()
precision_testnf = precision_at_k(modelnofeatures, test, k=topN).mean()
recall_trainnf = recall_at_k(modelnofeatures, train,k=topN).mean()
recall_testnf = recall_at_k(modelnofeatures, test,k=topN).mean()

print("--- End model evaluation model no features. Run time:  {} mins ---".format((time.time() - start_time)/60))

print('Auc: train %.2f, test %.2f.' % (auc_trainnf, auc_testnf))
print('Precision: train %.2f, test %.2f.' % (precision_trainnf, precision_testnf))
print('Recall: train %.2f, test %.2f.' % (recall_trainnf, recall_testnf))


--- Start model evaluation ---
--- End model evaluation. Run time:  0.9939277569452921 mins ---
Auc: train 1.00, test 0.88.
Precision: train 0.28, test 0.01.
Recall: train 0.53, test 0.02.
--- End model evaluation model no features. Run time:  1.5767034848531087 mins ---
Auc: train 1.00, test 0.85.
Precision: train 0.53, test 0.01.
Recall: train 1.00, test 0.02.


In [14]:
# Manual testing
ratingspd = pd.DataFrame(matchings)
ratingspd['rating']=ratingspd.apply(lambda row:'1', axis=1)

user_item_matrix = ratingspd.pivot(index='ProfielId', columns='VacatureId', values='rating')
user_item_matrix.fillna(0, inplace = True)
user_item_matrix = user_item_matrix.astype(np.int32)

itemspd = pd.DataFrame(vacancies)

user_dikt, item_dikt = helper.user_item_dikts(user_item_matrix, itemspd)


In [42]:
# Generate recommendations for the user without an interaction
# helper.similar_recommendation(model, user_item_matrix, '4666', user_dikt, item_dikt,threshold = 0)
helper.similar_recommendation_features(model, user_item_matrix, '166', item_dikt,dataset,interactions,item_features,threshold = 0)


The user has not applied for a job yet...
Recommended Jobs:
1- SECRÉTAIRE H/F
2- EMPLOYÉ CALL-CENTER (H/F)
3- CHAUFFEUR B


In [43]:
helper.similar_recommendation_features(model, user_item_matrix, '1134', item_dikt,dataset,interactions,item_features,threshold = 0)


Jobs that are chosen by the user:
1- EMPLOYÉ ADMINISTRATIF
Recommended Jobs:
1- SECRÉTAIRE
2- STUDENT - BOUCHERIE
3- ETUDIANT NETTOYEUR
