* Combine the job label encoders, job metadata, and the corresponding content embeddings into a single pickle
* Combine the user label encoders and user metadata into a single pickle

In [1]:
import os
import random
import pickle
import argparse
import matplotlib
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Root of the data tree and the dataset sub-directory used by this notebook.
path = "../data/"
dataset = "cb12/"

# One directory per processing stage, each of the form <path><dataset><stage>/.
raw_path = f"{path}{dataset}raw/"
interim_path = f"{path}{dataset}interim/"
processed_path = f"{path}{dataset}processed/"

# Step 1: Load job metadata

In [3]:
def serialize(filename, obj):
    """Pickle `obj` into the file `filename`.

    The file is opened in binary write mode through `tf.io.gfile.GFile` and is
    closed when the `with` block exits. Returns None.
    """
    with tf.io.gfile.GFile(filename, 'wb') as sink:
        pickle.dump(obj, sink)

def deserialize(filename):
    """Return the object unpickled from the file `filename`.

    The file is opened in binary read mode through `tf.io.gfile.GFile`.

    NOTE(review): `pickle.load` can execute code embedded in the file, so this
    should only be pointed at pickles from a trusted source.
    """
    with tf.io.gfile.GFile(filename, 'rb') as source:
        loaded = pickle.load(source)
    return loaded
    
def load_feature_encoders(path):
    """Return the feature encoders object unpickled from `path` via `deserialize`."""
    return deserialize(path)

In [4]:
# Tab-separated job file under the processed directory; row 0 is the header.
jobs_csv_path = processed_path + 'jobs_14d_30_consider_user_encoded.csv'
print('Loading job from file: {}'.format(jobs_csv_path))
job_df_30 = pd.read_csv(jobs_csv_path, header=0, sep='\t')
print(job_df_30.shape)

Loading job from file: ../data/cb12/processed/jobs_14d_30_consider_user_encoded.csv
(207972, 23)


In [5]:
# The pickled job feature encoders live in the same processed directory as the job CSV.
job_encoders_path = processed_path + 'job_feature_encoders_14d_30_consider_user.pickle'
job_features_encoders_30 = load_feature_encoders(job_encoders_path)

# Step 2: Load job embeddings

In [6]:
def deserialize(filename):
    """Open `filename` for binary reading with `tf.io.gfile.GFile` and return the unpickled object."""
    # This re-defines (and shadows) the `deserialize` helper from the earlier
    # helper cell; both definitions behave the same.
    with tf.io.gfile.GFile(filename, 'rb') as pickled_file:
        return pickle.load(pickled_file)

def load_job_content_embeddings(input_job_content_embeddings_path):
    """Announce the source path on stdout, then return the embeddings unpickled from it."""
    message = 'Loading job embeddings from {}'.format(input_job_content_embeddings_path)
    print(message)
    return deserialize(input_job_content_embeddings_path)

In [7]:
# Load the pickled job content embeddings and show their shape.
job_content_embeddings_30 = load_job_content_embeddings('../language_models/pickles/jobs_14d_30_consider_user_All_d2v.pickle')
print(job_content_embeddings_30.shape)

# First and last encoded job ids, in file order.
job_ids_encoded = job_df_30['JobID_encoded'].values
print(job_ids_encoded[0])
print(job_ids_encoded[-1])

# Check that JobID_encoded is sorted and contiguous, i.e. exactly 1, 2, ..., len(job_df_30).
# Comparing only the first and last values would let duplicated, missing or
# shuffled ids in the middle slip through, so every row is also compared
# against the expected sequence.
assert job_ids_encoded[0] == 1  # 0 is reserved for padding
assert len(job_df_30) == job_ids_encoded[-1]
assert np.array_equal(job_ids_encoded, np.arange(1, len(job_df_30) + 1)), \
    'JobID_encoded must be sorted and contiguous (1..N)'

Loading job embeddings from ../language_models/pickles/jobs_14d_30_consider_user_All_d2v.pickle
(207973, 300)
1
207972


# Step 3: Export all job information

In [8]:
def export_job_metadata_and_embeddings(job_label_encoders, job_metadata_df, job_content_embeddings, output_job_metadata_and_embeddings_path):
    """Pickle the job encoders, metadata and embeddings together as one tuple.

    The destination path is printed first. The tuple is then written with
    `serialize` in the order (job_label_encoders, job_metadata_df,
    job_content_embeddings).
    """
    destination = output_job_metadata_and_embeddings_path
    print('Exporting job label encoders, job metadata and embeddings to {}'.format(destination))
    serialize(destination, (job_label_encoders, job_metadata_df, job_content_embeddings))

In [9]:
# Write the job encoders, metadata frame and content embeddings into one pickle.
job_export_path = '../data/cb12/pickles/job_14d_30_metadata_and_embeddings_d2v.pickle'
export_job_metadata_and_embeddings(
    job_features_encoders_30,
    job_df_30,
    job_content_embeddings_30,
    job_export_path,
)

Exporting job label encoders, job metadata and embeddings to ../data/cb12/pickles/job_14d_30_metadata_and_embeddings_d2v.pickle


# Step 4: Load user metadata

In [10]:
# Tab-separated user file under the processed directory; row 0 is the header.
users_csv_path = processed_path + 'users_14d_30_consider_user_encoded.csv'
print('Loading user from file: {}'.format(users_csv_path))
user_df_30 = pd.read_csv(users_csv_path, header=0, sep='\t')
print(user_df_30.shape)

Loading user from file: ../data/cb12/processed/users_14d_30_consider_user_encoded.csv
(111785, 21)


In [12]:
# The pickled user feature encoders live in the same processed directory as the user CSV.
user_encoders_path = processed_path + 'user_feature_encoders_14d_30_consider_user.pickle'
user_features_encoders_30 = load_feature_encoders(user_encoders_path)

# Step 5: Export all user information

In [13]:
def export_user_metadata_and_embeddings(user_label_encoders, user_metadata_df, output_user_metadata_and_embeddings_path):
    """Pickle the user encoders and user metadata together as one tuple.

    The destination path is printed first. The tuple is then written with
    `serialize` in the order (user_label_encoders, user_metadata_df).
    """
    destination = output_user_metadata_and_embeddings_path
    print('Exporting user label encoders and user metadata to {}'.format(destination))
    serialize(destination, (user_label_encoders, user_metadata_df))


# Write the user encoders and the user metadata frame into one pickle.
user_export_path = '../data/cb12/pickles/user_14d_30_metadata.pickle'
export_user_metadata_and_embeddings(
    user_features_encoders_30,
    user_df_30,
    user_export_path,
)

Exporting user label encoders and user metadata to ../data/cb12/pickles/user_14d_30_metadata.pickle
