In [22]:
import pandas as pd

from gensim.utils import simple_preprocess
from gensim.models import Word2Vec
from gensim.models.phrases import Phrases, Phraser

from ast import literal_eval

In [2]:
# Read the job-offer CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='data/job_ofer.csv')

In [3]:
# (row count, column count) of the loaded frame.
df.shape

(36109, 8)

In [4]:
# Preview the first five rows (five is the pandas default for head()).
df.head(n=5)

Unnamed: 0,title,company_name,address,description,seniority_level,employment_type,job_function,industries
0,Machine Learning Engineer,Intellipro Group Inc,"Palo Alto, CA, US","['About The Company', ""W*** is reshaping the f...",Entry level,Full-time,Engineering,Information Technology and Services
1,Deep Learning Applied Researcher - Chicago,Ethosia,"Chicago, IL, US","['תיאור המשרה', 'Deep learning for Computer Vi...",Associate,Full-time,Other,Information Technology and Services
2,Machine Learning Engineer,Motorola Solutions,"Chicago, IL, US","['Company Overview', 'At Motorola Solutions, w...",Entry level,Full-time,Engineering,Information Technology and Services
3,Machine Learning / Data Scientist,Proprius LLC,"San Francisco, CA, US",['Our client is a digital invention agency foc...,Entry level,Full-time,Engineering,Information Technology and Services
4,Cloud Architect,TCS,"Framingham, Massachusetts, United States","['Technical/Functional Skills', ' ', 'Good to ...",Mid-Senior level,Full-time,Engineering,Information Technology and Services


## Word2Vec

In [5]:
# Tokenise every job title with gensim's simple_preprocess; the result is a
# Series holding one token list per row.
corpus = df['title'].apply(simple_preprocess)

In [6]:
# Train word vectors on the tokenised titles: 100-dimensional embeddings, a
# two-token context window, and no frequency cut-off (min_count=1).
# NOTE(review): `size` is the gensim 3.x keyword; gensim 4 renamed it to
# `vector_size`, so this cell needs gensim < 4.
model = Word2Vec(
    sentences=corpus,
    size=100,
    window=2,
    min_count=1,
)

In [7]:
# Ten nearest neighbours of "machine" in the plain-title model.
model.wv.most_similar(positive=['machine'], topn=10)

[('deep', 0.9517883658409119),
 ('inference', 0.8504943251609802),
 ('predictive', 0.8503087162971497),
 ('edge', 0.8234385251998901),
 ('captivate', 0.8231526613235474),
 ('genomic', 0.8150447607040405),
 ('bentonville', 0.8115875124931335),
 ('computer', 0.8061255216598511),
 ('researcher', 0.8039858341217041),
 ('natural', 0.8019294142723083)]

## Title + phrases

In [8]:
# Tokenise the titles again under a dedicated name for the phrase experiments.
title_corpus = df['title'].apply(simple_preprocess)

# Collect bigram statistics over the titles, then wrap them in a Phraser that
# is used below to merge detected token pairs (e.g. "deep_learning").
# min_count=1 and threshold=1 are the lowest-barrier settings used throughout
# this notebook.
title_bigram = Phraser(
    Phrases(sentences=title_corpus, min_count=1, threshold=1)
)

In [9]:
# Sanity check: run one raw title through tokenisation and phrase merging.
sample_tokens = simple_preprocess('Deep Learning Applied Researcher - Chicago')
title_bigram[sample_tokens]

['deep_learning', 'applied', 'researcher', 'chicago']

In [10]:
# Apply the phraser to every tokenised title, one sentence at a time, and
# materialise the result as a list of token lists.
title_corpus_phrase = list(map(lambda tokens: title_bigram[tokens], title_corpus))

In [11]:
# Retrain on the phrase-merged titles with the same hyper-parameters.
# This rebinds `model`, so the plain-title model trained earlier is replaced.
# NOTE(review): `size` is the gensim 3.x keyword (gensim 4: `vector_size`).
model = Word2Vec(
    sentences=title_corpus_phrase,
    size=100,
    window=2,
    min_count=1,
)

In [12]:
# Ten nearest neighbours of "machine" in the phrase-only title model.
model.wv.most_similar(positive=['machine'], topn=10)

[('in_upstate', 0.9350973963737488),
 ('corporation', 0.9333582520484924),
 ('southeast', 0.9330936670303345),
 ('analytics_bangkok', 0.9326107501983643),
 ('technical_specialist', 0.9324313998222351),
 ('us', 0.9322487711906433),
 ('business_process', 0.9321648478507996),
 ('cost', 0.9318822622299194),
 ('asset', 0.9315726161003113),
 ('mobile', 0.9315614700317383)]

In [13]:
def prepare_corpus(corpus, bigram):
    """Lazily build an extended corpus of phrase tokens plus original tokens.

    For every sentence in ``corpus`` the generator yields a single sequence:
    the result of ``bigram[sentence]`` concatenated (with ``+``) with the
    sentence itself, so both the merged phrases and the individual words
    appear in the same training sentence.

    Parameters:
        corpus: iterable of token sequences.
        bigram: object indexable by a token sequence (``bigram[sentence]``)
            whose result can be concatenated with that sequence.

    Yields:
        ``bigram[sentence] + sentence`` for each sentence, in input order.
    """
    yield from (bigram[tokens] + tokens for tokens in corpus)

In [15]:
# Materialise the extended title corpus (phrase tokens followed by the
# original tokens for each title) so it can be passed to Word2Vec as a list.
ext_corpus = [*prepare_corpus(title_corpus, title_bigram)]

# Train the title model on the extended corpus with the notebook's usual
# hyper-parameters.
# NOTE(review): `size` is the gensim 3.x keyword (gensim 4: `vector_size`).
title_model = Word2Vec(
    sentences=ext_corpus,
    size=100,
    window=2,
    min_count=1,
)

In [16]:
# Ten nearest neighbours of "machine" in the extended (phrases + words) title model.
title_model.wv.most_similar(positive=['machine'], topn=10)

[('deep', 0.9509055018424988),
 ('learning', 0.9464936852455139),
 ('autopilot', 0.933887779712677),
 ('machine_learning', 0.932407557964325),
 ('learning_product', 0.9258176684379578),
 ('computer_vision', 0.9215735197067261),
 ('deep_learning', 0.9208741784095764),
 ('big', 0.9196785092353821),
 ('pkpd', 0.9176803827285767),
 ('big_data', 0.9129390716552734)]

## Description

In [17]:
# Show the raw description text of one randomly drawn posting.
df.sample(n=1)['description'].iloc[0]

"['Do you have what it takes to be a trusted advisor to Fortune 500 HR and Training managers? Are you a highly organized, customer-driven individual who can uncover the stories behind the data? Can you help our clients unlock the benefits of having English language fluency as a strategic asset for their global success?', 'We are looking for a bright, proactive and motivated Customer Success Manager to join our US Enterprise Sales team, based in our Cambridge, MA office.', 'The EF Corporate Solutions customer success team is dedicated to making every customer and their employees successful with our language training programs. We work in close partnership with sales, marketing, and operations teams to develop and nurture our customer relationships and are responsible for the end-to-end post-sale customer experience from on-boarding to renewal.', 'As a Customer Success Manager for EF Corporate Solutions, no day will be the same for you! You will wear a variety of hats in your relationship

In [18]:
# Tokenise the raw description strings, one token list per posting.
# NOTE(review): the column appears to store a stringified Python list of
# paragraphs (the notebook later parses it with literal_eval); tokenising the
# raw string treats each whole posting as a single sentence — confirm that is
# intended.
description_corpus = df['description'].apply(simple_preprocess)

# Collect bigram statistics over the descriptions and wrap them in a Phraser,
# using the same min_count/threshold settings as for the titles.
description_bigram = Phraser(
    Phrases(sentences=description_corpus, min_count=1, threshold=1)
)

In [19]:
# Materialise the extended description corpus (phrase tokens followed by the
# original tokens for each posting).
ext_descr_corpus = [*prepare_corpus(description_corpus, description_bigram)]

# Train the description model with the same hyper-parameters as the title model.
# NOTE(review): `size` is the gensim 3.x keyword (gensim 4: `vector_size`).
descr_model = Word2Vec(
    sentences=ext_descr_corpus,
    size=100,
    window=2,
    min_count=1,
)

In [20]:
# Ten nearest neighbours of "machine" in the description model.
descr_model.wv.most_similar(positive=['machine'], topn=10)

[('naturallanguage', 0.6005061864852905),
 ('ml', 0.5972025990486145),
 ('nlp_machine', 0.58416748046875),
 ('cnc', 0.5832350254058838),
 ('algorithms', 0.5794932842254639),
 ('computer', 0.5742949843406677),
 ('machines', 0.5570603609085083),
 ('reinforcement', 0.5394213199615479),
 ('and_implanting', 0.5347534418106079),
 ('using_machine', 0.5325719714164734)]

In [21]:
# Same query against the title model again — presumably repeated here for a
# side-by-side comparison with the description model's neighbours.
title_model.wv.most_similar(positive=['machine'], topn=10)

[('deep', 0.9509055018424988),
 ('learning', 0.9464936852455139),
 ('autopilot', 0.933887779712677),
 ('machine_learning', 0.932407557964325),
 ('learning_product', 0.9258176684379578),
 ('computer_vision', 0.9215735197067261),
 ('deep_learning', 0.9208741784095764),
 ('big', 0.9196785092353821),
 ('pkpd', 0.9176803827285767),
 ('big_data', 0.9129390716552734)]

In [23]:
# Draw one random posting, parse its stringified description with
# literal_eval, and print each element followed by a blank line.
sampled_description = df.sample(n=1)['description'].values[0]
for paragraph in literal_eval(sampled_description):
    print(paragraph, end="\n\n")

United in Performance. Inspired by Innovation.

Performance unites us, innovation inspires us, and commitment drives us to keep moving forward. Count on Epiroc to deliver customer-oriented solutions needed to succeed today and the technology to lead tomorrow. To do this, we believe in diverse teams that work together across borders to develop, produce, market and service the most innovative products available. Grow your career at Epiroc!

At Epiroc USA LLC, our Mining and Rock Excavation Service Division provides a broad range of services with the aim of maximizing customers’ productivity. We focus on spare parts supply, professional service, support solutions and training. This open Field Service Technician position will be based in Roanoke, VA.

Field Service Technicians provide timely service support for technical problems, monitoring, testing, and demonstrations that result in minimizing downtime, reducing costs or promoting our product capabilities in the case of a test or demo. T