In [1]:
import gensim, logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
 
# train word2vec on the two sentences
model = gensim.models.Word2Vec.load('./corpus/word2vec300_phrase.model')

In [2]:
phrase = gensim.models.phrases.Phraser.load('corpus/phrase.model')

In [3]:
competency = {'creativity': 'ability to think in a divergent way, and generate novel ideas.',
 'employee_loyalty': 'be committed to the success of the organization and believe that working for this organization is their best option.',
 'leadership': 'ability to engage and facilitate others to create a better result.',
 'passion': 'desire to provide long-term commitment to their organization, demonstrate peak performance, and maintain increased tenure with the organization. with the intense desire or enthusiasm for the work.',
 'people_skills': 'ability to establish social connections and understand the mindset of others.',
 'proactivity': 'take the initiative and responsibility in improving business, rather than looking for causes in outside circumstances or other people.',
 'reliability': 'ability to performing to perform when needed, finishing projects and meeting deadlines.',
 'teamwork': 'ability to adapt to the needs of a group of people, while supporting the work of others.'}

In [4]:
comps = ['_'.join(c.split()) + ' ' + competency[c] for c in competency]
comps = [c.replace('.', '') for c in comps]
comps = [c.replace(',', '') for c in comps]
comps = [c.split() for c in comps] 

In [5]:
model.build_vocab(phrase[comps * 5], update=True)



In [7]:
model.train(phrase[comps * 5])

1166

In [8]:
model.most_similar(positive=['small', 'biggest'], negative=['big'])

[(u'largest', 0.5673831701278687),
 (u'relatively_small', 0.5399969816207886),
 (u'very_small', 0.4757996201515198),
 (u'smallest', 0.45116886496543884),
 (u'most_important', 0.4464262127876282),
 (u'second_largest', 0.4444895386695862),
 (u'worlds_largest', 0.43094953894615173),
 (u'sizable', 0.4223106801509857),
 (u'most_impressive', 0.41765397787094116),
 (u'third_largest', 0.41698721051216125)]

In [9]:
model.most_similar(positive=['woman', 'king'], negative=['man'], topn=1)

[(u'queen', 0.7234086990356445)]

In [10]:
model.doesnt_match("breakfast cereal dinner lunch".split())

'cereal'

In [11]:
new_comp = """
Charisma	Able to establish social connections and a broad network
Confidence	Acts on his/her own initiative with confidence
Creativity	Seeks change and performs creatively
Entrepreneurial	Keeps aware of both organizational issues and market opportunities
Goal Orientated	Works systematically and drives the project to its goals
Goal Orientation	Work systematically and drive the project to the target goals
Growth Potential	Growth Potential
Integrity	Acts with integrity and shows social responsibility
Intellect	Flexible application of knowledge to differing contexts
Job Performance	Job Performance
Leadership	Identifies talents, empowers and motivates other
Logic	Logical thinking and rational analysis
Organisation	Sets objectives, balances resources and time, monitors progress
Organization	Sets objectives, balances resources and time, monitors progress
Performance	Job Performance
Proactive	Proactively deals with ambiguity
Process driven	Demonstrate commitment, accountablity, follow policies and procedures of the organizations
Resilient	Coping and resilient under stressful environment
Self motivated	Work enthusiastically for the achievement of self-development
Strategic Competency	Strategic Competency
Strategy	Foresight for potential problems and comes up with appropriate solutions
Teamwork	Adapts to the team; Supports others
"""

lines = new_comp.lower().strip().split('\n')
new_comp_dict = {'_'.join(splitted[0].split()) : splitted[1] for splitted in [line.split('\t') for line in lines]}
for c in new_comp_dict:
    new_comp_dict[c] = new_comp_dict[c].replace(',', '')
new_comp_dict

{'charisma': 'able to establish social connections and a broad network',
 'confidence': 'acts on his/her own initiative with confidence',
 'creativity': 'seeks change and performs creatively',
 'entrepreneurial': 'keeps aware of both organizational issues and market opportunities',
 'goal_orientated': 'works systematically and drives the project to its goals',
 'goal_orientation': 'work systematically and drive the project to the target goals',
 'growth_potential': 'growth potential',
 'integrity': 'acts with integrity and shows social responsibility',
 'intellect': 'flexible application of knowledge to differing contexts',
 'job_performance': 'job performance',
 'leadership': 'identifies talents empowers and motivates other',
 'logic': 'logical thinking and rational analysis',
 'organisation': 'sets objectives balances resources and time monitors progress',
 'organization': 'sets objectives balances resources and time monitors progress',
 'performance': 'job performance',
 'proactive'

In [12]:
for c in new_comp_dict:
    query = '_'.join(c.split())
    if query not in model.vocab:
        print "{} not in model".format(query)
        s = [query]
        desc = phrase[new_comp_dict[c].split()]
        s += desc
        # print s
        model.build_vocab([s] * 5, update=True)
        model.train([s] * 5)
    sims = [[model.similarity(query, comp), comp] for comp in competency]
    sorted_sims = sorted(sims, key=lambda x: -x[0])
    print "{:25}\t {:20}\t {}".format(query, sorted_sims[0][1], sorted_sims[0][0])

goal_orientated not in model
goal_orientated          	 people_skills       	 0.608985930915
self_motivated not in model
self_motivated           	 leadership          	 0.0948382482061
integrity                	 reliability         	 0.59351816514
intellect                	 creativity          	 0.651332773304
confidence               	 leadership          	 0.478463552197
strategic_competency not in model
strategic_competency     	 employee_loyalty    	 0.0882557561828
organisation             	 leadership          	 0.429196664781
creativity               	 creativity          	 1.0
strategy                 	 leadership          	 0.400780704347
charisma                 	 creativity          	 0.558098409027
teamwork                 	 teamwork            	 1.0
resilient                	 people_skills       	 0.230465615685
performance              	 reliability         	 0.526146058371
growth_potential not in model
growth_potential         	 passion             	 0.095621397246
proc

In [13]:
query = 'loyalty'
if query in model.vocab:
    sims = [[model.similarity(query, comp), comp] for comp in competency]
    sorted_sims = sorted(sims, key=lambda x: -x[0])
    print sorted_sims
    print "{:20} {:20} {}".format(query, sorted_sims[0][1], sorted_sims[0][0])

[[0.471397278541965, 'leadership'], [0.37689997721938284, 'creativity'], [0.337109602723831, 'passion'], [0.28781673238724015, 'reliability'], [0.28574592784189501, 'teamwork'], [0.1480052393880355, 'people_skills'], [0.097241763992310742, 'proactivity'], [0.07661057515409922, 'employee_loyalty']]
loyalty              leadership           0.471397278542


In [14]:
model.similarity('charisma', 'skills')

0.43684333331137493

In [15]:
[comp for comp in competency if comp not in model.vocab]


[]

In [16]:
len(model.vocab)

350034

In [17]:
model.most_similar(positive=['woman', 'king'], negative=['man'], topn=1)

[(u'queen', 0.7234086990356445)]