In [1]:
import numpy as np

In [2]:
SKILLS = ["JS", 'Javascript', "NodeJS", "Node JS", "Node.JS", \
          "Python", "Python 2", "Python 3", "Django", "Flask", \
          "HTML", "HTML5", "CSS", "CSS3",\
         "Random Forest", "SVM", "Support Vector Machine", "Neural Network", "Machine Learning"]

In [3]:
RELS = [
    ("NodeJS", ("R1", 1, 1), "Node JS"), ("Node JS", ("R1", 1, 1), "Node.JS"),
    ("JS", ("R2", 1, 1), "Javascript"), ("SVM", ("R2", 1, 1), "Support Vector Machine"),
    ("Random Forest", ("R3", 0.2, 1), "Machine Learning"), ("Support Vector Machine", ("R3", 0.3, 1), "Machine Learning"), ("Neural Network", ("R3", 0.4, 1), "Machine Learning"),
    ("Python 2", ("R5", 1, 0.9), "Python"), ("Python 3", ("R5", 0.9, 0.9), "Python 2"), ("HTML5", ("R5", 1, 1), "HTML"), ("CSS", ("R5", 1, 1), "CSS3"),
    ("Django", ("R7", 1, 0.2), "Python"), ("Flask", ("R7", 1, 0.4), "Python"), ("Node JS", ("R7", 1, 0.3), "Javascript"),
]

In [4]:
def get_rel(A, B):
    for a, rel, b in RELS:
        if a == A and b == B:
            return rel[1]
        if a == B and b == A:
            return rel[2]
    return None

def get_rel_type(A, B):
    for a, rel, b in RELS:
        if (a == A and b == B) or (a == B and b == A):
            return rel[0]
    return None

def rel_exist(skill, target_skill):
    for skill_A, rel, skill_B in RELS:
        if len(set([skill_A, skill_B]) - set([skill, target_skill])) == 0:
            return True
    return False

In [10]:
def compute_same_level_score(bridge_skill):
    same_level_scores = []
    for _, score, rel_type in bridge_skill:
        if rel_type in ['R1', 'R2', 'R5', 'R6', 'R7']:
            same_level_scores.append(score)
    same_level_score = max(same_level_scores) if same_level_scores else 0
    return same_level_score

def compute_hierarchical_score(bridge_skill):
    hierarchical_scores = []
    for check_rel_type in ['R3', 'R4']:
        score = sum([score for _, score, rel_type in bridge_skill if rel_type==check_rel_type])
        score = min(score, 1)
        hierarchical_scores.append(score)
    hierarchical_score = max(hierarchical_scores) if hierarchical_scores else 0
    return hierarchical_score

In [27]:
def infer_skill(candidate, debug=False):
    expanded_skills = candidate.copy()
    for depth in range(10): # expanding depth
        number_of_current_skills = len(expanded_skills) # for early stop
        for target_skill in SKILLS:
            bridge_skill = []
            if target_skill in list(expanded_skills.keys()):
                continue
            for skill, exp in list(expanded_skills.items()):
                for skill, exp in list(expanded_skills.items()):
                    if rel_exist(skill, target_skill):
                        bridge_skill.append((skill, exp*get_rel(skill, target_skill), get_rel_type(skill, target_skill)))

            if bridge_skill:
                # check same-level relationships
                same_level_score = compute_same_level_score(bridge_skill)
                # check hierarchical relationships
                hierarchical_score = compute_hierarchical_score(bridge_skill)
                expanded_skills[target_skill] = max(same_level_score, hierarchical_score)
                if debug:
                    print("{}: {}".format(target_skill, max(same_level_score, hierarchical_score)))
                
        if len(expanded_skills) == number_of_current_skills:
            break
    return expanded_skills

In [48]:
def get_matching_score(candidate, requirement):
    skill_scores = []
    for req_skill, req_exp in requirement.items():
        if req_skill in candidate:
            skill_scores.append(min(1, candidate[req_skill]/req_exp))
        else:
            skill_scores.append(0)        
    return np.mean(skill_scores)

In [49]:
req_1 = { 'JS': 5, 'CSS': 5, 'Python': 2 }
req_2 = { "Python": 3, "Machine Learning": 3 }
reqs = [req_1, req_2]

cand_1 = { 'Node.JS': 4, 'CSS': 4 }
cand_2 = { 'Django': 5, 'Random Forest': 1 }
cand_3 = { 'Python 3': 5, 'Javascript': 2 }
candidates = [cand_1, cand_2, cand_3]

In [53]:
for i, req in enumerate(reqs):
    print('======== Job', i+1)
    for j, candidate in enumerate(candidates):
        expanded_skills = infer_skill(candidate, False)
        before_score = get_matching_score(candidate, req)
        after_score = get_matching_score(expanded_skills, req)
        print('+++ Person {} before/after inference: {} -> {}'.format(j+1, before_score, after_score))

+++ Person 1 before/after inference: 0.26666666666666666 -> 0.5333333333333333
+++ Person 2 before/after inference: 0.0 -> 0.3333333333333333
+++ Person 3 before/after inference: 0.0 -> 0.4666666666666666
+++ Person 1 before/after inference: 0.0 -> 0.0
+++ Person 2 before/after inference: 0.0 -> 0.6666666666666666
+++ Person 3 before/after inference: 0.0 -> 0.5
