# Models for ranking

A word embedding model is used to compute similarity scores between a role and each person, based on the person's skills and interests.
Each Word Mover's Distance is converted into a normalized similarity score, and the final score is a weighted combination of the skills, interests, and experience scores.

## Import Libraries

In [65]:
import pandas as pd
from gensim.models import Word2Vec
import numpy as np

## Enter Role Requirements

In [66]:
# Role to rank people against
role_id = 12
# To score an ad-hoc role instead of a surveyed one, set the values by hand, e.g.:
#role_skills = ['C#', 'Kotlin', 'Python', 'SQL', 'PostgreSQL', '.NET', 'Apache Kafka', 'Pandas', 'Docker', 'Kubernetes']
#role_experience = 'Associate'

# Look the role up in the roles collected through the survey
role = pd.read_csv("../2-data/survey_roles.csv", sep=",", encoding="latin1")
matches_role = role['id'] == role_id
role = role[matches_role]

# 'skills' is a semicolon-separated string; 'explevel' is the required level
first_match = role.iloc[0]
role_skills = first_match['skills'].split(';')
role_experience = first_match['explevel']

print(role_id, role_skills, role_experience, sep="\n")

12
['Bash/Shell', 'PowerShell', 'SQL', 'Microsoft SQL Server', 'Microsoft Azure', '.NET', 'Apache Spark', 'Ansible', 'Docker', 'Terraform']
Associate


# Load Employees Dataset

In [67]:
# Read the employees dataset - from the data entered in the survey
df_people = pd.read_csv(filepath_or_buffer="../2-data/survey_people.csv", sep=",", encoding="latin1")

# Keep only the columns used for ranking. Take an explicit copy so that the
# score columns added in later cells are written to an independent DataFrame
# rather than to a slice of the raw survey data (avoids SettingWithCopyWarning).
df_people = df_people[['id', 'skills', 'skills-want', 'YearsCodePro']].copy()

# Tokenize the semicolon-separated skills string into a list of skill names
df_people['skills'] = df_people['skills'].apply(lambda x: x.split(';'))
df_people



Unnamed: 0,id,skills,skills-want,YearsCodePro
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...",Dart;Java;SQL;MySQL;SQLite,3.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;Go...,4.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...",Python;MySQL;SQLite;Unity 3D;Unreal Engine,4.0
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;An...,2.0
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...",Python;R;SQL;Microsoft SQL Server;MySQL;Oracle...,9.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...",Python;Microsoft SQL Server;MongoDB;Neo4j;AWS;...,1.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...",Python;R;Scala;Microsoft SQL Server;MongoDB;Ne...,12.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...",C#;Python;SQL;Cassandra;MariaDB;Microsoft SQL ...,6.0
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...",C#;Go;HTML/CSS;Java;JavaScript;PowerShell;Pyth...,0.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...",C++;Python;Scala;DynamoDB;MongoDB;MySQL;Oracle...,1.5


# Load Word Embedding Model

In [68]:
# Load the previously saved Word2Vec model; its word vectors (model.wv) are used
# in the scoring cells to compute Word Mover's Distance between skill lists.
model = Word2Vec.load("../3-word_embedding/model-w2vcombinedfiltered")

# Compute Similarity Scores

In [69]:
# Min-max scaler used to normalize the distance columns. The same instance is
# reused for skills and interests; each fit_transform call re-fits it on the
# single column passed in, so every column is rescaled independently.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

## Score from Skills

In [70]:
# Word Mover's Distance between the role's skills and each person's skills
# (this is a distance: smaller means closer to the role)
similarity_scores = [
    model.wv.wmdistance(role_skills, person_skills)
    for person_skills in df_people['skills']
]

# Store the raw distance for each person
df_people['similarity_score_skills'] = similarity_scores

# Largest finite distance among the people scored
is_finite_distance = df_people['similarity_score_skills'] < np.inf
df_temp = df_people[is_finite_distance]
max_score = df_temp['similarity_score_skills'].max()
print(max_score)

# Rows with an inf distance (treated as "no similarity") are kept, but their
# distance is capped at the largest finite one so they can be normalized
capped_distances = df_people['similarity_score_skills'].replace(np.inf, max_score)
df_people['similarity_score_skills'] = capped_distances

# Min-max normalize, then flip distance into similarity -> 0 = more dissimilar, 1 = more similar
scaled_distances = scaler.fit_transform(df_people[['similarity_score_skills']])
df_people['similarity_score_skills'] = 1 - scaled_distances

df_people

0.6594705569789168


Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...",Dart;Java;SQL;MySQL;SQLite,3.0,0.116732
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;Go...,4.0,0.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...",Python;MySQL;SQLite;Unity 3D;Unreal Engine,4.0,0.043909
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;An...,2.0,0.280536
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...",Python;R;SQL;Microsoft SQL Server;MySQL;Oracle...,9.0,0.526111
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...",Python;Microsoft SQL Server;MongoDB;Neo4j;AWS;...,1.0,0.480727
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...",Python;R;Scala;Microsoft SQL Server;MongoDB;Ne...,12.0,0.682146
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...",C#;Python;SQL;Cassandra;MariaDB;Microsoft SQL ...,6.0,0.572735
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...",C#;Go;HTML/CSS;Java;JavaScript;PowerShell;Pyth...,0.0,0.407636
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...",C++;Python;Scala;DynamoDB;MongoDB;MySQL;Oracle...,1.5,1.0


## Score from Interests

In [71]:
# Tokenize the semicolon-separated skills each person wants to work with
df_people['skills-want'] = [wanted.split(';') for wanted in df_people['skills-want']]

# Word Mover's Distance between the role's skills and each person's interests
# (this is a distance: smaller means closer to the role)
similarity_scores = [
    model.wv.wmdistance(role_skills, wanted_skills)
    for wanted_skills in df_people['skills-want']
]

# Store the raw distance for each person
df_people['similarity_score_interests'] = similarity_scores

# Largest finite distance among the people scored
is_finite_distance = df_people['similarity_score_interests'] < np.inf
df_temp = df_people[is_finite_distance]
max_score = df_temp['similarity_score_interests'].max()
print(max_score)

# Rows with an inf distance (treated as "no similarity") are kept, but their
# distance is capped at the largest finite one so they can be normalized
capped_distances = df_people['similarity_score_interests'].replace(np.inf, max_score)
df_people['similarity_score_interests'] = capped_distances

# Min-max normalize, then flip distance into similarity - 0 = more dissimilar, 1 = more similar
scaled_distances = scaler.fit_transform(df_people[['similarity_score_interests']])
df_people['similarity_score_interests'] = 1 - scaled_distances

df_people

0.6779650249471589


Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599


## Score from Experience

In [72]:
# Bin edges in years of professional coding. pd.cut uses right-inclusive
# intervals by default, so the levels are:
#   (-1, 2] -> Junior, (2, 5] -> Associate, (5, 10] -> Senior, (10, inf] -> Architect
# The lower edge is -1 so that 0 years falls into 'Junior'. The upper edge is
# open-ended so that nobody with more than 10 years is left without a level
# (a fixed cap would turn larger values into NaN).
bin_edges = [-1, 2, 5, 10, float("inf")]

# Define the labels for each level of experience (one per interval, in order)
labels = ['Junior', 'Associate', 'Senior', 'Architect']

# Convert the 'YearsCodePro' column to categorical levels of experience
df_people['Experience'] = pd.cut(pd.to_numeric(df_people['YearsCodePro']), bins=bin_edges, labels=labels)

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0,Associate
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477,Associate
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968,Associate
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137,Junior
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549,Senior
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746,Junior
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996,Architect
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038,Senior
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402,Junior
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599,Junior


In [73]:
from scipy.spatial.distance import euclidean

def similarity(level1, level2):
    """Return a similarity score in (0, 1] between two experience levels.

    Each level is mapped to a cumulative 0/1 vector, so every step up the
    ladder (Junior -> Associate -> Senior -> Architect) switches on one more
    component. The score is 1 / (1 + d), where d is the Euclidean distance
    between the two vectors: identical levels score 1.0 and the score shrinks
    as the levels move apart.

    Both arguments must be one of 'Junior', 'Associate', 'Senior' or
    'Architect'; any other value raises KeyError.
    """
    encoding = dict(
        Junior=[0, 0, 0, 0],
        Associate=[1, 0, 0, 0],
        Senior=[1, 1, 0, 0],
        Architect=[1, 1, 1, 0],
    )

    first_vector = encoding[level1]
    second_vector = encoding[level2]

    # Distance 0 maps to similarity 1; larger distances approach 0
    gap = euclidean(first_vector, second_vector)
    return 1 / (1 + gap)

In [74]:
# Score each person's experience level against the level the role asks for
experience_levels = df_people["Experience"]
df_people['similarity_score_experience'] = experience_levels.apply(
    lambda person_level: similarity(role_experience, person_level)
)
df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0,Associate,1.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477,Associate,1.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968,Associate,1.0
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137,Junior,0.5
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549,Senior,0.5
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746,Junior,0.5
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996,Architect,0.414214
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038,Senior,0.5
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402,Junior,0.5
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599,Junior,0.5


# Add Results from all 3 Models

## Model 1 - Compute Final Score

In [75]:
# Model 1 weights: skills and experience only, interests ignored
weight_skills, weight_interests, weight_experience = 0.8, 0, 0.2

# Final score = weighted sum of the three similarity scores
df_people['score-model1'] = (
    weight_skills * df_people['similarity_score_skills']
    + weight_interests * df_people['similarity_score_interests']
    + weight_experience * df_people['similarity_score_experience']
)

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0,Associate,1.0,0.293385
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477,Associate,1.0,0.2
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968,Associate,1.0,0.235127
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137,Junior,0.5,0.324429
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549,Senior,0.5,0.520889
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746,Junior,0.5,0.484581
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996,Architect,0.414214,0.628559
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038,Senior,0.5,0.558188
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402,Junior,0.5,0.426109
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599,Junior,0.5,0.9


## Model 2 - Compute Final Score

In [76]:
# Model 2 weights: skills and interests weighted equally, plus experience
weight_skills, weight_interests, weight_experience = 0.4, 0.4, 0.2

# Final score = weighted sum of the three similarity scores
df_people['score-model2'] = (
    weight_skills * df_people['similarity_score_skills']
    + weight_interests * df_people['similarity_score_interests']
    + weight_experience * df_people['similarity_score_experience']
)

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1,score-model2
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0,Associate,1.0,0.293385,0.246693
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477,Associate,1.0,0.2,0.273908
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968,Associate,1.0,0.235127,0.283551
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137,Junior,0.5,0.324429,0.279469
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549,Senior,0.5,0.520889,0.617464
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746,Junior,0.5,0.484581,0.539275
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996,Architect,0.414214,0.628559,0.599685
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038,Senior,0.5,0.558188,0.684309
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402,Junior,0.5,0.426109,0.450016
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599,Junior,0.5,0.9,0.57624


## Model 3 - Compute Final Score

In [77]:
# Model 3 weights: interests and experience only, current skills ignored
weight_skills, weight_interests, weight_experience = 0, 0.8, 0.2

# Final score = weighted sum of the three similarity scores
df_people['score-model3'] = (
    weight_skills * df_people['similarity_score_skills']
    + weight_interests * df_people['similarity_score_interests']
    + weight_experience * df_people['similarity_score_experience']
)

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1,score-model2,score-model3
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.116732,0.0,Associate,1.0,0.293385,0.246693,0.2
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.0,0.18477,Associate,1.0,0.2,0.273908,0.347816
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.043909,0.164968,Associate,1.0,0.235127,0.283551,0.331975
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.280536,0.168137,Junior,0.5,0.324429,0.279469,0.23451
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,0.526111,0.767549,Senior,0.5,0.520889,0.617464,0.714039
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.480727,0.61746,Junior,0.5,0.484581,0.539275,0.593968
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.682146,0.60996,Architect,0.414214,0.628559,0.599685,0.570811
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.572735,0.888038,Senior,0.5,0.558188,0.684309,0.810431
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.407636,0.467402,Junior,0.5,0.426109,0.450016,0.473922
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,1.0,0.190599,Junior,0.5,0.9,0.57624,0.25248


# Add Column for Ordering the List

In [78]:
# Rank people per model: the highest score gets rank 1
for model_number in (1, 2, 3):
    score_column = f'score-model{model_number}'
    df_people[f'rank-model{model_number}'] = df_people[score_column].rank(ascending=False)

# Keep the descriptive columns and the three ranks; drop the intermediate scores
output_columns = ['id', 'skills', 'skills-want', 'Experience', 'rank-model1', 'rank-model2', 'rank-model3']
df_people = df_people[output_columns]

df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",Associate,16.0,19.0,19.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",Associate,19.0,18.0,13.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",Associate,18.0,15.0,14.0
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",Junior,15.0,17.0,17.0
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",Senior,11.0,5.0,5.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",Junior,12.0,10.0,7.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",Architect,5.0,6.0,8.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,9.0,4.0,3.0
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",Junior,13.0,12.0,10.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",Junior,1.0,8.0,16.0


In [79]:
# Shortlist: keep anyone whose rank is below 4 in at least one of the three models
rank_columns = ['rank-model1', 'rank-model2', 'rank-model3']
in_any_top = df_people[rank_columns].lt(4).any(axis=1)
df_people = df_people[in_any_top]
df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,9.0,4.0,3.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",Junior,1.0,8.0,16.0
10,11,"[Python, SQL, Microsoft SQL Server, MySQL, Mic...","[Python, SQL, Microsoft SQL Server, MySQL, Pos...",Junior,2.0,1.0,1.0
12,13,"[Python, SQL, Microsoft SQL Server, MySQL, Mic...","[Python, Microsoft SQL Server, MySQL, Microsof...",Associate,3.0,2.0,2.0
16,17,"[Bash/Shell, C#, C++, HTML/CSS, JavaScript, PH...","[Bash/Shell, C#, JavaScript, PowerShell, SQL, ...",Senior,6.0,3.0,4.0


# Export Results

In [80]:
# Write the shortlist to a CSV file whose name embeds the role id
results_path = f"../6-results/rating_role_{role_id}.csv"
df_people.to_csv(results_path)
df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,9.0,4.0,3.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",Junior,1.0,8.0,16.0
10,11,"[Python, SQL, Microsoft SQL Server, MySQL, Mic...","[Python, SQL, Microsoft SQL Server, MySQL, Pos...",Junior,2.0,1.0,1.0
12,13,"[Python, SQL, Microsoft SQL Server, MySQL, Mic...","[Python, Microsoft SQL Server, MySQL, Microsof...",Associate,3.0,2.0,2.0
16,17,"[Bash/Shell, C#, C++, HTML/CSS, JavaScript, PH...","[Bash/Shell, C#, JavaScript, PowerShell, SQL, ...",Senior,6.0,3.0,4.0
