# Models for ranking

The word embedding model is used to compute similarity scores between roles and people, based on their skills and interests.
Each Word Mover's Distance is converted into a normalized similarity score, and the final score is a weighted combination of the skills, interests, and experience scores.

## Import Libraries

In [442]:
import pandas as pd
from gensim.models import Word2Vec
import numpy as np

## Enter Role Requirements

In [443]:
# Get the role requirements
role_id = 25
#role_skills = ['C#', 'Kotlin', 'Python', 'SQL', 'PostgreSQL', '.NET', 'Apache Kafka', 'Pandas', 'Docker', 'Kubernetes']
#role_experience = 'Associate'

# Get the role from the collected data from the survey
role = pd.read_csv(filepath_or_buffer="../2-data/survey_roles.csv", sep=",", encoding="latin1")
role = role[role['id'] == role_id]

# Fail with a readable message instead of the bare "index 0 is out of bounds"
# that .values[0] produces when no row matches role_id.
if role.empty:
    raise IndexError(f"No role with id {role_id} found in ../2-data/survey_roles.csv")

# 'skills' holds one ';'-separated string; split it into a list of skill names
role_skills = role['skills'].values[0].split(';')
role_experience = role['explevel'].values[0]

print(role_id)
print(role_skills)
print(role_experience)

25
['C#', 'Java', 'JavaScript', 'Kotlin', 'Python', 'Rust', 'SQL', 'MongoDB', 'PostgreSQL', 'AWS', 'Google Cloud', 'Microsoft Azure', 'React.js', '.NET', 'Apache Kafka', 'Apache Spark', 'Hadoop', 'Pandas', 'Docker', 'Kubernetes', 'Terraform']
Architect


# Load Employees Dataset

In [444]:
# Read the employees dataset - from the data entered in the survey
df_people = pd.read_csv(filepath_or_buffer="../2-data/survey_people.csv", sep=",", encoding="latin1")

# Get only required columns. The explicit .copy() makes df_people an
# independent frame, so the score columns assigned to it in later cells
# do not trigger pandas' SettingWithCopyWarning.
df_people = df_people[['id', 'skills', 'skills-want', 'YearsCodePro']].copy()

# Tokenize the skills: 'a;b;c' -> ['a', 'b', 'c']
df_people['skills'] = df_people['skills'].apply(lambda x: x.split(';'))

df_people



Unnamed: 0,id,skills,skills-want,YearsCodePro
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...",Dart;Java;SQL;MySQL;SQLite,3.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;Go...,4.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...",Python;MySQL;SQLite;Unity 3D;Unreal Engine,4.0
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;An...,2.0
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...",Python;R;SQL;Microsoft SQL Server;MySQL;Oracle...,9.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...",Python;Microsoft SQL Server;MongoDB;Neo4j;AWS;...,1.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...",Python;R;Scala;Microsoft SQL Server;MongoDB;Ne...,12.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...",C#;Python;SQL;Cassandra;MariaDB;Microsoft SQL ...,6.0
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...",C#;Go;HTML/CSS;Java;JavaScript;PowerShell;Pyth...,0.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...",C++;Python;Scala;DynamoDB;MongoDB;MySQL;Oracle...,1.5


# Load Word Embedding Model

In [445]:
# Load the saved Word2Vec model from disk; its word vectors (model.wv) are
# what the scoring cells below use to compute Word Mover's Distances.
model = Word2Vec.load("../3-word_embedding/model-w2vcombinedfiltered")

# Compute Similarity Scores

In [446]:
# Scaler to normalize scores: with its default settings MinMaxScaler rescales
# a column to the [0, 1] range. The same instance is re-fitted on each score
# column through fit_transform in the cells below.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

## Score from Skills

In [447]:
# Word Mover's Distance between the role's skills and every person's skills
similarity_scores = [
    model.wv.wmdistance(role_skills, person_skills)
    for person_skills in df_people['skills']
]

# Store the raw distance for each person (at this point lower = closer)
df_people['similarity_score_skills'] = similarity_scores

# Largest finite distance: rows whose distance is inf are left out of the max
df_temp = df_people[df_people['similarity_score_skills'].lt(np.inf)]
max_score = df_temp['similarity_score_skills'].max()
print(max_score)

# Cap inf distances (no similarity at all) at the largest finite distance;
# the rows themselves are kept
df_people['similarity_score_skills'] = df_people['similarity_score_skills'].replace(
    to_replace=np.inf, value=max_score
)

# Min-max normalize, then flip distance into similarity -> 0 = more dissimilar, 1 = more similar
normalized_distances = scaler.fit_transform(df_people[['similarity_score_skills']])
df_people['similarity_score_skills'] = 1 - normalized_distances

df_people

0.613760513389835


Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...",Dart;Java;SQL;MySQL;SQLite,3.0,0.3097672
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;Go...,4.0,0.482479
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...",Python;MySQL;SQLite;Unity 3D;Unreal Engine,4.0,0.090395
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...",HTML/CSS;JavaScript;TypeScript;AWS;Firebase;An...,2.0,0.3321771
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...",Python;R;SQL;Microsoft SQL Server;MySQL;Oracle...,9.0,1.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...",Python;Microsoft SQL Server;MongoDB;Neo4j;AWS;...,1.0,0.4279511
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...",Python;R;Scala;Microsoft SQL Server;MongoDB;Ne...,12.0,0.6595128
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...",C#;Python;SQL;Cassandra;MariaDB;Microsoft SQL ...,6.0,0.8702047
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...",C#;Go;HTML/CSS;Java;JavaScript;PowerShell;Pyth...,0.0,0.658423
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...",C++;Python;Scala;DynamoDB;MongoDB;MySQL;Oracle...,1.5,0.4819207


## Score from Interests

In [448]:
# Tokenize the wanted skills (interests): 'a;b;c' -> ['a', 'b', 'c']
df_people['skills-want'] = df_people['skills-want'].apply(lambda wanted: wanted.split(';'))

# Word Mover's Distance between the role's skills and every person's interests
similarity_scores = [
    model.wv.wmdistance(role_skills, person_interests)
    for person_interests in df_people['skills-want']
]

# Store the raw distance for each person (at this point lower = closer)
df_people['similarity_score_interests'] = similarity_scores

# Largest finite distance: rows whose distance is inf are left out of the max
df_temp = df_people[df_people['similarity_score_interests'].lt(np.inf)]
max_score = df_temp['similarity_score_interests'].max()
print(max_score)

# Cap inf distances (no similarity at all) at the largest finite distance;
# the rows themselves are kept
df_people['similarity_score_interests'] = df_people['similarity_score_interests'].replace(
    to_replace=np.inf, value=max_score
)

# Min-max normalize, then flip distance into similarity - 0 = more dissimilar, 1 = more similar
normalized_distances = scaler.fit_transform(df_people[['similarity_score_interests']])
df_people['similarity_score_interests'] = 1 - normalized_distances

df_people

0.6643453766017126


Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949


## Score from Experience

In [449]:
# Bin edges (years of professional coding) for each level of experience.
# pd.cut uses right-closed intervals by default, so the bins are
# (-1, 2], (2, 5], (5, 10] and (10, inf]: the -1 lower edge keeps 0 years
# inside the first bin, and the open-ended last bin keeps anyone above
# 50 years from falling outside every bin and getting a missing (NaN) level.
bin_edges = [-1, 2, 5, 10, np.inf]

# Labels for each level of experience, one per bin, in ascending order
labels = ['Junior', 'Associate', 'Senior', 'Architect']

# Convert the 'YearsCodePro' column to categorical levels of experience
df_people['Experience'] = pd.cut(pd.to_numeric(df_people['YearsCodePro']), bins=bin_edges, labels=labels)

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0,Associate
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574,Associate
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213,Associate
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867,Junior
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0,Senior
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545,Junior
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659,Architect
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521,Senior
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882,Junior
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949,Junior


In [450]:
from scipy.spatial.distance import euclidean

# Define a function to compute the similarity metric
def similarity(level1, level2):
    """Return a similarity in (0, 1] between two experience levels.

    Each level is encoded as a cumulative 0/1 vector (one more leading 1 per
    step up the ladder). The Euclidean distance between the two vectors is
    mapped to ``1 / (1 + distance)``, so identical levels score 1.0 and the
    score shrinks as the levels move further apart.

    Raises KeyError if either argument is not one of 'Junior', 'Associate',
    'Senior' or 'Architect'.
    """
    ladder = ('Junior', 'Associate', 'Senior', 'Architect')
    width = len(ladder)

    # Junior -> [0, 0, 0, 0], Associate -> [1, 0, 0, 0],
    # Senior -> [1, 1, 0, 0], Architect -> [1, 1, 1, 0]
    encoded = {
        name: [1] * step + [0] * (width - step)
        for step, name in enumerate(ladder)
    }

    gap = euclidean(encoded[level1], encoded[level2])
    return 1 / (1 + gap)

In [451]:
# Score each person's experience level against the level the role asks for
def _experience_match(person_level):
    return similarity(role_experience, person_level)

df_people['similarity_score_experience'] = df_people["Experience"].apply(_experience_match)
df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0,Associate,0.414214
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574,Associate,0.414214
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213,Associate,0.414214
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867,Junior,0.366025
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0,Senior,0.5
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545,Junior,0.366025
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659,Architect,1.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521,Senior,0.5
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882,Junior,0.366025
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949,Junior,0.366025


# Add Results from all 3 Models

## Model 1 - Compute Final Score

In [452]:
# Model 1 weights: skills and experience only, interests are switched off
weight_skills = 0.8
weight_interests = 0
weight_experience = 0.2

# Final score = weighted sum of the three similarity scores
skills_term = df_people['similarity_score_skills'].astype(float) * weight_skills
interests_term = df_people['similarity_score_interests'].astype(float) * weight_interests
experience_term = df_people['similarity_score_experience'].astype(float) * weight_experience
df_people['score-model1'] = skills_term + interests_term + experience_term

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0,Associate,0.414214,0.330656
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574,Associate,0.414214,0.468826
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213,Associate,0.414214,0.155159
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867,Junior,0.366025,0.338947
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0,Senior,0.5,0.9
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545,Junior,0.366025,0.415566
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659,Architect,1.0,0.72761
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521,Senior,0.5,0.796164
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882,Junior,0.366025,0.599943
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949,Junior,0.366025,0.458742


## Model 2 - Compute Final Score

In [453]:
# Model 2 weights: skills and interests weighted equally, plus experience
weight_skills = 0.4
weight_interests = 0.4
weight_experience = 0.2

# Final score = weighted sum of the three similarity scores
skills_term = df_people['similarity_score_skills'].astype(float) * weight_skills
interests_term = df_people['similarity_score_interests'].astype(float) * weight_interests
experience_term = df_people['similarity_score_experience'].astype(float) * weight_experience
df_people['score-model2'] = skills_term + interests_term + experience_term

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1,score-model2
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0,Associate,0.414214,0.330656,0.20675
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574,Associate,0.414214,0.468826,0.413264
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213,Associate,0.414214,0.155159,0.166286
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867,Junior,0.366025,0.338947,0.307623
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0,Senior,0.5,0.9,0.9
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545,Junior,0.366025,0.415566,0.511404
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659,Architect,1.0,0.72761,0.780469
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521,Senior,0.5,0.796164,0.76269
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882,Junior,0.366025,0.599943,0.670127
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949,Junior,0.366025,0.458742,0.565769


## Model 3 - Compute Final Score

In [454]:
# Model 3 weights: interests and experience only, skills are switched off
weight_skills = 0
weight_interests = 0.8
weight_experience = 0.2

# Final score = weighted sum of the three similarity scores
skills_term = df_people['similarity_score_skills'].astype(float) * weight_skills
interests_term = df_people['similarity_score_interests'].astype(float) * weight_interests
experience_term = df_people['similarity_score_experience'].astype(float) * weight_experience
df_people['score-model3'] = skills_term + interests_term + experience_term

df_people

Unnamed: 0,id,skills,skills-want,YearsCodePro,similarity_score_skills,similarity_score_interests,Experience,similarity_score_experience,score-model1,score-model2,score-model3
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",3.0,0.3097672,0.0,Associate,0.414214,0.330656,0.20675,0.082843
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",4.0,0.482479,0.343574,Associate,0.414214,0.468826,0.413264,0.357702
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",4.0,0.090395,0.118213,Associate,0.414214,0.155159,0.166286,0.177413
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",2.0,0.3321771,0.253867,Junior,0.366025,0.338947,0.307623,0.276298
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",9.0,1.0,1.0,Senior,0.5,0.9,0.9,0.9
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",1.0,0.4279511,0.667545,Junior,0.366025,0.415566,0.511404,0.607241
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",12.0,0.6595128,0.791659,Architect,1.0,0.72761,0.780469,0.833327
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",6.0,0.8702047,0.786521,Senior,0.5,0.796164,0.76269,0.729217
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",0.0,0.658423,0.833882,Junior,0.366025,0.599943,0.670127,0.740311
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",1.5,0.4819207,0.74949,Junior,0.366025,0.458742,0.565769,0.672797


# Add Column for Ordering the List

In [455]:
# Rank people per model: the highest score gets rank 1
rank_sources = {
    'rank-model1': 'score-model1',
    'rank-model2': 'score-model2',
    'rank-model3': 'score-model3',
}
for rank_column, score_column in rank_sources.items():
    df_people[rank_column] = df_people[score_column].rank(ascending=False)

# Keep the identifying columns and the ranks; the intermediate scores are dropped
columns_to_keep = ['id', 'skills', 'skills-want', 'Experience', 'rank-model1', 'rank-model2', 'rank-model3']
df_people = df_people[columns_to_keep]

df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
0,1,"[Java, JavaScript, Python, SQL, MySQL, SQLite,...","[Dart, Java, SQL, MySQL, SQLite]",Associate,15.0,17.0,20.0
1,2,"[C++, HTML/CSS, Java, JavaScript, PHP, Python,...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",Associate,11.0,13.0,15.0
2,3,"[C++, HTML/CSS, Java, PHP, Python, SQL, MariaD...","[Python, MySQL, SQLite, Unity 3D, Unreal Engine]",Associate,19.0,18.0,18.0
3,4,"[HTML/CSS, JavaScript, Kotlin, TypeScript, AWS...","[HTML/CSS, JavaScript, TypeScript, AWS, Fireba...",Junior,14.0,15.0,16.0
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",Senior,2.0,1.0,1.0
5,6,"[MATLAB, Python, R, SQL, Swift, Cassandra, Cou...","[Python, Microsoft SQL Server, MongoDB, Neo4j,...",Junior,13.0,10.0,10.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",Architect,4.0,3.0,2.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,3.0,4.0,5.0
8,9,"[Assembly, Bash/Shell, C#, C++, Go, HTML/CSS, ...","[C#, Go, HTML/CSS, Java, JavaScript, PowerShel...",Junior,8.0,7.0,4.0
9,10,"[MATLAB, Python, R, SQL, Microsoft SQL Server,...","[C++, Python, Scala, DynamoDB, MongoDB, MySQL,...",Junior,12.0,9.0,7.0


In [456]:
# Keep everyone whose rank is below 4 in at least one of the three models
rank_columns = ['rank-model1', 'rank-model2', 'rank-model3']
is_top_ranked = df_people[rank_columns].lt(4).any(axis=1)
df_people = df_people[is_top_ranked]
df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",Senior,2.0,1.0,1.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",Architect,4.0,3.0,2.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,3.0,4.0,5.0
15,16,"[Bash/Shell, JavaScript, Python, SQL, DynamoDB...","[Kotlin, Ruby, Rust, Redis, Google Cloud, Djan...",Architect,1.0,2.0,8.0
16,17,"[Bash/Shell, C#, C++, HTML/CSS, JavaScript, PH...","[Bash/Shell, C#, JavaScript, PowerShell, SQL, ...",Senior,6.0,5.0,3.0


# Export Results

In [457]:
# Write the shortlisted people for this role to a CSV file named after the role id
results_path = f"../6-results/rating_role_{role_id}.csv"
df_people.to_csv(results_path)
df_people

Unnamed: 0,id,skills,skills-want,Experience,rank-model1,rank-model2,rank-model3
4,5,"[C#, Java, JavaScript, PHP, PowerShell, Python...","[Python, R, SQL, Microsoft SQL Server, MySQL, ...",Senior,2.0,1.0,1.0
6,7,"[Bash/Shell, PowerShell, Python, Scala, SQL, M...","[Python, R, Scala, Microsoft SQL Server, Mongo...",Architect,4.0,3.0,2.0
7,8,"[Bash/Shell, C#, C++, Java, PowerShell, Python...","[C#, Python, SQL, Cassandra, MariaDB, Microsof...",Senior,3.0,4.0,5.0
15,16,"[Bash/Shell, JavaScript, Python, SQL, DynamoDB...","[Kotlin, Ruby, Rust, Redis, Google Cloud, Djan...",Architect,1.0,2.0,8.0
16,17,"[Bash/Shell, C#, C++, HTML/CSS, JavaScript, PH...","[Bash/Shell, C#, JavaScript, PowerShell, SQL, ...",Senior,6.0,5.0,3.0
