In [95]:
import pandas as pd
import numpy as np

# Toy dataset: each entry is [role name, comma-separated skill list].
task_data = [
    ["Data Scientist", "Python,Statistics,Machine Learning,Data Visualization"],
    ["ML Engineer", "Python,Machine Learning,Deployment,Algorithms"],
    ["Data Analyst", "SQL,Python,Data Visualization,Excel"],
    ["Data Engineer", "Python,SQL,ETL,Cloud Computing"],
    ["AI Researcher", "Python,Deep Learning,Machine Learning,Algorithms"],
    ["Business Analyst", "Excel,SQL,Data Visualization,Business Intelligence"],
    ["NLP Engineer", "Python,NLP,Machine Learning,Deep Learning"],
]

In [96]:
# Load the role/skill pairs into a two-column frame.
df = pd.DataFrame(
    data=task_data,
    columns=['Role', 'Skills'],
)

In [97]:
# Display the assembled Role/Skills frame.
df

Unnamed: 0,Role,Skills
0,Data Scientist,"Python,Statistics,Machine Learning,Data Visual..."
1,ML Engineer,"Python,Machine Learning,Deployment,Algorithms"
2,Data Analyst,"SQL,Python,Data Visualization,Excel"
3,Data Engineer,"Python,SQL,ETL,Cloud Computing"
4,AI Researcher,"Python,Deep Learning,Machine Learning,Algorithms"
5,Business Analyst,"Excel,SQL,Data Visualization,Business Intellig..."
6,NLP Engineer,"Python,NLP,Machine Learning,Deep Learning"


In [98]:
# (rows, columns) of the frame: one row per role.
df.shape

(7, 2)

In [99]:
# Normalise case so that e.g. "Python" and "python" count as the same term.
# The vectorised .str accessor replaces the per-row lambda and leaves missing
# values as NaN instead of raising AttributeError on them.
df['Skills'] = df['Skills'].str.lower()

In [100]:
# Display the frame again to confirm the Skills column is now lower-case.
df

Unnamed: 0,Role,Skills
0,Data Scientist,"python,statistics,machine learning,data visual..."
1,ML Engineer,"python,machine learning,deployment,algorithms"
2,Data Analyst,"sql,python,data visualization,excel"
3,Data Engineer,"python,sql,etl,cloud computing"
4,AI Researcher,"python,deep learning,machine learning,algorithms"
5,Business Analyst,"excel,sql,data visualization,business intellig..."
6,NLP Engineer,"python,nlp,machine learning,deep learning"


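# Vectorization

Each role's skill string is converted into a bag-of-words count vector. The default tokenizer splits multi-word skills into separate words (e.g. `machine learning` becomes `machine` and `learning`), and `max_features=10` keeps only the ten most frequent terms.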

In [101]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer: English stop words are removed and the vocabulary
# is capped at the 10 most frequent terms.
cv = CountVectorizer(
    stop_words='english',
    max_features=10,
)

In [102]:
# Learn the vocabulary from the skill strings and encode every role as a
# dense array of term counts.
vectors = (
    cv
    .fit_transform(df['Skills'])
    .toarray()
)

In [103]:
# Show the count matrix: one row per role, one column per vocabulary term.
vectors

array([[0, 1, 0, 0, 0, 1, 1, 1, 0, 1],
       [1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 1, 0, 0, 1, 0, 0, 1, 1, 1],
       [0, 0, 0, 1, 0, 0, 0, 1, 1, 0],
       [1, 0, 1, 0, 0, 2, 1, 1, 0, 0],
       [0, 1, 0, 0, 1, 0, 0, 0, 1, 1],
       [0, 0, 1, 0, 0, 2, 1, 1, 0, 0]], dtype=int64)

In [104]:
# (number of roles, number of vocabulary terms kept by the vectorizer).
vectors.shape

(7, 10)

In [105]:
# Vocabulary terms, in the same order as the columns of `vectors`.
cv.get_feature_names_out()

array(['algorithms', 'data', 'deep', 'etl', 'excel', 'learning',
       'machine', 'python', 'sql', 'visualization'], dtype=object)

In [106]:
from sklearn.metrics.pairwise import cosine_similarity

In [107]:
# Pairwise cosine similarity between every pair of role vectors;
# entry [i, j] compares role i with role j.
similarity = cosine_similarity(X=vectors)

In [108]:
# Square matrix: one row and one column per role.
similarity.shape

(7, 7)

In [109]:
# Similarity of the role at row 1 to every role (including itself).
similarity[1]

array([0.67082039, 1.        , 0.2236068 , 0.28867513, 0.88388348,
       0.        , 0.75592895])

# Main Function for Recommendation

In [110]:
def recommend(position, top_n=3):
    """Print the roles whose skill vectors are most similar to ``position``.

    Looks ``position`` up in the ``Role`` column of the module-level ``df``,
    reads the matching row of the module-level ``similarity`` matrix, and
    prints the names of the ``top_n`` *other* roles in descending order of
    similarity, one per line.

    Parameters
    ----------
    position : str
        Role name; must match a value in ``df['Role']`` exactly.
    top_n : int, default 3
        Maximum number of roles to print. Values below 1 print nothing.

    Returns
    -------
    None
        The recommendations are printed, not returned.

    Raises
    ------
    IndexError
        If ``position`` does not appear in ``df['Role']``.
    """
    # Use the positional row number (not the index label) because it is used
    # to index `similarity`, which is ordered by row position.
    matches = (df['Role'] == position).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(
            f"Unknown role {position!r}; expected one of {list(df['Role'])}"
        )
    role_index = int(matches[0])
    distances = similarity[role_index]

    # Drop the queried role explicitly instead of assuming it sorts first:
    # another role with an identical vector ties at 1.0 and, with a stable
    # sort, could otherwise push the queried role into its own results.
    candidates = [
        (idx, score) for idx, score in enumerate(distances) if idx != role_index
    ]
    top_roles = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:max(top_n, 0)]

    for idx, _score in top_roles:
        print(df['Role'].iloc[idx])

In [111]:
# Example: print the roles most similar to "Data Scientist".
recommend('Data Scientist')

NLP Engineer
ML Engineer
AI Researcher


In [112]:
# Example: print the roles most similar to "Business Analyst".
recommend('Business Analyst')

Data Analyst
Data Scientist
Data Engineer
