# Import Libraries

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

import warnings 
warnings.filterwarnings('ignore')

# Load the Dataset

In [3]:

def load_data(file_path):
    # Load data from the Excel file
    mentee_info = pd.read_excel(file_path, sheet_name='Mentee Info')
    mentor_info = pd.read_excel(file_path, sheet_name='Mentor Info')
    
    # Fill NaN values in text fields with empty strings
    mentee_info.fillna('', inplace=True)
    mentor_info.fillna(''
                       , inplace=True)
    return mentee_info, mentor_info

In [4]:
# Define the path to the Excel file
file_path = 'Mentor-Mentee Matching Data.xlsx'

# Load and preprocess the data
mentee_info, mentor_info = load_data(file_path)

In [8]:
print(" Print the Mentee Information\n")
mentee_info.head()

 Print the Mentee Information



Unnamed: 0,Timestamp,"I acknowledge that I have read and understood the above PDPA Notice and consent to the collection, use and disclosure of my personal data by The Academic Label for the purposes set out in the Notice.",Full Name,Age,Email Address,Phone Number,"How often would you like to be contacted for mentoring sessions? (eg. Weekdays, Weekends, morning, afternoon, night, weekly, fortnightly etc.)","What is your preferred platform(s) to communicate with your mentor? (Phone call, Telegram, WhatsApp, Meet Up etc.)",Which school are you currently attending?,"Which year of school are you currently in? (eg. Sec 4, J1, poly year 1, ITE year 1)",What course/subject combination are you taking?,I have an idea of what course I want to pursue.,5 (Most interested),4,3,2,1 (least interested),"If you have selected the (others) option, do specify what course you are interested in, and the number associated with it [example: Food Science in Netherlands (5)]",What do you hope to achieve out of this program?,Profile Description
0,2023-01-21 23:35:35.853,Yes,,,,,Weekends morning,Video call on any platform/meet ups,Temasek Polytechnic,Graduated,Biomedical Engineering,Yes,Allied Health,,,,,,"Idk, a clearer understanding in my choice of c...",I am currently exploring a career in allied he...
1,2023-03-06 22:18:27.101,Yes,,,,,"Weekdays nights, weekly to monthly all possible",Telegram / WhatsApp,NJC,J2,PCME + H3 Econs,Yes,UK (Economics/Finance/Politics),Finance,Accounting,Economics,Business,,I hope to be able to find out more specific kn...,"My interests lie in economics, finance, accoun..."
2,2023-03-07 08:42:51.290,Yes,,,,,Anything,Whatsapp,Riverside secondary,Sec 4,Expresss (phy Chem) combined elect hist/ss POA...,Yes,Information security,Computer Science,Mass communications,Mass communications,Accounting,Nil,Better results or a better view of tertiary ed...,I am at a crossroads in choosing my tertiary e...
3,2023-03-13 12:45:54.397,Yes,,,,,Weekends,Telegram,Nanyang Girls' High School,Sec 4,"Triple Science (Bio, Chem, Advanced Physics, I...",Yes,UK (Medicine/Dentistry/Veterinarian Science),Australia (Medicine/Dentistry/Veterinarian Sci...,Sciences,UK (Economics/Finance/Politics),Psychology,Veterinary Medicine/Veterinary Science in New ...,I hope to gain advice and opportunities to hel...,"My ambition is to study Veterinary Medicine, a..."
4,2023-03-13 14:30:35.331,Yes,,,,,"Weekend, night","Whatsapp, telegram",Nyjc,J2,Pcme,Yes,Data science,Mathematics,Finance,Economics,Engineering,,Gain more knowledge of the course I want,"I am passionate about data science, mathematic..."


In [9]:
print(" Print the Mentor Information\n")
mentor_info.head()

 Print the Mentor Information



Unnamed: 0,Timestamp,"I acknowledge that I have read and understood the above PDPA Notice and consent to the collection, use and disclosure of my personal data by The Academic Label for the purposes set out in the Notice.",Full Name,How much time can you commit to the mentorship program?,"What is/are your preferred platform(s) to communicate with your mentee? (Phone call, Telegram, WhatsApp, Meet Up etc.)",Which school are you currently attending?,What academic course are you currently enrolled in?,"Which year of school are you currently in? (eg. year 1, 2)",Briefly explain the academic courses and/or subject fields that you would be able to provide advice on.,Profile Description
0,2022-11-18 20:15:35.526,,,once every 2 weeks,Zoom Call,Graduated NTU in Dec 2022,Renaissance Engineering Programme,Fresh Grad,"Data Analytics, Business Analytics, Mechanical...","Hi, I specialize in Data Analytics, Business A..."
1,2022-11-18 22:36:26.148,,,once every 2 weeks,"Tele, whatsapp, zoom, meetups for dec only",NTU,Double degree in accounting and business,1,Consulting (mainly)/High finance/auditing,Hello! My main areas of expertise are Consulti...
2,2022-11-18 22:53:50.506,,,once every 2 weeks,Telegram,NTU,PhD in Physics,year 1,"Physics, engineering, and computer science rel...",Hi there! I have a strong background in Physic...
3,2022-11-18 23:05:20.230,,,once a week,"Telegram, Discord, Zoom",SMU,Information Systems,Year 2,"Information Systems, Computer Science, Softwar...","Hey! I specialize in Information Systems, Comp..."
4,2022-11-19 09:58:28.383,,,once every 2 weeks,Telegram,SMU,"Politics, Law, and Economics + Data Science",4,Courses: Anything economics or gov/policy rela...,Hi! I can provide guidance on courses related ...


# Preprocess of Mentor and Mentee Information

In [10]:
def preprocess_data(mentee_info, mentor_info):
    # Combine relevant text fields for mentees and mentors
    mentee_texts = mentee_info['Profile Description'] + " " + mentee_info['What course/subject combination are you taking?']
    mentor_texts = mentor_info['Profile Description'] + " " + mentor_info['Briefly explain the academic courses and/or subject fields that you would be able to provide advice on.']
    return mentee_texts, mentor_texts

In [11]:
mentee_texts, mentor_texts = preprocess_data(mentee_info, mentor_info)

In [13]:
print(" Print the Mentee Information after processing\n")
mentee_texts

 Print the Mentee Information after processing



0     I am currently exploring a career in allied he...
1     My interests lie in economics, finance, accoun...
2     I am at a crossroads in choosing my tertiary e...
3     My ambition is to study Veterinary Medicine, a...
4     I am passionate about data science, mathematic...
5     I am interested in social sciences, psychology...
6     My primary goal is to pursue a career in medic...
7     I am exploring career options in accounting, f...
8     I am currently exploring my interests in allie...
9     I am passionate about business, economics, law...
10    I am particularly interested in pursuing a deg...
11    I seek guidance to help me achieve my future c...
12    I hope to gain clarity about the courses I wan...
13    I am exploring various career paths, including...
14    I am interested in marketing, visual arts, law...
15    I am exploring my interests in history, media ...
16    I am interested in pursuing a career in occupa...
17    I am currently exploring various fields, i

In [14]:
print(" Print the Mentor Information after processing\n")
mentor_texts

 Print the Mentor Information after processing



0     Hi, I specialize in Data Analytics, Business A...
1     Hello! My main areas of expertise are Consulti...
2     Hi there! I have a strong background in Physic...
3     Hey! I specialize in Information Systems, Comp...
4     Hi! I can provide guidance on courses related ...
5     Hi! I can share insights on JC Biology and off...
6     Hello! I have a strong foundation in Medicine,...
7     Hi! I'm passionate about Global Studies, Inter...
8     Hey! My expertise lies in Mechanical Engineeri...
9     Hello! I'm adept at providing guidance on gene...
10    Hi! I specialize in Consulting, Strategy, Busi...
11    Hello! With a strong foundation in Medicine, B...
12    Hi there! My area of expertise is Economics, a...
13    Hello! As someone deeply involved in the field...
14    Hi! I hold a Business Diploma from NP and have...
dtype: object

# Feature Extraction

In [15]:
def calculate_similarity(mentee_texts, mentor_texts):
    # Feature Engineering using TF-IDF
    tfidf_vectorizer = TfidfVectorizer()
    mentee_tfidf = tfidf_vectorizer.fit_transform(mentee_texts)
    mentor_tfidf = tfidf_vectorizer.transform(mentor_texts)
    
    # Calculate cosine similarity between all mentees and mentors
    return cosine_similarity(mentee_tfidf, mentor_tfidf)

In [16]:

# Calculate similarity and get top matches
similarity_matrix = calculate_similarity(mentee_texts, mentor_texts)

In [17]:
similarity_matrix

array([[0.17230387, 0.12095088, 0.16090645, 0.19524347, 0.13125372,
        0.09561303, 0.0781984 , 0.14411805, 0.1711591 , 0.1279156 ,
        0.18036467, 0.19232614, 0.19043262, 0.16988258, 0.10715965],
       [0.25605601, 0.29044254, 0.17717675, 0.16027241, 0.34031946,
        0.1845811 , 0.10512511, 0.20914005, 0.17096542, 0.14011604,
        0.21870778, 0.21035799, 0.27971468, 0.17483088, 0.19491619],
       [0.18442961, 0.24411645, 0.25255942, 0.28139798, 0.17404766,
        0.17309654, 0.09714764, 0.21909279, 0.16506899, 0.14520103,
        0.19802489, 0.20049152, 0.23082348, 0.16533205, 0.14038716],
       [0.20469448, 0.18634772, 0.22814372, 0.1849713 , 0.11459784,
        0.17673362, 0.17191152, 0.13114794, 0.13492351, 0.14661807,
        0.15438277, 0.18947412, 0.18991347, 0.21650078, 0.20301102],
       [0.33087572, 0.28913152, 0.32353219, 0.19617462, 0.25155633,
        0.15439397, 0.1106003 , 0.33531678, 0.20796646, 0.14330104,
        0.1818192 , 0.21632495, 0.30282616, 

# Print the top matches for each mentee with Mentor

In [18]:
def get_top_n_matches(similarity_matrix, mentee_info, mentor_info, top_n=3):
    match_results = {}
    for mentee_idx, mentee_row in enumerate(similarity_matrix):
        top_indices = np.argsort(mentee_row)[::-1][:top_n]
        match_scores = mentee_row[top_indices]
        mentee_name = mentee_info.iloc[mentee_idx]['Full Name'] if mentee_info.iloc[mentee_idx]['Full Name'] else f'Mentee {mentee_idx+1}'
        match_results[mentee_name] = [
            (mentor_info.iloc[i]['Full Name'] if mentor_info.iloc[i]['Full Name'] else f'Mentor {i+1}', score) for i, score in zip(top_indices, match_scores)
        ]
    return match_results



top_matches = get_top_n_matches(similarity_matrix, mentee_info, mentor_info)

# Print the top matches for each mentee
for mentee, matches in top_matches.items():
    print(f"{mentee} is best matched with:")
    for mentor, score in matches:
        print(f"  {mentor} with a similarity score of {score:.2f}")
    print()


Mentee 1 is best matched with:
  Mentor 4 with a similarity score of 0.20
  Mentor 12 with a similarity score of 0.19
  Mentor 13 with a similarity score of 0.19

Mentee 2 is best matched with:
  Mentor 5 with a similarity score of 0.34
  Mentor 2 with a similarity score of 0.29
  Mentor 13 with a similarity score of 0.28

Mentee 3 is best matched with:
  Mentor 4 with a similarity score of 0.28
  Mentor 3 with a similarity score of 0.25
  Mentor 2 with a similarity score of 0.24

Mentee 4 is best matched with:
  Mentor 3 with a similarity score of 0.23
  Mentor 14 with a similarity score of 0.22
  Mentor 1 with a similarity score of 0.20

Mentee 5 is best matched with:
  Mentor 8 with a similarity score of 0.34
  Mentor 1 with a similarity score of 0.33
  Mentor 3 with a similarity score of 0.32

Mentee 6 is best matched with:
  Mentor 12 with a similarity score of 0.29
  Mentor 10 with a similarity score of 0.26
  Mentor 7 with a similarity score of 0.25

Mentee 7 is best matched wit