In [None]:
import pandas as pd

## soft matching

In [None]:
# Load the applicant and role tables from the Excel workbooks
# (paths are relative to the notebook's working directory).
df_applicants = pd.read_excel("applicants.xlsx")
df_roles = pd.read_excel("roles.xlsx")


In [None]:
from datetime import datetime, timedelta
from pytz import timezone
# Hard-matching checks: a role is only scored for an applicant who passes every one of these.
def check_location(applicant_addr, project_location):
    """Return True when the applicant's location is one of the role's locations.

    `project_location` is a ';'-separated string. Each entry is stripped of
    surrounding whitespace and compared with `applicant_addr` for exact
    equality; `applicant_addr` itself is neither stripped nor case-normalised.
    """
    allowed_locations = map(str.strip, project_location.split(";"))
    return applicant_addr in allowed_locations

def check_availability(talent_date, project_date):
    """Return whether the applicant's available-from date is on or before the role's date.

    The two values only need to support `<=` with each other; the result of
    that comparison is returned as-is.
    """
    available_in_time = talent_date <= project_date
    return available_in_time

def check_skills(talent_skills_str, project_skills_str):
    """Return True when every required skill is among the applicant's skills.

    Both arguments are ','-separated strings. Entries are stripped of
    surrounding whitespace and matched exactly (case-sensitive).
    """
    offered = {skill.strip() for skill in talent_skills_str.split(",")}
    required = {skill.strip() for skill in project_skills_str.split(",")}
    return required.issubset(offered)

def check_working_hours(talent_hours, project_hours):
    """Return whether the applicant's committed hours reach the role's minimum.

    The two values only need to support `>=` with each other; the result of
    that comparison is returned as-is.
    """
    meets_minimum = talent_hours >= project_hours
    return meets_minimum

def check_languages(talent_languages_str, project_languages_str):
    """Return True when the applicant lists every language the role asks for.

    Both arguments are ','-separated strings. Entries are stripped of
    surrounding whitespace and matched exactly (case-sensitive).
    """
    spoken = [entry.strip() for entry in talent_languages_str.split(",")]
    for needed in project_languages_str.split(","):
        if needed.strip() not in spoken:
            return False
    return True

def check_overlap_time(talent_timezone, talent_working_hours, project_timezone, project_working_hours, overlap_time):
  """Return True when applicant and role working hours overlap for long enough.

  Parameters:
    talent_timezone, project_timezone: zone suffix that is appended to 'Etc/'
      and looked up with pytz (e.g. 'GMT+2' -> 'Etc/GMT+2').
    talent_working_hours, project_working_hours: 'HH:MM - HH:MM' strings
      (start and end separated by ' - ').
    overlap_time: required overlap in hours; converted with int().

  Returns:
    bool: whether the shared working window lasts at least `overlap_time` hours.
    Windows that do not intersect count as zero overlap.
  """
  # Uniform time zone handling: both sides are resolved to tz-aware datetimes.
  # NOTE(review): tz database 'Etc/GMT+N' zones use the POSIX sign convention
  # (Etc/GMT+2 is UTC-2). Confirm the spreadsheet values follow that convention.
  applicant_tz = timezone('Etc/' + talent_timezone)
  role_tz = timezone('Etc/' + project_timezone)

  applicant_start_str, applicant_end_str = talent_working_hours.split(" - ")
  role_start_str, role_end_str = project_working_hours.split(" - ")

  # strptime with only "%H:%M" anchors every time on the same default date, so
  # the four instants are directly comparable once localized.
  # NOTE(review): because of that single-date anchoring, overnight shifts and
  # overlaps that only exist across midnight are not detected.
  applicant_start = applicant_tz.localize(datetime.strptime(applicant_start_str, "%H:%M"))
  applicant_end = applicant_tz.localize(datetime.strptime(applicant_end_str, "%H:%M"))
  role_start = role_tz.localize(datetime.strptime(role_start_str, "%H:%M"))
  role_end = role_tz.localize(datetime.strptime(role_end_str, "%H:%M"))

  overlap_start = max(applicant_start, role_start)
  overlap_end = min(applicant_end, role_end)

  # Disjoint windows produce a negative timedelta. Its `.seconds` attribute is
  # the non-negative remainder (e.g. -2h -> 79200s), which would be read as a
  # 22-hour overlap, so use total_seconds() and clamp at zero instead.
  overlap_hours = max((overlap_end - overlap_start).total_seconds(), 0.0) / 3600
  return overlap_hours >= int(overlap_time)

In [None]:
# Relative importance of each soft-matching feature
# (a larger value gives the feature a larger share of the score).

Tools_importance = 0.5
years_importance = 0.75
hourlyRate_importance = 0.5
preferredSkills_importance = 0.5
industry_importance = 0.5
text_importance = 0.25

# Normalised feature weights: each importance divided by the sum of all
# importances, so the six weights add up to 1. They are derived from the
# variables above (not from repeated literals) so that editing an importance
# automatically updates its weight.
_total_importance = (
    Tools_importance
    + years_importance
    + hourlyRate_importance
    + preferredSkills_importance
    + industry_importance
    + text_importance
)

Tools_w = Tools_importance / _total_importance
years_w = years_importance / _total_importance
hourlyRate_w = hourlyRate_importance / _total_importance
preferredSkills_w = preferredSkills_importance / _total_importance
industry_w = industry_importance / _total_importance
text_w = text_importance / _total_importance


In [None]:
# For every applicant, score each role that passes all hard-matching checks and
# store the roles ordered from best to worst match.
# roles_info maps applicant UID -> list of [project id, role name, match score].
roles_info = {}
for _, applicant in df_applicants.iterrows():

  # Qualifying roles for this applicant, rebuilt from scratch on every iteration
  roles_list = []
  matches = []  # not used in this cell; kept in case a later cell references it
  for _, project in df_roles.iterrows():
    passes_hard_checks = (
      check_location(applicant['Location'], project['Location']) and
      check_availability(applicant['Availability (Available from date)'], project['Availability Date']) and
      check_skills(applicant['Skills'], project['Required Skills']) and
      check_working_hours(applicant['Hours to commit'], project['Minimum Available Hours per Week']) and
      check_languages(applicant['Languages'], project['Language']) and
      check_overlap_time(applicant['Time Zone'], applicant['Working Hours'], project['Time Zone'], project['Working Hours'], project['Overlap Time'])
    )
    if not passes_hard_checks:
      continue

    project_experience = project['Years of Experience']
    project_rate = project['Hourly Rate']
    project_tools = project['Tools'].split(', ')
    project_skills = project['Preferred Skills'].split(', ')

    # Soft features. Skills/tools are the fraction of the role's list found in
    # the applicant's field; the others are 0/1 indicators.
    # NOTE(review): `item in applicant[...]` is a substring test on the raw
    # string (so 'Java' also matches 'JavaScript') -- confirm this is intended.
    count_skills = sum(1 for item in project_skills if item in applicant['Skills'])
    # Count the role's tools (this previously iterated project_skills by mistake).
    count_tools = sum(1 for item in project_tools if item in applicant['Tools'])
    count_ex = 1 if project_experience <= applicant['Years of Professional Experience'] else 0
    count_rate = 1 if project_rate >= applicant['Hourly Rate($)'] else 0
    # Assumes the fourth ';'-separated field of the work-experience cell is the
    # industry -- TODO confirm against the spreadsheet layout.
    applicant_industry = applicant['Work Experience (Title, Company, Years, Industry)'].split(';')[3].strip()
    count_industry = 1 if str(project['industry']).strip() == applicant_industry else 0

    # Weighted match score. The industry indicator used to be computed but was
    # never added to the score; it now contributes with industry_w.
    # NOTE(review): no text-similarity feature is computed in this cell, so
    # text_w is unused and the highest attainable score is below 1.
    match_score = (
      count_skills / len(project_skills) * preferredSkills_w
      + count_tools / len(project_tools) * Tools_w
      + count_ex * years_w
      + count_rate * hourlyRate_w
      + count_industry * industry_w
    )
    roles_list.append([project['Project ID'], project['Role Name'], match_score])

  # Sort once per applicant, after all roles were examined. Doing this outside
  # the `if` also means an applicant without any qualifying role gets an empty
  # list instead of the previous applicant's results (or a NameError).
  sorted_list = sorted(roles_list, key=lambda x: x[2], reverse=True)
  roles_info[applicant['UID']] = sorted_list
roles_info

{'c059eaf0-9eec-4173-aa9f-c65ab442b7fa': [[48682,
   'CX / Business Designer',
   0.5833333333333333],
  [13844, 'Business Development Manager', 0.5833333333333333],
  [60137, 'Technical Project Manager', 0.5277777777777778],
  [75262, 'Data Engineer', 0.5277777777777778],
  [90991, 'Service/Business Designer', 0.41666666666666663],
  [13844, 'Cloud Expert', 0.3611111111111111]],
 'd1d1563e-9bd4-4ca4-9506-85a00aa4bc28': [[13844,
   'Product Owner',
   0.41666666666666663],
  [75262, 'Data Engineer', 0.41666666666666663],
  [71893, 'Networking/Infrastructure Engineer', 0.3333333333333333],
  [90991, 'Risk & Compliance Consultant', 0.3333333333333333],
  [97963, 'Community Manager', 0.2777777777777778],
  [97963, 'Motion Graphic Designer', 0.2777777777777778],
  [90991, 'Chief of Staff', 0.2777777777777778],
  [82785, 'Security Engineer', 0.16666666666666666],
  [82785, 'Project Manager', 0.1111111111111111]],
 '451572c0-68fa-4cd1-b26c-dc8d3f26716b': [[29145,
   'Product Designer',
   0.