In [1]:
# Dataset url - https://www.kaggle.com/datasets/ravindrasinghrana/employeedataset
import pandas as pd
import numpy as np

In [2]:
employeeData = pd.read_csv('employee_data.csv')
engagementData = pd.read_csv('employee_engagement_survey_data.csv')
recruitmentData = pd.read_csv('recruitment_data.csv')
trainingData = pd.read_csv('training_and_development_data.csv')

In [18]:
employeeData.head(2)

Unnamed: 0,EmpID,FirstName,LastName,StartDate,ExitDate,Title,Supervisor,ADEmail,BusinessUnit,EmployeeStatus,...,Division,DOB,State,JobFunctionDescription,GenderCode,LocationCode,RaceDesc,MaritalDesc,Performance Score,Current Employee Rating
0,3427,Uriah,Bridges,20-Sep-19,,Production Technician I,Peter Oneill,uriah.bridges@bilearner.com,CCDR,Active,...,Finance & Accounting,07-10-1969,MA,Accounting,Female,34904,White,Widowed,Fully Meets,4
1,3428,Paula,Small,11-Feb-23,,Production Technician I,Renee Mccormick,paula.small@bilearner.com,EW,Active,...,Aerial,30-08-1965,MA,Labor,Male,6593,Hispanic,Widowed,Fully Meets,3


In [52]:
recruitmentData.dtypes

Applicant ID                    int64
Application Date       datetime64[ns]
First Name                     object
Last Name                      object
Gender                         object
Date of Birth          datetime64[ns]
Phone Number                   object
Email                          object
Address                        object
City                           object
State                          object
Zip Code                        int64
Country                        object
Education Level                object
Years of Experience             int64
Desired Salary                float64
Job Title                      object
Status                         object
dtype: object

In [50]:
# convert date columns to datetime format
recruitmentData['Application Date'] = pd.to_datetime(recruitmentData['Application Date'], format='%d-%b-%y')
recruitmentData['Date of Birth'] = pd.to_datetime(recruitmentData['Date of Birth'], format='mixed')

In [36]:
# let see if we have anything common with Employee_data table
recruitmentData.merge(employeeData, how='inner', left_on=['First Name', 'Last Name', 'Gender'], right_on=['FirstName', 'LastName', 'GenderCode'])
# only 3 of the applicants are hired by the firm

Unnamed: 0,Applicant ID,Application Date,First Name,Last Name,Gender,Date of Birth,Phone Number,Email,Address,City,...,Division,DOB,State_y,JobFunctionDescription,GenderCode,LocationCode,RaceDesc,MaritalDesc,Performance Score,Current Employee Rating
0,2293,10-Jun-23,John,Smith,Male,18-12-1986,(604)783-2285x1452,crystal51@example.net,7991 Susan Drives,Kaylatown,...,General - Sga,16-08-1977,MA,Project Manager,Male,1886,Other,Widowed,Fully Meets,3
1,3134,09-Jul-23,Dale,Mendoza,Female,12-04-1969,+1-363-373-9049x421,erose@example.net,630 Jenkins Walk,Brayfort,...,Project Management - Eng,06-11-2000,MA,Coordinator,Female,41529,White,Widowed,Exceeds,5
2,3283,08-Jul-23,Jason,Smith,Male,12-11-1999,957-801-1257x7066,fatkinson@example.org,9619 Collins Mountain,Jorgeville,...,Finance & Accounting,17-06-2001,MA,Intern,Male,2045,Other,Divorced,Needs Improvement,3


In [194]:
# count applicants on application year and month
recruitmentData.groupby(
    [
        recruitmentData['Application Date'].dt.year.rename('Application Year'), 
     recruitmentData['Application Date'].dt.month.rename('Application Month')
    ]
).size().reset_index(name='No of Applicants')

Unnamed: 0,Application Year,Application Month,No of Applicants
0,2023,5,808
1,2023,6,983
2,2023,7,1047
3,2023,8,162


In [116]:
# find applicant distribution by country and plot
recruitmentData.groupby('Country').size().reset_index(name='No of Applicants')

Unnamed: 0,Country,No of Applicants
0,Afghanistan,9
1,Albania,14
2,Algeria,12
3,American Samoa,8
4,Andorra,11
...,...,...
238,Wallis and Futuna,9
239,Western Sahara,15
240,Yemen,13
241,Zambia,13


In [136]:
# how many applicants were offered job roles
recruitmentData.loc[recruitmentData['Status'] == 'Offered'].shape

(610, 18)

In [138]:
# find applicant distribution by country, education level where status = Offerd
recruitmentData[recruitmentData['Status'] == 'Offered'].groupby(['Country', 'Education Level']).size().reset_index(name='No of Applicants')

Unnamed: 0,Country,Education Level,No of Applicants
0,Albania,Bachelor's Degree,2
1,Albania,Master's Degree,1
2,Algeria,Bachelor's Degree,1
3,Algeria,High School,1
4,Algeria,Master's Degree,1
...,...,...,...
448,Zambia,Bachelor's Degree,1
449,Zambia,Master's Degree,1
450,Zambia,PhD,2
451,Zimbabwe,High School,2


In [164]:
# Add a column to categories the years of experience
def experienceLevel(x):
    if x < 5:
        return 'Junior'
    elif x > 5 and x < 10:
        return 'Mid Level'
    else:
        return 'Senior'
    
recruitmentData['Experience Level'] = recruitmentData['Years of Experience'].apply(lambda x: experienceLevel(x))

In [172]:
# find applicant count distribution on years of experience and  education level
recruitmentData.groupby(['Education Level', 'Experience Level']).size().reset_index(name='No of Applicants')

Unnamed: 0,Education Level,Experience Level,No of Applicants
0,Bachelor's Degree,Junior,187
1,Bachelor's Degree,Mid Level,158
2,Bachelor's Degree,Senior,440
3,High School,Junior,167
4,High School,Mid Level,147
5,High School,Senior,424
6,Master's Degree,Junior,173
7,Master's Degree,Mid Level,140
8,Master's Degree,Senior,423
9,PhD,Junior,188


In [174]:
# categories applicant desired salary into groups
def salaryLevel(x):
    if x < 50000:
        return 'Low'
    elif x > 50000 and x < 100000:
        return 'Mid'
    else:
        return 'High'
recruitmentData['Salary Level'] = recruitmentData['Desired Salary'].apply(lambda x: salaryLevel(x))

In [178]:
# find applicant salary distribution on years of experience and  education level
recruitmentData.groupby(['Education Level', 'Experience Level', 'Salary Level']).size().reset_index(name='No of Applicants')

Unnamed: 0,Education Level,Experience Level,Salary Level,No of Applicants
0,Bachelor's Degree,Junior,Low,51
1,Bachelor's Degree,Junior,Mid,136
2,Bachelor's Degree,Mid Level,Low,35
3,Bachelor's Degree,Mid Level,Mid,123
4,Bachelor's Degree,Senior,Low,124
5,Bachelor's Degree,Senior,Mid,316
6,High School,Junior,Low,50
7,High School,Junior,Mid,117
8,High School,Mid Level,Low,59
9,High School,Mid Level,Mid,88


In [176]:
recruitmentData.tail(2)

Unnamed: 0,Applicant ID,Application Date,First Name,Last Name,Gender,Date of Birth,Phone Number,Email,Address,City,State,Zip Code,Country,Education Level,Years of Experience,Desired Salary,Job Title,Status,Experience Level,Salary Level
2998,3999,2023-05-16,Danielle,Villegas,Female,1994-08-08,(385)467-6434x67311,alvarezstephen@example.net,0983 Jeremy Burgs,Michaelhaven,KY,37855,Ghana,Bachelor's Degree,14,59442.38,Housing manager/officer,Applied,Senior,Mid
2999,4000,2023-07-07,Charles,Hernandez,Female,1980-01-06,(772)767-2580,murrayallison@example.com,146 Cheryl Highway,Hallland,OR,8592,Netherlands,Bachelor's Degree,1,89853.85,"Loss adjuster, chartered",Rejected,Junior,Mid


In [190]:
# find the applicants who were hired and their desired salaries based on name
recruitmentData.loc[recruitmentData.Status == 'Offered'].groupby(['First Name', 'Last Name'])['Desired Salary'].sum().reset_index(name='Desired Salary')

Unnamed: 0,First Name,Last Name,Desired Salary
0,Aaron,Caldwell,39953.67
1,Aaron,Sullivan,80209.01
2,Adam,Martinez,94935.35
3,Alan,Green,74369.60
4,Alan,Keith,99442.76
...,...,...,...
599,William,Reynolds,81588.84
600,William,Williams,79343.59
601,Willie,Lester,45788.02
602,Yolanda,Stephens,41031.29
