In [14]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [15]:
# Locations of the two input workbooks, relative to the notebook's working directory.
resume_path, joblist_path = (
    '../../../demo3/resume_duplicate.xlsx',
    '../../../demo3/Q3_joblist.xlsx',
)

In [16]:
# Ordinal code for a resume's highest education; a missing value is coded 0.
edu_dict = {
    np.nan: 0,
    **{level: rank for rank, level in enumerate(('大专', '本科', '硕士', '博士'), start=1)},
}

# Years of experience per label; a missing value is coded -1.
exp_dict = dict([
    (np.nan, -1),
    ('无经验', 0),
    ('1年工作经验', 1),
    ('2年工作经验', 2),
    ('3年工作经验', 3),
    ('4年工作经验', 4),
    ('5年工作经验', 5),
    ('10年以上工作经验', 10),
])

# Read the resumes (ids kept as text), encode education/experience, index by id,
# and finally turn every remaining NaN into '' so that missing cells compare
# equal to the empty string. Labels absent from the dicts map to NaN first and
# therefore also end up as ''.
resume_inf = (
    pd.read_excel(resume_path, dtype={'id': str})
    .assign(
        highest_education=lambda frame: frame['highest_education'].map(edu_dict),
        exp_max=lambda frame: frame['exp'].map(exp_dict),
    )
    .set_index('id')
    .replace(np.nan, '')
)

In [17]:
# Ordinal code for a posting's education requirement; '不限' and '技工' both
# count as 0, the remaining levels are ranked 1-4.
# NOTE: this rebinds the `edu_dict` name that the resume cell also defines.
edu_dict = {'不限': 0, '技工': 0}
edu_dict.update(
    (level, rank) for rank, level in enumerate(('大专', '本科', '硕士', '博士'), start=1)
)

# Read the postings (ids kept as text), encode the education requirement,
# index by id, then replace every remaining NaN with ''.
joblist_inf = (
    pd.read_excel(joblist_path, dtype={'id': str})
    .assign(edu_require=lambda frame: frame['edu_require'].map(edu_dict))
    .set_index(['id'])
    .replace(np.nan, '')
)

In [18]:
# Benefit weights, grouped by tier. The fourteen weights add up to 1.05.
_WELFARE_BY_WEIGHT = (
    (0.15, ('年终奖金', '五险一金')),
    (0.1, ('免费住宿', '绩效奖金', '股票期权')),
    (0.075, ('专业培训', '出国机会')),
    (0.05, ('餐饮补贴', '交通补贴', '员工旅游', '弹性工作', '定期体检')),
    (0.025, ('通讯补贴', '免费班车')),
)

# Flat lookup: benefit name -> weight.
welfare_weights = {
    benefit: weight
    for weight, benefits in _WELFARE_BY_WEIGHT
    for benefit in benefits
}

In [19]:
# Score per head-count label.
# NOTE(review): '50-100人' is followed by '150-500人' (nothing covers 100-150) and
# '1000-1500人' overlaps '1000-5000人' — presumably these mirror the labels used
# in the data; verify against the job_personScope column.
company_size_weights = dict(zip(
    ('少于50人', '50-100人', '150-500人', '500-1000人',
     '1000-1500人', '1000-5000人', '5000-10000人', '10000人以上'),
    (0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.9, 1),
))

# Score per registered-capital label; the empty string (missing value) scores 0.8.
# The misspelt name `conpany_capi_weights` is kept because another cell refers to it.
conpany_capi_weights = dict(zip(
    ('', '100万元以内', '100-200万', '200-500万', '500-1000万', '1000万以上'),
    (0.8, 0.2, 0.3, 0.4, 0.8, 1),
))

# Score per ownership type.
company_type_weights = dict(zip(
    ('外资', '私企', '上市公司', '合资', '国企', '民营公司'),
    (0.8, 0.4, 1, 0.6, 0.5, 0.3),
))

In [20]:
def count_welfare_score(x, weights=None):
    """Sum the weights of the benefits listed in a comma-separated string.

    Args:
        x: benefit names joined by ',' (e.g. '五险一金,年终奖金'). An empty
            string or a non-string value counts as "no benefits".
        weights: mapping of benefit name to weight. Defaults to the
            module-level ``welfare_weights`` table.

    Returns:
        The summed weight, or 0 when nothing is listed. Names are stripped of
        surrounding whitespace, empty tokens (e.g. from a trailing comma) are
        skipped, and names missing from ``weights`` contribute 0 instead of
        raising ``KeyError`` — one unexpected label must not abort a whole
        ``Series.apply``.
    """
    if weights is None:
        weights = welfare_weights
    if not isinstance(x, str) or x == '':
        return 0

    total_score = 0
    for raw_name in x.split(','):
        benefit = raw_name.strip()
        if benefit:
            total_score += weights.get(benefit, 0)
    return total_score
    
# Benefit score: parsed from the comma-separated `welfare` text.
joblist_inf['welfare_score'] = joblist_inf['welfare'].apply(count_welfare_score)

# Company scores: each one is a plain label -> weight lookup; labels that are
# absent from a table come out as NaN.
for _score_col, _label_col, _table in (
    ('person_score', 'job_personScope', company_size_weights),
    ('registCapi_score', 'job_registCapi', conpany_capi_weights),
    ('type_score', 'eA_econKind', company_type_weights),
):
    joblist_inf[_score_col] = joblist_inf[_label_col].map(_table)

In [21]:
class MatchCalculator:
    """Rule-based scoring of one resume against one job posting.

    ``is_zero`` applies hard filters that reject a pair outright;
    ``calculate_other_match`` produces three satisfaction figures for a pair;
    ``Match_Calculator`` combines the two.

    Rows are fetched by id with ``.loc``. By default the module-level
    ``resume_inf`` / ``joblist_inf`` frames are used (looked up at call time);
    pass ``resumes`` / ``jobs`` to score against other frames that carry the
    same columns.

    Missing cells are expected to be '' (the notebook replaces NaN with '');
    None and NaN are treated the same way. A missing value never rejects a
    pair and never raises.
    """

    # The constructor runs automatically on instantiation. Parameters with a
    # default value must not precede parameters without one.
    def __init__(self, bert_model=None, graph=None, resumes=None, jobs=None):
        """Store the optional collaborators.

        Args:
            bert_model: kept as ``self.model``; only the disabled BERT matcher
                (commented out below) refers to it.
            graph: kept as ``self.graph``; only the disabled knowledge-graph
                matcher (commented out below) refers to it.
            resumes: optional DataFrame indexed by resume id. None means "use
                the module-level ``resume_inf``".
            jobs: optional DataFrame indexed by job id. None means "use the
                module-level ``joblist_inf``".
        """
        self.model = bert_model
        self.graph = graph
        self._resumes = resumes
        self._jobs = jobs

    @staticmethod
    def _is_missing(value):
        """Return True when ``value`` is '', None or NaN."""
        if value is None:
            return True
        if isinstance(value, str):
            return value == ''
        # NaN is the only value that is not equal to itself.
        return bool(value != value)

    def _resume_value(self, resume_id, column):
        """Fetch one cell of the resume table."""
        frame = self._resumes if self._resumes is not None else resume_inf
        return frame.loc[resume_id, column]

    def _job_value(self, joblist_id, column):
        """Fetch one cell of the job table."""
        frame = self._jobs if self._jobs is not None else joblist_inf
        return frame.loc[joblist_id, column]

    def is_zero(self, resume_id, joblist_id):
        """Apply the hard filters to one (resume, job) pair.

        Returns:
            0 when at least one filter rejects the pair, otherwise 1. The
            filters run in the order nature, salary, address, experience,
            education and stop at the first rejection.
        """
        missing = self._is_missing

        # Work nature: rejected when the resume states one and it differs from the job's.
        def nature_ok():
            wanted = self._resume_value(resume_id, 'workNature')
            offered = self._job_value(joblist_id, 'willNature')
            return missing(wanted) or wanted == offered

        # Salary: rejected when the candidate's minimum exceeds the job's maximum.
        def salary_ok():
            wanted_min = self._resume_value(resume_id, 'willSalaryStart')
            offered_max = self._job_value(joblist_id, 'maximumWage')
            if missing(wanted_min) or missing(offered_max):
                return True
            return not (wanted_min > offered_max)

        # Work location: rejected when both provinces are known and differ.
        def address_ok():
            home = self._resume_value(resume_id, 'province')
            site = self._job_value(joblist_id, 'fixed_province')
            if missing(home) or missing(site):
                return True
            return home == site

        # Experience: rejected when the candidate has fewer years than required.
        # -1 on the resume side means "not stated"; 0 on the job side means "none required".
        def exp_ok():
            years = self._resume_value(resume_id, 'exp_max')
            required = self._job_value(joblist_id, 'exp_min')
            # Labels that were not in the encoding table arrive here as '';
            # comparing '' with a number would raise TypeError.
            if missing(years) or missing(required):
                return True
            if years == -1 or required == 0:
                return True
            return not (years < required)

        # Education: rejected when the candidate's level is below the requirement.
        # A resume level of 0 means the candidate's education is unknown.
        def edu_ok():
            level = self._resume_value(resume_id, 'highest_education')
            required = self._job_value(joblist_id, 'edu_require')
            # Same guard as for experience: unmapped labels arrive as ''.
            if missing(level) or missing(required):
                return True
            if level == 0:
                return True
            return not (level < required)

        checks = (nature_ok, salary_ok, address_ok, exp_ok, edu_ok)
        return 1 if all(check() for check in checks) else 0

    def calculate_other_match(self, resume_id, joblist_id):
        """Compute the custom satisfaction figures for one pair.

        Returns:
            ``[salary, welfare, company]`` where salary is the job's maximum
            wage minus the candidate's maximum expected salary, welfare is the
            job's ``welfare_score`` and company is the mean of the job's
            ``person_score``, ``registCapi_score`` and ``type_score``.
        """

        # Salary satisfaction: the job's top salary minus the candidate's top expectation.
        def salary_sat():
            resume_max_salary = self._resume_value(resume_id, 'willSalaryEnd')
            joblist_max_salary = self._job_value(joblist_id, 'maximumWage')
            # is_zero lets pairs with a missing salary through, so the
            # subtraction must not be attempted on ''; report a neutral 0.
            if self._is_missing(resume_max_salary) or self._is_missing(joblist_max_salary):
                return 0
            if joblist_max_salary == resume_max_salary:
                return 0
            return joblist_max_salary - resume_max_salary

        # Benefit satisfaction.
        def welfare_sat():
            return self._job_value(joblist_id, 'welfare_score')

        # Company satisfaction.
        def company_sat():
            person_score = self._job_value(joblist_id, 'person_score')
            capi_score = self._job_value(joblist_id, 'registCapi_score')
            type_score = self._job_value(joblist_id, 'type_score')
            return (person_score + capi_score + type_score) / 3

        return [salary_sat(), welfare_sat(), company_sat()]
    # def calculate_bert_match(self, resume_id, joblist_id):
    #     resume_embedding = self.model.encode(resume_inf.loc[resume_id, 'concatenated'], convert_to_tensor=True)
    #     joblist_embedding = self.model.encode(joblist_inf.loc[joblist_id, 'concatenated'], convert_to_tensor=True)

    #     # Cosine similarity of the two sentences
    #     cosine_scores = util.pytorch_cos_sim(resume_embedding, joblist_embedding)
    #     return cosine_scores.item()
    
    # def knowledge_graph_match(self, resume_id, joblist_id):
    #     # Neo4j knowledge-graph based matching

    #     # Define the Cypher query
    #     cypher_query = f"""
    #     MATCH (a1:ID {{name: '{resume_id}'}})-->(b1)
    #     WITH a1, collect(id(b1)) AS p1
    #     MATCH (a2:ID {{name: '{joblist_id}'}})-->(b2)
    #     WITH a1, p1, a2, collect(id(b2)) AS p2
    #     RETURN a1.name AS from, a2.name AS to, gds.similarity.jaccard(p1, p2) AS similarity
    #     """
    #     # Run the query in Neo4j and fetch the result
    #     result = self.graph.run(cypher_query).to_data_frame()
    #     return result.loc[0,'similarity']
    
    def Match_Calculator(self, resume_id, joblist_id):
        """Score one pair.

        Returns:
            ``[0, 0, 0]`` when ``is_zero`` rejects the pair, otherwise the list
            produced by ``calculate_other_match``.
        """
        if self.is_zero(resume_id, joblist_id) == 0:
            return [0, 0, 0]
        return self.calculate_other_match(resume_id, joblist_id)


In [22]:
# model = SentenceTransformer("uer/sbert-base-chinese-nli")
# graph = Graph('http://localhost:7474/', auth=('neo4j', os.environ['NEO4J_PASSWORD']))  # credential redacted: read it from the environment, never hardcode it

In [23]:
# Smoke test: score a single (resume, job) pair and display the result.
matchdegree = MatchCalculator(bert_model=None, graph=None)
match_list = matchdegree.Match_Calculator(
    resume_id='1461512488951611392',
    joblist_id='1374181407047421952',
)
match_list

[0, 0.25, 0.5666666666666667]

In [24]:
# Empty result tables, declared with their column layout only.
_match_columns = ['招聘信息 ID', '求职者 ID', '规则匹配度', '长文本匹配度', '知识图谱匹配度', '岗位匹配度']
_satisfaction_columns = ['求职者 ID', '招聘信息 ID', '公司名称', '岗位匹配度']

match_degree = pd.DataFrame(columns=_match_columns)
Satisfaction_degree = pd.DataFrame(columns=_satisfaction_columns)

In [25]:
# Score every (job, resume) pair and keep only the pairs that Match_Calculator
# does not reject (a rejected pair comes back as [0, 0, 0]).
_pair_columns = ['招聘信息 ID', '求职者 ID', '薪资满意度', '福利满意度', '公司满意度']
_resume_ids = resume_inf.index.tolist()  # loop-invariant: build the id list once, not once per job
_pair_records = []
for i in tqdm(joblist_inf.index.tolist()):
    for j in _resume_ids:
        match_list = matchdegree.Match_Calculator(j, i)
        if match_list == [0,0,0]:
            continue
        _pair_records.append(dict(zip(_pair_columns, [i, j, *match_list])))

# Passing the columns explicitly keeps the layout stable even when no pair survives.
match_degree = pd.DataFrame(_pair_records, columns=_pair_columns)

100%|██████████| 1572/1572 [00:27<00:00, 57.66it/s]


In [26]:
# Persist the satisfaction table; the row index is not written.
match_degree.to_csv(
    path_or_buf='../../../demo3/求职者满意度.csv',
    index=False,
)

In [16]:
# match_degree = pd.read_excel('../../../demo3/BERT匹配度.xlsx',dtype = {'招聘信息 ID':str,'求职者 ID':str}).drop(columns='长文本匹配度归一化').drop(columns='Unnamed: 0')

In [14]:
# Min-max normalise the long-text score within each job posting.
# NOTE(review): the `match_degree` frame built by the satisfaction loop in this
# notebook has no '长文本匹配度' column; this cell presumably expects the BERT score
# table from the commented-out read_excel cell — confirm before Run All.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Work on a separate frame without the rows whose long-text score is exactly 0.
_zero_rows = match_degree.index[(match_degree['长文本匹配度'] == 0).to_numpy()]
match_degree_copy = match_degree.drop(index=_zero_rows)


def ratings_norm(df):
    """Add the group's min-max scaled long-text score as a new column and return the group."""
    scaled = scaler.fit_transform(df[['长文本匹配度']])
    df["长文本匹配度归一化"] = scaled
    return df


_by_job = match_degree_copy.groupby("招聘信息 ID", group_keys=True)
match_degree_group_joblist = _by_job.apply(ratings_norm)

# The lowest score of every group scales to exactly 0; lift it to 0.0005.
_normed = match_degree_group_joblist['长文本匹配度归一化']
match_degree_group_joblist['长文本匹配度归一化'] = _normed.where(_normed != 0, 0.0005)

match_degree_group_joblist.to_excel('../../../demo3/BERT匹配度_招聘信息.xlsx')

In [15]:
# Min-max normalise the long-text score within each job seeker.
# This is the same procedure as the per-posting cell, grouped by '求职者 ID'.
# NOTE(review): the `match_degree` frame built by the satisfaction loop in this
# notebook has no '长文本匹配度' column; this cell presumably expects the BERT score
# table from the commented-out read_excel cell — confirm before Run All.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Work on a separate frame without the rows whose long-text score is exactly 0.
_zero_rows = match_degree.index[(match_degree['长文本匹配度'] == 0).to_numpy()]
match_degree_copy = match_degree.drop(index=_zero_rows)


def ratings_norm(df):
    """Add the group's min-max scaled long-text score as a new column and return the group."""
    scaled = scaler.fit_transform(df[['长文本匹配度']])
    df["长文本匹配度归一化"] = scaled
    return df


_by_resume = match_degree_copy.groupby('求职者 ID', group_keys=True)
match_degree_group_resume = _by_resume.apply(ratings_norm)

# The lowest score of every group scales to exactly 0; lift it to 0.0005.
_normed = match_degree_group_resume['长文本匹配度归一化']
match_degree_group_resume['长文本匹配度归一化'] = _normed.where(_normed != 0, 0.0005)
match_degree_group_resume.to_excel('../../../demo3/BERT匹配度_求职者.xlsx')