In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from torch import nn
from torch.utils import data
from d2l import torch as d2l

In [2]:
# 这里使用的ID为0~N-1版本
class Person:
  def __init__(self, id):
    self.id = id
    self.alive = True # 是否存活
    self.offers = []  # 收到的offer
    self.job = -1 # 签约的企业

class Recruit:
  def __init__(self, id, num):
    self.id = id
    self.num = num # 岗位剩余容量
    self.workers = [] # 签约的员工
    
class Pair:
  def __init__(self, per, rec, val):
    self.per = per
    self.rec = rec
    self.pid = per.id
    self.rid = rec.id
    self.val = val
  
  def print(self):
    print(self.pid, self.rid, self.val)

In [3]:
# 属性嵌入，这里的序号是0~N-1
# 用于拼接向量 and 判断行业
a_emb_job = pd.read_csv('./attribute_embedding_position.txt',sep=' ',index_col=0,header=None,na_values=[])
a_emb_per = pd.read_csv('./attribute_embedding_user.txt',sep=' ',index_col=0,header=None,na_values=[])

In [4]:
def is_fw(pid):
  val = fw_list.loc[pid,'是否为废人']
  return val==1

fw_list = pd.read_csv('./fw.txt',sep=' ',index_col=0)

In [5]:
alive = []
for i in a_emb_per.index:
  alive.append(i)
ss = pd.Series(alive)
ss.to_csv('./no_fw.csv')

In [6]:
pca_per_ppd = pd.read_csv('./pca_per.csv',index_col=0,header=None)
pca_job_ppd = pd.read_csv('./pca_job.csv',index_col=0,header=None)

In [7]:
pca_per_myd = pd.read_csv('./pca_per_myd.csv',index_col=0,header=None)
pca_job_myd = pd.read_csv('./pca_job_myd.csv',index_col=0,header=None)

In [8]:
def get_input_ppd(pid,rid):
  per = pca_per_ppd.loc[pid].values
  job = pca_job_myd.loc[rid].values
  all = np.concatenate([per,job],axis=0)
  return all

def get_input_myd(pid,rid):
  per = pca_per_myd.loc[pid].values
  job = pca_job_myd.loc[rid].values
  all = np.concatenate([per,job],axis=0)
  return all

In [9]:
net_ppd = torch.load('./Sigmoid-200.pt')
net_myd = torch.load('./Sigmoid-MYD.pt')

In [10]:
# 硬核条件，这里的序号是0~N-1
hd_job = pd.read_csv('./hardcore_position.csv',index_col=0,header=0,encoding='gbk')
hd_job.index = [i for i in range(hd_job.shape[0])]
hd_per = pd.read_csv('./hardcore_user.csv',index_col=0,header=0,encoding='gbk')
hd_per.index = [i for i in range(hd_per.shape[0])]
hd_job.shape, hd_per.shape

((1575, 4), (10877, 3))

In [11]:
def judge_hangye(pid,rid): # 0~N-1
  # print(pid,rid)
  x = a_emb_per.loc[pid,6:14].values.astype('int')
  y = a_emb_job.loc[rid,6:14].values.astype('int')
  sum = (x*y).sum()
  return sum > 0

def judge_gongzi(pid,rid): # 0~N-1
  p2, p1 = hd_per.iloc[pid,:-1]
  r2, r1 = hd_job.iloc[rid,-2:]
  return max(p1,r1)<=min(p2,r2)

def judge_type(pid,rid):
  pt = hd_per.iloc[pid,-1]
  rt = hd_job.iloc[rid,0]
  # print(pt,rt)
  return pt == rt

def judge(pid,rid):
  if judge_buf[pid,rid]>=0:
    return judge_buf[pid,rid]==1
  # b1 = judge_hangye(pid,rid)
  b2 = judge_gongzi(pid,rid)
  b3 = judge_type(pid,rid)
  if b2 and b3:
    judge_buf[pid,rid] = 1
  else:
    judge_buf[pid,rid] = 0
  # print('build buf')
  return judge_buf[pid,rid]==1

judge_buf = np.zeros(shape=(10877,1575),dtype='int')-1
# judge_buf

In [12]:
judge(1,444)

False

In [13]:
ppd_buf = {}
myd_buf = {}
for i in pca_per_ppd.index:
  ppd_buf[i] = np.zeros(1575)-1
  myd_buf[i] = np.zeros(1575)-1

def get_ppd(pid,rid):
  if judge(pid,rid)==False: # 硬性条件不满足为0
    return 0
  elif is_fw(pid):  # 废物为1
    return 0.1
  elif ppd_buf[pid][rid]>=0:
    return ppd_buf[pid][rid]
  else:
    ppd = net_ppd(torch.Tensor(get_input_ppd(pid,rid))).item()
    # print(ppd)
    # print(f'ppd({pid},{rid})={ppd}')
  ppd = min(ppd,10)
  ppd = max(ppd,1)/10
  ppd_buf[pid][rid] = ppd
  return ppd

def get_myd(pid,rid):
  if judge(pid,rid)==False: # 硬性条件不满足为0
    return 0
  elif is_fw(pid):  # 废物为1
    return 0.1
  elif myd_buf[pid][rid]>=0:
    return myd_buf[pid][rid]
  else:
    myd = net_myd(torch.Tensor(get_input_myd(pid,rid))).item()
    # print(myd)
    # print(f'ppd({pid},{rid})={ppd}')
  myd = min(myd,10)
  myd = max(myd,1)/10
  myd_buf[pid][rid] = myd
  return myd

In [25]:
persons = []
recruits = []

In [26]:
# for i in tqdm(range(hd_per.shape[0])):
for i in tqdm(range(5000)):
  # id = hd_per.loc[i, '序号']
  tmp = Person(i)
  persons.append(tmp)
  # print(id)

100%|██████████| 5000/5000 [00:00<00:00, 1247562.17it/s]


In [27]:
# for i in tqdm(range(hd_job.shape[0])):
for i in tqdm(range(500)):
  # id = hd_rec.loc[i+1, '序号']
  num = hd_job.loc[i, '岗位需求量']
  tmp = Recruit(i, num)
  recruits.append(tmp)
  # print(id)

100%|██████████| 500/500 [00:00<00:00, 124949.48it/s]


In [28]:
def get_cnt_rec():
  sum = 0
  for i in recruits:
    sum += i.num
  return sum

old_cnt_rec = get_cnt_rec()
old_cnt_rec

2788

In [29]:
epoch = 0
total_pair = 0
cnt_rec = get_cnt_rec()

while True:
  epoch += 1
  # 招聘回合
  print(f'epoch {epoch}:')
  
  for rec in tqdm(recruits):  # 对于每个岗位
    # 发放offer
    if rec.num <= 0:
      continue
    offer_list = [] # 临时数组：<per, rec, val>
    # 计算与每个求职者的匹配度
    for per in persons:
      if per.alive==False:
        continue
      if judge(per.id, rec.id)==False:
        continue
      val = get_ppd(per.id, rec.id)
      if val<=0:
        continue
      pair = Pair(per, rec, val)
      offer_list.append(pair)
    # 就地排序 by val
    if len(offer_list)<=0:
      continue
    
    offer_list.sort(reverse=True, key=lambda x:x.val)
    
    # for of in offer_list:
    #   if get_ppd(of.pid,of.rid)==0:
    #     print(f'1.ppd={get_ppd(of.pid,of.rid)}, val={of.val}')
        
    if len(offer_list)>rec.num:
      offer_list = offer_list[:rec.num] # 取TopK
    
    for pair in offer_list:
      if pair.val>0:
        pair.per.offers.append(pair) # 发放offer
  
  # 所有岗位offer发放完毕
  np.random.shuffle(persons) # 随机优化
    
  # 求职者签约
  for per in persons: # 对于每个求职者
    if per.alive == False:
      continue
    # offer_list2 = [] # 临时数组：Pair
    for of in per.offers:  # 对于每一份offer
      # rec = recruits[rid]
      val = get_myd(of.pid, of.rid)
      of.val = val
    #   pair = Pair(per, rec, val)
    #   offer_list2.append(pair)
    # if len(offer_list2)==0:  # 当前求职者无offer
    #   continue
    
    # offer_list2.sort(reverse=True, key=lambda x:x.val) # 取最高满意度
    per.offers.sort(reverse=True, key=lambda x:x.val)
    # 目标岗位
    for of in per.offers:
      # dest_rec = recruits[k.rid]
      if of.rec.num > 0: # 若未招满
        of.rec.num -= 1
        per.job = of.rid
        per.alive = False
        # per.offers.clear()
        of.rec.workers.append(per.id)
        # if of.pid!=per.id:
        #   print('ID ERR!')
        # 检测非法值
        # if get_myd(per.id,of.rid)==0:
        #   print(f'2.<{per.id},{of.rid}>:myd={get_myd(per.id,of.rid)},ppd={get_ppd(per.id,of.rid)}')
        break
    
    per.offers.clear()
    
  # 删除冗余
  
  new_cnt_rec = get_cnt_rec()
  increase = cnt_rec - new_cnt_rec
  cnt_rec = new_cnt_rec
  total_pair += increase
  # print(net1_cnt, net2_cnt)
  print(f'increase {increase}, total {total_pair}\n')
  
  # break
  
  if increase <= 0:
    break

epoch 1:


 23%|██▎       | 113/500 [02:33<09:52,  1.53s/it]

In [None]:
# # 求职者offer情况
# for i in persons:
#   if i.job!=-1:
#     print(f'{i.id},{i.alive},offers:{len(i.offers)}')

In [19]:
st = set()
cnt_pair = 0
for rec in recruits: # 对于每个岗位
  print(f'岗位{rec.id}, 招到{len(rec.workers)}, 还差{rec.num}' ,end='\t: ')
  if len(rec.workers)>0:  # 若非空
    
    for pid in rec.workers: # 对于每个签约求职者
      # if get_ppd(per.id,rec.id)==0:
      #   continue
      print(f'<{pid},{get_myd(pid,rec.id)}>', end=' ')
      cnt_pair += 1
      st.add(pid)
  print('')
    
print(cnt_pair, len(st))

岗位0, 招到5, 还差1	: <59,0.494078254699707> <68,0.5427844524383545> <17,0.1> <6,0.1> <11,0.1> 
岗位1, 招到0, 还差10	: 
岗位2, 招到6, 还差94	: <70,0.5950346469879151> <60,0.6839651107788086> <53,0.1> <87,0.1> <48,0.5302452564239502> <5,0.1> 
岗位3, 招到5, 还差0	: <37,0.5645097732543946> <369,0.1> <374,0.1> <477,0.1> <320,0.1> 
岗位4, 招到6, 还差0	: <22,0.7611891746520996> <205,0.8435685157775878> <35,0.6897110939025879> <312,0.1> <99,0.1> <287,0.1> 
岗位5, 招到10, 还差0	: <86,0.5182445526123047> <149,0.8074644088745118> <39,0.7265212059020996> <250,0.1> <424,0.1> <462,0.1> <388,0.1> <332,0.1> <265,0.1> <234,0.1> 
岗位6, 招到100, 还差0	: <152,0.2821060657501221> <194,0.2429109573364258> <65,0.553601598739624> <104,0.815860366821289> <102,0.5338583946228027> <94,0.7825079917907715> <132,0.4100177764892578> <26,0.5618669509887695> <187,0.5764382839202881> <85,0.34040036201477053> <390,0.4437525749206543> <166,0.5924441337585449> <80,0.36093339920043943> <189,0.5317788600921631> <103,0.6315285682678222> <110,0.6514101028442383> <6

In [44]:
obj = cnt_pair/old_cnt_rec
obj

0.6383928571428571

In [307]:
# 调用实例，使用时可以把函数里的print关掉
# 仅第一次调用神经网络，即建立缓存时会print
a,b = 317,46
get_ppd(a,b),get_myd(a,b)

(0, 0)

In [308]:
judge(a,b)

False