In [58]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from torch import nn
from torch.utils import data
from d2l import torch as d2l

In [3]:
# NOTE(review): the original note here said ids follow the 1..N convention, but
# the construction loop in this notebook passes 0-based positions -- confirm.
class Person:
  """A job seeker taking part in the matching simulation."""

  def __init__(self, id):
    self.job = -1      # id of the recruit this person signed with; -1 while unsigned
    self.offers = []   # recruit ids of the offers received
    self.alive = True  # True while the person is still on the market
    self.id = id

class Recruit:
  """A job posting with a limited number of open slots."""

  def __init__(self, id, num):
    self.id, self.num = id, num  # num: remaining capacity of the posting
    self.workers = []            # persons who signed with this posting
    
class Pair:
  """Score record linking one person to one recruit."""

  def __init__(self, per, rec, val):
    # Only the ids are stored, not the objects themselves.
    self.pid, self.rid = per.id, rec.id
    self.val = val

  def print(self):
    """Write `pid rid val` on one line to stdout."""
    fields = (self.pid, self.rid, self.val)
    print(*fields)

In [4]:
# Attribute embeddings; the row labels here are 0..N-1.
# Per the original note these are used to build the concatenated input vectors
# and to run the industry check.
a_emb_job = pd.read_csv('./attribute_embedding_position.txt',sep=' ',index_col=0,header=None,na_values=[])
a_emb_per = pd.read_csv('./attribute_embedding_user.txt',sep=' ',index_col=0,header=None,na_values=[])

print(a_emb_job.shape, a_emb_per.shape)

(1575, 74) (868, 74)


In [41]:
# Dump the row labels of the person embedding table to ./no_fw.csv.
alive = list(a_emb_per.index)
ss = pd.Series(alive)
ss.to_csv('./no_fw.csv')

In [6]:
def is_fw(pid):
  """Return whether row `pid` of fw_list has column '是否为废人' equal to 1."""
  return fw_list.loc[pid,'是否为废人'] == 1

# Flag table read by is_fw(); space-separated, first column used as the row label.
fw_list = pd.read_csv('./fw.txt',sep=' ',index_col=0)
# fw_list.head()

In [48]:
# Build map_list: position i holds -1 when is_fw(i) is true, otherwise the
# 0-based rank of i among the non-flagged ids seen so far.
# Assumes the row labels of fw_list are 0..l-1 -- TODO confirm.
l = fw_list.shape[0]
next_rank = 0  # was named `sum`, which rebound the builtin for every later cell
map_list = []
for i in range(l):  # cover every row of fw_list (was hard-coded to the first 100)
  if is_fw(i):
    map_list.append(-1)
  else:
    map_list.append(next_rank)
    next_rank += 1

# map_list

In [49]:
# Hard-constraint tables; both are relabelled with positions 0..N-1 after loading.
hd_job = pd.read_csv('./hardcore_position.csv',index_col=0,header=0,encoding='gbk')
hd_per = pd.read_csv('./hardcore_user.csv',index_col=0,header=0,encoding='gbk')
hd_job.index = list(range(len(hd_job)))
hd_per.index = list(range(len(hd_per)))
hd_job.shape, hd_per.shape

((1575, 4), (10877, 3))

In [99]:
def get_input_1(pid, rid):
  """Concatenate the pca_per_1 row labelled `pid` with the pca_job_1 row labelled `rid`."""
  person_vec = pca_per_1.loc[pid].values
  job_vec = pca_job_1.loc[rid].values
  return np.concatenate((person_vec, job_vec), axis=0)

def get_input_2(pid, rid):
  """Concatenate the pca_per_2 row labelled `pid` with the pca_job_2 row labelled `rid`."""
  person_vec = pca_per_2.loc[pid].values
  job_vec = pca_job_2.loc[rid].values
  return np.concatenate((person_vec, job_vec), axis=0)

# Feature tables read by get_input_1 / get_input_2; no header row, first column is the row label.
pca_per_1 = pd.read_csv('./pca_per.csv',index_col=0,header=None)
pca_job_1 = pd.read_csv('./pca_job.csv',index_col=0,header=None)

# NOTE(review): the *_2 tables are read from the same files as the *_1 tables -- confirm this is intended.
pca_per_2 = pd.read_csv('./pca_per.csv',index_col=0,header=None)
pca_job_2 = pd.read_csv('./pca_job.csv',index_col=0,header=None)

In [95]:
# Commented-out network definition (not executed):
# net = nn.Sequential(nn.Linear(128,32),
#                     nn.ReLU(),
#                     nn.Linear(32,1)
#                     )
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): net_1 and net_2 are loaded from the same checkpoint file -- confirm this is intended.
net_1 = torch.load('./DNN-4000.pt')
net_2 = torch.load('./DNN-4000.pt')
# Sanity check: shape of one concatenated input vector.
get_input_1(7,1).shape

(128,)

In [124]:
# Score caches keyed by person label; each holds 1575 slots initialised to -1,
# the value the scoring functions treat as "not computed yet".
ppd_buf = {}
myd_buf = {}
for i in pca_per_1.index:
  ppd_buf[i] = np.full(1575, -1.0)
  myd_buf[i] = np.full(1575, -1.0)

def get_ppd(pid,rid):
  """Return the match score of person `pid` for job `rid`, scaled to [0, 1].

  * persons flagged by is_fw() get a flat 0.1;
  * pairs that fail the hard checks in judge() get 0;
  * otherwise the output of net_1 is clipped to [0, 10] and divided by 10.

  Results are memoised in ppd_buf. The cache is consulted before judge(), and
  the 0 of a failed hard check is stored as well, so repeated calls for the
  same pair skip both the pandas look-ups and the network.
  """
  if is_fw(pid):
    return 0.1
  cached = ppd_buf[pid][rid]
  if cached >= 0:  # negative slot means "not computed yet"
    return cached
  if not judge(pid,rid):
    ppd_buf[pid][rid] = 0
    return 0
  ppd = net_1(torch.Tensor(get_input_1(pid,rid))).item()
  ppd = max(min(ppd,10),0)/10  # clip to [0, 10], then rescale to [0, 1]
  ppd_buf[pid][rid] = ppd
  return ppd

def get_myd(pid,rid):
  """Return the satisfaction score of person `pid` with job `rid`, scaled to [0, 1].

  * persons flagged by is_fw() get a flat 0.1;
  * pairs that fail the hard checks in judge() get 0;
  * otherwise the output of net_2 is clipped to [0, 10] and divided by 10.

  Results are memoised in myd_buf. The cache is consulted before judge(), and
  the 0 of a failed hard check is stored as well, so repeated calls for the
  same pair skip both the pandas look-ups and the network.
  """
  if is_fw(pid):
    return 0.1
  cached = myd_buf[pid][rid]
  if cached >= 0:  # negative slot means "not computed yet"
    return cached
  if not judge(pid,rid):
    myd_buf[pid][rid] = 0
    return 0
  myd = net_2(torch.Tensor(get_input_2(pid,rid))).item()
  myd = max(min(myd,10),0)/10  # clip to [0, 10], then rescale to [0, 1]
  myd_buf[pid][rid] = myd
  return myd

# Smoke test of the scoring path for one (person, job) pair.
get_ppd(0,7)

0

In [125]:
def judge_hangye(pid,rid): # ids are 0..N-1
  """Industry check: columns 6..14 (label slice) of both embedding rows, cast
  to int, must have a positive sum of element-wise products."""
  per_bits = a_emb_per.loc[pid,6:14].values.astype('int')
  job_bits = a_emb_job.loc[rid,6:14].values.astype('int')
  return (per_bits*job_bits).sum() > 0

def judge_gongzi(pid,rid): # ids are 0..N-1
  """Salary check: the person's range and the job's range must overlap.

  Reads every hd_per column except the last and the last two hd_job columns;
  presumably each pair is ordered (upper, lower) -- TODO confirm column order.
  """
  per_hi, per_lo = hd_per.iloc[pid,:-1]
  job_hi, job_lo = hd_job.iloc[rid,-2:]
  shared_low = max(per_lo,job_lo)
  shared_high = min(per_hi,job_hi)
  return shared_low<=shared_high

def judge_type(pid,rid):
  """Compare the last hd_per column of row `pid` with the first hd_job column of row `rid`."""
  return hd_per.iloc[pid,-1] == hd_job.iloc[rid,0]

def judge(pid,rid):
  """Return True only when the pair passes all three hard checks.

  The checks short-circuit, so a failed check skips the remaining pandas
  look-ups. The two scalar-lookup checks run before the row-slicing industry
  check. The result is always a plain bool.
  """
  return bool(
    judge_type(pid,rid)
    and judge_gongzi(pid,rid)
    and judge_hangye(pid,rid)
  )

In [None]:
# Count the (person, job) pairs that pass / fail all three hard checks,
# skipping every person flagged by is_fw().
true_cnt = 0
false_cnt = 0

for i in tqdm(range(10877)):
  if not is_fw(i):
    for j in range(1575):
      b1 = judge_hangye(i,j)
      b2 = judge_gongzi(i,j)
      b3 = judge_type(i,j)
      res = b1 and b2 and b3
      if res:
        true_cnt+=1
      else:
        false_cnt+=1

true_cnt,false_cnt

In [38]:
# Share of passing pairs; the denominator 1367100 equals 868 * 1575.
# NOTE(review): the two counts look copied from a run of the counting cell above -- confirm.
140933/(140933+1226167)

0.10308902055445834

In [132]:
# Simulation state: Person and Recruit objects, looked up by list position in the matching loop.
persons = []
recruits = []

In [133]:
# Rebuild the list from scratch so re-running this cell cannot append duplicates:
# the matching loop looks persons up by position (persons[pid]), so list
# position must equal the id.
persons = [Person(i) for i in tqdm(range(hd_per.shape[0]))]

100%|██████████| 10877/10877 [00:00<00:00, 1086897.71it/s]


In [134]:
# Rebuild the list from scratch so re-running this cell cannot append duplicates:
# the matching loop looks postings up by position (recruits[rid]), so list
# position must equal the id. Capacity comes from the '岗位需求量' column.
recruits = [
  Recruit(i, hd_job.loc[i, '岗位需求量'])
  for i in tqdm(range(hd_job.shape[0]))
]

100%|██████████| 1575/1575 [00:00<00:00, 157252.70it/s]


In [135]:
# Spot check: capacity of the first posting.
recruits[0].num

6

In [136]:
def get_cnt_rec():
  """Return the remaining capacity (`num`) added up over every entry of `recruits`."""
  remaining = 0
  for rec in recruits:
    remaining = remaining + rec.num
  return remaining

# Total capacity before any matching; used later as the denominator of the fill rate.
old_cnt_rec = get_cnt_rec()
old_cnt_rec

5520

In [137]:
# Iterative matching: each round every posting with free capacity sends offers
# to its top-scoring persons, then every person tries to sign the offer they
# like best. Rounds repeat until a round fills no additional slot.
# NOTE(review): a full round scores every (alive person, open posting) pair, so
# running to convergence can take several times longer than a single round.
epoch = 0
cnt_rec = get_cnt_rec()
while True:
  epoch += 1
  print(f'epoch {epoch}:')

  # Offers are per-round: drop what unsigned persons still hold from the last
  # round, otherwise a stale offer from a now-full posting keeps ranking first
  # and blocks that person from ever signing elsewhere.
  for per in persons:
    per.offers.clear()

  # Recruiting phase: each posting with free capacity sends offers.
  for rec in tqdm(recruits):
    if rec.num <= 0:
      continue
    pair_list = [] # temporary list of <per, rec, val>
    for per in persons:
      if not per.alive:  # checked first so signed persons are never scored
        continue
      val = get_ppd(per.id, rec.id)
      if val==0:
        continue
      pair_list.append(Pair(per, rec, val))
    # Sort in place by score, best first, and keep as many as there are free slots.
    pair_list.sort(reverse=True, key=lambda x:x.val)
    pair_list = pair_list[:rec.num]

    for pair in pair_list:
      persons[pair.pid].offers.append(rec.id) # send the offer

  # Signing phase: each person still on the market picks their best offer.
  for per in persons:
    if not per.alive:
      continue
    tmp2list = [] # temporary list of Pair
    for rid in per.offers:
      rec = recruits[rid]
      val = get_myd(per.id, rec.id)
      tmp2list.append(Pair(per, rec, val))
    if len(tmp2list)==0:  # no offer this round
      continue

    tmp2list.sort(reverse=True, key=lambda x:x.val) # highest satisfaction first
    dest_rec = recruits[tmp2list[0].rid]

    if dest_rec.num > 0: # posting still has a free slot
      dest_rec.num -= 1
      per.job = dest_rec.id
      per.alive = False
      per.offers.clear()
      dest_rec.workers.append(per)

  new_cnt_rec = get_cnt_rec()
  increase = cnt_rec - new_cnt_rec
  cnt_rec = new_cnt_rec
  print(f'increase {increase}\n')
  # An unconditional `break` used to sit here, which made the convergence test
  # below unreachable and always stopped the loop after the first round.
  if increase <= 0:
    break

epoch 1:


 69%|██████▉   | 1083/1575 [11:44<05:43,  1.43it/s]

In [117]:
# Offer status of the first 100 persons: id, alive flag, number of offers currently held
for i in persons[:100]:
  print(f'{i.id},{i.alive},offers:{len(i.offers)}')

0,True,offers:0
1,False,offers:0
2,False,offers:0
3,False,offers:0
4,False,offers:0
5,False,offers:0
6,False,offers:0
7,True,offers:0
8,False,offers:0
9,False,offers:0
10,False,offers:0
11,False,offers:0
12,False,offers:0
13,False,offers:0
14,False,offers:0
15,True,offers:0
16,False,offers:0
17,False,offers:0
18,False,offers:0
19,False,offers:0
20,True,offers:0
21,False,offers:0
22,False,offers:0
23,True,offers:0
24,True,offers:0
25,True,offers:0
26,False,offers:0
27,False,offers:0
28,False,offers:0
29,False,offers:0
30,True,offers:0
31,False,offers:0
32,False,offers:0
33,False,offers:0
34,False,offers:0
35,False,offers:0
36,False,offers:0
37,False,offers:0
38,True,offers:0
39,False,offers:0
40,False,offers:0
41,False,offers:0
42,False,offers:0
43,True,offers:0
44,True,offers:0
45,False,offers:0
46,True,offers:0
47,False,offers:0
48,False,offers:0
49,False,offers:0
50,False,offers:0
51,False,offers:0
52,False,offers:0
53,False,offers:0
54,False,offers:0
55,False,offers:0
56,False,offer

In [118]:
# Per-posting report: signed count, remaining slots, then <person id, match score>
# for each hire. cnt_pair counts the signed pairs; st collects the distinct
# person ids among them.
st = set()
cnt_pair = 0
for rec in recruits:
  print(f'岗位{rec.id}, 招到{len(rec.workers)}, 还差{rec.num}' ,end='\t: ')
  for per in rec.workers:  # an empty list simply skips the body
    print(f'<{per.id},{get_ppd(per.id,rec.id)}>', end=' ')
    st.add(per.id)
    cnt_pair += 1
  print('')

print(cnt_pair, len(st))

岗位0, 招到6, 还差0	: <39,0.7903871536254883> <268,0.1> <269,0.1> <270,0.1> <271,0.1> <295,0.1> 
岗位1, 招到10, 还差0	: <1,0.1> <2,0.1> <3,0.1> <4,0.1> <5,0.1> <6,0.1> <8,0.1> <9,0.1> <11,0.1> <16,0.1> 
岗位2, 招到100, 还差0	: <17,0.1> <18,0.1> <19,0.1> <21,0.1> <33,0.1> <34,0.1> <50,0.1> <51,0.1> <53,0.1> <56,0.1> <58,0.1> <61,0.1> <62,0.1> <67,0.1> <73,0.1> <78,0.1> <87,0.1> <95,0.1> <98,0.1> <99,0.1> <111,0.1> <115,0.1> <122,0.1> <125,0.1> <135,0.1> <136,0.1> <137,0.1> <148,0.1> <169,0.1> <171,0.1> <174,0.1> <175,0.1> <176,0.1> <179,0.1> <192,0.1> <197,0.1> <206,0.1> <213,0.1> <214,0.1> <215,0.1> <216,0.1> <217,0.1> <218,0.1> <219,0.1> <220,0.1> <221,0.1> <222,0.1> <223,0.1> <224,0.1> <225,0.1> <228,0.1> <229,0.1> <230,0.1> <231,0.1> <232,0.1> <233,0.1> <234,0.1> <235,0.1> <236,0.1> <238,0.1> <239,0.1> <240,0.1> <241,0.1> <242,0.1> <243,0.1> <244,0.1> <245,0.1> <246,0.1> <247,0.1> <248,0.1> <249,0.1> <250,0.1> <251,0.1> <252,0.1> <253,0.1> <254,0.1> <255,0.1> <257,0.1> <258,0.1> <259,0.1> <260,0.1> <

In [14]:
# Fill rate: signed pairs divided by the total capacity recorded before matching.
obj = cnt_pair/old_cnt_rec
obj

1.0