In [1]:
import numpy as np
import pandas as pd

import gc
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.layers import *
from tensorflow.python.keras.layers import Layer
from tensorflow.keras import regularizers

from tensorflow.keras.models import Model,load_model,Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau,EarlyStopping

from tensorflow.keras import optimizers,initializers
from tensorflow.python.keras.initializers import glorot_normal


from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score



In [68]:
from imblearn.over_sampling import SMOTE

https://github.com/ShowMeAI-Hub/multi-task-learning/blob/main/README.md

In [2]:
data_origin = pd.read_excel('D:/OneDrive - University of South Carolina/Research/multitasking learning/data cleaning/DHEC_tests_final_lag1.xlsx', sheet_name='Sheet1', engine='openpyxl')
variables = pd.read_excel("D:/OneDrive - University of South Carolina/Research/multitasking learning/data cleaning/0130variables-VSVBVR.xlsx", sheet_name='Sheet1', engine='openpyxl')

## Model 1

In [84]:
temp = variables[variables['model1'] != 'delete']
data = data_origin[temp['variables'].tolist()]
data

Unnamed: 0,TIME_DXDATE_TESTDATE,time_window_index_year,VS,VR,VB,dx_yr,dx_mth,age,sex,race,risk,region,CD4_baseline,VL_baseline_interpretation,VL_baseline
0,591.000000,2,0,0,0,2005,12,39,F,Black,Others,Urban,401.0,=,160288.0
1,1018.000000,3,0,0,0,2005,12,39,F,Black,Others,Urban,401.0,=,160288.0
2,2072.250000,6,1,0,0,2005,12,39,F,Black,Others,Urban,401.0,=,160288.0
3,2495.333333,7,1,0,0,2005,12,39,F,Black,Others,Urban,401.0,=,160288.0
4,2793.333333,8,1,0,0,2005,12,39,F,Black,Others,Urban,401.0,=,160288.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40274,2737.500000,8,1,0,0,2009,9,48,M,Black,MSM,Urban,414.0,=,15127.0
40275,3193.000000,9,1,0,0,2009,9,48,M,Black,MSM,Urban,414.0,=,15127.0
40276,3499.666667,10,1,0,0,2009,9,48,M,Black,MSM,Urban,414.0,=,15127.0
40277,3884.000000,11,1,0,0,2009,9,48,M,Black,MSM,Urban,414.0,=,15127.0


In [85]:
target = variables[variables['model1'] == 'outcome']['variables'].tolist()
sparse_features = variables[variables['model1'] == 'cat']['variables'].tolist()
dense_features = variables[variables['model1'] == 'num']['variables'].tolist()
varlen_features = []

In [86]:
target

['VS', 'VR', 'VB']

In [87]:
encoder = {}
# 稀疏特征编码
for featid in sparse_features:
    # print(f"编码ID字段：{featid}")
    encoder[featid] = {uid:ucode+1 for ucode,uid in enumerate(data[featid].unique())} 
    data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))
    
# 生成输入特征设置
sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
feature_names = sparse_features+varlen_features+dense_features

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))


In [88]:
n_train = round(data.shape[0] * 0.6)
n_val = round(data.shape[0] * 0.2)

train = data[:n_train]
val = data[n_train:(n_train+n_val)]
test = data[(n_train+n_val):]

In [89]:
# 构建输入数据
train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names } #训练模型的输入，字典类型。名称和具体值
val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names }
test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

train_labels = [train[y].values for y in target]
val_labels = [val[y].values for y in target]
test_labels = [test[y].values for y in target]

### Seperate models

#### VS

In [92]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VS"]
X_train.shape

(32223, 12)

In [93]:
def check_class_distribution(y):
    class_counts = pd.Series(y).value_counts(normalize=True)  # 计算比例
    print(class_counts)

In [94]:
check_class_distribution(y_train)

1    0.714893
0    0.285107
Name: VS, dtype: float64


In [95]:
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [96]:
X_resampled

Unnamed: 0,TIME_DXDATE_TESTDATE,time_window_index_year,dx_yr,dx_mth,age,sex,race,risk,region,CD4_baseline,VL_baseline_interpretation,VL_baseline
0,591.000000,2,2005,12,39,1,1,1,1,401.000000,1,160288.000000
1,1018.000000,3,2005,12,39,1,1,1,1,401.000000,1,160288.000000
2,2072.250000,6,2005,12,39,1,1,1,1,401.000000,1,160288.000000
3,2495.333333,7,2005,12,39,1,1,1,1,401.000000,1,160288.000000
4,2793.333333,8,2005,12,39,1,1,1,1,401.000000,1,160288.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
46067,3289.496781,8,2010,11,24,1,1,2,1,103.579503,1,18327.528528
46068,584.955121,2,2006,5,36,1,1,2,1,1124.299176,1,200.000000
46069,3634.435072,8,2007,7,35,2,1,2,1,116.403422,1,186215.368465
46070,786.515509,2,2011,9,24,1,1,1,1,1004.639387,1,200.000000


In [71]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss=focal_loss(alpha=0.25, gamma=2),
              metrics=['AUC'])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_68 (Dense)             (None, 64)                832       
_________________________________________________________________
dense_69 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_70 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_71 (Dense)             (None, 1)                 65        
Total params: 17,473
Trainable params: 17,473
Non-trainable params: 0
_________________________________________________________________


In [72]:
# Train the model
history = model.fit(X_resampled.to_numpy(), np.array(y_resampled), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [98]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

In [52]:
print(accuracy_score(test_labels[0], (pred > 0.5).astype(int)))

0.7768123138033763


In [53]:
print(roc_auc_score(test_labels[0], pred))

0.5


In [97]:
X_test

Unnamed: 0,TIME_DXDATE_TESTDATE,time_window_index_year,VL_interpretation,VL,Months_to_ini_VS,VR_N,VR_size,prop_time,dx_yr,dx_mth,...,msld.cum_1,metacanc.cum_1,aids.cum_1,Depression_1,Anxiety_1,Psychiatric_disorder_1,Alcohol_use_1,Tobacco_use_1,Illicit_drug_use_1,visits_1
32223,3510.333333,10,2,200.0,18.966667,0,1,0.900985,2009,4,...,0.0,0.0,0.0,1,1,1,1,1,1,3
32224,3894.000000,11,2,200.0,18.966667,0,1,0.909405,2009,4,...,0.0,0.0,0.0,1,1,1,1,1,1,6
32225,4198.500000,12,2,200.0,18.966667,0,1,0.915052,2009,4,...,0.0,0.0,0.0,1,1,1,1,1,1,4
32226,590.333333,2,2,200.0,19.677778,0,1,0.524486,2008,4,...,0.0,0.0,0.0,1,1,1,1,1,1,10
32227,967.000000,3,2,200.0,19.677778,0,1,0.675868,2008,4,...,0.0,0.0,0.0,1,1,1,1,1,1,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40274,2737.500000,8,2,200.0,17.616667,0,1,0.885198,2009,9,...,0.0,0.0,0.0,1,1,1,1,1,1,3
40275,3193.000000,9,2,200.0,17.616667,0,1,0.898272,2009,9,...,0.0,0.0,0.0,1,1,1,1,1,1,8
40276,3499.666667,10,2,200.0,17.616667,0,1,0.907792,2009,9,...,0.0,0.0,0.0,1,1,1,1,1,1,3
40277,3884.000000,11,2,200.0,17.616667,0,1,0.915576,2009,9,...,0.0,0.0,0.0,1,1,1,1,1,1,6


In [99]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

# 训练逻辑回归模型
log_reg = LogisticRegression(class_weight='balanced', solver='liblinear', random_state=42)
log_reg.fit(X_train.to_numpy(), np.array(y_train))

# 在测试集上进行预测
log_pred_prob = log_reg.predict_proba(X_test)[:, 1]  # 获取正类的概率
log_pred = (log_pred_prob > 0.5).astype(int)  # 转换为二分类预测

# 计算 Accuracy 和 AUC
log_accuracy = accuracy_score(test_labels[0], log_pred)
log_auc = roc_auc_score(test_labels[0], log_pred_prob)

print(f"Logistic Regression Accuracy: {log_accuracy:.4f}")
print(f"Logistic Regression AUC: {log_auc:.4f}")

Logistic Regression Accuracy: 0.4916
Logistic Regression AUC: 0.5348




#### VB

In [17]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VB"]

In [18]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['AUC'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 64)                832       
_________________________________________________________________
dense_9 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_10 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 65        
Total params: 17,473
Trainable params: 17,473
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

In [21]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9046673286991063


In [22]:
print(roc_auc_score(test_labels[1], pred))

0.5


#### VR

In [23]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VR"]

In [24]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['AUC'])
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 64)                832       
_________________________________________________________________
dense_13 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_14 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 65        
Total params: 17,473
Trainable params: 17,473
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.0000000e+00],
       [0.0000000e+00],
       [0.0000000e+00],
       ...,
       [0.0000000e+00],
       [0.0000000e+00],
       [1.8340146e-28]], dtype=float32)

In [27]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9046673286991063


In [28]:
print(roc_auc_score(test_labels[1], pred))

0.5145067591131541


### MMoE

In [29]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class MmoeLayer(tf.keras.layers.Layer):
    def __init__(self,expert_dim,n_expert,n_task):
        super(MmoeLayer, self).__init__()
        self.n_task = n_task
        self.expert_layer = [Dense(expert_dim,activation = 'relu') for i in range(n_expert)]
        self.gate_layers = [Dense(n_expert,activation = 'softmax') for i in range(n_task)]
    
    def call(self,x):
        #多个专家网络
        E_net = [expert(x) for expert in self.expert_layer]
        E_net = Concatenate(axis = 1)([e[:,tf.newaxis,:] for e in E_net]) #(bs,n_expert,n_dims)
        #多个门网络
        gate_net = [gate(x) for gate in self.gate_layers]     #n_task个(bs,n_expert)
        
        #每个towers等于，对应的门网络乘上所有的专家网络。
        towers = []
        for i in range(self.n_task):
            g = tf.expand_dims(gate_net[i],axis = -1)  #(bs,n_expert,1)
            _tower = tf.matmul(E_net, g,transpose_a=True)
            towers.append(Flatten()(_tower))           #(bs,expert_dim)
            
        return towers

def build_mmoe(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim,
              varlens_cols,varlens_max_len,n_expert,n_task,target = [],
              dnn_hidden_units = (64,),dnn_reg_l2 = 1e-5,drop_rate = 0.1,
                embedding_reg_l2 = 1e-6):
    
    
    #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    
    #mmoe网络层
    towers = MmoeLayer(expert_dim,n_expert,n_task)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid', kernel_regularizer=regularizers.l2(dnn_reg_l2),
                     name = f,use_bias = True)(_t) for _t,f in zip(towers,target)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [30]:
# 构建模型，训练和评估
model = build_mmoe(sparse_features,dense_features,sparse_max_len,embed_dim = 64,expert_dim = 32,
          n_task = 3,n_expert = 6,varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
          dnn_hidden_units = (64,128,64),target = target,dnn_reg_l2 = 0.001,drop_rate = 0.1)

adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["AUC"])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
risk (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
region (InputLayer)             [(None, 1)]          0                                            
______________________________________________________________________________________________

In [31]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [32]:
pred = model.predict(test_model_input)
pred

[array([[0.7064768],
        [0.7064768],
        [0.7064768],
        ...,
        [0.7064768],
        [0.7064768],
        [0.7064768]], dtype=float32),
 array([[0.12527491],
        [0.12527491],
        [0.12527491],
        ...,
        [0.12527491],
        [0.12527491],
        [0.12527491]], dtype=float32),
 array([[0.02263079],
        [0.02263079],
        [0.02263079],
        ...,
        [0.02263079],
        [0.02263079],
        [0.02263079]], dtype=float32)]

In [33]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.7768123138033763
0.9046673286991063
0.974180734856008


In [34]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5
0.5
0.5


### PLE

In [36]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class PleLayer(tf.keras.layers.Layer):
    '''
    n_experts:list,每个任务使用几个expert。[2,3]第一个任务使用2个expert，第二个任务使用3个expert。
    n_expert_share:int,共享的部分设置的expert个数。
    expert_dim:int,每个专家网络输出的向量维度。
    n_task:int,任务个数。
    '''
    def __init__(self,n_task,n_experts,expert_dim,n_expert_share,dnn_reg_l2 = 1e-5):
        super(PleLayer, self).__init__()
        self.n_task = n_task
        
        # 生成多个任务特定网络和1个共享网络。
        self.E_layer = []
        for i in range(n_task):
            sub_exp = [Dense(expert_dim,activation = 'relu') for j in range(n_experts[i])]
            self.E_layer.append(sub_exp)
            
        self.share_layer = [Dense(expert_dim,activation = 'relu') for j in range(n_expert_share)]
        #定义门控网络
        self.gate_layers = [Dense(n_expert_share+n_experts[i],kernel_regularizer=regularizers.l2(dnn_reg_l2),
                                  activation = 'softmax') for i in range(n_task)]

    def call(self,x):
        #特定网络和共享网络
        E_net = [[expert(x) for expert in sub_expert] for sub_expert in self.E_layer]
        share_net = [expert(x) for expert in self.share_layer]
        
        #门的权重乘上，指定任务和共享任务的输出。
        towers = []
        for i in range(self.n_task):
            g = self.gate_layers[i](x)
            g = tf.expand_dims(g,axis = -1) #(bs,n_expert_share+n_experts[i],1)
            _e = share_net+E_net[i]  
            _e = Concatenate(axis = 1)([expert[:,tf.newaxis,:] for expert in _e]) #(bs,n_expert_share+n_experts[i],expert_dim)
            _tower = tf.matmul(_e, g,transpose_a=True)
            towers.append(Flatten()(_tower)) #(bs,expert_dim)
        return towers

def build_ple(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim = 4,
              varlens_cols = [],varlens_max_len = [],dnn_hidden_units = (64,64),
              n_task = 2,n_experts = [2,2],n_expert_share = 4,dnn_reg_l2 = 1e-6,
              drop_rate = 0.0,embedding_reg_l2 = 1e-6,targets = []):

   #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])    
                                  
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    #Ple网络层
    towers = PleLayer(n_task,n_experts,expert_dim,n_expert_share)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid',kernel_regularizer=regularizers.l2(dnn_reg_l2),
                       name = f,use_bias = True)(_t) for f,_t in zip(targets,towers)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [43]:
# 构建模型，训练和评估
model = build_ple(sparse_features,dense_features,sparse_max_len,embed_dim = 64,expert_dim = 16,
          varlens_cols = varlen_features,varlens_max_len = varlens_max_len,dnn_hidden_units = (64,64),
          n_task = 3,n_experts = [1,1,1],n_expert_share = 2,dnn_reg_l2 = 0.001,
          drop_rate = 0.1,embedding_reg_l2 = 0.001,targets = target)

adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["AUC"],)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
risk (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
region (InputLayer)             [(None, 1)]          0                                            
____________________________________________________________________________________________

In [44]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20


TypeError: in user code:

    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\Users\Yunqing\AppData\Local\Temp\ipykernel_40004\346764826.py:4 loss  *
        loss = - y_true * (alpha * K.pow(1 - y_pred, gamma) * K.log(y_pred)) -                (1 - y_true) * ((1 - alpha) * K.pow(y_pred, gamma) * K.log(1 - y_pred))
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\ops\math_ops.py:1383 binary_op_wrapper
        raise e
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\ops\math_ops.py:1367 binary_op_wrapper
        return func(x, y, name=name)
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\ops\math_ops.py:1710 _mul_dispatch
        return multiply(x, y, name=name)
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\ops\math_ops.py:530 multiply
        return gen_math_ops.mul(x, y, name)
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:6244 mul
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\Users\Yunqing\.conda\envs\tfgpu\lib\site-packages\tensorflow\python\framework\op_def_library.py:555 _apply_op_helper
        raise TypeError(

    TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type int64 of argument 'x'.


In [39]:
pred = model.predict(test_model_input)
pred

[array([[0.70129216],
        [0.70129216],
        [0.70129216],
        ...,
        [0.70678955],
        [0.70614773],
        [0.7061447 ]], dtype=float32),
 array([[0.13284911],
        [0.13284911],
        [0.13284911],
        ...,
        [0.14583455],
        [0.14618456],
        [0.14618663]], dtype=float32),
 array([[0.02366647],
        [0.02366647],
        [0.02366647],
        ...,
        [0.04442582],
        [0.07810827],
        [0.00534914]], dtype=float32)]

In [40]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.7768123138033763
0.9046673286991063
0.974180734856008


In [41]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5130123097607477
0.5193731347763446
0.5141180016466714


## Model 2

In [51]:
temp = variables[variables['model2'] != 'delete']
data = data_origin[temp['variables'].tolist()]
data

Unnamed: 0,TIME_DXDATE_TESTDATE,time_window_index_year,VS,VR,VB,dx_yr,dx_mth,age,sex,race,...,msld.cum_1,metacanc.cum_1,aids.cum_1,Depression_1,Anxiety_1,Psychiatric_disorder_1,Alcohol_use_1,Tobacco_use_1,Illicit_drug_use_1,visits_1
0,591.000000,2,0,0,0,2005,12,39,F,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,4
1,1018.000000,3,0,0,0,2005,12,39,F,Black,...,0.0,0.0,0.0,0,1,0,1,1,1,2
2,2072.250000,6,1,0,0,2005,12,39,F,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,2
3,2495.333333,7,1,0,0,2005,12,39,F,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,8
4,2793.333333,8,1,0,0,2005,12,39,F,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40274,2737.500000,8,1,0,0,2009,9,48,M,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,3
40275,3193.000000,9,1,0,0,2009,9,48,M,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,8
40276,3499.666667,10,1,0,0,2009,9,48,M,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,3
40277,3884.000000,11,1,0,0,2009,9,48,M,Black,...,0.0,0.0,0.0,0,0,0,0,0,0,6


In [52]:
target = variables[variables['model2'] == 'outcome']['variables'].tolist()
sparse_features = variables[variables['model2'] == 'cat']['variables'].tolist()
dense_features = variables[variables['model2'] == 'num']['variables'].tolist()
varlen_features = []

In [53]:
target

['VS', 'VR', 'VB']

In [54]:
encoder = {}
# 稀疏特征编码
for featid in sparse_features:
    # print(f"编码ID字段：{featid}")
    encoder[featid] = {uid:ucode+1 for ucode,uid in enumerate(data[featid].unique())} 
    data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))
    
# 生成输入特征设置
sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
feature_names = sparse_features+varlen_features+dense_features

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))


In [55]:
n_train = round(data.shape[0] * 0.6)
n_val = round(data.shape[0] * 0.2)

train = data[:n_train]
val = data[n_train:(n_train+n_val)]
test = data[(n_train+n_val):]

In [56]:
# 构建输入数据
train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names } #训练模型的输入，字典类型。名称和具体值
val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names }
test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

train_labels = [train[y].values for y in target]
val_labels = [val[y].values for y in target]
test_labels = [test[y].values for y in target]

### Seperate models

#### VS

In [57]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VS"]
X_train.shape

(32223, 103)

In [58]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_38 (Dense)             (None, 64)                6656      
_________________________________________________________________
dense_39 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_40 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_41 (Dense)             (None, 1)                 65        
Total params: 23,297
Trainable params: 23,297
Non-trainable params: 0
_________________________________________________________________


In [63]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [64]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[1.        ],
       [1.        ],
       [1.        ],
       ...,
       [1.        ],
       [1.        ],
       [0.08003461]], dtype=float32)

In [65]:
print(accuracy_score(test_labels[0], (pred > 0.5).astype(int)))

0.660377358490566


In [66]:
print(roc_auc_score(test_labels[0], pred))

0.48872708783702357


#### VB

In [67]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VB"]

In [68]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 64)                6656      
_________________________________________________________________
dense_43 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_44 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_45 (Dense)             (None, 1)                 65        
Total params: 23,297
Trainable params: 23,297
Non-trainable params: 0
_________________________________________________________________


In [69]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [70]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.0000000e+00],
       [0.0000000e+00],
       [0.0000000e+00],
       ...,
       [0.0000000e+00],
       [0.0000000e+00],
       [1.8997129e-11]], dtype=float32)

In [71]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9046673286991063


In [72]:
print(roc_auc_score(test_labels[1], pred))

0.4995335154248994


#### VR

In [73]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VR"]

In [74]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_46 (Dense)             (None, 64)                6656      
_________________________________________________________________
dense_47 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_48 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_49 (Dense)             (None, 1)                 65        
Total params: 23,297
Trainable params: 23,297
Non-trainable params: 0
_________________________________________________________________


In [75]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [76]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.0000000e+00],
       [0.0000000e+00],
       [1.7517699e-38],
       ...,
       [0.0000000e+00],
       [0.0000000e+00],
       [1.0360856e-06]], dtype=float32)

In [77]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9023088381330685


In [78]:
print(roc_auc_score(test_labels[1], pred))

0.5173960691662094


### MMoE

In [79]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class MmoeLayer(tf.keras.layers.Layer):
    def __init__(self,expert_dim,n_expert,n_task):
        super(MmoeLayer, self).__init__()
        self.n_task = n_task
        self.expert_layer = [Dense(expert_dim,activation = 'relu') for i in range(n_expert)]
        self.gate_layers = [Dense(n_expert,activation = 'softmax') for i in range(n_task)]
    
    def call(self,x):
        #多个专家网络
        E_net = [expert(x) for expert in self.expert_layer]
        E_net = Concatenate(axis = 1)([e[:,tf.newaxis,:] for e in E_net]) #(bs,n_expert,n_dims)
        #多个门网络
        gate_net = [gate(x) for gate in self.gate_layers]     #n_task个(bs,n_expert)
        
        #每个towers等于，对应的门网络乘上所有的专家网络。
        towers = []
        for i in range(self.n_task):
            g = tf.expand_dims(gate_net[i],axis = -1)  #(bs,n_expert,1)
            _tower = tf.matmul(E_net, g,transpose_a=True)
            towers.append(Flatten()(_tower))           #(bs,expert_dim)
            
        return towers

def build_mmoe(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim,
              varlens_cols,varlens_max_len,n_expert,n_task,target = [],
              dnn_hidden_units = (64,),dnn_reg_l2 = 1e-5,drop_rate = 0.1,
                embedding_reg_l2 = 1e-6):
    
    
    #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    
    #mmoe网络层
    towers = MmoeLayer(expert_dim,n_expert,n_task)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid', kernel_regularizer=regularizers.l2(dnn_reg_l2),
                     name = f,use_bias = True)(_t) for _t,f in zip(towers,target)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [80]:
# 构建模型，训练和评估
model = build_mmoe(sparse_features,dense_features,sparse_max_len,embed_dim = 64,expert_dim = 32,
          n_task = 3,n_expert = 6,varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
          dnn_hidden_units = (64,128,64),target = target,dnn_reg_l2 = 0.001,drop_rate = 0.1)

adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["accuracy"])
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
risk (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
region (InputLayer)             [(None, 1)]          0                                            
____________________________________________________________________________________________

In [81]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [82]:
pred = model.predict(test_model_input)
pred

[array([[0.71051854],
        [0.71051854],
        [0.71051854],
        ...,
        [0.7105179 ],
        [0.7105179 ],
        [0.7239818 ]], dtype=float32),
 array([[0.12581421],
        [0.12581421],
        [0.12581421],
        ...,
        [0.12581314],
        [0.12581314],
        [0.11457289]], dtype=float32),
 array([[0.01823806],
        [0.01823806],
        [0.01823806],
        ...,
        [0.01823815],
        [0.01823815],
        [0.01619718]], dtype=float32)]

In [83]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.7768123138033763
0.9046673286991063
0.974180734856008


In [84]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5288075756913242
0.5140566220442279
0.526198492511566


### PLE

In [85]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class PleLayer(tf.keras.layers.Layer):
    '''
    n_experts:list,每个任务使用几个expert。[2,3]第一个任务使用2个expert，第二个任务使用3个expert。
    n_expert_share:int,共享的部分设置的expert个数。
    expert_dim:int,每个专家网络输出的向量维度。
    n_task:int,任务个数。
    '''
    def __init__(self,n_task,n_experts,expert_dim,n_expert_share,dnn_reg_l2 = 1e-5):
        super(PleLayer, self).__init__()
        self.n_task = n_task
        
        # 生成多个任务特定网络和1个共享网络。
        self.E_layer = []
        for i in range(n_task):
            sub_exp = [Dense(expert_dim,activation = 'relu') for j in range(n_experts[i])]
            self.E_layer.append(sub_exp)
            
        self.share_layer = [Dense(expert_dim,activation = 'relu') for j in range(n_expert_share)]
        #定义门控网络
        self.gate_layers = [Dense(n_expert_share+n_experts[i],kernel_regularizer=regularizers.l2(dnn_reg_l2),
                                  activation = 'softmax') for i in range(n_task)]

    def call(self,x):
        #特定网络和共享网络
        E_net = [[expert(x) for expert in sub_expert] for sub_expert in self.E_layer]
        share_net = [expert(x) for expert in self.share_layer]
        
        #门的权重乘上，指定任务和共享任务的输出。
        towers = []
        for i in range(self.n_task):
            g = self.gate_layers[i](x)
            g = tf.expand_dims(g,axis = -1) #(bs,n_expert_share+n_experts[i],1)
            _e = share_net+E_net[i]  
            _e = Concatenate(axis = 1)([expert[:,tf.newaxis,:] for expert in _e]) #(bs,n_expert_share+n_experts[i],expert_dim)
            _tower = tf.matmul(_e, g,transpose_a=True)
            towers.append(Flatten()(_tower)) #(bs,expert_dim)
        return towers

def build_ple(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim = 4,
              varlens_cols = [],varlens_max_len = [],dnn_hidden_units = (64,64),
              n_task = 2,n_experts = [2,2],n_expert_share = 4,dnn_reg_l2 = 1e-6,
              drop_rate = 0.0,embedding_reg_l2 = 1e-6,targets = []):

   #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])    
                                  
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    #Ple网络层
    towers = PleLayer(n_task,n_experts,expert_dim,n_expert_share)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid',kernel_regularizer=regularizers.l2(dnn_reg_l2),
                       name = f,use_bias = True)(_t) for f,_t in zip(targets,towers)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [86]:
# 构建模型，训练和评估
model = build_ple(sparse_features,dense_features,sparse_max_len,embed_dim = 64,expert_dim = 16,
          varlens_cols = varlen_features,varlens_max_len = varlens_max_len,dnn_hidden_units = (64,64),
          n_task = 3,n_experts = [1,1,1],n_expert_share = 2,dnn_reg_l2 = 0.001,
          drop_rate = 0.1,embedding_reg_l2 = 0.001,targets = target)

adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["accuracy"],)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
risk (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
region (InputLayer)             [(None, 1)]          0                                            
____________________________________________________________________________________________

In [87]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [88]:
pred = model.predict(test_model_input)
pred

[array([[0.703185 ],
        [0.703185 ],
        [0.703185 ],
        ...,
        [0.703185 ],
        [0.703185 ],
        [0.6597895]], dtype=float32),
 array([[0.13506533],
        [0.13506533],
        [0.13506533],
        ...,
        [0.13506533],
        [0.13506533],
        [0.06071173]], dtype=float32),
 array([[0.05286883],
        [0.05286883],
        [0.05286883],
        ...,
        [0.05286883],
        [0.05286883],
        [0.00869927]], dtype=float32)]

In [89]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.7768123138033763
0.9046673286991063
0.974180734856008


In [90]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5095318703961043
0.4911196415912001
0.5350174958833216


## Model 3

In [101]:
temp = variables[variables['model3'] != 'delete']
data = data_origin[temp['variables'].tolist()]
data

Unnamed: 0,TIME_DXDATE_TESTDATE,time_window_index_year,VL_interpretation,VL,VS,VR,VB,Months_to_ini_VS,VR_N,VR_size,...,msld.cum_1,metacanc.cum_1,aids.cum_1,Depression_1,Anxiety_1,Psychiatric_disorder_1,Alcohol_use_1,Tobacco_use_1,Illicit_drug_use_1,visits_1
0,591.000000,2,=,2902.0,0,0,0,69.075000,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,4
1,1018.000000,3,=,4804.0,0,0,0,69.075000,0,none,...,0.0,0.0,0.0,0,1,0,1,1,1,2
2,2072.250000,6,<,200.0,1,0,0,69.075000,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,2
3,2495.333333,7,<,200.0,1,0,0,69.075000,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,8
4,2793.333333,8,<,200.0,1,0,0,69.075000,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40274,2737.500000,8,<,200.0,1,0,0,17.616667,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,3
40275,3193.000000,9,<,200.0,1,0,0,17.616667,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,8
40276,3499.666667,10,<,200.0,1,0,0,17.616667,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,3
40277,3884.000000,11,<,200.0,1,0,0,17.616667,0,none,...,0.0,0.0,0.0,0,0,0,0,0,0,6


In [102]:
target = variables[variables['model3'] == 'outcome']['variables'].tolist()
sparse_features = variables[variables['model3'] == 'cat']['variables'].tolist()
dense_features = variables[variables['model3'] == 'num']['variables'].tolist()
varlen_features = []

In [103]:
target

['VS', 'VR', 'VB']

In [104]:
encoder = {}
# 稀疏特征编码
for featid in sparse_features:
    # print(f"编码ID字段：{featid}")
    encoder[featid] = {uid:ucode+1 for ucode,uid in enumerate(data[featid].unique())} 
    data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))
    
# 生成输入特征设置
sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
feature_names = sparse_features+varlen_features+dense_features

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))


In [105]:
n_train = round(data.shape[0] * 0.6)
n_val = round(data.shape[0] * 0.2)

train = data[:n_train]
val = data[n_train:(n_train+n_val)]
test = data[(n_train+n_val):]

In [106]:
# 构建输入数据
train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names } #训练模型的输入，字典类型。名称和具体值
val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names }
test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

train_labels = [train[y].values for y in target]
val_labels = [val[y].values for y in target]
test_labels = [test[y].values for y in target]

### Seperate models

#### VS

In [107]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VS"]
X_train.shape

(32223, 121)

In [108]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_72 (Dense)             (None, 64)                7808      
_________________________________________________________________
dense_73 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_74 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_75 (Dense)             (None, 1)                 65        
Total params: 24,449
Trainable params: 24,449
Non-trainable params: 0
_________________________________________________________________


In [109]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [110]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)

In [111]:
print(accuracy_score(test_labels[0], (pred > 0.5).astype(int)))

0.8597318768619663


In [112]:
print(roc_auc_score(test_labels[0], pred))

0.6922403839214837


In [113]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

# 训练逻辑回归模型
log_reg = LogisticRegression(class_weight='balanced', solver='liblinear', random_state=42)
log_reg.fit(X_train.to_numpy(), np.array(y_train))

# 在测试集上进行预测
log_pred_prob = log_reg.predict_proba(X_test)[:, 1]  # 获取正类的概率
log_pred = (log_pred_prob > 0.5).astype(int)  # 转换为二分类预测

# 计算 Accuracy 和 AUC
log_accuracy = accuracy_score(test_labels[0], log_pred)
log_auc = roc_auc_score(test_labels[0], log_pred_prob)

print(f"Logistic Regression Accuracy: {log_accuracy:.4f}")
print(f"Logistic Regression AUC: {log_auc:.4f}")

Logistic Regression Accuracy: 0.9018
Logistic Regression AUC: 0.8414




#### VB

In [107]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VB"]

In [108]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_76 (Dense)             (None, 64)                7808      
_________________________________________________________________
dense_77 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_78 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_79 (Dense)             (None, 1)                 65        
Total params: 24,449
Trainable params: 24,449
Non-trainable params: 0
_________________________________________________________________


In [113]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [114]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[0.0000000e+00],
       [0.0000000e+00],
       [0.0000000e+00],
       ...,
       [0.0000000e+00],
       [0.0000000e+00],
       [1.6281023e-23]], dtype=float32)

In [115]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9046673286991063


In [116]:
print(roc_auc_score(test_labels[1], pred))

0.4832440741630077


#### VR

In [117]:
X_train = data[:(n_train+n_val)].copy()
X_train.drop(columns=target, inplace=True)
y_train = data[:(n_train+n_val)]["VR"]

In [118]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')   # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_80 (Dense)             (None, 64)                7808      
_________________________________________________________________
dense_81 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_82 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_83 (Dense)             (None, 1)                 65        
Total params: 24,449
Trainable params: 24,449
Non-trainable params: 0
_________________________________________________________________


In [123]:
# Train the model
history = model.fit(X_train.to_numpy(), np.array(y_train), epochs=20, batch_size=1000, validation_split=0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [124]:
X_test = test.copy()
X_test.drop(columns=target, inplace=True)
pred = model.predict(X_test)
pred

array([[4.1128482e-30],
       [7.7010084e-30],
       [9.2051642e-30],
       ...,
       [1.9518611e-26],
       [4.8199402e-30],
       [2.0585395e-29]], dtype=float32)

In [125]:
print(accuracy_score(test_labels[1], (pred > 0.5).astype(int)))

0.9018123138033763


In [126]:
print(roc_auc_score(test_labels[1], pred))

0.6819560514715971


### MMoE

In [121]:
# 先将所有任务合并，创建一个多标签数据集
y_train_combined = np.vstack([data[:(n_train+n_val)]["VS"], data[:(n_train+n_val)]["VB"], data[:(n_train+n_val)]["VR"]]).T  # 转换为 (样本数, 3)

# 进行 SMOTE 采样（使用多标签策略）
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled_combined = smote.fit_resample(X_train, y_train_combined)

# 拆分出三个任务的 y
y_resampled_vs = y_resampled_combined[:, 0]
y_resampled_vb = y_resampled_combined[:, 1]
y_resampled_vr = y_resampled_combined[:, 2]

# 打印新分布
print("Resampled VS:", pd.Series(y_resampled_vs).value_counts())
print("Resampled VB:", pd.Series(y_resampled_vb).value_counts())
print("Resampled VR:", pd.Series(y_resampled_vr).value_counts())


Resampled VS: 0    55232
1    27616
dtype: int64
Resampled VB: 0    55232
1    27616
dtype: int64
Resampled VR: 0    55232
1    27616
dtype: int64


In [123]:
y_resampled_combined

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       ...,
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1]])

In [134]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class MmoeLayer(tf.keras.layers.Layer):
    def __init__(self,expert_dim,n_expert,n_task):
        super(MmoeLayer, self).__init__()
        self.n_task = n_task
        self.expert_layer = [Dense(expert_dim,activation = 'relu') for i in range(n_expert)]
        self.gate_layers = [Dense(n_expert,activation = 'softmax') for i in range(n_task)]
    
    def call(self,x):
        #多个专家网络
        E_net = [expert(x) for expert in self.expert_layer]
        E_net = Concatenate(axis = 1)([e[:,tf.newaxis,:] for e in E_net]) #(bs,n_expert,n_dims)
        #多个门网络
        gate_net = [gate(x) for gate in self.gate_layers]     #n_task个(bs,n_expert)
        
        #每个towers等于，对应的门网络乘上所有的专家网络。
        towers = []
        for i in range(self.n_task):
            g = tf.expand_dims(gate_net[i],axis = -1)  #(bs,n_expert,1)
            _tower = tf.matmul(E_net, g,transpose_a=True)
            towers.append(Flatten()(_tower))           #(bs,expert_dim)
            
        return towers

def build_mmoe(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim,
              varlens_cols,varlens_max_len,n_expert,n_task,target = [],
              dnn_hidden_units = (64,),dnn_reg_l2 = 1e-5,drop_rate = 0.1,
                embedding_reg_l2 = 1e-6):
    
    
    #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    
    #mmoe网络层
    towers = MmoeLayer(expert_dim,n_expert,n_task)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid', kernel_regularizer=regularizers.l2(dnn_reg_l2),
                     name = f,use_bias = True)(_t) for _t,f in zip(towers,target)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [135]:
# 构建模型，训练和评估
model = build_mmoe(sparse_features,dense_features,sparse_max_len,embed_dim = 32,expert_dim = 32,
          n_task = 3,n_expert = 4,varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
          dnn_hidden_units = (64,128,64),target = target,dnn_reg_l2 = 0.001,drop_rate = 0.1)

adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["AUC"])
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
VL_interpretation (InputLayer)  [(None, 1)]          0                                            
__________________________________________________________________________________________________
VR_size (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
____________________________________________________________________________________________

In [117]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [118]:
pred = model.predict(test_model_input)
pred

[array([[0.69262433],
        [0.69262433],
        [0.69262433],
        ...,
        [0.69262433],
        [0.69262433],
        [0.739012  ]], dtype=float32),
 array([[0.14249949],
        [0.14249949],
        [0.14249949],
        ...,
        [0.14249949],
        [0.14249949],
        [0.13933715]], dtype=float32),
 array([[0.02694018],
        [0.02694018],
        [0.02694018],
        ...,
        [0.02694018],
        [0.02694018],
        [0.03007615]], dtype=float32)]

In [119]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.775695134061569
0.9046673286991063
0.974180734856008


In [120]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5594468446350852
0.5284342090594127
0.5317590713949658


### New MMoE

In [136]:
from imblearn.over_sampling import RandomOverSampler

# 先将所有任务合并，创建一个多标签数据集
y_train_combined = np.vstack([train_labels[0], train_labels[1], train_labels[2]]).T  # (样本数, 3)

# # 进行 SMOTE 采样（使用多标签策略）
# smote = SMOTE(sampling_strategy='auto', random_state=42)
# X_resampled, y_resampled_combined = smote.fit_resample(pd.DataFrame(train_model_input), y_train_combined)

ros = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled_combined = ros.fit_resample(pd.DataFrame(train_model_input), y_train_combined)

# 拆分出三个任务的 y
y_resampled_vs = y_resampled_combined[:, 0]
y_resampled_vb = y_resampled_combined[:, 1]
y_resampled_vr = y_resampled_combined[:, 2]

# 生成新的训练标签
train_labels_resampled = [y_resampled_vs, y_resampled_vb, y_resampled_vr]

# 打印新数据分布
print("Resampled VS Distribution:", pd.Series(y_resampled_vs).value_counts())
print("Resampled VB Distribution:", pd.Series(y_resampled_vb).value_counts())
print("Resampled VR Distribution:", pd.Series(y_resampled_vr).value_counts())

Resampled VS Distribution: 0    41172
1    20586
dtype: int64
Resampled VB Distribution: 0    41172
1    20586
dtype: int64
Resampled VR Distribution: 0    41172
1    20586
dtype: int64


In [137]:
# 重新构建 train_model_input
train_model_input_resampled = {name: X_resampled[name] if name not in varlen_features 
                               else np.stack(X_resampled[name]) for name in feature_names}

In [138]:
# 重新编译模型
adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss='binary_crossentropy', metrics=["AUC"])

# 使用 Oversampled 训练数据训练模型
history = model.fit(train_model_input_resampled, train_labels_resampled,
                    validation_data=(val_model_input, val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [139]:
# 预测测试集
preds = model.predict(test_model_input)

# 计算评估指标
from sklearn.metrics import accuracy_score, roc_auc_score

# 计算每个任务的 AUC 和 Accuracy
for i, task in enumerate(target):
    auc = roc_auc_score(test_labels[i], preds[i])
    acc = accuracy_score(test_labels[i], (preds[i] > 0.5).astype(int))
    print(f"{task} - Accuracy: {acc:.4f}, AUC: {auc:.4f}")

VS - Accuracy: 0.2232, AUC: 0.5000
VR - Accuracy: 0.9047, AUC: 0.5000
VB - Accuracy: 0.9742, AUC: 0.5000


### PLE

In [133]:
class MeanPoolLayer(Layer):
    def __init__(self, axis, **kwargs):
        super(MeanPoolLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class PleLayer(tf.keras.layers.Layer):
    '''
    n_experts:list,每个任务使用几个expert。[2,3]第一个任务使用2个expert，第二个任务使用3个expert。
    n_expert_share:int,共享的部分设置的expert个数。
    expert_dim:int,每个专家网络输出的向量维度。
    n_task:int,任务个数。
    '''
    def __init__(self,n_task,n_experts,expert_dim,n_expert_share,dnn_reg_l2 = 1e-5):
        super(PleLayer, self).__init__()
        self.n_task = n_task
        
        # 生成多个任务特定网络和1个共享网络。
        self.E_layer = []
        for i in range(n_task):
            sub_exp = [Dense(expert_dim,activation = 'relu') for j in range(n_experts[i])]
            self.E_layer.append(sub_exp)
            
        self.share_layer = [Dense(expert_dim,activation = 'relu') for j in range(n_expert_share)]
        #定义门控网络
        self.gate_layers = [Dense(n_expert_share+n_experts[i],kernel_regularizer=regularizers.l2(dnn_reg_l2),
                                  activation = 'softmax') for i in range(n_task)]

    def call(self,x):
        #特定网络和共享网络
        E_net = [[expert(x) for expert in sub_expert] for sub_expert in self.E_layer]
        share_net = [expert(x) for expert in self.share_layer]
        
        #门的权重乘上，指定任务和共享任务的输出。
        towers = []
        for i in range(self.n_task):
            g = self.gate_layers[i](x)
            g = tf.expand_dims(g,axis = -1) #(bs,n_expert_share+n_experts[i],1)
            _e = share_net+E_net[i]  
            _e = Concatenate(axis = 1)([expert[:,tf.newaxis,:] for expert in _e]) #(bs,n_expert_share+n_experts[i],expert_dim)
            _tower = tf.matmul(_e, g,transpose_a=True)
            towers.append(Flatten()(_tower)) #(bs,expert_dim)
        return towers

def build_ple(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim = 4,
              varlens_cols = [],varlens_max_len = [],dnn_hidden_units = (64,64),
              n_task = 2,n_experts = [2,2],n_expert_share = 4,dnn_reg_l2 = 1e-6,
              drop_rate = 0.0,embedding_reg_l2 = 1e-6,targets = []):

   #输入部分，分为sparse,varlens,dense部分。
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}
        
    input_embed = {}
    #离散特征，embedding到k维
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim, 
            embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2)) 
        input_embed[f] =Flatten()(embedding(_input)) #(bs,k)
        
    #多标签离散变量
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed =Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MeanPoolLayer(axis=1)(_embed,mask)
        input_embed[f] = out_embed
        
    input_embed.update(dense_inputs) #加入连续变量
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])    
                                  
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                    kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    #Ple网络层
    towers = PleLayer(n_task,n_experts,expert_dim,n_expert_share)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid',kernel_regularizer=regularizers.l2(dnn_reg_l2),
                       name = f,use_bias = True)(_t) for f,_t in zip(targets,towers)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
                +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs) 
    return model

In [134]:
# 构建模型，训练和评估
model = build_ple(sparse_features,dense_features,sparse_max_len,embed_dim = 64,expert_dim = 16,
          varlens_cols = varlen_features,varlens_max_len = varlens_max_len,dnn_hidden_units = (64,64),
          n_task = 3,n_experts = [1,1,1],n_expert_share = 2,dnn_reg_l2 = 0.001,
          drop_rate = 0.1,embedding_reg_l2 = 0.001,targets = target)

adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(adam, loss = 'binary_crossentropy' ,metrics = ["accuracy"],)
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
VL_interpretation (InputLayer)  [(None, 1)]          0                                            
__________________________________________________________________________________________________
VR_size (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
sex (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
race (InputLayer)               [(None, 1)]          0                                            
____________________________________________________________________________________________

In [139]:
history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                    batch_size=1000, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [140]:
pred = model.predict(test_model_input)
pred

[array([[0.6958191],
        [0.6958191],
        [0.6958191],
        ...,
        [0.6958191],
        [0.6958191],
        [0.6958191]], dtype=float32),
 array([[0.13125898],
        [0.13125898],
        [0.13125898],
        ...,
        [0.13125898],
        [0.13125898],
        [0.13125898]], dtype=float32),
 array([[0.03083998],
        [0.03083998],
        [0.03083998],
        ...,
        [0.03083998],
        [0.03083998],
        [0.03083998]], dtype=float32)]

In [141]:
print(accuracy_score(test_labels[0], (pred[0] > 0.5).astype(int)))
print(accuracy_score(test_labels[1], (pred[1] > 0.5).astype(int)))
print(accuracy_score(test_labels[2], (pred[2] > 0.5).astype(int)))

0.7768123138033763
0.9046673286991063
0.974180734856008


In [142]:
print(roc_auc_score(test_labels[0], pred[0]))
print(roc_auc_score(test_labels[1], pred[1]))
print(roc_auc_score(test_labels[2], pred[2]))

0.5
0.5
0.5903414882772681
