<a href="https://colab.research.google.com/github/yeyekang/Comparison-of-Multi-task-Models/blob/main/MMoE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.metrics import roc_auc_score

# ---------------------------
# MMoE Layer
# ---------------------------
class MMoE(layers.Layer):
    """Multi-gate Mixture-of-Experts layer.

    A bank of ``num_experts`` ReLU experts is shared by all tasks. Each of
    the ``num_tasks`` tasks owns a softmax gate that mixes the expert
    outputs into a task-specific representation.

    Args:
        units: Output width of every expert (and of each task output).
        num_experts: Number of shared experts.
        num_tasks: Number of gates, i.e. number of tensors returned.
        **kwargs: Forwarded to ``keras.layers.Layer``.

    Raises:
        ValueError: If ``units``, ``num_experts`` or ``num_tasks`` is
            smaller than 1.
    """

    def __init__(self, units, num_experts, num_tasks, **kwargs):
        super().__init__(**kwargs)
        # Fail early with a readable message instead of building empty or
        # invalid weight tensors later in build().
        for arg_name, value in (
            ("units", units),
            ("num_experts", num_experts),
            ("num_tasks", num_tasks),
        ):
            if value < 1:
                raise ValueError(
                    f"`{arg_name}` must be a positive integer, got {value!r}."
                )
        self.units = units
        self.num_experts = num_experts
        self.num_tasks = num_tasks

    def build(self, input_shape):
        """Create expert and gate weights sized from the last input axis."""
        input_dim = int(input_shape[-1])

        # Experts: one (input_dim, units) kernel and one bias per expert,
        # stacked along the leading axis.
        self.expert_kernels = self.add_weight(
            name="expert_kernels",
            shape=(self.num_experts, input_dim, self.units),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.expert_bias = self.add_weight(
            name="expert_bias",
            shape=(self.num_experts, self.units),
            initializer="zeros",
            trainable=True,
        )

        # Gates: one (input_dim, num_experts) kernel and one bias per task.
        self.gate_kernels = self.add_weight(
            name="gate_kernels",
            shape=(self.num_tasks, input_dim, self.num_experts),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.gate_bias = self.add_weight(
            name="gate_bias",
            shape=(self.num_tasks, self.num_experts),
            initializer="zeros",
            trainable=True,
        )

        super().build(input_shape)

    def call(self, inputs):
        """Return one gated expert mixture per task.

        Args:
            inputs: Rank-2 tensor ``(batch, input_dim)`` (the einsum
                subscripts below require exactly two axes).

        Returns:
            A list of ``num_tasks`` tensors, each ``(batch, units)``.
        """
        # Experts: (batch, num_experts, units)
        expert_outputs = tf.einsum("bi,eiu->beu", inputs, self.expert_kernels) + self.expert_bias
        expert_outputs = tf.nn.relu(expert_outputs)

        outputs = []
        for i in range(self.num_tasks):
            # Per-task gate: a distribution over experts, (batch, num_experts).
            gate_logits = tf.matmul(inputs, self.gate_kernels[i]) + self.gate_bias[i]
            gate_softmax = tf.nn.softmax(gate_logits, axis=-1)
            # Weighted sum of experts: (batch, units)
            weighted_expert_output = tf.einsum("beu,be->bu", expert_outputs, gate_softmax)
            outputs.append(weighted_expert_output)

        return outputs

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Needed for saving/loading and cloning models that contain this
        layer (``from_config`` rebuilds it via ``MMoE(**config)``).
        """
        config = super().get_config()
        config.update(
            {
                "units": self.units,
                "num_experts": self.num_experts,
                "num_tasks": self.num_tasks,
            }
        )
        return config


# ---------------------------
# Build Model
# ---------------------------
def build_mmoe_model(input_dim, num_experts=8, units=16):
    """Assemble and compile the two-task MMoE classifier.

    The shared MMoE layer feeds one small tower per task; each tower is a
    ReLU ``Dense(8)`` followed by a 2-way softmax head named after the task
    ("income", "marital").

    Args:
        input_dim: Number of input features.
        num_experts: Number of experts in the shared MMoE layer.
        units: Output width of each expert.

    Returns:
        A compiled Keras ``Model`` whose outputs are
        ``[income_probs, marital_probs]``.
    """
    task_names = ("income", "marital")

    features = layers.Input(shape=(input_dim,))
    shared = MMoE(
        units=units,
        num_experts=num_experts,
        num_tasks=len(task_names),
    )(features)

    # One tower per task, created in task order: hidden layer, then head.
    heads = []
    for task_idx, task_name in enumerate(task_names):
        hidden = layers.Dense(8, activation="relu")(shared[task_idx])
        head = layers.Dense(2, activation="softmax", name=task_name)(hidden)
        heads.append(head)

    model = Model(inputs=features, outputs=heads)
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics={task_name: "accuracy" for task_name in task_names},
    )
    return model


# ---------------------------
# Training & Evaluation
# ---------------------------
def evaluate_model(model, x_train, y_train, x_val, y_val, x_test, y_test, epochs):
    """Train one epoch at a time and print per-task ROC AUC after each epoch.

    Args:
        model: Compiled model with outputs named "income" and "marital".
        x_train, x_val, x_test: Feature arrays for each split.
        y_train, y_val, y_test: Label arrays indexed as ``y[:, 0]`` (income)
            and ``y[:, 1]`` (marital).
        epochs: Number of single-epoch ``fit`` rounds to run.

    Returns:
        None. Metrics are printed to stdout.
    """
    task_names = ("income", "marital")

    def _by_task(labels):
        # Map each output name to its label column.
        return {name: labels[:, col] for col, name in enumerate(task_names)}

    fit_targets = _by_task(y_train)
    val_targets = _by_task(y_val)
    label_splits = (y_train, y_val, y_test)

    for epoch_idx in range(epochs):
        # epochs=1 so that AUC can be computed between epochs.
        model.fit(
            x_train,
            fit_targets,
            validation_data=(x_val, val_targets),
            epochs=1,
            batch_size=128,
            verbose=1,
        )

        # Class probabilities for every split, in train/val/test order.
        prob_splits = [
            model.predict(features, verbose=0)
            for features in (x_train, x_val, x_test)
        ]

        for col, name in enumerate(task_names):
            # Column 1 of each head is used as the positive-class score.
            train_auc, val_auc, test_auc = (
                roc_auc_score(labels[:, col], probs[col][:, 1])
                for labels, probs in zip(label_splits, prob_splits)
            )
            print(f"[Epoch {epoch_idx+1}] {name} - AUC: Train={train_auc:.4f}, Val={val_auc:.4f}, Test={test_auc:.4f}")


# ---------------------------
# Main
# ---------------------------
def main():
    """Build the MMoE model and run it on randomly generated placeholder data."""
    # TODO: replace this with your real data loading.
    # The data is assumed to already be np.array, with y holding two
    # columns: [income, marital].
    num_features = 98

    def _random_split(num_rows):
        # Features are drawn before labels, one split at a time.
        features = np.random.rand(num_rows, num_features).astype(np.float32)
        labels = np.random.randint(0, 2, size=(num_rows, 2))
        return features, labels

    x_train, y_train = _random_split(34189)
    x_val, y_val = _random_split(7326)
    x_test, y_test = _random_split(7327)

    print("Training data shape:", x_train.shape)
    print("Validation data shape:", x_val.shape)
    print("Test data shape:", x_test.shape)

    model = build_mmoe_model(input_dim=x_train.shape[1])
    evaluate_model(
        model,
        x_train,
        y_train,
        x_val,
        y_val,
        x_test,
        y_test,
        epochs=10,
    )


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Training data shape: (34189, 98)
Validation data shape: (7326, 98)
Test data shape: (7327, 98)
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - income_accuracy: 0.4996 - income_loss: 0.6944 - loss: 1.3877 - marital_accuracy: 0.5050 - marital_loss: 0.6933 - val_income_accuracy: 0.4993 - val_income_loss: 0.6966 - val_loss: 1.3900 - val_marital_accuracy: 0.4999 - val_marital_loss: 0.6932
[Epoch 1] income - AUC: Train=0.5246, Val=0.4884, Test=0.5051
[Epoch 1] marital - AUC: Train=0.5075, Val=0.4937, Test=0.4917
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - income_accuracy: 0.5065 - income_loss: 0.6934 - loss: 1.3866 - marital_accuracy: 0.5068 - marital_loss: 0.6931 - val_income_accuracy: 0.4993 - val_income_loss: 0.6959 - val_loss: 1.3893 - val_marital_accuracy: 0.4997 - val_marital_loss: 0.6932
[Epoch 2] income - AUC: Train=0.5290, Val=0.4851, Test=0.5094
[Epoch 2] marital - AUC: Train=0.5037, Val=0.4946, Test=0.4995
[1m268/268