In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# 用户塔网络
class UserTower(nn.Module):
    """User-side tower that turns raw user features into an embedding.

    The tower is two linear layers, each followed by a ReLU, so every
    component of the returned embedding is non-negative.

    Args:
        user_feature_size: Size of the last dimension of the input features.
        embedding_dim: Width of the hidden layer and of the output embedding.
    """

    def __init__(self, user_feature_size, embedding_dim):
        super().__init__()
        # Input projection, then one hidden layer of the same width.
        self.embedding = nn.Linear(user_feature_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, embedding_dim)

    def forward(self, user_features):
        """Embed ``user_features``.

        Args:
            user_features: Float tensor whose last dimension is
                ``user_feature_size``.

        Returns:
            Tensor with the same leading dimensions as the input and a last
            dimension of ``embedding_dim``; values are >= 0 because the final
            activation is a ReLU.
        """
        hidden = user_features
        # Apply linear -> ReLU for each layer in turn.
        for layer in (self.embedding, self.fc1):
            hidden = F.relu(layer(hidden))
        return hidden

# 视频塔网络
class VideoTower(nn.Module):
    """Video-side tower that turns raw video features into an embedding.

    The tower is two linear layers, each followed by a ReLU, so every
    component of the returned embedding is non-negative.

    Args:
        video_feature_size: Size of the last dimension of the input features.
        embedding_dim: Width of the hidden layer and of the output embedding.
    """

    def __init__(self, video_feature_size, embedding_dim):
        super().__init__()
        # Input projection, then one hidden layer of the same width.
        self.embedding = nn.Linear(video_feature_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, embedding_dim)

    def forward(self, video_features):
        """Embed ``video_features``.

        Args:
            video_features: Float tensor whose last dimension is
                ``video_feature_size``.

        Returns:
            Tensor with the same leading dimensions as the input and a last
            dimension of ``embedding_dim``; values are >= 0 because the final
            activation is a ReLU.
        """
        hidden = video_features
        # Apply linear -> ReLU for each layer in turn.
        for layer in (self.embedding, self.fc1):
            hidden = F.relu(layer(hidden))
        return hidden

# 双塔召回模型
class TwoTowerModel(nn.Module):
    """Two-tower retrieval model scoring users against videos.

    Each side is embedded by its own tower, both embeddings are L2-normalised
    along the embedding axis, and the score is their dot product, i.e. the
    cosine similarity between every user and every video.

    Args:
        user_feature_size: Size of the last dimension of the user features.
        video_feature_size: Size of the last dimension of the video features.
        embedding_dim: Shared embedding width of both towers.
    """

    def __init__(self, user_feature_size, video_feature_size, embedding_dim):
        super().__init__()
        self.user_tower = UserTower(user_feature_size, embedding_dim)
        self.video_tower = VideoTower(video_feature_size, embedding_dim)

    def forward(self, user_features, video_features):
        """Return the user-by-video cosine-similarity matrix.

        Args:
            user_features: Tensor of shape ``(..., num_users, user_feature_size)``.
            video_features: Tensor of shape
                ``(..., num_videos, video_feature_size)``. Leading batch
                dimensions must be broadcastable against those of
                ``user_features``.

        Returns:
            Tensor of shape ``(..., num_users, num_videos)``. For plain 2-D
            inputs this is ``(num_users, num_videos)``.
        """
        user_embeddings = self.user_tower(user_features)     # (..., num_users, embedding_dim)
        video_embeddings = self.video_tower(video_features)  # (..., num_videos, embedding_dim)

        # Normalise along the LAST axis (the embedding axis) rather than a
        # hard-coded dim=1, so optional leading batch dimensions are handled.
        user_embeddings = F.normalize(user_embeddings, p=2, dim=-1)
        video_embeddings = F.normalize(video_embeddings, p=2, dim=-1)

        # Dot product of unit vectors == cosine similarity. matmul with an
        # explicit transpose of the last two axes equals mm(u, v.T) for 2-D
        # inputs and additionally supports batched inputs.
        similarity = torch.matmul(user_embeddings, video_embeddings.transpose(-2, -1))
        return similarity

In [9]:
# Simulate user / video features and run a short full-batch training loop.
import numpy as np

# Seed every random source used below (NumPy for the synthetic data, torch
# for the model initialisation) so a fresh "Restart & Run All" reproduces the
# same losses.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Sizes of the synthetic problem.
num_users = 1000
num_videos = 2000
user_feature_size = 32  # user feature dimension
video_feature_size = 32  # video feature dimension
embedding_dim = 16  # embedding dimension shared by both towers

# Random features drawn uniformly from [0, 1).
user_features = torch.FloatTensor(np.random.rand(num_users, user_feature_size))
video_features = torch.FloatTensor(np.random.rand(num_videos, video_feature_size))

# Random binary labels, one per (user, video) pair.
labels = torch.FloatTensor(np.random.randint(0, 2, [num_users, num_videos]))

# Model, loss and optimiser.
model = TwoTowerModel(user_feature_size, video_feature_size, embedding_dim)
criterion = nn.BCELoss()  # binary cross-entropy; requires inputs in [0, 1]
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(10):
    optimizer.zero_grad()

    # Full-batch forward pass: every user is scored against every video
    # (no sampling is performed).
    similarities = model(user_features, video_features)

    # Both towers end in a ReLU, so the cosine similarity is non-negative,
    # but floating-point rounding can push a value marginally above 1.0,
    # which makes BCELoss raise. Clamp to the valid probability range first.
    loss = criterion(similarities.clamp(0.0, 1.0), labels)

    # Back-propagate and update the parameters.
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


Epoch 1, Loss: 0.9904051423072815
Epoch 2, Loss: 0.929792582988739
Epoch 3, Loss: 0.8764756321907043
Epoch 4, Loss: 0.8308658599853516
Epoch 5, Loss: 0.7928587794303894
Epoch 6, Loss: 0.7624101638793945
Epoch 7, Loss: 0.7391881942749023
Epoch 8, Loss: 0.7227169275283813
Epoch 9, Loss: 0.7123593688011169
Epoch 10, Loss: 0.7074384093284607
