In [21]:
import torch.nn as nn
import torch.nn.functional as F
import torch

def kl_loss_with_uniform_distribution(probabilities):
    # probabilities shape: (36, 6), 每个step的6个动作的执行概率

    # 理想的均匀分布，目标是每个动作的概率都是1/6
    uniform_distribution = torch.full_like(probabilities, 1.0 / 6)

    # 计算KL散度
    kl_divergence = F.kl_div(probabilities.log(), uniform_distribution, reduction='batchmean')

    return kl_divergence

def kl_loss_with_uniform_distribution2(probabilities):
    # probabilities shape: (36, 6), 每个step的6个动作的执行概率
    probabilities = probabilities.mean(0)

    # 理想的均匀分布，目标是每个动作的概率都是1/6
    uniform_distribution = torch.full_like(probabilities, 1.0 / 6)

    # 计算KL散度
    kl_divergence = F.kl_div(probabilities.log(), uniform_distribution, reduction='batchmean')

    return kl_divergence

probs = torch.rand((36, 6))
loss = kl_loss_with_uniform_distribution(probs)
loss2 = kl_loss_with_uniform_distribution2(probs)
print(loss)
print(loss2)

tensor(-0.7597)
tensor(-0.1707)


In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class GumbelSoftmaxLayer(nn.Module):
    def __init__(self, input_dim, output_dim, temperature=1.0):
        super(GumbelSoftmaxLayer, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.temperature = temperature
    
    def forward(self, x):
        logits = self.fc(x)
        return self.gumbel_softmax(logits, self.temperature)
    
    def gumbel_softmax(self, logits, temperature):
        gumbels = -torch.empty_like(logits).exponential_().log()  # Sample from Gumbel(0,1)
        gumbels = (logits + gumbels) / temperature
        y_soft = F.softmax(gumbels, dim=-1)
        return y_soft

class GumbelSoftmaxNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, temperature=1.0):
        super(GumbelSoftmaxNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.gumbel_softmax_layer = GumbelSoftmaxLayer(hidden_dim, output_dim, temperature)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.gumbel_softmax_layer(x)
        return x

In [6]:
next_t = GumbelSoftmaxNetwork(32, 64, 6, 0.5)

random_tensor = torch.rand(5, 1, 32)
print(random_tensor)

new_tensor = next_t(random_tensor)
print(new_tensor.sum(dim=-1))

tensor([[[0.5346, 0.3477, 0.6577, 0.7236, 0.6576, 0.5701, 0.0972, 0.9726,
          0.5077, 0.7952, 0.5268, 0.2911, 0.3786, 0.0643, 0.3246, 0.7095,
          0.7140, 0.5855, 0.9744, 0.8222, 0.2649, 0.0059, 0.8495, 0.5385,
          0.7412, 0.9559, 0.2478, 0.5536, 0.0400, 0.5015, 0.1610, 0.7644]],

        [[0.0535, 0.7681, 0.9845, 0.0975, 0.0649, 0.4816, 0.8915, 0.6737,
          0.3466, 0.5498, 0.0342, 0.6114, 0.4362, 0.1277, 0.7215, 0.3138,
          0.9904, 0.9386, 0.4814, 0.2683, 0.9539, 0.5305, 0.8033, 0.2631,
          0.6895, 0.8195, 0.0763, 0.1464, 0.7540, 0.5326, 0.3904, 0.9345]],

        [[0.8986, 0.4886, 0.9672, 0.2825, 0.2132, 0.4133, 0.0741, 0.8025,
          0.1921, 0.8384, 0.9725, 0.7110, 0.3050, 0.3421, 0.6498, 0.5822,
          0.4542, 0.9894, 0.4632, 0.0188, 0.2366, 0.4250, 0.7348, 0.0617,
          0.8287, 0.9939, 0.7216, 0.4708, 0.1142, 0.1647, 0.7365, 0.8480]],

        [[0.8470, 0.5153, 0.5823, 0.0641, 0.6579, 0.9118, 0.0278, 0.4377,
          0.6372, 0.2990, 0.4

In [1]:
a = True
b = not a
print(b)

False


In [12]:
import torch

def gram_schmidt(vectors):
    orthogonal_vectors = []
    for v in vectors:
        w = v.clone()
        for u in orthogonal_vectors:
            w -= (u @ w) * u
        w /= torch.norm(w)
        orthogonal_vectors.append(w)
    return torch.stack(orthogonal_vectors)

def generate_orthogonal_vectors(n, m):
    # 生成n个维度为m的随机向量
    random_vectors = torch.randn(n, m)
    # 进行Gram-Schmidt正交化
    orthogonal_vectors = gram_schmidt(random_vectors)
    return orthogonal_vectors

def check_orthogonality(vectors, tol=1e-6):
    n = vectors.size(0)
    for i in range(n):
        for j in range(i + 1, n):
            dot_product = torch.dot(vectors[i], vectors[j])
            if torch.abs(dot_product) > tol:
                return False, i, j, dot_product
    return True, None, None, None

# 示例使用
n = 3  # 生成3个向量
m = 5  # 每个向量的维度为5
orthogonal_vectors = generate_orthogonal_vectors(n, m)
print(orthogonal_vectors.shape)
# 检查正交性
is_orthogonal, i, j, dot_product = check_orthogonality(orthogonal_vectors)
if is_orthogonal:
    print("生成的向量是相互正交的。")
else:
    print(f"向量 {i} 和 {j} 不是正交的，它们的点积为 {dot_product}。")


torch.Size([3, 5])
生成的向量是相互正交的。
