In [7]:
import copy

# A list whose last element is itself a mutable list.
a = [1, 2, [3, 4]]

# b: shallow copy -- a new outer list that still references the same inner list.
# c: deep copy    -- the nested list is duplicated as well.
b, c = copy.copy(a), copy.deepcopy(a)

# Mutate the nested list through the original.
a[-1][0] = 'X'

# b shares the inner list with a and therefore shows the change; c does not.
print("原始列表 a:", a)
print("浅复制 b:", b)
print("深度复制 c:", c)

原始列表 a: [1, 2, ['X', 4]]
浅复制 b: [1, 2, ['X', 4]]
深度复制 c: [1, 2, [3, 4]]


In [None]:
import torch

# Conv1d parameters are float32, so the input has to be a floating-point
# tensor as well; an integer literal would produce an int64 tensor and the
# convolution would fail with a dtype mismatch.
# Shape (1, 2) is treated as an unbatched input: 1 channel, length 2.
a = torch.tensor([
    [1, 2]
], dtype=torch.float32)

# 1 input channel -> 1 output channel, kernel size 1: a per-position affine map.
b = torch.nn.Conv1d(1, 1, 1)

c = b(a)

print(c)

In [3]:
import torch
# One row of six integers, shape (1, 6).
a = torch.tensor([[1, 2, 3, 4, 5, 6]])

# Cut the column axis into chunks of width 3; six columns give exactly two chunks.
x1, x2 = torch.split(a, 3, dim=1)

print(x1.shape)
print(x2.shape)

torch.Size([1, 3])
torch.Size([1, 3])


In [29]:
import torch
import time

# Compare the time needed to transpose one 768 x 768 matrix against the time
# needed to transpose three 768 x 256 matrices.
#
# Timing uses time.perf_counter(): it is monotonic and has the highest
# available resolution, which matters for the microsecond-scale intervals
# measured here (wall-clock time.time() may be too coarse or be adjusted).
#
# Note: `.T` returns a view of the same storage, so no data is moved; the
# numbers below only reflect the cost of creating the view objects.

# Case 1: a single 768 x 768 matrix.
matrix1 = torch.randn(768, 768)

start_time = time.perf_counter()
matrix1_T = matrix1.T  # transpose
time1 = time.perf_counter() - start_time

# Case 2: three 768 x 256 matrices.
matrix2_1 = torch.randn(768, 256)
matrix2_2 = torch.randn(768, 256)
matrix2_3 = torch.randn(768, 256)

start_time = time.perf_counter()
matrix2_1_T = matrix2_1.T  # transpose of the first matrix
matrix2_2_T = matrix2_2.T  # transpose of the second matrix
matrix2_3_T = matrix2_3.T  # transpose of the third matrix
time2 = time.perf_counter() - start_time

# Report both measurements.
print(f"单个 768x768 矩阵转置时间：{time1:.6f} 秒")
print(f"三个 768x256 矩阵转置时间：{time2:.6f} 秒")

单个 768x768 矩阵转置时间：0.000127 秒
三个 768x256 矩阵转置时间：0.000133 秒


In [75]:
# 帮我写一个 pytorch 反向传播的最简单示例
import torch
import torch.nn as nn
import torch.optim as optim

# 定义一个简单的线性模型


class SimpleModel(nn.Module):
    """Small 2 -> 2 -> 2 -> 1 network whose first two linear layers share parameters."""

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(2, 2)
        self.linear2 = nn.Linear(2, 2)
        self.linear3 = nn.Linear(2, 1)
        # Weight tying: linear2 is pointed at linear1's Parameter objects, so
        # both layers read (and accumulate gradients into) the same tensors.
        self.linear2.weight = self.linear1.weight
        self.linear2.bias = self.linear1.bias
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply linear1 and linear2, each followed by ReLU, then linear3."""
        hidden = x
        for layer in (self.linear1, self.linear2):
            hidden = self.act(layer(hidden))
        return self.linear3(hidden)


# Instantiate the model.
model = SimpleModel()

# Toy data: 10 random samples with 2 features each, one random target per sample.
x, y = torch.randn(10, 2), torch.randn(10, 1)

# Loss function and optimizer.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Forward pass.
output = model(x)
loss = criterion(output, y)

# Backward pass: fills in .grad for the model's parameters.
loss.backward()

# linear1 and linear2 hold the same Parameter objects, so the two pairs of
# prints below show the same gradient tensors.
print('梯度1-w', model.linear1.weight.grad)
print('梯度1-b', model.linear1.bias.grad)

print('梯度2-w', model.linear2.weight.grad)
print('梯度2-b', model.linear2.bias.grad)


# Apply one SGD update.
optimizer.step()

print('---')
print('更新后的w1', model.linear1.weight)
print('更新后的w2', model.linear2.weight)

梯度1-w tensor([[ 0.3127, -0.0082],
        [-0.3650, -0.1062]])
梯度1-b tensor([ 0.6028, -0.5385])
梯度2-w tensor([[ 0.3127, -0.0082],
        [-0.3650, -0.1062]])
梯度2-b tensor([ 0.6028, -0.5385])
---
更新后的w1 Parameter containing:
tensor([[-0.1656,  0.2504],
        [ 0.2653,  0.3041]], requires_grad=True)
更新后的w2 Parameter containing:
tensor([[-0.1656,  0.2504],
        [ 0.2653,  0.3041]], requires_grad=True)


In [79]:
import torch
a = torch.tensor([1, 2, 3])
# Add a leading axis -> shape (1, 3), then tile it three times along that
# axis -> shape (3, 3), i.e. the row repeated three times.
a[None].repeat(3, 1)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

In [89]:
from atexit import register
import torch


def get_device():
    """Return the name of the preferred device: 'mps', else 'cuda', else 'cpu'."""
    if torch.backends.mps.is_available():
        return 'mps'
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'


class SimpleModel(torch.nn.Module):
    """One linear layer plus two constant tensors stored in two different ways.

    ``var_1`` is registered as a buffer, while ``var`` is an ordinary Python
    attribute that happens to hold a tensor.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(2, 2)
        # Registered buffer: tracked by the module (it shows up in named_buffers()).
        self.register_buffer('var_1', torch.tensor([1, 2]))
        # Plain attribute: just a tensor hanging off the instance.
        self.var = torch.tensor([1, 2], requires_grad=False)

    def forward(self, x):
        """Apply the single linear layer to ``x``."""
        return self.linear1(x)


model = SimpleModel()

# Use Apple's MPS backend when it is available, otherwise stay on the CPU.
device = torch.device("cpu")
if torch.backends.mps.is_available():
    device = torch.device("mps")
model.to(device)

# Show both tensors after the move: `var` is a plain attribute, `var_1` is a
# registered buffer.
model.var, model.var_1

(tensor([1, 2]), tensor([1, 2], device='mps:0'))

In [90]:
# Tuple concatenation: `+` on two tuples produces a new tuple holding the left
# operand's elements followed by the right operand's.
(1, 2) + (3, 4)

(1, 2, 3, 4)

In [106]:
import torch
import torch.nn.functional as F


def keep_top_k(logits, k, fill_value=-1e10):
    """Replace every logit that is not among the ``k`` largest of its row.

    Args:
        logits: float tensor whose last dimension holds the scores to filter.
        k: number of largest entries to keep along the last dimension.
        fill_value: value written over the discarded entries; large and
            negative so that a following softmax assigns them ~0 probability.

    Returns:
        A tensor shaped like ``logits`` in which entries strictly smaller than
        the row's k-th largest value are replaced by ``fill_value``.
    """
    kth_values, _ = torch.topk(logits, k=k)
    # Slice with `-1:` (not `-1`) so the threshold keeps a trailing axis of
    # size 1 and is compared row by row. Dropping that axis only happens to
    # broadcast correctly when the batch size is 1.
    threshold = kth_values[..., -1:]
    return torch.where(
        logits < threshold, torch.ones_like(logits) * fill_value, logits)


top_k = 3
logits = torch.tensor([[3, 2, 1, 4, 5]], dtype=torch.float32)
# Top-k values and their positions, kept available for inspection.
values, indices = torch.topk(logits, k=top_k)
tok_k_logits = keep_top_k(logits, top_k)
# Softmax over the filtered logits: the masked positions end up at probability 0.
F.softmax(tok_k_logits, dim=-1)

tensor([[0.0900, 0.0000, 0.0000, 0.2447, 0.6652]])

In [117]:
import torch

# NOTE: despite the variable name, these are plain probabilities (they are
# non-negative and sum to 1), not log-probabilities.
log_probs = torch.tensor([0.2, 0.3, 0.5])
# Draw three category indices, using the values above as sampling weights.
log_probs.multinomial(num_samples=3)

tensor([0, 2, 1])

In [137]:
import torch

# 3 x 3 lower-triangular matrix of ones.
a = torch.ones(3, 3).tril()
b = torch.ones(1, 3)
# Top-left 1 x 1 corner of `a`; that single entry is 1.
mask = a[0:1, 0:1]
# Positions where the (broadcast) mask equals 0 are overwritten with -1e9.
# The only mask entry is 1, so no position qualifies and b keeps its ones.
b = b.masked_fill(mask.eq(0), -1e9)
b

tensor([[1., 1., 1.]])

In [14]:
import torch

# 2 x 3 input matrix.
a = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
], dtype=torch.float32)

# Learnable 3 x 1 column of ones.
p = torch.nn.Parameter(torch.ones((3, 1), dtype=torch.float32))

# One-element bias; it is broadcast over the 2 x 1 matrix product.
bias = torch.ones((1))

# addmm(bias, a, p) computes bias + a @ p in a single call.
torch.addmm(bias, a, p)

tensor([[ 7.],
        [16.]], grad_fn=<AddmmBackward0>)

In [2]:
# Special tokens collected in a set.
vocab = {
    '<pad>',
    '<s>',
    '</s>',
    '<unk>',
}

# A set has no defined order, so the tokens are not necessarily printed in
# the order they were written above.
for word in vocab:
    print('word', word)

word </s>
word <s>
word <pad>
word <unk>


In [5]:
# `a` is not used by the expression below.
a = [1, 2, 3]
# rstrip() without arguments removes trailing whitespace; this string is only
# a space and a newline, so the result is the empty string.
' \n'.rstrip()

''