In [3]:
import unicodedata

result = ''
name_asc2 = unicodedata.normalize('NFD', 'café')
# 将拉丁字母和重音符号组合成一个单字符
print(name_asc2)
for c in name_asc2:
    print(c)

unicodedata.normalize('NFD', 'café').encode(
    'ascii', 'ignore').decode('ascii')

café
c
a
f
e
́


'cafe'

In [10]:
import unicodedata
import string
# 设置一个全局变量all_letters
# 保存了训练数据中全部可能出现的字符
# 包括英文的大小写，加上空格、点、逗号、分号、引号等标点符号
all_letters = string.ascii_letters + " .,;'"

# 实现一个unicode转Asc2码的函数，这个函数的主要作用是，将拉丁字符转为英文字符
# 在未来在训练的时候，我们只关注英文单词中的大小写字符
# 一些语言中的特殊字符，会直接转为英文的大小


def unicode_to_asc2(name):
    result = ""  # 保存转换后的结果
    # 对输入的name进行标准化
    name_asc2 = unicodedata.normalize('NFD', name)
    # 遍历标准化后的字符串中的字符
    for c in name_asc2:
        # 如果字符c不是词素记号，例如不是重音符号，并且c还是英文字符
        if unicodedata.category(c) != 'Mn' and c in all_letters:
            result += c  # 将c添加到结果中
    return result  # 返回结果


unicode_to_asc2('café')

'cafe'

In [7]:
import re
import string

all_letters = string.ascii_letters + " .,;'"
pattern = re.compile(f'[^{all_letters}]')
pattern.sub('', 'café')

'caf'

In [1]:
'café'.lower()

'café'

In [23]:
import torch
a = torch.tensor([[1, 2, 3]])
b = torch.tensor([[4, 5, 6]])

torch.concat((a, b), dim=1)

torch.rand((10, 2))

torch.float32

In [28]:
import numpy as np

confusion = np.array([[20, 5, 5],
                      [5, 15, 0],
                      [5, 0, 15]])

confusion.sum(axis=1)

array([30, 20, 20])

In [30]:
dict_ = {
    'a': 1,
    'b': 2
}

list(dict_.keys())

['a', 'b']

In [36]:
import torch
total_loss = torch.tensor(0.0, dtype=torch.float32)
total_loss += torch.tensor(2.0, dtype=torch.float32)
total_loss

tensor(2.)

In [None]:
import torch
import torch.nn as nn
import string

# 1. 数据准备
sequence = "hello"
all_chars = string.ascii_lowercase  # 'abcdefghijklmnopqrstuvwxyz'
char_to_idx = {char: idx for idx, char in enumerate(all_chars)}
idx_to_char = {idx: char for idx, char in enumerate(all_chars)}

# 将字符转换为索引
input_seq = [char_to_idx[char]
             for char in sequence[:-1]]  # 输入序列：'h', 'e', 'l', 'l'
target_seq = [char_to_idx[char]
              for char in sequence[1:]]  # 目标序列：'e', 'l', 'l', 'o'

# 转换为张量
input_tensor = torch.tensor(input_seq, dtype=torch.long).unsqueeze(
    1)  # 形状：(seq_len, batch_size)
target_tensor = torch.tensor(target_seq, dtype=torch.long)

# 2. 定义模型参数
input_size = len(all_chars)     # 输入维度（字符数量）
hidden_size = 10                # 隐藏层维度
num_layers = 1                  # RNN 层数

# 3. 定义模型


class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # 定义一个嵌入层，将字符索引转换为向量
        self.embedding = nn.Embedding(input_size, input_size)
        # 定义 RNN 层
        self.rnn = nn.RNN(input_size, hidden_size, num_layers)
        # 定义全连接层，将隐藏状态转换为输出
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, hidden):
        # x 的形状：(seq_len, batch_size)
        x = self.embedding(x)  # 形状：(seq_len, batch_size, input_size)
        output, hidden = self.rnn(x, hidden)
        output = self.fc(output)  # 形状：(seq_len, batch_size, input_size)
        return output, hidden

    def init_hidden(self):
        # 初始化隐藏状态
        return torch.zeros(self.num_layers, 1, self.hidden_size)


# 4. 实例化模型
model = CharRNN(input_size, hidden_size, num_layers)

# 5. 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 6. 训练模型
num_epochs = 100
for epoch in range(num_epochs):
    hidden = model.init_hidden()
    optimizer.zero_grad()
    outputs, hidden = model(input_tensor, hidden)
    loss = criterion(outputs.view(-1, input_size), target_tensor)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# 7. 测试模型


def predict(model, start_char, predict_len):
    model.eval()
    hidden = model.init_hidden()
    input_char = torch.tensor(
        [char_to_idx[start_char]], dtype=torch.long).unsqueeze(1)
    predicted = start_char

    for _ in range(predict_len):
        output, hidden = model(input_char, hidden)
        output = output.squeeze(0)
        _, top_idx = torch.max(output, dim=1)
        char = idx_to_char[top_idx.item()]
        predicted += char
        input_char = top_idx.unsqueeze(0)

    return predicted


print("\n生成文本:")
print(predict(model, 'h', 10))

In [2]:
import torch
import torch.nn as nn

# 参数设置
input_size = 10       # 输入特征的维度
hidden_size = 20      # 隐藏状态的维度
num_layers = 1        # LSTM 层数
batch_size = 5        # 批量大小
seq_len = 7           # 序列长度

lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
input = torch.randn(batch_size, seq_len, input_size)
h_0 = torch.randn(num_layers, batch_size, hidden_size)
c_0 = torch.randn(num_layers, batch_size, hidden_size)

output, (h_n, c_n) = lstm(input, (h_0, c_0))
output.shape, h_n.shape, c_n.shape

(torch.Size([5, 7, 20]), torch.Size([1, 5, 20]), torch.Size([1, 5, 20]))

In [3]:
import torch
import torch.nn as nn

a = torch.tensor([
    [1, 2, 3]
])
b = torch.tensor([
    [4, 5, 6]
])

torch.cat((a, b), dim=-1)

tensor([[1, 2, 3, 4, 5, 6]])

In [9]:
import torch
import torch.nn as nn

a = torch.tensor([
    [1, 2, 3],
    [4, 5, 6]
])
torch.flip(a, dims=[0])

tensor([[4, 5, 6],
        [1, 2, 3]])

In [19]:
import torch
import json
import torch.nn as nn
import torch.optim as optim
import random
import string
import time
import math

# 1. 准备数据
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

category_lines = {
    'English': ['Adam', 'Henry', 'Scott'],
    'French': ['Albert', 'Pierre', 'Jacques'],
    'Spanish': ['Carlos', 'Miguel', 'Jose']
}

all_categories = list(category_lines.keys())
n_categories = len(all_categories)


def letter_to_index(letter):
    return all_letters.find(letter)


def letter_to_tensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letter_to_index(letter)] = 1
    return tensor


def line_to_tensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letter_to_index(letter)] = 1
    return tensor

# 2. 定义双向 LSTM 模型


class CustomBiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CustomBiLSTM, self).__init__()
        self.hidden_size = hidden_size

        # 正向和反向 LSTM 的权重和偏置
        self.i2h_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2h_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input_tensor):
        seq_len = input_tensor.size(0)
        batch_size = input_tensor.size(1)

        # 初始化正向和反向的隐藏状态
        h_f = torch.zeros(batch_size, self.hidden_size)
        h_b = torch.zeros(batch_size, self.hidden_size)

        # 正向 LSTM
        outputs_f = []
        for t in range(seq_len):
            combined = torch.cat((input_tensor[t], h_f), 1)
            h_f = torch.tanh(self.i2h_f(combined))
            outputs_f.append(h_f.unsqueeze(0))
        outputs_f = torch.cat(outputs_f, dim=0)

        # 反向 LSTM
        outputs_b = []
        for t in reversed(range(seq_len)):
            combined = torch.cat((input_tensor[t], h_b), 1)
            h_b = torch.tanh(self.i2h_b(combined))
            outputs_b.insert(0, h_b.unsqueeze(0))
        outputs_b = torch.cat(outputs_b, dim=0)

        # 拼接正向和反向的输出
        outputs = torch.cat((outputs_f, outputs_b), dim=2)
        last_output = outputs[-1][0].data.tolist()
        with open('last_output.json', 'w') as f:
            json.dump(last_output, f)
        print('outputs', outputs.shape)

        # 取最后一个时间步的输出用于分类
        output = self.h2o(outputs[-1])
        return output

# 3. 训练模型


def category_from_output(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i


def random_training_example():
    # 设置种子为固定值，这里以 42 为例
    random.seed(42)
    category = random.choice(all_categories)
    line = random.choice(category_lines[category])
    category_tensor = torch.tensor(
        [all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor


n_hidden = 10
model = CustomBiLSTM(n_letters, n_hidden, n_categories)
model.load_state_dict(torch.load('model.pth'))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

n_iters = 1
print_every = 200


def time_since(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return f'{m}m {s:.2f}s'


start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = random_training_example()

    model.zero_grad()

    output = model(line_tensor)
    loss = criterion(output, category_tensor)
    loss.backward()
    optimizer.step()

    if iter % print_every == 0:
        guess, guess_i = category_from_output(output)
        correct = '✓' if guess == category else f'✗ ({category})'
        print(f'{iter} {iter / n_iters * 100:.2f}% ({time_since(start)}) Loss: {loss.item():.4f} {line} / {guess} {correct}')

# 4. 测试模型
# torch.save(model.state_dict(), 'model.pth')


def predict(input_line):
    with torch.no_grad():
        line_tensor = line_to_tensor(input_line)
        output = model(line_tensor)
        guess, guess_i = category_from_output(output)
        print(f'{input_line} -> {guess}')


# 测试一些名字
# predict('Henry')
# predict('Jacques')
# predict('Carlos')

outputs torch.Size([6, 1, 20])


In [16]:
a = []
a.insert(0, 1)
a.insert(0, 2)
a

[2, 1]

In [19]:
import torch
outputs_f = torch.tensor([
    [1, 2, 3]
])
outputs_b = torch.tensor([
    [4, 5, 6]
])
torch.concat((outputs_f, outputs_b), dim=1)

tensor([[1, 2, 3, 4, 5, 6]])

In [22]:
import torch
import torch.nn as nn


# 定义LSTM的单元计算（单步）
def lstm_cell(input, hidden, cell, w_ih, w_hh, b_ih, b_hh):
    gates = torch.mm(input, w_ih.t()) + \
        torch.mm(hidden, w_hh.t()) + b_ih + b_hh
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    ingate = torch.sigmoid(ingate)
    forgetgate = torch.sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)
    outgate = torch.sigmoid(outgate)

    new_cell = (forgetgate * cell) + (ingate * cellgate)
    new_hidden = outgate * torch.tanh(new_cell)

    return new_hidden, new_cell


# 定义双向LSTM的前向传播（简化版，未处理批次维度等复杂情况）
def bidirectional_lstm(inputs, hidden_size):
    num_timesteps = inputs.size(0)
    hidden_size = hidden_size
    input_size = inputs.size(1)

    # 初始化权重（这里简单随机初始化，实际中需谨慎初始化）
    w_ih_f = torch.randn(4 * hidden_size, input_size)
    w_hh_f = torch.randn(4 * hidden_size, hidden_size)
    b_ih_f = torch.randn(4 * hidden_size)
    b_hh_f = torch.randn(4 * hidden_size)

    w_ih_b = torch.randn(4 * hidden_size, input_size)
    w_hh_b = torch.randn(4 * hidden_size, hidden_size)
    b_ih_b = torch.randn(4 * hidden_size)
    b_hh_b = torch.randn(4 * hidden_size)

    # 前向初始化隐藏状态和细胞状态
    hidden_f = torch.randn(hidden_size)
    cell_f = torch.randn(hidden_size)

    # 后向初始化隐藏状态和细胞状态
    hidden_b = torch.randn(hidden_size)
    cell_b = torch.randn(hidden_size)

    output_f = []
    output_b = []

    for t in range(num_timesteps):
        hidden_f, cell_f = lstm_cell(
            inputs[t], hidden_f, cell_f, w_ih_f, w_hh_f, b_ih_f, b_hh_f)
        output_f.append(hidden_f)

        # 后向时间步
        t_back = num_timesteps - t - 1
        hidden_b, cell_b = lstm_cell(
            inputs[t_back], hidden_b, cell_b, w_ih_b, w_hh_b, b_ih_b, b_hh_b)
        output_b.append(hidden_b)

    output_f = torch.stack(output_f)
    output_b = torch.stack(output_b[::-1])

    output = torch.cat([output_f, output_b], dim=1)

    return output


# 测试示例
input_seq = torch.randn(5, 3)  # 假设输入序列长度为5，输入特征维度为3
hidden_size = 4
output = bidirectional_lstm(input_seq, hidden_size)
print(output)

RuntimeError: self must be a matrix

In [24]:
a = [1, 2, 3]
a[::-1]

[3, 2, 1]

In [3]:
import torch
import torch.nn as nn
import torch
import torch.nn.functional as F


class LSTMCell:
    def __init__(self, input_size, hidden_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        # 初始化权重和偏置
        self.W_ih = torch.randn(4 * hidden_size, input_size)
        self.W_hh = torch.randn(4 * hidden_size, hidden_size)
        self.b_ih = torch.randn(4 * hidden_size)
        self.b_hh = torch.randn(4 * hidden_size)

    def __call__(self, x, hidden):
        h, c = hidden
        # 计算门控
        gates = (torch.matmul(x, self.W_ih.t()) + self.b_ih) + \
                (torch.matmul(h, self.W_hh.t()) + self.b_hh)
        # 分割门控为四部分
        i_gate, f_gate, g_gate, o_gate = gates.chunk(4, 1)
        # 应用激活函数
        i = torch.sigmoid(i_gate)
        f = torch.sigmoid(f_gate)
        g = torch.tanh(g_gate)
        o = torch.sigmoid(o_gate)
        # 更新记忆状态和隐藏状态
        c_new = f * c + i * g
        h_new = o * torch.tanh(c_new)
        return h_new, c_new


class BidirectionalLSTM:
    def __init__(self, input_size, hidden_size):
        self.hidden_size = hidden_size
        self.forward_cell = LSTMCell(input_size, hidden_size)
        self.backward_cell = LSTMCell(input_size, hidden_size)

    def __call__(self, inputs):
        seq_len, _ = inputs.size()
        # 初始化隐藏状态和记忆状态
        h_fwd = torch.zeros(1, self.hidden_size)
        c_fwd = torch.zeros(1, self.hidden_size)
        h_bwd = torch.zeros(1, self.hidden_size)
        c_bwd = torch.zeros(1, self.hidden_size)

        outputs = []
        # 前向遍历
        hiddens_fwd = []
        for t in range(seq_len):
            x = inputs[t].unsqueeze(0)
            h_fwd, c_fwd = self.forward_cell(x, (h_fwd, c_fwd))
            hiddens_fwd.append(h_fwd)

        # 后向遍历
        hiddens_bwd = []
        for t in reversed(range(seq_len)):
            x = inputs[t].unsqueeze(0)
            h_bwd, c_bwd = self.backward_cell(x, (h_bwd, c_bwd))
            hiddens_bwd.insert(0, h_bwd)

        # 拼接前向和后向的隐藏状态
        for h_f, h_b in zip(hiddens_fwd, hiddens_bwd):
            outputs.append(torch.cat((h_f, h_b), dim=1))

        outputs = torch.cat(outputs, dim=0)
        # 最后的隐藏状态和记忆状态
        h_n = torch.cat((h_fwd, h_bwd), dim=1)
        c_n = torch.cat((c_fwd, c_bwd), dim=1)
        return outputs, (h_n, c_n)


# 输入序列长度为5，输入维度为3
input_size = 3
hidden_size = 4
seq_len = 5

# 随机生成输入数据
inputs = torch.randn(seq_len, input_size)

# 创建双向 LSTM
bilstm = BidirectionalLSTM(input_size, hidden_size)

# 获取输出和隐藏状态
outputs, (h_n, c_n) = bilstm(inputs)

print("Outputs:")
print(outputs)
print("\nHidden State h_n:")
print(h_n)
print("\nCell State c_n:")
print(c_n)

Outputs:
tensor([[ 0.3648,  0.2411, -0.3089,  0.2007, -0.1097,  0.5242,  0.3789, -0.3503],
        [ 0.3420,  0.1185, -0.0651,  0.3576,  0.0764, -0.5390,  0.1255, -0.4141],
        [ 0.0401, -0.0282, -0.3234,  0.3144, -0.0713,  0.4474,  0.3256, -0.4765],
        [ 0.0031, -0.2287,  0.1083,  0.1781, -0.2411, -0.0599,  0.5003, -0.1189],
        [ 0.4869,  0.5825, -0.0332,  0.4724, -0.6330,  0.1864,  0.1699, -0.6844]])

Hidden State h_n:
tensor([[ 0.4869,  0.5825, -0.0332,  0.4724, -0.1097,  0.5242,  0.3789, -0.3503]])

Cell State c_n:
tensor([[ 0.9634,  0.6768, -0.2033,  1.2016, -0.2670,  0.6003,  0.5137, -0.3839]])
