In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 定义超参数
input_size = 10  # 输入特征维度
hidden_size = 20  # 隐藏层维度
num_layers = 2   # 循环层数
output_size = 1  # 输出维度
seq_length = 5   # 序列长度

# 生成一些随机数据
x_train = torch.randn((100, seq_length, input_size))  # (样本数, 序列长度, 特征维度)
y_train = torch.randn((100, output_size))  # (样本数, 输出维度)

# 定义 RNN 模型
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)  # 初始化隐藏状态
        out, _ = self.rnn(x, h0)  # RNN 前向传播
        out = self.fc(out[:, -1, :])  # 取最后一个时间步的输出
        return out

# 定义 LSTM 模型
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)  # 初始化隐藏状态
        c0 = torch.zeros(num_layers, x.size(0), hidden_size)  # 初始化细胞状态
        out, _ = self.lstm(x, (h0, c0))  # LSTM 前向传播
        out = self.fc(out[:, -1, :])  # 取最后一个时间步的输出
        return out

# 定义 GRU 模型
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)  # 初始化隐藏状态
        out, _ = self.gru(x, h0)  # GRU 前向传播
        out = self.fc(out[:, -1, :])  # 取最后一个时间步的输出
        return out

# 选择模型进行训练（可以在 RNNModel, LSTMModel, GRUModel 之间切换）
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 训练模型
epochs = 100
for epoch in range(epochs):
    model.train()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# 测试模型
model.eval()
x_test = torch.randn((10, seq_length, input_size))
y_test = model(x_test)
print("Test Output:\n", y_test)

Epoch [10/100], Loss: 0.6854
Epoch [20/100], Loss: 0.3235
Epoch [30/100], Loss: 0.1033
Epoch [40/100], Loss: 0.0218
Epoch [50/100], Loss: 0.0060
Epoch [60/100], Loss: 0.0021
Epoch [70/100], Loss: 0.0008
Epoch [80/100], Loss: 0.0002
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0000
Test Output:
 tensor([[-0.4318],
        [-0.6755],
        [-1.1807],
        [-1.1107],
        [ 0.5226],
        [-2.0589],
        [-0.6673],
        [ 0.2435],
        [-0.9109],
        [-0.9527]], grad_fn=<AddmmBackward0>)


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

# Load the text data
with open('../data/寒门首辅.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Create a character-level vocabulary
chars = sorted(set(text))
vocab_size = len(chars)
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# Convert text to indices
text_as_int = np.array([char_to_idx[ch] for ch in text])

# Define parameters
seq_length = 100  # Length of the input sequences
batch_size = 64   # Batch size

# Create input-target sequences
class TextDataset(Dataset):
    def __init__(self, text_as_int, seq_length):
        self.text_as_int = text_as_int
        self.seq_length = seq_length

    def __len__(self):
        return len(self.text_as_int) - self.seq_length

    def __getitem__(self, idx):
        x = self.text_as_int[idx:idx + self.seq_length]
        y = self.text_as_int[idx + 1:idx + self.seq_length + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

dataset = TextDataset(text_as_int, seq_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [2]:
text[1:50], chars[:10], vocab_size, idx_to_char[2000]

('寒门首辅》\n作者：一袖乾坤\n\n内容简介：    弘治五年，四海靖平。徐溥春风得意当了一朝首辅，李东',
 ['\t', '\n', '\x0f', '\x14', '\x1e', ' ', '!', '"', '#', '%'],
 4319,
 '梯')

In [3]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device))

# Model parameters
embed_size = 256
hidden_size = 512
num_layers = 2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RNN(vocab_size, embed_size, hidden_size, num_layers).to(device)


In [4]:
model

RNN(
  (embedding): Embedding(4319, 256)
  (rnn): LSTM(256, 512, num_layers=2, batch_first=True)
  (fc): Linear(in_features=512, out_features=4319, bias=True)
)

In [None]:
# Training parameters
num_epochs = 10
learning_rate = 0.002

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    
    for i, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Initialize hidden state with the correct batch size
        hidden = model.init_hidden(inputs.size(0))
        
        # Detach hidden state to prevent backpropagating through entire training history
        hidden = tuple([h.detach() for h in hidden])
        
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.view(-1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if i % 5000 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(dataloader)}], Loss: {loss.item():.4f}')

'''
# Training parameters
num_epochs = 5
learning_rate = 0.002

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    hidden = model.init_hidden(batch_size)
    
    for i, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Detach hidden state to prevent backpropagating through entire training history
        hidden = tuple([h.detach() for h in hidden])
        
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.view(-1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(dataloader)}], Loss: {loss.item():.4f}')
'''

Epoch [1/10], Step [0/24684], Loss: 8.3661
Epoch [1/10], Step [5000/24684], Loss: 1.7010
Epoch [1/10], Step [10000/24684], Loss: 1.2260
Epoch [1/10], Step [15000/24684], Loss: 1.1084
Epoch [1/10], Step [20000/24684], Loss: 1.0826
Epoch [2/10], Step [0/24684], Loss: 0.9615


In [None]:
def predict(model, char, hidden=None):
    x = np.array([[char_to_idx[char]]])
    x = torch.tensor(x, dtype=torch.long).to(device)
    
    with torch.no_grad():
        out, hidden = model(x, hidden)
        prob = nn.functional.softmax(out[-1], dim=0).data
        char_idx = torch.max(prob, dim=0)[1].item()
    
    return idx_to_char[char_idx], hidden

def generate_text(model, start_string, length=100):
    model.eval()
    hidden = model.init_hidden(1)
    
    for char in start_string:
        char, hidden = predict(model, char, hidden)
    
    output = start_string + char
    
    for _ in range(length):
        char, hidden = predict(model, char, hidden)
        output += char
    
    return output

# Generate text
start_string = '从前有一个'
print(generate_text(model, start_string, length=200))


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

# Load the text data
with open('../data/寒门首辅.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Create a character-level vocabulary
chars = sorted(set(text))
vocab_size = len(chars)
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# Convert text to indices
text_as_int = np.array([char_to_idx[ch] for ch in text])

# Define parameters for a smaller dataset
seq_length = 50  # Shorter sequence length
batch_size = 32  # Smaller batch size

# Create input-target sequences
class TextDataset(Dataset):
    def __init__(self, text_as_int, seq_length):
        self.text_as_int = text_as_int
        self.seq_length = seq_length

    def __len__(self):
        return len(self.text_as_int) - self.seq_length

    def __getitem__(self, idx):
        x = self.text_as_int[idx:idx + self.seq_length]
        y = self.text_as_int[idx + 1:idx + self.seq_length + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

dataset = TextDataset(text_as_int, seq_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

class RNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device))

# Model parameters
embed_size = 256
hidden_size = 512
num_layers = 2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RNN(vocab_size, embed_size, hidden_size, num_layers).to(device)

# Training parameters
num_epochs = 10  # Fewer epochs
learning_rate = 0.002

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    
    for i, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.size(0)  # Get the current batch size
        hidden = model.init_hidden(batch_size)  # Initialize hidden state with current batch size
        
        # Detach hidden state to prevent backpropagating through entire training history
        hidden = tuple([h.detach() for h in hidden])
        
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.view(-1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if i % 10 == 0:  # Print less frequently if dataset is small
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(dataloader)}], Loss: {loss.item():.4f}')

def predict(model, char, hidden=None):
    x = np.array([[char_to_idx[char]]])
    x = torch.tensor(x, dtype=torch.long).to(device)
    
    with torch.no_grad():
        out, hidden = model(x, hidden)
        prob = nn.functional.softmax(out[-1], dim=0).data
        char_idx = torch.max(prob, dim=0)[1].item()
    
    return idx_to_char[char_idx], hidden

def generate_text(model, start_string, length=100):
    model.eval()
    hidden = model.init_hidden(1)
    
    for char in start_string:
        char, hidden = predict(model, char, hidden)
    
    output = start_string + char
    
    for _ in range(length):
        char, hidden = predict(model, char, hidden)
        output += char
    
    return output

# Generate text
start_string = '从前有一个'
print(generate_text(model, start_string, length=200))


In [None]:
def predict(model, char, hidden=None):
    x = np.array([[char_to_idx[char]]])
    x = torch.tensor(x, dtype=torch.long).to(device)
    
    with torch.no_grad():
        out, hidden = model(x, hidden)
        prob = nn.functional.softmax(out[-1], dim=0).data
        char_idx = torch.max(prob, dim=0)[1].item()
    
    return idx_to_char[char_idx], hidden

def generate_text(model, start_string, length=100):
    model.eval()
    hidden = model.init_hidden(1)
    
    for char in start_string:
        char, hidden = predict(model, char, hidden)
    
    output = start_string + char
    
    for _ in range(length):
        char, hidden = predict(model, char, hidden)
        output += char
    
    return output

# Generate text
start_string = '从前有一个'
print(generate_text(model, start_string, length=200))
