In [None]:
#获取唐诗数据集
#你可以从以下来源获取大量的唐诗数据集：
#网络爬虫：可以编写爬虫从网上的唐诗数据库中爬取数据，如中国古诗词网。https://www.gushiwen.cn/
#现成数据集：可以在一些数据平台上找到现成的唐诗数据集，如Kaggle或GitHub。
#例如，在GitHub上可以找到一些唐诗数据集：
#chinese-poetry: 这个项目包含了大量的中国古诗，包括唐诗、宋词等。https://github.com/chinese-poetry/chinese-poetry
#以下是一个简单的脚本，展示如何从 chinese-poetry 项目中提取唐诗并保存为 tang_poems.txt 文件。
"""
静夜思 李白
床前明月光，疑是地上霜。
举头望明月，低头思故乡。

春晓 孟浩然
春眠不觉晓，处处闻啼鸟。
夜来风雨声，花落知多少。

登鹳雀楼 王之涣
白日依山尽，黄河入海流。
欲穷千里目，更上一层楼。
"""
import json
# 下载 chinese-poetry 项目中的唐诗数据集
#!git clone https://github.com/chinese-poetry/chinese-poetry.git
#"C:\Users\drhu0\chinese-poetry"
# Read the Tang poetry data: a JSON list of poems, each carrying 'title',
# 'author' and 'paragraphs'.
# Alternative input file: 'C:\\Users\\drhu0\\data\\poetry\\poet.tang.7000.json'
poems_file_path = 'C:\\Users\\drhu0\\data\\poetry\\poet.tang.1000.json'
with open(poems_file_path, 'r', encoding='utf-8') as f:
    poems_data = json.load(f)

# Write each poem as a "title author" header line, its verses (one per line)
# and a blank separator line.
output_file_path = 'tang_poems_1000.txt'
with open(output_file_path, 'w', encoding='utf-8') as f:
    for poem in poems_data:
        title = poem['title']
        author = poem['author']
        content = '\n'.join(poem['paragraphs'])
        f.write(f"{title} {author}\n{content}\n\n")
# Report the file that was actually written; the message previously named
# tang_poems.txt, which this script never creates.
print(f"唐诗数据集已保存为 {output_file_path} 文件")

In [None]:
import json
import os
import opencc

# 创建一个转换器
converter = opencc.OpenCC('t2s')

#https://www.heywhale.com/home/global?search=%E5%85%A8%E5%94%90%E8%AF%97%26%E5%85%A8%E5%AE%8B%E8%AF%97%E6%95%B0%E6%8D%AE%E9%9B%86
#这个数据集包含5.5万首唐诗和26万首宋诗，以及相关的作者信息，存储为JSON文件。每个JSON文件包含1000篇诗，数据以繁体字存储，
#但您可能需要将其转换为简体字以适应transformers库的要求。

# 假设您的JSON文件存放在'path_to_json_files'目录下
#json_files_dir = 'C:\\Users\\drhu0\\data\\poetry\\poet.tang.7000.json'

json_files_dir = 'C:\\Users\\drhu0\\data\\poetry\\'
# Convert every JSON file in the directory from Traditional to Simplified
# Chinese, append the poems to poems.txt and rewrite the JSON file in place.
# poems.txt is opened in append mode, so re-running this cell adds the poems
# again.
with open('poems.txt', 'a', encoding='utf-8') as f:
    for filename in os.listdir(json_files_dir):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(json_files_dir, filename)
        print(file_path)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        for poem in data:
            # Entries without verses (e.g. author records) are left untouched.
            if 'paragraphs' not in poem:
                continue
            poem['paragraphs'] = [converter.convert(text) for text in poem['paragraphs']]
            content = '\n'.join(poem['paragraphs'])

            # Reset per poem: previously a poem without 'author' silently
            # reused the preceding poem's author (or raised NameError for the
            # first poem), and a poem without 'title' raised KeyError.
            title = ''
            if 'title' in poem:
                poem['title'] = converter.convert(poem['title'])
                title = poem['title']
            author = ''
            if 'author' in poem:
                poem['author'] = converter.convert(poem['author'])
                author = poem['author']

            header = ' '.join(part for part in (title, author) if part)
            f.write(f"{header}\n{content}\n\n")

        # Write the converted data back over the source file.
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)

In [None]:
#project 2: 使用预训练框架写作中文诗歌

import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense, Flatten, Bidirectional, Embedding, GRU
from tensorflow.keras.optimizers import Adam

# Bracket-like and annotation characters removed from every corpus line.
puncs = [']', '[', '（', '）', '{', '}', '：', '《', '》']


def preprocess_file(Config):
    """Load the corpus and build the character vocabulary.

    Every line of ``Config.poetry_file`` has the characters in ``puncs``
    removed, is stripped of surrounding whitespace and is terminated with
    ``]``.  Characters that occur at most twice, and the ``]`` terminator
    itself, are left out of the vocabulary.

    Args:
        Config: object exposing a ``poetry_file`` path attribute.

    Returns:
        A tuple ``(word2numF, num2word, words, files_content)``:

        * ``word2numF(ch)`` -> ``rank + 1`` for a vocabulary character and
          ``0`` for anything else;
        * ``num2word`` -> dict from the zero-based rank to the character
          (note: offset by one from ``word2numF``);
        * ``words`` -> tuple of vocabulary characters by descending
          frequency, ties in code-point order (empty when nothing qualifies);
        * ``files_content`` -> the processed corpus as one string.
    """
    from collections import Counter

    # One pass per line instead of one str.replace per punctuation mark, and
    # a single join instead of quadratic string concatenation.
    strip_table = str.maketrans('', '', ''.join(puncs))
    with open(Config.poetry_file, 'r', encoding='utf-8') as f:
        files_content = ''.join(line.translate(strip_table).strip() + ']' for line in f)

    counted_words = Counter(files_content)
    # pop() with a default: the terminator may be absent (empty file), and it
    # must never be subject to the low-frequency filter below.  The previous
    # remove()/del pair raised on corpora of three lines or fewer.
    counted_words.pop(']', None)

    # Drop low-frequency characters, then order by frequency.
    kept = [(char, count) for char, count in counted_words.items() if count > 2]
    kept.sort(key=lambda pair: (-pair[1], pair[0]))
    words = tuple(char for char, _ in kept)

    # Character <-> id mappings.
    word2num = {char: rank + 1 for rank, char in enumerate(words)}
    num2word = dict(enumerate(words))
    word2numF = lambda x: word2num.get(x, 0)
    return word2numF, num2word, words, files_content

class PoetryModel(object):
    """Character-level poetry generator built on a bidirectional GRU.

    Class indices follow ``word2numF``: index 0 is the out-of-vocabulary
    bucket and index ``i + 1`` stands for ``self.words[i]``.  The output layer
    and the one-hot labels therefore have ``len(self.words) + 1`` slots, and
    predictions are decoded with ``self.words[index - 1]``.
    """

    # Characters marking poem boundaries or annotations; training windows
    # containing any of them are skipped.
    _SKIP_CHARS = frozenset([']', '[', '（', '）', '{', '}', '：', '《', '》', ':'])
    # Punctuation never used when padding a too-short prompt.
    _PROMPT_PUNCS = frozenset([',', '。', '，'])

    def __init__(self, config):
        """Preprocess the corpus, then load the saved model or train a new one.

        Args:
            config: object exposing ``poetry_file``, ``weight_file``,
                ``max_len``, ``batch_size`` and ``learning_rate``.
        """
        self.model = None
        self.do_train = True
        self.loaded_model = False
        self.config = config

        # Corpus preprocessing.
        self.word2numF, self.num2word, self.words, self.files_content = preprocess_file(self.config)

        # Load the model file when it exists, otherwise train from scratch.
        if os.path.exists(self.config.weight_file):
            self.model = load_model(self.config.weight_file)
            self.model.summary()
        else:
            self.train()
        self.do_train = False
        self.loaded_model = True

    @property
    def _num_classes(self):
        """Number of output classes: the vocabulary plus the unknown bucket."""
        return len(self.words) + 1

    def _encode(self, chars):
        """Return a (1, max_len) int32 id array for the last max_len chars."""
        max_len = self.config.max_len
        encoded = np.zeros((1, max_len), dtype=np.int32)
        for t, char in enumerate(chars[-max_len:]):
            encoded[0, t] = self.word2numF(char)
        return encoded

    def _sample_char(self, preds, temperature=1.0):
        """Sample a class from ``preds`` and decode it to a character.

        The out-of-vocabulary class (index 0) is masked out whenever any
        other class has probability mass; if it is still drawn, an empty
        string is returned.
        """
        probs = np.array(preds, dtype='float64')
        if probs[1:].sum() > 0:
            probs[0] = 0.0
        index = int(self.sample(probs, temperature))
        # Class k encodes words[k - 1]; num2word[k] would be off by one.
        return self.words[index - 1] if index > 0 else ''

    def build_model(self):
        """Build and compile the network, storing it in ``self.model``."""
        input_tensor = Input(shape=(self.config.max_len,))
        embedd = Embedding(len(self.num2word) + 2, 300, input_length=self.config.max_len)(input_tensor)
        gru = Bidirectional(GRU(128, return_sequences=True))(embedd)
        flatten = Flatten()(gru)
        # One unit per label produced by word2numF (0 .. len(words)).
        dense = Dense(self._num_classes, activation='softmax')(flatten)
        self.model = Model(inputs=input_tensor, outputs=dense)
        optimizer = Adam(learning_rate=self.config.learning_rate)
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    def sample(self, preds, temperature=1.0):
        """Draw a class index from ``preds`` after temperature scaling.

        Log-probabilities are divided by ``temperature`` and renormalised, so
        values below 1.0 sharpen the distribution (more conservative output)
        and values above 1.0 flatten it (more varied output); 1.0 samples
        from ``preds`` unchanged.

        Args:
            preds: sequence of class probabilities.
            temperature: positive scaling factor.

        Returns:
            The sampled class index.
        """
        preds = np.asarray(preds).astype('float64')
        # Zero probabilities map to -inf and back to 0; silence the warning.
        with np.errstate(divide='ignore'):
            preds = np.log(preds) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    def generate_sample_result(self, epoch, logs):
        """Print sample generations at the end of an epoch (Keras callback).

        Args:
            epoch: epoch index passed by Keras.
            logs: metrics dict passed by Keras (unused).
        """
        print("\n==================Epoch {}=====================".format(epoch))
        max_len = self.config.max_len
        for diversity in [0.5, 1.0, 1.5]:
            print("------------Diversity {}--------------".format(diversity))
            start_index = random.randint(0, len(self.files_content) - max_len - 1)
            sentence = self.files_content[start_index: start_index + max_len]
            for _ in range(20):
                # _encode uses the last max_len characters (was a literal 6).
                preds = self.model.predict(self._encode(sentence), verbose=0)[0]
                sentence += self._sample_char(preds, diversity)
            print(sentence)

    def predict(self, text):
        """Generate an acrostic: one max_len-character line per prompt char.

        Args:
            text: prompt string; ``None`` or fewer than four characters is
                padded with random non-punctuation vocabulary characters.

        Returns:
            The generated text, or ``None`` when the model is not ready.
        """
        if not self.loaded_model:
            return None
        max_len = self.config.max_len

        # Pad a short prompt up to four characters.
        text = text or ''
        missing = 4 - len(text)
        if missing > 0:
            candidates = [w for w in self.words if w not in self._PROMPT_PUNCS]
            if candidates:
                text += ''.join(random.choice(candidates) for _ in range(missing))

        # Seed the context with the tail of a random non-blank corpus line.
        with open(self.config.poetry_file, 'r', encoding='utf-8') as f:
            lines = [line.strip() for line in f]
        lines = [line for line in lines if line]
        seed_line = random.choice(lines) if lines else ''
        window = seed_line[-(max_len - 1):] if max_len > 1 else ''

        res = ''
        for c in text:
            window = (window + c)[-max_len:]
            # Generate the remaining max_len - 1 characters of this line.
            for _ in range(max_len - 1):
                preds = self.model.predict(self._encode(window), verbose=0)[0]
                window = (window + self._sample_char(preds, 1.0))[-max_len:]
            res += window
        return res

    def data_generator(self):
        """Yield ``(x_vec, y_vec)`` training pairs forever, one sample each.

        ``x_vec`` is a (1, max_len) int32 id window and ``y_vec`` a
        (1, len(words) + 1) one-hot row for the next character.  Windows that
        touch a character in ``_SKIP_CHARS`` are skipped, and the scan wraps
        around at the end of the corpus instead of indexing past it.

        Raises:
            ValueError: if the corpus holds no usable window.
        """
        max_len = self.config.max_len
        last_start = len(self.files_content) - max_len - 1
        if last_start < 0:
            raise ValueError("corpus is shorter than max_len + 1 characters")

        i = 0
        yielded_in_pass = False
        while True:
            if i > last_start:
                if not yielded_in_pass:
                    raise ValueError("corpus contains no usable training window")
                i = 0
                yielded_in_pass = False

            x = self.files_content[i: i + max_len]
            y = self.files_content[i + max_len]
            i += 1
            if y in self._SKIP_CHARS or not self._SKIP_CHARS.isdisjoint(x):
                continue

            # Builtin bool: the np.bool alias no longer exists in NumPy.
            y_vec = np.zeros(shape=(1, self._num_classes), dtype=bool)
            y_vec[0, self.word2numF(y)] = 1.0

            yielded_in_pass = True
            yield self._encode(x), y_vec

    def train(self):
        """Train the model, checkpointing and printing samples every epoch.

        Each generator step carries one sample; ``config.batch_size`` is used
        as the number of steps per epoch and the epoch count is
        ``len(files_content) // config.batch_size``.
        """
        number_of_epoch = len(self.files_content) // self.config.batch_size

        if not self.model:
            self.build_model()

        self.model.summary()

        self.model.fit(
            self.data_generator(),
            verbose=True,
            steps_per_epoch=self.config.batch_size,
            epochs=number_of_epoch,
            callbacks=[
                tf.keras.callbacks.ModelCheckpoint(self.config.weight_file, save_weights_only=False),
                LambdaCallback(on_epoch_end=self.generate_sample_result)
            ]
        )


class Config:
    """Settings read by ``preprocess_file`` and ``PoetryModel``."""

    # Training hyper-parameters.
    learning_rate = 0.001
    batch_size = 512
    # Predict the seventh character from the six preceding ones.
    max_len = 6

    # Corpus input and saved-model output paths.
    poetry_file = 'poetry.txt'
    weight_file = 'poetry_model.h5'

# Build (or load) the model, then print one generated poem per prompt.
# The loop ends cleanly when input is exhausted (EOF) or interrupted instead
# of surfacing a traceback.
model = PoetryModel(Config)
while True:
    try:
        text = input("text:")
    except (EOFError, KeyboardInterrupt):
        break
    sentence = model.predict(text)
    print(sentence)
    

In [None]:
#project 2: 使用循环神经网络 (RNN)
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Activation, Embedding
from tensorflow.keras.optimizers import Adam

# Load the corpus
def load_data(filepath):
    """Return the full contents of the UTF-8 text file at ``filepath``."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return handle.read()

# Build the character -> index and index -> character mappings
def create_mapping(text):
    """Map each distinct character of ``text`` to its sorted position.

    Returns:
        ``(char2idx, idx2char)``: two dicts that are inverses of each other.
    """
    char2idx = {}
    idx2char = {}
    for index, char in enumerate(sorted(set(text))):
        char2idx[char] = index
        idx2char[index] = char
    return char2idx, idx2char

# Build the input windows and their labels
def create_sequences(text, char2idx, seq_length):
    """Slide a ``seq_length`` window over ``text``.

    Each window becomes a row of character ids and the character right after
    it becomes the label.

    Returns:
        ``(X, y)`` as NumPy arrays of window ids and label ids.
    """
    inputs, targets = [], []
    for end in range(seq_length, len(text)):
        window = text[end - seq_length:end]
        inputs.append(list(map(char2idx.__getitem__, window)))
        targets.append(char2idx[text[end]])
    return np.array(inputs), np.array(targets)

# Load the data set
text = load_data('poems.txt')

# Build the character mappings
char2idx, idx2char = create_mapping(text)

# Window length
seq_length = 10

# Build the training windows
X, y = create_sequences(text, char2idx, seq_length)

# Build the model: embedding -> SimpleRNN -> softmax over the vocabulary
model = Sequential()
model.add(Embedding(len(char2idx), 128, input_length=seq_length))
model.add(SimpleRNN(128, return_sequences=False))
model.add(Dense(len(char2idx)))
model.add(Activation('softmax'))

# Compile the model.  `learning_rate` replaces the deprecated `lr` keyword and
# matches the Adam call used by PoetryModel elsewhere in this file.
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy')

# Train the model
model.fit(X, y, epochs=20, batch_size=64)

# Generate text
def generate_text(model, start_text, char2idx, idx2char, length):
    """Greedily extend ``start_text`` by ``length`` characters.

    At each step the current window is encoded with ``char2idx``, the
    highest-scoring class from ``model.predict`` is decoded with ``idx2char``,
    and the window slides forward by one character.

    Returns:
        ``start_text`` followed by the generated characters.
    """
    window = start_text
    pieces = [start_text]
    for _ in range(length):
        encoded = np.array([[char2idx[char] for char in window]])
        scores = model.predict(encoded, verbose=0)[0]
        predicted = idx2char[np.argmax(scores)]
        pieces.append(predicted)
        window = window[1:] + predicted
    return ''.join(pieces)

# Example generation: extend a seed line by 100 characters and print it
start_text = '春眠不觉晓'
generated_text = generate_text(
    model=model,
    start_text=start_text,
    char2idx=char2idx,
    idx2char=idx2char,
    length=100,
)
print(generated_text)

In [1]:
# Project 2, 以下是一个完整的Transformer模型代码示例，使用注意力机制进行中文诗歌生成。假设你的数据文件名为 poems.txt，每行包含一首诗。
#数据准备
#首先，加载并预处理数据。
import tensorflow as tf
import numpy as np
import os

# Read the data set
def load_data(filepath):
    """Return the non-blank lines of a UTF-8 text file, whitespace-stripped.

    Lines are stripped before the blank check, so whitespace-only lines are
    dropped rather than kept as empty strings.

    Args:
        filepath: path of the text file to read.

    Returns:
        List of stripped, non-empty lines in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        stripped = [line.strip() for line in f.read().split('\n')]
    return [line for line in stripped if line]

# Tokenise and encode
def tokenize(lang):
    """Encode texts as zero-padded sequences of character ids.

    Each text is split into single characters and wrapped in ``<start>`` /
    ``<end>`` marker tokens.  The default whitespace tokenizer would turn an
    unspaced Chinese line into one token, and the markers must exist in the
    vocabulary because ``evaluate`` looks them up in ``word_index``.

    The tokenizer is created with ``char_level=True`` so that plain strings
    passed to ``texts_to_sequences`` later are split per character as well.

    Args:
        lang: iterable of text lines.

    Returns:
        ``(tensor, lang_tokenizer)``: the post-padded id matrix and the
        fitted tokenizer.
    """
    # Lists of tokens are taken as-is by the tokenizer, which keeps the
    # multi-character markers intact.
    token_lists = [['<start>'] + list(text) + ['<end>'] for text in lang]
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    lang_tokenizer.fit_on_texts(token_lists)
    tensor = lang_tokenizer.texts_to_sequences(token_lists)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')
    return tensor, lang_tokenizer

# Load and preprocess the data
def load_dataset(path, num_examples=None):
    """Read the file at ``path`` and tokenize its first ``num_examples`` lines.

    Returns:
        The ``(tensor, tokenizer)`` pair produced by ``tokenize``; with
        ``num_examples=None`` every line is used.
    """
    lines = load_data(path)
    subset = lines[:num_examples]
    return tokenize(subset)

# Corpus location and the number of lines to use
path_to_file = 'poems.txt'
num_examples = 30000
inp_tensor, inp_tokenizer = load_dataset(path_to_file, num_examples)

# Size constants
embedding_dim = 256
units = 1024
# One id more than the tokenizer's entries: id 0 is what create_padding_mask
# treats as padding.
vocab_size = 1 + len(inp_tokenizer.word_index)

# Batching
BATCH_SIZE = 64
BUFFER_SIZE = len(inp_tensor)
steps_per_epoch = BUFFER_SIZE // BATCH_SIZE

# Build the dataset: every sequence serves as both input and target
dataset = tf.data.Dataset.from_tensor_slices((inp_tensor, inp_tensor))
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

# Model construction
# Transformer building blocks: encoder, decoder and the attention mechanism.
class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product attention.

    ``d_model`` is split evenly across ``num_heads`` heads; it must be
    divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % num_heads == 0

        # Width of a single head.
        self.depth = d_model // num_heads

        # Query / key / value projections and the output projection.
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape to (batch, -1, heads, depth) and move heads to axis 1."""
        per_head = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask):
        """Project the inputs, attend per head and merge the heads.

        Returns:
            ``(output, attention_weights)``.
        """
        batch_size = tf.shape(q)[0]

        q_heads = self.split_heads(self.wq(q), batch_size)
        k_heads = self.split_heads(self.wk(k), batch_size)
        v_heads = self.split_heads(self.wv(v), batch_size)

        attended, attention_weights = self.scaled_dot_product_attention(q_heads, k_heads, v_heads, mask)

        # Undo split_heads: heads back next to depth, then flatten to d_model.
        attended = tf.transpose(attended, perm=[0, 2, 1, 3])
        merged = tf.reshape(attended, (batch_size, -1, self.d_model))

        return self.dense(merged), attention_weights

    def scaled_dot_product_attention(self, q, k, v, mask):
        """Return ``softmax(q k^T / sqrt(dk) + mask * -1e9) v`` and the weights."""
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(dk)

        # Masked positions get a large negative logit, so softmax sends them
        # to (almost) zero.
        if mask is not None:
            logits += (mask * -1e9)

        attention_weights = tf.nn.softmax(logits, axis=-1)
        return tf.matmul(attention_weights, v), attention_weights

class EncoderLayer(tf.keras.layers.Layer):
    """Self-attention followed by a feed-forward network.

    Each sub-block applies dropout, a residual connection and layer
    normalisation.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super().__init__()

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = self.point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        """Run the layer on ``x``; ``mask`` is forwarded to the attention."""
        # Self-attention sub-block.
        attended, _ = self.mha(x, x, x, mask)
        attended = self.dropout1(attended, training=training)
        normed_attention = self.layernorm1(x + attended)

        # Feed-forward sub-block.
        transformed = self.ffn(normed_attention)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(normed_attention + transformed)

    def point_wise_feed_forward_network(self, d_model, dff):
        """Return Dense(dff, relu) -> Dense(d_model) as a Sequential model."""
        hidden = tf.keras.layers.Dense(dff, activation='relu')
        projection = tf.keras.layers.Dense(d_model)
        return tf.keras.Sequential([hidden, projection])

class DecoderLayer(tf.keras.layers.Layer):
    """Masked self-attention, encoder attention and a feed-forward network.

    Each of the three sub-blocks applies dropout, a residual connection and
    layer normalisation.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super().__init__()

        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)

        self.ffn = self.point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        """Run the layer.

        Returns:
            ``(output, self_attention_weights, encoder_attention_weights)``.
        """
        # Self-attention over the decoder input.
        self_attended, self_weights = self.mha1(x, x, x, look_ahead_mask)
        self_attended = self.dropout1(self_attended, training=training)
        after_self = self.layernorm1(self_attended + x)

        # Attention over the encoder output: values and keys come from the
        # encoder, queries from the decoder.
        cross_attended, cross_weights = self.mha2(enc_output, enc_output, after_self, padding_mask)
        cross_attended = self.dropout2(cross_attended, training=training)
        after_cross = self.layernorm2(cross_attended + after_self)

        # Feed-forward sub-block.
        transformed = self.ffn(after_cross)
        transformed = self.dropout3(transformed, training=training)
        result = self.layernorm3(transformed + after_cross)

        return result, self_weights, cross_weights

    def point_wise_feed_forward_network(self, d_model, dff):
        """Return Dense(dff, relu) -> Dense(d_model) as a Sequential model."""
        hidden = tf.keras.layers.Dense(dff, activation='relu')
        projection = tf.keras.layers.Dense(d_model)
        return tf.keras.Sequential([hidden, projection])

class Encoder(tf.keras.Model):
    """Token embedding plus positional encoding, then a stack of EncoderLayer.

    Args:
        num_layers: number of encoder layers.
        d_model: embedding / model width.
        num_heads: attention heads per layer.
        dff: hidden width of each feed-forward network.
        input_vocab_size: size of the embedding table.
        rate: dropout rate.
        maximum_position_encoding: number of positions in the positional
            table; inputs must not be longer than this.  Defaults to
            ``input_vocab_size``, the length that used to be hard-wired.
    """

    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, rate=0.1,
                 maximum_position_encoding=None):
        super(Encoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        # The table length is a property of sequence length, not of the
        # vocabulary; keep the old value only as the default.
        if maximum_position_encoding is None:
            maximum_position_encoding = input_vocab_size

        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = self.positional_encoding(maximum_position_encoding, self.d_model)

        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        """Encode the token ids ``x``; ``mask`` is passed to every layer."""
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)
        # Scale the embeddings by sqrt(d_model) before adding positions.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x, training=training)

        for layer in self.enc_layers:
            x = layer(x, training, mask)

        return x

    def positional_encoding(self, position, d_model):
        """Return a (1, position, d_model) float32 sinusoidal table.

        Even columns hold the sine and odd columns the cosine of the angles.
        """
        angle_rads = self.get_angles(np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)

        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

        pos_encoding = angle_rads[np.newaxis, ...]

        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates

class Decoder(tf.keras.Model):
    """Token embedding, positional encoding, DecoderLayer stack, projection.

    The stack output is projected to ``target_vocab_size`` logits.  The
    callers in this file pass the decoder output directly to a sparse
    cross-entropy over token ids and to ``argmax`` decoding; both need
    vocabulary-sized logits, not the ``d_model``-wide hidden state that was
    returned before.  Checkpoints saved before this change contain no
    weights for ``final_layer``.

    Args:
        num_layers: number of decoder layers.
        d_model: embedding / model width.
        num_heads: attention heads per layer.
        dff: hidden width of each feed-forward network.
        target_vocab_size: size of the embedding table and of the output.
        rate: dropout rate.
        maximum_position_encoding: number of positions in the positional
            table; inputs must not be longer than this.  Defaults to
            ``target_vocab_size``, the length that used to be hard-wired.
    """

    def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size, rate=0.1,
                 maximum_position_encoding=None):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        if maximum_position_encoding is None:
            maximum_position_encoding = target_vocab_size

        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = self.positional_encoding(maximum_position_encoding, d_model)

        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)
        # Linear projection from d_model to vocabulary logits.
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        """Decode the token ids ``x`` against ``enc_output``.

        Returns:
            ``(logits, attention_weights)``: ``logits`` has
            ``target_vocab_size`` entries on its last axis, and
            ``attention_weights`` maps ``decoder_layer{n}_block{1,2}`` to the
            weights of each layer's two attention blocks.
        """
        seq_len = tf.shape(x)[1]
        attention_weights = {}

        x = self.embedding(x)
        # Scale the embeddings by sqrt(d_model) before adding positions.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training, look_ahead_mask, padding_mask)

            attention_weights[f'decoder_layer{i+1}_block1'] = block1
            attention_weights[f'decoder_layer{i+1}_block2'] = block2

        return self.final_layer(x), attention_weights

    def positional_encoding(self, position, d_model):
        """Return a (1, position, d_model) float32 sinusoidal table.

        Even columns hold the sine and odd columns the cosine of the angles.
        """
        angle_rads = self.get_angles(np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)

        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

        pos_encoding = angle_rads[np.newaxis, ...]

        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates

def create_padding_mask(seq):
    """Return a float mask that is 1.0 where ``seq`` equals 0.

    Two singleton axes are inserted after the first axis so the mask
    broadcasts against attention logits.
    """
    is_padding = tf.math.equal(seq, 0)
    return tf.cast(is_padding, tf.float32)[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    """Return a (size, size) mask with 1.0 strictly above the diagonal."""
    lower_triangle = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return 1 - lower_triangle

def create_masks(inp, tar):
    """Build the three masks used in one encoder/decoder pass.

    Returns:
        ``(enc_padding_mask, combined_mask, dec_padding_mask)``: the two
        padding masks are both derived from ``inp``; ``combined_mask`` is the
        element-wise maximum of the padding mask of ``tar`` and the look-ahead
        mask for its length.
    """
    # Encoder self-attention and decoder-to-encoder attention hide the same
    # padded input positions.
    enc_padding_mask = dec_padding_mask = create_padding_mask(inp)

    target_length = tf.shape(tar)[1]
    look_ahead_mask = create_look_ahead_mask(target_length)
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

    return enc_padding_mask, combined_mask, dec_padding_mask

# Hyperparameters
num_layers = 4
d_model = 128
dff = 512
num_heads = 8
dropout_rate = 0.1

# Encoder and decoder share every size setting and the vocabulary.
encoder = Encoder(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=vocab_size,
    rate=dropout_rate,
)
decoder = Decoder(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    target_vocab_size=vocab_size,
    rate=dropout_rate,
)

learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Per-token loss (reduction='none') so padded positions can be masked out.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')


def loss_function(real, pred):
    """Return the mean cross-entropy over the non-padding target tokens.

    Positions where ``real`` is 0 (padding) contribute nothing.  The sum is
    divided by the number of real tokens; averaging over every position, as
    before, scaled the loss down by the share of padding in the batch.

    Args:
        real: target token ids, with 0 marking padding.
        pred: logits whose last axis covers the vocabulary.

    Returns:
        Scalar loss tensor; 0 when the batch holds only padding.
    """
    loss_ = loss_object(real, pred)

    mask = tf.cast(tf.math.not_equal(real, 0), dtype=loss_.dtype)
    loss_ *= mask

    # divide_no_nan guards the all-padding case.
    return tf.math.divide_no_nan(tf.reduce_sum(loss_), tf.reduce_sum(mask))

# Checkpointing: optimizer, encoder and decoder are saved together and the
# five most recent checkpoints are kept.
checkpoint_path = "./checkpoints/train"
ckpt = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# Resume from the newest checkpoint when one exists.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Latest checkpoint restored!")

EPOCHS = 20

for epoch in range(EPOCHS):
    total_loss = 0

    for inp, tar in dataset.take(steps_per_epoch):
        # Decoder input drops the last token; the labels drop the first.
        tar_inp = tar[:, :-1]
        tar_real = tar[:, 1:]

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

        with tf.GradientTape() as tape:
            enc_output = encoder(inp, True, enc_padding_mask)
            dec_output, _ = decoder(tar_inp, enc_output, True, combined_mask, dec_padding_mask)
            loss = loss_function(tar_real, dec_output)

        # Collected after the forward pass, once the models hold variables.
        variables = encoder.trainable_variables + decoder.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        total_loss += loss

    # Save every second epoch.
    if (epoch + 1) % 2 == 0:
        ckpt_manager.save()

    print(f'Epoch {epoch+1} Loss {total_loss / steps_per_epoch:.4f}')

def evaluate(inp_sentence):
    """Greedily decode up to 20 tokens for ``inp_sentence``.

    The sentence is tokenized with ``inp_tokenizer``, wrapped in the
    ``<start>`` / ``<end>`` ids and encoded once.  Decoding starts from
    ``<start>`` and stops early when ``<end>`` is predicted.

    Returns:
        ``(token_ids, attention_weights)``: the decoded ids (including the
        leading ``<start>``) and the decoder attention weights of the last
        step.
    """
    start_id = inp_tokenizer.word_index['<start>']
    end_id = inp_tokenizer.word_index['<end>']

    sentence_ids = inp_tokenizer.texts_to_sequences([inp_sentence])[0]
    encoder_input = tf.expand_dims([start_id] + sentence_ids + [end_id], 0)

    encoder_padding_mask = create_padding_mask(encoder_input)
    enc_output = encoder(encoder_input, False, encoder_padding_mask)

    # Everything generated so far doubles as the next decoder input.
    output = tf.expand_dims([start_id], 0)

    for _ in range(20):
        look_ahead_mask = create_look_ahead_mask(tf.shape(output)[1])
        dec_target_padding_mask = create_padding_mask(output)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

        dec_output, attention_weights = decoder(output, enc_output, False, combined_mask, encoder_padding_mask)

        # Only the prediction for the last position is needed.
        last_step = dec_output[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(last_step, axis=-1), tf.int32)

        if predicted_id == end_id:
            break

        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights

def translate(sentence):
    """Return the tokens decoded for ``sentence`` joined by single spaces.

    Token id 0 (padding) is left out.
    """
    token_ids, _ = evaluate(sentence)
    tokens = (inp_tokenizer.index_word[i] for i in token_ids.numpy() if i != 0)
    return ' '.join(tokens)

# Example call: decode from the opening couplet of a poem and print the result
print(translate("春眠不觉晓，处处闻啼鸟。"))