In [1]:
import numpy as np

# Fix the global NumPy seed (no code in this cell draws random numbers).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

samples = ["The cat sat on the mat.", "The dog ate my homework."]

# Build the vocabulary: every distinct whitespace-separated token gets a
# unique integer index starting at 1, so index 0 is never assigned.
token_index = {}
for sentence in samples:
    for token in sentence.split():
        token_index.setdefault(token, len(token_index) + 1)

# Only the first `max_length` tokens of each sample are encoded.
max_length = 10

# results[sample, position, token index] is 1 where that token occurs;
# the last axis has one extra slot because index 0 is unused.
vocab_slots = max(token_index.values()) + 1
results = np.zeros(shape=(len(samples), max_length, vocab_slots))
for row, sentence in enumerate(samples):
    for col, token in enumerate(sentence.split()[:max_length]):
        # Every token was registered in the vocabulary loop above.
        results[row, col, token_index[token]] = 1

print(results)


[[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]]


In [3]:
# Word-level one-hot encoding using the Keras Tokenizer
from keras.preprocessing.text import Tokenizer

samples = [
    "The cat sat on the mat.",
    "The dog ate my homework.",
]

# Only the 1000 most common words are taken into account.
tokenizer = Tokenizer(num_words=1000)
# Build the vocabulary: map every word to an integer index.
tokenizer.fit_on_texts(samples)

# Word -> integer index dictionary learned by the tokenizer.
word_index = tokenizer.word_index
# Each sentence as a list of integer word indices.
sequences = tokenizer.texts_to_sequences(samples)
# One binary vector per sentence.
one_hot_results = tokenizer.texts_to_matrix(samples, mode="binary")

print(one_hot_results)

[[0. 1. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [None]:
# One-hot encoding with the hashing trick
import hashlib

samples = ["The cat sat on the mat.", "The dog ate my homework."]
dimensionality = 1000  # each word is stored as a vector of length 1000
max_length = 10  # only the first `max_length` words of a sample are encoded


def stable_hash_index(word, dimensionality):
    """Map `word` to a bucket in ``range(dimensionality)`` reproducibly.

    The built-in ``hash()`` is salted per interpreter process for ``str``
    objects, so it yields different buckets after every kernel restart.
    A cryptographic digest of the UTF-8 bytes is stable across runs and
    machines.

    Args:
        word: The token to hash.
        dimensionality: Number of buckets; must be a positive integer.

    Returns:
        An integer index in ``[0, dimensionality)``.
    """
    digest = hashlib.sha256(word.encode("utf-8")).digest()
    return int.from_bytes(digest, "big") % dimensionality


# results[sample, position, bucket] is 1 where the word at that position
# hashes to that bucket. Distinct words may collide in the same bucket.
results = np.zeros((len(samples), max_length, dimensionality))
for i, sample in enumerate(samples):
    for j, word in enumerate(sample.split()[:max_length]):
        results[i, j, stable_hash_index(word, dimensionality)] = 1


In [1]:
import os

# Reduce TensorFlow's native (C++) log noise. The variable is set BEFORE
# importing tensorflow so that messages emitted during the import itself are
# filtered as well. Level '1' hides INFO messages only; WARNING and ERROR
# are still shown (use '2' to hide WARNING too).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import tensorflow as tf  # noqa: E402  (must come after the env var is set)

def check_gpu_tensorflow():
    """Print a report on whether TensorFlow can see and use a GPU.

    Lists the physical GPU devices TensorFlow reports. If there is at least
    one, runs a small 2x2 matrix multiplication inside a ``/GPU:0`` device
    scope and prints the devices of an operand and of the result together
    with the product. Any exception raised by that test is caught and
    printed instead of propagating.

    Returns:
        None. All results are written to standard output.
    """
    print("--- TensorFlow GPU 检测开始 ---")

    # Ask TensorFlow for every physical GPU device it can see.
    gpus = tf.config.list_physical_devices('GPU')

    # Guard clause: nothing to test when no GPU is visible.
    if not gpus:
        print("❌ 失败：TensorFlow 未能检测到任何可用的 GPU。")
        print("   - 请检查 NVIDIA 驱动、CUDA Toolkit 和 cuDNN 是否已正确安装。")
        print("   - 特别注意：TensorFlow 对 CUDA 和 cuDNN 的版本有严格要求，请确保版本匹配。")
        print("\n--- 检测结束 ---")
        return

    print(f"✅ 成功：TensorFlow 检测到了 {len(gpus)} 个 GPU。")

    # The device name printed here is TensorFlow's identifier, not the
    # card's commercial model name.
    for position, device in enumerate(gpus):
        print(f"   - GPU {position}: {device.name}")

    # Smoke test: run one small computation inside an explicit GPU scope.
    print("\n--- 模拟训练测试 ---")
    try:
        with tf.device('/GPU:0'):
            print("   - 正在尝试在 GPU:0 上执行操作...")
            lhs = tf.constant([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
            rhs = tf.constant([[5.0, 6.0], [7.0, 8.0]], dtype=tf.float32)
            product = tf.matmul(lhs, rhs)

        print("   - 在 GPU 上成功执行了一次矩阵乘法。")
        print(f"   - 参与运算的张量 a 所在设备: {lhs.device}")
        print(f"   - 运算结果张量 c 所在设备: {product.device}")
        print(f"   - 运算结果: \n{product.numpy()}")

        print("\n✅ 结论：GPU 环境配置正确，可以用于 TensorFlow 训练！")
    except Exception as err:
        print(f"❌ 错误：在 GPU 上执行操作时发生错误: {err}")

    print("\n--- 检测结束 ---")

# Run the GPU check only when this code executes as the main module.
if __name__ == "__main__":
    check_gpu_tensorflow()

2025-09-19 17:24:36.613558: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-19 17:24:36.640073: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-19 17:24:37.341797: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
I0000 00:00:1758273877.521211   52530 gpu_device.cc:2020] Created device /job:localhost/rep

--- TensorFlow GPU 检测开始 ---
✅ 成功：TensorFlow 检测到了 1 个 GPU。
   - GPU 0: /physical_device:GPU:0

--- 模拟训练测试 ---
   - 正在尝试在 GPU:0 上执行操作...
   - 在 GPU 上成功执行了一次矩阵乘法。
   - 参与运算的张量 a 所在设备: /job:localhost/replica:0/task:0/device:GPU:0
   - 运算结果张量 c 所在设备: /job:localhost/replica:0/task:0/device:GPU:0
   - 运算结果: 
[[19. 22.]
 [43. 50.]]

✅ 结论：GPU 环境配置正确，可以用于 TensorFlow 训练！

--- 检测结束 ---
