In [1]:
import os
from tqdm import tqdm

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from dataset import load_core_set_data, load_pca_data
from simulation.simulator import FDTDSimulator
from simulation.student import LSTMPredictor

  return torch._C._cuda_getDeviceCount() > 0


In [2]:
import torch
import os

# 检查CUDA环境
print("当前CUDA_VISIBLE_DEVICES:", os.environ.get('CUDA_VISIBLE_DEVICES', 'Not Set'))
print("CUDA是否可用:", torch.cuda.is_available())
print("可用的GPU数量:", torch.cuda.device_count())

if torch.cuda.is_available():
    # 列出所有可用的GPU
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

当前CUDA_VISIBLE_DEVICES: Not Set
CUDA是否可用: False
可用的GPU数量: 1


In [3]:
# 检查PyTorch的CUDA版本
import torch
print("PyTorch版本:", torch.__version__)
print("PyTorch是否支持CUDA:", torch.cuda.is_available())
print("PyTorch的CUDA版本:", torch.version.cuda if torch.cuda.is_available() else "NA")

PyTorch版本: 2.5.1+cu124
PyTorch是否支持CUDA: False
PyTorch的CUDA版本: NA


In [4]:
import torch
import sys
import subprocess

# PyTorch信息
print("Python版本:", sys.version)
print("PyTorch版本:", torch.__version__)
print("PyTorch是否支持CUDA:", torch.cuda.is_available())
print("PyTorch的CUDA版本:", torch.version.cuda if torch.cuda.is_available() else "NA")

# 尝试获取系统CUDA信息
try:
    nvidia_smi = subprocess.check_output("nvidia-smi", shell=True)
    print("\nGPU信息 (nvidia-smi):")
    print(nvidia_smi.decode())
except:
    print("无法运行 nvidia-smi")

# CUDA设备详细信息
if torch.cuda.is_available():
    print("\nCUDA设备详细信息:")
    print("当前CUDA设备:", torch.cuda.current_device())
    print("设备数量:", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        print(f"设备 {i} 名称:", torch.cuda.get_device_name(i))
        print(f"设备 {i} 功能:", torch.cuda.get_device_capability(i))

Python版本: 3.11.11 (main, Dec 11 2024, 16:28:39) [GCC 11.2.0]
PyTorch版本: 2.5.1+cu124
PyTorch是否支持CUDA: False
PyTorch的CUDA版本: NA

GPU信息 (nvidia-smi):
Sat Dec 14 11:24:17 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...    On  |   00000000:01:00.0 Off |                  N/A |
| N/A   62C    P8              7W /   60W |     355MiB /   4096MiB |      0%      Default |
|                                         |                        |                 

# 准备数据

## 半径矩阵

```python
radius_save_path = "data/radius_matrix.pth"
```

In [2]:
radius_save_path = "data/radius_matrix.pth"

if os.path.exists(radius_save_path):
    radius_matrix = torch.load(radius_save_path, weights_only=False)
else:
    radius_matrix = torch.rand(10, 10) * 10
    torch.save(radius_matrix, radius_save_path)

print(radius_matrix)


tensor([[4.2537, 7.2491, 6.1717, 9.2729, 4.9853, 9.8473, 3.1820, 9.3614, 8.1104,
         9.0288],
        [2.3149, 8.7649, 5.0795, 8.0234, 4.8227, 8.7122, 0.7336, 2.7354, 7.6845,
         6.5359],
        [6.5778, 9.8060, 0.8749, 7.8437, 6.0056, 7.5152, 8.9349, 6.0827, 4.3046,
         7.7731],
        [0.3096, 1.4307, 0.4254, 2.0687, 3.2475, 0.8592, 9.9883, 3.1064, 7.1883,
         2.8260],
        [6.0670, 4.3900, 8.1416, 5.1876, 1.6904, 2.8222, 9.9509, 8.0686, 6.5593,
         5.9664],
        [9.7981, 5.3376, 9.5862, 0.3684, 5.5181, 7.1683, 3.2136, 5.2784, 1.8340,
         9.4031],
        [2.1909, 0.4959, 9.4064, 1.8535, 2.7756, 7.6035, 9.4730, 6.0837, 7.2479,
         1.7767],
        [2.4067, 8.6760, 5.0395, 6.0531, 6.9528, 0.0580, 0.6878, 2.1934, 4.4764,
         4.8344],
        [4.9628, 6.8850, 0.1238, 5.7276, 2.3660, 0.6239, 9.2926, 6.6920, 3.6652,
         1.5679],
        [2.8365, 5.7207, 1.2948, 2.9172, 4.4617, 8.5791, 4.1536, 8.3898, 6.0025,
         7.9040]])


In [3]:
class FDTDDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.FloatTensor(data)
        self.labels = torch.FloatTensor(labels)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]


train_data, _, test_data, _ = load_core_set_data()

print(train_data.shape)
print(test_data.shape)

(300, 10)
(50, 10)


## 生成知识蒸馏数据标签

调用simulator生成蒸馏用标签

In [4]:
simulator = FDTDSimulator(radius_matrix=radius_matrix)

device = "cuda"

### 训练数据标签

In [5]:
train_labels = torch.tensor([], device=device)
for data in tqdm(train_data):
    inputs = torch.tensor(data)
    outputs = simulator(inputs).detach()
    train_labels = torch.cat((train_labels, outputs), dim=0)

print(train_labels.shape)

100%|██████████| 300/300 [1:00:35<00:00, 12.12s/it]

torch.Size([300, 1000, 10])





In [6]:
train_labels = train_labels.cpu().numpy()
train_dataset = FDTDDataset(train_data, train_labels)

### 测试数据标签

In [7]:
test_labels = torch.tensor([], device=device)
for data in tqdm(test_data):
    inputs = torch.tensor(data)
    outputs = simulator(inputs).detach()
    test_labels = torch.cat((test_labels, outputs), dim=0)

print(test_labels.shape)

100%|██████████| 50/50 [14:38<00:00, 17.56s/it]

torch.Size([50, 1000, 10])





In [8]:
test_labels = test_labels.cpu().numpy()
test_dataset = FDTDDataset(test_data, test_labels)

### 保存数据


In [9]:
train_dataset_save_path = "data/train_dataset.pth"
test_dataset_save_path = "data/test_dataset.pth"

torch.save(train_dataset, train_dataset_save_path)
torch.save(test_dataset, test_dataset_save_path)

# 训练模型

In [14]:
model = LSTMPredictor(
    input_size=10,
    hidden_size=128,
    output_size=10,
    num_layers=2,
    dropout=0.1,
    device=device
)

num_epochs = 100
learning_rate = 1e-3

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [13]:
model.train()

total_steps = num_epochs * len(train_loader)
progress_bar = tqdm(total=total_steps, desc="Training Progress")

for epoch in range(num_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        _, outputs = model.get_sequence_output(inputs)
        print(outputs.shape)
        print(labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        progress_bar.update(1)
        progress_bar.set_description(f"Loss: {loss.item():.6f}")

    progress_bar.set_description(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss/len(train_loader):.6f}")

progress_bar.close()

model.eval()


Training Progress:   0%|          | 0/1000 [00:23<?, ?it/s]


RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


# 保存模型

```python
model_save_path = "data/model.pth"
```


In [None]:
model_save_path = "data/model.pth"
torch.save(model.state_dict(), model_save_path)

# 测试模型

