In [3]:
import sys
import os

# A notebook kernel has no __file__, so paths are resolved from the kernel's
# working directory. (The previous code passed the *string* '__file__' to
# abspath, which resolved against the working directory as well.)
current_notebook_dir = os.getcwd()
project_root_dir = os.path.abspath(os.path.join(current_notebook_dir, '../../'))

# Put the directory two levels up at the very front of sys.path so that
# packages living there take precedence during import.
if project_root_dir not in sys.path:
    sys.path.insert(0, project_root_dir)

print(sys.path)

['/home/hqdeng7/lijuyang/generalization/loss_distribution', '/home/hqdeng7/.conda/envs/ljy/lib/python311.zip', '/home/hqdeng7/.conda/envs/ljy/lib/python3.11', '/home/hqdeng7/.conda/envs/ljy/lib/python3.11/lib-dynload', '', '/home/hqdeng7/.conda/envs/ljy/lib/python3.11/site-packages', '/tmp/tmpkf6eqhkj']


In [4]:
import os
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

from pytorch_script.visual_utils import get_sorted_model_paths, evaluate_model_performance, load_model_state_dict
from pytorch_script.datasets import load_tinyimagenet_data

In [5]:
import seaborn as sns

# Settings for evaluating the saved Tiny ImageNet / ResNet-18 checkpoints.
config = {
	'model_dir': '../../model_training_results/tinyimagenet_resnet18',  # directory that holds the saved checkpoints
	'model_prefix': 'model_',  # checkpoint file-name prefix
	'model_extension': '.pth',  # checkpoint file extension
	'tinyimagenet_data_path': '../../pytorch_script/data/tiny-imagenet-200',  # Tiny ImageNet dataset root
	'batch_size': 256,  # DataLoader batch size
	'num_workers': 16  # number of DataLoader worker processes
}

# Prefer the GPU, but fall back to the CPU so the cell still runs on a machine
# without CUDA instead of failing when the model is moved to the device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the data. shuffle=False keeps the sample order identical for every
# checkpoint that is evaluated below.
train_ds, val_ds, num_classes = load_tinyimagenet_data(config['tinyimagenet_data_path'])
train_loader = torch.utils.data.DataLoader(
	train_ds, config['batch_size'], shuffle=False, num_workers=config['num_workers']
)
test_loader = torch.utils.data.DataLoader(
	val_ds, config['batch_size'], shuffle=False, num_workers=config['num_workers']
)

# Collect the checkpoint paths; the loop below unpacks each entry as
# (epoch, model_path).
sorted_model_paths = get_sorted_model_paths(
	config['model_dir'], config['model_prefix'], config['model_extension']
)

if not sorted_model_paths:
	print("没有找到符合条件或能成功加载的模型文件。请检查 MODEL_DIR, MODEL_PREFIX 和 MODEL_EXTENSION 设置。")

# Parallel lists: index i of every list describes the same checkpoint.
epochs = []
train_accuracies = []
test_accuracies = []
train_losses = []
test_losses = []

print("\n开始逐个评估模型...")
for epoch, model_path in sorted_model_paths:
	print(f"\n正在评估模型：{model_path} (纪元: {epoch})")

	# NOTE(review): the class count 200 is hard-coded although
	# load_tinyimagenet_data also returns num_classes — confirm they agree.
	model = load_model_state_dict('tinyimagenet', 'resnet18', 200, model_path, device)

	# Evaluate on the training set.
	train_acc, train_loss = evaluate_model_performance(model, train_loader, device)
	print(f"  training acc = {train_acc:.2f}%")
	print(f"  training loss = {train_loss:.2f}")

	# Evaluate on the validation/test set.
	test_acc, test_loss = evaluate_model_performance(model, test_loader, device)
	print(f"  test acc = {test_acc:.2f}%")
	print(f"  test loss = {test_loss:.2f}")

	# Record the results only after both evaluations have finished, so the
	# five lists stay index-aligned even if the loop is interrupted or an
	# evaluation raises part-way through an iteration.
	epochs.append(epoch)
	train_accuracies.append(train_acc)
	train_losses.append(train_loss)
	test_accuracies.append(test_acc)
	test_losses.append(test_loss)

Found 10000 images in the validation set.

开始逐个评估模型...

正在评估模型：../../model_training_results/tinyimagenet_resnet18/model_0.pth (纪元: 0)
  从字典中提取模型状态字典...
提取成功
  training acc = 3.80%
  training loss = 4.95
  test acc = 4.02%
  test loss = 4.93

正在评估模型：../../model_training_results/tinyimagenet_resnet18/model_1.pth (纪元: 1)
  从字典中提取模型状态字典...
提取成功
  training acc = 9.30%
  training loss = 4.35
  test acc = 10.54%
  test loss = 4.22

正在评估模型：../../model_training_results/tinyimagenet_resnet18/model_2.pth (纪元: 2)
  从字典中提取模型状态字典...
提取成功
  training acc = 10.32%
  training loss = 4.41
  test acc = 11.19%
  test loss = 4.39

正在评估模型：../../model_training_results/tinyimagenet_resnet18/model_3.pth (纪元: 3)
  从字典中提取模型状态字典...
提取成功
  training acc = 16.05%
  training loss = 3.87
  test acc = 18.18%
  test loss = 3.68

正在评估模型：../../model_training_results/tinyimagenet_resnet18/model_4.pth (纪元: 4)
  从字典中提取模型状态字典...
提取成功
  training acc = 15.01%
  training loss = 3.99
  test acc = 18.16%
  test loss = 3.74

正在评估模型：

In [6]:
# Write each collected metric series to its own .npy file in the working directory.
for _npy_name, _series in (
    ('train_accuracies.npy', train_accuracies),
    ('test_accuracies.npy', test_accuracies),
    ('train_losses.npy', train_losses),
    ('test_losses.npy', test_losses),
):
    np.save(_npy_name, np.array(_series))

In [7]:
def model_losses(model, data_loader, eval_device=None):
  """Return the per-sample cross-entropy losses of ``model`` over ``data_loader``.

  Args:
    model: Module that is called on each batch of images; it is switched to
      eval mode and moved to the evaluation device.
    data_loader: Iterable yielding ``(images, labels)`` batches.
    eval_device: Device used for the evaluation. When ``None`` (the default)
      the notebook-level ``device`` variable is used.

  Returns:
    A flat Python list with one loss value per sample, in the order the
    samples are produced by ``data_loader``.
  """
  target_device = device if eval_device is None else eval_device
  model.eval()
  model.to(target_device)
  losses = []
  # Evaluation only: without no_grad() every forward pass would record an
  # autograd graph that is never used, wasting memory and time.
  with torch.no_grad():
    for images, labels in data_loader:
      images, labels = images.to(target_device), labels.to(target_device)
      outputs = model(images)
      # reduction='none' keeps one loss value per sample instead of the batch mean.
      loss = torch.nn.functional.cross_entropy(outputs, labels, reduction='none')
      losses.extend(loss.tolist())
  return losses

In [12]:
def get_all_model_losses(sorted_model_paths, train_loader, test_loader):
	"""Collect the losses returned by ``model_losses`` for every checkpoint.

	Args:
		sorted_model_paths: Iterable of ``(epoch, model_path)`` pairs.
		train_loader: Loader passed to ``model_losses`` for the training losses.
		test_loader: Loader passed to ``model_losses`` for the test losses.

	Returns:
		A tuple ``(train_losses_by_epoch, test_losses_by_epoch)``; both are
		dictionaries keyed by the epoch taken from each pair.
	"""
	train_by_epoch, test_by_epoch = {}, {}

	for checkpoint in sorted_model_paths:
		epoch, model_path = checkpoint
		# Progress indicator: one line per checkpoint.
		print(epoch)
		net = load_model_state_dict('tinyimagenet', 'resnet18', 200, model_path, device)
		train_by_epoch[epoch] = model_losses(net, train_loader)
		test_by_epoch[epoch] = model_losses(net, test_loader)

	return train_by_epoch, test_by_epoch
		

In [13]:
# Run get_all_model_losses over every checkpoint in sorted_model_paths and keep
# the two returned dictionaries (train losses, test losses).
all_model_train_losses, all_model_test_losses = get_all_model_losses(sorted_model_paths, train_loader, test_loader)

0
  从字典中提取模型状态字典...
提取成功
1
  从字典中提取模型状态字典...
提取成功
2
  从字典中提取模型状态字典...
提取成功
3
  从字典中提取模型状态字典...
提取成功
4
  从字典中提取模型状态字典...
提取成功
5
  从字典中提取模型状态字典...
提取成功
6
  从字典中提取模型状态字典...
提取成功
7
  从字典中提取模型状态字典...
提取成功
8
  从字典中提取模型状态字典...
提取成功
9
  从字典中提取模型状态字典...
提取成功
10
  从字典中提取模型状态字典...
提取成功
15
  从字典中提取模型状态字典...
提取成功
20
  从字典中提取模型状态字典...
提取成功
25
  从字典中提取模型状态字典...
提取成功
30
  从字典中提取模型状态字典...
提取成功
35
  从字典中提取模型状态字典...
提取成功
40
  从字典中提取模型状态字典...
提取成功
45
  从字典中提取模型状态字典...
提取成功
50
  从字典中提取模型状态字典...
提取成功
55
  从字典中提取模型状态字典...
提取成功
60
  从字典中提取模型状态字典...
提取成功
65
  从字典中提取模型状态字典...
提取成功
70
  从字典中提取模型状态字典...
提取成功
75
  从字典中提取模型状态字典...
提取成功
80
  从字典中提取模型状态字典...
提取成功
85
  从字典中提取模型状态字典...
提取成功
90
  从字典中提取模型状态字典...
提取成功
95
  从字典中提取模型状态字典...
提取成功
100
  从字典中提取模型状态字典...
提取成功
110
  从字典中提取模型状态字典...
提取成功
120
  从字典中提取模型状态字典...
提取成功
130
  从字典中提取模型状态字典...
提取成功
140
  从字典中提取模型状态字典...
提取成功
150
  从字典中提取模型状态字典...
提取成功
160
  从字典中提取模型状态字典...
提取成功
170
  从字典中提取模型状态字典...
提取成功
180
  从字典中提取模型状态字典...
提取成功
190
  从字典中提取模型状态字典...
提取成功
200
  从字典中提取

In [14]:
import pickle
# Persist both loss dictionaries as pickle files in the working directory.
for file_path_pickle, _losses_by_epoch in (
    ("all_model_train_losses.pickle", all_model_train_losses),
    ("all_model_test_losses.pickle", all_model_test_losses),
):
    # 'wb' opens the file for writing in binary mode, as pickle requires.
    with open(file_path_pickle, 'wb') as f:
        pickle.dump(_losses_by_epoch, f)

In [17]:
# Save the epoch numbers collected during evaluation to epochs.npy.
np.save('epochs.npy', epochs)

In [16]:
# Display the epoch numbers collected during evaluation.
epochs

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 15,
 20,
 25,
 30,
 35,
 40,
 45,
 50,
 55,
 60,
 65,
 70,
 75,
 80,
 85,
 90,
 95,
 100,
 110,
 120,
 130,
 140,
 150,
 160,
 170,
 180,
 190,
 200,
 210,
 220,
 230,
 240,
 250,
 260,
 270,
 280,
 290]