In [None]:
import sys
import os

# A notebook has no real __file__, so the working directory stands in for the
# notebook's own directory; the project root is taken to be two levels above it.
current_notebook_dir = os.getcwd()
project_root_dir = os.path.abspath(os.path.join(current_notebook_dir, '../../'))

# Put the project root at the very front of sys.path, unless it is already listed
# (presumably so that the `pytorch_script` package can be imported).
if project_root_dir not in sys.path:
    sys.path.insert(0, project_root_dir)

print(sys.path)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

from pytorch_script.visual_utils import plot_acc_losses, plot_epochs_losses_distribution, plot_losses_distribution_and_contribution

In [None]:
# Metric arrays read from .npy files in the working directory.
train_accuracies = np.load('train_accuracies.npy')
test_accuracies = np.load('test_accuracies.npy')
train_losses = np.load('train_losses.npy')
test_losses = np.load('test_losses.npy')

# Epoch values for the x axis: 0, 5, then every 10th epoch from 10 through 500.
# NOTE(review): 'epochs.npy' used to be loaded here and was overwritten by this
# hard-coded schedule on the very next statement, so its contents were never used.
# The dead load is dropped; confirm the schedule lines up with the arrays above.
epochs = [0, 5, *range(10, 501, 10)]

In [None]:
# Hand the epoch axis and the four metric arrays to the project plotting helper
# (presumably draws accuracy and loss curves — see pytorch_script.visual_utils).
plot_acc_losses(
    epochs,
    train_accuracies,
    test_accuracies,
    train_losses,
    test_losses,
)

In [None]:
import pickle


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserialising.
    Only use this on files produced by a trusted source.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Loss collections for train and test; later cells index them by epoch
# (e.g. [0], [100]) and take len() of each entry.
all_model_train_losses = _load_pickle('all_model_train_losses.pickle')
all_model_test_losses = _load_pickle('all_model_test_losses.pickle')

In [None]:
# Number of loss values stored for entries 0 and 100, shown as a (n_0, n_100) tuple.
tuple(len(all_model_train_losses[epoch]) for epoch in (0, 100))

In [None]:
# Training-loss distributions at the selected epochs. The 'train' label must be
# paired with the training losses (the test losses were passed here by mistake).
plot_epochs_losses_distribution('train', all_model_train_losses, [100, 200, 300, 400, 500])

In [None]:
# Test-loss distributions at the selected epochs.
plot_epochs_losses_distribution(
    'test',
    all_model_test_losses,
    [0, 20, 40, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500],
)

In [None]:
# One panel per selected epoch, two panels per row. The grid is sized from the
# epoch list so no empty rows are drawn (the previous fixed 8x2 grid left half
# of the panels blank for these 8 epochs).
epoch_indices_to_plot = [0, 50, 100, 150, 200, 300, 400, 500]
n_cols = 2
n_rows = (len(epoch_indices_to_plot) + n_cols - 1) // n_cols

fig, axes = plt.subplots(n_rows, n_cols, figsize=(32, 64), squeeze=False)
axes_flat = axes.flatten()

# Map each epoch index onto the next panel of the flattened axes array.
for i, epoch_index in enumerate(epoch_indices_to_plot):
    plot_losses_distribution_and_contribution(epoch_index, all_model_train_losses[epoch_index], axes_flat[i])

# Hide a trailing panel if the number of epochs is not a multiple of n_cols.
for unused_ax in axes_flat[len(epoch_indices_to_plot):]:
    unused_ax.set_visible(False)

# Figure-level title; y nudges it above the top row.
fig.suptitle('Training Loss Distribution and Contribution Across Different Epochs', fontsize=40, y=1.02)

plt.tight_layout(rect=[0, 0.03, 1, 0.98])  # leave room for the figure title
plt.show()

In [None]:
# Epochs shown in the figure, one panel each.
epoch_indices_to_plot = [0, 50, 100, 150, 200, 300, 400, 500]

# 4 x 2 grid of panels, flattened so they can be walked in reading order.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(32, 64))
axes_flat = axes.flatten()

# Pair every epoch with its panel and let the project helper draw into it.
for epoch_index, panel in zip(epoch_indices_to_plot, axes_flat):
    plot_losses_distribution_and_contribution(
        epoch_index,
        all_model_test_losses[epoch_index],
        panel,
    )

# Figure-level title; y nudges it above the top row.
fig.suptitle(
    'Test Loss Distribution and Contribution Across Different Epochs',
    fontsize=40,
    y=1.02,
)

# Reserve a margin for the figure title before rendering.
plt.tight_layout(rect=[0, 0.03, 1, 0.98])
plt.show()

In [None]:
def plot_log_loss_distribution(losses, ax):
    """Draw a histogram of ``log10(loss)`` on ``ax``.

    Args:
        losses: Sequence or array of loss values.
        ax: Axes object forwarded to ``sns.histplot`` via its ``ax`` argument.

    Only finite, strictly positive losses are plotted. Zero, negative, NaN and
    infinite entries have no finite base-10 logarithm; feeding them to the
    histogram as ``-inf``/``nan``/``inf`` would trigger NumPy warnings and a
    non-finite bin range, so they are left out.
    """
    loss_values = np.asarray(losses, dtype=float).ravel()
    plottable = loss_values[np.isfinite(loss_values) & (loss_values > 0)]
    log_losses = np.log10(plottable)
    sns.histplot(log_losses, kde=False, ax=ax)

In [None]:
# Epochs to show. A cell-specific name is used so the `epochs` list consumed by
# the plot_acc_losses cell is not overwritten when this cell runs.
log_train_epochs = [0, 50, 100, 150, 200, 300, 400, 500]
fig, axes = plt.subplots(4, 2, figsize=(18, 20))
axes = axes.flatten()
for ax, epoch in zip(axes, log_train_epochs):
    plot_log_loss_distribution(all_model_train_losses[epoch], ax)
    ax.set_title(f"Log Training Loss Distribution of Epoch {epoch}")
    ax.set_xlabel('log10 loss')

In [None]:
# Mean of the test losses stored under index 100 (presumably epoch 100 — confirm).
np.mean(all_model_test_losses[100])

In [None]:
# Median of the same test losses, for comparison with the mean of that entry.
np.median(all_model_test_losses[100])

In [None]:
# Epochs to show. A cell-specific name is used so the `epochs` list consumed by
# the plot_acc_losses cell is not overwritten when this cell runs.
raw_train_epochs = [0, 50, 100, 150]
fig, axes = plt.subplots(2, 2, figsize=(18, 20))
axes = axes.flatten()
for ax, epoch in zip(axes, raw_train_epochs):
    sns.histplot(all_model_train_losses[epoch], kde=False, ax=ax)
    ax.set_title(f"Training Loss Distribution of Epoch {epoch}")
    ax.set_xlabel('loss value')

In [None]:
# Epochs to show. A cell-specific name is used so the `epochs` list consumed by
# the plot_acc_losses cell is not overwritten when this cell runs.
log_test_epochs = [0, 50, 100, 150, 200, 300, 400, 500]
fig, axes = plt.subplots(4, 2, figsize=(18, 20))
axes = axes.flatten()
for ax, epoch in zip(axes, log_test_epochs):
    plot_log_loss_distribution(all_model_test_losses[epoch], ax)
    ax.set_title(f"Log Test Loss Distribution of Epoch {epoch}")
    ax.set_xlabel('log10 loss')

In [None]:
def _calculate_bin_sum_contributions(epoch_losses, bin_edges, total_loss):
    """
    Sum the loss values falling into each bin and normalise by the total loss.

    Bin membership follows ``np.digitize`` (left edge inclusive). Values below the
    first edge are counted in the first bin, and values at or above the last edge
    (including a value exactly equal to it) are counted in the last bin.

    Args:
        epoch_losses (array-like): Loss values of the current epoch.
        bin_edges (np.ndarray): Increasing bin edges, e.g. from ``np.histogram``.
        total_loss (float): Sum of all loss values of the current epoch.

    Returns:
        np.ndarray: Per-bin loss sum divided by ``total_loss``, with
        ``len(bin_edges) - 1`` entries. All zeros when ``total_loss`` is 0.
    """
    losses = np.asarray(epoch_losses, dtype=float).ravel()
    n_bins = len(bin_edges) - 1

    # Nothing to normalise (or no bins at all): return zeros of the right length.
    if n_bins <= 0 or total_loss == 0:
        return np.zeros(max(n_bins, 0))

    # np.digitize is 1-based and yields 0 / len(bin_edges) for values outside the
    # edges, so shift to 0-based and clamp into the valid bin range.
    bin_idx = np.clip(np.digitize(losses, bin_edges) - 1, 0, n_bins - 1)

    # Weighted bincount accumulates each loss into its bin in a single pass,
    # replacing a Python loop that called np.digitize once per value.
    bin_sum_losses = np.bincount(bin_idx, weights=losses, minlength=n_bins)

    return bin_sum_losses / total_loss


# --- Main plotting function ---
def plot_loss_contribution(epoch_index, epoch_losses, ax, num_bins=10):
    """
    Draw, for one epoch, the share of the total loss contributed by each histogram bin.

    Args:
        epoch_index (int): Epoch identifier. Not used by the drawing code itself;
            kept so existing callers keep working.
        epoch_losses (array-like): Loss values of that epoch.
        ax (matplotlib.axes.Axes): Axes to draw on.
        num_bins (int): Number of equal-width histogram bins. Defaults to 10,
            which is also ``np.histogram``'s own default.
    """
    epoch_losses = np.asarray(epoch_losses)
    total_loss = np.sum(epoch_losses)

    # Only the bin edges are needed; the per-bin counts are discarded.
    _, bin_edges = np.histogram(epoch_losses, bins=num_bins)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2  # bar positions
    contributions = _calculate_bin_sum_contributions(epoch_losses, bin_edges, total_loss)

    bar_width = (bin_edges[1] - bin_edges[0]) * 0.9  # small gap between neighbouring bars
    ax.bar(bin_centers, contributions, width=bar_width, color='lightcoral', edgecolor='black', label='Loss Contribution', alpha=0.7)

    ax.set_ylabel('Contribution (Loss * Frequency)', color='lightcoral')
    ax.tick_params(axis='y', labelcolor='lightcoral')

    # Leave 10% headroom above the tallest bar; fall back to a fixed range when
    # every contribution is zero so the axis does not collapse.
    if len(contributions) > 0 and max(contributions) > 0:
        y_upper = max(contributions) * 1.1
    else:
        y_upper = 0.1
    ax.set_ylim(0, y_upper)

    ax.set_xlabel('Loss Value')
    ax.grid(axis='y', linestyle='--', alpha=0.4)  # horizontal grid lines


In [None]:
# Epochs to show. A cell-specific name is used so the `epochs` list consumed by
# the plot_acc_losses cell is not overwritten when this cell runs.
train_contribution_epochs = [0, 50, 100, 150, 200, 300, 400, 500]
fig, axes = plt.subplots(4, 2, figsize=(18, 20))
axes = axes.flatten()
for ax, epoch in zip(axes, train_contribution_epochs):
    plot_loss_contribution(epoch, all_model_train_losses[epoch], ax)
    ax.set_title(f"Training Loss Contribution of Epoch {epoch}")

In [None]:
# Epochs to show. A cell-specific name is used so the `epochs` list consumed by
# the plot_acc_losses cell is not overwritten when this cell runs.
test_contribution_epochs = [0, 50, 100, 150, 200, 300, 400, 500]
fig, axes = plt.subplots(4, 2, figsize=(18, 20))
axes = axes.flatten()
for ax, epoch in zip(axes, test_contribution_epochs):
    plot_loss_contribution(epoch, all_model_test_losses[epoch], ax)
    ax.set_title(f"Test Loss Contribution of Epoch {epoch}")