In [None]:
"""
模型微调(Fine-tuning)进阶技术

模型微调是迁移学习的高级技巧:
- 不仅使用预训练权重,还对其进行微调
- 选择性解冻部分卷积层参与训练
- 在目标数据集上进一步优化特征

与特征提取的对比:
1. 特征提取: 完全冻结卷积基,仅训练分类器
2. 微调: 解冻顶层卷积块,同时训练

微调的关键原则:
- 只微调顶层(高级特征层)
- 保持底层冻结(通用特征层)
- 使用非常小的学习率
- 先训练分类器后再微调

技术要点:
- Block5(VGG16最后一个卷积块)最适合微调
- 学习率要比从头训练小10-100倍
- 微调通常能提升2-5%的准确率

作者: [Your Name]
日期: 2024-01
"""

import os
import warnings
from pathlib import Path

# TF_CPP_MIN_LOG_LEVEL must be in the environment before TensorFlow is
# imported; setting it afterwards is too late to quieten the native log
# messages emitted while the library loads. '2' hides INFO and WARNING.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras  
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

# Environment configuration: suppress Python-level warnings for cleaner output
warnings.filterwarnings('ignore')

# Random seeds (NumPy and TensorFlow) for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

print(f"TensorFlow版本: {tf.__version__}")
print(f"GPU可用: {len(tf.config.list_physical_devices('GPU')) > 0}")

In [None]:
"""
Stage 2: unfreeze Block5 and fine-tune

Fine-tuning strategy:
1. Unfreeze only the last convolutional block (block5)
2. Keep the earlier blocks frozen
3. Use a very small learning rate (1e-5)

Why unfreeze only Block5?
- Block1-4 learn generic features (edges, textures)
- Block5 learns task-specific high-level features
- Fine-tuning Block5 adapts to the new task while keeping low-level features
"""

rule = "=" * 60

# Announce the unfreezing step
for banner_line in (rule, "解冻策略", rule):
    print(banner_line)

# Make the base trainable again, then mark each layer individually: layers
# before block5_conv1 are frozen, block5_conv1 and everything after it train.
conv_base.trainable = True
base_layer_names = [base_layer.name for base_layer in conv_base.layers]
if 'block5_conv1' in base_layer_names:
    first_unfrozen = base_layer_names.index('block5_conv1')
    print(f"从 {base_layer_names[first_unfrozen]} 开始解冻")
else:
    # No such layer: every layer ends up frozen and nothing is announced.
    first_unfrozen = len(base_layer_names)

for position, base_layer in enumerate(conv_base.layers):
    base_layer.trainable = position >= first_unfrozen

# Report the frozen / trainable state of each base layer
print("\n层冻结状态:")
for position, base_layer in enumerate(conv_base.layers):
    if base_layer.trainable:
        state_label = "可训练"
    else:
        state_label = "冻结"
    print(f"{position:2d}. {base_layer.name:20s} - {state_label}")

print(rule)

# Parameter counts for the whole model after unfreezing
trainable_params_ft = sum(np.prod(weight.shape) for weight in model.trainable_weights)
non_trainable_params_ft = sum(np.prod(weight.shape) for weight in model.non_trainable_weights)
newly_trainable = trainable_params_ft - trainable_params

print("\n" + rule)
print("阶段2: 微调Block5")
print(rule)
print(f"可训练参数: {trainable_params_ft:,} ({trainable_params_ft/1e6:.2f}M)")
print(f"冻结参数: {non_trainable_params_ft:,} ({non_trainable_params_ft/1e6:.2f}M)")
print(f"与阶段1对比: 新增 {newly_trainable:,} 可训练参数")
print(f"学习率: {LR_FINETUNE} (降低10倍!)")
print(rule + "\n")

# Recompile so the new trainable flags take effect, with the smaller
# fine-tuning learning rate.
finetune_optimizer = optimizers.RMSprop(learning_rate=LR_FINETUNE)
model.compile(
    optimizer=finetune_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fine-tuning run
print("开始微调...")
history_finetune = model.fit(
    train_generator,
    epochs=EPOCHS_FINETUNE,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    verbose=1,
)

# Compare the last-epoch validation accuracy of both stages
final_acc_finetune = history_finetune.history['val_accuracy'][-1]
accuracy_gain = final_acc_finetune - final_acc_frozen

print("\n" + rule)
print("阶段2完成 - 微调效果对比")
print(rule)
print(f"阶段1(冻结)验证准确率: {final_acc_frozen:.4f}")
print(f"阶段2(微调)验证准确率: {final_acc_finetune:.4f}")
print(f"准确率提升: {accuracy_gain:+.4f}")
print(rule)

In [None]:
"""
Summary and best practices

This notebook demonstrates a complete model fine-tuning workflow
"""

rule = "=" * 60

# Comparison of the three approaches
method_comparison = (
    rule,
    "迁移学习三种方法对比",
    rule,
    "\n1. 从头训练CNN:",
    "   ✓ 完全自定义",
    "   ✗ 需要大量数据和训练时间",
    "   ✗ 容易过拟合",
    "\n2. 特征提取(冻结卷积基):",
    "   ✓ 训练快速",
    "   ✓ 数据需求少",
    "   ✗ 性能可能不是最优",
    "\n3. 模型微调(本notebook方法):",
    "   ✓ 性能最佳",
    "   ✓ 在目标数据集上优化特征",
    "   ✗ 训练时间较长",
    "   ✗ 需要仔细调整学习率",
)

# Key points of fine-tuning
key_points = (
    "\n" + rule,
    "微调的关键要点",
    rule,
    "\n✓ 分阶段训练:",
    "  1. 先冻结卷积基,训练分类器",
    "  2. 再解冻顶层,进行微调",
    "\n✓ 学习率策略:",
    "  - 阶段1: 正常学习率(1e-4)",
    "  - 阶段2: 降低10-100倍(1e-5或更小)",
    "\n✓ 解冻策略:",
    "  - 仅解冻顶层卷积块",
    "  - 保持底层特征冻结",
    "\n✓ 数据增强:",
    "  - 提升模型泛化能力",
    "  - 减少过拟合",
)

# Recommendations for production use
production_advice = (
    "\n" + rule,
    "生产环境建议",
    rule,
    "1. 使用更多训练轮数(阶段1: 10-20, 阶段2: 50-100)",
    "2. 实现学习率衰减策略",
    "3. 使用EarlyStopping防止过拟合",
    "4. 保存最佳模型检查点",
    "5. 在多个测试集上验证性能",
    "6. 考虑模型集成提升鲁棒性",
    rule,
)

# Emit every section line by line, in order
for section in (method_comparison, key_points, production_advice):
    for text_line in section:
        print(text_line)

print("\nNotebook执行完成! ✓")

In [None]:
"""
Evaluate the final model on the test set

A fully independent test set is used to measure generalization.
"""

print("=" * 60)
print("测试集评估")
print("=" * 60)
print("正在评估模型...")

# len(test_generator) is the number of batches in one full pass, including
# the final partial batch. `samples // BATCH_SIZE` silently skipped those
# trailing samples and evaluated to 0 steps for sets smaller than one batch.
test_loss, test_acc = model.evaluate(
    test_generator,
    steps=len(test_generator),
    verbose=1
)

# Read the last fine-tuning validation accuracy straight from the stage 2
# history, so this cell does not rely on a variable created by the later
# plotting cell.
final_val_acc = history_finetune.history['val_accuracy'][-1]
val_test_gap = abs(final_val_acc - test_acc)

print("\n" + "=" * 60)
print("最终性能评估")
print("=" * 60)
print(f"测试集损失: {test_loss:.4f}")
print(f"测试集准确率: {test_acc:.4f}")
print("\n性能对比:")
print(f"  验证集准确率: {final_val_acc:.4f}")
print(f"  测试集准确率: {test_acc:.4f}")
print(f"  差异: {val_test_gap:.4f}")

# Thresholds: < 0.02 excellent, < 0.05 acceptable, otherwise warn
if val_test_gap < 0.02:
    print("\n✓ 优秀! 验证集和测试集性能接近,模型泛化良好")
elif val_test_gap < 0.05:
    print("\n✓ 良好,模型泛化可接受")
else:
    print("\n⚠️  验证集和测试集性能差异较大,可能存在过拟合")

print("=" * 60)

In [None]:
"""
可视化完整训练过程(阶段1+阶段2)

合并两个阶段的训练历史,展示完整的学习曲线
"""

# Smoothing helper
def smooth_curve(points, factor=0.8):
    """
    Smooth a sequence with an exponential moving average.

    The first value is passed through unchanged; every later value becomes
    ``previous_smoothed * factor + value * (1 - factor)``.

    Args:
        points: iterable of raw data points
        factor: smoothing factor; larger values give a smoother curve

    Returns:
        A new list of smoothed values, the same length as ``points``.
    """
    result = []
    running = None
    for value in points:
        if not result:
            # Nothing smoothed yet: seed the average with the first value.
            running = value
        else:
            running = running * factor + value * (1 - factor)
        result.append(running)
    return result

# Pull the per-epoch metrics out of both training histories
metric_keys = ('accuracy', 'val_accuracy', 'loss', 'val_loss')
acc_frozen, val_acc_frozen, loss_frozen, val_loss_frozen = (
    history_frozen.history[key] for key in metric_keys
)
acc_ft, val_acc_ft, loss_ft, val_loss_ft = (
    history_finetune.history[key] for key in metric_keys
)

# Concatenate stage 1 and stage 2 into one continuous series per metric
acc_combined = acc_frozen + acc_ft
val_acc_combined = val_acc_frozen + val_acc_ft
loss_combined = loss_frozen + loss_ft
val_loss_combined = val_loss_frozen + val_loss_ft

finetune_start_epoch = len(acc_frozen)
epochs_combined = range(1, len(acc_combined) + 1)


def _draw_history_panel(ax, train_series, val_series, metric_name, smoothed):
    """Draw one train/validation panel, raw or smoothed, with the stage marker."""
    if smoothed:
        suffix = '(平滑)'
        ax.plot(epochs_combined, smooth_curve(train_series), 'b-',
                label=f'训练{metric_name}{suffix}', linewidth=2)
        ax.plot(epochs_combined, smooth_curve(val_series), 'r-',
                label=f'验证{metric_name}{suffix}', linewidth=2)
    else:
        suffix = ''
        ax.plot(epochs_combined, train_series, 'bo-',
                label=f'训练{metric_name}', alpha=0.6, markersize=5)
        ax.plot(epochs_combined, val_series, 'rs-',
                label=f'验证{metric_name}', alpha=0.6, markersize=5)
    # Vertical marker at the boundary between the two training stages
    ax.axvline(x=finetune_start_epoch, color='g', linestyle='--', linewidth=2, label='开始微调')
    ax.set_title(f'训练和验证{metric_name}{suffix}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.legend()
    ax.grid(True, alpha=0.3)


# 2x2 grid: raw curves on the top row, smoothed curves on the bottom row
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

_draw_history_panel(ax1, acc_combined, val_acc_combined, '准确率', smoothed=False)
_draw_history_panel(ax2, loss_combined, val_loss_combined, '损失', smoothed=False)
_draw_history_panel(ax3, acc_combined, val_acc_combined, '准确率', smoothed=True)
_draw_history_panel(ax4, loss_combined, val_loss_combined, '损失', smoothed=True)

plt.tight_layout()
plt.show()

# Textual summary of both stages
total_epochs = len(epochs_combined)
val_acc_gain = val_acc_ft[-1] - val_acc_frozen[-1]

print("\n" + "=" * 60)
print("训练过程分析")
print("=" * 60)
print(f"阶段1(冻结VGG16): Epoch 1-{finetune_start_epoch}")
print(f"  最终训练准确率: {acc_frozen[-1]:.4f}")
print(f"  最终验证准确率: {val_acc_frozen[-1]:.4f}")
print(f"\n阶段2(微调Block5): Epoch {finetune_start_epoch+1}-{total_epochs}")
print(f"  最终训练准确率: {acc_ft[-1]:.4f}")
print(f"  最终验证准确率: {val_acc_ft[-1]:.4f}")
print("\n总体提升:")
print(f"  验证准确率提升: {val_acc_gain:+.4f}")
print("=" * 60)

In [None]:
"""
Configuration parameters and data paths
"""

# Data locations: one sub-directory per split under the dataset root
DATA_ROOT = Path("猫狗数据集/dataset")
TRAIN_DIR, VALIDATION_DIR, TEST_DIR = (
    DATA_ROOT / split_name for split_name in ("train", "validation", "test")
)

# Image parameters
IMG_HEIGHT = IMG_WIDTH = 150
IMG_CHANNELS = 3

# Training parameters
BATCH_SIZE = 32
EPOCHS_INITIAL = 2   # epochs for the initial classifier training (test setting)
EPOCHS_FINETUNE = 2  # epochs for fine-tuning (test setting)
LR_INITIAL = 1e-4    # learning rate for the initial training
LR_FINETUNE = 1e-5   # learning rate for fine-tuning (10x smaller than initial)

# Path to a local copy of the VGG16 weights
VGG_WEIGHTS_PATH = 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Use the local weights file when it exists; otherwise fall back to the
# 'imagenet' identifier.
local_weights_found = bool(VGG_WEIGHTS_PATH) and Path(VGG_WEIGHTS_PATH).exists()
weights_source = VGG_WEIGHTS_PATH if local_weights_found else 'imagenet'
print("✓ 使用本地VGG16权重" if local_weights_found else "⚠️  使用在线下载的ImageNet权重")

# Echo the effective configuration
rule = "=" * 60
config_report = (
    "\n" + rule,
    "微调配置",
    rule,
    f"图像尺寸: {IMG_HEIGHT}x{IMG_WIDTH}",
    f"批次大小: {BATCH_SIZE}",
    f"初始训练轮数: {EPOCHS_INITIAL} (测试配置)",
    f"微调轮数: {EPOCHS_FINETUNE} (测试配置)",
    f"初始学习率: {LR_INITIAL}",
    f"微调学习率: {LR_FINETUNE} (降低10倍)",
    rule,
)
for report_line in config_report:
    print(report_line)

In [None]:
"""
Load the VGG16 convolutional base and build the full model
"""

# Load the VGG16 convolutional base (no classifier head)
conv_base = VGG16(
    weights=weights_source,
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
)

# Stack a small binary classifier on top of the convolutional base
model = models.Sequential(name='VGG16_Finetuning')
model.add(conv_base)
model.add(layers.Flatten(name='flatten'))
model.add(layers.Dense(256, activation='relu', name='fc1'))
model.add(layers.Dropout(0.5, name='dropout'))
model.add(layers.Dense(1, activation='sigmoid', name='output'))

print("=" * 60)
print("完整模型架构")
print("=" * 60)
model.summary()
print("=" * 60)

# List the layers of the VGG16 base with their output shapes.
# `layer.output.shape` is used instead of `layer.output_shape` because the
# latter attribute is not available on Keras 3 layers, while `output.shape`
# works on both Keras 2 and Keras 3.
print("\nVGG16卷积基的层:")
print("=" * 60)
for i, layer in enumerate(conv_base.layers):
    print(f"{i:2d}. {layer.name:20s} - 输出形状: {str(layer.output.shape):30s}")
print("=" * 60)

In [None]:
"""
Data loading and preprocessing

Data augmentation is used to improve generalization.
"""

# Augmenting generator for the training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation / test images are only rescaled
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three directory iterators
flow_kwargs = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Training data is shuffled with a fixed seed; validation and test keep a
# deterministic order.
train_generator = train_datagen.flow_from_directory(
    str(TRAIN_DIR),
    shuffle=True,
    seed=RANDOM_SEED,
    **flow_kwargs
)

validation_generator = val_test_datagen.flow_from_directory(
    str(VALIDATION_DIR),
    shuffle=False,
    **flow_kwargs
)

test_generator = val_test_datagen.flow_from_directory(
    str(TEST_DIR),
    shuffle=False,
    **flow_kwargs
)

# Steps per pass. len(generator) counts every batch, including the final
# partial one. The previous `samples // BATCH_SIZE` dropped the trailing
# samples (always from the end of the unshuffled validation set) and became
# 0 when a split held fewer samples than one batch.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

print("=" * 60)
print("数据加载完成")
print("=" * 60)
print(f"训练样本: {train_generator.samples}")
print(f"验证样本: {validation_generator.samples}")
print(f"测试样本: {test_generator.samples}")
print(f"每epoch步数: {steps_per_epoch}")
print("=" * 60)

In [None]:
"""
Stage 1: freeze the convolutional base and train the top classifier

Best practice for fine-tuning:
1. First freeze the convolutional base completely
2. Train the newly added classifier layers
3. Only then unfreeze some layers for fine-tuning

Why train in stages?
- The newly added layers start from random weights
- Fine-tuning right away would let large gradients damage the pretrained weights
- Training the classifier to convergence first makes fine-tuning safer
"""

rule = "=" * 60

# Freeze the entire convolutional base
conv_base.trainable = False

# Compile with the stage 1 learning rate
initial_optimizer = optimizers.RMSprop(learning_rate=LR_INITIAL)
model.compile(
    optimizer=initial_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Parameter bookkeeping: trainable vs. frozen
trainable_params = sum(np.prod(weight.shape) for weight in model.trainable_weights)
total_params = model.count_params()
frozen_params = total_params - trainable_params

stage1_report = (
    rule,
    "阶段1: 训练顶层分类器(冻结VGG16)",
    rule,
    f"可训练参数: {trainable_params:,} ({trainable_params/1e6:.2f}M)",
    f"冻结参数: {frozen_params:,} ({frozen_params/1e6:.2f}M)",
    f"总参数: {total_params:,} ({total_params/1e6:.2f}M)",
    f"学习率: {LR_INITIAL}",
    rule + "\n",
)
for report_line in stage1_report:
    print(report_line)

# Train the classifier head
print("开始训练分类器...")
history_frozen = model.fit(
    train_generator,
    epochs=EPOCHS_INITIAL,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    verbose=1,
)

# Keep the last-epoch validation accuracy for the stage 2 comparison
final_acc_frozen = history_frozen.history['val_accuracy'][-1]

print("\n" + rule)
print("阶段1完成")
print(rule)
print(f"验证准确率: {final_acc_frozen:.4f}")
print(rule)