In [1]:
import os

# Expose only GPU 0 to this process. This runs in the first cell, ahead of the
# cell that imports torch.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
import torch
import torch.nn as nn
import timm
from torchvision import transforms as Transforms
import torch.nn.functional as F
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import glob
import shutil
from ultralytics import YOLO
from PIL import Image
import time
from data_provider.data_factory import data_provider
import argparse
import torch.optim as optim
import torch.multiprocessing as mp
import math
from tqdm import tqdm

from config import get_config
from optimizer import build_optimizer
from lr_scheduler import build_scheduler

from models.dis_losses.fmdv2 import AttentionProjector

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def create_directory_if_not_exists(directory):
    """Make sure ``directory`` exists, creating it (and missing parents) if needed.

    A status line is printed in both cases: one message when the directory has
    just been created, another when the path was already present.

    :param directory: path of the directory to ensure.
    """
    already_present = os.path.exists(directory)
    if already_present:
        print("目录 '{}' 已经存在".format(directory))
        return

    # Nothing at that path yet: build it together with any missing parents.
    os.makedirs(directory)
    print("目录 '{}' 创建成功".format(directory))

# Recursively delete every ``.ipynb_checkpoints`` folder below a directory.
def remove_ipynb_checkpoints(root_folder):
    """Delete all ``.ipynb_checkpoints`` directories found under ``root_folder``.

    The tree is walked top-down. Each matching directory is removed with its
    whole contents and its path is reported on stdout.

    :param root_folder: directory whose subtree is cleaned.
    """
    checkpoint_name = ".ipynb_checkpoints"
    for root, dirs, _files in os.walk(root_folder):
        if checkpoint_name not in dirs:
            continue
        # Prune the entry in place first so os.walk does not try to descend
        # into the directory that is removed just below.
        dirs[:] = [name for name in dirs if name != checkpoint_name]
        folder_path = os.path.join(root, checkpoint_name)
        shutil.rmtree(folder_path)
        print(f"Deleted: {folder_path}")

class SwinTransformerTeacher(nn.Module):
    """Teacher network built around timm's ``swin_base_patch4_window7_224``.

    An optional linear head maps the 1024-wide backbone output to
    ``num_features`` dimensions.
    """

    def __init__(self, num_features=512):
        """
        :param num_features: output width of the linear head; a value <= 0
            disables the head (``self.feat`` is then ``None``).
        """
        super().__init__()
        self.model = timm.create_model('swin_base_patch4_window7_224')
        self.num_features = num_features
        if num_features > 0:
            self.feat = nn.Linear(1024, num_features)
        else:
            self.feat = None

    def extract_feat(self, x):
        """Run the backbone stage by stage and collect every stage's output.

        A stage's feature is recorded after its blocks and before that stage's
        own ``downsample`` module (when it has one) is applied.

        :param x: input batch handed to the backbone's patch embedding.
        :return: tuple with one tensor per entry of ``self.model.layers``.
        """
        backbone = self.model
        hidden = backbone.pos_drop(backbone.patch_embed(x))

        stage_outputs = []
        for stage in backbone.layers:
            for blk in stage.blocks:
                hidden = blk(hidden)
            stage_outputs.append(hidden)
            downsample = stage.downsample
            if downsample is not None:
                hidden = downsample(hidden)
        return tuple(stage_outputs)

    def forward_specific_stage(self, x, stage, down_sample=True):
        """Push ``x`` through the blocks of a single backbone stage.

        :param x: rank-3 tensor (presumably batch, tokens, channels - confirm
            against the caller).
        :param stage: 2, 3 or 4; any other value returns ``x`` unchanged.
        :param down_sample: when true, first apply the ``downsample`` module
            of the stage preceding the requested one.
        :return: the stage output; for stage 4 the backbone's final norm is
            applied as well.
        """
        # The unpacking doubles as a check that x has exactly three
        # dimensions; the individual sizes are not used.
        _batch, _tokens, _channels = x.shape

        # (stage id, index of the layer providing downsample, index of the
        # layer providing the blocks), counted from the end of the layer list.
        stage_plan = ((2, -4, -3), (3, -3, -2), (4, -2, -1))
        for stage_id, downsample_idx, blocks_idx in stage_plan:
            if stage == stage_id:
                if down_sample:
                    x = self.model.layers[downsample_idx].downsample(x)
                for blk in self.model.layers[blocks_idx].blocks:
                    x = blk(x)
                if stage_id == 4:
                    x = self.model.norm(x)
        return x

    def forward_features(self, x):
        """Return the backbone's ``forward_features`` output for ``x``."""
        return self.model.forward_features(x)

    def forward(self, x):
        """Backbone features, passed through the linear head when it exists."""
        out = self.model.forward_features(x)
        if self.feat is not None:
            out = self.feat(out)
        return out

class ResNetStudent(nn.Module):
    """Student network: a timm ResNet-50 plus per-stage ``AttentionProjector`` heads.

    The projectors are presumably what brings student feature maps into the
    teacher's feature layout - confirm against ``models.dis_losses.fmdv2``.
    """

    def __init__(self, num_features=512):
        """
        :param num_features: output width of the final linear head; a value
            <= 0 disables the head (``self.feat`` is then ``None``).
        """
        super().__init__()
        # Pretrained ResNet-50 from timm is the student backbone.
        self.model = timm.create_model('resnet50', pretrained=True)

        # Put InstanceNorm2d in front of the closing ReLU of every block in
        # layer1 and layer2.
        for early_stage in (self.model.layer1, self.model.layer2):
            self._modify_layer(early_stage)

        # Set stride 1 on the first layer4 block (its conv2 and the first
        # module of its downsample branch).
        first_block = self.model.layer4[0]
        self._modify_layer_stride(first_block.conv2, first_block.downsample[0])

        # One projector per ResNet stage, created in stage order.
        # Columns: attribute name, student_dims, teacher_dims, hw_dims, num_heads.
        # pos_dims always equals teacher_dims.
        projector_specs = (
            ("projector_1", 256, 128, (56, 56), 4),
            ("projector_2", 512, 256, (28, 28), 8),
            ("projector_3", 1024, 512, (14, 14), 16),
            ("projector_4", 2048, 1024, (7, 7), 32),
        )
        for attr_name, student_dims, teacher_dims, hw_dims, num_heads in projector_specs:
            projector = AttentionProjector(
                student_dims=student_dims,
                teacher_dims=teacher_dims,
                hw_dims=hw_dims,
                pos_dims=teacher_dims,
                window_shapes=(1, 1),
                self_query=True,
                softmax_scale=5.0,
                num_heads=num_heads,
            )
            setattr(self, attr_name, projector)

        # Output head: global average pooling followed by an optional linear layer.
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten_dim = 2048
        if num_features > 0:
            self.feat = nn.Linear(self.flatten_dim, num_features)
        else:
            self.feat = None

    def extract_feat(self, x):
        """Return the projected outputs of ResNet stages 3 and 4.

        Stages 1 and 2 are computed on the way but neither projected nor
        returned.

        :param x: input image batch.
        :return: ``(projector_3(stage3), projector_4(stage4))``.
        """
        backbone = self.model

        # Stem: conv -> bn -> activation -> max-pool.
        stem_out = backbone.maxpool(backbone.act1(backbone.bn1(backbone.conv1(x))))

        stage1_out = backbone.layer1(stem_out)
        stage2_out = backbone.layer2(stage1_out)
        stage3_out = backbone.layer3(stage2_out)
        projected_stage3 = self.projector_3(stage3_out)
        stage4_out = backbone.layer4(stage3_out)
        projected_stage4 = self.projector_4(stage4_out)
        return (projected_stage3, projected_stage4)

    def _modify_layer(self, layer):
        """Replace every block's ``act3`` with InstanceNorm2d followed by ReLU.

        The norm is affine and sized to the block's ``conv3.out_channels``;
        the ReLU is in-place.

        :param layer: iterable of residual blocks exposing ``conv3`` and ``act3``.
        """
        for block in layer:
            instance_norm = nn.InstanceNorm2d(block.conv3.out_channels, affine=True)
            block.act3 = nn.Sequential(instance_norm, nn.ReLU(inplace=True))

    def _modify_layer_stride(self, last_layer, last_layer_downsample):
        """Set the ``stride`` attribute of both given modules to ``(1, 1)``."""
        unit_stride = (1, 1)
        last_layer.stride = unit_stride
        last_layer_downsample.stride = unit_stride

    def forward_features(self, x):
        """Return the backbone's ``forward_features`` output for ``x``."""
        return self.model.forward_features(x)

    def forward(self, x):
        """Backbone features -> global average pool -> flatten -> optional linear head."""
        feature_map = self.model.forward_features(x)
        # Pool each channel down to 1x1, whatever the spatial size is.
        pooled = self.gap(feature_map)
        # Flatten to [batch_size, channels].
        embedding = pooled.view(pooled.size(0), -1)
        if self.feat is not None:
            embedding = self.feat(embedding)
        return embedding

class Data_Processor(object):
    """Callable that turns one image into a normalised, batched tensor.

    The pipeline resizes to ``(height, width)``, converts to a tensor and
    normalises each channel with the mean/std listed below (the values
    commonly used with ImageNet-pretrained models).
    """

    def __init__(self, height, width):
        """
        :param height: target image height passed to ``Resize``.
        :param width: target image width passed to ``Resize``.
        """
        self.height = height
        self.width = width

        resize = Transforms.Resize((self.height, self.width))
        to_tensor = Transforms.ToTensor()
        normalize = Transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.transformer = Transforms.Compose([resize, to_tensor, normalize])

    def __call__(self, img):
        """Apply the pipeline to ``img`` and prepend a batch dimension.

        :param img: input image (assumed to be something ``Resize``/``ToTensor``
            accept, e.g. a PIL image - TODO confirm).
        :return: the transformed tensor with an extra leading dimension of size 1.
        """
        transformed = self.transformer(img)
        return transformed.unsqueeze(0)

def cosine_similarity_loss(student_features, teacher_features, alpha=0.5):
    """
    Cosine-similarity distillation loss between student and teacher embeddings.

    Both inputs are L2-normalised along dim 1; the loss is
    ``alpha * (1 - mean cosine similarity)``, so aligned embeddings give a
    value close to 0.

    :param student_features: student embeddings, one row per sample
    :param teacher_features: teacher embeddings, same shape as the student's
    :param alpha: weight applied to the loss
    :return: scalar loss tensor
    """
    student_unit = F.normalize(student_features, p=2, dim=1)
    teacher_unit = F.normalize(teacher_features, p=2, dim=1)

    # Per-sample similarity, averaged over the batch.
    mean_similarity = F.cosine_similarity(student_unit, teacher_unit).mean()

    return (1 - mean_similarity) * alpha

def rbf_kernel(x, y, sigma=1.0):
    """
    Gaussian (RBF) kernel matrix between the rows of ``x`` and the rows of ``y``.

    :param x: tensor of shape (n, feature_dim)
    :param y: tensor of shape (m, feature_dim)
    :param sigma: kernel bandwidth; larger values make distant rows look more similar
    :return: (n, m) tensor with entries ``exp(-||x_i - y_j||^2 / (2 * sigma^2))``
    """
    # Squared Euclidean distance via ||x||^2 + ||y||^2 - 2 * x.y
    x_sq_norm = torch.sum(x ** 2, dim=1, keepdim=True)
    y_sq_norm = torch.sum(y ** 2, dim=1, keepdim=True)
    sq_dist = x_sq_norm + y_sq_norm.t() - 2 * torch.matmul(x, y.t())

    # The expanded form can come out slightly negative through floating-point
    # cancellation (especially in reduced precision); a squared distance is
    # never negative, so clamp to keep every kernel value within (0, 1].
    sq_dist = torch.clamp(sq_dist, min=0)

    return torch.exp(-sq_dist / (2 * sigma ** 2))

def mmd_loss(X, Y, sigma=1.0):
    """
    Maximum mean discrepancy (MMD) between two sample sets under an RBF kernel.

    Computed as ``mean(K(X, X)) + mean(K(Y, Y)) - 2 * mean(K(X, Y))`` over the
    full kernel matrices.

    :param X: sample set X (batch_size_1, feature_dim)
    :param Y: sample set Y (batch_size_2, feature_dim)
    :param sigma: bandwidth forwarded to ``rbf_kernel``
    :return: scalar MMD loss tensor
    """
    kernel_xx = rbf_kernel(X, X, sigma)  # similarities inside X
    kernel_yy = rbf_kernel(Y, Y, sigma)  # similarities inside Y
    kernel_xy = rbf_kernel(X, Y, sigma)  # similarities across the two sets

    within_sets = kernel_xx.mean() + kernel_yy.mean()
    return within_sets - 2 * kernel_xy.mean()

def mse_loss_project(student_features, teacher_features, teacher_offset=2):
    """
    Average mean-squared error between paired student and teacher feature maps.

    Student feature ``i`` is compared with teacher feature ``i + teacher_offset``.
    The default offset of 2 matches a student that returns only its last two
    stages against a teacher that returns four.

    :param student_features: sequence of student feature tensors
    :param teacher_features: sequence of teacher feature tensors; it must hold at
        least ``len(student_features) + teacher_offset`` entries, each with the
        same shape as its student counterpart
    :param teacher_offset: index shift applied when picking the teacher feature
    :return: mean over all pairs of the element-wise mean squared difference
    """
    num_levels = len(student_features)
    total_loss = 0
    for level, student_feat in enumerate(student_features):
        teacher_feat = teacher_features[level + teacher_offset]
        # Mean over every element of the squared difference for this pair.
        total_loss = total_loss + torch.mean((student_feat - teacher_feat) ** 2)
    return total_loss / num_levels

In [5]:
# Use the first visible GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Seed every RNG in use so that runs are repeatable.
fix_seed = 2024
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)
mp.set_start_method('spawn', force=True)  # force the 'spawn' multiprocessing start method

# Build the argument namespace by hand instead of parsing a command line.
args = argparse.Namespace()
args.root_dir_train = "/homec/xiaolei/projects/ReID/datasets/train"
args.root_dir_valid = "/homec/xiaolei/projects/ReID/datasets/valid"
args.train_epochs = 100
args.batch_size = 128
args.num_workers = 10
args.height = 224
args.width = 224
args.resume = True
config = get_config(args)
stage_train = "train"
stage_valid = "valid"

# Jupyter checkpoint folders inside the dataset trees are removed before the
# loaders are built.
remove_ipynb_checkpoints(args.root_dir_train)
remove_ipynb_checkpoints(args.root_dir_valid)

train_data_loader = data_provider(args, stage=stage_train)
valid_data_loader = data_provider(args, stage=stage_valid)

# Instantiate teacher and student on the selected device. Using `device`
# rather than `.cuda()` keeps the CPU fallback above usable.
teacher_model = SwinTransformerTeacher(num_features=512).to(device)
student_model = ResNetStudent(num_features=512).to(device)

# Load the teacher's trained weights, then freeze it: eval mode and no gradients.
teacher_weight_path = '/homec/xiaolei/projects/ISR/weights/swin_base_patch4_window7_224.pth'
teacher_weight = torch.load(teacher_weight_path, map_location=device)
teacher_model.load_state_dict(teacher_weight['state_dict'], strict=True)
teacher_model.eval()
for param in teacher_model.parameters():
    param.requires_grad = False

if args.resume:
    # NOTE(review): this path is relative to the working directory, whereas the
    # training cell saves to an absolute path - confirm both point to the same folder.
    student_model_weight_path = 'weights/student_model_base6_strong_reid_mmd_mse_loss/best_student_model.pth'
    student_model_weight = torch.load(student_model_weight_path, map_location=device)
    student_model.load_state_dict(student_model_weight, strict=True)

# Holds the most recent value of each loss term, keyed by name.
s_loss = dict()

In [6]:
# Optimizer and learning-rate schedule are built from the project config;
# GradScaler handles loss scaling for mixed-precision training.
optimizer = build_optimizer(config, student_model)
mse_loss = nn.MSELoss()  # not used by the loop below, which calls mse_loss_project
lr_scheduler = build_scheduler(config, optimizer, len(train_data_loader))
scaler = torch.cuda.amp.GradScaler()

# The loaders are assumed to yield unlabeled image batches; the training
# signal comes from the teacher.
epochs = args.train_epochs  # number of epochs to run in this session
# Epoch numbering continues from 50 (this run resumes an earlier one), so the
# last epoch index is start_epoch + epochs - 1. Progress output must use
# total_epochs as its denominator, not args.train_epochs.
# NOTE(review): the start is 50 even when args.resume is False - confirm intended.
start_epoch = 50
total_epochs = start_epoch + epochs
best_loss = math.inf
path = "/homec/xiaolei/projects/ReID/weights/student_model_base6_strong_reid_mmd_mse_loss"
create_directory_if_not_exists(path)

# Distil the teacher into the student.
for epoch in range(start_epoch, total_epochs):
    student_model.train()
    num_steps = len(train_data_loader)
    train_loss = []

    # ---- training pass ----
    with tqdm(total=num_steps, desc=f"Epoch {epoch + 1}/{total_epochs}") as pbar:
        for idx, person_image_bs in enumerate(train_data_loader):
            optimizer.zero_grad()
            # The loader may hand back a sequence of tensors; stack it into one batch tensor.
            if not isinstance(person_image_bs, torch.Tensor):
                person_image_bs = torch.stack(person_image_bs)
            person_image_bs = person_image_bs.to(device)

            with torch.cuda.amp.autocast():
                with torch.no_grad():  # the teacher stays frozen
                    teacher_features = teacher_model.extract_feat(person_image_bs)
                    teacher_output = teacher_model(person_image_bs)

                student_features = student_model.extract_feat(person_image_bs)
                student_output = student_model(person_image_bs)

                s_loss['ori_loss'] = cosine_similarity_loss(student_output, teacher_output, 1.0)
                s_loss['mmd_loss'] = mmd_loss(student_output, teacher_output)
                s_loss['mse_loss'] = mse_loss_project(student_features, teacher_features)

                loss = s_loss['ori_loss'] + s_loss['mmd_loss'] + s_loss['mse_loss']

            # Read the scalar once; it feeds both the running list and the progress bar.
            loss_value = loss.item()
            train_loss.append(loss_value)

            # Scaled backward pass, optimizer step, then per-iteration LR update.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            lr_scheduler.step_update(epoch * num_steps + idx)

            pbar.set_postfix({'loss': loss_value})
            pbar.update(1)

        train_loss_avg = np.average(train_loss)
        if epoch % 10 == 0:
            torch.save(student_model.state_dict(), path + '/' + f'checkpoint_{epoch}_{train_loss_avg}.pth')

    # ---- validation pass (full precision, no gradients) ----
    student_model.eval()
    val_output_loss = []
    val_feature_loss = []
    with tqdm(total=len(valid_data_loader), desc=f"Epoch {epoch + 1}/{total_epochs}") as pbar:
        with torch.no_grad():
            for person_image_bs in valid_data_loader:
                if not isinstance(person_image_bs, torch.Tensor):
                    person_image_bs = torch.stack(person_image_bs)
                person_image_bs = person_image_bs.to(device)

                # Teacher
                teacher_features = teacher_model.extract_feat(person_image_bs)
                teacher_output = teacher_model(person_image_bs)
                # Student
                student_features = student_model.extract_feat(person_image_bs)
                student_output = student_model(person_image_bs)

                s_loss['ori_loss'] = cosine_similarity_loss(student_output, teacher_output, 1.0)
                s_loss['mmd_loss'] = mmd_loss(student_output, teacher_output)
                s_loss['mse_loss'] = mse_loss_project(student_features, teacher_features)

                output_loss = s_loss['ori_loss'] + s_loss['mmd_loss'] + s_loss['mse_loss']
                output_loss_value = output_loss.item()
                val_output_loss.append(output_loss_value)

                pbar.set_postfix({'val_output_loss': output_loss_value})
                pbar.update(1)

        val_output_loss_avg = np.average(val_output_loss)  # mean validation loss of this epoch
        print(f'Epoch {epoch+1}/{total_epochs}, Training Loss: {train_loss_avg}, Validation Output Loss: {val_output_loss_avg}')

    # Keep the weights with the lowest validation loss seen in this session.
    if val_output_loss_avg < best_loss:
        best_loss = val_output_loss_avg
        temp_best_model_path = os.path.join(path, "best_student_model.pth")
        torch.save(student_model.state_dict(), temp_best_model_path)
        print(f'Best model saved with Validation Loss: {best_loss}')

# Save the weights from the final epoch (these are not necessarily the best ones).
best_model_path = os.path.join(path, "student_model.pth")
torch.save(student_model.state_dict(), best_model_path)

目录 '/homec/xiaolei/projects/ReID/weights/student_model_base6_strong_reid_mmd_mse_loss' 已经存在


Epoch 51/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=27.8]
Epoch 51/100: 100%|██████████| 107/107 [24:51<00:00, 13.94s/it, val_output_loss=24]  


Epoch 51/100, Training Loss: 27.04056491049362, Validation Output Loss: 28.163590867942737
Best model saved with Validation Loss: 28.163590867942737


Epoch 52/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=27.7]
Epoch 52/100: 100%|██████████| 107/107 [21:28<00:00, 12.04s/it, val_output_loss=23.4]


Epoch 52/100, Training Loss: 26.97899309176843, Validation Output Loss: 27.779837528121806
Best model saved with Validation Loss: 27.779837528121806


Epoch 53/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=30.1]
Epoch 53/100: 100%|██████████| 107/107 [34:38<00:00, 19.42s/it, val_output_loss=23.3]


Epoch 53/100, Training Loss: 26.881751501830266, Validation Output Loss: 27.739326619656286
Best model saved with Validation Loss: 27.739326619656286


Epoch 54/100: 100%|██████████| 1854/1854 [37:39<00:00,  1.22s/it, loss=25.7]
Epoch 54/100: 100%|██████████| 107/107 [33:34<00:00, 18.83s/it, val_output_loss=23.3]


Epoch 54/100, Training Loss: 26.83286458851836, Validation Output Loss: 27.516983994813724
Best model saved with Validation Loss: 27.516983994813724


Epoch 55/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=27.9]
Epoch 55/100: 100%|██████████| 107/107 [20:14<00:00, 11.35s/it, val_output_loss=23.4]


Epoch 55/100, Training Loss: 26.73995355340655, Validation Output Loss: 27.719667399041008


Epoch 56/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=28.8]
Epoch 56/100: 100%|██████████| 107/107 [26:57<00:00, 15.12s/it, val_output_loss=23.3]


Epoch 56/100, Training Loss: 26.635056409249415, Validation Output Loss: 27.71592564449132


Epoch 57/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=27.1]
Epoch 57/100: 100%|██████████| 107/107 [21:46<00:00, 12.21s/it, val_output_loss=23.3]


Epoch 57/100, Training Loss: 26.598027646991685, Validation Output Loss: 27.223106259497527
Best model saved with Validation Loss: 27.223106259497527


Epoch 58/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=27.1]
Epoch 58/100: 100%|██████████| 107/107 [21:39<00:00, 12.15s/it, val_output_loss=23.8]


Epoch 58/100, Training Loss: 26.528528081817793, Validation Output Loss: 27.745502204538507


Epoch 59/100: 100%|██████████| 1854/1854 [37:55<00:00,  1.23s/it, loss=26.4]
Epoch 59/100: 100%|██████████| 107/107 [31:01<00:00, 17.40s/it, val_output_loss=23.2]


Epoch 59/100, Training Loss: 26.466556283648345, Validation Output Loss: 27.51876080593216


Epoch 60/100: 100%|██████████| 1854/1854 [37:43<00:00,  1.22s/it, loss=26.2]
Epoch 60/100: 100%|██████████| 107/107 [30:11<00:00, 16.93s/it, val_output_loss=23]  


Epoch 60/100, Training Loss: 26.389839358571535, Validation Output Loss: 27.29508767172555


Epoch 61/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=26.7]
Epoch 61/100: 100%|██████████| 107/107 [39:59<00:00, 22.43s/it, val_output_loss=23.1]


Epoch 61/100, Training Loss: 26.311590804765654, Validation Output Loss: 27.197093910145984
Best model saved with Validation Loss: 27.197093910145984


Epoch 62/100: 100%|██████████| 1854/1854 [37:55<00:00,  1.23s/it, loss=24.6]
Epoch 62/100: 100%|██████████| 107/107 [39:05<00:00, 21.92s/it, val_output_loss=22.5]


Epoch 62/100, Training Loss: 26.274973896071877, Validation Output Loss: 26.82184331662187
Best model saved with Validation Loss: 26.82184331662187


Epoch 63/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=26]  
Epoch 63/100: 100%|██████████| 107/107 [29:44<00:00, 16.68s/it, val_output_loss=22.8]


Epoch 63/100, Training Loss: 26.185711284542805, Validation Output Loss: 27.02109541848441


Epoch 64/100: 100%|██████████| 1854/1854 [37:43<00:00,  1.22s/it, loss=25.4]
Epoch 64/100: 100%|██████████| 107/107 [22:51<00:00, 12.82s/it, val_output_loss=22.8]


Epoch 64/100, Training Loss: 26.14395077508887, Validation Output Loss: 26.949745178222656


Epoch 65/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=26]  
Epoch 65/100: 100%|██████████| 107/107 [04:15<00:00,  2.38s/it, val_output_loss=23.1]


Epoch 65/100, Training Loss: 26.08725680605365, Validation Output Loss: 27.26606964396539


Epoch 66/100: 100%|██████████| 1854/1854 [37:42<00:00,  1.22s/it, loss=25.5]
Epoch 66/100: 100%|██████████| 107/107 [03:52<00:00,  2.18s/it, val_output_loss=22.8]


Epoch 66/100, Training Loss: 26.03202331670537, Validation Output Loss: 26.849494185403127


Epoch 67/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=25.3]
Epoch 67/100: 100%|██████████| 107/107 [06:25<00:00,  3.61s/it, val_output_loss=23]  


Epoch 67/100, Training Loss: 25.955578009801904, Validation Output Loss: 26.89437301136623


Epoch 68/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.22s/it, loss=25.5]
Epoch 68/100: 100%|██████████| 107/107 [36:17<00:00, 20.35s/it, val_output_loss=22.6]


Epoch 68/100, Training Loss: 25.895486297730876, Validation Output Loss: 26.717750353233836
Best model saved with Validation Loss: 26.717750353233836


Epoch 69/100: 100%|██████████| 1854/1854 [37:38<00:00,  1.22s/it, loss=25.4]
Epoch 69/100: 100%|██████████| 107/107 [06:39<00:00,  3.73s/it, val_output_loss=22.8]


Epoch 69/100, Training Loss: 25.835821420243644, Validation Output Loss: 26.729095138121988


Epoch 70/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=25]  
Epoch 70/100: 100%|██████████| 107/107 [26:41<00:00, 14.96s/it, val_output_loss=22.8]


Epoch 70/100, Training Loss: 25.803728284702075, Validation Output Loss: 26.710142082143054
Best model saved with Validation Loss: 26.710142082143054


Epoch 71/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=25.5]
Epoch 71/100: 100%|██████████| 107/107 [04:05<00:00,  2.30s/it, val_output_loss=22.5]


Epoch 71/100, Training Loss: 25.727116153721155, Validation Output Loss: 26.820625572561102


Epoch 72/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=25.3]
Epoch 72/100: 100%|██████████| 107/107 [24:04<00:00, 13.50s/it, val_output_loss=22.6]


Epoch 72/100, Training Loss: 25.660037138444064, Validation Output Loss: 26.6833964196321
Best model saved with Validation Loss: 26.6833964196321


Epoch 73/100: 100%|██████████| 1854/1854 [37:41<00:00,  1.22s/it, loss=25.5]
Epoch 73/100: 100%|██████████| 107/107 [05:03<00:00,  2.83s/it, val_output_loss=22.5]


Epoch 73/100, Training Loss: 25.662083241031652, Validation Output Loss: 26.43494384979533
Best model saved with Validation Loss: 26.43494384979533


Epoch 74/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=25.6]
Epoch 74/100: 100%|██████████| 107/107 [04:13<00:00,  2.37s/it, val_output_loss=22.3]


Epoch 74/100, Training Loss: 25.608037121473394, Validation Output Loss: 26.6356828814355


Epoch 75/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=25.8]
Epoch 75/100: 100%|██████████| 107/107 [04:04<00:00,  2.29s/it, val_output_loss=22]  


Epoch 75/100, Training Loss: 25.56093142487856, Validation Output Loss: 26.403493809922832
Best model saved with Validation Loss: 26.403493809922832


Epoch 76/100: 100%|██████████| 1854/1854 [37:42<00:00,  1.22s/it, loss=25.2]
Epoch 76/100: 100%|██████████| 107/107 [04:14<00:00,  2.37s/it, val_output_loss=22.3]


Epoch 76/100, Training Loss: 25.492029682846645, Validation Output Loss: 26.486351476651485


Epoch 77/100: 100%|██████████| 1854/1854 [37:52<00:00,  1.23s/it, loss=25.9]
Epoch 77/100: 100%|██████████| 107/107 [05:51<00:00,  3.29s/it, val_output_loss=22.1]


Epoch 77/100, Training Loss: 25.461344044056784, Validation Output Loss: 26.2646473857844
Best model saved with Validation Loss: 26.2646473857844


Epoch 78/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=26]  
Epoch 78/100: 100%|██████████| 107/107 [08:59<00:00,  5.04s/it, val_output_loss=22.1]


Epoch 78/100, Training Loss: 25.416938873299245, Validation Output Loss: 26.31604777541116


Epoch 79/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=28.4]
Epoch 79/100: 100%|██████████| 107/107 [08:02<00:00,  4.51s/it, val_output_loss=21.9]


Epoch 79/100, Training Loss: 25.37343116974239, Validation Output Loss: 26.104827880859375
Best model saved with Validation Loss: 26.104827880859375


Epoch 80/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=26.5]
Epoch 80/100: 100%|██████████| 107/107 [04:26<00:00,  2.49s/it, val_output_loss=21.8]


Epoch 80/100, Training Loss: 25.31534370249529, Validation Output Loss: 26.118305028041945


Epoch 81/100: 100%|██████████| 1854/1854 [37:38<00:00,  1.22s/it, loss=26.1]
Epoch 81/100: 100%|██████████| 107/107 [35:30<00:00, 19.91s/it, val_output_loss=21.9]


Epoch 81/100, Training Loss: 25.260050318902106, Validation Output Loss: 26.407961337365837


Epoch 82/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=25.9]
Epoch 82/100: 100%|██████████| 107/107 [04:19<00:00,  2.42s/it, val_output_loss=22.3]


Epoch 82/100, Training Loss: 25.247894230526757, Validation Output Loss: 26.222085614070714


Epoch 83/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=26.1]
Epoch 83/100: 100%|██████████| 107/107 [15:25<00:00,  8.65s/it, val_output_loss=22.3]


Epoch 83/100, Training Loss: 25.20349005253698, Validation Output Loss: 26.228647428138235


Epoch 84/100: 100%|██████████| 1854/1854 [37:42<00:00,  1.22s/it, loss=24.3]
Epoch 84/100: 100%|██████████| 107/107 [04:08<00:00,  2.33s/it, val_output_loss=21.8]


Epoch 84/100, Training Loss: 25.156308179277175, Validation Output Loss: 26.04704445099162
Best model saved with Validation Loss: 26.04704445099162


Epoch 85/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=25.7]
Epoch 85/100: 100%|██████████| 107/107 [04:01<00:00,  2.26s/it, val_output_loss=21.8]


Epoch 85/100, Training Loss: 25.107493304897666, Validation Output Loss: 25.914486501818505
Best model saved with Validation Loss: 25.914486501818505


Epoch 86/100: 100%|██████████| 1854/1854 [37:52<00:00,  1.23s/it, loss=25]  
Epoch 86/100: 100%|██████████| 107/107 [04:12<00:00,  2.36s/it, val_output_loss=22.3]


Epoch 86/100, Training Loss: 25.097288930043177, Validation Output Loss: 26.631336978662794


Epoch 87/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=25.7]
Epoch 87/100: 100%|██████████| 107/107 [08:54<00:00,  5.00s/it, val_output_loss=21.9]


Epoch 87/100, Training Loss: 25.028572830438872, Validation Output Loss: 25.90912795735297
Best model saved with Validation Loss: 25.90912795735297


Epoch 88/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=23.9]
Epoch 88/100: 100%|██████████| 107/107 [04:05<00:00,  2.29s/it, val_output_loss=21.7]


Epoch 88/100, Training Loss: 25.025086179648916, Validation Output Loss: 25.852556816885404
Best model saved with Validation Loss: 25.852556816885404


Epoch 89/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=25.5]
Epoch 89/100: 100%|██████████| 107/107 [04:19<00:00,  2.42s/it, val_output_loss=22]  


Epoch 89/100, Training Loss: 25.009002028159724, Validation Output Loss: 26.121297925432152


Epoch 90/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=23.1]
Epoch 90/100: 100%|██████████| 107/107 [04:18<00:00,  2.42s/it, val_output_loss=21.8]


Epoch 90/100, Training Loss: 24.921330278102378, Validation Output Loss: 26.07167251087795


Epoch 91/100: 100%|██████████| 1854/1854 [37:41<00:00,  1.22s/it, loss=24.6]
Epoch 91/100: 100%|██████████| 107/107 [03:58<00:00,  2.23s/it, val_output_loss=21.6]


Epoch 91/100, Training Loss: 24.90445376831351, Validation Output Loss: 25.794538123585355
Best model saved with Validation Loss: 25.794538123585355


Epoch 92/100: 100%|██████████| 1854/1854 [37:42<00:00,  1.22s/it, loss=25.2]
Epoch 92/100: 100%|██████████| 107/107 [20:06<00:00, 11.28s/it, val_output_loss=21.8]


Epoch 92/100, Training Loss: 24.869464454527424, Validation Output Loss: 25.797895645426813


Epoch 93/100: 100%|██████████| 1854/1854 [37:37<00:00,  1.22s/it, loss=25.8]
Epoch 93/100: 100%|██████████| 107/107 [04:17<00:00,  2.40s/it, val_output_loss=22]  


Epoch 93/100, Training Loss: 24.83444421782591, Validation Output Loss: 26.055656540059598


Epoch 94/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=24.9]
Epoch 94/100: 100%|██████████| 107/107 [18:56<00:00, 10.62s/it, val_output_loss=21.5]


Epoch 94/100, Training Loss: 24.78818674303567, Validation Output Loss: 25.920948206821336


Epoch 95/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=25.2]
Epoch 95/100: 100%|██████████| 107/107 [04:08<00:00,  2.32s/it, val_output_loss=21.8]


Epoch 95/100, Training Loss: 24.802408417966117, Validation Output Loss: 25.892782567817473


Epoch 96/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=24.5]
Epoch 96/100: 100%|██████████| 107/107 [04:09<00:00,  2.33s/it, val_output_loss=21.8]


Epoch 96/100, Training Loss: 24.762658881523848, Validation Output Loss: 25.867581162497263


Epoch 97/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=24.8]
Epoch 97/100: 100%|██████████| 107/107 [08:05<00:00,  4.54s/it, val_output_loss=21.6]


Epoch 97/100, Training Loss: 24.70850233841354, Validation Output Loss: 25.718899521872263
Best model saved with Validation Loss: 25.718899521872263


Epoch 98/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=24.8]
Epoch 98/100: 100%|██████████| 107/107 [13:16<00:00,  7.44s/it, val_output_loss=21.9]


Epoch 98/100, Training Loss: 24.701492457713897, Validation Output Loss: 25.987400910564673


Epoch 99/100: 100%|██████████| 1854/1854 [37:37<00:00,  1.22s/it, loss=23.5]
Epoch 99/100: 100%|██████████| 107/107 [04:00<00:00,  2.25s/it, val_output_loss=21.5]


Epoch 99/100, Training Loss: 24.655097705260836, Validation Output Loss: 25.622720254915897
Best model saved with Validation Loss: 25.622720254915897


Epoch 100/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=23.6]
Epoch 100/100: 100%|██████████| 107/107 [04:36<00:00,  2.59s/it, val_output_loss=21.8]


Epoch 100/100, Training Loss: 24.621477582822468, Validation Output Loss: 25.744105864908093


Epoch 101/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=24]  
Epoch 101/100: 100%|██████████| 107/107 [22:54<00:00, 12.84s/it, val_output_loss=21.7] 


Epoch 101/100, Training Loss: 24.600248350166066, Validation Output Loss: 25.67148525915413


Epoch 102/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=24.3]
Epoch 102/100: 100%|██████████| 107/107 [04:08<00:00,  2.32s/it, val_output_loss=21.5]


Epoch 102/100, Training Loss: 24.582230219177443, Validation Output Loss: 25.665236553299092


Epoch 103/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=24]  
Epoch 103/100: 100%|██████████| 107/107 [19:40<00:00, 11.03s/it, val_output_loss=21.4]


Epoch 103/100, Training Loss: 24.51461296945714, Validation Output Loss: 25.60756647698233
Best model saved with Validation Loss: 25.60756647698233


Epoch 104/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=27.8]
Epoch 104/100: 100%|██████████| 107/107 [04:09<00:00,  2.33s/it, val_output_loss=21.4]


Epoch 104/100, Training Loss: 24.514163053280072, Validation Output Loss: 25.663106953986336


Epoch 105/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=24.4]
Epoch 105/100: 100%|██████████| 107/107 [04:04<00:00,  2.29s/it, val_output_loss=21.2]


Epoch 105/100, Training Loss: 24.447378215151954, Validation Output Loss: 25.445322803247755
Best model saved with Validation Loss: 25.445322803247755


Epoch 106/100: 100%|██████████| 1854/1854 [37:53<00:00,  1.23s/it, loss=24.8]
Epoch 106/100: 100%|██████████| 107/107 [07:43<00:00,  4.33s/it, val_output_loss=21]  


Epoch 106/100, Training Loss: 24.438732923941835, Validation Output Loss: 25.4587813225862


Epoch 107/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=25.3]
Epoch 107/100: 100%|██████████| 107/107 [05:37<00:00,  3.15s/it, val_output_loss=21.2]


Epoch 107/100, Training Loss: 24.51342303390256, Validation Output Loss: 25.459224593973605


Epoch 108/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=24.8]
Epoch 108/100: 100%|██████████| 107/107 [04:27<00:00,  2.50s/it, val_output_loss=21.4]


Epoch 108/100, Training Loss: 24.447466091044898, Validation Output Loss: 25.470996589304132


Epoch 109/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=24.1]
Epoch 109/100: 100%|██████████| 107/107 [04:13<00:00,  2.37s/it, val_output_loss=21.4]


Epoch 109/100, Training Loss: 24.459627521848216, Validation Output Loss: 25.35998372942488
Best model saved with Validation Loss: 25.35998372942488


Epoch 110/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=24.1]
Epoch 110/100: 100%|██████████| 107/107 [32:05<00:00, 18.00s/it, val_output_loss=21.1]


Epoch 110/100, Training Loss: 24.417470040059012, Validation Output Loss: 25.36963906689225


Epoch 111/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=24.1]
Epoch 111/100: 100%|██████████| 107/107 [04:11<00:00,  2.35s/it, val_output_loss=21.3]


Epoch 111/100, Training Loss: 24.402127446994555, Validation Output Loss: 25.4503587740604


Epoch 112/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=24]  
Epoch 112/100: 100%|██████████| 107/107 [28:55<00:00, 16.22s/it, val_output_loss=21.4]


Epoch 112/100, Training Loss: 24.368551419905216, Validation Output Loss: 25.63149653639749


Epoch 113/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=23.4]
Epoch 113/100: 100%|██████████| 107/107 [04:16<00:00,  2.39s/it, val_output_loss=21.2]


Epoch 113/100, Training Loss: 24.333332490766704, Validation Output Loss: 25.209612552250658
Best model saved with Validation Loss: 25.209612552250658


Epoch 114/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=24.5]
Epoch 114/100: 100%|██████████| 107/107 [23:38<00:00, 13.26s/it, val_output_loss=21.1]


Epoch 114/100, Training Loss: 24.304125432957854, Validation Output Loss: 25.174149682588666
Best model saved with Validation Loss: 25.174149682588666


Epoch 115/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=26.4]
Epoch 115/100: 100%|██████████| 107/107 [04:05<00:00,  2.30s/it, val_output_loss=21.1]


Epoch 115/100, Training Loss: 24.28147390976693, Validation Output Loss: 25.172474745278045
Best model saved with Validation Loss: 25.172474745278045


Epoch 116/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=23.5]
Epoch 116/100: 100%|██████████| 107/107 [04:08<00:00,  2.33s/it, val_output_loss=21.1]


Epoch 116/100, Training Loss: 24.237690032621547, Validation Output Loss: 25.10129862633821
Best model saved with Validation Loss: 25.10129862633821


Epoch 117/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=24.4]
Epoch 117/100: 100%|██████████| 107/107 [06:24<00:00,  3.59s/it, val_output_loss=21.5]


Epoch 117/100, Training Loss: 24.22511747662689, Validation Output Loss: 25.313097837929412


Epoch 118/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=23.7]
Epoch 118/100: 100%|██████████| 107/107 [04:07<00:00,  2.31s/it, val_output_loss=21.1]


Epoch 118/100, Training Loss: 24.215876566939368, Validation Output Loss: 25.22891083833213


Epoch 119/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=24.2]
Epoch 119/100: 100%|██████████| 107/107 [03:58<00:00,  2.23s/it, val_output_loss=21.2]


Epoch 119/100, Training Loss: 24.177662195125443, Validation Output Loss: 25.51873237395955


Epoch 120/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=25.5]
Epoch 120/100: 100%|██████████| 107/107 [04:56<00:00,  2.78s/it, val_output_loss=20.9]


Epoch 120/100, Training Loss: 24.17807571736226, Validation Output Loss: 24.978333428641346
Best model saved with Validation Loss: 24.978333428641346


Epoch 121/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=23.6]
Epoch 121/100: 100%|██████████| 107/107 [04:12<00:00,  2.36s/it, val_output_loss=21]  


Epoch 121/100, Training Loss: 24.133293692991057, Validation Output Loss: 25.18843911072918


Epoch 122/100: 100%|██████████| 1854/1854 [37:43<00:00,  1.22s/it, loss=25.5]
Epoch 122/100: 100%|██████████| 107/107 [06:30<00:00,  3.65s/it, val_output_loss=20.8]


Epoch 122/100, Training Loss: 24.110359961418144, Validation Output Loss: 25.142096777942694


Epoch 123/100: 100%|██████████| 1854/1854 [37:44<00:00,  1.22s/it, loss=24.8]
Epoch 123/100: 100%|██████████| 107/107 [15:10<00:00,  8.51s/it, val_output_loss=20.8]


Epoch 123/100, Training Loss: 24.092571034271998, Validation Output Loss: 24.961299789286105
Best model saved with Validation Loss: 24.961299789286105


Epoch 124/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=24.5]
Epoch 124/100: 100%|██████████| 107/107 [03:44<00:00,  2.10s/it, val_output_loss=20.9]


Epoch 124/100, Training Loss: 24.072097862679854, Validation Output Loss: 25.25888074001419


Epoch 125/100: 100%|██████████| 1854/1854 [37:51<00:00,  1.23s/it, loss=23.1]
Epoch 125/100: 100%|██████████| 107/107 [21:48<00:00, 12.23s/it, val_output_loss=20.9]


Epoch 125/100, Training Loss: 24.02655908282135, Validation Output Loss: 24.970621019880348


Epoch 126/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=24.2]
Epoch 126/100: 100%|██████████| 107/107 [03:42<00:00,  2.08s/it, val_output_loss=20.9]


Epoch 126/100, Training Loss: 24.024415136540977, Validation Output Loss: 24.952880431558484
Best model saved with Validation Loss: 24.952880431558484


Epoch 127/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=24.2]
Epoch 127/100: 100%|██████████| 107/107 [11:14<00:00,  6.31s/it, val_output_loss=20.8]


Epoch 127/100, Training Loss: 23.9720379768709, Validation Output Loss: 24.83328097334532
Best model saved with Validation Loss: 24.83328097334532


Epoch 128/100: 100%|██████████| 1854/1854 [47:23<00:00,  1.53s/it, loss=24.2]
Epoch 128/100: 100%|██████████| 107/107 [03:44<00:00,  2.10s/it, val_output_loss=21.1]


Epoch 128/100, Training Loss: 23.96224641954243, Validation Output Loss: 25.022126081947967


Epoch 129/100: 100%|██████████| 1854/1854 [37:50<00:00,  1.22s/it, loss=23.1]
Epoch 129/100: 100%|██████████| 107/107 [16:13<00:00,  9.10s/it, val_output_loss=21.1]


Epoch 129/100, Training Loss: 23.92922323961474, Validation Output Loss: 24.95506498746783


Epoch 130/100: 100%|██████████| 1854/1854 [37:38<00:00,  1.22s/it, loss=23.8]
Epoch 130/100: 100%|██████████| 107/107 [03:46<00:00,  2.11s/it, val_output_loss=21.2]


Epoch 130/100, Training Loss: 23.90055498104651, Validation Output Loss: 25.001560282484395


Epoch 131/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=23]  
Epoch 131/100: 100%|██████████| 107/107 [05:19<00:00,  2.99s/it, val_output_loss=21.3]


Epoch 131/100, Training Loss: 23.905713673934194, Validation Output Loss: 25.110908633080598


Epoch 132/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=23.4]
Epoch 132/100: 100%|██████████| 107/107 [03:41<00:00,  2.07s/it, val_output_loss=20.8]


Epoch 132/100, Training Loss: 23.85641071891579, Validation Output Loss: 24.779938528470904
Best model saved with Validation Loss: 24.779938528470904


Epoch 133/100: 100%|██████████| 1854/1854 [37:46<00:00,  1.22s/it, loss=23.7]
Epoch 133/100: 100%|██████████| 107/107 [03:42<00:00,  2.08s/it, val_output_loss=20.9]


Epoch 133/100, Training Loss: 23.834262741936577, Validation Output Loss: 24.79242524476809


Epoch 134/100: 100%|██████████| 1854/1854 [37:48<00:00,  1.22s/it, loss=23]  
Epoch 134/100: 100%|██████████| 107/107 [03:48<00:00,  2.13s/it, val_output_loss=21.2]


Epoch 134/100, Training Loss: 23.837216879300442, Validation Output Loss: 25.12318702056029


Epoch 135/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=23]  
Epoch 135/100: 100%|██████████| 107/107 [03:46<00:00,  2.11s/it, val_output_loss=20.8]


Epoch 135/100, Training Loss: 23.797451661242636, Validation Output Loss: 24.862981528879327


Epoch 136/100: 100%|██████████| 1854/1854 [37:42<00:00,  1.22s/it, loss=22.9]
Epoch 136/100: 100%|██████████| 107/107 [14:16<00:00,  8.01s/it, val_output_loss=20.7]


Epoch 136/100, Training Loss: 23.801884306544498, Validation Output Loss: 24.859201163889093


Epoch 137/100: 100%|██████████| 1854/1854 [37:41<00:00,  1.22s/it, loss=23.7]
Epoch 137/100: 100%|██████████| 107/107 [03:44<00:00,  2.09s/it, val_output_loss=20.7]


Epoch 137/100, Training Loss: 23.771040262142044, Validation Output Loss: 24.669799947293004
Best model saved with Validation Loss: 24.669799947293004


Epoch 138/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=24.4]
Epoch 138/100: 100%|██████████| 107/107 [17:18<00:00,  9.70s/it, val_output_loss=20.8]


Epoch 138/100, Training Loss: 23.770501649109676, Validation Output Loss: 24.78978224781072


Epoch 139/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=24.3]
Epoch 139/100: 100%|██████████| 107/107 [03:47<00:00,  2.12s/it, val_output_loss=20.6]


Epoch 139/100, Training Loss: 23.70378204045301, Validation Output Loss: 24.727574250408423


Epoch 140/100: 100%|██████████| 1854/1854 [37:38<00:00,  1.22s/it, loss=23.4]
Epoch 140/100: 100%|██████████| 107/107 [14:20<00:00,  8.04s/it, val_output_loss=20.6]


Epoch 140/100, Training Loss: 23.717449664555193, Validation Output Loss: 24.771317544384537


Epoch 141/100: 100%|██████████| 1854/1854 [37:38<00:00,  1.22s/it, loss=24.2]
Epoch 141/100: 100%|██████████| 107/107 [03:46<00:00,  2.12s/it, val_output_loss=20.6]


Epoch 141/100, Training Loss: 23.68455441175541, Validation Output Loss: 24.758121757863837


Epoch 142/100: 100%|██████████| 1854/1854 [37:40<00:00,  1.22s/it, loss=23.2]
Epoch 142/100: 100%|██████████| 107/107 [03:43<00:00,  2.09s/it, val_output_loss=20.8]


Epoch 142/100, Training Loss: 23.67582198125516, Validation Output Loss: 24.792328647363966


Epoch 143/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=24.6]
Epoch 143/100: 100%|██████████| 107/107 [03:41<00:00,  2.07s/it, val_output_loss=20.8]


Epoch 143/100, Training Loss: 23.643088464212262, Validation Output Loss: 24.75506716576692


Epoch 144/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=22.8]
Epoch 144/100: 100%|██████████| 107/107 [04:08<00:00,  2.32s/it, val_output_loss=20.7]


Epoch 144/100, Training Loss: 23.618085611213758, Validation Output Loss: 24.808397988292658


Epoch 145/100: 100%|██████████| 1854/1854 [37:49<00:00,  1.22s/it, loss=23.6]
Epoch 145/100: 100%|██████████| 107/107 [03:44<00:00,  2.10s/it, val_output_loss=20.6]


Epoch 145/100, Training Loss: 23.614116694420545, Validation Output Loss: 24.65931348711531
Best model saved with Validation Loss: 24.65931348711531


Epoch 146/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=24.5]
Epoch 146/100: 100%|██████████| 107/107 [03:46<00:00,  2.12s/it, val_output_loss=20.5]


Epoch 146/100, Training Loss: 23.539283362535727, Validation Output Loss: 24.458631533328617
Best model saved with Validation Loss: 24.458631533328617


Epoch 147/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=24.2]
Epoch 147/100: 100%|██████████| 107/107 [07:23<00:00,  4.14s/it, val_output_loss=20.4]


Epoch 147/100, Training Loss: 23.557152218288845, Validation Output Loss: 24.54030700041869


Epoch 148/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=23.8]
Epoch 148/100: 100%|██████████| 107/107 [03:43<00:00,  2.09s/it, val_output_loss=20.4]


Epoch 148/100, Training Loss: 23.517534941145517, Validation Output Loss: 24.486980901700313


Epoch 149/100: 100%|██████████| 1854/1854 [37:45<00:00,  1.22s/it, loss=24.5]
Epoch 149/100: 100%|██████████| 107/107 [21:25<00:00, 12.01s/it, val_output_loss=20.4]


Epoch 149/100, Training Loss: 23.492402753655064, Validation Output Loss: 24.544836507779415


Epoch 150/100: 100%|██████████| 1854/1854 [37:47<00:00,  1.22s/it, loss=23.8]
Epoch 150/100: 100%|██████████| 107/107 [03:44<00:00,  2.10s/it, val_output_loss=20.3]


Epoch 150/100, Training Loss: 23.485481850666265, Validation Output Loss: 24.431181114410684
Best model saved with Validation Loss: 24.431181114410684


In [None]:
import torch
import torch.nn as nn
import timm

# Sanity check: instantiate the teacher and the student, then print the shape
# of every tensor returned by extract_feat and of the student's forward output.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

swin_model = SwinTransformerTeacher()
swin_model = swin_model.to(device)
res_model = ResNetStudent()
res_model = res_model.to(device)

# A single dummy batch, created directly on the target device, is reused for
# every probe below: only the output shapes matter, not the values.
dummy_input = torch.randn(1, 3, 224, 224, device=device)

# Nothing here is back-propagated, so disable autograd to avoid building
# (and holding in memory) computation graphs for throw-away forward passes.
# NOTE(review): the models stay in whatever mode their constructors leave them
# in; if a model's output differs between train and eval mode, or it contains
# BatchNorm/Dropout, consider calling .eval() before probing -- confirm
# against the model definitions.
with torch.no_grad():
    swin_features = swin_model.extract_feat(dummy_input)
    res_features = res_model.extract_feat(dummy_input)

for swin_feature in swin_features:
    print(swin_feature.shape)
print('='*100)
for res_feature in res_features:
    print(res_feature.shape)

# Shape of the student's final output for the same dummy batch.
with torch.no_grad():
    print(res_model(dummy_input).shape)