In [59]:
from os import listdir
from pathlib import Path
import json

import torch
import random
from torch.utils.data import DataLoader,Dataset
from PIL import Image
from torchvision import transforms

# CLIP 官方预训练时常用的归一化参数（RGB 三个通道的均值和标准差）
CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
CLIP_STD  = [0.26862954, 0.26130258, 0.27577711]
class WenwuDataset:
    def __init__(self,data_json_path:str="/Users/zsh2401/Lab/Spiders/wenwu/data.json",images_path:str="/Users/zsh2401/Lab/Spiders/wenwu/output_root"):
        with open(data_json_path,'r') as f:
            self.data = json.load(f)["items"]
        self.ids = sorted(self.data.keys())
        self.images_root = Path(images_path)
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop((224,224),scale=(0.8,1.0),ratio=(3/4,4/3)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(CLIP_MEAN,CLIP_STD)
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        uuid = self.ids[idx]
        item =  self.data[uuid]
        item_image_folder = self.images_root / uuid[0:2] / uuid[2:4] / uuid
        images = []
        for name in listdir(item_image_folder):
            fpath = item_image_folder / name
            image = Image.open(fpath).convert('RGB')
            # image = self.transform(image)
            images.append(image)
        text_pos = f'{item["name"]},{item['meta']['年代']},{",".join(item["types"])}'
        text_neg = f'{item["name"]}'
        return images[0],text_pos,text_neg

dataset = WenwuDataset()
dataset[0],len(dataset)

((<PIL.Image.Image image mode=RGB size=800x600>,
  '青釉刻莲瓣纹钵,隋,钵（容器）,瓷钵,瓷（陶瓷）,青瓷,青釉钵,陶瓷,颜色釉瓷',
  '青釉刻莲瓣纹钵'),
 7673)

In [62]:
# train_wenwu_clip.py
# ----------------------------------------------------------
# 0. 可配置超参
BATCH_SIZE      = 16          # 128 for 24 GB, 256 for 40 GB
EPOCHS          = 5
LR              = 5e-6
FREEZE_VIT_LAYERS = 10         # 冻结前 10 层视觉 backbone
FREEZE_TXT_LAYERS = 9          # 同理文本
MODEL_NAME      = "openai/clip-vit-base-patch32"   # 或 hfl/chinese-clip-vit-b32
DATA_PATH       = "data/wenwu_meta.jsonl"
OUTPUT_DIR      = "wenwu-clip-vit-b32"
# ----------------------------------------------------------

import json, random, os, torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel, get_linear_schedule_with_warmup

device = torch.device("mps")
torch.backends.cuda.matmul.allow_tf32 = True  # 加速

# 2. collate：一次性把正负喂进 processor
def collate_fn(batch):
    imgs, txt_pos, txt_neg = zip(*batch)
    processor =  CLIPProcessor.from_pretrained(MODEL_NAME)
    # 拼 batch：图像重复两次，文本 = 正 + 负
    imgs2  = imgs + imgs                      # 2*B
    texts2 = list(txt_pos) + list(txt_neg)    # 2*B
    inputs = processor(text=texts2, images=imgs2,
                       truncation=True,
                       return_tensors="pt", padding=True)
    return inputs

# 3. 准备模型并冻结部分层
model = CLIPModel.from_pretrained(MODEL_NAME)
# 冻结视觉 backbone 前 FREEZE_VIT_LAYERS 层
for name, param in model.vision_model.named_parameters():
    if name.split(".")[1].isdigit() and int(name.split(".")[1]) < FREEZE_VIT_LAYERS:
        param.requires_grad_(False)
# 冻结文本编码器前 FREEZE_TXT_LAYERS 层
for name, param in model.text_model.named_parameters():
    if name.startswith("encoder.layers"):
        layer_idx = int(name.split(".")[2])
        if layer_idx < FREEZE_TXT_LAYERS:
            param.requires_grad_(False)
model = model.to(device)

# InfoNCE = 交叉熵(相似度矩阵 / τ)，直接用 CLIP 的 loss
criterion = nn.CrossEntropyLoss()

# 4. DataLoader & 优化器
dl  = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True,collate_fn=collate_fn, pin_memory=True)

optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad,
                                     model.parameters()), lr=LR, weight_decay=0.2)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=len(dl)*1, num_training_steps=len(dl)*EPOCHS)

# 5. 训练循环
global_step = 0
model.train()
for epoch in range(EPOCHS):
    pbar = tqdm(dl, desc=f"epoch {epoch+1}/{EPOCHS}")
    for batch in pbar:

        batch = {k:v.to(device) for k,v in batch.items()}
        print("what")
        # forward
        outputs = model(**batch)
        img_emb = outputs.image_embeds            # shape (2B, D)
        txt_emb = outputs.text_embeds

        # 构造对比标签：
        # seq: [正 img1, img1_neg]  [正 img2, img2_neg] ...
        # 正确对是 idx_i == idx_(i+B)
        logits = img_emb @ txt_emb.t()            # 2B x 2B
        labels = torch.arange(len(img_emb), device=device)
        loss_i2t = criterion(logits / 0.01, labels)            # img→text
        loss_t2i = criterion(logits.t() / 0.01, labels)        # text→img
        loss = (loss_i2t + loss_t2i)/2

        loss.backward()
        optimizer.step(); scheduler.step()
        optimizer.zero_grad(set_to_none=True)

        pbar.set_postfix(loss=float(loss), lr=scheduler.get_last_lr()[0])
        global_step += 1

    # 可选：每 epoch 保存 ckpt
    model.save_pretrained(f"{OUTPUT_DIR}/ckpt_epoch{epoch+1}")

# 6. 训练完成后保存最终权重
model.save_pretrained(OUTPUT_DIR)
collate_fn.processor.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

epoch 1/5:   0%|          | 0/480 [00:00<?, ?it/s]

what


epoch 1/5:   0%|          | 1/480 [00:10<1:27:14, 10.93s/it, loss=4.49, lr=1.04e-8]

what


epoch 1/5:   0%|          | 2/480 [00:16<1:00:08,  7.55s/it, loss=5.38, lr=2.08e-8]

what


epoch 1/5:   1%|          | 3/480 [00:22<57:35,  7.24s/it, loss=4.26, lr=3.13e-8]  

what


epoch 1/5:   1%|          | 4/480 [00:28<51:06,  6.44s/it, loss=4.37, lr=4.17e-8]

what


epoch 1/5:   1%|          | 5/480 [00:32<46:02,  5.82s/it, loss=4.21, lr=5.21e-8]

what


epoch 1/5:   1%|▏         | 6/480 [00:40<50:33,  6.40s/it, loss=4.5, lr=6.25e-8] 

what


epoch 1/5:   1%|▏         | 7/480 [00:47<53:25,  6.78s/it, loss=4.7, lr=7.29e-8]

what


epoch 1/5:   2%|▏         | 8/480 [00:55<55:18,  7.03s/it, loss=4.3, lr=8.33e-8]

what


epoch 1/5:   2%|▏         | 9/480 [01:00<50:31,  6.44s/it, loss=5.23, lr=9.38e-8]

what


epoch 1/5:   2%|▏         | 10/480 [01:05<47:31,  6.07s/it, loss=4.48, lr=1.04e-7]

what


epoch 1/5:   2%|▏         | 11/480 [01:12<48:36,  6.22s/it, loss=4.22, lr=1.15e-7]

what


epoch 1/5:   2%|▎         | 12/480 [01:17<45:19,  5.81s/it, loss=4.7, lr=1.25e-7] 

what


epoch 1/5:   3%|▎         | 13/480 [01:24<48:07,  6.18s/it, loss=4.45, lr=1.35e-7]

what


epoch 1/5:   3%|▎         | 14/480 [01:30<48:42,  6.27s/it, loss=4.05, lr=1.46e-7]

what


epoch 1/5:   3%|▎         | 15/480 [01:37<50:00,  6.45s/it, loss=4.34, lr=1.56e-7]

what


epoch 1/5:   3%|▎         | 16/480 [01:45<52:52,  6.84s/it, loss=4.24, lr=1.67e-7]

what


epoch 1/5:   4%|▎         | 17/480 [01:50<48:59,  6.35s/it, loss=4.29, lr=1.77e-7]

what


epoch 1/5:   4%|▍         | 18/480 [01:58<51:32,  6.69s/it, loss=3.77, lr=1.88e-7]

what


epoch 1/5:   4%|▍         | 19/480 [02:02<46:35,  6.06s/it, loss=4.01, lr=1.98e-7]

what


epoch 1/5:   4%|▍         | 20/480 [02:09<48:50,  6.37s/it, loss=4.6, lr=2.08e-7] 

what


epoch 1/5:   4%|▍         | 21/480 [02:15<46:48,  6.12s/it, loss=4.21, lr=2.19e-7]

what


epoch 1/5:   5%|▍         | 22/480 [02:22<49:10,  6.44s/it, loss=4.27, lr=2.29e-7]

what


epoch 1/5:   5%|▍         | 23/480 [02:27<45:29,  5.97s/it, loss=4.01, lr=2.4e-7] 

what


epoch 1/5:   5%|▌         | 24/480 [02:34<46:59,  6.18s/it, loss=4.01, lr=2.5e-7]

what


epoch 1/5:   5%|▌         | 25/480 [02:41<48:24,  6.38s/it, loss=3.9, lr=2.6e-7] 

what


epoch 1/5:   5%|▌         | 26/480 [02:48<49:40,  6.56s/it, loss=4.14, lr=2.71e-7]

what


epoch 1/5:   6%|▌         | 27/480 [02:54<50:16,  6.66s/it, loss=4.29, lr=2.81e-7]

what


epoch 1/5:   6%|▌         | 28/480 [03:01<51:04,  6.78s/it, loss=3.6, lr=2.92e-7] 

what


epoch 1/5:   6%|▌         | 29/480 [03:07<48:49,  6.50s/it, loss=3.62, lr=3.02e-7]

what


epoch 1/5:   6%|▋         | 30/480 [03:13<46:14,  6.17s/it, loss=3.79, lr=3.13e-7]

what


epoch 1/5:   6%|▋         | 31/480 [03:18<43:26,  5.81s/it, loss=3.72, lr=3.23e-7]

what


epoch 1/5:   7%|▋         | 32/480 [03:23<41:35,  5.57s/it, loss=3.83, lr=3.33e-7]

what


epoch 1/5:   7%|▋         | 33/480 [03:29<43:53,  5.89s/it, loss=4.13, lr=3.44e-7]

what


epoch 1/5:   7%|▋         | 34/480 [03:34<41:32,  5.59s/it, loss=3.91, lr=3.54e-7]

what


epoch 1/5:   7%|▋         | 35/480 [03:39<40:20,  5.44s/it, loss=3.8, lr=3.65e-7] 

what


epoch 1/5:   8%|▊         | 36/480 [03:47<45:44,  6.18s/it, loss=4.06, lr=3.75e-7]

what


epoch 1/5:   8%|▊         | 37/480 [03:54<46:43,  6.33s/it, loss=3.51, lr=3.85e-7]

what


epoch 1/5:   8%|▊         | 38/480 [04:00<46:57,  6.37s/it, loss=3.8, lr=3.96e-7] 

what


epoch 1/5:   8%|▊         | 39/480 [04:05<43:33,  5.93s/it, loss=3.94, lr=4.06e-7]

what


epoch 1/5:   8%|▊         | 40/480 [04:12<44:55,  6.13s/it, loss=3.97, lr=4.17e-7]

what


epoch 1/5:   9%|▊         | 41/480 [04:19<46:04,  6.30s/it, loss=4.3, lr=4.27e-7] 

what


epoch 1/5:   9%|▉         | 42/480 [04:24<43:35,  5.97s/it, loss=3.93, lr=4.37e-7]

what


epoch 1/5:   9%|▉         | 43/480 [04:29<41:23,  5.68s/it, loss=3.84, lr=4.48e-7]

what


epoch 1/5:   9%|▉         | 44/480 [04:37<45:55,  6.32s/it, loss=3.8, lr=4.58e-7] 

what


epoch 1/5:   9%|▉         | 45/480 [04:44<48:26,  6.68s/it, loss=3.54, lr=4.69e-7]

what


epoch 1/5:  10%|▉         | 46/480 [04:51<47:51,  6.62s/it, loss=3.82, lr=4.79e-7]

what


epoch 1/5:  10%|▉         | 47/480 [04:56<44:57,  6.23s/it, loss=4.01, lr=4.9e-7] 

what


epoch 1/5:  10%|█         | 48/480 [05:01<43:01,  5.97s/it, loss=3.7, lr=5e-7]   

what


epoch 1/5:  10%|█         | 49/480 [05:08<44:48,  6.24s/it, loss=3.9, lr=5.1e-7]

what


epoch 1/5:  10%|█         | 50/480 [05:17<49:46,  6.95s/it, loss=3.61, lr=5.21e-7]

what


epoch 1/5:  11%|█         | 51/480 [05:25<53:19,  7.46s/it, loss=3.83, lr=5.31e-7]

what


epoch 1/5:  11%|█         | 52/480 [05:36<59:17,  8.31s/it, loss=3.75, lr=5.42e-7]

what


epoch 1/5:  11%|█         | 53/480 [05:44<58:53,  8.27s/it, loss=3.74, lr=5.52e-7]

what


epoch 1/5:  11%|█▏        | 54/480 [05:49<52:31,  7.40s/it, loss=3.5, lr=5.63e-7] 

what


epoch 1/5:  11%|█▏        | 55/480 [05:58<54:50,  7.74s/it, loss=3.57, lr=5.73e-7]

what


epoch 1/5:  12%|█▏        | 56/480 [06:05<53:10,  7.52s/it, loss=3.8, lr=5.83e-7] 

what


epoch 1/5:  12%|█▏        | 57/480 [06:13<54:16,  7.70s/it, loss=3.8, lr=5.94e-7]

what


epoch 1/5:  12%|█▏        | 58/480 [06:19<50:05,  7.12s/it, loss=3.5, lr=6.04e-7]

what


epoch 1/5:  12%|█▏        | 59/480 [06:25<48:07,  6.86s/it, loss=3.92, lr=6.15e-7]

what


epoch 1/5:  12%|█▎        | 60/480 [06:32<49:09,  7.02s/it, loss=3.68, lr=6.25e-7]

what


epoch 1/5:  13%|█▎        | 61/480 [06:41<52:24,  7.50s/it, loss=3.39, lr=6.35e-7]

what


epoch 1/5:  13%|█▎        | 62/480 [06:53<1:00:57,  8.75s/it, loss=3.7, lr=6.46e-7]

what


epoch 1/5:  13%|█▎        | 63/480 [06:59<55:54,  8.04s/it, loss=3.76, lr=6.56e-7]  

what


epoch 1/5:  13%|█▎        | 64/480 [07:04<49:58,  7.21s/it, loss=3.47, lr=6.67e-7]

what


epoch 1/5:  14%|█▎        | 65/480 [07:09<44:35,  6.45s/it, loss=3.93, lr=6.77e-7]

what


epoch 1/5:  14%|█▍        | 66/480 [07:16<45:44,  6.63s/it, loss=3.64, lr=6.88e-7]

what


epoch 1/5:  14%|█▍        | 67/480 [07:23<46:50,  6.81s/it, loss=3.75, lr=6.98e-7]

what


epoch 1/5:  14%|█▍        | 68/480 [07:28<42:59,  6.26s/it, loss=3.4, lr=7.08e-7] 

what


epoch 1/5:  14%|█▍        | 69/480 [07:36<46:14,  6.75s/it, loss=3.53, lr=7.19e-7]

what


epoch 1/5:  15%|█▍        | 70/480 [07:46<52:57,  7.75s/it, loss=3.58, lr=7.29e-7]

what


epoch 1/5:  15%|█▍        | 71/480 [07:51<46:43,  6.85s/it, loss=3.64, lr=7.4e-7] 

what


epoch 1/5:  15%|█▌        | 72/480 [07:58<47:58,  7.05s/it, loss=3.54, lr=7.5e-7]

what


epoch 1/5:  15%|█▌        | 73/480 [08:04<44:00,  6.49s/it, loss=3.75, lr=7.6e-7]

what


epoch 1/5:  15%|█▌        | 74/480 [08:08<40:18,  5.96s/it, loss=3.45, lr=7.71e-7]

what


epoch 1/5:  16%|█▌        | 75/480 [08:14<39:49,  5.90s/it, loss=3.79, lr=7.81e-7]

what


epoch 1/5:  16%|█▌        | 76/480 [08:21<40:59,  6.09s/it, loss=3.64, lr=7.92e-7]

what


epoch 1/5:  16%|█▌        | 77/480 [08:27<41:26,  6.17s/it, loss=3.24, lr=8.02e-7]

what


epoch 1/5:  16%|█▋        | 78/480 [08:34<42:13,  6.30s/it, loss=3.68, lr=8.13e-7]

what


epoch 1/5:  16%|█▋        | 79/480 [08:38<39:04,  5.85s/it, loss=3.57, lr=8.23e-7]

what


epoch 1/5:  17%|█▋        | 80/480 [08:45<40:21,  6.05s/it, loss=3.29, lr=8.33e-7]

what


epoch 1/5:  17%|█▋        | 81/480 [08:52<41:51,  6.29s/it, loss=3.49, lr=8.44e-7]

what


epoch 1/5:  17%|█▋        | 82/480 [08:59<44:33,  6.72s/it, loss=3.44, lr=8.54e-7]

what


epoch 1/5:  17%|█▋        | 83/480 [09:04<40:42,  6.15s/it, loss=3.45, lr=8.65e-7]

what


epoch 1/5:  18%|█▊        | 84/480 [09:12<44:03,  6.68s/it, loss=3.54, lr=8.75e-7]

what


epoch 1/5:  18%|█▊        | 85/480 [09:20<46:13,  7.02s/it, loss=3.27, lr=8.85e-7]

what


epoch 1/5:  18%|█▊        | 86/480 [09:25<41:34,  6.33s/it, loss=3.54, lr=8.96e-7]

what


epoch 1/5:  18%|█▊        | 87/480 [09:29<38:18,  5.85s/it, loss=3.29, lr=9.06e-7]

what


epoch 1/5:  18%|█▊        | 88/480 [09:34<35:43,  5.47s/it, loss=3.3, lr=9.17e-7] 

what


epoch 1/5:  19%|█▊        | 89/480 [09:39<34:21,  5.27s/it, loss=3.22, lr=9.27e-7]

what


epoch 1/5:  19%|█▉        | 90/480 [09:46<37:22,  5.75s/it, loss=3.44, lr=9.38e-7]

what


epoch 1/5:  19%|█▉        | 91/480 [09:53<39:23,  6.08s/it, loss=3.41, lr=9.48e-7]

what


epoch 1/5:  19%|█▉        | 92/480 [10:00<41:17,  6.39s/it, loss=3.28, lr=9.58e-7]

what


epoch 1/5:  19%|█▉        | 93/480 [10:05<38:23,  5.95s/it, loss=3.33, lr=9.69e-7]

what


epoch 1/5:  20%|█▉        | 94/480 [10:12<40:40,  6.32s/it, loss=3.08, lr=9.79e-7]

what


epoch 1/5:  20%|█▉        | 95/480 [10:19<41:55,  6.53s/it, loss=3.37, lr=9.9e-7] 

what


epoch 1/5:  20%|██        | 96/480 [10:25<41:05,  6.42s/it, loss=3.35, lr=1e-6]  

what


epoch 1/5:  20%|██        | 97/480 [10:32<42:14,  6.62s/it, loss=3.41, lr=1.01e-6]

what


epoch 1/5:  20%|██        | 98/480 [10:37<38:24,  6.03s/it, loss=3.42, lr=1.02e-6]

what


epoch 1/5:  21%|██        | 99/480 [10:42<36:05,  5.68s/it, loss=3.18, lr=1.03e-6]

what


epoch 1/5:  21%|██        | 100/480 [10:49<39:11,  6.19s/it, loss=3.32, lr=1.04e-6]

what


epoch 1/5:  21%|██        | 101/480 [10:53<35:45,  5.66s/it, loss=3.16, lr=1.05e-6]

what


epoch 1/5:  21%|██▏       | 102/480 [10:59<35:45,  5.68s/it, loss=3.23, lr=1.06e-6]

what


epoch 1/5:  21%|██▏       | 103/480 [11:05<35:38,  5.67s/it, loss=3.36, lr=1.07e-6]

what


epoch 1/5:  22%|██▏       | 104/480 [11:11<36:43,  5.86s/it, loss=3.22, lr=1.08e-6]

what


epoch 1/5:  22%|██▏       | 105/480 [11:16<34:25,  5.51s/it, loss=3.14, lr=1.09e-6]

what


epoch 1/5:  22%|██▏       | 106/480 [11:22<35:17,  5.66s/it, loss=3.09, lr=1.1e-6] 

what


epoch 1/5:  22%|██▏       | 107/480 [11:28<36:21,  5.85s/it, loss=3.41, lr=1.11e-6]

what


epoch 1/5:  22%|██▎       | 108/480 [11:35<37:27,  6.04s/it, loss=3.01, lr=1.13e-6]

what


epoch 1/5:  23%|██▎       | 109/480 [11:41<38:03,  6.16s/it, loss=3.13, lr=1.14e-6]

what


epoch 1/5:  23%|██▎       | 110/480 [11:48<39:14,  6.36s/it, loss=3.2, lr=1.15e-6] 

what


epoch 1/5:  23%|██▎       | 111/480 [11:54<39:20,  6.40s/it, loss=3.37, lr=1.16e-6]

what


epoch 1/5:  23%|██▎       | 112/480 [12:01<39:24,  6.42s/it, loss=3.27, lr=1.17e-6]

what


epoch 1/5:  24%|██▎       | 113/480 [12:05<36:11,  5.92s/it, loss=3.03, lr=1.18e-6]

what


epoch 1/5:  24%|██▍       | 114/480 [12:11<34:49,  5.71s/it, loss=3.26, lr=1.19e-6]

what


epoch 1/5:  24%|██▍       | 115/480 [12:18<36:59,  6.08s/it, loss=3.43, lr=1.2e-6] 

what


epoch 1/5:  24%|██▍       | 116/480 [12:23<34:53,  5.75s/it, loss=3.45, lr=1.21e-6]

what


epoch 1/5:  24%|██▍       | 117/480 [12:30<37:11,  6.15s/it, loss=3.4, lr=1.22e-6] 

what


epoch 1/5:  25%|██▍       | 118/480 [12:37<38:20,  6.35s/it, loss=3.23, lr=1.23e-6]

what


epoch 1/5:  25%|██▍       | 119/480 [12:44<41:01,  6.82s/it, loss=3.18, lr=1.24e-6]

what


epoch 1/5:  25%|██▌       | 120/480 [12:51<39:31,  6.59s/it, loss=3.08, lr=1.25e-6]

what


epoch 1/5:  25%|██▌       | 121/480 [12:56<38:18,  6.40s/it, loss=3.26, lr=1.26e-6]

what


epoch 1/5:  25%|██▌       | 122/480 [13:02<35:45,  5.99s/it, loss=3.08, lr=1.27e-6]

what


epoch 1/5:  26%|██▌       | 123/480 [13:09<37:26,  6.29s/it, loss=3.11, lr=1.28e-6]

what


epoch 1/5:  26%|██▌       | 124/480 [13:13<33:58,  5.73s/it, loss=3.16, lr=1.29e-6]

what


epoch 1/5:  26%|██▌       | 125/480 [13:20<35:36,  6.02s/it, loss=3.24, lr=1.3e-6] 

what


epoch 1/5:  26%|██▋       | 126/480 [13:24<32:42,  5.54s/it, loss=3.14, lr=1.31e-6]

what


epoch 1/5:  26%|██▋       | 127/480 [13:33<39:27,  6.71s/it, loss=3.13, lr=1.32e-6]

what


epoch 1/5:  27%|██▋       | 128/480 [13:45<47:59,  8.18s/it, loss=3.02, lr=1.33e-6]

what


epoch 1/5:  27%|██▋       | 129/480 [13:53<47:53,  8.19s/it, loss=3.02, lr=1.34e-6]

what


epoch 1/5:  27%|██▋       | 130/480 [14:01<47:45,  8.19s/it, loss=3.05, lr=1.35e-6]

what


epoch 1/5:  27%|██▋       | 131/480 [14:09<45:44,  7.86s/it, loss=3.06, lr=1.36e-6]

what


epoch 1/5:  28%|██▊       | 132/480 [14:15<42:27,  7.32s/it, loss=3.27, lr=1.38e-6]

what


epoch 1/5:  28%|██▊       | 133/480 [14:22<42:19,  7.32s/it, loss=2.45, lr=1.39e-6]

what


epoch 1/5:  28%|██▊       | 134/480 [14:27<38:51,  6.74s/it, loss=3.34, lr=1.4e-6] 

what


epoch 1/5:  28%|██▊       | 135/480 [14:36<41:27,  7.21s/it, loss=3.13, lr=1.41e-6]

what


epoch 1/5:  28%|██▊       | 136/480 [14:43<40:46,  7.11s/it, loss=2.96, lr=1.42e-6]

what


epoch 1/5:  29%|██▊       | 137/480 [14:48<37:22,  6.54s/it, loss=3.49, lr=1.43e-6]

what


epoch 1/5:  29%|██▉       | 138/480 [14:55<39:02,  6.85s/it, loss=3.06, lr=1.44e-6]

what


epoch 1/5:  29%|██▉       | 139/480 [15:00<35:04,  6.17s/it, loss=3.13, lr=1.45e-6]

what


epoch 1/5:  29%|██▉       | 140/480 [15:05<32:49,  5.79s/it, loss=3.03, lr=1.46e-6]

what


epoch 1/5:  29%|██▉       | 141/480 [15:12<35:27,  6.28s/it, loss=3.19, lr=1.47e-6]

what


epoch 1/5:  30%|██▉       | 142/480 [15:17<32:36,  5.79s/it, loss=3.07, lr=1.48e-6]

what


epoch 1/5:  30%|██▉       | 143/480 [15:23<32:45,  5.83s/it, loss=2.93, lr=1.49e-6]

what


epoch 1/5:  30%|███       | 144/480 [15:30<34:33,  6.17s/it, loss=3.05, lr=1.5e-6] 

what


epoch 1/5:  30%|███       | 145/480 [15:35<33:14,  5.95s/it, loss=2.93, lr=1.51e-6]

what


epoch 1/5:  30%|███       | 146/480 [15:41<32:04,  5.76s/it, loss=2.88, lr=1.52e-6]

what


epoch 1/5:  31%|███       | 147/480 [15:48<34:49,  6.27s/it, loss=2.68, lr=1.53e-6]

what


epoch 1/5:  31%|███       | 148/480 [15:53<32:42,  5.91s/it, loss=3.08, lr=1.54e-6]

what


epoch 1/5:  31%|███       | 149/480 [16:01<35:44,  6.48s/it, loss=3.2, lr=1.55e-6] 

what


epoch 1/5:  31%|███▏      | 150/480 [16:05<32:30,  5.91s/it, loss=2.71, lr=1.56e-6]

what


epoch 1/5:  31%|███▏      | 151/480 [16:11<31:17,  5.71s/it, loss=3.16, lr=1.57e-6]

what


epoch 1/5:  32%|███▏      | 152/480 [16:16<30:57,  5.66s/it, loss=3.28, lr=1.58e-6]

what


epoch 1/5:  32%|███▏      | 153/480 [16:24<34:16,  6.29s/it, loss=3.04, lr=1.59e-6]

what


epoch 1/5:  32%|███▏      | 154/480 [16:29<31:40,  5.83s/it, loss=2.77, lr=1.6e-6] 

what


epoch 1/5:  32%|███▏      | 155/480 [16:35<32:45,  6.05s/it, loss=3, lr=1.61e-6]  

what


epoch 1/5:  32%|███▎      | 156/480 [16:42<34:20,  6.36s/it, loss=3.18, lr=1.63e-6]

what


epoch 1/5:  33%|███▎      | 157/480 [16:48<33:39,  6.25s/it, loss=3.07, lr=1.64e-6]

what


epoch 1/5:  33%|███▎      | 158/480 [16:53<31:21,  5.84s/it, loss=2.79, lr=1.65e-6]

what


epoch 1/5:  33%|███▎      | 159/480 [17:01<33:34,  6.27s/it, loss=2.91, lr=1.66e-6]

what


epoch 1/5:  33%|███▎      | 160/480 [17:06<32:45,  6.14s/it, loss=2.82, lr=1.67e-6]

what


epoch 1/5:  34%|███▎      | 161/480 [17:12<31:38,  5.95s/it, loss=2.79, lr=1.68e-6]

what


epoch 1/5:  34%|███▍      | 162/480 [17:19<32:39,  6.16s/it, loss=2.91, lr=1.69e-6]

what


epoch 1/5:  34%|███▍      | 163/480 [17:24<30:43,  5.82s/it, loss=2.71, lr=1.7e-6] 

what


epoch 1/5:  34%|███▍      | 164/480 [17:31<32:28,  6.17s/it, loss=3.06, lr=1.71e-6]

what


epoch 1/5:  34%|███▍      | 165/480 [17:38<34:23,  6.55s/it, loss=3.1, lr=1.72e-6] 

what


epoch 1/5:  35%|███▍      | 166/480 [17:43<31:26,  6.01s/it, loss=2.89, lr=1.73e-6]

what


epoch 1/5:  35%|███▍      | 167/480 [17:48<29:50,  5.72s/it, loss=2.93, lr=1.74e-6]

what


epoch 1/5:  35%|███▌      | 168/480 [17:54<30:51,  5.93s/it, loss=2.78, lr=1.75e-6]

what


epoch 1/5:  35%|███▌      | 169/480 [18:01<31:36,  6.10s/it, loss=2.96, lr=1.76e-6]

what


epoch 1/5:  35%|███▌      | 170/480 [18:07<32:25,  6.28s/it, loss=3.17, lr=1.77e-6]

what


epoch 1/5:  36%|███▌      | 171/480 [18:12<30:17,  5.88s/it, loss=2.97, lr=1.78e-6]

what


epoch 1/5:  36%|███▌      | 172/480 [18:18<30:35,  5.96s/it, loss=2.64, lr=1.79e-6]

what


epoch 1/5:  36%|███▌      | 173/480 [18:23<28:56,  5.65s/it, loss=3.25, lr=1.8e-6] 

what


epoch 1/5:  36%|███▋      | 174/480 [18:30<30:34,  5.99s/it, loss=3.13, lr=1.81e-6]

what


epoch 1/5:  36%|███▋      | 175/480 [18:37<31:24,  6.18s/it, loss=2.68, lr=1.82e-6]

what


epoch 1/5:  37%|███▋      | 176/480 [18:42<30:24,  6.00s/it, loss=3.16, lr=1.83e-6]

what


epoch 1/5:  37%|███▋      | 177/480 [18:48<29:38,  5.87s/it, loss=2.67, lr=1.84e-6]

what


epoch 1/5:  37%|███▋      | 178/480 [18:54<29:50,  5.93s/it, loss=2.72, lr=1.85e-6]

what


epoch 1/5:  37%|███▋      | 179/480 [18:59<28:19,  5.65s/it, loss=3.03, lr=1.86e-6]

what


epoch 1/5:  38%|███▊      | 180/480 [19:04<27:09,  5.43s/it, loss=2.72, lr=1.88e-6]

what


epoch 1/5:  38%|███▊      | 181/480 [19:09<27:06,  5.44s/it, loss=2.92, lr=1.89e-6]

what


epoch 1/5:  38%|███▊      | 182/480 [19:15<26:36,  5.36s/it, loss=3.29, lr=1.9e-6] 

what


epoch 1/5:  38%|███▊      | 183/480 [19:22<28:57,  5.85s/it, loss=2.9, lr=1.91e-6]

what


epoch 1/5:  38%|███▊      | 184/480 [19:26<27:06,  5.49s/it, loss=2.61, lr=1.92e-6]

what


epoch 1/5:  39%|███▊      | 185/480 [19:31<26:34,  5.41s/it, loss=2.6, lr=1.93e-6] 

what


epoch 1/5:  39%|███▉      | 186/480 [19:36<25:42,  5.25s/it, loss=3, lr=1.94e-6]  

what


epoch 1/5:  39%|███▉      | 187/480 [19:43<27:55,  5.72s/it, loss=2.72, lr=1.95e-6]

what


epoch 1/5:  39%|███▉      | 188/480 [19:50<29:13,  6.00s/it, loss=2.74, lr=1.96e-6]

what


epoch 1/5:  39%|███▉      | 189/480 [19:56<30:06,  6.21s/it, loss=2.94, lr=1.97e-6]

what


epoch 1/5:  40%|███▉      | 190/480 [20:03<30:10,  6.24s/it, loss=2.92, lr=1.98e-6]

what


epoch 1/5:  40%|███▉      | 191/480 [20:10<31:42,  6.58s/it, loss=2.55, lr=1.99e-6]

what


epoch 1/5:  40%|████      | 192/480 [20:16<30:05,  6.27s/it, loss=2.27, lr=2e-6]   

what


epoch 1/5:  40%|████      | 193/480 [20:21<28:00,  5.86s/it, loss=2.57, lr=2.01e-6]

what


epoch 1/5:  40%|████      | 194/480 [20:27<29:00,  6.09s/it, loss=2.69, lr=2.02e-6]

what


epoch 1/5:  41%|████      | 195/480 [20:34<30:16,  6.37s/it, loss=2.45, lr=2.03e-6]

what


epoch 1/5:  41%|████      | 196/480 [20:39<28:17,  5.98s/it, loss=2.51, lr=2.04e-6]

what


epoch 1/5:  41%|████      | 197/480 [20:44<26:30,  5.62s/it, loss=3.04, lr=2.05e-6]

what


epoch 1/5:  41%|████▏     | 198/480 [20:51<28:16,  6.02s/it, loss=2.85, lr=2.06e-6]

what


epoch 1/5:  41%|████▏     | 199/480 [20:58<29:05,  6.21s/it, loss=2.52, lr=2.07e-6]

what


epoch 1/5:  42%|████▏     | 200/480 [21:02<26:47,  5.74s/it, loss=2.43, lr=2.08e-6]

what


epoch 1/5:  42%|████▏     | 201/480 [21:09<27:56,  6.01s/it, loss=2.52, lr=2.09e-6]

what


epoch 1/5:  42%|████▏     | 202/480 [21:14<26:03,  5.63s/it, loss=2.44, lr=2.1e-6] 

what


epoch 1/5:  42%|████▏     | 203/480 [21:21<28:02,  6.07s/it, loss=2.68, lr=2.11e-6]

what


epoch 1/5:  42%|████▎     | 204/480 [21:25<25:45,  5.60s/it, loss=2.71, lr=2.12e-6]

what


epoch 1/5:  43%|████▎     | 205/480 [21:33<28:32,  6.23s/it, loss=2.93, lr=2.14e-6]

what


epoch 1/5:  43%|████▎     | 206/480 [21:38<27:22,  5.99s/it, loss=2.47, lr=2.15e-6]

what


epoch 1/5:  43%|████▎     | 207/480 [21:46<29:20,  6.45s/it, loss=2.64, lr=2.16e-6]

what


epoch 1/5:  43%|████▎     | 208/480 [21:53<29:19,  6.47s/it, loss=2.65, lr=2.17e-6]

what


epoch 1/5:  44%|████▎     | 209/480 [21:58<27:20,  6.06s/it, loss=2.56, lr=2.18e-6]

what


epoch 1/5:  44%|████▍     | 210/480 [22:04<27:23,  6.09s/it, loss=2.56, lr=2.19e-6]

what


epoch 1/5:  44%|████▍     | 211/480 [22:09<25:39,  5.72s/it, loss=2.58, lr=2.2e-6] 

what


epoch 1/5:  44%|████▍     | 212/480 [22:14<25:03,  5.61s/it, loss=2.61, lr=2.21e-6]

what


epoch 1/5:  44%|████▍     | 213/480 [22:20<24:56,  5.60s/it, loss=2.77, lr=2.22e-6]

what


epoch 1/5:  45%|████▍     | 214/480 [22:26<26:28,  5.97s/it, loss=2.57, lr=2.23e-6]

what


epoch 1/5:  45%|████▍     | 215/480 [22:33<27:41,  6.27s/it, loss=2.49, lr=2.24e-6]

what


epoch 1/5:  45%|████▌     | 216/480 [22:39<26:04,  5.93s/it, loss=2.52, lr=2.25e-6]

what


epoch 1/5:  45%|████▌     | 217/480 [22:43<24:36,  5.61s/it, loss=2.66, lr=2.26e-6]

what


epoch 1/5:  45%|████▌     | 218/480 [22:51<27:21,  6.26s/it, loss=2.71, lr=2.27e-6]

what


epoch 1/5:  46%|████▌     | 219/480 [22:58<27:54,  6.42s/it, loss=2.47, lr=2.28e-6]

what


epoch 1/5:  46%|████▌     | 220/480 [23:10<34:33,  7.98s/it, loss=2.29, lr=2.29e-6]

what


epoch 1/5:  46%|████▌     | 221/480 [23:15<31:02,  7.19s/it, loss=2.4, lr=2.3e-6]  

what


epoch 1/5:  46%|████▋     | 222/480 [23:21<30:03,  6.99s/it, loss=2.5, lr=2.31e-6]

what


epoch 1/5:  46%|████▋     | 223/480 [23:33<27:08,  6.34s/it, loss=2.32, lr=2.32e-6]

KeyboardInterrupt

