In [1]:
import torch
import os
from tqdm import tqdm, trange
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from vgg_dataset import *
from models import *
from train_utls import *

In [2]:
# Root directory of the dataset (the path suggests pre-computed VGGSound
# embeddings). It can be overridden with the VGG_SOUND_EMB_ROOT environment
# variable so the notebook also runs on machines with a different mount point;
# when the variable is unset the original location is used.
root = os.environ.get("VGG_SOUND_EMB_ROOT", "/mnt/new_volume2/vgg_sound_emb")
partition = "train"  # sub-directory of `root` to read from
data_dir = f"{root}/{partition}"
print(f"Loading {partition} data from {data_dir}")

Loading train data from /mnt/new_volume2/vgg_sound_emb/train


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Value forwarded as `subset_ratio` to the dataset constructor
# (presumably the fraction of samples under `data_dir` to load — confirm in
# vgg_dataset).
SUBSET_RATIO = 0.8

# Alternative loader that was tried previously:
#   vgg_sound = LargeVideoDataset(data_dir, subset_ratio = 0.2)
vgg_sound = InMemoryVideoDataset(data_dir, subset_ratio=SUBSET_RATIO)

Loading 146100 samples into memory as torch.float16 …


Caching data:  53%|█████▎    | 77163/146100 [10:28<09:19, 123.18it/s]

# DataLoader

In [5]:
# Hold out 10% of the samples for validation and 10% for testing; whatever is
# left after truncating those two sizes to integers is used for training.
val_ratio = 0.1
test_ratio = 0.1

total_len = len(vgg_sound)
val_len, test_len = (int(total_len * ratio) for ratio in (val_ratio, test_ratio))
train_len = total_len - (val_len + test_len)

# A fixed seed makes the random split identical on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    vgg_sound,
    [train_len, val_len, test_len],
    generator=split_generator,
)


In [6]:
# Loader settings: samples per batch and number of DataLoader worker processes.
batch_size, num_workers = 2048, 8

In [7]:
# Settings common to all three loaders; only the dataset and the shuffle flag
# differ between them.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
)

# Only the training data is reshuffled each epoch; validation and test keep a
# fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [8]:
# Pull a single batch from the training loader and report where each tensor
# lives and which dtype it has.
clip_feat, clap_feat = next(iter(train_loader))
for label, feat in (("Clip:", clip_feat), ("Clap:", clap_feat)):
    print(label, feat.device, feat.dtype)

Clip: cpu torch.float16
Clap: cpu torch.float16


# Wandb

In [9]:
import wandb

# Model

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CosineLoss(nn.Module):
    """Hinge-style loss on cosine similarity.

    For every sample the loss is ``max(0, margin - cos_sim(output, target))``;
    the per-sample values are averaged over the batch. Samples whose cosine
    similarity already reaches ``margin`` contribute zero.

    Args:
        margin: Similarity level below which a sample is penalised.
    """

    def __init__(self, margin=0.8):  # a slightly higher margin is recommended
        super().__init__()
        self.margin = margin

    def forward(self, output, target):
        """Return the mean hinge loss between ``output`` and ``target``.

        Args:
            output: Predictions, expected as (B, 512).
            target: Ground truth, expected as (B, 1, 512) or (B, 512).

        Returns:
            Scalar tensor holding the batch-averaged loss.
        """
        # Drop the singleton middle axis so both tensors line up as (B, D).
        if target.ndim == 3:
            target = target.squeeze(1)
        similarity = F.cosine_similarity(output, target, dim=1)  # (B,)
        shortfall = (self.margin - similarity).clamp(min=0)
        return shortfall.mean()


In [11]:
# Show the selected compute device before the model is built.
print(device)

cuda


In [None]:
# Hyperparameters of the mapper model, keyed by constructor keyword:
# (input_dim=512, hidden_dim=1024, output_dim=512, lstm_hidden=256, lstm_layers=1)
configs = dict(
    input_dim=512,
    hidden_dim=1024,
    output_dim=512,
    lstm_hidden=256,
    lstm_layers=1,
    dropout=0.3,
)

In [None]:
# Build the BiLSTM mapper from the `configs` dict declared in the previous cell
# instead of repeating every hyperparameter inline, so the declared settings
# and the instantiated model cannot drift apart. `configs` carries exactly the
# keywords that used to be spelled out here: input_dim=512, hidden_dim=1024,
# output_dim=512, lstm_hidden=256, lstm_layers=1, dropout=0.3.
#
# Alternative architecture tried earlier:
#   model = V2AMapperMLPImproved(input_dim=512, hidden_dim=2048, output_dim=512).to(device)
model = V2AMapperBiLSTM(**configs).to(device)

In [13]:
# Show the model summary.
# NOTE: `summary` is imported here but not called anywhere in this cell.
from torchsummary import summary
# model = V2AMapperMLP(input_dim=512, hidden_dim=512, output_dim=512).to(device)
# model = V2AMapperMLPImproved().to(device)

# # Switch to torchinfo instead: it has better compatibility!
# from torchinfo import summary as torchinfo_summary
# torchinfo_summary(model, input_size=(1, 64, 512))

In [13]:
# Weights & Biases run setup.
# Use wandb? Resume Training?
USE_WANDB = True

RESUME_LOGGING = False # Set this to true if you are resuming training from a previous run

# Create your wandb run

run_name = 'lstm-vggsound-simplefied_mapper' # Give your run a name, this will be used to identify the run in wandb

if USE_WANDB:

    # SECURITY: never commit an API key in the notebook. The key is read from
    # the WANDB_API_KEY environment variable; when it is unset, `key=None` lets
    # wandb fall back to its own credential lookup (e.g. ~/.netrc) or prompt.
    # The key that used to be hard-coded here has been exposed and should be
    # revoked and regenerated in the wandb account settings.
    wandb.login(key=os.environ.get("WANDB_API_KEY"))

    if RESUME_LOGGING:
        # Resume a previous run.
        run = wandb.init(
            id     = "", ### Insert specific run id here if you want to resume a previous run
            resume = "must", ### You need this to resume previous runs
            project = "v2amapper", ### Project should be created in your wandb
            settings = wandb.Settings(_service_wait=300)
        )

    else:
        # Start a fresh run.
        run = wandb.init(
            name    = run_name, ### Wandb creates random run names if you skip this field, we recommend you give useful names
            reinit  = True, ### Allows reinitalizing runs when you re-run this cell
            project = "v2amapper", ### Project should be created in your wandb account
        )

        ### Save your model architecture as a string with str(model)
        model_arch  = str(model)
        ### Save it in a txt file; the context manager closes the file even if
        ### the write fails.
        with open("model_arch.txt", "w") as arch_file:
            arch_file.write(model_arch)

        ### log it in your wandb run with wandb.save()
        wandb.save('model_arch.txt')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mzianp[0m ([33mzianp-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [14]:
# Optimisation hyperparameters.
lr = 0.001
epochs = 120

# Regression loss between the model output and the target embedding.
criterion = nn.MSELoss()
# criterion = CosineLoss(margin= 1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Cosine annealing from `lr` down to `eta_min`; T_max equals the number of
# epochs, so one full half-cosine spans the whole run when stepped once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0.000005)
# Gradient scaler handed to `train` (used for mixed-precision training).
scaler = torch.amp.GradScaler(enabled=True)

# Directory passed to `train` for checkpoints. `exist_ok=True` makes the cell
# safe to re-run and avoids the check-then-create race.
ckpt_dir = "ckpts/"
os.makedirs(ckpt_dir, exist_ok=True)

# Pass the `ckpt_dir` variable rather than repeating the literal, so changing
# the directory above is enough.
train(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    scaler,
    scheduler,
    ckpt_dir=ckpt_dir,
    num_epochs=epochs,
)

Epoch 1/120


Train:   0%|          | 0/15 [00:00<?, ?it/s]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.9445, Val Loss: 0.9136
Model saved!
Epoch 2/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.66it/s, loss=0.9120, lr=0.001000]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.8894, Val Loss: 0.8617
Model saved!
Epoch 3/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.8616, lr=0.000999]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.8377, Val Loss: 0.8100
Model saved!
Epoch 4/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.06s/it, loss=0.8100, lr=0.000998]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.7855, Val Loss: 0.7574
Model saved!
Epoch 5/120


Train:   7%|▋         | 1/15 [00:01<00:15,  1.12s/it, loss=0.7574, lr=0.000997]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.7326, Val Loss: 0.7043
Model saved!
Epoch 6/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.73it/s, loss=0.7026, lr=0.000996]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.6797, Val Loss: 0.6519
Model saved!
Epoch 7/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.70it/s, loss=0.6502, lr=0.000994]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.6281, Val Loss: 0.6014
Model saved!
Epoch 8/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.05s/it, loss=0.6014, lr=0.000992]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.5787, Val Loss: 0.5534
Model saved!
Epoch 9/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.93it/s, loss=0.5517, lr=0.000989]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.5319, Val Loss: 0.5081
Model saved!
Epoch 10/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.5080, lr=0.000986]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.4878, Val Loss: 0.4652
Model saved!
Epoch 11/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.96it/s, loss=0.4637, lr=0.000983]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.4459, Val Loss: 0.4243
Model saved!
Epoch 12/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.77it/s, loss=0.4229, lr=0.000980]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.4055, Val Loss: 0.3842
Model saved!
Epoch 13/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.77it/s, loss=0.3828, lr=0.000976]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.3654, Val Loss: 0.3441
Model saved!
Epoch 14/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.84it/s, loss=0.3427, lr=0.000971]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.3257, Val Loss: 0.3052
Model saved!
Epoch 15/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.3052, lr=0.000967]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.2884, Val Loss: 0.2701
Model saved!
Epoch 16/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.95it/s, loss=0.2690, lr=0.000962]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.2554, Val Loss: 0.2396
Model saved!
Epoch 17/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.76it/s, loss=0.2386, lr=0.000957]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.2270, Val Loss: 0.2135
Model saved!
Epoch 18/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.03it/s, loss=0.2127, lr=0.000952]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.2027, Val Loss: 0.1911
Model saved!
Epoch 19/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.12it/s, loss=0.1904, lr=0.000946]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1818, Val Loss: 0.1717
Model saved!
Epoch 20/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.97it/s, loss=0.1710, lr=0.000940]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1635, Val Loss: 0.1547
Model saved!
Epoch 21/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.00s/it, loss=0.1546, lr=0.000933]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1475, Val Loss: 0.1398
Model saved!
Epoch 22/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.90it/s, loss=0.1392, lr=0.000927]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1334, Val Loss: 0.1265
Model saved!
Epoch 23/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.87it/s, loss=0.1260, lr=0.000920]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1208, Val Loss: 0.1146
Model saved!
Epoch 24/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.67it/s, loss=0.1142, lr=0.000913]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.1095, Val Loss: 0.1040
Model saved!
Epoch 25/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.03s/it, loss=0.1040, lr=0.000905]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0994, Val Loss: 0.0945
Model saved!
Epoch 26/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.79it/s, loss=0.0942, lr=0.000897]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0904, Val Loss: 0.0859
Model saved!
Epoch 27/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.65it/s, loss=0.0856, lr=0.000889]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0822, Val Loss: 0.0782
Model saved!
Epoch 28/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.80it/s, loss=0.0778, lr=0.000881]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0748, Val Loss: 0.0711
Model saved!
Epoch 29/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.82it/s, loss=0.0708, lr=0.000872]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0681, Val Loss: 0.0648
Model saved!
Epoch 30/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.02it/s, loss=0.0647, lr=0.000863]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0620, Val Loss: 0.0590
Model saved!
Epoch 31/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.91it/s, loss=0.0588, lr=0.000854]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0565, Val Loss: 0.0538
Model saved!
Epoch 32/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.86it/s, loss=0.0536, lr=0.000845]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0515, Val Loss: 0.0491
Model saved!
Epoch 33/120


Train:   7%|▋         | 1/15 [00:01<00:16,  1.15s/it, loss=0.0490, lr=0.000835]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0470, Val Loss: 0.0448
Model saved!
Epoch 34/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.78it/s, loss=0.0446, lr=0.000826]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0429, Val Loss: 0.0409
Model saved!
Epoch 35/120


Train:   7%|▋         | 1/15 [00:01<00:15,  1.14s/it, loss=0.0408, lr=0.000816]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0392, Val Loss: 0.0374
Model saved!
Epoch 36/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.01it/s, loss=0.0372, lr=0.000805]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0358, Val Loss: 0.0341
Model saved!
Epoch 37/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.09it/s, loss=0.0340, lr=0.000795]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0327, Val Loss: 0.0312
Model saved!
Epoch 38/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.04s/it, loss=0.0312, lr=0.000784]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0299, Val Loss: 0.0286
Model saved!
Epoch 39/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.79it/s, loss=0.0284, lr=0.000773]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0273, Val Loss: 0.0261
Model saved!
Epoch 40/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.87it/s, loss=0.0260, lr=0.000762]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0250, Val Loss: 0.0240
Model saved!
Epoch 41/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.70it/s, loss=0.0238, lr=0.000751]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0229, Val Loss: 0.0219
Model saved!
Epoch 42/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.71it/s, loss=0.0218, lr=0.000740]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0210, Val Loss: 0.0201
Model saved!
Epoch 43/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.98it/s, loss=0.0200, lr=0.000728]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0193, Val Loss: 0.0185
Model saved!
Epoch 44/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.63it/s, loss=0.0184, lr=0.000717]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0177, Val Loss: 0.0170
Model saved!
Epoch 45/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.83it/s, loss=0.0169, lr=0.000705]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0163, Val Loss: 0.0156
Model saved!
Epoch 46/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.59it/s, loss=0.0155, lr=0.000693]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0150, Val Loss: 0.0144
Model saved!
Epoch 47/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.89it/s, loss=0.0143, lr=0.000681]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0138, Val Loss: 0.0133
Model saved!
Epoch 48/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.05s/it, loss=0.0132, lr=0.000669]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0127, Val Loss: 0.0122
Model saved!
Epoch 49/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.00it/s, loss=0.0121, lr=0.000656]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0117, Val Loss: 0.0113
Model saved!
Epoch 50/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.83it/s, loss=0.0112, lr=0.000644]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0108, Val Loss: 0.0105
Model saved!
Epoch 51/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.0104, lr=0.000631]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0100, Val Loss: 0.0097
Model saved!
Epoch 52/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.79it/s, loss=0.0096, lr=0.000619]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0093, Val Loss: 0.0090
Model saved!
Epoch 53/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.68it/s, loss=0.0089, lr=0.000606]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0086, Val Loss: 0.0084
Model saved!
Epoch 54/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.74it/s, loss=0.0082, lr=0.000593]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0080, Val Loss: 0.0078
Model saved!
Epoch 55/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.72it/s, loss=0.0077, lr=0.000580]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0074, Val Loss: 0.0073
Model saved!
Epoch 56/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.91it/s, loss=0.0071, lr=0.000567]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0069, Val Loss: 0.0068
Model saved!
Epoch 57/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.13it/s, loss=0.0067, lr=0.000555]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0065, Val Loss: 0.0064
Model saved!
Epoch 58/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.03it/s, loss=0.0062, lr=0.000542]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0060, Val Loss: 0.0060
Model saved!
Epoch 59/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.04it/s, loss=0.0058, lr=0.000529]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0057, Val Loss: 0.0056
Model saved!
Epoch 60/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.59it/s, loss=0.0055, lr=0.000516]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0053, Val Loss: 0.0053
Model saved!
Epoch 61/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.65it/s, loss=0.0051, lr=0.000502]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0050, Val Loss: 0.0050
Model saved!
Epoch 62/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.75it/s, loss=0.0048, lr=0.000489]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0047, Val Loss: 0.0047
Model saved!
Epoch 63/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.62it/s, loss=0.0045, lr=0.000476]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0044, Val Loss: 0.0045
Model saved!
Epoch 64/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.64it/s, loss=0.0043, lr=0.000463]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0042, Val Loss: 0.0042
Model saved!
Epoch 65/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.59it/s, loss=0.0041, lr=0.000450]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0040, Val Loss: 0.0040
Model saved!
Epoch 66/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.95it/s, loss=0.0039, lr=0.000438]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0038, Val Loss: 0.0038
Model saved!
Epoch 67/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.71it/s, loss=0.0037, lr=0.000425]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0036, Val Loss: 0.0037
Model saved!
Epoch 68/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.66it/s, loss=0.0035, lr=0.000412]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0034, Val Loss: 0.0035
Model saved!
Epoch 69/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.89it/s, loss=0.0033, lr=0.000399]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0032, Val Loss: 0.0034
Model saved!
Epoch 70/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.01it/s, loss=0.0032, lr=0.000386]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0031, Val Loss: 0.0032
Model saved!
Epoch 71/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.57it/s, loss=0.0030, lr=0.000374]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0030, Val Loss: 0.0031
Model saved!
Epoch 72/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.89it/s, loss=0.0029, lr=0.000361]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0029, Val Loss: 0.0030
Model saved!
Epoch 73/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.62it/s, loss=0.0028, lr=0.000349]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0027, Val Loss: 0.0029
Model saved!
Epoch 74/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.72it/s, loss=0.0027, lr=0.000336]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0026, Val Loss: 0.0028
Model saved!
Epoch 75/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.86it/s, loss=0.0026, lr=0.000324]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0025, Val Loss: 0.0027
Model saved!
Epoch 76/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.08it/s, loss=0.0025, lr=0.000312]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0025, Val Loss: 0.0027
Model saved!
Epoch 77/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.87it/s, loss=0.0024, lr=0.000300]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0024, Val Loss: 0.0026
Model saved!
Epoch 78/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.08it/s, loss=0.0023, lr=0.000288]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0023, Val Loss: 0.0025
Model saved!
Epoch 79/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.68it/s, loss=0.0023, lr=0.000277]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0022, Val Loss: 0.0024
Model saved!
Epoch 80/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.74it/s, loss=0.0022, lr=0.000265]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0022, Val Loss: 0.0024
Model saved!
Epoch 81/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.82it/s, loss=0.0021, lr=0.000254]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0021, Val Loss: 0.0023
Model saved!
Epoch 82/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.72it/s, loss=0.0021, lr=0.000243]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0021, Val Loss: 0.0023
Model saved!
Epoch 83/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.96it/s, loss=0.0020, lr=0.000232]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0020, Val Loss: 0.0022
Model saved!
Epoch 84/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.68it/s, loss=0.0020, lr=0.000221]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0020, Val Loss: 0.0022
Model saved!
Epoch 85/120


Train:   7%|▋         | 1/15 [00:01<00:15,  1.09s/it, loss=0.0019, lr=0.000210]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0019, Val Loss: 0.0022
Model saved!
Epoch 86/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.04s/it, loss=0.0019, lr=0.000200]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0019, Val Loss: 0.0021
Model saved!
Epoch 87/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.63it/s, loss=0.0018, lr=0.000189]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0018, Val Loss: 0.0021
Model saved!
Epoch 88/120


Train:   7%|▋         | 1/15 [00:01<00:15,  1.14s/it, loss=0.0018, lr=0.000179]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0018, Val Loss: 0.0021
Model saved!
Epoch 89/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.98it/s, loss=0.0018, lr=0.000170]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0018, Val Loss: 0.0020
Model saved!
Epoch 90/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.80it/s, loss=0.0018, lr=0.000160]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0017, Val Loss: 0.0020
Model saved!
Epoch 91/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.60it/s, loss=0.0017, lr=0.000151]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0017, Val Loss: 0.0020
Model saved!
Epoch 92/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.70it/s, loss=0.0017, lr=0.000142]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0017, Val Loss: 0.0020
Model saved!
Epoch 93/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.0017, lr=0.000133]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0017, Val Loss: 0.0020
Model saved!
Epoch 94/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.83it/s, loss=0.0017, lr=0.000124]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0017, Val Loss: 0.0019
Model saved!
Epoch 95/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.03it/s, loss=0.0016, lr=0.000116]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Model saved!
Epoch 96/120


Train:   7%|▋         | 1/15 [00:01<00:15,  1.10s/it, loss=0.0016, lr=0.000108]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Model saved!
Epoch 97/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.01it/s, loss=0.0016, lr=0.000100]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Model saved!
Epoch 98/120


Train:  13%|█▎        | 2/15 [00:01<00:09,  1.39it/s, loss=0.0016, lr=0.000092]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Validation loss did not improve, model not saved.
Epoch 99/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.01s/it, loss=0.0016, lr=0.000085]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Model saved!
Epoch 100/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.97it/s, loss=0.0016, lr=0.000078]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0016, Val Loss: 0.0019
Model saved!
Epoch 101/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.57it/s, loss=0.0015, lr=0.000072]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0019
Validation loss did not improve, model not saved.
Epoch 102/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.71it/s, loss=0.0015, lr=0.000065]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 103/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.62it/s, loss=0.0015, lr=0.000059]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 104/120


Train:   7%|▋         | 1/15 [00:01<00:14,  1.01s/it, loss=0.0015, lr=0.000053]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 105/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.88it/s, loss=0.0015, lr=0.000048]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 106/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.76it/s, loss=0.0015, lr=0.000043]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Validation loss did not improve, model not saved.
Epoch 107/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.03it/s, loss=0.0015, lr=0.000038]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 108/120


Train:   7%|▋         | 1/15 [00:01<00:16,  1.15s/it, loss=0.0015, lr=0.000034]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Validation loss did not improve, model not saved.
Epoch 109/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.73it/s, loss=0.0015, lr=0.000029]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 110/120


Train:  13%|█▎        | 2/15 [00:01<00:08,  1.61it/s, loss=0.0015, lr=0.000025]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 111/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.68it/s, loss=0.0015, lr=0.000022]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Validation loss did not improve, model not saved.
Epoch 112/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.97it/s, loss=0.0015, lr=0.000019]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 113/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.77it/s, loss=0.0015, lr=0.000016]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Validation loss did not improve, model not saved.
Epoch 114/120


Train:   7%|▋         | 1/15 [00:00<00:13,  1.00it/s, loss=0.0015, lr=0.000013]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 115/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.04it/s, loss=0.0015, lr=0.000011]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 116/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.93it/s, loss=0.0015, lr=0.000009]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 117/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  2.04it/s, loss=0.0015, lr=0.000008]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 118/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.78it/s, loss=0.0015, lr=0.000007]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Validation loss did not improve, model not saved.
Epoch 119/120


Train:  13%|█▎        | 2/15 [00:01<00:06,  1.86it/s, loss=0.0015, lr=0.000006]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Epoch 120/120


Train:  13%|█▎        | 2/15 [00:01<00:07,  1.63it/s, loss=0.0015, lr=0.000005]

[Debug] clip_feat device = cuda:0, clap_feat device = cuda:0


                                                                                

Train Loss: 0.0015, Val Loss: 0.0018
Model saved!
Training complete!
Best validation loss: 0.0018


In [15]:
from infer import *

  from .autonotebook import tqdm as notebook_tqdm


Load AudioLDM: %s audioldm-s-full-v2
DiffusionWrapper has 185.04 M params.


  WeightNorm.apply(module, name, dim)
  fft_window = librosa.util.pad_center(fft_window, n_fft)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'robert

KeyboardInterrupt: 