In [None]:
# Log this notebook session in to the Hugging Face Hub before any checkpoints are fetched.
# NOTE(review): presumably needed because the Llama checkpoint used below is gated — confirm.
from huggingface_hub import notebook_login
notebook_login()

In [1]:
import torch
import inspect
from datasets import load_dataset
from models import SpeechToTextModel

In [2]:
from transformers import WhisperProcessor, TrainingArguments, Trainer, AutoTokenizer

In [3]:
from utils import LibriSpeechDataCollator

In [4]:
# Hub checkpoint identifiers shared by the model, processor and tokenizer cells.
# The Llama name is passed as `llama_model_name` and to AutoTokenizer;
# the Whisper name is passed as `whisper_model_name` and to WhisperProcessor.
LLAMA_MODEL_NAME = "meta-llama/Llama-3.2-3B"
WHISPER_MODEL_NAME = "openai/whisper-base"

In [5]:
# Build the speech-to-text model from the two checkpoint names configured above.
# NOTE(review): the saved output of this cell is a TypeError raised by
# LlamaForCausalLM.__init__() for the keyword 'attention_implementation'.
# The transformers loading keyword is spelled `attn_implementation`, so the
# fix presumably belongs inside models.SpeechToTextModel — confirm there.
model = SpeechToTextModel(
    llama_model_name=LLAMA_MODEL_NAME,
    whisper_model_name=WHISPER_MODEL_NAME,
    # Passed as False; presumably keeps the Whisper weights frozen — confirm in models.py.
    train_whisper=False,
    hidden_dims=[2048, 1024, 2048],
)

TypeError: LlamaForCausalLM.__init__() got an unexpected keyword argument 'attention_implementation'

In [6]:
# Hub dataset identifier; passed to load_dataset() in the following cell.
dataset_name = "openslr/librispeech_asr"

In [7]:
# Load the 'clean' configuration, 'train.100' split, of the dataset named above.
# This object is later used as the Trainer's train_dataset and sliced for the smoke test.
dataset = load_dataset(dataset_name, 'clean', split='train.100')

In [8]:
# Tokenizer is loaded from the Llama checkpoint, processor from the Whisper
# checkpoint; both are handed to LibriSpeechDataCollator further down.
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME)
processor = WhisperProcessor.from_pretrained(WHISPER_MODEL_NAME)

In [14]:
# Trainer configuration, with keyword arguments grouped by concern.
training_args = TrainingArguments(
    # --- precision / optimisation ---
    bf16=True,
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    num_train_epochs=1,
    # --- logging ---
    logging_steps=10,
    report_to="none",
    # --- checkpointing ---
    output_dir="./v1-checkpoints",
    overwrite_output_dir=True,
    save_steps=100,
    save_safetensors=False,
    # --- data handling ---
    # Presumably kept False so the custom collator receives every raw dataset column — confirm.
    remove_unused_columns=False,
)

In [20]:
# Wire the model, training arguments, dataset and a fresh collator into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=LibriSpeechDataCollator(processor, tokenizer),
    train_dataset=dataset,
)

In [21]:
# Start training with the Trainer built above.
# NOTE(review): the saved output of this cell is a CUDA OutOfMemoryError
# (77.34 GiB allocated by PyTorch on a 79.14 GiB device) with
# per_device_train_batch_size=1, so the memory footprint needs to be reduced
# before this cell can complete.
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 96.00 MiB. GPU 0 has a total capacity of 79.14 GiB of which 80.75 MiB is free. Process 1937152 has 79.05 GiB memory in use. Of the allocated memory 77.34 GiB is allocated by PyTorch, and 1.21 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Display the model object (bare expression, shown via its repr).
model


In [19]:
# Release cached allocator blocks, reset the peak counters, then print the
# CUDA memory report. Because the peak statistics are reset immediately before
# the summary is taken, the "Peak Usage" column in the saved output equals
# "Cur Usage".
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  37215 MiB |  37215 MiB |   8121 GiB |   8085 GiB |
|       from large pool |  37139 MiB |  37139 MiB |   8115 GiB |   8078 GiB |
|       from small pool |     76 MiB |     76 MiB |      6 GiB |      6 GiB |
|---------------------------------------------------------------------------|
| Active memory         |  37215 MiB |  37215 MiB |   8121 GiB |   8085 GiB |
|       from large pool |  37139 MiB |  37139 MiB |   8115 GiB |   8078 GiB |
|       from small pool |     76 MiB |     76 MiB |      6 GiB |      6 GiB |
|---------------------------------------------------------------

In [18]:
import gc

# Return unused GPU memory to the driver.
#
# Iterating gc.get_objects() and doing `del obj` on each tensor cannot free
# anything: `del` only unbinds the loop variable, while every other reference
# to the tensor (model parameters, optimizer state, notebook variables) stays
# alive. Tensors still reachable from live names such as `model`, `trainer` or
# `outputs` remain allocated; to release those, `del` the owning name first
# and then re-run this cell.
#
# Order matters: collect unreachable Python objects first so their CUDA blocks
# go back to PyTorch's caching allocator, then ask the allocator to hand the
# now-unused cached blocks back to the device.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Show the collator's source code. print() renders the real line breaks; a
# bare expression would display the string's repr with escaped "\n" sequences
# on a single line, which is unreadable.
print(inspect.getsource(LibriSpeechDataCollator))

In [62]:
# Smoke-test inputs: a collator instance plus a one-example slice of the dataset.
lbdc = LibriSpeechDataCollator(processor, tokenizer)
batch = dataset.select(range(1))

In [63]:
# Run the collator on the one-example slice. The result is indexed in later
# cells by "input_features", "labels" and "input_ids".
input_parameters = lbdc(batch)

In [64]:
# Print the shape of each collated tensor: features first, then labels, then ids.
for field in ("input_features", "labels", "input_ids"):
    print(input_parameters[field].shape)

torch.Size([1, 80, 3000])
torch.Size([1, 56])
torch.Size([1, 56])


In [65]:
# Move the three collated tensors onto the GPU, replacing the entries in place.
for field in ('input_features', 'labels', 'input_ids'):
    input_parameters[field] = input_parameters[field].cuda()

In [66]:
# Put the model on the GPU so it sits on the same device as the inputs moved above.
# NOTE(review): execution counts show this ran after the Trainer cells, so the
# model may already have been on the GPU at this point — confirm on a fresh kernel.
model = model.cuda()

In [67]:
# Forward-pass smoke test on the single collated example.
#
# * Call the module itself instead of `.forward()`: going through
#   nn.Module.__call__ runs any registered forward hooks, which a direct
#   `.forward()` call silently skips.
# * Run under torch.no_grad(): the result is only inspected (logits shape and
#   loss value), so there is no need to keep the autograd graph and its saved
#   activations in GPU memory. The saved output of this cell was a CUDA
#   OutOfMemoryError, so the extra headroom matters.
with torch.no_grad():
    outputs = model(
        input_features=input_parameters['input_features'],
        labels=input_parameters['labels'],
        input_ids=input_parameters['input_ids'],
    )

OutOfMemoryError: CUDA out of memory. Tried to allocate 762.00 MiB. GPU 0 has a total capacity of 79.14 GiB of which 534.75 MiB is free. Process 802611 has 78.61 GiB memory in use. Of the allocated memory 74.10 GiB is allocated by PyTorch, and 4.01 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Print the shape of the logits returned by the forward-pass cell above.
print(outputs.logits.shape)

In [None]:
# Print the loss returned by the forward-pass cell above (labels were supplied to that call).
print(outputs.loss)