In [None]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Directory the Whisper checkpoint is loaded from. Despite the variable name,
# this is a local filesystem path rather than a model identifier.
# NOTE(review): absolute, machine-specific Windows path — the notebook will not
# run elsewhere without editing it; consider a configurable base directory.
model_name = r"F:\Hybridmodel-project\ipyfiles\model\whisper-large-v3-turbo"
# The processor and the model are both loaded from the same directory; later
# cells use `processor` for preprocessing and `model` for training.
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)


In [None]:
import torch

def prepare_example(batch):
    """Convert one raw example into Whisper training inputs.

    Reads ``batch["audio"]`` (a mapping with ``"array"`` and
    ``"sampling_rate"`` entries) and ``batch["sentence"]`` (the transcript),
    and relies on the notebook-level ``processor``.

    Args:
        batch: A single example containing the ``"audio"`` and ``"sentence"``
            fields described above.

    Returns:
        The same ``batch`` object with two entries added:
        ``"input_features"`` (the processor's features for the audio, first
        and only item of the batch dimension) and ``"labels"`` (the token ids
        of the transcript, likewise without the batch dimension).
    """
    audio = batch["audio"]
    features = processor(
        audio["array"],
        sampling_rate=audio["sampling_rate"],
        return_tensors="pt",
    )
    # Tokenize the transcript with the tokenizer directly instead of going
    # through `processor.as_target_processor()`, which is deprecated and not
    # available in newer transformers releases. The resulting ids are the same.
    label_ids = processor.tokenizer(batch["sentence"], return_tensors="pt").input_ids

    # Both calls return a leading batch dimension of size 1; strip it so each
    # example stores plain per-example sequences.
    batch["input_features"] = features.input_features[0]
    batch["labels"] = label_ids[0]
    return batch

# Run prepare_example over the dataset so it gains "input_features" and
# "labels" entries.
# NOTE(review): `dataset` is not defined in any cell visible here — it must be
# loaded earlier for Restart & Run All to work. The name is also rebound to its
# own mapped result, so the un-mapped dataset is no longer reachable afterwards.
dataset = dataset.map(prepare_example)


In [None]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq

class _SpeechSeq2SeqPaddingCollator:
    """Pad audio features and label ids separately into one training batch.

    Whisper examples carry two differently shaped fields: ``input_features``
    (handled by the feature extractor) and ``labels`` (token ids handled by the
    tokenizer). ``DataCollatorForSeq2Seq`` is built around a tokenizer only and
    has no ``processor`` parameter, so it cannot be used here.
    """

    def __init__(self, processor, decoder_start_token_id):
        self.processor = processor
        self.decoder_start_token_id = decoder_start_token_id

    def __call__(self, features):
        """Collate a list of examples with "input_features" and "labels"."""
        audio_inputs = [{"input_features": f["input_features"]} for f in features]
        batch = self.processor.feature_extractor.pad(audio_inputs, return_tensors="pt")

        label_inputs = [{"input_ids": f["labels"]} for f in features]
        padded = self.processor.tokenizer.pad(label_inputs, return_tensors="pt")
        # Padding positions are set to -100 so the loss ignores them.
        labels = padded["input_ids"].masked_fill(padded["attention_mask"].ne(1), -100)

        # The model prepends the decoder start token itself when it builds the
        # decoder inputs from the labels; drop it here if every row already
        # starts with it, otherwise it would appear twice.
        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels
        return batch


# Effective batch size per device is 2 * 2 = 4 (batch size x accumulation).
# No evaluation is configured: "no" is the default evaluation strategy, so the
# keyword is omitted (its name differs between transformers releases).
# fp16=True requires a GPU that supports half-precision training.
training_args = Seq2SeqTrainingArguments(
    output_dir="./whisper-large-v3-finetuned",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    learning_rate=1e-5,
    warmup_steps=100,
    max_steps=500,
    logging_steps=10,
    fp16=True,
    save_steps=100,
    save_total_limit=2,
)

data_collator = _SpeechSeq2SeqPaddingCollator(
    processor=processor,
    decoder_start_token_id=model.config.decoder_start_token_id,
)

# The feature extractor is passed via `tokenizer` so it is saved alongside
# checkpoints. NOTE(review): newer transformers releases expect this under
# `processing_class` instead — adjust if the installed version rejects it.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=dataset,
    tokenizer=processor.feature_extractor,
    data_collator=data_collator,
)

trainer.train()
