In [3]:
# Imports
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset, Dataset, DatasetDict  # or Dataset.from_pandas
import evaluate
import numpy as np

# Load preprocessed data
raw_ds = load_dataset("stanfordnlp/imdb")
train_ds = raw_ds["train"]
eval_ds  = raw_ds["test"]

# Choose a model & tokenizer
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer  = AutoTokenizer.from_pretrained(MODEL_NAME)
model      = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Tokenize your splits
def tokenize_batch(batch):
    return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=256)

train_ds = train_ds.map(tokenize_batch, batched=True)
eval_ds  = eval_ds.map(tokenize_batch,  batched=True)
# tell Trainer which columns to pass to the model
train_ds = train_ds.remove_columns(["text"])
eval_ds  = eval_ds.remove_columns(["text"])
train_ds.set_format("torch")
eval_ds.set_format("torch")

# Define a compute_metrics function
accuracy = evaluate.load("accuracy")
f1       = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds          = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1":       f1.compute(predictions=preds, references=labels, average="weighted")["f1"],
    }

# Set up TrainingArguments
training_args = TrainingArguments(
    output_dir="my_finetuned_model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    push_to_hub=False,         # set True if you want to push to HF Hub
)

# Instantiate the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Launch training!
trainer.train()

# Save your best model locally
trainer.save_model("my_finetuned_model")


  trainer = Trainer(


Step,Training Loss
500,0.2877
1000,0.2541
1500,0.2347
