In [None]:
import sys
# Make the local "Incontext-learning" directory importable (this setup is
# what works on Google Colab). The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
INCONTEXT_LEARNING_DIR = "./Incontext-learning"
if INCONTEXT_LEARNING_DIR not in sys.path:
    sys.path.append(INCONTEXT_LEARNING_DIR)

In [None]:
import transformers
import torch

In [None]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Task
Modify the provided code to perform a classification task instead of a causal language modeling task. This involves loading a suitable classification model, preparing a classification dataset, fine-tuning the model on the dataset, and evaluating its performance.

## Load a classification model

### Subtask:
Instead of loading a causal language model, load the checkpoint with a sequence-classification head by using `AutoModelForSequenceClassification`.


**Reasoning**:
The subtask is to load a sequence classification model. This requires importing the appropriate class and loading the model and tokenizer.



In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# NOTE: this is the base Qwen3 checkpoint, not a classifier. Loading it through
# AutoModelForSequenceClassification is expected to attach a newly initialised
# classification head, so predictions are meaningless until it is fine-tuned.
model_name = "Qwen/Qwen3-0.6B"

# Number of target classes; 2 matches the binary task used in this notebook.
NUM_LABELS = 2

# Load the tokenizer and make sure it has a padding token; fall back to EOS
# when the checkpoint does not define one.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the sequence classification model and move it to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=NUM_LABELS,
).to(device)

# Keep the model config in sync with the tokenizer straight away: decoder-style
# classification heads use config.pad_token_id to find the last real token of
# each sequence, and batched inputs fail when it is left undefined.
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

print("Sequence classification model and tokenizer loaded successfully.")

In [None]:
# Report the tokenizer's EOS and PAD tokens, each with its vocabulary id.
for caption, token_text, token_id in (
    ("EOS token:", tokenizer.eos_token, tokenizer.eos_token_id),
    ("PAD token:", tokenizer.pad_token, tokenizer.pad_token_id),
):
    print(caption, token_text, token_id)

## Prepare data

### Subtask:
Load and preprocess your classification dataset. This includes tokenizing the text and formatting the labels.


**Reasoning**:
Load a suitable classification dataset, define a preprocessing function to tokenize the text and prepare labels, and apply the function to the dataset.



In [None]:
from datasets import load_dataset

# Sizes of the small subsets used for this quick experiment.
TRAIN_SIZE = 100
EVAL_SIZE = 100

# Load SST-2 from GLUE. Only the "validation" split is loaded, so `dataset`
# is a single Dataset and BOTH subsets below are carved out of it.
dataset = load_dataset("glue", "sst2", split="validation")


def preprocess_function(examples):
    """Tokenize a batch of examples and attach their labels.

    Args:
        examples: A batch as passed by ``Dataset.map(batched=True)``; the
            "sentence" and "label" columns are read.

    Returns:
        The tokenizer output for the sentences (truncated, not padded here)
        with an extra "labels" entry copied from the "label" column.
    """
    tokenized_inputs = tokenizer(examples["sentence"], truncation=True)
    tokenized_inputs["labels"] = examples["label"]
    return tokenized_inputs


# Apply the preprocessing function to the whole split in batches.
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Fail early with a clear message if the split cannot supply both subsets.
assert len(tokenized_dataset) >= TRAIN_SIZE + EVAL_SIZE, (
    f"Need at least {TRAIN_SIZE + EVAL_SIZE} examples, "
    f"got {len(tokenized_dataset)}"
)

# Disjoint, contiguous subsets: the first TRAIN_SIZE rows for training and
# the next EVAL_SIZE rows for evaluation.
train_dataset = tokenized_dataset.select(range(TRAIN_SIZE))
eval_dataset = tokenized_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + EVAL_SIZE))

print("Dataset loaded, preprocessed, and split successfully.")
print("Training dataset example:", train_dataset[0])
print("Evaluation dataset example:", eval_dataset[0])

In [None]:
# Display the evaluation dataset (the notebook renders a cell's last expression).
eval_dataset

## Fine-tune the model

### Subtask:
Train the classification model on your prepared dataset.


**Reasoning**:
Import the necessary classes and define the training arguments, then initialize and start the training process.



**Reasoning**:
The error indicates that `evaluation_strategy` is not a valid argument for `TrainingArguments`. According to the transformers documentation, `evaluation_strategy` was deprecated and renamed to `eval_strategy`. I will switch to the new argument name and rerun the code.



In [None]:
import numpy as np
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding


def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Args:
        eval_pred: The prediction object the Trainer passes to
            ``compute_metrics``; its ``predictions`` (logits) and
            ``label_ids`` attributes are read.

    Returns:
        A dict with a single "accuracy" entry (fraction of correct predictions).
    """
    logits = eval_pred.predictions
    # Some models return extra outputs next to the logits; the logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predicted_classes = np.argmax(logits, axis=-1)
    return {"accuracy": float((predicted_classes == eval_pred.label_ids).mean())}


# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",  # Output directory
    eval_strategy="epoch",  # Evaluate every epoch
    learning_rate=2e-5,  # Learning rate
    per_device_train_batch_size=8,  # Batch size for training
    per_device_eval_batch_size=8,  # Batch size for evaluation
    num_train_epochs=1,  # Single epoch, to keep it comparable with in-context learning
    weight_decay=0.01,  # Weight decay
    logging_dir="./logs",  # Directory for logs
    logging_steps=10,  # Log every 10 steps
    report_to="none",  # Disable Weights & Biases
    save_strategy="no",  # Do not write checkpoints
)

# Ensure the tokenizer has a padding token and that the model config agrees
# with it; batches are padded by the collator below, and the model reads
# config.pad_token_id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

# Pads each batch dynamically to the longest sequence in that batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Initialize the Trainer
trainer = Trainer(
    model=model,  # the loaded sequence classification model
    args=training_args,  # training arguments
    train_dataset=train_dataset,  # training dataset
    eval_dataset=eval_dataset,  # evaluation dataset
    data_collator=data_collator,  # use the padding collator
    compute_metrics=compute_metrics,  # report accuracy in addition to loss
)

# Start training
trainer.train()