In [None]:
# ChatGPT

In [None]:
# Print the kernel's working directory; the relative paths used later in this
# notebook (e.g. the model save directory) resolve against it.
!pwd

In [None]:
!pip install transformers

In [None]:
import numpy as np
import pandas as pd
import torch, torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig


In [None]:
def get_incar_assist_dataset():
    """Load the in-car assistant intent samples and their label vocabulary.

    Reads two CSV files from S3: the samples file (a ``Label`` column holding
    the intent name, expected to be accompanied by exactly one text column)
    and the labels file (``id`` and ``label`` columns). Every sample's intent
    name is mapped to its integer id via a left join on the intent name.

    Returns:
        tuple: ``(records, labels_dict)`` where ``records`` is a list of
        ``{"text": ..., "label": <int id>}`` dicts and ``labels_dict`` maps
        each integer id to its intent name.

    Raises:
        ValueError: if a sample uses an intent name that is missing from the
            labels file (it would otherwise surface later as a NaN label).
    """
    data_root = "s3://data-daizika-com/incar_assist/data/intent_classification/"
    samples_df = pd.read_csv(data_root + "incar_assist_samples.csv")
    labels_df = pd.read_csv(data_root + "incar_assist_labels.csv")

    labels_dict = {rec['id']: rec['label'] for rec in labels_df.to_dict(orient="records")}

    joined = (
        samples_df.set_index('Label')
        .join(labels_df.set_index('label'), how="left")
        .reset_index()
    )
    # Positional rename: former index (intent name), sample text, joined id.
    joined.columns = ['intent', 'text', 'label']

    # A left join leaves NaN ids for intents absent from the labels file, which
    # also turns the whole column into floats; fail here with a clear message.
    unmatched = joined['label'].isna()
    if unmatched.any():
        unknown = sorted(joined.loc[unmatched, 'intent'].astype(str).unique())
        raise ValueError(
            f"Samples reference intents missing from the labels file: {unknown}"
        )
    joined['label'] = joined['label'].astype("int64")

    return joined[['text', 'label']].to_dict(orient="records"), labels_dict
    
# Expected dataset shape: a list of dicts with 'text' and 'label' keys,
# e.g. train_examples = [{"text": "book me a flight", "label": 3}, ...]
# Load the training records and the id -> intent-name mapping.
# Calling this reads both CSV files from S3.
train_examples, labels_dict = get_incar_assist_dataset()

def collate(batch):
    """Turn a list of ``{"text", "label"}`` records into one tokenized batch.

    The texts are tokenized together by the notebook-level tokenizer ``tok``
    (padded, truncated to at most 128 tokens, returned as PyTorch tensors) and
    the labels are attached to the encoding under the ``"labels"`` key.
    """
    texts, targets = [], []
    for example in batch:
        texts.append(example["text"])
        targets.append(example["label"])

    encoded = tok(
        texts,
        max_length=128,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    encoded["labels"] = torch.tensor(targets)
    return encoded

# Number of intent classes, used to size the classification heads.
# NOTE(review): len() only matches the label ids if they are contiguous
# 0..N-1 — confirm against the labels CSV.
NUM_LABELS = len(labels_dict)

In [None]:
# --- Model checkpoints ---
student_ckpt = "roberta-base"
teacher_ckpt = "roberta-large"  # swap in the path of your fine-tuned teacher

# --- Distillation hyper-parameters ---
alpha = 0.9  # share of the total loss given to the soft (teacher) term
T = 4.0      # softmax temperature applied to teacher and student logits

# --- Optimisation settings ---
epochs = 3
batch_size = 16
lr = 2e-5

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# One tokenizer feeds both models, so the teacher checkpoint must use the same
# vocabulary as `student_ckpt`.
tok = AutoTokenizer.from_pretrained(student_ckpt, use_fast=True)

# Teacher: frozen in eval mode on the training device. Only its logits are
# consumed by the distillation loop, so hidden states / attentions are not
# requested (returning them for every batch just costs memory).
# NOTE: with a plain pretrained checkpoint the classification head is randomly
# initialised — point `teacher_ckpt` at a fine-tuned model for useful targets.
teacher = AutoModelForSequenceClassification.from_pretrained(
    teacher_ckpt, num_labels=NUM_LABELS
).to(device).eval()

# Student: a 6-layer variant of the student checkpoint. Loading through
# from_pretrained (instead of from_config) initialises the embeddings and the
# first 6 encoder layers from the pretrained weights; the checkpoint's deeper
# layers are reported as unused and skipped, and only the classification head
# starts from random values.
cfg = AutoConfig.from_pretrained(student_ckpt)
cfg.num_hidden_layers = 6
cfg.num_labels = NUM_LABELS
student = AutoModelForSequenceClassification.from_pretrained(
    student_ckpt, config=cfg
).to(device)


In [None]:
# Only the student's parameters are optimised.
opt = optim.AdamW(params=student.parameters(), lr=lr)

# Shuffled mini-batches of raw records; `collate` tokenizes each batch.
dl = DataLoader(
    dataset=train_examples,
    collate_fn=collate,
    shuffle=True,
    batch_size=batch_size,
)


In [None]:
# Knowledge-distillation training loop: the student learns from a weighted mix
# of the teacher's softened predictions (KL term) and the true labels (CE term).
teacher.eval()  # the teacher only provides targets; keep dropout disabled
for epoch in range(epochs):
    student.train()
    running_loss, n_batches = 0.0, 0
    for batch in dl:
        # Move tensors to the same device the models were placed on.
        batch = {k: v.to(device) for k, v in batch.items()}

        # Teacher forward pass without building a graph.
        with torch.no_grad():
            t_logits = teacher(
                input_ids=batch["input_ids"], attention_mask=batch["attention_mask"]
            ).logits

        # Labels are deliberately not passed to the student: the hard-label loss
        # is computed explicitly below, so the model's built-in loss would only
        # be wasted computation.
        s_logits = student(
            input_ids=batch["input_ids"], attention_mask=batch["attention_mask"]
        ).logits

        # Soft target loss (KLDiv between temperature-softened distributions).
        # The T*T factor rescales the loss after the logits were divided by T.
        log_p_s = F.log_softmax(s_logits / T, dim=-1)
        p_t = F.softmax(t_logits / T, dim=-1)
        kd_loss = F.kl_div(log_p_s, p_t, reduction="batchmean") * (T * T)

        # Hard label loss against the ground-truth intent ids.
        ce_loss = F.cross_entropy(s_logits, batch["labels"])

        loss = alpha * kd_loss + (1 - alpha) * ce_loss

        # Clear gradients before backward so a previously interrupted run of
        # this cell cannot leak stale gradients into the first step.
        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(student.parameters(), 1.0)
        opt.step()

        running_loss += loss.item()
        n_batches += 1

    print(f"epoch {epoch + 1}/{epochs} - mean loss: {running_loss / max(n_batches, 1):.4f}")


In [None]:
# Save the distilled student and the tokenizer into the same local directory
# (relative to the current working directory).
student.save_pretrained("roberta-student-distilled")
tok.save_pretrained("roberta-student-distilled")

In [None]:
# Upload to S3: recursively copy the locally saved model directory to the model
# prefix in the bucket. Uses the AWS CLI; presumably needs credentials with
# write access to the bucket — confirm.
!aws s3 cp --recursive ./roberta-student-distilled s3://data-daizika-com/incar_assist/model/roberta-student-distilled/
    

## Using the model

In [None]:
# List ../lambda, the directory the inference cell below loads the saved
# model and tokenizer from.
!ls ../lambda

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 1. Load the saved tokenizer and distilled student model.
tokenizer = AutoTokenizer.from_pretrained("../lambda/roberta-student-distilled")
# NOTE: despite its name, `model_int8` currently holds the unquantized model;
# the dynamic int8 quantization path below is disabled.
#model_f16 = AutoModelForSequenceClassification.from_pretrained("../lambda/roberta-student-distilled")
#model_int8 = torch.quantization.quantize_dynamic(model_f16, {torch.nn.Linear}, dtype=torch.qint8)
model_int8 = AutoModelForSequenceClassification.from_pretrained("../lambda/roberta-student-distilled")
model_int8.eval()  # make inference mode explicit (dropout disabled)

# Class id -> intent name.
# NOTE(review): hard-coded here so this cell runs without the training cells;
# it must match the label ids the model was trained with — confirm.
label_dict = {0: 'close door',
 1: 'open door',
 2: 'open window',
 3: 'close window',
 4: 'open bluetooth',
 5: 'close bluetooth',
 6: 'steering wheel',
 7: 'camera'}

# 2. Input text to classify.
text = "Please open the door"

# 3. Tokenize the text (convert it to model inputs).
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

# 4. Run inference without tracking gradients.
with torch.no_grad():
    outputs = model_int8(**inputs)
    logits = outputs.logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()

# Look the id up in this cell's own `label_dict`, so the cell does not depend
# on a variable that only exists after the training cells have run.
print(f"Predicted intent class: {label_dict[predicted_class]}")
