# In [None]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, pipeline
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import openai
import gradio as gr

#dataset link: https://www.kaggle.com/datasets/adisongoh/it-service-ticket-classification-dataset
csv_path = r"D:\AU\Internships\DHC - ML\AI_ML_AdvancedTasks\AI-ML-AdvancedTask5_HussainAbdullah\all_tickets_processed_improved_v3.csv"

# ---------- 1. Load Dataset ----------
# Read the ticket CSV and normalise the column names used by the rest of the script.
# NOTE(review): assumes the file exposes "Subject", "Body" and "Types" columns -
# confirm against the downloaded CSV; columns missing here are not renamed and the
# lookups below would then fail.
df = pd.read_csv(csv_path).rename(
    columns={"Subject": "subject", "Body": "body", "Types": "label"}
)

# The model input is the subject followed by the body, separated by one space.
df["text"] = df["subject"].astype(str) + " " + df["body"].astype(str)

# Assign integer ids to the label names in sorted order, plus the inverse mapping.
labels = sorted(df["label"].unique())
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = dict(enumerate(labels))
df["labels"] = df["label"].map(label2id)

# Wrap the DataFrame as a Hugging Face Dataset for tokenization and training.
dataset = Dataset.from_pandas(df)

# ---------- 2. Zero-Shot Prompting ----------
def zero_shot_tags(text, top_k=3):
    """Ask the chat model to tag a ticket without giving it any examples.

    Args:
        text: The support ticket text to classify.
        top_k: How many tags to request. The JSON skeleton shown to the
            model contains exactly this many placeholder entries.

    Returns:
        The raw message content of the first completion choice. The prompt
        asks for JSON, but the reply is returned as-is and is not parsed here.
    """
    # Build the placeholder list from top_k so the instruction ("{top_k} most
    # relevant tags") and the example JSON never disagree; with top_k=3 this
    # yields "tag1", "tag2", "tag3".
    tag_slots = ", ".join(f'"tag{i}"' for i in range(1, top_k + 1))
    prompt = (
        f"Classify this support ticket into {top_k} most relevant tags from: {labels}.\n"
        "Ticket:\n"
        f'"""{text}"""\n'
        f'Respond as a JSON: {{ "tags": [{tag_slots}] }}'
    )
    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface -
    # confirm the pinned openai version still provides it.
    resp = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # same setting as the few-shot helper
    )
    return resp.choices[0].message["content"]

# ---------- 3. Few-Shot Prompting ----------
# Hand-written demonstrations used as the default examples for few-shot prompting:
# each entry pairs a ticket text with the tags shown to the model for it.
few_examples = [
    dict(
        text="My app crashes when I open settings",
        tags=["Technical Support", "Bug", "High Priority"],
    ),
    dict(
        text="Please send my invoice for March",
        tags=["Billing", "Customer Service", "Low Priority"],
    ),
]

def few_shot_tags(text, examples=few_examples, top_k=3):
    """Ask the chat model to tag a ticket after showing it worked examples.

    Args:
        text: The support ticket text to classify.
        examples: Iterable of dicts with a "text" and a "tags" entry; each is
            rendered as a Ticket/Tags pair ahead of the new ticket.
        top_k: How many tags to request for the new ticket. Previously this
            argument was accepted but never reached the prompt.

    Returns:
        The raw message content of the first completion choice (not parsed).
    """
    shots = "\n".join(
        f'Ticket: "{ex["text"]}"\nTags: {ex["tags"]}' for ex in examples
    )
    # State the requested number of tags explicitly so top_k actually
    # influences the request instead of being silently ignored.
    prompt = (
        f"{shots}\n\nNow classify (answer with the {top_k} most relevant tags):\n"
        f'Ticket: "{text}"\nTags:'
    )
    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface -
    # confirm the pinned openai version still provides it.
    resp = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # same setting as the zero-shot helper
    )
    return resp.choices[0].message["content"]

# ---------- 4. Fine-Tune a Transformer ----------
# Tokenizer for the "distilbert-base-uncased" checkpoint; it tokenizes the
# training text in preprocess() and is later handed to the inference pipeline.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def preprocess(examples):
    """Tokenize the "text" field of a batch of examples.

    Every sequence is truncated and padded to a fixed length of 128 tokens.
    Returns whatever the module-level tokenizer produces for the batch.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **tokenizer_options)

# Tokenize the whole dataset in batches, then have it return torch tensors
# for the token ids, attention mask and integer labels only.
tokenized_ds = dataset.map(function=preprocess, batched=True)
tokenized_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

# Sequence-classification model from the same checkpoint as the tokenizer, with
# one output per label and the label-name mappings stored in its config.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=len(labels), id2label=id2label, label2id=label2id
)

def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, true_labels)``; the predicted class for
            each row is the argmax over the last axis of the logits.

    Returns:
        Dict with the keys "accuracy" and "f1".
    """
    scores, references = eval_pred
    predictions = np.argmax(scores, axis=-1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(references, predictions)
    metrics["f1"] = f1_score(references, predictions, average="weighted")
    return metrics

# Hold out 20% of the tickets so the per-epoch evaluation, and the "best model"
# selection that depends on it, is not measured on the very examples the model
# was trained on. The fixed seed keeps the split reproducible between runs.
split_ds = tokenized_ds.train_test_split(test_size=0.2, seed=42)

args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    evaluation_strategy="epoch",
    # load_best_model_at_end needs checkpoints saved on the same schedule as
    # evaluation; with the default step-based saving TrainingArguments raises
    # a ValueError before training starts.
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=split_ds["train"],
    eval_dataset=split_ds["test"],
    compute_metrics=compute_metrics,
)

trainer.train()

# Inference pipeline around the fine-tuned model, configured to return the
# three highest-scoring labels.
finetuned_clf = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=3)

def finetuned_tags(text):
    """Tag a ticket with the fine-tuned classifier.

    Takes the first element of the pipeline output and renders each of its
    entries as ``"<label> (<score rounded to 2 decimals>)"``.

    Returns:
        List of the formatted strings, in the order the pipeline returned them.
    """
    # NOTE(review): relies on the pipeline wrapping a single-text result in an
    # outer list - confirm for the installed transformers version.
    ranked = finetuned_clf(text)[0]
    formatted = []
    for candidate in ranked:
        formatted.append(f"{candidate['label']} ({round(candidate['score'], 2)})")
    return formatted

# ---------- 5. Gradio App ----------
def classify_ticket(text):
    """Run all three taggers on one ticket.

    Returns:
        Tuple ``(zero_shot, few_shot, fine_tuned)``: the two raw chat-model
        replies followed by the fine-tuned classifier's formatted tag list.
    """
    # Tuple elements are evaluated left to right, so the call order is
    # zero-shot, few-shot, then the fine-tuned model.
    return zero_shot_tags(text), few_shot_tags(text), finetuned_tags(text)

# One text input, three text outputs (zero-shot, few-shot, fine-tuned), all
# produced by classify_ticket.
iface = gr.Interface(
    classify_ticket,
    "text",
    ["text", "text", "text"],
    title="Support Ticket Auto-Tagger",
    description="Zero-shot, few-shot, and fine-tuned classification",
)
# Start the Gradio app.
iface.launch()