In [59]:
!pip install -q transformers pandas

In [60]:
import re

import pandas as pd
from google.colab import files
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [61]:
# Hugging Face checkpoint id shared by the model and its tokenizer
model_name = "google/flan-t5-base"

# Both objects are read as globals by the classification cell further down
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [62]:
# Upload your CSV file (must include a 'Feedback' column)
uploaded = files.upload()

# An empty result would otherwise surface below as a bare StopIteration
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")

# Load the first uploaded file
filename = next(iter(uploaded))
df = pd.read_csv(filename)

# Fail here with a clear message instead of a KeyError in the classification cell
if 'Feedback' not in df.columns:
    raise ValueError(
        f"'{filename}' has no 'Feedback' column; found columns: {list(df.columns)}"
    )

df.head()

Saving Dataset.csv to Dataset.csv


Unnamed: 0,Feedback
0,There is no one available in the admin block a...
1,Getting a bonafide certificate takes over a week
2,Professors share real-world examples during le...
3,The curriculum includes recent advancements in...
4,Hostel rooms are too cramped and poorly ventil...


In [63]:
# ✅ Valid categories
valid_categories = [
    "Academics",
    "Facilities",
    "Administration",
    "Hostel",
    "Canteen",
    "Transport",
    "IT Services",
    "Library",
    "Labs",
    "Mental Health",
]

# One labelled (feedback, category) demonstration per category
few_shot_examples = [
    (
        "The teacher explained the concepts very clearly and gave great examples.",
        "Academics",
    ),
    (
        "The classroom projector was not working properly.",
        "Facilities",
    ),
    (
        "The admin office delayed the scholarship form processing.",
        "Administration",
    ),
    (
        "Wi-Fi in the hostel is very slow and unreliable.",
        "Hostel",
    ),
    (
        "The canteen food quality needs improvement.",
        "Canteen",
    ),
    (
        "The bus service is often late and overcrowded.",
        "Transport",
    ),
    (
        "The IT lab computers are outdated and slow.",
        "IT Services",
    ),
    (
        "The library has a great collection but limited seating.",
        "Library",
    ),
    (
        "Labs were well-equipped and the assistants were helpful.",
        "Labs",
    ),
    (
        "The mental health counselor was very understanding and helpful.",
        "Mental Health",
    ),
]


def build_prompt(feedback):
    """Assemble the few-shot classification prompt for one piece of feedback.

    The prompt lists every demonstration from `few_shot_examples` (one per line),
    then a blank line, then an instruction naming all `valid_categories`, and
    finally the feedback itself followed by an open "Category:" slot for the
    model to complete.

    Args:
        feedback: The feedback text to classify; interpolated into the prompt as-is.

    Returns:
        The full prompt as a single string.
    """
    demo_lines = []
    for sample_text, sample_label in few_shot_examples:
        demo_lines.append(f"Feedback: '{sample_text}' → Category: {sample_label}")
    demos = "\n".join(demo_lines)

    label_list = ", ".join(valid_categories)
    instruction = f"Choose the most appropriate category from: {label_list}.\n"
    query = f"Feedback: '{feedback}' → Category:"

    return f"{demos}\n\n" + instruction + query

In [64]:
# 🆕 Keyword-based fallback mapping
# Categories are tried in insertion order and the first keyword hit wins, so a
# keyword listed under two categories is only ever effective for the earlier one.
# Keep every keyword under exactly one category.
keyword_map = {
    "Academics": ["teacher", "professor", "lecture", "exam", "syllabus", "assignment", "curriculum", "course", "academic calendar"],
    "Facilities": ["classroom", "projector", "ac", "clean", "elevator", "gym", "seating", "maintenance", "power cut"],
    "Administration": ["admin", "registration", "bonafide", "mark sheet", "approval", "fee", "grievance", "schedule change"],
    # "internet" is intentionally NOT listed here: feedback that mentions the hostel
    # already matches "hostel", and connectivity-only complaints belong to IT Services.
    "Hostel": ["hostel", "room", "bathroom", "water cooler"],
    "Canteen": ["cafeteria", "canteen", "food"],
    "Transport": ["bus", "transport"],
    # "wi-fi" covers the hyphenated spelling, which "wifi" alone does not match.
    "IT Services": ["app", "website", "crash", "bug", "wifi", "wi-fi", "internet"],
    "Library": ["library", "books", "reading room"],
    "Labs": ["lab", "experiment", "equipment"],
    "Mental Health": ["counselor", "mental health", "stress"]
}

In [65]:
def _keyword_in_text(keyword, lowered_text):
    """Return True if `keyword` occurs in `lowered_text` as a whole word or phrase.

    A trailing "s" or "es" is tolerated so simple plurals still match
    ("professor" matches "professors"), but the keyword is never matched inside
    a longer word ("ac" does not match "faculty", "lab" does not match "available").
    """
    pattern = r"\b" + re.escape(keyword.lower()) + r"(?:s|es)?\b"
    return re.search(pattern, lowered_text) is not None


def classify_theme(text):
    """Assign one feedback text to a category.

    Resolution order:
      1. Ask the seq2seq model (few-shot prompt) and accept its answer if it
         contains one of `valid_categories` (case-insensitive).
      2. Otherwise fall back to `keyword_map`, trying categories in the map's
         order and returning the first one with a whole-word keyword hit.
      3. Otherwise return "Uncategorized".

    Args:
        text: The feedback text. Non-string or blank values (e.g. NaN from an
            empty CSV cell) are not sent to the model.

    Returns:
        One of `valid_categories`, a key of `keyword_map`, or "Uncategorized".
    """
    # Missing/blank feedback: skip the model instead of classifying the literal
    # string 'nan' and then failing on `.lower()` of a float.
    if not isinstance(text, str) or not text.strip():
        return "Uncategorized"

    prompt = build_prompt(text)
    # NOTE(review): truncation presumably cuts from the end of the prompt, which is
    # where the feedback sits; very long feedback may be partly dropped. Confirm
    # against the tokenizer's truncation settings.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs, max_new_tokens=10)
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Step 1: try matching the model output to a valid category
    generated = raw_output.lower()
    for category in valid_categories:
        if category.lower() in generated:
            return category

    # Step 2: fall back to keyword matching on whole words, not raw substrings
    lowered = text.lower()
    for category, keywords in keyword_map.items():
        if any(_keyword_in_text(keyword, lowered) for keyword in keywords):
            return category

    # Step 3: still uncertain
    return "Uncategorized"

In [66]:
# Label every feedback row with classify_theme, then preview the first rows
df["Predicted Theme"] = df["Feedback"].map(classify_theme)
df.head()

Unnamed: 0,Feedback,Predicted Theme
0,There is no one available in the admin block a...,Administration
1,Getting a bonafide certificate takes over a week,Administration
2,Professors share real-world examples during le...,Academics
3,The curriculum includes recent advancements in...,Academics
4,Hostel rooms are too cramped and poorly ventil...,Hostel


In [67]:
# Per-department counts of each predicted theme; skipped when the uploaded CSV
# has no 'Department' column.
if 'Department' in df.columns:
    # Group on 'Predicted Theme', the column the classification cell writes.
    # The previous name 'Predicted Category' does not exist and raised KeyError.
    summary = df.groupby(['Department', 'Predicted Theme']).size().unstack(fill_value=0)
    display(summary)

In [68]:
# Write the labelled feedback to disk (without the index) and offer it for download
output_csv = "cleaned_classified_feedback.csv"
df.to_csv(output_csv, index=False)
files.download(output_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>