In [1]:
import os
import pandas as pd
import torch
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# ✅ Load distilgpt2 model and tokenizer
model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

if torch.cuda.is_available():
    model.to("cuda")

# ✅ Custom prompt builder
def build_prompt(question, ideal_answer, student_answer):
    return (
        f"You are a strict but supportive science teacher. Your task is to give constructive feedback to a student.\n\n"
        f"Question: {question}\n"
        f"Ideal Answer: {ideal_answer}\n"
        f"Student Answer: {student_answer}\n\n"
        f"Give subtle, helpful feedback. If the student missed an important concept, hint at it without saying it. "
        f"If they made a spelling mistake on a key term, gently point it out without revealing the word. Do not say 'correct' or 'incorrect'. "
        f"Start your response directly with feedback."
    )

# ✅ Feedback generation function
def generate_feedback(prompt, max_new_tokens=80):
    inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True)
    if torch.cuda.is_available():
        inputs = inputs.to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded[len(prompt):].strip()

# ✅ Paths
grades = [5, 6, 7]
input_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/data"
output_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback"

# ✅ Process all grades and generate feedback
for grade in grades:
    print(f"\n📘 Processing Grade {grade}")
    input_path = os.path.join(input_base, f"grade{grade}.csv")
    output_path = os.path.join(output_base, f"grade{grade}_with_local_feedback.csv")

    if not os.path.exists(input_path):
        print(f"⛔ File not found: {input_path}")
        continue

    df = pd.read_csv(input_path)

    for label in ['close', 'partial', 'wrong']:
        feedback_col = []
        col_name = f"student_answer_{label}"
        print(f"🔹 Generating feedback for: {col_name}")

        for _, row in tqdm(df.iterrows(), total=len(df)):
            question = str(row["question"])
            ideal = str(row["ideal_answer"])
            student = str(row[col_name])

            prompt = build_prompt(question, ideal, student)
            try:
                feedback = generate_feedback(prompt)
            except Exception as e:
                feedback = f"Error generating feedback: {e}"
            feedback_col.append(feedback)

        df[f"local_{label}_feedback"] = feedback_col

    df.to_csv(output_path, index=False)
    print(f"✅ Saved: {output_path}")


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]


📘 Processing Grade 5
🔹 Generating feedback for: student_answer_close


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.

00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:24<00:00,  2.50s/it]

🔹 Generating feedback for: student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:22<00:00,  2.23s/it]

🔹 Generating feedback for: student_answer_wrong



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:21<00:00,  2.18s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade5_with_local_feedback.csv

📘 Processing Grade 6
🔹 Generating feedback for: student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:36<00:00,  3.62s/it]

🔹 Generating feedback for: student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.93s/it]

🔹 Generating feedback for: student_answer_wrong



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:27<00:00,  2.78s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade6_with_local_feedback.csv

📘 Processing Grade 7
🔹 Generating feedback for: student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:24<00:00,  2.42s/it]

🔹 Generating feedback for: student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:23<00:00,  2.36s/it]

🔹 Generating feedback for: student_answer_wrong


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:24<00:00,  2.47s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade7_with_local_feedback.csv





In [None]:
from transformers import GPTJForCausalLM, GPT2Tokenizer
import torch

# Load GPT-J 6B model and tokenizer
model_name = "EleutherAI/gpt-j-6B"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPTJForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

# Check if CUDA is available
if torch.cuda.is_available():
    model = model.to("cuda")

# Define a prompt
prompt = "What is the capital of France?"

inputs = tokenizer(prompt, return_tensors="pt")
inputs = {k: v.to("cuda") for k, v in inputs.items()}

# Generate output
with torch.no_grad():
    output = model.generate(inputs["input_ids"], max_length=50)

generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)


pytorch_model.bin:   2%|2         | 566M/24.2G [00:00<?, ?B/s]