In [1]:
import os
import time
import re
import requests
import pandas as pd
from tqdm import tqdm

# ✅ API settings
TOGETHER_API_URL = "https://api.together.xyz/v1/chat/completions"
# SECURITY: never commit the API key. It is read from the TOGETHER_API_KEY
# environment variable; any key that was previously hard-coded in this file
# should be treated as leaked and revoked.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
if not TOGETHER_API_KEY:
    print("⚠️ TOGETHER_API_KEY is not set; requests will be sent without a valid credential.")

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free"  # You can change to another model
# Headers sent with every chat-completions request.
HEADERS = {
    "Authorization": f"Bearer {TOGETHER_API_KEY}",
    "Content-Type": "application/json"
}

# ✅ System prompt: teacher persona plus the feedback rules the model must follow.
# Written as adjacent one-line literals so every newline (and the trailing
# space after the first sentence) is explicit.
SYSTEM_PROMPT = (
    "You are a supportive but strict middle school science teacher. \n"
    "Your job is to give smart, constructive feedback to a student's answer based on the question and the ideal answer.\n"
    "\n"
    "Rules:\n"
    "- if the answer is same as ideal answer, say its right.\n"
    "- If the student answer is very close to the ideal answer, acknowledge it positively.\n"
    "- If the student misses a key concept, give a gentle hint — do NOT directly mention the missing keyword.This is a very important point. Just give a hint and not the keyword directly\n"
    "- If there is a spelling mistake, only point it out if it's significant — but do NOT say the correct spelling. Be very careful when checking for spelling mistake.\n"
    "- Do NOT mention the ideal answer again.\n"
    "- Avoid saying 'wrong' or being negative. Be encouraging and helpful.\n"
    "- Keep feedback clear, natural, and student-facing — never include meta commentary or internal thoughts.\n"
    "\n"
    "Always start directly with the feedback.\n"
)

# ✅ Prompt builder
def build_prompt(question, ideal_answer, student_answer):
    """Assemble the user message sent to the model for one student answer.

    The message lists the question, the ideal answer and the student's
    answer on separate lines, then a blank line, then the closing instruction.
    """
    sections = [
        f"Question: {question}",
        f"Ideal Answer: {ideal_answer}",
        f"Student Answer: {student_answer}",
        "",  # blank line separating the context from the instruction
        "Now write helpful, encouraging feedback for the student.",
    ]
    return "\n".join(sections)

# ✅ Remove <think>...</think> blocks
def clean_response(text):
    """Strip model reasoning wrapped in <think> tags from a completion.

    Handles three shapes of output:
    - complete ``<think>...</think>`` blocks anywhere in the text;
    - an orphan ``</think>`` with no opening tag, where everything before
      the last closing tag is dropped as reasoning;
    - an unterminated ``<think>`` (e.g. output cut off by the token limit),
      where everything from the opening tag onward is dropped.

    Args:
        text: Raw completion text returned by the model.

    Returns:
        The remaining text with surrounding whitespace removed.
    """
    # Well-formed blocks first, so the fallbacks below only see stray tags.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # A closing tag without an opener: keep only what follows it.
    if "</think>" in text:
        text = text.rsplit("</think>", 1)[1]
    # An opener that never closes: the rest is truncated reasoning.
    text = re.sub(r"<think>.*", "", text, flags=re.DOTALL)
    return text.strip()

# ✅ LLM call function with a 5s pause after each success and retry with backoff
def generate_feedback(question, ideal, student):
    """Ask the chat-completions endpoint for feedback on one student answer.

    Builds the user prompt, posts it together with SYSTEM_PROMPT, and returns
    the model's reply with <think> blocks removed.

    Up to three attempts are made. Failed attempts wait with exponential
    backoff (5s, then 10s) and honor a numeric ``Retry-After`` header when
    the server provides one. No wait follows the final attempt. Client errors
    other than 408/429 are not retried because repeating the same request
    cannot succeed.

    Args:
        question: The question text.
        ideal: The ideal answer text.
        student: The student's answer text.

    Returns:
        The cleaned feedback string, or the literal
        ``"Error generating feedback"`` if every attempt fails.
    """
    prompt = build_prompt(question, ideal, student)

    payload = {
        "model": MODEL_NAME,
        "max_tokens": 1500,
        "temperature": 0.7,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ]
    }

    max_attempts = 3
    max_wait = 60  # upper bound on any single retry wait, in seconds

    for attempt in range(max_attempts):
        wait = 5 * (2 ** attempt)  # 5s, 10s, 20s
        try:
            response = requests.post(TOGETHER_API_URL, headers=HEADERS, json=payload, timeout=30)
            response.raise_for_status()
            raw = response.json()["choices"][0]["message"]["content"]
            cleaned = clean_response(raw)
        except requests.exceptions.HTTPError as e:
            print(f"⚠️ HTTP error: {e}")
            # Response objects are falsy for error statuses, so compare to None.
            resp = e.response
            if resp is not None:
                status = resp.status_code
                if 400 <= status < 500 and status not in (408, 429):
                    break  # e.g. bad credentials or bad request: retrying cannot help
                retry_after = resp.headers.get("Retry-After", "")
                if retry_after.isdigit():
                    wait = max(wait, int(retry_after))
        except Exception as e:
            # Best-effort boundary: network failures and malformed responses
            # are reported and retried rather than aborting the whole batch.
            print(f"⚠️ Other error: {e}")
        else:
            time.sleep(5)  # ⏲️ Pause 5 seconds after every successful call
            return cleaned

        if attempt < max_attempts - 1:
            time.sleep(min(wait, max_wait))

    return "Error generating feedback"

# ✅ CSV Processing
# For each grade: read grade<N>.csv, generate feedback for the three student
# answer columns, and write the result next to the other feedback files.
grades = [5, 6, 7]
input_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/data"
output_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback"

for grade in grades:
    print(f"\n📘 Processing Grade {grade}")
    input_path = os.path.join(input_base, f"grade{grade}.csv")
    output_path = os.path.join(output_base, f"grade{grade}_with_llama_feedback.csv")

    if not os.path.exists(input_path):
        print(f"⛔ File not found: {input_path}")
        continue

    df = pd.read_csv(input_path)

    # Validate the schema before spending minutes on API calls.
    missing_cols = {"question", "ideal_answer"} - set(df.columns)
    if missing_cols:
        print(f"⛔ Missing required column(s) {sorted(missing_cols)} in {input_path}")
        continue

    for label in ['close', 'partial', 'wrong']:
        col_name = f"student_answer_{label}"
        if col_name not in df.columns:
            print(f"⛔ Column not found, skipping: {col_name}")
            continue
        print(f"🔹 Generating feedback for: {col_name}")

        feedbacks = []
        for _, row in tqdm(df.iterrows(), total=len(df)):
            question = str(row["question"])
            ideal = str(row["ideal_answer"])
            student = str(row[col_name])
            feedbacks.append(generate_feedback(question, ideal, student))

        df[f"llama_{label}_feedback"] = feedbacks

    # Make sure the destination folder exists so the generated feedback is
    # not lost to a missing-directory error at write time.
    os.makedirs(output_base, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"✅ Saved: {output_path}")



📘 Processing Grade 5
🔹 Generating feedback for: student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:06<00:00, 12.65s/it]

🔹 Generating feedback for: student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:16<00:00, 13.61s/it]

🔹 Generating feedback for: student_answer_wrong



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:33<00:00, 15.39s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade5_with_llama_feedback.csv

📘 Processing Grade 6
🔹 Generating feedback for: student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:18<00:00, 13.86s/it]

🔹 Generating feedback for: student_answer_partial


 10%|████████▎                                                                          | 1/10 [00:15<02:18, 15.41s/it]

⚠️ Other error: HTTPSConnectionPool(host='api.together.xyz', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000001B6014E3200>: Failed to resolve 'api.together.xyz' ([Errno 11001] getaddrinfo failed)"))


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [02:15<00:14, 14.04s/it]

⚠️ Other error: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
⚠️ Other error: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [03:32<00:00, 21.26s/it]

🔹 Generating feedback for: student_answer_wrong



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:16<00:00, 13.63s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade6_with_llama_feedback.csv

📘 Processing Grade 7
🔹 Generating feedback for: student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:20<00:00, 14.02s/it]

🔹 Generating feedback for: student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:12<00:00, 13.26s/it]

🔹 Generating feedback for: student_answer_wrong


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:08<00:00, 12.89s/it]

✅ Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade7_with_llama_feedback.csv





In [2]:
import os
import re
import time
import requests
import pandas as pd
from tqdm import tqdm
from keybert import KeyBERT

# ✅ 1. Keyword extractor: one shared KeyBERT instance using the all-MiniLM-L6-v2 model
kw_model = KeyBERT(model="all-MiniLM-L6-v2")

# ✅ 2. Together API setup
TOGETHER_API_URL = "https://api.together.xyz/v1/chat/completions"
# SECURITY: never commit the API key. It is read from the TOGETHER_API_KEY
# environment variable; any key that was previously hard-coded in this file
# should be treated as leaked and revoked.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
if not TOGETHER_API_KEY:
    print("⚠️ TOGETHER_API_KEY is not set; requests will be sent without a valid credential.")

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free"
# Headers sent with every chat-completions request.
HEADERS = {
    "Authorization": f"Bearer {TOGETHER_API_KEY}",
    "Content-Type": "application/json"
}

# ✅ 3. Clean <think> tags
def clean_response(text):
    """Strip model reasoning wrapped in <think> tags from a completion.

    Handles three shapes of output:
    - complete ``<think>...</think>`` blocks anywhere in the text;
    - an orphan ``</think>`` with no opening tag, where everything before
      the last closing tag is dropped as reasoning;
    - an unterminated ``<think>`` (e.g. output cut off by the token limit),
      where everything from the opening tag onward is dropped.

    Args:
        text: Raw completion text returned by the model.

    Returns:
        The remaining text with surrounding whitespace removed.
    """
    # Well-formed blocks first, so the fallbacks below only see stray tags.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # A closing tag without an opener: keep only what follows it.
    if "</think>" in text:
        text = text.rsplit("</think>", 1)[1]
    # An opener that never closes: the rest is truncated reasoning.
    text = re.sub(r"<think>.*", "", text, flags=re.DOTALL)
    return text.strip()

# ✅ 4. Extract keywords from the ideal answer
def extract_keywords(text, top_k=5):
    """Return up to ``top_k`` lower-cased key phrases (1-2 words) found in ``text``.

    Delegates to the module-level KeyBERT model with English stop words
    removed, and keeps only the first element of each returned entry.
    """
    ranked = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=top_k,
    )
    phrases = []
    for entry in ranked:
        phrases.append(entry[0].lower())
    return phrases

# ✅ 5. Check which keywords are missing in student answer
def find_missing_keywords(keywords, student_answer):
    """Return the keywords that do not occur as substrings of the answer.

    The answer is lower-cased before matching; the keywords are compared
    as given, and their original order is preserved in the result.
    """
    haystack = student_answer.lower()
    absent = []
    for keyword in keywords:
        if keyword in haystack:
            continue
        absent.append(keyword)
    return absent

# ✅ 6. Build prompt with gentle hints
def build_prompt(question, ideal_answer, student_answer, missing_keywords):
    """Compose the full user prompt for one student answer.

    The prompt has a persona line, the question/ideal/student context, an
    optional nudge that is included only when ``missing_keywords`` is
    non-empty, and the list of feedback rules.
    """
    nudge = (
        "It seems the student may have missed one or more important scientific ideas. Consider gently prompting them to revisit key parts of the process."
        if missing_keywords
        else ""
    )

    persona = "You are a supportive but strict middle school science teacher.\n\n"
    context = (
        f"Question: {question}\n"
        f"Ideal Answer: {ideal_answer}\n"
        f"Student Answer: {student_answer}\n\n"
    )
    rules = "\n".join([
        "Now write helpful, constructive feedback for the student:",
        "if the answer is same as or very close to the ideal answer, say its right.",
        "- If their answer is close to the ideal answer, say it's good.",
        "- If they missed something important, just hint at it — do not give the keyword.",
        "- If there's a spelling mistake, suggest checking it without revealing it.Be very careful when checking for spelling mistake. Only alert if there is a spelling mistake,otherwise don't",
        "- Do NOT mention the ideal answer.",
        "- Do NOT include internal thoughts or use <think> tags.",
        "- If the answer is completely wrong and not even close,dont be hesitant to tell it is wrong.",
        "- Be friendly, clear, and encouraging.",
    ])

    # The nudge slot is always followed by a blank line, even when empty.
    return f"{persona}{context}{nudge}\n\n{rules}"

# ✅ 7. Generate feedback with retry, backoff and delay
def generate_feedback(question, ideal, student):
    """Ask the chat-completions endpoint for feedback on one student answer.

    Extracts keywords from the ideal answer, works out which of them are
    absent from the student's answer, builds the prompt accordingly, and
    returns the model's reply with <think> blocks removed.

    Up to three attempts are made. Failed attempts wait with exponential
    backoff (5s, then 10s) and honor a numeric ``Retry-After`` header when
    the server provides one. No wait follows the final attempt. Client errors
    other than 408/429 are not retried because repeating the same request
    cannot succeed.

    Args:
        question: The question text.
        ideal: The ideal answer text.
        student: The student's answer text.

    Returns:
        The cleaned feedback string, or the literal
        ``"Error generating feedback."`` if every attempt fails.
    """
    keywords = extract_keywords(ideal)
    missing = find_missing_keywords(keywords, student)
    prompt = build_prompt(question, ideal, student, missing)

    payload = {
        "model": MODEL_NAME,
        "max_tokens": 1500,
        "temperature": 0.7,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": "You are a helpful science teacher."},
            {"role": "user", "content": prompt}
        ]
    }

    max_attempts = 3
    max_wait = 60  # upper bound on any single retry wait, in seconds

    for attempt in range(max_attempts):
        wait = 5 * (2 ** attempt)  # 5s, 10s, 20s
        try:
            response = requests.post(TOGETHER_API_URL, headers=HEADERS, json=payload, timeout=30)
            response.raise_for_status()
            raw = response.json()["choices"][0]["message"]["content"]
            feedback = clean_response(raw)
        except requests.exceptions.HTTPError as e:
            print(f"⚠️ Error: {e}")
            # Response objects are falsy for error statuses, so compare to None.
            resp = e.response
            if resp is not None:
                status = resp.status_code
                if 400 <= status < 500 and status not in (408, 429):
                    break  # e.g. bad credentials or bad request: retrying cannot help
                retry_after = resp.headers.get("Retry-After", "")
                if retry_after.isdigit():
                    wait = max(wait, int(retry_after))
        except Exception as e:
            # Best-effort boundary: network failures and malformed responses
            # are reported and retried rather than aborting the whole batch.
            print(f"⚠️ Error: {e}")
        else:
            time.sleep(5)  # Respect rate limit
            return feedback

        if attempt < max_attempts - 1:
            time.sleep(min(wait, max_wait))

    return "Error generating feedback."

# ✅ 8. Run over CSVs for Grades 5–7
# For each grade: read grade<N>.csv, generate feedback for the three student
# answer columns, and write the result into the feedback folder.
grades = [5, 6, 7]
input_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/data"
output_base = "D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback"

for grade in grades:
    print(f"\nProcessing Grade {grade}")
    input_path = os.path.join(input_base, f"grade{grade}.csv")
    output_path = os.path.join(output_base, f"grade{grade}_with_llama_feedback.csv")

    if not os.path.exists(input_path):
        print(f"File not found: {input_path}")
        continue

    df = pd.read_csv(input_path)

    # Validate the schema before spending minutes on API calls.
    missing_cols = {"question", "ideal_answer"} - set(df.columns)
    if missing_cols:
        print(f"Missing required column(s) {sorted(missing_cols)} in {input_path}")
        continue

    for label in ['close', 'partial', 'wrong']:
        col = f"student_answer_{label}"
        if col not in df.columns:
            print(f"Column not found, skipping: {col}")
            continue
        print(f"Generating feedback for {col}")

        feedbacks = []
        for _, row in tqdm(df.iterrows(), total=len(df)):
            question = str(row["question"])
            ideal = str(row["ideal_answer"])
            student = str(row[col])
            feedbacks.append(generate_feedback(question, ideal, student))

        df[f"llama_{label}_feedback"] = feedbacks

    # Make sure the destination folder exists so the generated feedback is
    # not lost to a missing-directory error at write time.
    os.makedirs(output_base, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"Saved: {output_path}")



Processing Grade 5
Generating feedback for student_answer_close


 40%|█████████████████████████████████▏                                                 | 4/10 [00:47<01:08, 11.37s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:00<00:00, 12.09s/it]

Generating feedback for student_answer_partial


 20%|████████████████▌                                                                  | 2/10 [00:20<01:21, 10.22s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:07<00:00, 12.78s/it]

Generating feedback for student_answer_wrong



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:48<00:00, 10.83s/it]

Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade5_with_llama_feedback.csv

Processing Grade 6
Generating feedback for student_answer_close



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:01<00:00, 12.11s/it]

Generating feedback for student_answer_partial



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:00<00:00, 12.02s/it]

Generating feedback for student_answer_wrong


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:59<00:58, 11.67s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:13<00:00, 13.34s/it]

Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade6_with_llama_feedback.csv

Processing Grade 7
Generating feedback for student_answer_close


 10%|████████▎                                                                          | 1/10 [00:09<01:26,  9.58s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:59<00:00, 11.92s/it]

Generating feedback for student_answer_partial


 50%|█████████████████████████████████████████▌                                         | 5/10 [01:05<01:00, 12.11s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:10<00:00, 13.07s/it]

Generating feedback for student_answer_wrong


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [01:32<00:22, 11.36s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [01:47<00:12, 12.55s/it]

⚠️ Error: 429 Client Error: Too Many Requests for url: https://api.together.xyz/v1/chat/completions



00%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:10<00:00, 13.03s/it]

Saved: D:/Varun PERSONAL/Edcite/smart_feedback_generator/feedback\grade7_with_llama_feedback.csv
