In [2]:
import torch
import pandas as pd
import random
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import os

# Expose only GPUs 1-7 to this process (device 0 is deliberately left out).
# NOTE(review): the variable is read when CUDA is first initialised, so this
# line must run before anything touches the GPU — confirm that nothing above
# it does.
os.environ["CUDA_VISIBLE_DEVICES"]="1,2,3,4,5,6,7"

In [3]:
def set_all_seeds(seed):
    """Seed every random number generator this notebook uses.

    Covers Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, the current CUDA device and all CUDA devices), then switches cuDNN
    to deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        random.seed,                 # Python's built-in random
        np.random.seed,              # NumPy
        torch.manual_seed,           # PyTorch
        torch.cuda.manual_seed,      # current CUDA device
        torch.cuda.manual_seed_all,  # every CUDA device (multi-GPU)
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Additional PyTorch settings for reproducibility
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed Python, NumPy and PyTorch once, before any sampling happens in later cells.
set_all_seeds(0)  # You can choose any integer as your seed

In [4]:
# Read the training CSV (path is relative to the notebook's working directory)
# and preview the first rows.
data = pd.read_csv("data/train.csv")
data.head()

Unnamed: 0,qid,scenario,question,ground_truth,student_incorrect_solution,student_profile,teacher_described_confusion,self-correctness,self-typical-confusion,self-typical-interactions,conversation
0,5000012,1,Nancy is filling an aquarium for her fish. She...,First calculate the volume of the aquarium by ...,The aquarium has a volume of 4 x 6 x 3 = 72 cu...,Steven is a 7th grade student. He has difficul...,He added a step after completing the problem.,Yes,3.0,3.0,"Teacher: (probing)Steven, If you had 4 of some..."
1,5000084,2,John is very unfit and decides to work up to d...,He needs to do 15*3=45 progressions\nThat will...,"To get to 15 reps, John will take 15 - 1 = 14 ...",Stephanie is a 7th grade student. She has diff...,She became fixated on a wrong calculation and ...,No,2.0,2.0,"Teacher: (probing)Stephanie, How many days wil..."
2,5000982,3,James has 20 pairs of red socks and half as ma...,He has 20/2=10 black socks\nSo he has 20+10=30...,James has 20/2 = 10 pairs of black socks.\nJam...,DeAndre is a 7th grade student. He has problem...,He was technically correct.,Yes,5.0,5.0,"Teacher: (probing)DeAndre, you successfully an..."
3,5000274,4,Jason drives past 3 convenience stores on his ...,First find the additional distance between the...,The distance between the second store and thir...,Winnie is a 7th grade student. She struggle to...,He understood it when it was broken down step ...,Yes,3.0,3.0,Teacher: (probing)What is the distance between...
4,5000797,5,Wanda walks her daughter .5 miles to school in...,She walks .5 miles to school and back 4 times ...,"Every day, Wanda walks a total of 0.5 + 0.5 = ...",Cody is a 7th grade student. He has problem wi...,He forgot she walked twice each day!,Yes,3.0,3.0,"Teacher: (probing)Cody, How far doe Wanda walk..."


In [6]:
def run_llm(prompts, model, tokenizer, max_tokens=1024, temperature=0.1):
    """Generate one chat completion for a single conversation.

    Args:
        prompts: One conversation, i.e. a list of ``{"role", "content"}``
            message dicts, as accepted by ``tokenizer.apply_chat_template``.
        model: Causal LM exposing ``generate`` (and normally ``device``).
        tokenizer: Tokenizer matching ``model``; must provide a chat template
            and an ``eos_token_id``.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature. A value of 0 (or below) switches to
            greedy decoding instead of failing inside ``generate``.

    Returns:
        A one-element list with the decoded completion; the prompt tokens are
        stripped and special tokens are skipped.
    """
    chat_text = tokenizer.apply_chat_template(
        prompts, tokenize=False, add_generation_prompt=True
    )

    # Follow the model's own placement rather than assuming plain "cuda";
    # fall back to the previous behaviour if the model exposes no device.
    device = getattr(model, "device", "cuda")

    # The rendered template already contains the special tokens it needs, so
    # the tokenizer must not add them again (that would duplicate e.g. BOS).
    model_inputs = tokenizer(
        [chat_text],
        return_tensors="pt",
        padding=True,
        add_special_tokens=False,
    ).to(device)
    input_ids = model_inputs.input_ids
    attention_mask = model_inputs.attention_mask

    terminators = [
        tokenizer.eos_token_id,
        # tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    # Pure temperature sampling (no nucleus / top-k truncation) when a
    # positive temperature is given; greedy decoding otherwise.
    if temperature is not None and temperature > 0:
        sampling_kwargs = dict(do_sample=True, temperature=temperature, top_p=1, top_k=0)
    else:
        sampling_kwargs = dict(do_sample=False)

    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=terminators,
            max_new_tokens=max_tokens,
            num_beams=1,
            num_return_sequences=1,
            **sampling_kwargs,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = input_ids.shape[1]
    new_token_ids = generated_ids[:, prompt_length:]

    return tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)


In [None]:
def generate_prompt_teacher(profile, question, solution, conversation):
    """Build the chat messages for the helpful-teacher persona.

    Args:
        profile: Text describing the student.
        question: The math problem statement.
        solution: The student's (incorrect) solution.
        conversation: Conversation history so far.

    Returns:
        A two-element list of chat messages: a system message that sets the
        teacher persona, followed by a user message that embeds the four
        arguments.
    """
    system_message = (
        "You are a teacher helping a student solve a math problem. "
        "Your responses should be concise and conversational. "
        "Guide the student step-by-step, asking only one question at a time. "
        "Base your guidance on the student's incorrect solution and the conversation history."
    )
    user_message = (
        f"Student Profile: {profile}\n\n"
        f"Math problem: {question}\n\n"
        f"Student solution: {solution}\n\n"
        "The student thinks his/her answer is correct. "
        "Student can use a calculator and thus makes no calculation errors. "
        "Respond as a teacher and provide conversational, concise and step by step guidance. "
        f"\n\nConversation: {conversation}"
    )

    messages = []
    messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": user_message})
    return messages

In [8]:
def generate_prompt_bad_teacher(profile, question, solution, conversation):
    """Build the chat messages for the deliberately unhelpful-teacher persona.

    Args:
        profile: Text describing the student.
        question: The math problem statement.
        solution: The student's (incorrect) solution.
        conversation: Conversation history so far.

    Returns:
        A two-element list of chat messages: a system message that sets the
        poorly-performing-teacher persona, followed by a user message that
        embeds the four arguments.
    """
    system_message = (
        "You are a poorly performing teacher who provides ineffective or incorrect guidance. "
        "Your responses may be vague, confusing, overly critical, or incorrect. "
        "You should not offer step-by-step help or clear explanations. "
        "Do not use phrases like '(sigh)' or similar expressions. "
        "Keep responses concise but unhelpful. "
        "Do not directly tell the student to move on to the next question."
    )
    user_message = (
        f"Student Profile: {profile}\n\n"
        f"Math problem: {question}\n\n"
        f"Student solution: {solution}\n\n"
        "The student thinks his/her answer is correct. "
        "Student can use a calculator and thus makes no calculation errors. "
        "Respond as a poorly performing teacher and provide unhelpful, incorrect, or confusing feedback. "
        "You might give vague explanations, skip important steps, make logical errors, or misinterpret the student's mistakes. "
        "Avoid clear step-by-step guidance. "
        "Avoid phrases like '(sigh)' or similar terms on how you behave. "
        "Keep responses concise but unhelpful. "
        f"\n\nConversation: {conversation}"
    )

    messages = []
    messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": user_message})
    return messages

In [None]:
# Checkpoint to load; swap in the commented line for the smaller model.
# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct"

# Single cache location shared by tokenizer and weights, so both downloads
# land in the same place.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
cache_dir = "/data1/nl35/cache/hub/"

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)

# Reuse EOS as the padding token so that tokenizer(..., padding=True) in
# run_llm has a pad token to work with.
tokenizer.pad_token = tokenizer.eos_token

# bfloat16 weights, with layer placement across the visible devices left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir=cache_dir,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

In [10]:
# Generate one "bad teacher" reply for every conversation entry of the first
# 500 rows. Results are keyed by the DataFrame row index:
#   all_prompts[idx]      -> list of chat-message prompts sent to the model
#   all_conversation[idx] -> list of [conversation_text, model_reply] pairs
#   all_qids[idx]         -> the row's qid
all_prompts = {}
all_conversation = {}
all_qids = {}

# The raw CSV does not carry this column; it has to be produced by a
# preprocessing step before this cell runs. Fail fast with an actionable
# message instead of a bare KeyError deep inside the loop.
if "format_conversation_teacher" not in data.columns:
    raise KeyError(
        "data has no 'format_conversation_teacher' column; run the preprocessing "
        "step that builds it before generating teacher responses."
    )

for idx, row in data[0:500].iterrows():
    conversation_entries = row["format_conversation_teacher"]
    # A plain string here (e.g. a list that was round-tripped through CSV)
    # would be iterated character by character, firing one LLM call per
    # character. Presumably each cell is a list of conversation strings.
    if isinstance(conversation_entries, str):
        raise TypeError(
            f"row {idx}: 'format_conversation_teacher' is a string, expected a "
            "list of conversation strings; parse it before running this cell."
        )

    prompt_lst, conversation_lst = [], []

    for conversation in conversation_entries:
        # Cue the model to continue with the teacher's next turn.
        conversation += "\nTeacher: "
        # Use generate_prompt_teacher(...) with the same arguments to produce
        # the helpful-teacher variant instead.
        prompts = generate_prompt_bad_teacher(
            row["student_profile"],
            row["question"],
            row["student_incorrect_solution"],
            conversation,
        )

        responses = run_llm(prompts, model, tokenizer, max_tokens=2048, temperature=0.5)[0].strip()
        print(idx, responses)
        print("=" * 50)

        prompt_lst.append(prompts)
        conversation_lst.append([conversation, responses])

    all_conversation[idx] = conversation_lst
    all_prompts[idx] = prompt_lst
    all_qids[idx] = row["qid"]

0 Your answer seems a bit too straightforward, don't you think? I mean, Nancy tripled the amount of water, but did you take into account the cat's intentions when it knocked over the aquarium? Maybe we should be focusing on the cat's role in this problem.
0 That's close, Steven, but not exactly what we're looking for here. The question isn't really about tripling the amount of something, it's about... well, it's more about the aquarium. You see, when Nancy triples the amount of water, it's not just a simple multiplication problem. You need to think about the aquarium itself, not just the water. Does that make sense?
0 That's not exactly what tripling means in this situation. You're just adding a lot of water, but you're not thinking about what was there before. You need to consider the whole aquarium, not just the water. What's the relationship between the water and the aquarium?
1 You got 12 weeks. That seems like a lot. You should think about how fast John is progressing. Is he reall

In [11]:
# Flatten the nested results into one entry per generated turn:
# (row key, position of the turn within that row, [conversation, response]).
flat_turns = [
    (row_key, turn_pos, turn)
    for row_key, turns in all_conversation.items()
    for turn_pos, turn in enumerate(turns)
]

# Column-wise views of the flattened turns, all in the same order.
keys = [row_key for row_key, _, _ in flat_turns]
indices = [turn_pos for _, turn_pos, _ in flat_turns]
values = [turn for _, _, turn in flat_turns]
prompts = [all_prompts[row_key][turn_pos] for row_key, turn_pos, _ in flat_turns]
q_ids = [all_qids[row_key] for row_key, _, _ in flat_turns]

# Assemble the table and write it out (the DataFrame index is written as well).
df = pd.DataFrame(
    {
        "number": keys,
        "qid": q_ids,
        "index": indices,
        "prompts": prompts,
        "responses": values,
    }
)

df.to_csv("llama_70b_teacher_500_0_bad.csv")