In [1]:
!pip install -q llama-index accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m83.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m76.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.3/320.3 kB[0m [31m38.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

In [2]:
import pandas as pd
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, logging
from huggingface_hub import notebook_login
from llama_index.core import PromptTemplate
from enum import Enum

In [3]:
# `logging` here is transformers' logging module (see the imports cell):
# restrict its output to error-level messages only.
logging.set_verbosity_error()

In [4]:
# Lift pandas' display limits so wide frames and long text cells
# (questions, contexts, model responses) are shown in full.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [5]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [6]:
# Authenticate with the Hugging Face Hub through the interactive notebook widget.
# NOTE(review): presumably required because the model loaded later is access-gated — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
# Hugging Face Hub repository id of the checkpoint whose tokenizer and weights
# are loaded with `from_pretrained` in the next cell.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

In [None]:
# Load the tokenizer that matches `model_id`; it also supplies the chat template
# used by `generate_response`.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the causal-LM weights in bfloat16. `device_map="auto"` delegates weight
# placement across the available devices to accelerate (installed in the first cell).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [10]:
def generate_response(prompt, max_new_tokens=512, temperature=0.6, top_p=0.9):
    """Generate one sampled chat completion for ``prompt``.

    The prompt is sent as a single ``user`` turn, rendered with the tokenizer's
    chat template, and completed by the module-level ``model`` using sampling.
    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Full text of the user message.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    messages = [{"role": "user", "content": prompt}]

    # Render the conversation with the model's chat template and append the
    # generation prompt so the model continues as the assistant.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # The output sequence starts with the prompt tokens; keep only what follows.
    response = outputs[0][input_ids.shape[-1]:]

    return tokenizer.decode(response, skip_special_tokens=True)

In [11]:
# Define prompts.
# All three templates share the same tutor preamble and closing instructions;
# they differ only in whether, and how strongly, a context section is offered.
_tutor_preamble = (
    "You are going to act as a mathematics tutor for a 13 year old student who is in grade 8 or 9 and lives in the United States.\n"
    "You will be encouraging and factual.\n"
)

# Delimited slot that receives the retrieved context.
_context_section = (
    "===\n"
    "{context_str}\n"
    "===\n"
)

_tutor_closing = (
    "Prefer simple, short responses.\n"
    "If the student says something inappropriate or off topic you will say you can only focus on mathematics and ask them if they have any math-related follow-up questions.\n"
    "Student: {query_str}\n"
    "You:"
)

# No context section: only {query_str} is substituted.
template_no_guidance = _tutor_preamble + _tutor_closing

# Context is offered as optional help.
template_low_guidance = (
    _tutor_preamble
    + "Only if it is relevant, examples and language from the section below may be helpful to format your response:\n"
    + _context_section
    + _tutor_closing
)

# Context is to be used for the response.
template_high_guidance = (
    _tutor_preamble
    + "Use examples and language from the section below to format your response:\n"
    + _context_section
    + _tutor_closing
)

In [12]:
# Wrap each raw template string in a llama-index PromptTemplate so it can be
# filled in later via `.format(...)`.
qa_template_no_guidance, qa_template_low_guidance, qa_template_high_guidance = (
    PromptTemplate(raw_template)
    for raw_template in (
        template_no_guidance,
        template_low_guidance,
        template_high_guidance,
    )
)

In [13]:
class GuidanceLevel(Enum):
    """How strongly a prompt steers the model towards the supplied context.

    Each member corresponds to one of the three prompt templates defined in
    this notebook (no / low / high guidance).
    """

    # Prompt carries no context section.
    NO_GUIDANCE = "no_guidance"

    # Context is offered as optional help ("only if it is relevant").
    LOW_GUIDANCE = "low_guidance"

    # Prompt instructs the model to use the context.
    HIGH_GUIDANCE = "high_guidance"

In [14]:
def generate_response_with_guidance_level(question, context_str):
    """Answer ``question`` once per guidance level and return all results.

    Responses are generated in the order no -> low -> high guidance. The
    no-guidance prompt receives only the question; the low- and high-guidance
    prompts also receive ``context_str``.

    Args:
        question: The student's question, substituted for ``{query_str}``.
        context_str: Context text, substituted for ``{context_str}``.

    Returns:
        A 6-tuple ``(response_no, context, response_low, context,
        response_high, context)`` where every ``context`` entry is the
        ``context_str`` passed in (including for the no-guidance response,
        whose prompt does not use it).
    """
    # No guidance: the template has no context slot.
    prompt_no = qa_template_no_guidance.format(query_str=question)
    response_no_guidance = generate_response(prompt_no)

    # Low guidance: context is offered as optional help.
    prompt_low = qa_template_low_guidance.format(context_str=context_str, query_str=question)
    response_low_guidance = generate_response(prompt_low)

    # High guidance: the model is told to use the context.
    prompt_high = qa_template_high_guidance.format(context_str=context_str, query_str=question)
    response_high_guidance = generate_response(prompt_high)

    return (
        response_no_guidance,
        context_str,
        response_low_guidance,
        context_str,
        response_high_guidance,
        context_str,
    )

In [None]:
# Define root_path

# Save df_llama3_new
# NOTE(review): `df_llama3_new` is not defined in any visible cell, so this cell
# fails on a fresh kernel. Presumably the code that sets `root_path` and loads
# the frame was removed from this cell — restore it (the "Save" wording above
# likely should read "Load"; confirm).
df_llama3_new.shape

In [None]:
# Build a frame with a fresh 0..n-1 index (the old index is dropped); the main
# loop prints the index label as its progress counter.
# NOTE(review): despite the name, no rows are filtered out here.
filtered_df = df_llama3_new.reset_index(drop=True)
filtered_df.shape

In [None]:
# Main loop: for every row, generate a response at each guidance level and
# collect one flat record per question.
data = []

for i, row in filtered_df.iterrows():
    # Progress indicator: echo the row label on every tenth row.
    if i % 10 == 0:
        print(i)

    id_, question, teacher_answer = row['id'], row['question'], row['teacher_answer']
    context_str = row['context_graph_no_guidance']

    # Graph.
    (
        response_graph_no_guidance,
        context_graph_no_guidance,
        response_graph_low_guidance,
        context_graph_low_guidance,
        response_graph_high_guidance,
        context_graph_high_guidance,
    ) = generate_response_with_guidance_level(question, context_str)

    data.append(dict(
        id=id_,
        question=question,
        teacher_answer=teacher_answer,
        response_graph_no_guidance=response_graph_no_guidance,
        context_graph_no_guidance=context_graph_no_guidance,
        response_graph_low_guidance=response_graph_low_guidance,
        context_graph_low_guidance=context_graph_low_guidance,
        response_graph_high_guidance=response_graph_high_guidance,
        context_graph_high_guidance=context_graph_high_guidance,
    ))

# Convert the data list to a pandas DataFrame
final_df = pd.DataFrame(data)

# Write the DataFrame to a CSV file
# Save final_df