In [None]:
import os, math, numpy as np
# Restrict CUDA to devices 0 and 1; the generated run_vllm.py script loads the
# model with tensor_parallel_size=2.
# NOTE(review): presumably this has to run before any CUDA library is
# initialised in this process to take effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

In [None]:
%%time
# Uninstall torch without prompting (-y) before vllm is installed from local wheels.
!pip uninstall -y torch
# Install vllm from the local wheel directory only: --no-index disables the
# package index and --find-links points pip at the directory of wheels.
!pip install --no-index --find-links=/kaggle/input/making-wheels-of-necessary-packages-for-vllm vllm
# Install the grpcio and ray wheels shipped in the "vllm-t4-fix" dataset.
# (-U and --upgrade are the same pip option, so passing both is redundant.)
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/grpcio-1.62.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/ray-2.11.0-cp310-cp310-manylinux2014_x86_64.whl
# Install the sentence-transformers 3.1.0 wheel without resolving its dependencies (--no-deps).
# NOTE(review): sentence_transformers is not imported in any cell visible here — confirm it is still needed.
!pip install --no-deps --no-index /kaggle/input/hf-libraries/sentence-transformers/sentence_transformers-3.1.0-py3-none-any.whl

In [None]:
import os
from transformers import AutoTokenizer
import pandas as pd


# Training questions of the competition, read from train.csv.
df_train = pd.read_csv("/kaggle/input/eedi-mining-misconceptions-in-mathematics/train.csv")
# Tokenizer loaded from the same model directory that run_vllm.py serves; in this
# notebook it is only used to render chat-template prompts (tokenize=False).
tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/hugging-quants-meta-llama-3-1-8b-instruct-awq-int4")

# User-message template for the misconception prompt. The placeholders
# {Question}, {IncorrectAnswer}, {CorrectAnswer}, {ConstructName} and
# {SubjectName} are filled with str.format inside apply_template. The model is
# asked to reason briefly inside <thinking> tags and to give its final
# misconception inside <response> tags.
PROMPT  = """Question: {Question}
Incorrect Answer: {IncorrectAnswer}
Correct Answer: {CorrectAnswer}
Construct Name: {ConstructName}
Subject Name: {SubjectName}

Your task: Identify the misconception behind Incorrect Answer. Answer concisely and generically inside <response>$$INSERT TEXT HERE$$</response>.
Before answering the question think step by step concisely in 1-2 sentence inside <thinking>$$INSERT TEXT HERE$$</thinking> tag and respond your final misconception inside <response>$$INSERT TEXT HERE$$</response> tag."""

def apply_template(row, tokenizer, targetCol):
    """Render the chat prompt for one answer option of a wide-format question row.

    Note: a later cell of this notebook redefines ``apply_template`` with a
    two-argument signature, which replaces this version once that cell runs.

    Args:
        row: Record supporting item access for "ConstructName", "SubjectName",
            "QuestionText" and the "Answer<X>Text" columns, plus attribute
            access to ``CorrectAnswer`` (e.g. a pandas Series).
        tokenizer: Object exposing ``apply_chat_template``.
        targetCol: Option letter whose answer text is inserted as the
            incorrect answer.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns for a single user
        message with ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    # Keep the same field order as the template call so row lookups happen in
    # the same sequence.
    fields = {
        "ConstructName": row["ConstructName"],
        "SubjectName": row["SubjectName"],
        "Question": row["QuestionText"],
        "IncorrectAnswer": row[f"Answer{targetCol}Text"],
        "CorrectAnswer": row[f"Answer{row.CorrectAnswer}Text"],
    }
    user_message = {"role": "user", "content": PROMPT.format(**fields)}
    return tokenizer.apply_chat_template(
        [user_message],
        tokenize=False,
        add_generation_prompt=True,
    )

In [None]:
def get_correct_answer_text(row):
    """Return the answer text of the option flagged as correct in ``row``.

    Args:
        row: Record supporting item access for 'CorrectAnswer' and the
            'AnswerAText' ... 'AnswerDText' columns.

    Returns:
        The text stored in the column matching ``row['CorrectAnswer']``, or
        ``None`` when that value is not one of 'A', 'B', 'C', 'D'.
    """
    answer_columns = (
        ('A', 'AnswerAText'),
        ('B', 'AnswerBText'),
        ('C', 'AnswerCText'),
        ('D', 'AnswerDText'),
    )
    correct_letter = row['CorrectAnswer']
    for letter, column in answer_columns:
        if correct_letter == letter:
            return row[column]
    return None

# Add a 'CorrectAnswerText' column holding, for every question row, the text of
# the option named in 'CorrectAnswer' (None when the letter is not A-D).
df_train['CorrectAnswerText'] = df_train.apply(get_correct_answer_text, axis=1)

In [None]:
import re

# Question-level columns carried unchanged through both melts.
select_column = [
    "QuestionId",
    "ConstructName",
    "SubjectName",
    "CorrectAnswer",
    "QuestionText",
    "CorrectAnswerText",
]


def _option_letter(column_name, pattern):
    """Return the option letter captured by ``pattern`` in ``column_name``, or None if there is no match."""
    found = re.search(pattern, column_name)
    return found.group(1) if found else None


# Long format: one row per (question, option) carrying that option's answer text.
df_answer = df_train.melt(
    id_vars=select_column,
    value_vars=[f"Answer{ans}Text" for ans in ["A", "B", "C", "D"]],
    var_name="Option",
    value_name="AnswerText",
).sort_values("QuestionId")

# Long format: one row per (question, option) carrying that option's misconception id.
df_misconception = df_train.melt(
    id_vars=select_column,
    value_vars=[f"Misconception{ans}Id" for ans in ["A", "B", "C", "D"]],
    var_name="Option",
    value_name="Misconception",
).sort_values("QuestionId")

# Reduce the melted column names ("AnswerAText", "MisconceptionAId", ...) to the bare option letter.
df_answer['Option'] = df_answer['Option'].map(
    lambda name: _option_letter(name, r'Answer([A-D])')
)
df_misconception['Option'] = df_misconception['Option'].map(
    lambda name: _option_letter(name, r'Misconception([A-D])')
)

# Pair every answer text with the misconception id of the same question and option.
# NOTE(review): rows where Option equals CorrectAnswer are kept, so later cells
# also build a prompt for the correct option — confirm this is intended.
df_merged = df_answer.merge(
    df_misconception,
    on=["QuestionId", "Option", "ConstructName", "SubjectName", "CorrectAnswer", "QuestionText", "CorrectAnswerText"],
    how="inner",
    suffixes=('', '_y'),
)

# Drop any extra duplicated columns that were suffixed with '_y'
duplicated_columns = df_merged.filter(regex='_y$').columns.tolist()
df_merged = df_merged.drop(columns=duplicated_columns)


In [None]:
# Display the merged long-format frame (one row per question/option pair) for inspection.
df_merged

In [None]:
import os
from transformers import AutoTokenizer


# User-message template for the misconception prompt; the {Question},
# {IncorrectAnswer}, {CorrectAnswer}, {ConstructName} and {SubjectName}
# placeholders are filled with str.format inside apply_template.
# Note: an identical PROMPT is also assigned in an earlier cell of this notebook.
PROMPT  = """Question: {Question}
Incorrect Answer: {IncorrectAnswer}
Correct Answer: {CorrectAnswer}
Construct Name: {ConstructName}
Subject Name: {SubjectName}

Your task: Identify the misconception behind Incorrect Answer. Answer concisely and generically inside <response>$$INSERT TEXT HERE$$</response>.
Before answering the question think step by step concisely in 1-2 sentence inside <thinking>$$INSERT TEXT HERE$$</thinking> tag and respond your final misconception inside <response>$$INSERT TEXT HERE$$</response> tag."""

def apply_template(row, tokenizer):
    """Render the chat prompt for one long-format (question, option) row.

    Args:
        row: Record supporting item access for "ConstructName", "SubjectName",
            "QuestionText", "AnswerText" and "CorrectAnswerText".
        tokenizer: Object exposing ``apply_chat_template``.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns for a single user
        message with ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    # Same field order as the template call, so row lookups happen in the same sequence.
    fields = {
        "ConstructName": row["ConstructName"],
        "SubjectName": row["SubjectName"],
        "Question": row["QuestionText"],
        "IncorrectAnswer": row["AnswerText"],
        "CorrectAnswer": row["CorrectAnswerText"],
    }
    user_message = {"role": "user", "content": PROMPT.format(**fields)}
    return tokenizer.apply_chat_template(
        [user_message],
        tokenize=False,
        add_generation_prompt=True,
    )

# Build one chat-formatted prompt per (question, option) row and store it in "Prompt".
df_merged["Prompt"] = df_merged.apply(lambda row: apply_template(row, tokenizer), axis=1)

In [None]:
# Persist the prompts so the separate run_vllm.py process can read them back;
# the DataFrame index is not written.
df_merged.to_parquet("prompt.parquet", index=False)

In [None]:
%%writefile run_vllm.py

import re
import vllm
import pandas as pd

# Prompts written by the notebook; the "Prompt" column holds one chat-formatted prompt per row.
df = pd.read_parquet("prompt.parquet")

llm = vllm.LLM(
    "/kaggle/input/hugging-quants-meta-llama-3-1-8b-instruct-awq-int4",
    quantization="awq",  # weights are AWQ-quantised
    tensor_parallel_size=2,  # shard the model across two GPUs
    gpu_memory_utilization=0.95,  # fraction of each GPU's memory vLLM may use
    trust_remote_code=True,
    dtype="half",
    enforce_eager=True,  # eager execution (no CUDA graph capture)
    max_model_len=8192,  # upper bound on prompt + generated tokens per sequence
    disable_log_stats=True,
)

sampling_params = vllm.SamplingParams(
    n=1,  # Number of output sequences to return for each prompt.
    top_p=0.9,  # Float that controls the cumulative probability of the top tokens to consider.
    temperature=0,  # randomness of the sampling (0 means greedy decoding)
    seed=777,  # Seed for reproducibility
    skip_special_tokens=False,  # Whether to skip special tokens in the output.
    max_tokens=2048,  # Maximum number of tokens to generate per output sequence.
)

# Pass a plain Python list of strings (the documented prompt type) rather than
# a numpy object array, so the call does not rely on vLLM tolerating ndarrays.
request_outputs = llm.generate(
    df["Prompt"].tolist(),
    sampling_params,
    use_tqdm=True,
)

# n=1, so every request carries exactly one completion; keep its raw text.
responses = [x.outputs[0].text for x in request_outputs]
df["FullResponse"] = responses

def extract_response(text):
    """Extract the text enclosed in ``<response>...</response>`` tags.

    Args:
        text: Raw model completion.

    Returns:
        The contents of every ``<response>`` block joined with "," and with
        surrounding whitespace stripped; an empty string if no block is found.
    """
    # re.DOTALL lets "." match newlines, so a response that spans several
    # lines is captured instead of being silently dropped.
    matches = re.findall(r"<response>(.*?)</response>", text, flags=re.DOTALL)
    return ",".join(matches).strip()

# Reduce every raw completion to the text inside its <response> tags.
responses = [extract_response(x) for x in responses]

# The input frame may already contain a "Misconception" column (the notebook's
# prompt.parquet stores the melted Misconception{A-D}Id values there). Keep
# those values under "MisconceptionId" so the assignment below does not
# silently discard them; "Misconception" still ends up holding the generated text.
if "Misconception" in df.columns and "MisconceptionId" not in df.columns:
    df["MisconceptionId"] = df["Misconception"]

df["Misconception"] = responses
df.to_parquet("output.parquet", index=False)

In [None]:
# Run the generated script in a separate Python process; it reads
# prompt.parquet and writes output.parquet.
!python run_vllm.py
