In [None]:
#pip install openai

### Data preparation

In [1]:
import os
from openai import OpenAI

# Shared OpenAI client used by every prompting strategy below. The key is read
# from the API_KEY environment variable; os.environ.get yields None when unset.
client = OpenAI(api_key=os.environ.get("API_KEY"))

In [75]:
import pandas as pd

# Past question/answer pairs that serve as the knowledge base.
df = pd.read_csv("jianna_10_sample.csv")

# Flatten every pair into one newline-separated "Q: ... / A: ..." text block
# that can be pasted into a prompt.
jianna_knowledge = "\n".join(
    f"Q: {past_question}\nA: {past_answer}"
    for past_question, past_answer in zip(df["Question"], df["Answer"])
)
# Uncomment to inspect the combined text:
# print(jianna_knowledge)

In [76]:
# Test set loaded from CSV; later cells read its 'Question' and 'Answer' columns.
df_test = pd.read_csv('jianna_5_test.csv')

### Prompting strategies

Strategy 1: zero shot

In [77]:
zero_shot_response = []
def zero_shot(question):
    """Ask gpt-4o the bare question, with no persona or examples.

    The generated text is appended to the module-level ``zero_shot_response``
    list; the function itself returns nothing.
    """
    prompt_text = f""" Answer the question.
    Question:{question}
    Answer:
    """

    user_message = {"role": "user", "content": prompt_text}
    completion = client.responses.create(model='gpt-4o', input=[user_message])
    zero_shot_response.append(completion.output_text)
    

Strategy 2: few shot

In [78]:
few_shot_response = []
def few_shot(question, knowledge):
    """Ask gpt-4o to answer as Jianna, given a block of her past answers.

    ``knowledge`` is inserted verbatim into the prompt ahead of the question.
    The generated text is appended to the module-level ``few_shot_response``
    list; the function itself returns nothing.
    """
    prompt_text = f"""You are Jianna, an international student female from South Korea. Here are some of Jianna's past answers:
    {knowledge}
    Based on these answers, respond in Jianna's style.
    Question:{question}
    Answer:"""

    user_message = {"role": "user", "content": prompt_text}
    completion = client.responses.create(model='gpt-4o', input=[user_message])
    few_shot_response.append(completion.output_text)

Strategy 3: RAG

AI use clarification: I used ChatGPT (GPT-4o) to generate the first version of the code by asking, "How do I use RAG to do impersonation?"

In [79]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence encoder used to retrieve similar past questions.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode each stored question once and keep its vector alongside the row.
df["question_embedding"] = df["Question"].map(embedding_model.encode)

In [80]:
from sklearn.metrics.pairwise import cosine_similarity
def find_similar_question(question, df, model, top_k = 3):
    """Return the stored Q/A pairs whose questions are most similar to ``question``.

    Args:
        question: Text of the new question.
        df: DataFrame providing 'Question', 'Answer' and 'question_embedding'
            columns (one embedding vector per row).
        model: Encoder exposing ``encode(text)``; it should be the model that
            produced the vectors in ``question_embedding``.
        top_k: Maximum number of pairs to return.

    Returns:
        One string holding a "Q: ..." line and an "A: ..." line for each
        selected pair, most similar first. An empty string is returned when
        ``top_k`` is not positive or ``df`` has no rows.
    """
    # The old slice [-top_k:] turned top_k == 0 into [-0:], i.e. *every* row.
    # Handle non-positive requests and an empty knowledge base up front
    # (np.stack would also raise on an empty sequence).
    if top_k <= 0 or len(df) == 0:
        return ""

    question_embedding = model.encode(question)
    stored_embeddings = np.stack(df['question_embedding'].values)
    similarities = cosine_similarity([question_embedding], stored_embeddings)[0]

    # Ascending argsort, reversed, then truncated: best match first.
    top_index = np.argsort(similarities)[::-1][:top_k]

    selected_rows = (df.iloc[i] for i in top_index)
    return "\n".join(f"Q: {row['Question']}\nA: {row['Answer']}" for row in selected_rows)



In [96]:
rag_prompt_response=[]
def rag_prompt(question, df, model):
    """Answer as Jianna using only the retrieved, most similar past answers.

    Retrieval is delegated to ``find_similar_question(question, df, model)``.
    The generated text is appended to the module-level ``rag_prompt_response``
    list, and the retrieved Q/A text (not the generated answer) is returned.
    """
    relevant_answers = find_similar_question(question, df, model)

    prompt_text = f"""You are Jianna, an international student female from South Korea. Here are some of Jianna's past answers to some related questions:
    {relevant_answers} 
    Question: {question}    
    Answer:
    """

    user_message = {"role": "user", "content": prompt_text}
    completion = client.responses.create(model='gpt-4o', input=[user_message])

    rag_prompt_response.append(completion.output_text)
    return relevant_answers


### Evaluation
To evaluate the impersonation performance, we compare the similarity between the ground truth and the AI-generated answer. This HuggingFace article (https://huggingface.co/tasks/sentence-similarity) describes two approaches to measuring sentence similarity: Cosine Similarity and Mean Reciprocal Rank.

(1) Cosine similarity

In [None]:
# pip install -U sentence-transformers

In [82]:
# cosine similarity using Huggingface
from sentence_transformers import SentenceTransformer, util
# Encoder passed to cosine_test when scoring generated answers.
# NOTE(review): looks like the same checkpoint as `embedding_model` loaded earlier -- confirm; one instance could be reused.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

#Compute embedding for both lists
def cosine_test(zero_shot_response,few_shot_response,rag_prompt_response, answer, model):
    """Score each strategy's output against the reference answer by cosine similarity.

    Args:
        zero_shot_response: Text (or list of texts) from the zero-shot strategy.
        few_shot_response: Text (or list of texts) from the few-shot strategy.
        rag_prompt_response: Text (or list of texts) from the RAG strategy.
        answer: Reference answer(s) to compare against.
        model: Encoder exposing ``encode(..., convert_to_tensor=True)``.

    Returns:
        A tuple ``(cosine_zero, cosine_few, cosine_rag)`` holding the result of
        ``util.cos_sim`` for each strategy, in that order.
    """
    # The reference embedding is shared by all three comparisons.
    embedding_answer = model.encode(answer, convert_to_tensor=True)

    def _similarity_to_answer(candidate):
        """Encode one candidate and compare it with the reference embedding."""
        embedding_candidate = model.encode(candidate, convert_to_tensor=True)
        # util.cos_sim is the current name; pytorch_cos_sim is its deprecated alias.
        return util.cos_sim(embedding_candidate, embedding_answer)

    cosine_zero = _similarity_to_answer(zero_shot_response)
    cosine_few = _similarity_to_answer(few_shot_response)
    cosine_rag = _similarity_to_answer(rag_prompt_response)

    return cosine_zero, cosine_few, cosine_rag

(2) Mean reciprocal rank

In [83]:
import requests
import os
from dotenv import load_dotenv

# load_dotenv() runs before the token lookup -- presumably so a local .env file can supply HF_API_TOKEN.
load_dotenv()
# Hugging Face token; os.getenv yields None when HF_API_TOKEN is not set.
api_token = os.getenv("HF_API_TOKEN")

# Hosted inference endpoint for sentence-transformers/msmarco-distilbert-base-tas-b.
API_URL = "https://router.huggingface.co/hf-inference/models/sentence-transformers/msmarco-distilbert-base-tas-b"
# NOTE(review): with no token this header becomes "Bearer None" -- confirm the token is set before calling query().
headers = {"Authorization": f"Bearer {api_token}"}

def query(payload, timeout=30):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: The endpoint answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    # Without a timeout, requests can block indefinitely on a stalled connection.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing back an error payload that
    # callers would print as if it were similarity scores.
    response.raise_for_status()

    return response.json()

def passage_ranking(zero_shot_response, few_shot_response, rag_prompt_response, answer):
    """Compare the three generated answers with the reference via ``query``.

    The reference ``answer`` is sent as the source sentence and the three
    candidates as the sentences to compare, in zero-shot, few-shot, RAG order.
    Whatever ``query`` returns is passed back unchanged.
    """
    candidates = [zero_shot_response, few_shot_response, rag_prompt_response]
    request_body = {
        "inputs": {
            "source_sentence": answer,
            "sentences": candidates,
        }
    }
    return query(request_body)

Call the three prompting strategies and print the evaluation results

In [None]:
# Generate answers via the GPT API: one request per strategy per test question.
# The row count is taken from df_test (not hard-coded) so each response list ends
# up with exactly one entry per row, which the column assignment in the save cell needs.
for i in range(len(df_test)):
    # .iloc selects by position, so this also works if df_test has a non-default index.
    question = df_test["Question"].iloc[i]
    answer = df_test['Answer'].iloc[i]

    # CAREFUL: every call below is a billed API request.
    # Uncomment only when the answers need to be regenerated.
    # zero_shot(question)
    # few_shot(question, jianna_knowledge)
    # rag_prompt(question, df, embedding_model)

In [None]:
# Save answers for human judgement: one CSV per strategy, each holding the test
# set plus a single extra column with that strategy's generated answers.
df_test_zero = df_test.assign(**{'Zero-shot Answer': zero_shot_response})
df_test_zero.to_csv('zero-shot test - answers.csv', index=False)

df_test_few = df_test.assign(**{'Few-shot Answer': few_shot_response})
df_test_few.to_csv('few-shot test - answers.csv', index=False)

df_test_rag = df_test.assign(**{'RAG-shot Answer': rag_prompt_response})
df_test_rag.to_csv('RAG-shot test - answers.csv', index=False)

In [90]:
# Print, per test question, the cosine similarity of each strategy's answer to
# the reference answer (order: zero-shot, few-shot, RAG).
# The bound follows df_test instead of a hard-coded 5, and rows are read by position.
for i in range(len(df_test)):
    print("Question", i)
    answer = df_test['Answer'].iloc[i]
    print(cosine_test(zero_shot_response[i],few_shot_response[i],rag_prompt_response[i], answer, model))
    print("------")

Question 0
(tensor([[0.5693]]), tensor([[0.4381]]), tensor([[0.3430]]))
------
Question 1
(tensor([[0.7892]]), tensor([[0.8123]]), tensor([[0.6754]]))
------
Question 2
(tensor([[0.6551]]), tensor([[0.6876]]), tensor([[0.6241]]))
------
Question 3
(tensor([[0.4931]]), tensor([[0.4903]]), tensor([[0.5876]]))
------
Question 4
(tensor([[0.4069]]), tensor([[0.3598]]), tensor([[0.4458]]))
------


In [94]:
# Print, per test question, what the hosted endpoint returns for the three
# candidate answers (order: zero-shot, few-shot, RAG) against the reference answer.
# The bound follows df_test instead of a hard-coded 5, and rows are read by position.
for i in range(len(df_test)):
    print("Question", i)
    answer = df_test['Answer'].iloc[i]
    print(passage_ranking(zero_shot_response[i], few_shot_response[i], rag_prompt_response[i], answer))
    print("------")

Question 0
[0.8188427090644836, 0.78847736120224, 0.7755982875823975]
------
Question 1
[0.8991299867630005, 0.8802077770233154, 0.8572396636009216]
------
Question 2
[0.86814284324646, 0.8591591715812683, 0.8261526823043823]
------
Question 3
[0.730859637260437, 0.7916536927223206, 0.8310517072677612]
------
Question 4
[0.731633186340332, 0.7101984620094299, 0.7590171098709106]
------
