In [None]:
#pip install openai

### Data preparation

In [None]:
import os
from openai import OpenAI

# Shared OpenAI client used by the prompting functions below. The key is read
# from the API_KEY environment variable (os.environ.get yields None if unset).
client = OpenAI(api_key=os.environ.get("API_KEY"))

In [55]:
import pandas as pd

# Load the sample of past question/answer pairs.
df = pd.read_csv("yishan_10_sample.csv")

# Flatten every past Q/A pair into one newline-separated text block.
yishan_knowledge = "\n".join(
    f"Q: {past_qa['Question']}\nA: {past_qa['Answer']}"
    for _, past_qa in df.iterrows()
)
# print(yishan_knowledge)

In [53]:
# Load the test questions and their reference answers (used later for evaluation).
df_test = pd.read_csv('yishan_10_test.csv')

### Prompting strategies

Strategy 1: zero shot

In [8]:
def zero_shot(question):
    """Ask gpt-4o the question directly, with no persona or example answers.

    Args:
        question: The question text inserted into the prompt.

    Returns:
        The ``output_text`` of the model response.
    """
    prompt = f""" Answer the question.
    Question:{question}
    Answer:
    """

    # Single user turn carrying the whole prompt.
    messages = [{"role": "user", "content": prompt}]
    reply = client.responses.create(model='gpt-4o', input=messages)
    return reply.output_text

Strategy 2: few shot

In [9]:
def few_shot(question, knowledge):
    """Ask gpt-4o to answer in Yishan's style, given past answers as examples.

    Args:
        question: The question text inserted into the prompt.
        knowledge: Text of past Q/A pairs placed in the prompt as examples.

    Returns:
        The ``output_text`` of the model response.
    """
    prompt = f"""You are Yishan. Here are some of Yishan's past answers:
    {knowledge}
    Based on these answers, respond in Yishan's style.
    Question:{question}
    Answer:"""

    # Single user turn carrying the persona, the examples and the question.
    messages = [{"role": "user", "content": prompt}]
    reply = client.responses.create(model='gpt-4o', input=messages)
    return reply.output_text

Strategy 3: RAG

AI use disclosure: I used ChatGPT (GPT-4o) to generate the first version of this code by asking, "How do I use RAG to do impersonation?"

In [58]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence encoder used to embed the stored questions for retrieval.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Store one embedding per stored question in a new column.
df['question_embedding'] = df['Question'].apply(embedding_model.encode)

In [46]:
from sklearn.metrics.pairwise import cosine_similarity
def find_similar_question(question, df, model, top_k = 3):
    """Return the stored Q/A pairs whose questions are most similar to ``question``.

    Args:
        question: Query text, embedded with ``model.encode``.
        df: DataFrame with ``Question``, ``Answer`` and ``question_embedding``
            columns (one embedding vector per row).
        model: Object exposing ``encode(text)`` that returns an embedding vector.
        top_k: Maximum number of pairs to return.

    Returns:
        A string of ``Q: ...`` / ``A: ...`` lines joined by newlines, ordered from
        most to least similar. Empty string when ``top_k`` is not positive or
        ``df`` has no rows.
    """
    # Guard first: slicing with [-0:] would select every row instead of none,
    # and np.stack raises when given no embeddings to stack.
    if top_k <= 0 or len(df) == 0:
        return ""

    question_embedding = model.encode(question)

    similarities = cosine_similarity([question_embedding], np.stack(df['question_embedding'].values))[0]

    # argsort is ascending, so take the last top_k positions and reverse them.
    top_index = np.argsort(similarities)[-top_k:][::-1]

    # Select the matching rows once, then format each as a Q/A pair.
    selected = df.iloc[top_index]
    return "\n".join(
        f"Q: {q}\nA: {a}" for q, a in zip(selected['Question'], selected['Answer'])
    )



In [None]:
def rag_prompt(question, df, model):
    """Ask gpt-4o to answer as Yishan, using retrieved related Q/A pairs as context.

    Args:
        question: The question text inserted into the prompt.
        df: DataFrame of stored Q/A pairs, passed to ``find_similar_question``.
        model: Embedding model, passed to ``find_similar_question``.

    Returns:
        The ``output_text`` of the model response.
    """
    relevant_answers = find_similar_question(question, df, model)

    prompt = f"""Answer the question as if you were Yishan.
    Here are Yishan's answers to some related questions: {relevant_answers} 
    Question: {question}    
    Answer:
    """

    # Single user turn carrying the retrieved context and the question.
    messages = [{"role": "user", "content": prompt}]
    reply = client.responses.create(model='gpt-4o', input=messages)
    return reply.output_text


In [None]:
# Try all three strategies on the first test question (row label 0).
question = df_test.loc[0, "Question"]
zero_shot_response = zero_shot(question)
few_shot_response = few_shot(question, yishan_knowledge)
rag_prompt_response = rag_prompt(question, df, embedding_model)

### Evaluation
To evaluate impersonation performance, we can compare the similarity between the ground-truth answer and the AI-generated answer. This HuggingFace article (https://huggingface.co/tasks/sentence-similarity) describes two approaches to measuring sentence similarity: cosine similarity and mean reciprocal rank.

(1) Cosine similarity

In [None]:
# pip install -U sentence-transformers

In [85]:
# cosine similarity using Huggingface
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model for the cosine-similarity evaluation below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

#Compute embedding for both lists
def cosine_test(zero_shot_response,few_shot_response,rag_prompt_response, answer, model):
    """Score each generated response against the reference answer by cosine similarity.

    Args:
        zero_shot_response: Text produced by the zero-shot strategy.
        few_shot_response: Text produced by the few-shot strategy.
        rag_prompt_response: Text produced by the RAG strategy.
        answer: Reference answer text to compare against.
        model: Embedding model exposing ``encode(text, convert_to_tensor=True)``.

    Returns:
        A 3-tuple of ``util.pytorch_cos_sim`` results in the order
        (zero-shot, few-shot, RAG).
    """
    generated = (zero_shot_response, few_shot_response, rag_prompt_response)

    # Embed the three generated responses first, then the reference answer.
    generated_embeddings = [
        model.encode(text, convert_to_tensor=True) for text in generated
    ]
    reference_embedding = model.encode(answer, convert_to_tensor=True)

    return tuple(
        util.pytorch_cos_sim(embedding, reference_embedding)
        for embedding in generated_embeddings
    )

(tensor([[0.5261]]), tensor([[0.5535]]), tensor([[0.5108]]))
(tensor([[0.6047]]), tensor([[0.6117]]), tensor([[0.6585]]))
(tensor([[0.5102]]), tensor([[0.4826]]), tensor([[0.4565]]))
(tensor([[0.5909]]), tensor([[0.5192]]), tensor([[0.4979]]))
(tensor([[0.2907]]), tensor([[0.4160]]), tensor([[0.5898]]))
(tensor([[0.4999]]), tensor([[0.4993]]), tensor([[0.5112]]))
(tensor([[0.3248]]), tensor([[0.4846]]), tensor([[0.3846]]))
(tensor([[0.4549]]), tensor([[0.4187]]), tensor([[0.4531]]))
(tensor([[0.5715]]), tensor([[0.4523]]), tensor([[0.4372]]))
(tensor([[0.3877]]), tensor([[0.7234]]), tensor([[0.6635]]))


(2) Mean reciprocal rank

In [83]:
import requests
import os
from dotenv import load_dotenv

# Call python-dotenv's loader, then read the Hugging Face token from the environment.
load_dotenv()
api_token = os.getenv("HF_API_TOKEN")

# Hugging Face inference endpoint for the msmarco-distilbert-base-tas-b model.
API_URL = "https://router.huggingface.co/hf-inference/models/sentence-transformers/msmarco-distilbert-base-tas-b"
# Bearer-token header sent with every request made by query().
headers = {"Authorization": f"Bearer {api_token}"}

def query(payload, timeout=60):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Without a timeout, requests can block indefinitely on a stalled connection.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP errors instead of returning an error payload as if it were scores.
    response.raise_for_status()
    return response.json()

def passage_ranking(zero_shot_response, few_shot_response, rag_prompt_response, answer):
    """Send the reference answer and the three generated responses to ``query``.

    ``answer`` is sent as the source sentence; the three responses are sent as
    the candidate sentences in the order zero-shot, few-shot, RAG.

    Returns:
        Whatever ``query`` returns for that payload.
    """
    candidates = [zero_shot_response, few_shot_response, rag_prompt_response]
    payload = {"inputs": {"source_sentence": answer, "sentences": candidates}}
    return query(payload)

Call the three prompting strategies and print the evaluation results

In [88]:
# Preview the first rows of the test set.
df_test.head()

Unnamed: 0,Question,Answer
0,"Do you agree with the statement: ""AI will repl...","Yes, I believe so. Generative AI like Chatgpt ..."
1,"What is your religion (if any), and what are i...",My religion is Buddhism. The core value of Bud...
2,What are some of the biggest challenges you fa...,Making friends from different culture is diffi...
3,How do you make friends and build a social lif...,"I did part-time jobs at a school center, and a..."
4,What are some underrated travel destinations t...,Some parks in Bloomington are good enough for ...


In [87]:
# Run all three strategies on the first four test rows and print their scores.
for i in range(4):
    question = df_test.loc[i, "Question"]
    answer = df_test.loc[i, "Answer"]

    zero_shot_response = zero_shot(question)
    few_shot_response = few_shot(question, yishan_knowledge)
    rag_prompt_response = rag_prompt(question, df, embedding_model)

    # Alternative metric (local cosine similarity), left disabled:
    # print(cosine_test(zero_shot_response,few_shot_response,rag_prompt_response, answer, model))
    print(passage_ranking(zero_shot_response, few_shot_response, rag_prompt_response, answer))

[0.792880654335022, 0.7845485806465149, 0.8008186221122742]
[0.8503563404083252, 0.8269603848457336, 0.8320447206497192]
[0.8101091980934143, 0.8511186242103577, 0.8422950506210327]
[0.8285865187644958, 0.8842434287071228, 0.8457888960838318]
