In [62]:
import json
import pandas as pd
import pickle
import openai
import functools

# Process the SQuAD JSON to extract what we need

In [15]:
# Load the SQuAD JSON with pandas and keep only its 'data' column;
# each element is iterated below as an entry that carries a 'paragraphs' list.
d = pd.read_json('data/squad2.json')['data']

In [35]:
# Flatten the 'context' text of every paragraph of every entry into a single
# list, preserving entry order and then paragraph order within each entry.
all_paragraphs = [
    paragraph['context']
    for ent in d
    for paragraph in ent['paragraphs']
]

In [60]:
# Collect (paragraph_id, question, first answer text) for every answerable question.
# A list comprehension is still possible here, but it gets a little obnoxious.
#
# paragraph_id is a running index across ALL entries so that it lines up with the
# flattened all_paragraphs list. Using enumerate() on each entry's paragraphs would
# restart at 0 for every entry and pair later questions with the wrong context.
all_qa = []
paragraph_id = 0
for ent in d:
    for paragraph in ent['paragraphs']:
        # The loop variable is deliberately not called `qa`, so re-running this cell
        # cannot shadow the qa() helper defined further down the notebook.
        for qa_item in paragraph['qas']:
            # Keep only questions that have at least one answer and are answerable.
            if qa_item['answers'] and not qa_item['is_impossible']:
                all_qa.append((paragraph_id, qa_item['question'], qa_item['answers'][0]['text']))
        paragraph_id += 1

In [61]:
len(all_qa)

86821

In [62]:
len(all_paragraphs)

19035

In [63]:
all_paragraphs[0:5]

['Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".',
 'Following the disbandment of Destiny\'s Child in June 2005, she released her second solo album, B\'Day (2006), which contained hits "Déjà Vu", "Irreplaceable", and "Beautiful Liar". Beyoncé also ventured into acting, with a Golden Globe-nominated performance in Dreamgirls (2006), and starring ro

In [64]:
all_qa[0:5]

[(0, 'When did Beyonce start becoming popular?', 'in the late 1990s'),
 (0,
  'What areas did Beyonce compete in when she was growing up?',
  'singing and dancing'),
 (0,
  "When did Beyonce leave Destiny's Child and become a solo singer?",
  '2003'),
 (0, 'In what city and state did Beyonce  grow up? ', 'Houston, Texas'),
 (0, 'In which decade did Beyonce become famous?', 'late 1990s')]

In [66]:
# Persist both lists back-to-back in one pickle file. They must be read back
# with two pickle.load calls in the same order (paragraphs first, then QA).
# The with-block guarantees the file is closed even if a dump raises.
with open('data/paragraphs_and_qa.pickle', 'wb') as f:
    pickle.dump(all_paragraphs, f)
    pickle.dump(all_qa, f)

# Baseline OpenAI QA, knowing exactly which paragraph to use

In [2]:
# Reload the two lists in the order they were dumped (paragraphs, then QA).
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
# The with-block guarantees the file is closed even if a load raises.
with open('data/paragraphs_and_qa.pickle', 'rb') as f:
    all_paragraphs = pickle.load(f)
    all_qa = pickle.load(f)

In [None]:
# Create the OpenAI client that is passed to the qa() and is_same() helpers.
# NOTE(review): presumably credentials come from the environment
# (e.g. OPENAI_API_KEY) since none are passed here — confirm.
client = openai.OpenAI()

In [75]:
@functools.cache
def qa(client, question, context):
    """Answer `question` from `context` with a single chat-completion call.

    The result is memoized by functools.cache on (client, question, context),
    so all three arguments must be hashable and a repeated call with the same
    arguments does not hit the API again.

    Args:
        client: object exposing `chat.completions.create(...)`.
        question: the question text.
        context: the passage the answer should be drawn from.

    Returns:
        The `content` of the first choice's message in the response.
    """
    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the provided pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know. "
        "Provide just the answer in as few words as possible. "
        "Do not use complete sentences."
    )
    prompt = f"Question: {question} \nContext: {context} \nAnswer:"
    chat = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]
    # temperature=0.0 is requested to keep answers as repeatable as possible.
    completion = client.chat.completions.create(
        messages=chat,
        model="gpt-3.5-turbo",
        temperature=0.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [50]:
# Take the first QA triple and show its paragraph, question and reference
# answer, one per line.
para_id, question, answer = all_qa[0]
print(all_paragraphs[para_id], question, answer, sep='\n')

Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".
When did Beyonce start becoming popular?
in the late 1990s


In [65]:
# Ask the model the sample question against its source paragraph and show the raw reply.
response = qa(client, question, all_paragraphs[para_id])
print(response)

Late 1990s


## Coming up with a simple comparison evaluation

In [66]:
@functools.cache
def is_same(client, question, a1, a2):
    """Ask the model whether two answers to `question` mean the same thing.

    The result is memoized by functools.cache on (client, question, a1, a2),
    so all arguments must be hashable and repeated calls do not hit the API.

    Args:
        client: object exposing `chat.completions.create(...)`.
        question: the question both answers respond to.
        a1: first answer (e.g. the reference answer).
        a2: second answer (e.g. the model's answer).

    Returns:
        True if the model's reply starts with "yes" (ignoring case and
        surrounding whitespace), False otherwise, including an empty or
        missing reply.
    """
    system_prompt = "You are an assistant for scoring answers. Two answers to a hypothetical question are provided. Say 'Yes' if both answers have the same meaning, and 'No' otherwise."
    # Each field goes on its own line; the separator before "Answer 1" must be
    # a real newline ("\n"), not the invalid escape "\A".
    user_prompt = f"Question: {question} \nAnswer 1: {a1} \nAnswer 2: {a2}"
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": user_prompt,
            }
        ],
        model="gpt-3.5-turbo",
        temperature=0.0
    )
    # Normalize the verdict so replies like "Yes." or " yes\n" still count as
    # agreement, and a missing content field counts as disagreement.
    verdict = response.choices[0].message.content or ""
    return verdict.strip().lower().startswith("yes")
    

In [67]:
answer

'in the late 1990s'

In [70]:
# Sanity check: the reference answer and the model's reply should be judged equivalent.
# Uses `question` (unpacked from all_qa[0]); `qn` is not defined anywhere in this notebook.
is_same(client, question, answer, response)

True

In [72]:
# Sanity check: a deliberately wrong answer should be judged different.
# Uses `question` (unpacked from all_qa[0]); `qn` is not defined anywhere in this notebook.
is_same(client, question, answer, "Late 1980s")

False

# Evaluate Everything
80k questions at about 1k tokens per question ≈ $0.0010 × 80,000, or about $80. That is a bit pricey for a quick test, so we will subsample to ~10% of the questions.

In [59]:
import numpy as np
# Keep every 10th QA triple, starting from the first.
sub_qa = all_qa[::10]

In [None]:
# Run the QA helper over the subsample and grade each reply with is_same().
# `responses` and `evaluations` are filled in the same order as `sub_qa`.
responses = []
evaluations = []
for ent in sub_qa:
    # Each entry is a (paragraph index, question, reference answer) triple.
    para_id, question, true_answer = ent
    # Answer from the paragraph that para_id selects in all_paragraphs.
    response = qa(client, question, all_paragraphs[para_id])
    # Model-graded comparison of the reference answer against the reply.
    evaluation = is_same(client, question, true_answer, response)
    print(f"True: {true_answer}, Response: {response}, Eval: {evaluation}")
    responses.append(response)
    evaluations.append(evaluation)

True: in the late 1990s, Response: Late 1990s, Eval: True
True: Dangerously in Love, Response: Dangerously in Love, Eval: True
True: Beyoncé, Response: Beyoncé (2013), Eval: True
True: 2000s, Response: 2000s, Eval: True
True: her mother's maiden name, Response: Her mother's maiden name., Eval: True
True: Joseph Broussard., Response: Joseph Broussard., Eval: True
True: Darlette Johnson, Response: Darlette Johnson, Eval: True
True: age eight, Response: At age eight., Eval: True
True: Book of Isaiah, Response: Isaiah, Eval: True
True: boyfriend left her, Response: Her long-standing boyfriend left her., Eval: True
True: eleven, Response: 11, Eval: True
True: Mike Myers, Response: Mike Myers, Eval: True
True: Austin Powers in Goldmember, Response: Austin Powers in Goldmember, Eval: True
True: Jay Z, Response: "Crazy in Love", Eval: False
True: 2006, Response: March 2006, Eval: False
True: Déjà Vu, Response: "Déjà Vu", Eval: True
True: The Pink Panther, Response: Dreamgirls, Eval: False
True

In [77]:
sub_qa[8]

(6,
 "The name Destiny's Child was based on a quote in which book of the Bible?",
 'Book of Isaiah')