In [1]:
import os
import re
import json
import tqdm
import random
import openai
import tiktoken
import langdetect
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read files

In [3]:
data_dir = os.path.join(os.getenv("DATA_DIR"), "narrative_understanding/chatter")
samples_file = os.path.join(data_dir, "attr_annot/samples.csv")
zero_txt_file = os.path.join(data_dir, "attr_annot/zero.txt")
zero_json_file = os.path.join(data_dir, "attr_annot/zero.json")
few_txt_file = os.path.join(data_dir, "attr_annot/few.txt")
few_json_file = os.path.join(data_dir, "attr_annot/few.json")
cot_txt_file = os.path.join(data_dir, "attr_annot/cot.txt")
cot_json_file = os.path.join(data_dir, "attr_annot/cot.json")
df = pd.read_csv(samples_file, index_col=None)

with open(zero_txt_file) as fr1, open(zero_json_file) as fr2:
    zero_responses_txt = fr1.read().strip().split("\n")
    zero_responses_json = json.load(fr2)
with open(few_txt_file) as fr1, open(few_json_file) as fr2:
    few_responses_txt = fr1.read().strip().split("\n")
    few_responses_json = json.load(fr2)
with open(cot_txt_file) as fr1, open(cot_json_file) as fr2:
    cot_responses_txt = fr1.read().strip().split("\n")
    cot_responses_json = json.load(fr2)

display(df.head(5))
print(df.shape[0])

Unnamed: 0,attr,id,imdb_id,text_id,text,character,genres,answer_prob
0,accomplishments,259,6294822,35,"They walk into Bradlee's office, where we stay...",Ben Bradlee,"Biography,Drama,History,Thriller,War",0.768555
1,accomplishments,260,6294822,35,"They walk into Bradlee's office, where we stay...",BRADLEE,"Biography,Drama,History,Thriller,War",0.772461
2,accomplishments,818,213149,220,"Dorie Miller, the boxing champion/kitchen help...",DORIE MILLER,"Action,Drama,History,Romance,War",0.569336
3,accomplishments,1080,120801,1,Gregg Toland: KANE Director of Photography Joe...,JOE WILLICOMBE,"Biography,Drama",0.647461
4,accomplishments,4272,36027,179,A returning wave brings Jessica's body back ag...,BOECKLIN,"Drama,Fantasy,Horror,Romance",0.733398


3159


Find cot responses that do not end in "Therefore, the answer is ..."

In [5]:
n_errors = 0 # "Therefore, the answer is <ANSWER>" not found
error_responses = []
for response in cot_responses_txt:
    if re.search(r"Therefore\, the answer is .+$", response) is None:
        n_errors += 1
        error_responses.append(response)
print(f"{n_errors} responses does not have 'Therefore, the answer is ...' at the end")
print(f"Erroneous responses =>")
for i, response in enumerate(error_responses):
    print(f"\t{i + 1:2d}. {response}")

24 responses does not have 'Therefore, the answer is ...' at the end
Erroneous responses =>
	 1. CANNOT ANSWER
	 2. CANNOT ANSWER
	 3. CANNOT ANSWER
	 4. CANNOT ANSWER
	 5. CANNOT ANSWER
	 6. CANNOT ANSWER
	 7. CANNOT ANSWER.
	 8. CANNOT ANSWER
	 9. CANNOT ANSWER
	10. CANNOT ANSWER
	11. CANNOT ANSWER
	12. CANNOT ANSWER
	13. CANNOT ANSWER
	14. CANNOT ANSWER
	15. CANNOT ANSWER
	16. CANNOT ANSWER
	17. CANNOT ANSWER
	18. Henry looks defeated when he steps out of the elevator. This suggests that he is feeling down and discouraged. Therefore, his demeanor is "Defeated, discouraged".
	19. CANNOT ANSWER
	20. CANNOT ANSWER
	21. CANNOT ANSWER
	22. CANNOT ANSWER
	23. CANNOT ANSWER
	24. The passage states that all five men are black and Muslim. Therefore, Thomas Hayer's race and ethnicity is "Black Muslim".


Separate cot responses into explanations and answers

In [19]:
n_errors = 0 # response is erroneous if it does not match the regex
error_responses = []
cot_explanations, cot_answers = [], []
for attr, response in zip(df["attr"], cot_responses_txt):
    match = re.match(f"(.+\.) Therefore\, the answer is \"?(.+)\"?\.?$", response)
    only_cannot_answer = re.match(r"CANNOT ANSWER\.?$", response)
    if match is not None:
        explanation, answer = match.group(1), match.group(2)
    elif only_cannot_answer is not None:
        explanation, answer = "", "CANNOT ANSWER"
    else:
        n_errors += 1
        error_responses.append(response)
        explanation, answer = "ERROR", "ERROR"
    cot_explanations.append(explanation.strip())
    cot_answers.append(answer.strip().strip("\"\."))
print(f"{n_errors} responses does not match the regex")
for i, response in enumerate(error_responses):
    print(f"{i + 1:2d}. {response}")
cot_explanations_file = os.path.join(data_dir, "attr_annot/cot_explanations.txt")
cot_answers_file = os.path.join(data_dir, "attr_annot/cot_answers.txt")
with open(cot_explanations_file, "w") as fw1, open(cot_answers_file, "w") as fw2:
    fw1.write("\n".join(cot_explanations))
    fw2.write("\n".join(cot_answers))

3 responses does not match the regex
 1. Henry looks defeated when he steps out of the elevator. This suggests that he is feeling down and discouraged. Therefore, his demeanor is "Defeated, discouraged".
 2. Melvin's eyes are described as "wild with fatigue and paranoia." Therefore, the answer is "Wild with fatigue and paranoia".
 3. The passage states that all five men are black and Muslim. Therefore, Thomas Hayer's race and ethnicity is "Black Muslim".


## Manually edit the cot explanations and cot answers file for the errorneous responses

Compare zero, few, and cot answers

In [21]:
zero_answers = [response.strip("\.") for response in zero_responses_txt]
few_answers = [response.strip("\.") for response in few_responses_txt]
cot_explanations_file = os.path.join(data_dir, "attr_annot/cot_explanations.txt")
cot_answers_file = os.path.join(data_dir, "attr_annot/cot_answers.txt")
with open(cot_answers_file) as fr:
    cot_answers = fr.read().strip().split("\n")
cot_answers = [response.strip("\.") for response in cot_answers]
print(f"{len(zero_answers)} zero-shot, {len(few_answers)} few-shot, and {len(cot_answers)} cot answers")
answers_list = [zero_answers, few_answers, cot_answers]
prompt_strategy = ["zero-shot", "few-shot", "chain-of-thought"]

3159 zero-shot, 3159 few-shot, and 3159 cot answers


In [23]:
# exact match
for i in range(3):
    for j in range(i + 1, 3):
        answers_x, answers_y = answers_list[i], answers_list[j]
        strat_x, strat_y = prompt_strategy[i], prompt_strategy[j]
        compare_xy = np.zeros((2, 2), dtype=int)
        exact = 0
        for ans_x, ans_y in zip(answers_x, answers_y):
            ans_x_is_cannotanswer = ans_x.lower() == "cannot answer"
            ans_y_is_cannotanswer = ans_y.lower() == "cannot answer"
            is_exact = ans_x.lower() == ans_y.lower()
            if ans_x_is_cannotanswer and ans_y_is_cannotanswer:
                compare_xy[0, 0] += 1
            elif ans_x_is_cannotanswer:
                compare_xy[0, 1] += 1
            elif ans_y_is_cannotanswer:
                compare_xy[1, 0] += 1
            else:
                compare_xy[1, 1] += 1
                exact += is_exact
        index = [f"{strat_x} = cannot answer", f"{strat_x} != cannot answer"]
        columns = [f"{strat_y} = cannot answer", f"{strat_y} != cannot answer"]
        compare_xy_df = pd.DataFrame(compare_xy, index=index, columns=columns)
        display(compare_xy_df)
        print(f"{exact}/{compare_xy[1, 1]} match exactly when {strat_x} and {strat_y} != cannot answer")
        print()

Unnamed: 0,few-shot = cannot answer,few-shot != cannot answer
zero-shot = cannot answer,602,440
zero-shot != cannot answer,158,1959


688/1959 match exactly when zero-shot and few-shot != cannot answer



Unnamed: 0,chain-of-thought = cannot answer,chain-of-thought != cannot answer
zero-shot = cannot answer,745,297
zero-shot != cannot answer,342,1775


387/1775 match exactly when zero-shot and chain-of-thought != cannot answer



Unnamed: 0,chain-of-thought = cannot answer,chain-of-thought != cannot answer
few-shot = cannot answer,668,92
few-shot != cannot answer,419,1980


665/1980 match exactly when few-shot and chain-of-thought != cannot answer

