In [1]:
import os
import re
import tqdm
import time
import openai
import tenacity
import tiktoken
import collections
import pandas as pd

Samples file

In [2]:
# Locate the project data directory and load the filtered samples table.
# DATA_DIR must be set: os.path.join(None, ...) would otherwise fail with an
# opaque TypeError that does not mention the missing environment variable.
data_root = os.getenv("DATA_DIR")
if data_root is None:
    raise EnvironmentError("Set the DATA_DIR environment variable before running this notebook")
data_dir = os.path.join(data_root, "narrative_understanding/chatter")
sample_file = os.path.join(data_dir, "attr_instr/samples_filtered.csv")
sample_df = pd.read_csv(sample_file, index_col=None)
# Print how many samples are available for each attribute.
for attr, attr_df in sample_df.groupby("attr"):
    print(f"{attr:30s} {len(attr_df)} samples")

accomplishments                35 samples
age                            31 samples
attire                         37 samples
attitude                       30 samples
demeanor                       33 samples
emotion                        36 samples
eyes                           31 samples
goal                           29 samples
hair                           38 samples
physical appearance            38 samples
physical health status         39 samples
possessions                    39 samples
profession                     32 samples
qualities                      34 samples
race                           30 samples
voice                          32 samples


OpenAI

In [3]:
# The API key is read from the environment so it never lives in the notebook.
openai.api_key = os.getenv("OPENAI_API_KEY")
# OPENAI_ORGANIZATION overrides the organization; the previous hard-coded ID is
# kept as the fallback so existing runs behave exactly as before.
openai.organization = os.getenv("OPENAI_ORGANIZATION", "org-xPjDKPQ58le6x8A7CE13e8O6")

# Retry policy: random exponential wait bounded by min=1 and max=60 between
# attempts, giving up after 10 attempts.
@tenacity.retry(wait=tenacity.wait_random_exponential(min=1, max=60), stop=tenacity.stop_after_attempt(10))
def completion_with_backoff(**kwargs):
    """Forward `kwargs` unchanged to openai.Completion.create and return its response."""
    return openai.Completion.create(**kwargs)

def prompt_sample(instr, temperature=0.7, max_tokens=1024):
    """Complete `instr` with text-davinci-003 and return the response as a dict.

    Args:
        instr: Prompt text sent to the model.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        `response.to_dict()` on success, or None if the request (including the
        retries done by completion_with_backoff) or the conversion raised.
        Failures are logged as warnings so a None result can be traced to its
        cause instead of disappearing silently.
    """
    import logging  # local import keeps this cell self-contained

    try:
        response = completion_with_backoff(
            model="text-davinci-003",
            prompt=instr,
            temperature=temperature,
            max_tokens=max_tokens,
            logprobs=1,
        )
        return response.to_dict()
    except Exception as error:
        # Best effort by design: callers receive None, but the reason is reported.
        logging.getLogger(__name__).warning("prompt_sample failed, returning None: %r", error)
        return None

# Tokenizer for the completion model; later cells use it to count prompt tokens.
encoding = tiktoken.encoding_for_model("text-davinci-003")

# Demonstrations

## Accomplishments

In [4]:
# Select the "accomplishments" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "accomplishments"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find what the character has accomplished or achieved in the past using only the passage text. Include only significant positive achievements such as winning an award, overcoming a challenge, learning a skill, or completing their education. Give an answer as briefly as possible without missing the important facts. If you cannot find any accomplishments, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find what the character has accomplished or achieved in the past using only the passage text. Include only significant positive achievements such as winning an award, overcoming a challenge, learning a skill, or completing their education. Give an answer as briefly as possible without missing the important facts. If you cannot find any accomplishments, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [28]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 35/35 [00:53<00:00,  1.54s/it]







In [29]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
# NOTE(review): this writes attr_instr/{attr}.txt with TEXT/CHARACTER labels, while
# later sections write attr_instr/{attr}/{attr}.txt with Passage/Character/Answer
# labels - confirm which layout is current.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"TEXT = {text}\n")
        fw.write(f"CHARACTER = {character}\n")
        fw.write(f"{attr.upper()} = {completion}\n\n")

In [34]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

845 tokens


## Age

In [11]:
# Select the "age" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "age"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, infer the current age of the character using only the passage text. If you cannot infer their age, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, infer the current age of the character using only the passage text. If you cannot infer their age, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [8]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

  0%|          | 0/31 [00:00<?, ?it/s]

100%|██████████| 31/31 [00:19<00:00,  1.57it/s]







In [9]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
# NOTE(review): this writes attr_instr/{attr}.txt with TEXT/CHARACTER labels, while
# later sections write attr_instr/{attr}/{attr}.txt with Passage/Character/Answer
# labels - confirm which layout is current.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"TEXT = {text}\n")
        fw.write(f"CHARACTER = {character}\n")
        fw.write(f"{attr.upper()} = {completion}\n\n")

In [10]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

425 tokens


## Attire

In [5]:
# Select the "attire" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "attire"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find what clothes or type of attire is worn by the character using only the passage text. If they are not wearing anything, answer as NUDE. If you cannot find an answer, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find what clothes or type of attire is worn by the character using only the passage text. If they are not wearing anything, answer as NUDE. If you cannot find an answer, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [9]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 37/37 [00:23<00:00,  1.58it/s]







In [11]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"{attr.upper()}: {completion.strip()}\n\n")

In [12]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

584 tokens


## Attitude

In [4]:
# Select the "attitude" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "attitude"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, infer the attitude, opinion or evaluation of the character which they hold towards something or someone in the passage text. Write your answer as briefly as possible. If you cannot infer the attitude of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, infer the attitude, opinion or evaluation of the character which they hold towards something or someone in the passage text. Write your answer as briefly as possible. If you cannot infer the attitude of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [5]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 30/30 [00:26<00:00,  1.14it/s]







In [6]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [7]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

422 tokens


## Demeanor

In [8]:
# Select the "demeanor" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "demeanor"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, infer the demeanor, manners, bearing, or outward behavior of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the demeanor of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, infer the demeanor, manners, bearing, or outward behavior of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the demeanor of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [9]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 33/33 [00:24<00:00,  1.36it/s]







In [10]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [11]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

720 tokens


## Emotion

In [12]:
# Select the "emotion" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "emotion"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, infer the emotions, feelings, or mental state of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the emotions of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, infer the emotions, feelings, or mental state of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the emotions of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [13]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 36/36 [00:24<00:00,  1.45it/s]







In [14]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [15]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

700 tokens


## Eyes

In [19]:
# Select the "eyes" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "eyes"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find attributes of the character's eyes. Write your answer as briefly as possible. If you cannot find any attributes of the character's eyes, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find attributes of the character's eyes. Write your answer as briefly as possible. If you cannot find any attributes of the character's eyes, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [20]:
# Fill the template for each sample, query the model, and keep whatever
# prompt_sample returns, in row order.
# NOTE(review): this section samples at temperature=1 while the other attribute
# sections use temperature=0, so these answers are not reproducible - confirm
# that this is intentional.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=1)
    outputs += [output]
print()

100%|██████████| 31/31 [00:25<00:00,  1.24it/s]







In [21]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [22]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

484 tokens


## Goal

In [32]:
# Restrict the sample table to the rows annotated for the "goal" attribute.
attr = "goal"
attr_df = sample_df[sample_df["attr"] == attr]

def norm_scene(text):
    """Collapse each run of consecutive newlines in `text` into a single newline and trim surrounding whitespace."""
    return re.sub(r"\n+", "\n", text).strip()

# Instruction prompt for the current attribute. Unlike the other sections' templates,
# <PASSAGE> sits on its own line below the "Passage:" label; <PASSAGE> and
# <CHARACTER> are replaced per sample by the sampling cell.
template = """Given a passage from a movie script and a character mentioned in it, find the goal or motive of the character, or what they are trying to achieve. If you cannot find the goal of the character, write CANNOT ANSWER.

Passage: 
<PASSAGE>

Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the goal or motive of the character, or what they are trying to achieve. If you cannot find the goal of the character, write CANNOT ANSWER.

Passage: 
<PASSAGE>

Character: <CHARACTER>
Answer:


In [33]:
# Normalize each passage, fill the template, query the model at temperature 0,
# and keep whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    norm_text = norm_scene(text)
    prompt = template.replace("<PASSAGE>", norm_text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 29/29 [01:09<00:00,  2.39s/it]







In [34]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        norm_text = norm_scene(text)
        fw.write(f"Passage:\n{norm_text}\n\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [36]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

2937 tokens


## Hair

In [37]:
# Select the "hair" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "hair"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the hair color, hair type or other attributes of the character's hair. Write your answer as briefly as possible. If you cannot find any attributes of the character's hair, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the hair color, hair type or other attributes of the character's hair. Write your answer as briefly as possible. If you cannot find any attributes of the character's hair, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [38]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 38/38 [00:21<00:00,  1.73it/s]







In [39]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [40]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

592 tokens


## Physical Appearance

In [7]:
# Select the "physical appearance" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "physical appearance"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the explicitly described physical attributes of the character's body using only the passage text. Write your answer as briefly as possible. If you cannot find any physical attributes of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the explicitly described physical attributes of the character's body using only the passage text. Write your answer as briefly as possible. If you cannot find any physical attributes of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [8]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 38/38 [00:26<00:00,  1.42it/s]







In [9]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [10]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

637 tokens


## Physical Health Status

In [45]:
# Select the "physical health status" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "physical health status"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the physical health condition of the character using only the passage text, such as whether they are suffering from any disease, injured, or physically fit. Write your answer as briefly as possible. If you cannot find any information about the physical health condition of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the physical health condition of the character using only the passage text, such as whether they are suffering from any disease, injured, or physically fit. Write your answer as briefly as possible. If you cannot find any information about the physical health condition of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [12]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 39/39 [00:28<00:00,  1.38it/s]







In [13]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [46]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

561 tokens


## Possessions

In [10]:
# Select the "possessions" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "possessions"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the items held by the character with them presently using only the passage text. Write your answer as briefly as possible. If the passage does not explicitly specify any items held by the character, write CANNOT ANSWER. Do not consider the clothes, shoes, or any headgear worn by the character.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the items held by the character with them presently using only the passage text. Write your answer as briefly as possible. If the passage does not explicitly specify any items held by the character, write CANNOT ANSWER. Do not consider the clothes, shoes, or any headgear worn by the character.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [11]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 39/39 [00:22<00:00,  1.74it/s]







In [12]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [13]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

633 tokens


## Profession

In [43]:
# Select the "profession" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "profession"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the profession of the character using only the passage text. Write your answer as briefly as possible. Unless the passage clearly and explicitly states the character's profession, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the profession of the character using only the passage text. Write your answer as briefly as possible. Unless the passage clearly and explicitly states the character's profession, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [18]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 32/32 [00:14<00:00,  2.20it/s]







In [19]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [44]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

808 tokens


## Qualities

In [23]:
# Select the "qualities" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
# The instruction previously misspelled "abilities" as "abilies"; the typo was part
# of the text sent to the model, so it is corrected here.
attr = "qualities"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find any special unique qualities, skills, or abilities of the character using only the passage text. Write your answer as briefly as possible. If you cannot find any special skills of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find any special unique qualities, skills, or abilies of the character using only the passage text. Write your answer as briefly as possible. If you cannot find any special skills of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [24]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 34/34 [00:19<00:00,  1.77it/s]







In [25]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [26]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

714 tokens


## Race

In [31]:
# Select the "race" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "race"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the race or ethnicity of the character using only the passage text. Write your answer as briefly as possible. If you cannot find the race or ethnicity of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the race or ethnicity of the character using only the passage text. Write your answer as briefly as possible. If you cannot find the race or ethnicity of the character, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [32]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 30/30 [00:16<00:00,  1.77it/s]







In [29]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [30]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

411 tokens


## Voice

In [37]:
# Select the "voice" samples and define the instruction prompt for them.
# <PASSAGE> and <CHARACTER> are placeholders replaced per sample by the sampling cell.
attr = "voice"
attr_df = sample_df[sample_df["attr"] == attr]

template = """Given a passage from a movie script and a character mentioned in it, find the quality, tone, pitch, accent, or any other attributes of the character's voice using only the passage text. Write your answer as briefly as possible. If you cannot find any attributes of the character's voice, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:
"""

print(template.strip())

Given a passage from a movie script and a character mentioned in it, find the quality, tone, pitch, accent, or any other attributes of the character's voice using only the passage text. Write your answer as briefly as possible. If you cannot find any attributes of the character's voice, write CANNOT ANSWER.

Passage: <PASSAGE>
Character: <CHARACTER>
Answer:


In [38]:
# Fill the template for each sample, query the model at temperature 0, and keep
# whatever prompt_sample returns, in row order.
outputs = []

for _, row in tqdm.tqdm(attr_df.iterrows(), total=len(attr_df)):
    text = row["text"]
    character = row["character"]
    prompt = template.replace("<PASSAGE>", text)
    prompt = prompt.replace("<CHARACTER>", character).strip()
    output = prompt_sample(prompt, temperature=0)
    outputs += [output]
print()

100%|██████████| 32/32 [00:16<00:00,  1.99it/s]







In [40]:
# Save every sample with the model's answer for manual review. Validate before
# opening the file: prompt_sample returns None on failure, zip() would silently
# misalign a stale `outputs`, and open(..., "w") erases any earlier results.
if len(outputs) != len(attr_df) or any(item is None for item in outputs):
    raise ValueError(f"outputs for {attr!r} are missing or incomplete; re-run the sampling cell")
with open(os.path.join(data_dir, f"attr_instr/{attr}/{attr}.txt"), "w") as fw:
    for (_, row), output in zip(attr_df.iterrows(), outputs):
        text, character = row["text"], row["character"]
        completion = output["choices"][0]["text"]
        fw.write(f"Passage: {text}\n")
        fw.write(f"Character: {character}\n")
        fw.write(f"Answer: {completion.strip()}\n\n")

In [41]:
# Measure how many tiktoken tokens the saved prompt file for the current attribute takes.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/prompt.txt")
with open(attr_prompt_file, "r") as fr:
    attr_instr = fr.read()
attr_instr = attr_instr.strip()
n_tokens = len(encoding.encode(attr_instr))
print(f"{n_tokens} tokens")

495 tokens


# CoT

In [38]:
def _strip_label(line, label):
    """Return `line` without the exact leading `label`, trimmed of surrounding whitespace.

    str.lstrip(label) is not a prefix remover: it drops any leading characters that
    occur in `label`, so "Character:Carl".lstrip("Character:") yields "l".
    """
    if line.startswith(label):
        line = line[len(label):]
    return line.strip()


# Parse every attribute's prompt.txt into its instruction (first line) and a list of
# (passage, character, answer) demonstrations.
attrs = sorted(sample_df["attr"].unique())
instruction_dict, demonstrations_dict = {}, {}

for attr in attrs:
    prompt_file = os.path.join(data_dir, "attr_instr", attr, "prompt.txt")
    with open(prompt_file, "r") as fr:
        prompt_lines = fr.read().strip().split("\n")
    instruction = prompt_lines[0]
    demonstrations = []
    n_lines = len(prompt_lines)
    if attr == "goal":
        # "goal" passages span several lines and end at a blank line, followed by the
        # Character and Answer lines. Layout assumed from the goal template - TODO
        # confirm against the actual prompt.txt.
        i = 3
        while i < n_lines:
            passage = []
            while i < n_lines and prompt_lines[i] != "":
                passage.append(prompt_lines[i])
                i += 1
            if i + 2 >= n_lines:
                raise ValueError(f"{prompt_file}: incomplete demonstration near line {i + 1}")
            demonstrations.append(("\n".join(passage).strip(),
                                   _strip_label(prompt_lines[i + 1], "Character:"),
                                   _strip_label(prompt_lines[i + 2], "Answer:")))
            i += 3
            # Skip the blank separator(s) and the next "Passage:" header, however many
            # there are. A fixed stride of 6 would drop the first passage line whenever
            # only one blank line separates two demonstrations.
            while i < n_lines and prompt_lines[i].strip() in ("", "Passage:"):
                i += 1
    else:
        # One demonstration per 4 lines: Passage, Character, Answer, blank separator.
        i = 2
        while i < n_lines:
            if i + 2 >= n_lines:
                raise ValueError(f"{prompt_file}: incomplete demonstration near line {i + 1}")
            demonstrations.append((_strip_label(prompt_lines[i], "Passage:"),
                                   _strip_label(prompt_lines[i + 1], "Character:"),
                                   _strip_label(prompt_lines[i + 2], "Answer:")))
            i += 4
    instruction_dict[attr] = instruction
    demonstrations_dict[attr] = demonstrations

## Accomplishments

In [32]:
# Print the parsed instruction followed by each demonstration, labelling the
# answer with the title-cased attribute name.
attr = "accomplishments"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    print(
        f"Passage: {passage}",
        f"Character: {character}",
        f"{attr.title()}: {answer}",
        "",
        sep="\n",
    )

Given a passage from a movie script and a character mentioned in it, find what the character has accomplished or achieved in the past using only the passage text. Include only significant positive achievements such as winning an award, overcoming a challenge, learning a skill, or completing their education. Give an answer as briefly as possible without missing the important facts. If you cannot find any accomplishments, write CANNOT ANSWER.

Passage: DELLA FRYE, 26, crosses the newsroom. Stylish, Meticulouslygroomed, ambitious - she's a successful blogger who recently accepted a position on the Globe's fledgling internet edition. She heads toward Cal's desk... then slows a bit, not wanting to barge in while he's on the phone. We peer over his cubicle wall, as:
Character: Della Frye
Accomplishments: Successful blogger; accepted a position on the Globe's fledgling internet edition.

Passage: Rita walks to her minivan alone, in a hurry. She Jumps in and starts the stubborn engine. HELEN (

In [23]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
# Unlike the other attributes, this prompt also allows the answer COPY.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and a description of the past accomplishments or achievements of the character, very briefly explain why the passage supports or how it gives evidence about those accomplishments. If the accomplishments field contains CANNOT ANSWER, then explain why you cannot find any past accomplishments or achievements of the character from the passage. If the accomplishments have been mostly copied from the passage, you can answer as COPY.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and a description of the past accomplishments or achievements of the character, very briefly explain why the passage supports or how it gives evidence about those accomplishments. If the accomplishments field contains CANNOT ANSWER, then explain why you cannot find any past accomplishments or achievements of the character from the passage. If the accomplishments have been mostly copied from the passage, you can answer as COPY.

Passage: <Passage>
Character: <Character>
Accomplishments: <Attr>
Answer:


In [24]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute. Responses are collected in demonstration order; a failed
# request is stored as None because prompt_sample swallows API errors.
explanations = []

# Index by `attr` rather than a hard-coded "accomplishments" key so this cell
# stays consistent with the template and the output path, which both use `attr`.
for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    prompt = template.replace("<Passage>", passage).replace("<Character>", character).replace("<Attr>", answer)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 6/6 [00:31<00:00,  5.24s/it]


In [28]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt. The demonstrations are looked up by
# `attr` (not a hard-coded key) so they always match the output directory.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

In [29]:
# Report how many tokens the saved cot_prompt.txt of the current attribute
# occupies under the tokenizer loaded at the top of the notebook.
attr_prompt_file = os.path.join(data_dir, f"attr_instr/{attr}/cot_prompt.txt")
with open(attr_prompt_file, "r") as fr:
    raw_prompt = fr.read()
# Surrounding whitespace is dropped before counting.
attr_instr = raw_prompt.strip()
n_tokens = len(encoding.encode(attr_instr))
print(n_tokens, "tokens")

1164 tokens


## Age

In [6]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "age"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, infer the current age of the character using only the passage text. If you cannot infer their age, write CANNOT ANSWER.

Passage: KEYS in the lock. LAUGHTER in the hallway. The door swings open to reveal a drenched Will (29) carrying four sacks of groceries, the bottoms collapsing from the rain. His wife Josephine (28) pushes past him to get the phone.
Character: WILL
Age: 29

Passage: JULIA nods to LITTLE JULIA, standing by the record player, who puts on a '78: "Oh John My Son To Me You Are so Wonderful". It's a 30's ballad - definitely not rock and roll. The room falls quiet, thinking 'what is this shiite'. JULIA re-appears from the kitchen holding a birthday cake, 17 candles on top. She brings it to JOHN. He eyes her before blowing out the candles.
Character: John
Age: 17

Passage: We spot a snippet: "The Girl: Sweet Sixteen and never -- well, once or twice." Fay finishes, rises, switches to Mandarin:
Character: FA

In [7]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the age of the character, very briefly explain why the passage supports or how it gives evidence about the character's age. If the age field contains CANNOT ANSWER, then explain why you cannot find the character's age from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the age of the character, very briefly explain why the passage supports or how it gives evidence about the character's age. If the age field contains CANNOT ANSWER, then explain why you cannot find the character's age from the passage.

Passage: <Passage>
Character: <Character>
Age: <Attr>
Answer:


In [8]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 6/6 [00:19<00:00,  3.27s/it]


In [9]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Attire

In [10]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "attire"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find what clothes or type of attire is worn by the character using only the passage text. If they are not wearing anything, answer as NUDE. If you cannot find an answer, write CANNOT ANSWER.

Passage: Pavel exits office, closing the door. Doug gets out the envelope with the money in and writes on it "BUGLER", name and address, attention Henry Whittaker. Doug then puts the envelope back in his jacket pocket and moves to the window.
Character: DOUG
Attire: Jacket

Passage: In the fine master bedroom, MISS ELEANOR HAVISHAM sits in front of a mirror. It's hard to see her face beneath the veil, but she's a little older than one might expect for a bride in the last years of the 18th Century. Nevertheless, there's no mistaking the grace and beauty there - nor the discontent and resentment in the eyes of ARTHUR HAVISHAM, the half-brother, sulky and malign.
Character: ELEANOR HAVISHAM
Attire: WEDDING DRESS

Passage: Nick, Dev,

In [11]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the attire worn by the character, very briefly explain why the passage supports or how it gives evidence about the character's attire or what clothes they are wearing. If the attire field contains CANNOT ANSWER, then explain why you cannot find the character's attire from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the attire worn by the character, very briefly explain why the passage supports or how it gives evidence about the character's attire or what clothes they are wearing. If the attire field contains CANNOT ANSWER, then explain why you cannot find the character's attire from the passage.

Passage: <Passage>
Character: <Character>
Attire: <Attr>
Answer:


In [12]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 7/7 [00:17<00:00,  2.44s/it]


In [13]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Attitude

In [15]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "attitude"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, infer the attitude, opinion or evaluation of the character which they hold towards something or someone in the passage text. Write your answer as briefly as possible. If you cannot infer the attitude of the character, write CANNOT ANSWER.

Passage: Neo's eyes light up as he steps closer to the screens that seem alive with a constant flow of data.
Character: Neo
Attitude: Interested.

Passage: Just look for Mr. Pelikas. And be careful. Todd gives Frady a hugely hostile look, but he goes down the hall toward back of house.
Character: FRADY
Attitude: CANNOT ANSWER

Passage: Steven yanks the knife back out, and gently places it in Billy's tiny hands, wrapping his son's fingers around the wooden handle. The boy grins like he was just handed a new toy. Steven smiles proudly, almost teary-eyed. Together, they stare at the head. It's a real father and son moment.
Character: BILLY
Attitude: Excitement/happiness

Passage: Tight

In [16]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the attitude of the character, very briefly explain why the passage supports or how it gives evidence about the character's attitude, opinion or evaluation towards something or someone in the passage text. If the attitude field contains CANNOT ANSWER, then explain why you cannot find the character's attitude from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the attitude of the character, very briefly explain why the passage supports or how it gives evidence about the character's attitude, opinion or evaluation towards something or someone in the passage text. If the attitude field contains CANNOT ANSWER, then explain why you cannot find the character's attitude from the passage.

Passage: <Passage>
Character: <Character>
Attitude: <Attr>
Answer:


In [17]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 6/6 [00:17<00:00,  2.94s/it]


In [18]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Demeanor

In [19]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "demeanor"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, infer the demeanor, manners, bearing, or outward behavior of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the demeanor of the character, write CANNOT ANSWER.

Passage: A man toils over a pot of simmering tomato sauce. He is a handsome Italian-American with big brown eyes. He is VICTOR. Victor tastes the sauce he's cooking the way a sommelier tastes wine. Slow and deliberate. He's pleased but subdued. Victor carefully plates some of that fresh pasta with his sauce and takes it into the living room. Sophia looks stressed out. She keeps glancing at what looks like a draft of a magazine article, certain parts have been highlighted. She's in the middle of typing something when Victor approaches with the plate of food.
Character: Victor
Demeanor: Careful, pleased, subdued.

Passage: As BRIDGE and CASTLE glimmer in the distance, Hagrid emerges from the Forbidden For

In [20]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the demeanor of the character, very briefly explain why the passage supports or how it gives evidence about the character's demeanor, manners, bearing, or outward behavior. If the demeanor field contains CANNOT ANSWER, then explain why you cannot find the character's demeanor from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the demeanor of the character, very briefly explain why the passage supports or how it gives evidence about the character's demeanor, manners, bearing, or outward behavior. If the demeanor field contains CANNOT ANSWER, then explain why you cannot find the character's demeanor from the passage.

Passage: <Passage>
Character: <Character>
Demeanor: <Attr>
Answer:


In [21]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 6/6 [00:34<00:00,  5.73s/it]


In [22]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Emotion

In [23]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "emotion"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, infer the emotions, feelings, or mental state of the character using only the passage text. Write your answer as briefly as possible. If you cannot infer the emotions of the character, write CANNOT ANSWER.

Passage: IN THE MANUFACTORY: Armand enters, Ferroq and McGivens pushing Elena and Joaquin onward. IN THE TUNNEL: Zorro's getting the hell beaten out of him SEES THE FUSE moving toward the locomotive, manages to BREAK AWAY only to be TACKLED by the Stoker, Zorro's hand just out of reach of the fuse as it STREAKS ON -- and it's too late, there's no way he'll stop it now.
Character: ZORRO
Emotion: Frustration, desperation.

Passage: Maurice is out of the truck and Kenneth, with a shotgun, is scrambling to be next. Buddy catches him by his jacket collar and holds him squirming until Foley is out.
Character: KENNETH
Emotion: Fear, panic.

Passage: The ADMIRAL'S MEN are all in costume, and are in a buzz of nervous excite

In [24]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the emotion of the character, very briefly explain why the passage supports or how it gives evidence about the character's emotions, feelings, or mental state. If the emotion field contains CANNOT ANSWER, then explain why you cannot find the character's emotion from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the emotion of the character, very briefly explain why the passage supports or how it gives evidence about the character's emotions, feelings, or mental state. If the emotion field contains CANNOT ANSWER, then explain why you cannot find the character's emotion from the passage.

Passage: <Passage>
Character: <Character>
Emotion: <Attr>
Answer:


In [25]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 8/8 [00:27<00:00,  3.43s/it]


In [26]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Eyes

In [28]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "eyes"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find the eye color, eye shape, or any other attributes of the character's eyes. Write your answer as briefly as possible. If you cannot find any attributes of the character's eyes, write CANNOT ANSWER.

Passage: CLAIRE stands before the Core. She is naked. Her skin is pale and beautiful and cold and wet. Her hair hangs in her face, covering her milk-white eyes...
Character: CLAIRE
Eyes: Milk-white.

Passage: GALLY'S face is gaunt, something dark and unhinged just beneath the surface. His left eye clouded over, his right, cold and piercing.
Character: GALLY
Eyes: Left eye clouded, right eye cold and piercing.

Passage: Julia is paralyzed against the wall. Staring into the dead as-marble eyes of Shepard. He looks her up and down.
Character: SHEPARD
Eyes: dead as marble

Passage: With the soft HISS of hydraulics, she LOWERS herself down to where her face is very close to that of Data's. Her features are almost angelic, b

In [29]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and some attributes of the eyes of the character, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's eyes. If the eye field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's eyes from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and some attributes of the eyes of the character, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's eyes. If the eye field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's eyes from the passage.

Passage: <Passage>
Character: <Character>
Eyes: <Attr>
Answer:


In [30]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 7/7 [00:33<00:00,  4.74s/it]


In [31]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Goal

In [39]:
# Select the attribute for this section and show its instruction text only
# (this section does not print the demonstrations).
attr = "goal"
goal_instruction = instruction_dict[attr]
print(goal_instruction)

Given a passage from a movie script and a character mentioned in it, find the goal or motive of the character, or what they are trying to achieve. Write your answer as briefly as possible. If you cannot find the goal of the character, write CANNOT ANSWER.


In [40]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
# Here the passage placeholder sits on its own line, followed by a blank line.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the goal of the character, very briefly explain why the passage supports or how it gives evidence about the character's goal, motive, or what they are trying to achieve. If the goal field contains CANNOT ANSWER, then explain why you cannot find the character's goal from the passage.

Passage:
<Passage>

Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the goal of the character, very briefly explain why the passage supports or how it gives evidence about the character's goal, motive, or what they are trying to achieve. If the goal field contains CANNOT ANSWER, then explain why you cannot find the character's goal from the passage.

Passage:
<Passage>

Character: <Character>
Goal: <Attr>
Answer:


In [42]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

In [43]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Hair

In [44]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "hair"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find the hair color, hair type or other attributes of the character's hair. Write your answer as briefly as possible. If you cannot find any attributes of the character's hair, write CANNOT ANSWER.

Passage: She's just about to say something when RAY GRIFFEN arrives. Ray's the starting tailback, black, handsome, clean-cut, well-spoken, middle class.
Character: RAY
Hair: CANNOT ANSWER

Passage: Carter switches on the light. Edna is pressed against the far wall, holding a poker. Her dress is torn and her hair dishevelled. When she sees Carter, she sighs with relief.
Character: EDNA
Hair: Dishevelled

Passage: The bespectacled, short-haired MURRAY ROSENBERG, 48, looks over and spots Danny listening outside. Murray walks over and shuts the door. He looks at DR. SWEENEY, who now has his jacket off.
Character: Danny
Hair: CANNOT ANSWER

Passage: She points to a table at the side of the restaurant. JIMMY ELLIS, a bald. littl

In [45]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and some attributes of the character's hair, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's hair. If the hair field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's hair from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and some attributes of the character's hair, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's hair. If the hair field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's hair from the passage.

Passage: <Passage>
Character: <Character>
Hair: <Attr>
Answer:


In [46]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 7/7 [00:21<00:00,  3.11s/it]


In [47]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Physical Appearance

In [49]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration. The answer is labelled "Body" here rather than with the
# attribute name, matching the field label used in this section's template.
attr = "physical appearance"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"Body: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find the explicitly described physical attributes of the character's body using only the passage text. Write your answer as briefly as possible. If you cannot find any physical attributes of the character's body, write CANNOT ANSWER.

Passage: Hans pours the water on the fireplace, putting out the fire. Anna tries to stop him. She falls to the floor, weak.
Character: ANNA
Body: Weak.

Passage: The untouched chicken broth sits on the night stand. Gideon is TALKING in his sleep. Morning light filters through the window shade. The room takes on a brownish tint. Gideon has dark rings under his eyes and looks like he has aged considerably.
Character: Gideon
Body: Dark rings under eyes, aged considerably.

Passage: The archive footage continues, but it is not of the Coronation. David and Wallis visiting the Fuhrer in Germany: Hitler gallantly kissing Mrs Simpson's hand while Goring and the Duke of Windsor beam; David giving

In [50]:
# Explanation-prompt template for the "physical appearance" attribute. The field
# label is hard-coded as "Body" (the f-string has no interpolated fields);
# <Passage>, <Character> and <Attr> are literal placeholders that are substituted
# later with str.replace.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and some physical attributes of the character's body, very briefly explain why the passage supports or how it gives evidence about those physical attributes of the character. If the body field contains CANNOT ANSWER, then explain why you cannot find any physical attributes of the character's body from the passage.

Passage: <Passage>
Character: <Character>
Body: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and some physical attributes of the character's body, very briefly explain why the passage supports or how it gives evidence about those physical attributes of the character. If the body field contains CANNOT ANSWER, then explain why you cannot find any physical attributes of the character's body from the passage.

Passage: <Passage>
Character: <Character>
Body: <Attr>
Answer:


In [51]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute. Responses are collected in demonstration order; a failed
# request is stored as None because prompt_sample swallows API errors.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Substitute the "<Attr>" placeholder. Replacing the literal text "Body" would
    # overwrite the field label with the answer and leave "<Attr>" in the prompt.
    prompt = template.replace("<Passage>", passage).replace("<Character>", character).replace("<Attr>", answer)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 7/7 [00:18<00:00,  2.71s/it]


In [52]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

# NOTE(review): the answer is labelled with attr.title() ("Physical Appearance") in
# the file, whereas this section's prompt uses the label "Body" - confirm which
# label downstream consumers of explanations.txt expect.
with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Physical Health Status

In [53]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "physical health status"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find the physical health condition of the character using only the passage text, such as whether they are suffering from any disease, injured, or physically fit. Write your answer as briefly as possible. If you cannot find any information about the physical health condition of the character, write CANNOT ANSWER.

Passage: SHOTS of Deirdre and Erik struggling to get Momo settled. CONTINUE SOUNDS OF GETTING MOMO SETTLED (shuffling of feet, pills being sorted, pans being picked up) over-GLIMPSES OF THE APARTMENT: --the stairwell and its curves, rusted sections --a pre-war light fixture medallion sans light fixture BACK TO SCENE--Deirdre gives mumbling Momo a pill. Richard, Brigid and Aimee clean up the kitchen mess.
Character: MOMO
Physical Health Status: Suffering from an unspecified illness.

Passage: Yuri shudders with the impact. The enraged Vitaly lunges towards the Leader but Yuri, despite his wound, holds him back

In [54]:
# Explanation-prompt template for the current attribute. It is an f-string, so the
# field label `{attr.title()}` is fixed when this cell runs; <Passage>, <Character>
# and <Attr> stay literal placeholders that are substituted later with str.replace.
# The CANNOT ANSWER sentence names the "physical health status" field; it previously
# said "attitude field", which does not exist in this prompt.
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the physical health status of the character, very briefly explain why the passage supports or how it gives evidence about the character's physical health condition. If the physical health status field contains CANNOT ANSWER, then explain why you cannot find the character's physical health status from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
# Display the template without its trailing newline for inspection.
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the physical health status of the character, very briefly explain why the passage supports or how it gives evidence about the character's physical health condition. If the attitude field contains CANNOT ANSWER, then explain why you cannot find the character's physical health status from the passage.

Passage: <Passage>
Character: <Character>
Physical Health Status: <Attr>
Answer:


In [55]:
# Ask the model (temperature 0) to explain the answer of every demonstration of
# the current attribute, collecting the raw responses in demonstration order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the placeholders one at a time, in the order the template lists them.
    prompt = template
    for placeholder, value in (("<Passage>", passage), ("<Character>", character), ("<Attr>", answer)):
        prompt = prompt.replace(placeholder, value)
    output = prompt_sample(prompt, temperature=0)
    explanations.append(output)

100%|██████████| 6/6 [00:20<00:00,  3.47s/it]


In [56]:
# Pair each demonstration of the current attribute with its generated explanation
# and save the records to explanations.txt.
attr_demonstrations = demonstrations_dict[attr]

# Validate before opening the file in "w" mode: prompt_sample returns None when the
# API call fails, and indexing None mid-loop would leave a truncated file behind.
# A length mismatch means `explanations` is stale (e.g. generated for another attr).
if len(explanations) != len(attr_demonstrations):
    raise RuntimeError(
        f"{len(explanations)} explanations for {len(attr_demonstrations)} '{attr}' demonstrations; "
        "re-run the generation cell."
    )
failed = [idx for idx, explanation in enumerate(explanations) if explanation is None]
if failed:
    raise RuntimeError(f"prompt_sample returned None for '{attr}' demonstrations {failed}; re-run the generation cell.")

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    for (passage, character, answer), explanation in zip(attr_demonstrations, explanations):
        completion = explanation["choices"][0]["text"].strip()
        fw.write(
            f"Passage: {passage}\n"
            f"Character: {character}\n"
            f"{attr.title()}: {answer}\n"
            f"Explanation: {completion}\n\n"
        )

## Possessions

In [57]:
# Select the attribute for this section, then show its instruction followed by
# every demonstration (passage, character, answer) parsed for it.
attr = "possessions"
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    # The empty last item yields the blank line that separates demonstrations.
    print(f"Passage: {passage}", f"Character: {character}", f"{attr.title()}: {answer}", "", sep="\n")

Given a passage from a movie script and a character mentioned in it, find the items held by the character with them presently using only the passage text. Write your answer as briefly as possible. If the passage does not explicitly specify any items held by the character, write CANNOT ANSWER. Do not consider the clothes, shoes, or any headgear worn by the character.

Passage: Nick hesitates, then picks it up again. He runs his fingers over the face of the bill and snaps the paper.
Character: NICK
Possessions: Paper Bill

Passage: Rose comes walking out of the house with groceries. She sets them on the picnic table and heads back into the house. Alvin loads the groceries into the now finished trailer. The back door of the house opens and a big sheet of foam rubber flies out the door followed by Rose. She hauls it over to the trailer and sets it in. She fusses over its arrangement.
Character: ROSE
Possessions: Groceries, foam rubber.

Passage: Holly, excited, runs over to April, who is b

In [58]:
# Prompt asking the model to justify a given possessions answer. The <Passage>,
# <Character> and <Attr> placeholders are substituted per demonstration later.
template = (
    "Given a passage from a movie script, a character mentioned in the passage, and the possessions held by the character, very briefly explain why the passage supports or how it gives evidence about those items or possessions held by the character presently. "
    "If the possessions field contains CANNOT ANSWER, then explain why you cannot find the character's possessions from the passage.\n"
    "\n"
    "Passage: <Passage>\n"
    "Character: <Character>\n"
    f"{attr.title()}: <Attr>\n"
    "Answer:\n"
)
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the possessions held by the character, very briefly explain why the passage supports or how it gives evidence about those items or possessions held by the character presently. If the possessions field contains CANNOT ANSWER, then explain why you cannot find the character's possessions from the passage.

Passage: <Passage>
Character: <Character>
Possessions: <Attr>
Answer:


In [59]:
# Ask the model for one explanation per demonstration, keeping responses in order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the three placeholders one after another, in the same order as a chained replace.
    prompt = template.replace("<Passage>", passage)
    prompt = prompt.replace("<Character>", character)
    prompt = prompt.replace("<Attr>", answer)
    # prompt_sample returns None when the request ultimately fails, so entries may be None.
    output = prompt_sample(prompt, temperature=0)
    explanations += [output]

100%|██████████| 6/6 [00:21<00:00,  3.60s/it]


In [60]:
# Save one Passage / Character / <Attr> / Explanation record per demonstration.
attr_demos = demonstrations_dict[attr]

# Guard against stale notebook state: zip() would silently drop or mispair records
# if `explanations` was produced for a different attribute.
if len(explanations) != len(attr_demos):
    raise ValueError(
        f"{len(explanations)} explanations for {len(attr_demos)} {attr} demonstrations; "
        "re-run the explanation cell for this attribute"
    )

# Render every record before opening the file: mode "w" truncates on open, so a
# failure half-way through would otherwise destroy the previously saved file.
explanation_records = []
for (passage, character, answer), explanation in zip(attr_demos, explanations):
    if explanation is None:
        # prompt_sample returns None when the API call ultimately fails.
        raise ValueError(f"no model response for character {character!r}; re-run the explanation cell")
    completion = explanation["choices"][0]["text"].strip()
    explanation_records.append(
        f"Passage: {passage}\n"
        f"Character: {character}\n"
        f"{attr.title()}: {answer}\n"
        f"Explanation: {completion}\n\n"
    )

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    fw.writelines(explanation_records)

## Profession

In [66]:
attr = "profession"
# Show the task instruction, a blank line, then each demonstration as a
# Passage / Character / <Attr> triple followed by a blank line.
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    print(
        f"Passage: {passage}",
        f"Character: {character}",
        f"{attr.title()}: {answer}",
        "",
        sep="\n",
    )

Given a passage from a movie script and a character mentioned in it, find the profession of the character using only the passage text. Write your answer as briefly as possible. Unless the passage clearly and explicitly states the character's profession, write CANNOT ANSWER.

Passage: 11. EXTERIOR FRONT ENTRANCE HARBOR COURT. 11. Limos await dignitaries for publicity cavalcade to Senator Theater premiere. PETIE, a 22-year-old hillbilly tough guy, dressed in a chauffeur's uniform sits behind the wheel of a white limousine as HONEY and MAYOR and their entourages make their way to limos followed by PRESS.
Character: PETIE
Profession: Chauffeur

Passage: (CU) A file folder is opened. We SEE recent PHOTOGRAPHS of Harry and Archie, alongside their original MUG SHOTS. (WIDE) DEKE YABLONSKI, is walking through the busy squadroom with WATCH COMMANDER, CAPT. RALPH JARVIS, 40. Jarvis is studying the file.
Character: JARVIS
Profession: WATCH COMMANDER

Passage: Yes! You Must! (pointing inside) Harr

In [62]:
# Prompt asking the model to justify a given profession answer. The <Passage>,
# <Character> and <Attr> placeholders are substituted per demonstration later.
template = (
    "Given a passage from a movie script, a character mentioned in the passage, and the profession of the character, very briefly explain why the passage supports or how it gives evidence about the character's profession. "
    "If the profession field contains CANNOT ANSWER, then explain why you cannot find the character's profession from the passage.\n"
    "\n"
    "Passage: <Passage>\n"
    "Character: <Character>\n"
    f"{attr.title()}: <Attr>\n"
    "Answer:\n"
)
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the profession of the character, very briefly explain why the passage supports or how it gives evidence about the character's profession. If the profession field contains CANNOT ANSWER, then explain why you cannot find the character's profession from the passage.

Passage: <Passage>
Character: <Character>
Profession: <Attr>
Answer:


In [63]:
# Ask the model for one explanation per demonstration, keeping responses in order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the three placeholders one after another, in the same order as a chained replace.
    prompt = template.replace("<Passage>", passage)
    prompt = prompt.replace("<Character>", character)
    prompt = prompt.replace("<Attr>", answer)
    # prompt_sample returns None when the request ultimately fails, so entries may be None.
    output = prompt_sample(prompt, temperature=0)
    explanations += [output]

100%|██████████| 8/8 [00:31<00:00,  3.88s/it]


In [67]:
# Save one Passage / Character / <Attr> / Explanation record per demonstration.
attr_demos = demonstrations_dict[attr]

# Guard against stale notebook state: zip() would silently drop or mispair records
# if `explanations` was produced for a different attribute.
if len(explanations) != len(attr_demos):
    raise ValueError(
        f"{len(explanations)} explanations for {len(attr_demos)} {attr} demonstrations; "
        "re-run the explanation cell for this attribute"
    )

# Render every record before opening the file: mode "w" truncates on open, so a
# failure half-way through would otherwise destroy the previously saved file.
explanation_records = []
for (passage, character, answer), explanation in zip(attr_demos, explanations):
    if explanation is None:
        # prompt_sample returns None when the API call ultimately fails.
        raise ValueError(f"no model response for character {character!r}; re-run the explanation cell")
    completion = explanation["choices"][0]["text"].strip()
    explanation_records.append(
        f"Passage: {passage}\n"
        f"Character: {character}\n"
        f"{attr.title()}: {answer}\n"
        f"Explanation: {completion}\n\n"
    )

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    fw.writelines(explanation_records)

## Qualities

In [69]:
attr = "qualities"
# Show the task instruction, a blank line, then each demonstration as a
# Passage / Character / <Attr> triple followed by a blank line.
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    print(
        f"Passage: {passage}",
        f"Character: {character}",
        f"{attr.title()}: {answer}",
        "",
        sep="\n",
    )

Given a passage from a movie script and a character mentioned in it, find any special unique qualities, skills, or abilies of the character using only the passage text. Write your answer as briefly as possible. If you cannot find any special skills of the character, write CANNOT ANSWER.

Passage: Carnegie is such an impassioned and inspiring speaker it's almost impossible not to get caught up and swept away by his words. His apparently unshakable belief is infectious.
Character: CARNEGIE
Qualities: Charismatic, inspiring, persuasive.

Passage: Through her Engineer's lens Watts sees DAVID manipulating a complex interface of light. The pulsing life-signs of the Sleeper begin to change.
Character: DAVID
Qualities: Technological expertise.

Passage: The "MESSIAH" FADES and an R & B BAND breaks into ROUSING MUSIC as Feingold approaches and sticks a CIGAR in his mouth, LIGHTS it.
Character: FEINGOLD
Qualities: CANNOT ANSWER

Passage: - Jeff completes the remaining ploughing - Jeff ripping ou

In [71]:
# Prompt asking the model to justify a given qualities answer. The <Passage>,
# <Character> and <Attr> placeholders are substituted per demonstration later.
# The text is sent to the model verbatim, so spelling matters ("abilities").
template = f"""Given a passage from a movie script, a character mentioned in the passage, and the qualities of the character, very briefly explain why the passage supports or how it gives evidence about the character's qualities, skills, or abilities. If the qualities field contains CANNOT ANSWER, then explain why you cannot find the character's qualities from the passage.

Passage: <Passage>
Character: <Character>
{attr.title()}: <Attr>
Answer:
"""
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the qualities of the character, very briefly explain why the passage supports or how it gives evidence about the character's qualities, skills, or abilies. If the qualities field contains CANNOT ANSWER, then explain why you cannot find the character's qualities from the passage.

Passage: <Passage>
Character: <Character>
Qualities: <Attr>
Answer:


In [72]:
# Ask the model for one explanation per demonstration, keeping responses in order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the three placeholders one after another, in the same order as a chained replace.
    prompt = template.replace("<Passage>", passage)
    prompt = prompt.replace("<Character>", character)
    prompt = prompt.replace("<Attr>", answer)
    # prompt_sample returns None when the request ultimately fails, so entries may be None.
    output = prompt_sample(prompt, temperature=0)
    explanations += [output]

100%|██████████| 7/7 [00:27<00:00,  3.87s/it]


In [73]:
# Save one Passage / Character / <Attr> / Explanation record per demonstration.
attr_demos = demonstrations_dict[attr]

# Guard against stale notebook state: zip() would silently drop or mispair records
# if `explanations` was produced for a different attribute.
if len(explanations) != len(attr_demos):
    raise ValueError(
        f"{len(explanations)} explanations for {len(attr_demos)} {attr} demonstrations; "
        "re-run the explanation cell for this attribute"
    )

# Render every record before opening the file: mode "w" truncates on open, so a
# failure half-way through would otherwise destroy the previously saved file.
explanation_records = []
for (passage, character, answer), explanation in zip(attr_demos, explanations):
    if explanation is None:
        # prompt_sample returns None when the API call ultimately fails.
        raise ValueError(f"no model response for character {character!r}; re-run the explanation cell")
    completion = explanation["choices"][0]["text"].strip()
    explanation_records.append(
        f"Passage: {passage}\n"
        f"Character: {character}\n"
        f"{attr.title()}: {answer}\n"
        f"Explanation: {completion}\n\n"
    )

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    fw.writelines(explanation_records)

## Race

In [74]:
attr = "race"
# Show the task instruction, a blank line, then each demonstration as a
# Passage / Character / <Attr> triple followed by a blank line.
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    print(
        f"Passage: {passage}",
        f"Character: {character}",
        f"{attr.title()}: {answer}",
        "",
        sep="\n",
    )

Given a passage from a movie script and a character mentioned in it, find the race or ethnicity of the character using only the passage text. Write your answer as briefly as possible. If you cannot find the race or ethnicity of the character, write CANNOT ANSWER.

Passage: Standing on the porch is SHELBY, an optimistic looking white boy - age 25. He is wearing a white button-down shirt and a black tie.
Character: SHELBY
Race: White

Passage: A big room divided into many small cubicles in which telephone operators, wearing headsets, answer 411 calls. In one of the cubicles is LOUISE WILLIAMS, a warm, sympathetic African-American woman in her forties. She is upset. On her workstation is a framed photograph of her twin sons Cedric and Neville (16).
Character: NEVILLE
Race: African-American

Passage: Dr. Ravell comes out of surgery, clearly exhausted. Without his surgical mask he is ruggedly handsome. TWO NURSES follow, attending him like a fighter fresh from the ring: CHLOE, 25, Raven-hai

In [75]:
# Prompt asking the model to justify a given race answer. The <Passage>,
# <Character> and <Attr> placeholders are substituted per demonstration later.
template = (
    "Given a passage from a movie script, a character mentioned in the passage, and the race of the character, very briefly explain why the passage supports or how it gives evidence about the character's race or ethnicity. "
    "If the race field contains CANNOT ANSWER, then explain why you cannot find the character's race or ethnicity from the passage.\n"
    "\n"
    "Passage: <Passage>\n"
    "Character: <Character>\n"
    f"{attr.title()}: <Attr>\n"
    "Answer:\n"
)
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and the race of the character, very briefly explain why the passage supports or how it gives evidence about the character's race or ethnicity. If the race field contains CANNOT ANSWER, then explain why you cannot find the character's race or ethnicity from the passage.

Passage: <Passage>
Character: <Character>
Race: <Attr>
Answer:


In [76]:
# Ask the model for one explanation per demonstration, keeping responses in order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the three placeholders one after another, in the same order as a chained replace.
    prompt = template.replace("<Passage>", passage)
    prompt = prompt.replace("<Character>", character)
    prompt = prompt.replace("<Attr>", answer)
    # prompt_sample returns None when the request ultimately fails, so entries may be None.
    output = prompt_sample(prompt, temperature=0)
    explanations += [output]

100%|██████████| 4/4 [00:11<00:00,  2.87s/it]


In [77]:
# Save one Passage / Character / <Attr> / Explanation record per demonstration.
attr_demos = demonstrations_dict[attr]

# Guard against stale notebook state: zip() would silently drop or mispair records
# if `explanations` was produced for a different attribute.
if len(explanations) != len(attr_demos):
    raise ValueError(
        f"{len(explanations)} explanations for {len(attr_demos)} {attr} demonstrations; "
        "re-run the explanation cell for this attribute"
    )

# Render every record before opening the file: mode "w" truncates on open, so a
# failure half-way through would otherwise destroy the previously saved file.
explanation_records = []
for (passage, character, answer), explanation in zip(attr_demos, explanations):
    if explanation is None:
        # prompt_sample returns None when the API call ultimately fails.
        raise ValueError(f"no model response for character {character!r}; re-run the explanation cell")
    completion = explanation["choices"][0]["text"].strip()
    explanation_records.append(
        f"Passage: {passage}\n"
        f"Character: {character}\n"
        f"{attr.title()}: {answer}\n"
        f"Explanation: {completion}\n\n"
    )

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    fw.writelines(explanation_records)

## Voice

In [78]:
attr = "voice"
# Show the task instruction, a blank line, then each demonstration as a
# Passage / Character / <Attr> triple followed by a blank line.
print(instruction_dict[attr], end="\n\n")
for passage, character, answer in demonstrations_dict[attr]:
    print(
        f"Passage: {passage}",
        f"Character: {character}",
        f"{attr.title()}: {answer}",
        "",
        sep="\n",
    )

Given a passage from a movie script and a character mentioned in it, find the quality, tone, pitch, accent, or any other attributes of the character's voice using only the passage text. Write your answer as briefly as possible. If you cannot find any attributes of the character's voice, write CANNOT ANSWER.

Passage: BORIS BALKAN, standing at a state-of-the-art lecturer's desk, is a bulky, imposing figure of a man around 50 years old. His thick gray hair is slicked back to reveal a domed forehead. The eyes beneath it radiate keen intelligence through a pair of heavy hornrims. He speaks in a deep, slow, almost monotonous voice, but with great authority.
Character: BORIS BALKAN
Voice: Deep, slow, almost monotonous, with great authority.

Passage: Dewey, this is crazy! Ain't nobody gon' wanna hear music like that. You standin' there, playing as fast as you can, singin' like some kind of. .. punk. DEWEY
Character: DEWEY
Voice: Fast-paced, punk-like singing.

Passage: Giulia pedals away, bu

In [79]:
# Prompt asking the model to justify a given voice answer. The <Passage>,
# <Character> and <Attr> placeholders are substituted per demonstration later.
template = (
    "Given a passage from a movie script, a character mentioned in the passage, and some attributes of the character's voice, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's voice. "
    "If the voice field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's voice from the passage.\n"
    "\n"
    "Passage: <Passage>\n"
    "Character: <Character>\n"
    f"{attr.title()}: <Attr>\n"
    "Answer:\n"
)
print(template.strip())

Given a passage from a movie script, a character mentioned in the passage, and some attributes of the character's voice, very briefly explain why the passage supports or how it gives evidence about those attributes of the character's voice. If the voice field contains CANNOT ANSWER, then explain why you cannot find any attributes of the character's voice from the passage.

Passage: <Passage>
Character: <Character>
Voice: <Attr>
Answer:


In [80]:
# Ask the model for one explanation per demonstration, keeping responses in order.
explanations = []

for passage, character, answer in tqdm.tqdm(demonstrations_dict[attr]):
    # Fill the three placeholders one after another, in the same order as a chained replace.
    prompt = template.replace("<Passage>", passage)
    prompt = prompt.replace("<Character>", character)
    prompt = prompt.replace("<Attr>", answer)
    # prompt_sample returns None when the request ultimately fails, so entries may be None.
    output = prompt_sample(prompt, temperature=0)
    explanations += [output]

100%|██████████| 6/6 [00:15<00:00,  2.59s/it]


In [81]:
# Save one Passage / Character / <Attr> / Explanation record per demonstration.
attr_demos = demonstrations_dict[attr]

# Guard against stale notebook state: zip() would silently drop or mispair records
# if `explanations` was produced for a different attribute.
if len(explanations) != len(attr_demos):
    raise ValueError(
        f"{len(explanations)} explanations for {len(attr_demos)} {attr} demonstrations; "
        "re-run the explanation cell for this attribute"
    )

# Render every record before opening the file: mode "w" truncates on open, so a
# failure half-way through would otherwise destroy the previously saved file.
explanation_records = []
for (passage, character, answer), explanation in zip(attr_demos, explanations):
    if explanation is None:
        # prompt_sample returns None when the API call ultimately fails.
        raise ValueError(f"no model response for character {character!r}; re-run the explanation cell")
    completion = explanation["choices"][0]["text"].strip()
    explanation_records.append(
        f"Passage: {passage}\n"
        f"Character: {character}\n"
        f"{attr.title()}: {answer}\n"
        f"Explanation: {completion}\n\n"
    )

with open(os.path.join(data_dir, f"attr_instr/{attr}/explanations.txt"), "w") as fw:
    fw.writelines(explanation_records)

In [83]:
# Report the size of each attribute's chain-of-thought prompt.
# NOTE: this loop rebinds the notebook-level `attr`; cells run afterwards see the last attribute.
for attr in attrs:
    with open(os.path.join(data_dir, "attr_instr", attr, "cot_prompt.txt")) as fr:
        cot_prompt = fr.read().strip()
    n_tokens = len(encoding.encode(cot_prompt))
    # The number is the length of the tokenizer output, so the unit is tokens, not words.
    print(f"{attr:30s} = {n_tokens:4d} tokens")

accomplishments                = 1164 words
age                            =  675 words
attire                         =  825 words
attitude                       =  698 words
demeanor                       = 1045 words
emotion                        = 1095 words
eyes                           =  721 words
goal                           = 3222 words
hair                           =  779 words
physical appearance            =  863 words
physical health status         =  826 words
possessions                    =  854 words
profession                     = 1097 words
qualities                      = 1059 words
race                           =  553 words
voice                          =  686 words
