In [119]:
list(client.batches.list())


[BatchJob(
   create_time=datetime.datetime(2026, 2, 5, 8, 5, 1, 109024, tzinfo=TzInfo(UTC)),
   display_name='batch-paired-polite-hedge-and-categorical',
   model='models/gemini-3-flash-preview',
   name='batches/e8uw77lse9dlhluzx5w3uqq3jwvvfn694i1g',
   state=<JobState.JOB_STATE_PENDING: 'JOB_STATE_PENDING'>,
   update_time=datetime.datetime(2026, 2, 5, 8, 5, 1, 109024, tzinfo=TzInfo(UTC))
 ),
 BatchJob(
   create_time=datetime.datetime(2026, 2, 5, 8, 4, 14, 276352, tzinfo=TzInfo(UTC)),
   display_name='batch-paired-hedge-and-categorical',
   model='models/gemini-3-flash-preview',
   name='batches/wi2gbzdhu1telhq5n2xj2fwvkn7v6ivhxip1',
   state=<JobState.JOB_STATE_PENDING: 'JOB_STATE_PENDING'>,
   update_time=datetime.datetime(2026, 2, 5, 8, 4, 14, 276352, tzinfo=TzInfo(UTC))
 ),
 BatchJob(
   create_time=datetime.datetime(2026, 2, 5, 8, 1, 56, 768566, tzinfo=TzInfo(UTC)),
   display_name='batch-paired-hedge-and-categorical',
   model='models/gemini-3-flash-preview',
   name='batches

In [100]:

import json
from google import genai
from google.genai import types
import os
import pandas as pd
import random
from dotenv import load_dotenv
load_dotenv()

client = genai.Client(
    api_key=os.getenv("google_api_key"),
)


predicates = ['cold', 'loud', 'messy', 'long']
scales = {
    'cold':["Much colder","Somewhat colder","About the same","Somewhat warmer","Much warmer"],
    'loud':["Much louder","Somewhat louder","About the same","Somewhat quieter","Much quieter"],
    'messy':["Much messier","Somewhat messier","About the same","Somewhat tidier","Much tidier"],
    'long':["Much longer","Somewhat longer","About the same","Somewhat shorter","Much shorter"],
}
df = pd.read_csv('PolitenessScenario.csv')[['baseline', 'c1', 'hedged_baseline','predicate question','politeness question']] # change this to predicate question for the how cold? questions
modifiers = ['slightly','kind of','quite', 'very', 'extremely']

In [101]:
    
def create_prompt(scenario,predicate, question, modifier):
    speaker = question.split(' ')[3]
    start_pos = scenario.find('"')
    end_pos = scenario.rfind('"')
    combined_prompt = f"""
Please read this scenario and answer the question afterwards:
Scenario:
{scenario.replace('[modifier] ', '')}

Question: If {speaker} had said "{scenario[start_pos + 1:end_pos].replace('[modifier]', modifier)}" (adding "{modifier}"), does this mean they are actually feeling:
(A) {scales[predicate][0]}
(B) {scales[predicate][1]}
(C) {scales[predicate][2]}
(D) {scales[predicate][3]}
(E) {scales[predicate][4]}

Please output your answer as a json format like this:
{{"scenario_answer": "<your_choice>"}}

Do not output anything else.
    """
    return combined_prompt

In [102]:
def create_politeness_prompt(scenario,predicate,question, modifier):
    speaker = question.split(' ')[3]
    start_pos = scenario.find('"')
    end_pos = scenario.rfind('"')
    combined_prompt = f"""
Please read this scenario and answer the question afterwards:
Scenario:
{scenario.replace('[modifier] ', '')}

Question: If {speaker} had said "{scenario[start_pos + 1:end_pos].replace('[modifier]', modifier)}" (adding "{modifier}"),would it be:
(A) Much less polite
(B) Somewhat less polite
(C) About the same politeness
(D) Somewhat more polite
(E) Much more polite

Please output your answer as a json format like this:
{{"scenario_answer": "<your_choice>"}}

Do not output anything else.
    """
    return combined_prompt

In [103]:
def create_combined_prompt(scenario1,scenario2,predicate, question, modifier):
    # create combined prompt with the **4** scenarios in random order
    # generate in random order, 4 combinations of modified and unmodified scenarios with c1 or baseline
    swap = random.choice([True, False])
    scenarios = ["", ""]
    if not swap:
        scenarios[0] = scenario1
        scenarios[1] = scenario2
    else:
        scenarios[0] = scenario2
        scenarios[1] = scenario1
    speaker = question.split(' ')[3]
    start_pos = scenario1.find('"')
    end_pos = scenario1.rfind('"')
    combined_prompt = f"""
Please read these two scenarios:
Scenario 1:
{scenarios[0].replace('[modifier] ', '')}

Scenario 2:
{scenarios[1].replace('[modifier] ', '')}

If {speaker} had said "{scenario1[start_pos + 1:end_pos].replace('[modifier]', modifier)}" (adding "{modifier}"), does this mean they are actually feeling:
(A) {scales[predicate][0]}
(B) {scales[predicate][1]}
(C) {scales[predicate][2]}
(D) {scales[predicate][3]}
(E) {scales[predicate][4]}

Please only output your answer as a json format like this:
{{"scenario_1_answer": "<your_choice_for_scenario_1>", "scenario_2_answer": "<your_choice_for_scenario_2>"}}

Do not output anything else.
    """
    return combined_prompt,swap

In [104]:
def create_combined_polite_prompt(scenario1,scenario2,predicate, question, modifier):
    # create combined prompt with the **4** scenarios in random order
    # generate in random order, 4 combinations of modified and unmodified scenarios with c1 or baseline
    swap = random.choice([True, False])
    scenarios = ["", ""]
    if not swap:
        scenarios[0] = scenario1
        scenarios[1] = scenario2
    else:
        scenarios[0] = scenario2
        scenarios[1] = scenario1
    speaker = question.split(' ')[3]
    start_pos = scenario1.find('"')
    end_pos = scenario1.rfind('"')
    combined_prompt = f"""
Please read these two scenarios:
Scenario 1:
{scenarios[0].replace('[modifier] ', '')}

Scenario 2:
{scenarios[1].replace('[modifier] ', '')}

If {speaker} had said "{scenario1[start_pos + 1:end_pos].replace('[modifier]', modifier)}" (adding "{modifier}"),would it be:
(A) Much less polite
(B) Somewhat less polite
(C) About the same politeness
(D) Somewhat more polite
(E) Much more polite

Please provide your choice for each of the two scenarios. Output your answer as a json format like this:
{{"scenario_1_answer": "<your_choice_for_scenario_1>", "scenario_2_answer": "<your_choice_for_scenario_2>"}}

Do not output anything else.
    """
    return combined_prompt,swap

In [107]:
requests = []
with open("batch-hedge-and-categorical.jsonl", "w") as f:
    for modifier in modifiers:
        for index, row in df.iterrows():
            for attempt in range(3):
                baseline = row['baseline']
                hedged_baseline = row['hedged_baseline']
                question = row['predicate question']     
                scenario_values = ["",""]
                prompt = create_prompt(baseline,predicates[int(index/5)],question,modifier)
                requests.append({"key": f"request-{len(requests) + 1}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
                prompt = create_prompt(hedged_baseline,predicates[int(index/5)],question,modifier)
                requests.append({"key": f"request-hedge-{len(requests) + 1}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
                print(prompt)
    for req in requests:
        f.write(json.dumps(req) + "\n")

uploaded_file = client.files.upload(
    file='batch-hedge-and-categorical.jsonl',
    config=types.UploadFileConfig(display_name='batch-hedge-and-categorical', mime_type='jsonl')
)
input_file = uploaded_file.name
file_batch_job = client.batches.create(
    model="gemini-3-flash-preview",
    src=input_file,
    config={
        'display_name': "hedged-and-categorical",
    },
)

print(f"Created batch job: {file_batch_job.name}")


Please read this scenario and answer the question afterwards:
Scenario:
In their office, Terry tells his coworker, "I’m sorry but the room is cold," and asks them to raise the temperature.

Question: If Terry had said "I’m sorry but the room is slightly cold," (adding "slightly"), does this mean they are actually feeling:
(A) Much colder
(B) Somewhat colder
(C) About the same
(D) Somewhat warmer
(E) Much warmer

Please output your answer as a json format like this:
{"scenario_answer": "<your_choice>"}

Do not output anything else.
    

Please read this scenario and answer the question afterwards:
Scenario:
In their office, Terry tells his coworker, "I’m sorry but the room is cold," and asks them to raise the temperature.

Question: If Terry had said "I’m sorry but the room is slightly cold," (adding "slightly"), does this mean they are actually feeling:
(A) Much colder
(B) Somewhat colder
(C) About the same
(D) Somewhat warmer
(E) Much warmer

Please output your answer as a json form

In [111]:
requests = []
with open("batch-hedge-polite-and-categorical.jsonl", "w") as f:
    for modifier in modifiers:
        for index, row in df.iterrows():
            for attempt in range(3):
                baseline = row['baseline']
                hedged_baseline = row['hedged_baseline']   
                question = row['politeness question']   
                scenario_values = ["",""]
                prompt = create_politeness_prompt(baseline,predicates[int(index/5)],question,modifier)
                requests.append({"key": f"request-{len(requests) + 1}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
                prompt = create_politeness_prompt(hedged_baseline,predicates[int(index/5)],question,modifier)
                requests.append({"key": f"request-hedge-{len(requests) + 1}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
    for req in requests:
        f.write(json.dumps(req) + "\n")

uploaded_file = client.files.upload(
    file='batch-hedge-polite-and-categorical.jsonl',
    config=types.UploadFileConfig(display_name='batch-hedge-polite-and-categorical', mime_type='jsonl')
)
input_file = uploaded_file.name
file_batch_job = client.batches.create(
    model="gemini-3-flash-preview",
    src=input_file,
    config={
        'display_name': "hedged-polite-and-categorical",
    },
)

print(f"Created batch job: {file_batch_job.name}")

Created batch job: batches/ci6om54fa3mljyqe7w7iexc0jybx0odfdipj


In [115]:
# Create a sample JSONL file
swapped_list = []
requests = []
with open("batch-paired-hedge-and-categorical.jsonl", "w") as f:
    for modifier in modifiers:
        for index, row in df.iterrows():
            for attempt in range(3):
                baseline = row['baseline']
                hedged = row['hedged_baseline']
                question = row['predicate question']     
                scenario_values = ["",""]
                prompt , swapped = create_combined_prompt(baseline,hedged,predicates[int(index/5)],question,modifier)
                swapped_list.append(swapped)
                requests.append({"key": f"request-{len(requests) + 1}-{swapped}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
    for req in requests:
        f.write(json.dumps(req) + "\n")
with open("swapped_paired_list_feb4.json", "w") as f:
    json.dump(swapped_list, f)

uploaded_file = client.files.upload(
    file='batch-paired-hedge-and-categorical.jsonl',
    config=types.UploadFileConfig(display_name='batch-paired-hedge-and-categorical', mime_type='jsonl')
)
input_file = uploaded_file.name
print(f"Uploaded file: {uploaded_file.name}")
file_batch_job = client.batches.create(
    model="gemini-3-flash-preview",
    src=input_file,
    config={
        'display_name': "batch-paired-hedge-and-categorical",
    },
)

print(f"Created batch job: {file_batch_job.name}")

Uploaded file: files/gukwhhx2zig5
Created batch job: batches/wi2gbzdhu1telhq5n2xj2fwvkn7v6ivhxip1


In [117]:
# Create a sample JSONL file
swapped_list = []
requests = []
with open("batch-paired-polite-hedge-and-categorical.jsonl", "w") as f:
    for modifier in modifiers:
        for index, row in df.iterrows():
            for attempt in range(3):
                question = row["politeness question"]
                baseline = row['baseline']
                hedged = row['hedged_baseline']   
                scenario_values = ["",""]
                prompt , swapped = create_combined_polite_prompt(baseline,hedged,predicates[int(index/5)],question,modifier)
                swapped_list.append(swapped)
                requests.append({"key": f"request-{len(requests) + 1}-{swapped}", "request": {"contents": [{"parts": [{"text": prompt}]}]}})
    for req in requests:
        f.write(json.dumps(req) + "\n")
with open("polite_swapped_paired_list_feb4.json", "w") as f:
    json.dump(swapped_list, f)

uploaded_file = client.files.upload(
    file='batch-paired-polite-hedge-and-categorical.jsonl',
    config=types.UploadFileConfig(display_name='batch-paired-polite-hedge-and-categorical', mime_type='jsonl')
)
input_file = uploaded_file.name
print(f"Uploaded file: {uploaded_file.name}")
file_batch_job = client.batches.create(
    model="gemini-3-flash-preview",
    src=input_file,
    config={
        'display_name': "batch-paired-polite-hedge-and-categorical",
    },
)

print(f"Created batch job: {file_batch_job.name}")

Uploaded file: files/g7mqxfp5br5n
Created batch job: batches/e8uw77lse9dlhluzx5w3uqq3jwvvfn694i1g


In [118]:
job_name = file_batch_job.name  # (e.g. 'batches/your-batch-id')
batch_job = client.batches.get(name=job_name)

completed_states = set([
    'JOB_STATE_SUCCEEDED',
    'JOB_STATE_FAILED',
    'JOB_STATE_CANCELLED',
    'JOB_STATE_EXPIRED',
])

print(f"Polling status for job: {job_name}")
batch_job = client.batches.get(name=job_name) # Initial get
print(f"Current state: {batch_job.state.name}")

Polling status for job: batches/e8uw77lse9dlhluzx5w3uqq3jwvvfn694i1g
Current state: JOB_STATE_PENDING


In [None]:
response_outputs = []
if batch_job.state.name == 'JOB_STATE_SUCCEEDED':
    result_file_name = batch_job.dest.file_name
    file_content = client.files.download(file=result_file_name).decode('utf-8')
    for line_num, line in enumerate(file_content.split('\n'), 1):
        line = line.strip()
        if not line:
            print("not line")
            print(line_num)
            continue
        try:
            data = json.loads(line)
            response = data['response']['candidates'][0]['content']['parts'][0]['text']
            start = response.find('{')
            end = response.rfind('}') + 1
            response = json.loads(response[start:end])
            response["scenario_answer"] = response["scenario_answer"].strip('()')
            response_outputs.append(response["scenario_answer"])
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON on line {line_num}: {e}")
            continue
else:
    print(f"Job did not succeed. Final state: {batch_job.state.name}")
    if batch_job.error:
        print(f"Error: {batch_job.error}")
list_outputs.append([modifier,question] + scenario_values)
index_num = 0
for modifier in modifiers:
    for index, row in df.iterrows():
        question = row['predicate question'] 
        choices_baseline = ""
        choices_hedged = ""
        for attempt in range(3):
            choices_baseline += response_outputs[index_num]
            choices_hedged += response_outputs[index_num+1]
            index_num+=2
        list_outputs.append([modidifier,question,choices_baseline,choices_hedged]
                
df_outputs = pd.DataFrame(list_outputs, columns=['modifier', 'question', 'baseline', 'hedge_baseline'])                
df_outputs.to_csv('hedged-and-categorical.csv', index=False)

In [58]:
# Store lists
list_outputs = [
    ['mod1', 'q1', ['A', 'B', 'C'], ['D', 'E']],
    ['mod2', 'q2', ['F', 'G'], ['H', 'I', 'J']],
]

df = pd.DataFrame(list_outputs, columns=['modifier', 'question', 'options1', 'options2'])

# Retrieve
options = df.loc[0, 'options1']
print(options)  # ['A', 'B', 'C']
print(options[0])  # 'A'

['A', 'B', 'C']
A
