In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

# Proof of Concept Environments

In [61]:
# Number of scenarios to load; the loading cell iterates scenario ids 1..num_scenarios.
num_scenarios = 3

In [62]:
# Read one result CSV per (scenario, instruction type) and stack them into a
# single frame. The frames are collected first and concatenated once, so rows
# already loaded are not re-copied on every iteration.
# NOTE(review): `type_mapping` is not defined in any cell visible here; it must
# already exist in the kernel for this cell to run — confirm where it comes from.
frames = [
    pd.read_csv(f"scenario{s}_type{t}.csv", index_col = False)
    for s in range(1, num_scenarios + 1)
    for t in type_mapping
]
# pd.concat rejects an empty list, so keep the original empty-frame result then.
df = pd.concat(frames, axis = 0, ignore_index = True) if frames else pd.DataFrame()

In [63]:
# Cast the result columns to their analysis dtypes in a single pass.
df = df.astype({
    "scenario": int,
    "succeeded": bool,
    "steps_to_succeed": float,
    "total_interactions": int,
})

In [64]:
# Swap +/-inf for NaN everywhere in the frame.
df = df.replace([np.inf, -np.inf], np.nan)

In [65]:
# Per (scenario, instruction type): the fraction of runs that succeeded and the
# mean of steps_to_succeed (the mean skips NaN entries).
# Named aggregation replaces groupby.apply with a Series-building lambda; the
# mean of the boolean `succeeded` column equals sum / group size.
table = (
    df.groupby(["scenario", "inst_type"])
      .agg(success_rate = ("succeeded", "mean"),
           steps_to_succeed = ("steps_to_succeed", "mean"))
      .reset_index()
      .rename(columns = {"scenario": "env_type", "inst_type": "inst_level"})
)

In [66]:
# Display the per-environment / per-instruction-level summary built above.
table

Unnamed: 0,env_type,inst_level,success_rate,steps_to_succeed
0,1,high,0.0,
1,1,low,0.0,
2,1,mid-direct-explore,0.0,
3,2,high,0.075,7.0
4,2,low,0.368421,12.0
5,2,mid-direct-explore,0.2,7.833333
6,3,high,0.173913,4.875
7,3,low,0.478261,11.272727
8,3,mid-direct-explore,0.103448,1.666667


### Thoughts
#### Environment 1: TA has an empty room, LA has to unlock a door
- High: "Get to the goal." Obviously will fail, LA has no idea what to do in the room
- Low: "Go forward all the way until you hit a wall, then turn right, then go forward all the way again until you hit the goal." Fail because LA's environment is vastly different
- Mid: "Pick up the key, use it to unlock the door, and go through the door to get to the goal. For each of the key, door, and goal, if you don't see it first, explore the room until you find it." LA is aware that the key and door exist, so failure is most likely due to the difficulty of getting the LLM to understand that it cannot interact with an object unless it is facing the object and the object is directly in front of it.

#### Environment 2: TA has to unlock a door, LA has an empty room
- High: "Get to the goal." Small success rate. Seems to be only if the goal is readily visible/accessible to the agent (otherwise it just won't know where to go) and it doesn't do a weird move that removes the goal from its field of vision.
- Low: "Turn right and move forward until you see a key and pick it up. Then turn around, go forward one step, and turn right to use the key to unlock the door. Then go through the door and continue walking until you hit a wall. Then turn right and go forward until you hit the goal." Truly unsure why the success rate is so high. Originally it would hallucinate that there was a key because of the instruction, so the success rate was lower. Then, after the invalid actions were removed, I suppose it just kept turning and moving according to the instruction and incidentally saw the goal.
- Mid: "There are no obstacles around you. Find the goal and go straight to it. If you don't see it at first, explore the room more until you find it." As above, it would hallucinate objects until the invalid actions were taken away. Not sure why the success rate is lower than Low's, but at least the number of steps is also lower.

#### Environment 3: TA has one green goal, LA has multiple colored goals
- High: "Get to the goal." If the green goal is closest, it will succeed. On second thought, should have ended the run when any goal is reached but actually count as failure if not green; this would have lowered the success rate.
- Low: "Walk forward until you hit a wall, then turn and keep walking forward until you hit the goal." Same as above, I guess it just kept on walking and walking until it hit the green goal???
- Mid: "Go straight to the green goal. If you don't see it anywhere, explore the room more until you find it. Do not go to a goal of another color." Again, no clue why this is not more clearly better.

# BabyAI-Text Environments

In [17]:
# Instruction-type labels; the loading cell turns each into a file-name suffix
# by replacing "-" with "_".
instruction_types = ["high", "low-teach", "low-learn", "mid-direct-explore"]

In [18]:
# Number of scenarios to load; the loading cell iterates scenario ids 1..num_scenarios.
num_scenarios = 1

In [19]:
# Read one BabyAI-Text result CSV per (scenario, instruction type) and stack
# them into a single frame. File names use "_" where the instruction type
# label uses "-".
# The comprehension variable is named `inst_type` so the builtin `type` is not
# rebound in the kernel, and the frames are concatenated once rather than
# re-copying the accumulated rows on every iteration.
frames = [
    pd.read_csv(f"data/babyai_text/scenario{s}_{inst_type.replace('-', '_')}.csv", index_col = False)
    for s in range(1, num_scenarios + 1)
    for inst_type in instruction_types
]
# pd.concat rejects an empty list, so keep the original empty-frame result then.
df = pd.concat(frames, axis = 0, ignore_index = True) if frames else pd.DataFrame()

In [20]:
# Cast the result columns to their analysis dtypes, then swap +/-inf for NaN.
column_dtypes = {
    "scenario": int,
    "succeeded": bool,
    "steps_to_succeed": float,
    "total_interactions": int,
}
df = df.astype(column_dtypes).replace([np.inf, -np.inf], np.nan)

In [21]:
# Per (scenario, instruction type): the fraction of runs that succeeded and the
# mean of steps_to_succeed (the mean skips NaN entries).
# Named aggregation replaces groupby.apply with a Series-building lambda; the
# mean of the boolean `succeeded` column equals sum / group size.
table = (
    df.groupby(["scenario", "inst_type"])
      .agg(success_rate = ("succeeded", "mean"),
           steps_to_succeed = ("steps_to_succeed", "mean"))
      .reset_index()
      .rename(columns = {"scenario": "env_type", "inst_type": "inst_level"})
)

In [22]:
# Display the per-environment / per-instruction-level summary built above.
table

Unnamed: 0,env_type,inst_level,success_rate,steps_to_succeed
0,1,high,0.1,17.8
1,1,low-learn,0.3,14.5
2,1,low-teach,0.1,17.7
3,1,mid-direct-explore,0.2,20.6


# Custom Environments with more high level actions

In [35]:
# Read one high-level-actions result CSV per (scenario, instruction type) and
# stack them into a single frame. This cell reuses `num_scenarios` and
# `instruction_types` from earlier cells; file names use "_" where the
# instruction type label uses "-".
# The comprehension variable is named `inst_type` so the builtin `type` is not
# rebound in the kernel, and the frames are concatenated once rather than
# re-copying the accumulated rows on every iteration.
frames = [
    pd.read_csv(f"data/high_level_actions_360_explore/scenario{s}_{inst_type.replace('-', '_')}.csv", index_col = False)
    for s in range(1, num_scenarios + 1)
    for inst_type in instruction_types
]
# pd.concat rejects an empty list, so keep the original empty-frame result then.
df = pd.concat(frames, axis = 0, ignore_index = True) if frames else pd.DataFrame()

In [36]:
# Cast the result columns to their analysis dtypes, then swap +/-inf for NaN.
df = df.astype({
    "scenario": int,
    "succeeded": bool,
    # float rather than int: consistent with the other sections of this
    # notebook, and an int cast raises on NaN/inf entries before the
    # replacement below could ever handle them.
    "steps_to_succeed": float,
    "total_interactions": int,
})

df = df.replace([np.inf, -np.inf], np.nan)

In [37]:
# Per (scenario, instruction type): the fraction of runs that succeeded and the
# mean of steps_to_succeed (the mean skips NaN entries).
# Named aggregation replaces groupby.apply with a Series-building lambda; the
# mean of the boolean `succeeded` column equals sum / group size.
table = (
    df.groupby(["scenario", "inst_type"])
      .agg(success_rate = ("succeeded", "mean"),
           steps_to_succeed = ("steps_to_succeed", "mean"))
      .reset_index()
      .rename(columns = {"scenario": "env_type", "inst_type": "inst_level"})
)

In [38]:
# Display the per-environment / per-instruction-level summary built above.
table

Unnamed: 0,env_type,inst_level,success_rate,steps_to_succeed
0,1,high,0.1,13.4
1,1,low-learn,0.3,15.4
2,1,low-teach,0.142857,19.714286
3,1,mid-direct-explore,0.285714,17.857143


# Mistral Tests and GPT Tests

In [11]:
# NOTE(review): neither name is referenced by the cells visible below, which
# hard-code the setup file paths — confirm whether they are still needed.
num_setups = 5
# NOTE(review): the setup tables below show an inst_type of "low_student",
# which does not match "low_learner" here — confirm the intended label.
instruction_types = ["high", "low_teacher", "low_learner", "mid_direct_explore"]

In [20]:
# Summarize each model's setup-1 runs with one shared pipeline: average every
# column within each inst_type, drop the averaged seed, and relabel the mean of
# `succeeded` as a success rate.
mistral1, gpt1 = (
    pd.read_csv(csv_path, index_col = False)
      .groupby("inst_type")
      .mean()
      .drop(columns = ["seed"])
      .rename(columns = {"succeeded": "success_rate"})
    for csv_path in ("./mistral_tests/setup1.csv", "./gpt_tests/setup1.csv")
)
# Join the two summaries on their inst_type index; the suffixes tell the
# models' columns apart.
setup1 = pd.merge(mistral1, gpt1, left_index = True, right_index = True, suffixes = ("_mistral", "_gpt"))
print("Setup 1")
setup1

Setup 1


Unnamed: 0_level_0,success_rate_mistral,total_steps_mistral,total_tokens_mistral,success_rate_gpt,total_steps_gpt,total_tokens_gpt
inst_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
high,0.0,4.4,3191.2,0.2,6.6,4258.2
low_student,0.4,6.6,5502.6,0.4,10.4,6674.4
low_teacher,0.0,4.0,2797.4,0.2,18.8,12033.2
mid_direct_explore,0.0,4.6,3681.6,0.2,9.6,7290.0


In [17]:
# Summarize the Mistral setup-2 runs per inst_type: average every column, drop
# the averaged seed, and relabel the mean of `succeeded` as a success rate.
setup2_runs = pd.read_csv("./mistral_tests/setup2.csv", index_col = False)
mistral2 = (
    setup2_runs
    .groupby("inst_type")
    .mean()
    .drop(columns = ["seed"])
    .rename(columns = {"succeeded": "success_rate"})
)
mistral2

Unnamed: 0_level_0,success_rate,total_steps,total_tokens
inst_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
high,0.6,2.4,1318.0
low_student,0.4,3.8,3183.4
low_teacher,0.4,3.4,2877.2
mid_direct_explore,0.0,4.0,3642.4
