In [8]:
from IPython.display import HTML, display
from copy import deepcopy
import random
import os
import openai
import time
import sys
import ast
import json

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from getpass import getpass
import os
import pandas as pd

# import pdfkit
import html
from copy import deepcopy
import re

import re
from tqdm.auto import tqdm 
from datasets import load_dataset

In [9]:
# Ask for the OpenAI API key interactively (getpass does not echo the input),
# so the secret is never stored in the notebook source.
OPENAI_API_KEY = getpass()

# Publish the key through the process environment.
# NOTE(review): presumably this is where the OpenAI/LangChain clients look it up -- confirm.
os.environ.update({'OPENAI_API_KEY': OPENAI_API_KEY})

In [10]:
# Single chat model shared by every role in this notebook (task author,
# agent, simulated environment). A temperature of 0.9 is set so repeated
# runs of the same prompts produce varied outputs.
model = ChatOpenAI(
    model="gpt-4-1106-preview",
    temperature=0.9,
)

In [11]:
# Seed dialogue used to make the model invent a task. It is a scripted
# exchange (the AIMessage entries are fixed, pre-written replies) that ends on
# a human request for a numbered list of detailed tasks, so the model's next
# completion is that task list.
_task_creation_msgs = [
    # 1) Ask for a minimal action set, returned as JSON.
    HumanMessage(
        content="I am building a city environment where a robot can interact. Can you define some set of actions a robot can perform in the environment? Please make sure to keep the number of actions as low as possible such that all other actions can be derived from the actions you define. Return the actions as a JSON string with keys as the action command and the description as the value."
    ),
    # 2) Fixed reply: six primitive actions.
    AIMessage(
        content="""To define a minimalist set of actions a robot can perform in a city environment, we should aim for the most fundamental movements and interactions that other, more specific actions can be derived from. Here's a JSON representation of such actions:
```json
{
  "move_forward": "The robot advances in the direction it is currently facing.",
  "turn_left": "The robot rotates 90 degrees to its left.",
  "turn_right": "The robot rotates 90 degrees to its right.",
  "pick_up": "The robot picks up an object within its reach.",
  "put_down": "The robot places a held object down in the designated location.",
  "interact": "The robot performs a general interaction with objects or interfaces in the environment (e.g., press a button, open a door)."
}
```
These actions are fundamental in the sense that combinations of movements (forward, left, right) can result in any navigational pattern within the city. The interaction commands (pick_up, put_down, interact) are abstract enough to encompass various object manipulations or interactions with the city environment. The actual implementation could be context-sensitive, meaning the exact action performed might depend on the robot's current state, position, and the characteristics of the object or interface it's interacting with."""
    ),
    # 3) Ask for a short natural-language overview of the environment.
    HumanMessage(
        content="Good, now create the environment overview that I should give the robot which can perform these actions in the environment. The environment description can be a natural language descripiton in a few sentences."
    ),
    # 4) Fixed reply: the city-grid overview.
    AIMessage(
        content="The city environment is a grid-based layout with streets and sidewalks for navigation, various buildings, and diverse objects such as trash cans, benches, traffic signals, and streetlights. The robot operates primarily on the sidewalks but has to occasionally cross streets at designated crosswalks while obeying traffic signals. Buildings can occasionally be entered, with doorways that can be interacted with. There are areas designated for different activities, like parks with objects that can be picked up or put down, and zones marked for robot recharging. Other dynamic entities, such as pedestrians, vehicles, and animals, move in the environment and the robot must navigate around them, ensuring safe and respectful interaction."
    ),
    # 5) Open request the model answers at run time: a numbered list of tasks.
    HumanMessage(
        content="Good, now please create some detailed tasks which the robot can perform in the environment using some combination of the actions. Note don't explain how to solve the task but only list down the feasible tasks in a sentence or two. Number each of the tasks you generate. Plz do not be vague in describing the task, describe every detail of the task."
    ),
]

In [12]:
# System prompt for the model instance that role-plays the environment.
# It lists the sections the simulator is given and the three-part reply format
# ("## Thought", "## Environment", "## Current State"); the generation loop
# relies on the "## Environment:" and "## Current State" headings to cut the
# observation out of each reply.
_env_msgs = [
    SystemMessage(
        content="""You have to simulate a real environment to interact with an agent. You should simulate it by pretending to act as the environment. 
You have the following information:
ENVIRONMENT OVERVIEW: Description of the environment.
ACTIONS: list of actions you can perform in the environment.
HISTORY: history of the agent's interaction with the environment.
CURRENT STATE: current state of the environment as perceived by the agent.
STOPPING CRITERIA: command that the environment prints when the task has been accomplished.

To produce the ENVIRONMENT side of the conversation as a real environment you should think how the environment should behave. Please use the following format and think step by step:
```
## Thought: you should always think about what to do, explicitly restating the task without pronouns and restating details based on the conversation history and new input.
## Environment: result of the agent's interaction with the environment.
## Current State: after the previous action, describe the robot's view of the environment.
```"""
    ),
]

In [13]:
# System prompt for the model instance that plays the agent.
# It lists the sections the agent is given and the reply format
# (Thought / Prior Observations / Action). The Action must be wrapped in a
# ```json fenced block, because the generation loop extracts the action with a
# regex over that fence.
#
# Fixes relative to the previous version of this prompt:
#   * the literal was opened with four quotes, so the prompt text itself began
#     with a stray `"` character;
#   * the JSON example had an unquoted key and a doubled closing quote, i.e. it
#     showed the agent malformed JSON to imitate.
_agent_msgs = [
    SystemMessage(
        content="""You're a helpful AI agent. You have to interact with an environment to solve a task given by a user. To start with, you will be given the following information about the environment:
ENVIRONMENT OVERVIEW: Description of the environment.
ACTIONS: list of actions you can perform in the environment.
HISTORY: history of the agent's interaction with the environment.
CURRENT STATE: current state of the environment as perceived by the agent.
STOPPING CRITERIA: command that the environment prints when the task has been accomplished.

To solve a task, please use the following format and think step by step:
```
Thought: you should always think about what to do, explicitly restating the task without pronouns and restating details based on the conversation history and new input.
Prior Observations: restate verbatim ALL details/names/figures/facts/etc from past observations relevant to the task and ALL related entities.
Action: the action to perform in the environment. The action should obey the specified interaction format. The action should be in a JSON string, for example: 
```json
{
  "Action": "turn_left"
}
``` 
```"""
    ),
]

In [14]:
# Generate synthetic agent <-> environment conversations.
#
# For every episode the model is used in three roles:
#   1. task author  - extends the `_task_creation_msgs` dialogue into a task,
#                     a start state and a stopping criterion;
#   2. agent        - answers with Thought / Prior Observations / Action;
#   3. environment  - simulates the outcome of each action.
# Finished conversations are collected in `all_convs` and written to a JSONL
# file after every episode.

num_turns = 50   # max follow-up agent/environment exchanges after the first one
all_convs = []   # one {"conversations": [...]} dict per finished episode

# Output file; rewritten in full after each episode (see the end of the loop).
OUTPUT_PATH = './synthetic-3/language_commands_syn.jsonl'

# Sent to the environment when the agent's reply carries no ```json block.
NO_ACTION_FALLBACK = "No action needed further. Pls confirm if I have completed the task"

# Human follow-ups appended, in order, to the task-creation dialogue. Each one is
# sent after the model's reply to the previous message has been recorded.
_FOLLOW_UP_PROMPTS = (
    "Very good, now I will give the 2nd task for the robot to perform. Can you describe the state he is in, precisely describing the details of the environment with exact position (like sofa is on the left 10m apart, etc) and what the robot view at one instance.",
    "Can you imagine a stopping criteria when the robot succeeds in the given task? The stopping criteria should be a short statement/command produced by the environment when the robot completes the task.",
    """Very good, now I want to give the information I gathered from your help to an agent in a specific format. Can you please arrange it in the format described below. Pls just copy in the below format.
## Environment Overview
Description of the environment

## Actions 
A JSON string to list all available set of actions

## Task
The task for the agent to perform

## Current State
The current state/location/position of the agent 

## Stopping Criteria
A command that the environment prints when the task has been accomplished.""",
)


def _extract_action(agent_text):
    """Return the body of the first ```json fenced block in `agent_text`.

    Falls back to `NO_ACTION_FALLBACK` when the reply contains no such block,
    so a malformed agent reply never raises.
    """
    json_block = re.search(r"```json(.*?)```", agent_text, re.DOTALL)
    return json_block.group(1).strip() if json_block else NO_ACTION_FALLBACK


def _extract_observation(env_text):
    """Return the text between '## Environment:' and '## Current State'.

    If a heading is missing, `str.split` leaves the text uncut on that side, so
    the whole (stripped) reply is returned in the worst case.
    """
    return env_text.split("## Environment:")[-1].split("## Current State")[0].strip()


# Create the output directory up front: otherwise the first write would fail
# only after a full episode of API calls has already been spent.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Named loop variables: assigning to `_` / `__` would clobber IPython's
# output-history variables.
for _episode in tqdm(range(100)):
    # --- 1. Task creation -------------------------------------------------
    # Work on a copy. The list is appended to below, and aliasing the
    # module-level template would leak this episode's messages into every
    # later episode.
    task_creation_msgs = list(_task_creation_msgs)
    for follow_up in _FOLLOW_UP_PROMPTS:
        response = model.predict_messages(task_creation_msgs)
        task_creation_msgs.append(response)
        task_creation_msgs.append(HumanMessage(content=follow_up))
    # Reply to the last follow-up: the fully formatted environment description.
    response = model.predict_messages(task_creation_msgs)
    env_setup_info = response.content
    task = env_setup_info.split("## Task")[-1].split("##")[0].strip()
    stop = env_setup_info.split("## Stopping Criteria")[-1].strip()

    # --- 2. Conversation setup --------------------------------------------
    # Fresh copies of both system-prompt templates (same aliasing hazard).
    agent_msgs = list(_agent_msgs)
    env_msgs = list(_env_msgs)
    convs = {"conversations": []}

    agent_msgs.append(HumanMessage(content=env_setup_info))
    agent_msgs.append(HumanMessage(content=f"""ENVIRONMENT: Here is your task. {task}\n\nAGENT:"""))
    env_msgs.append(HumanMessage(content=env_setup_info))
    convs["conversations"].append({"from": "human", "value": env_setup_info, "loss": False})

    # Only the first message to the environment restates the task.
    env_prefix = f"ENVIRONMENT: Here is your task. {task}\n\n"

    # --- 3. Agent / environment exchange ------------------------------------
    # One initial exchange plus at most `num_turns` follow-ups.
    for _turn in range(num_turns + 1):
        agent_response = model.predict_messages(agent_msgs)
        agent_msgs.append(agent_response)
        convs["conversations"].append({"from": "gpt", "value": agent_response.content, "loss": True})
        json_str = _extract_action(agent_response.content)

        env_msgs.append(HumanMessage(content=f"{env_prefix}AGENT: {json_str}\n\nENVIRONMENT:"))
        env_prefix = ""
        env_response = model.predict_messages(env_msgs)
        env_msgs.append(env_response)

        # Only the "## Environment:" part is shown to the agent and stored.
        observation = _extract_observation(env_response.content)
        convs["conversations"].append({"from": "human", "value": observation, "loss": False})
        agent_msgs.append(HumanMessage(content=f"""ENVIRONMENT: {observation}\n\nAGENT:"""))

        # `"" in text` is always True, so an empty criterion must not end the episode.
        if stop and stop in env_response.content:
            break
    all_convs.append(convs)

    # Checkpoint: rewrite the whole file after every episode so an interrupted
    # run still leaves all finished episodes on disk.
    with open(OUTPUT_PATH, 'w') as file:
        for d in all_convs:
            json.dump(d, file)
            file.write('\n')

  0%|          | 0/100 [00:00<?, ?it/s]