In [1]:
import openai
import json
import random
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from getpass import getpass
import os

from IPython.display import HTML, display
import pdfkit
import html
from copy import deepcopy

import threading
import queue
import time  # Used for simulating API call
import pickle

In [2]:
# Expose the OpenAI API key through the OPENAI_API_KEY environment variable.
# A key that is already exported is reused so the notebook can run unattended
# (Restart Kernel -> Run All); otherwise it is requested interactively without
# echoing the input, so the secret is never hardcoded in the notebook.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')

os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

········


### Setup Model

In [3]:
# Build the GPT-4 chat model with its sampling temperature (0.8) set at construction time.
model = ChatOpenAI(model="gpt-4-0613", temperature=0.8)

### Prompts

In [4]:
# Prompt that tells the model to role-play a software environment.
# It has three positional `{}` slots which, in order of appearance in the text, are:
#   1. the description of the software environment to simulate,
#   2. the expectations about the environment's inputs and outputs,
#   3. the current state of the environment.
# NOTE(review): presumably filled with str.format(...) by a later cell - confirm.
# If so, any literal braces added to this text would have to be doubled.
# NOTE(review): "enviroment" in the first sentence is misspelled; it is runtime text
# sent to the model, so it is left untouched here.
environment_prompt_template = '''
You will simulate a software enviroment that can be used by a user or agent. You will simulate the following software environment:
{}

The following information explains the expectations about inputs and outputs of the environment:
{}

You should do your absolute best to interact with the user as if you are the given environment. You should NOT act as if you are chatGPT, GPT-4, or any other AI agent. The goal is to make a convincing simulation. Below you are given information about the state of the environment:
{}

When acting as the environment, you may make up any data or information you need, as long as it is consistent with the state and the user actions. You should prefer realistic data and responses over “example” data that is generic (“Matthew Harris” is better than “John Doe”, “Carter Products” is better than “Acme Products”). Your outputs should ALWAYS be consistent with the expectations about outputs!

In any situation where the inputs from the user are incomplete, unrecognized, or not as per the expected format for the software environment, you should return an error and generic information about how inputs of the given type may be presented. 

Please start with an opening message from the software environment to the user with a basic explanation of how to begin using the software environment.
'''


# Prompt that tells the model to act as an agent solving a task inside a software
# environment, answering each turn in a "THOUGHT: ... ACTION: ..." format and
# finishing with a call to success([outputs,...]).
# It has two positional `{}` slots, in order: the software environment and the task.
# NOTE(review): presumably filled with str.format(...) by a later cell - confirm.
# If so, any literal braces added to this text would have to be doubled.
agent_prompt_template = '''
Interact with a software environment to solve a task. Imagine you are an intelligent agent working for a user and your target is to perform actions to complete the task goal. At the beginning of your interactions, you will be given a detailed description of the current environment and your goal to accomplish. For each of your turns, you will be given a list of actions which you can choose one to perform in this turn. You should provide two parts of your response: "THOUGHT" and "ACTION". For  "THOUGHT", you should first think about the current condition and plan for your future actions, and then output your "ACTION" in this turn. Your output must strictly follow this format:"THOUGHT: your thoughts.
 ACTION: your next action 
"; For "ACTION", you should directly output the action this turn. Your output must strictly follow this format:"ACTION: your next action
". After your each turn, the environment will respond based on your actions which you may use to plan your next few steps. if the environment output includes an error, that means the previous action is invalid and you should try more options. If you have finished the task, you can call the success function "success([outputs,...])" with any final outputs.
 Reminder: 
1. the action must follow any formats requested
2. Think when necessary, try to act directly more in the process.
If information is requested that you don't have, you may use placeholder information, but please note the information when calling "success()". You may use information you are aware of to help solve the task, but you should not attempt to solve the task without using the software environment. 

Software Environment: {}
Your Task: {}
'''

### Load Data

In [7]:
def save_queue_to_file(q, filename):
    """Pickle the items currently held in ``q`` into ``filename`` as a plain list.

    The queue itself is left untouched (nothing is dequeued); the target file
    is opened in binary write mode, so existing content is overwritten.

    Args:
        q: a ``queue.Queue`` whose pending items should be persisted.
        filename: path of the pickle file to write.
    """
    with open(filename, 'wb') as sink:
        # Copy the queue's underlying container so a list is what gets pickled.
        snapshot = [*q.queue]
        pickle.dump(snapshot, sink)

def load_queue_from_file(filename):
    """Rebuild a ``queue.Queue`` from a pickled list stored in ``filename``.

    Items are enqueued in the order they appear in the pickled list.

    Args:
        filename: path of the pickle file to read.

    Returns:
        A ``queue.Queue`` holding the unpickled items, or a new empty queue when
        the file does not exist or unpickling hits end-of-file (e.g. an empty file).

    Note:
        ``pickle.load`` can execute arbitrary code; only use this on files
        this project wrote itself.
    """
    restored = queue.Queue()
    try:
        with open(filename, 'rb') as source:
            for entry in pickle.load(source):
                restored.put(entry)
    except (FileNotFoundError, EOFError):
        # Missing or empty file: start from a fresh, empty queue.
        return queue.Queue()
    return restored


In [8]:
# Restore the input queue: the tasks that have not yet been turned into agent interactions.
input_queue = load_queue_from_file(filename="./synthetic/input_queue.pkl")


In [10]:
# Snapshot the queued tasks as a list (nothing is dequeued) so they can be indexed and sampled.
queue_list = [*input_queue.queue]

In [12]:
# Sample one queued task record for manual inspection.
# `random` is already imported in the notebook's first cell, so it is not re-imported here.
# load_queue_from_file() returns an empty queue when the pickle is missing or empty,
# so fail with an actionable message instead of random.choice's generic IndexError.
if not queue_list:
    raise IndexError(
        "queue_list is empty - check that ./synthetic/input_queue.pkl exists and is not empty"
    )
a = random.choice(queue_list)

In [15]:
# Display the sampled task record.
a

{'environment': 'Environmental Impact Analyzer: The agent evaluates the environmental impact of various projects or decisions based on input data. Environmentalists can adjust evaluation parameters and interpret results.',
 'io': "Outputs: The output will be a well-structured JSON object. It would include fields for different impact categories such as GHG emissions, energy consumption, water usage, waste generation, biodiversity impact, etc. Each field will contain numerical values representing the estimated impact in appropriate units (e.g., tons of CO2, Megajoules, liters, kilograms, etc.). It will also include a summary section to provide an overall score of the project's environmental impact. This summary could be based on an aggregation of the category impacts using a weighting system that reflects the priorities of the environmentalist.\n\nInputs: The input will be a YAML file. This file will include sections to define the project's details, such as the project type, duration, lo

In [14]:
# print() rather than a bare expression so the "\n" escapes in the 'io' text render as line breaks.
print(a['io'])

Outputs: The output will be a well-structured JSON object. It would include fields for different impact categories such as GHG emissions, energy consumption, water usage, waste generation, biodiversity impact, etc. Each field will contain numerical values representing the estimated impact in appropriate units (e.g., tons of CO2, Megajoules, liters, kilograms, etc.). It will also include a summary section to provide an overall score of the project's environmental impact. This summary could be based on an aggregation of the category impacts using a weighting system that reflects the priorities of the environmentalist.

Inputs: The input will be a YAML file. This file will include sections to define the project's details, such as the project type, duration, location, involved materials, processes, and energy sources. These sections will be organized hierarchically to maintain clarity and simplicity.

Each section will include relevant parameters that the AI can analyze. For example, the e

#### Open Tasks

In [16]:
# Load the task records from JSON.
# The encoding is pinned to UTF-8 because the records contain non-ASCII characters
# (e.g. curly apostrophes) and open() otherwise uses the platform's locale encoding,
# which can mis-decode or fail on such data.
with open('./synthetic/tasks_with_full_environment.json', 'r', encoding='utf-8') as file:
    tasks_with_full_environment = json.load(file)

In [17]:
# Sanity check: how many task records were loaded.
len(tasks_with_full_environment)

1525

In [18]:
# Peek at the first record to see which keys a task carries.
tasks_with_full_environment[0]

{'environment': 'Book Reservation System: The agent interacts with a simulated booking system to reserve flights, hotels, or restaurants based on criteria. Travel agents can adjust reservation details and monitor booking statuses.',
 'io': 'Outputs: The output format could be a structured JSON response containing key information about the available options for bookings. For flight bookings, it will return information like flight number, airline, departure time, arrival time, price, available seats, etc. For hotel bookings, it could return information like hotel name, location, price per night, available rooms, amenities, etc. For restaurant bookings, it could return information like restaurant name, location, available seats, menu, pricing, etc. Furthermore, it could return statuses of adjusted reservations and booking statuses.\n\nInputs: The input format could be a specifically structured JSON request. The JSON request should have fields specifying the type of reservation (flight, ho

In [19]:
# Bucket the task records by their 'environment' string.
# A plain dict is used, so buckets appear in first-seen order.
grouped_by_environment = {}

for d in tasks_with_full_environment:
    env = d['environment']
    # setdefault creates the bucket on first sight of an environment, then returns it.
    grouped_by_environment.setdefault(env, []).append(d)

# One list of task records per environment, in the same order as the dict.
result = [*grouped_by_environment.values()]

In [20]:
# Number of distinct environments found among the tasks.
len(grouped_by_environment)

59

In [53]:
# Select the environment at position 13 of the grouping (dict insertion order).
# NOTE(review): 13 is a hard-coded position; no rationale for it is visible here.
key_to_use = list(grouped_by_environment)[13]

In [54]:
# All task records that belong to the selected environment.
tasks_group = grouped_by_environment[key_to_use]

In [55]:
# Keep only the 'task' text of each record in the selected environment's group.
all_tasks = [record['task'] for record in tasks_group]

In [56]:
# Display the task descriptions collected for the selected environment.
all_tasks

['Search for a 2-bedroom apartment in the Upper East Side, New York, under $5000 per month for Charlotte Cohen.',
 'List a 3-bedroom bungalow in Beverly Hills, with an asking price of $1.2 million for broker George Riddle.',
 'Plan a visit for Amber Thompson to a loft located in downtown Chicago, with the preferred date being 23rd of this month.',
 "Manage a transaction for broker Marcus Wyatt, updating the status of the sale of a penthouse in Miami from 'pending' to 'closed'.",
 'Make an offer of $980,000 for a villa in Malibu on behalf of investor Natalie Reeves, specifying the terms as a 30-day close.',
 'Update the property details for a studio apartment in San Francisco listed by broker Lydia Morton, changing the bathroom count from 1 to 1.5.',
 'View the client feedback for the property ID #2356, a condo in Downtown LA, for broker Stephen Hawkins.',
 'Search for available commercial properties in Boston for startup owner Wanda Harris, with a budget of under $7000 per month.',
 'S

In [57]:
# Show which environment description was selected.
key_to_use

'Virtual Real Estate Agent: The agent searches, lists, and suggests properties based on preferences and constraints. Real estate brokers can adjust property details, monitor client feedback, and handle transactions.'

In [58]:
# Peek at the first task record of the selected environment.
tasks_group[0]

{'environment': 'Virtual Real Estate Agent: The agent searches, lists, and suggests properties based on preferences and constraints. Real estate brokers can adjust property details, monitor client feedback, and handle transactions.',
 'io': 'Outputs: The output format would be a structured JSON response which contains the key details of each property - such as property ID, location, price, type (apartment, house, studio etc.), number of bedrooms/bathrooms, area (in sq ft), images (links to property images), seller information, and property status (available/leased). Moreover, the output might also include user-specific details like user’s search history, saved properties, scheduled visits, and transaction details. \n\nInputs: The input format would be primarily JSON commands and parameters sent through an API endpoint. This can include commands for user actions such as SEARCH_PROPERTY (parameters: location, price range, property type, number of bedrooms etc.), LIST_PROPERTY (parameters

In [59]:
# print() rather than a bare expression so the "\n" escapes in the 'io' text render as line breaks.
print(tasks_group[0]['io'])

Outputs: The output format would be a structured JSON response which contains the key details of each property - such as property ID, location, price, type (apartment, house, studio etc.), number of bedrooms/bathrooms, area (in sq ft), images (links to property images), seller information, and property status (available/leased). Moreover, the output might also include user-specific details like user’s search history, saved properties, scheduled visits, and transaction details. 

Inputs: The input format would be primarily JSON commands and parameters sent through an API endpoint. This can include commands for user actions such as SEARCH_PROPERTY (parameters: location, price range, property type, number of bedrooms etc.), LIST_PROPERTY (parameters: property details), SCHEDULE_VISIT (parameters: property ID, preferred date-time), MAKE_OFFER (parameters: property ID, offer price, terms), and TRANSACTION (parameters: property ID, buyer and seller information, sale price, closing date). Bro