In [1]:
from IPython.display import HTML, display
from copy import deepcopy
import random
import os
import openai
import time
import sys
import ast
import json

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from getpass import getpass
import os
import pandas as pd

import pdfkit
import html
from copy import deepcopy


In [2]:
# Prompt for the OpenAI API key without echoing it, keep it in OPENAI_API_KEY,
# and export it as the OPENAI_API_KEY environment variable.
OPENAI_API_KEY = getpass()
os.environ.update(OPENAI_API_KEY=OPENAI_API_KEY)

········


### Setup Model

In [3]:
# Chat model used for every generation step in this notebook:
# the gpt-4-0613 snapshot with sampling temperature 0.8.
model = ChatOpenAI(model="gpt-4-0613", temperature=0.8)

In [81]:
# Debug check: show the type of the model's max_tokens attribute
# (the recorded output is NoneType).
type(model.max_tokens)

NoneType

### Input and Output Modeling

In [4]:
# Prompt template asking the model to design the input and output formats of a
# text-based, turn-based environment. The single `{}` placeholder is filled
# with the environment description via str.format.
environment_design_prompt = '''
Help me design an environment for agents to interact in. I will give you an environment idea, and you will spend some time thinking about how it could work. The environment will be entirely text-based, and turn based, and the user will be expected to try to complete an action in that environment. The user will be an AI agent acting on behalf of a user. You will need to specify a reasonable output format that can convey the information of the environment, and an input format that can give the user the power to use the environment to the fullest. 

Here are some examples (but you can do better):
Example 1: Web-browser accessing company intranet
Outputs: Outputs simplified HTML (CSS and Javascript removed, HTML edited to focus on content)
Inputs: Python code that uses the playwright library to direct the browser

Example 2: Computer Network Management System
Outputs: Command line output using output from tools like Cisco IOS, nmap, snmpwalk, lldp, iptables, ntopng, wireshark, openvpn, wireguard, isc dhcp server, etc.
Inputs: Command Line Inputs and files, such as,
JSON configurations for bulk configuration.
YAML configurations.
Infrastructure-as-code files.
Initial Capabilities Displayed by SNMS:
ADD_DEVICE: Add a device to the virtual network.
CONFIGURE_DEVICE: Configure a device in the virtual network.
SHOW_NETWORK: Display current network topology.
PING: Test connectivity between two devices.
SAVE_CONFIG: Save configuration of devices.
ASK_QUESTION: Pose a natural language question about capabilities.

Example 3: Flight Booking System
Outputs: XML outputs in proprietary format [you should provide more detail than this]
Inputs: JSON Inputs in proprietary format  [you should provide more detail than this]
-------
Your description of the Outputs and Inputs should be high level (we will get into further detail later) BUT MUST BE AS SPECIFIC AS POSSIBLE (Please no "it could be JSON or XML"). Also, you can assume that the inputs include some mechanism for querying the documentation of the input format in natural language. 

Your output format:
Outputs: [The output format that would be used]
Inputs: [The input format that would be used]

Your environment to design:
{}
'''

In [5]:
# Sample environment description used to exercise the prompts below.
# The leading and trailing newlines are part of the value.
environment_example = (
    "\n"
    "Recipe Query System: The agent can search for and suggest recipes based on available ingredients.\n"
)

In [7]:
# Fill the design prompt with the sample environment and send it to the model
# as a single human message.
design_prompt_filled = environment_design_prompt.format(environment_example)
design_messages = [HumanMessage(content=design_prompt_filled)]
environment_design_result = model.predict_messages(design_messages)

In [37]:
# Keep the raw text of the design reply; later cells pass it to the
# environment prompt as the description of inputs and outputs.
inputs_and_outputs = environment_design_result.content
print(inputs_and_outputs)

Outputs: The output format could be a JSON file for each recipe result that includes the recipe name, ingredients, cooking instructions, cooking time, and associated images URL. This would allow the agent to easily parse the information and present it to the user in a readable format. Suppose a user is searching for a recipe that can be made with chicken and broccoli. An example of the output could be:

{
    "recipeName": "Chicken and Broccoli Stir-fry",
    "ingredients": ["Chicken", "Broccoli", "Soy Sauce", "Garlic", "Ginger"],
    "instructions": "1. Heat pan and add oil. 2. Add garlic and ginger...",
    "cookingTime": "35 minutes",
    "imageURL": "http://example.com/path_to_image"
}

Inputs: The input format could be a JSON file where the agent can specify the ingredients that the user has available. The system should also accept additional parameters such as dietary restrictions, cuisine preference etc. An example of the input could be:

{
    "ingredients": ["Chicken", "Brocco

In [11]:
# Split the design reply into its "Outputs" part and its "Inputs" part at the
# first "Inputs: " label. partition() avoids the "too many values to unpack"
# failure that split() produces when the label appears more than once (for
# example inside an embedded sample); a missing label is still a ValueError.
e_d_output, _inputs_label, e_d_input = environment_design_result.content.partition("Inputs: ")
if not _inputs_label:
    raise ValueError('Expected an "Inputs: " section in the environment design reply')

In [13]:
# Drop the leading "Outputs: " label. str.lstrip() treats its argument as a
# set of characters, so lstrip("Outputs: ") would also eat the start of the
# real text whenever it begins with any of "O", "u", "t", "p", "s", ":" or a
# space. Remove the exact prefix instead.
_outputs_label = "Outputs: "
if e_d_output.startswith(_outputs_label):
    e_d_output = e_d_output[len(_outputs_label):]

In [14]:
# Display the "Outputs" half of the design reply.
e_d_output

'The output format could be a JSON file for each recipe result that includes the recipe name, ingredients, cooking instructions, cooking time, and associated images URL. This would allow the agent to easily parse the information and present it to the user in a readable format. Suppose a user is searching for a recipe that can be made with chicken and broccoli. An example of the output could be:\n\n{\n    "recipeName": "Chicken and Broccoli Stir-fry",\n    "ingredients": ["Chicken", "Broccoli", "Soy Sauce", "Garlic", "Ginger"],\n    "instructions": "1. Heat pan and add oil. 2. Add garlic and ginger...",\n    "cookingTime": "35 minutes",\n    "imageURL": "http://example.com/path_to_image"\n}\n\n'

#### Get a list of Tasks

In [82]:
# Prompt template asking the model for twenty-five detailed tasks, each paired
# with a plain-text software state. The two `{}` placeholders are filled, in
# order, with the environment description and its inputs/outputs description.
# Literal braces in the JSON example are escaped as `{{` / `}}` for str.format;
# the example must not contain a backslash, because `\}` is an invalid escape
# sequence and would leak a stray backslash into the JSON shown to the model.
tasks_generation_prompt = '''Consider the following software environment: {}

The environment has the following inputs and outputs:
{}

---------
Can you think of a long list of twenty-five tasks that can be executed in that environment? Your character limit has been turned off. 

It's important that the tasks are diverse and use all of the possible features of the environment. Tasks should be specific ("Check out a science fiction book on rockets for Maryann Bray" versus "Check out a book", "Find available properties near State and Lake in Chicago under $1000/month" versus "Find available properties") and should include any information reasonably needed to complete the task (If the task is a dinner reservation, it should include the number of diners). By making the tasks VERY DETAILED, it should be easier to make a long list of twenty five of them. Each task should be for a different end user. Avoid generic names for things (John Doe, Acme Corporation), be creative. 

For each task, include a plain text description of the software state. Software state can include things such as data or knowledge that it may have, pages or screens it may use, history of previous actions, users data, etc. depending on the environment  that the user would encounter in that task. (Like,  if the task is to find the email address of Sally Smith from an employee directory on a company intranet, then the software state would include "the Intranet environment includes an employee directory with Sally Smith in it"). THE SOFTWARE STATE CAN BE VERBOSE AND SHOULD NOT REFERENCE THE TASK OR ASSUME THE  READER OF THE TASK IS PROVIDED WITH THE STATE. Format in json like this: {{"task": "[task]","state":"[state]"}}, etc. I'd love a list of like twenty-five tasks and states.

'''

In [32]:
# Build the task-generation request from the environment description and the
# raw design reply, then send it as a single human message.
tasks_generation_filled = tasks_generation_prompt.format(
    environment_example,
    environment_design_result.content,
)
tasks_generation_messages = [HumanMessage(content=tasks_generation_filled)]
tasks_generation_result = model.predict_messages(tasks_generation_messages)

In [33]:
import re

# Each task is expected as a flat `{...}` object; the pattern matches from an
# opening brace up to the next closing brace.
pattern = r'\{[^}]*\}'
matches = re.findall(pattern, tasks_generation_result.content)

# Independent copy of the matched object strings, in order of appearance.
tasks_and_state = list(matches)

In [34]:
# Display the extracted task/state object strings.
tasks_and_state

['{"task":"Find a vegan recipe using only carrots and potatoes","state":"The software has a database of recipes and can filter based on ingredients and dietary preferences."}',
 '{"task":"Find a gluten-free dessert recipe that can be cooked in under 30 minutes","state":"The software can search through a recipe database, filtering by dietary restrictions, dish types, and cooking time."}',
 '{"task":"Find a Keto diet recipe that uses beef and is of Mexican cuisine","state":"The software is connected to a recipe database that can filter based on cuisine, diet, and ingredients."}',
 '{"task":"Find a seafood recipe that doesn\'t include shellfish","state":"The software has a recipe database with detailed ingredient lists and can exclude certain ingredients in its search."}',
 '{"task":"Find a pasta recipe that can be made using only pantry staples","state":"The software has a recipe database and can suggest recipes based on the ingredients provided."}',
 '{"task":"Find an egg-free breakfast

#### Create a conversation

In [60]:
# Prompt that makes the model play the software environment. The three `{}`
# placeholders are filled, in order, with the environment description, the
# inputs/outputs description, and the software state for the task.
environment_prompt_template = '''
You will simulate a software enviroment that can be used by a user or agent. You will simulate the following software environment:
{}

The following information explains the expectations about inputs and outputs of the environment:
{}

You should do your absolute best to interact with the user as if you are the given environment. You should NOT act as if you are chatGPT, GPT-4, or any other AI agent. The goal is to make a convincing simulation. Below you are given information about the state of the environment:
{}

When acting as the environment, you may make up any data or information you need, as long as it is consistent with the state and the user actions. You should prefer realistic data and responses over “example” data that is generic (“Matthew Harris” is better than “John Doe”, “Carter Products” is better than “Acme Products”). Your outputs should ALWAYS be consistent with the expectations about outputs!

In any situation where the inputs from the user are incomplete, unrecognized, or not as per the expected format for the software environment, you should return an error and generic information about how inputs of the given type may be presented. 

Please start with an opening message from the software environment to the user with a basic explanation of how to begin using the software environment.
'''


# Prompt that makes the model play the agent. It asks for replies in a
# "THOUGHT: ... ACTION: ..." format and for a `success(...)` call when done;
# the conversation loops rely on both markers. The two `{}` placeholders are
# filled, in order, with the environment description and the task.
agent_prompt_template = '''
Interact with a software environment to solve a task. Imagine you are an intelligent agent working for a user and your target is to perform actions to complete the task goal. At the beginning of your interactions, you will be given a detailed description of the current environment and your goal to accomplish. For each of your turns, you will be given a list of actions which you can choose one to perform in this turn. You should provide two parts of your response: "THOUGHT" and "ACTION". For  "THOUGHT", you should first think about the current condition and plan for your future actions, and then output your "ACTION" in this turn. Your output must strictly follow this format:"THOUGHT: your thoughts.
 ACTION: your next action 
"; For "ACTION", you should directly output the action this turn. Your output must strictly follow this format:"ACTION: your next action
". After your each turn, the environment will respond based on your actions which you may use to plan your next few steps. if the environment output includes an error, that means the previous action is invalid and you should try more options. If you have finished the task, you can call the success function "success([outputs,...])" with any final outputs.
 Reminder: 
1. the action must follow any formats requested
2. Think when necessary, try to act directly more in the process.
If information is requested that you don't have, you may use placeholder information, but please note the information when calling "success()". You may use information you are aware of to help solve the task, but you should not attempt to solve the task without using the software environment. 

Software Environment: {}
Your Task: {}
'''

In [61]:
# Parse the first generated record once, as data. The record text comes from
# the model, so it must not be passed to eval(), which would execute it as
# Python code. JSON is the requested format; ast.literal_eval is a
# literal-only fallback for records written with Python-style quoting.
try:
    _first_record = json.loads(tasks_and_state[0])
except json.JSONDecodeError:
    _first_record = ast.literal_eval(tasks_and_state[0])
the_task = _first_record['task']
the_state = _first_record['state']

In [62]:
# System-side prompt: environment description, its I/O contract, and the state.
environment_prompt = environment_prompt_template.format(
    environment_example,
    inputs_and_outputs,
    the_state,
)
# Agent-side prompt: environment description and the task to solve.
agent_prompt = agent_prompt_template.format(environment_example, the_task)

# Each side starts its own history with its prompt as a single human message.
agent_messages = [HumanMessage(content=agent_prompt)]
environment_messages = [HumanMessage(content=environment_prompt)]

In [63]:
# Upper bound on environment/agent exchanges for this conversation.
num_turns = 10

for i in range(num_turns):
    # The environment speaks first; its reply is stored in its own history.
    environment_result = model.predict_messages(environment_messages)
    environment_messages.append(environment_result)
    print(environment_result.content)

    # The agent receives the environment output as a human message and replies.
    agent_messages.append(HumanMessage(content=environment_result.content))
    agent_response = model.predict_messages(agent_messages)
    print(agent_response.content)
    agent_messages.append(agent_response)

    # Forward only the text that follows the first "ACTION:" marker. If the
    # agent ignored the requested format and produced no marker, forward the
    # whole reply instead of failing with IndexError part-way through the run.
    _reply_parts = agent_response.content.split("ACTION:")
    agent_result = (_reply_parts[1] if len(_reply_parts) > 1 else _reply_parts[0]).strip()
    environment_messages.append(HumanMessage(content=agent_result))

    # The agent signals completion by calling success(...).
    if "success(" in agent_result:
        break


Welcome to Recipe Query System. To get started, you can provide a list of ingredients you have on hand, and we will suggest a recipe for you. You can also specify dietary restrictions or cuisine preferences. Please input your information in the following format:

{
    "ingredients": ["Ingredient1", "Ingredient2"],
    "diet": "YourDiet",
    "cuisine": "YourPreferredCuisine"
}

Once the request is processed, you will receive a JSON response with the recipe name, ingredients, cooking instructions, cooking time, and associated images URL. Let's cook something delicious!
THOUGHT: I need to structure the request to the Recipe Query System with carrots and potatoes as the ingredients, and specify the diet as vegan.

ACTION: 
{
    "ingredients": ["carrots", "potatoes"],
    "diet": "vegan",
    "cuisine": ""
}
{
    "recipeName": "Vegan Carrot and Potato Soup",
    "ingredients": ["Carrots", "Potatoes", "Vegetable Stock", "Garlic", "Onion", "Salt", "Pepper", "Olive Oil"],
    "instructions

#### Create several conversations

In [67]:
# Upper bound on environment/agent exchanges per task.
num_turns = 20

# One transcript per simulated conversation, for each side.
all_agents = []
all_environments = []

# Only the first two generated tasks are simulated here.
for raw_record in tasks_and_state[:2]:
    # The record text comes from the model, so parse it as data rather than
    # eval()-ing it as Python code. JSON is the requested format;
    # ast.literal_eval is a literal-only fallback for Python-style quoting.
    try:
        task_record = json.loads(raw_record)
    except json.JSONDecodeError:
        task_record = ast.literal_eval(raw_record)
    the_task = task_record['task']
    the_state = task_record['state']
    print("Task:", the_task)

    environment_prompt = environment_prompt_template.format(environment_example, inputs_and_outputs, the_state)
    agent_prompt = agent_prompt_template.format(environment_example, the_task)
    agent_messages = [HumanMessage(content=agent_prompt)]
    environment_messages = [HumanMessage(content=environment_prompt)]

    for i in range(num_turns):
        # The environment speaks first; its reply is stored in its own history.
        environment_result = model.predict_messages(environment_messages)
        environment_messages.append(environment_result)
        print(environment_result.content)

        # The agent receives the environment output as a human message.
        agent_messages.append(HumanMessage(content=environment_result.content))
        agent_response = model.predict_messages(agent_messages)
        print(agent_response.content)
        agent_messages.append(agent_response)

        # Forward only the text after the first "ACTION:" marker; when the
        # marker is missing, forward the whole reply instead of raising
        # IndexError and losing the transcripts collected so far.
        _reply_parts = agent_response.content.split("ACTION:")
        agent_result = (_reply_parts[1] if len(_reply_parts) > 1 else _reply_parts[0]).strip()
        environment_messages.append(HumanMessage(content=agent_result))

        # The agent signals completion by calling success(...).
        if "success(" in agent_result:
            break

    # Snapshot both histories so later conversations cannot alias them.
    all_agents.append(deepcopy(agent_messages))
    all_environments.append(deepcopy(environment_messages))
    

Task: Find a vegan recipe using only carrots and potatoes
Welcome to Recipe Query System! This software is designed to help you discover recipes based on available ingredients. To get started, please provide a list of ingredients that you have on hand in a JSON format. You can also specify other preferences like diet restrictions and cuisine type. Here's an example of the input format:

{
    "ingredients": ["Chicken", "Broccoli"],
    "diet": "Keto",
    "cuisine": "Chinese"
}

After receiving your input, the system will search a comprehensive database of recipes and return the best matches. The results will be delivered in a JSON format that includes the recipe name, ingredients, cooking instructions, cooking time, and associated images URL. 

Please make sure to input all fields according to the format for the software to be able to correctly process your request.
THOUGHT: I need to find a vegan recipe using only carrots and potatoes. I will use the Recipe Query System to do this. 


### Many, Many environments

In [68]:
# Additional environment descriptions, grouped by domain. Each entry is a
# single string of the form "<name>: <what the agent does>. <what a human
# operator can do>."
environments = [
    # Refinery & Manufacturing Systems
    "Refinery Control Simulator: The agent monitors and adjusts virtual refinery operations to ensure safe and optimal production. Technicians can calibrate equipment and perform diagnostics.",
    "Factory Floor Management: The agent schedules, monitors, and optimizes manufacturing processes, ensuring timely production. Technicians can maintain and calibrate machinery.",
    "CNC Machine Controller: The agent designs and executes CNC machining processes based on design inputs. Machine technicians can calibrate and troubleshoot the machine.",
    "Automated Assembly Line: The agent manages an assembly line's pace, worker assignments, and quality checks. Technicians can perform maintenance checks and repairs.",

    # Scientific & Engineering Software
    "Petrel Reservoir Simulation: The agent manages and analyzes virtual oil reservoir simulations, optimizing extraction strategies. Geoscientists can adjust parameters and analyze geological data.",
    "GROMACS Molecular Dynamics: The agent runs molecular simulations, analyzing protein structures and interactions. Researchers can adjust simulation parameters and analyze results.",
    "ANSYS Fluent Flow Simulator: The agent sets up and runs fluid dynamics simulations, analyzing airflow over virtual objects. Engineers can adjust meshing and boundary conditions.",
    "MCNP Particle Simulation: The agent sets up and monitors nuclear particle simulations, evaluating radiation doses and shielding effectiveness. Nuclear scientists can adjust simulation parameters.",
    "Kingdom Suite Seismic Analysis: The agent processes and interprets seismic data for oil and gas exploration. Geophysicists can adjust processing parameters and interpret results.",
    "SPICE Circuit Simulation: The agent designs and simulates electronic circuits, analyzing their behavior under various conditions. Electrical engineers can modify component values and test different scenarios.",
    "NAPA Ship Design: The agent designs virtual ship hulls and evaluates their hydrodynamic performance. Naval architects can modify design parameters and run simulations.",
    "OpenTrack Railway Operations: The agent schedules and manages virtual railway operations, optimizing for efficiency and safety. Railway technicians can maintain tracks and signals.",
    "KDB+/q Data Analysis: The agent queries and processes large datasets in real-time, extracting insights and patterns. Data analysts can adjust query parameters and visualize results.",
    "Cadence Virtuoso IC Design: The agent designs integrated circuits and tests their functionality. Microelectronics engineers can adjust design parameters and run simulations.",
    "CST Studio Electromagnetic Analysis: The agent sets up and runs electromagnetic simulations, evaluating antenna designs and electromagnetic interference. RF engineers can adjust simulation parameters and evaluate results.",
    "TunnelCAD Tunnel Design: The agent designs virtual tunnels, evaluating their stability and safety. Civil engineers can adjust design parameters and run simulations.",

    # Miscellaneous Domains
    "DNA Sequencing Simulator: The agent processes and interprets virtual DNA samples, identifying genes and mutations. Biologists can adjust sequencing parameters and analyze results.",
    "Virtual Architectural Planner: The agent designs building layouts, evaluating structural integrity and aesthetics. Architects can modify design elements and run simulations.",
    "Financial Forecasting System: The agent analyzes financial data to predict market trends. Financial analysts can adjust forecast parameters and interpret results.",
    "Urban Traffic Management: The agent manages traffic lights and flow in a virtual city, optimizing for reduced congestion. Traffic technicians can adjust signal timings and monitor traffic patterns.",
    "Drone Flight Controller: The agent plans and executes drone flight paths, ensuring safe and efficient operations. Drone technicians can calibrate and troubleshoot drone components.",
    "Virtual Retail Store Manager: The agent manages inventory, sales, and customer interactions in a virtual retail environment. Store managers can analyze sales data and adjust marketing strategies.",
    "Automated Journalism Simulator: The agent gathers data and composes news articles or reports. Editors can adjust article parameters and approve final drafts.",
    "Text-based Marine Navigator: The agent plans and navigates maritime routes, avoiding obstacles and optimizing for fuel efficiency. Ship captains can adjust navigation parameters and monitor ship performance.",
    "Virtual Astronomy Observatory: The agent analyzes data from virtual telescopes, identifying celestial objects and phenomena. Astronomers can adjust observation parameters and interpret results.",
    "Hydroelectric Power Plant Simulator: The agent manages operations in a virtual hydroelectric power plant, optimizing for energy production. Engineers can calibrate turbines and monitor water levels.",
    "Text-based Theme Park Manager: The agent designs and manages a virtual theme park, ensuring guest satisfaction and safety. Park managers can adjust ride parameters and monitor visitor feedback.",
    "Automated Legal Advisor: The agent analyzes legal texts and provides advice or interpretations. Lawyers can adjust query parameters and review interpretations.",
    "Environmental Impact Analyzer: The agent evaluates the environmental impact of various projects or decisions based on input data. Environmentalists can adjust evaluation parameters and interpret results.",
    "Virtual Wildlife Conservationist: The agent monitors and manages virtual wildlife populations, ensuring their survival and health. Conservationists can adjust habitat parameters and monitor species data.",
    "Smart Grid Energy Manager: The agent optimizes energy distribution in a virtual smart grid, balancing supply and demand. Technicians can calibrate grid components and monitor energy flows.",
    "Text-based Astronaut Trainer: The agent trains and prepares for virtual space missions, ensuring readiness for real-life space exploration. Space agencies can adjust training parameters and evaluate astronaut performance.",
    "Virtual Art Auctioneer: The agent evaluates and auctions virtual art pieces, optimizing for profitability and buyer satisfaction. Art curators can adjust auction parameters and evaluate artwork authenticity."
]

# First set of environment descriptions, one per domain (the domain is named
# in the comment above each entry). Each entry is a single string describing
# what the agent does and what a human operator can do.
original_environments = [
    # Bookings & Reservations
    "Booking Reservation System: The agent interacts with a simulated booking system to reserve flights, hotels, or restaurants based on criteria. Travel agents can adjust reservation details and monitor booking statuses.",

    # Libraries & Databases
    "Library Database Manager: The agent queries a text-based library system to find books or articles based on topics or authors. Librarians can update records, analyze usage, and maintain the system.",

    # Technical Support
    "Tech Support Simulator: The agent engages with a virtual user reporting technical problems, providing troubleshooting steps. Tech support agents can interact with tickets, resolve issues, and manage user interactions.",

    # Financial & Stock Market
    "Virtual Stock Market Analyst: The agent receives textual data on stocks, making buy/sell decisions based on information. Financial analysts can adjust investment strategies and monitor stock performance.",

    # Event Planning
    "Event Planner Assistant: The agent organizes events, manages schedules, and handles invitations considering constraints and preferences. Event organizers can adjust details, monitor RSVPs, and communicate with attendees.",

    # Gaming & Narrative
    "Text-based Game Environment: The agent engages in a story-driven game, making decisions that influence the narrative's outcome. Game developers can adjust story elements and monitor player engagement.",

    # Culinary & Cooking
    "Recipe Query System: The agent searches and proposes recipes based on available ingredients. Culinary experts can adjust recipe parameters, add new recipes, and monitor user feedback.",

    # Museums & Exhibitions
    "Virtual Museum Guide: The agent escorts a user through a text-based museum, explaining artworks or exhibits tailored to user interest. Museum curators can update exhibit details, monitor user engagement, and gather feedback.",

    # Education & Learning
    "E-Learning Platform Navigator: The agent navigates an online learning platform, enrolling in courses, and engaging in quizzes. Educators can adjust course materials, monitor student progress, and provide feedback.",

    # Fitness & Health
    "Text-based Fitness Trainer: The agent designs workout routines, monitors progress, and gives advice based on health and fitness inputs. Fitness instructors can adjust workout parameters, monitor user engagement, and track progress.",

    # Job Interviews
    "Automated Interview Simulator: The agent plays both interviewer and interviewee roles across various job domains, facilitating questions and answers. HR professionals can adjust interview parameters and evaluate responses.",

    # Medical & Health
    "Medical Diagnosis Assistant: The agent evaluates provided symptoms, gives potential diagnoses, and recommends next steps. Medical professionals can adjust diagnosis criteria, provide feedback, and monitor patient interactions.",

    # Agriculture & Farming
    "Text-based Farming Simulator: The agent oversees a virtual farm, making decisions about crops, livestock, and sales. Farmers can adjust farming strategies, monitor livestock health, and track crop yields.",

    # Real Estate & Housing
    "Virtual Real Estate Agent: The agent searches, lists, and suggests properties based on preferences and constraints. Real estate brokers can adjust property details, monitor client feedback, and handle transactions.",

    # Software & Version Control
    "Version Control System Operator: The agent interacts with a text-based version control system (like Git), handling commits, branches, merges, and conflict resolution. Developers can adjust repository details, monitor commits, and track changes.",

    # Software Development & CI/CD
    "CI/CD Pipeline Manager: The agent oversees build pipelines, runs tests, and handles software deployment based on textual logs and commands. DevOps engineers can adjust pipeline configurations, monitor build statuses, and troubleshoot issues.",

    # Cloud & Infrastructure
    "Cloud Infrastructure Manager: The agent interfaces with a simulated cloud service to provision servers, manage databases, and allocate resources. Cloud administrators can adjust resource configurations, monitor usage, and optimize costs.",

    # Code Review & Optimization
    "Automated Code Reviewer: The agent reviews code snippets, provides feedback, suggests enhancements, and identifies potential issues. Software engineers can adjust review criteria, provide peer feedback, and monitor code quality.",

    # Algorithm Design & Testing
    "Algorithm Design Simulator: The agent is given problems, designs solutions, tests algorithms, and evaluates their performance. Computer scientists can adjust problem parameters, provide feedback, and compare against benchmarks.",

    # Databases & Data Management
    "Database Design & Management System: The agent creates database schemas, normalizes data structures, and fine-tunes queries. Database administrators can adjust schema configurations, monitor database health, and optimize performance.",

    # Networking & Connectivity
    "Network Management Simulator: The agent configures and manages virtual networks, setting up routers, switches, and ensuring connectivity. Network engineers can adjust network configurations, monitor traffic, and troubleshoot issues.",

    # Embedded Systems & Firmware
    "Embedded Systems Designer: The agent programs and tests firmware for simulated embedded devices, ensuring correct functionality. Firmware developers can adjust device parameters, monitor performance, and troubleshoot issues.",

    # API Design & Interaction
    "API Design & Interaction Simulator: The agent creates, tests, and communicates with simulated RESTful or GraphQL APIs, ensuring valid endpoints and responses. Backend developers can adjust API configurations, monitor requests, and optimize performance.",

    # Web Development & Design
    "Web Development Simulator: The agent receives a design brief and crafts HTML, CSS, and JavaScript to render a functional webpage or app. Web developers can adjust design parameters, monitor user engagement, and optimize performance.",

    # Compilers & Interpreters
    "Compiler Design Simulator: The agent crafts and tests code for a virtual compiler or interpreter, ensuring proper code translation. Compiler developers can adjust compilation parameters, monitor translation accuracy, and troubleshoot issues.",

    # Operating Systems & Scheduling
    "Operating System Scheduler Simulator: The agent manages virtual OS processes, handles memory allocation, and schedules tasks. OS developers can adjust scheduling parameters, monitor system health, and optimize performance.",

    # Software Testing & Quality Assurance
    "Automated Testing Environment: The agent authors unit, integration, and end-to-end tests for software modules, and assesses test outcomes. QA engineers can adjust test criteria, monitor test coverage, and troubleshoot issues.",

    # Security & Threat Management
    "Security Breach Simulator: The agent detects and responds to simulated security threats in a virtual ecosystem, mitigating risks. Security analysts can adjust threat parameters, monitor system integrity, and implement protective measures."
]

# Full list of environment descriptions, with original_environments first.
# NOTE(review): this rebinds `all_environments`, which the "Create several
# conversations" cell used for per-task environment message histories; after
# this line those transcripts are no longer reachable under that name.
all_environments = original_environments + environments 



In [87]:
# tasks_with_full_environment = []

import re

# Accumulator of {"environment", "io", "task", "state"} records. Created only
# when missing, so that re-running this cell to resume an interrupted run
# keeps the records collected so far and a fresh kernel does not hit NameError.
if "tasks_with_full_environment" not in globals():
    tasks_with_full_environment = []

# Each task is expected as a flat `{...}` object in the model's reply.
pattern = r'\{[^}]*\}'

# Iterate over the slice itself. The previous form,
# `for x in range(len(all_environments[2:])): all_environments[x]`, sized the
# loop from the slice but indexed the full list from 0, so it re-processed the
# first two environments and never reached the last two.
# NOTE(review): the `[2:]` offset skips the first two environments, presumably
# because they were already processed — confirm before a from-scratch run.
for environment_example in all_environments[2:]:
    # Input-Outputs
    design_prompt_filled = environment_design_prompt.format(environment_example)
    design_messages = [HumanMessage(content=design_prompt_filled)]
    environment_design_result = model.predict_messages(design_messages)
    inputs_and_outputs = environment_design_result.content

    # Generate Tasks and state
    tasks_generation_filled = tasks_generation_prompt.format(environment_example, environment_design_result.content)
    tasks_generation_messages = [HumanMessage(content=tasks_generation_filled)]
    tasks_generation_result = model.predict_messages(tasks_generation_messages)

    matches = re.findall(pattern, tasks_generation_result.content)
    tasks_and_state = list(matches)

    for item in tasks_and_state:
        # The record text comes from the model: parse it as data, never with
        # eval(). JSON is the requested format; ast.literal_eval is a
        # literal-only fallback for Python-style quoting.
        try:
            record = json.loads(item)
        except json.JSONDecodeError:
            try:
                record = ast.literal_eval(item)
            except (ValueError, SyntaxError):
                record = None

        # Skip, and report, anything that is not a {"task", "state"} object so
        # that one malformed record does not abort the whole batch.
        if not isinstance(record, dict) or 'task' not in record or 'state' not in record:
            print("Skipping malformed task record:", item[:200])
            continue

        final_result = {
            "environment": environment_example,
            "io": inputs_and_outputs,
            "task": record['task'],
            "state": record['state'],
        }
        tasks_with_full_environment.append(final_result)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Wed, 01 Nov 2023 04:19:42 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '81f151947c352a2a-ORD', 'alt-svc': 'h3=":443"; ma=86400'}.


In [88]:
# Number of generated task records collected so far.
len(tasks_with_full_environment)

1525

In [89]:
# Write to a JSON file
with open('./synthetic/tasks_with_full_environment.json', 'w') as file:
    json.dump(tasks_with_full_environment, file, indent=4)

In [90]:
# Read from a JSON file
with open('./synthetic/tasks_with_full_environment.json', 'r') as file:
    data = json.load(file)

In [91]:
# Round-trip check: the records read back from disk equal the in-memory list.
data == tasks_with_full_environment

True

In [92]:
# Show the first record as a sample of the stored structure.
tasks_with_full_environment[0]

{'environment': 'Book Reservation System: The agent interacts with a simulated booking system to reserve flights, hotels, or restaurants based on criteria. Travel agents can adjust reservation details and monitor booking statuses.',
 'io': 'Outputs: The output format could be a structured JSON response containing key information about the available options for bookings. For flight bookings, it will return information like flight number, airline, departure time, arrival time, price, available seats, etc. For hotel bookings, it could return information like hotel name, location, price per night, available rooms, amenities, etc. For restaurant bookings, it could return information like restaurant name, location, available seats, menu, pricing, etc. Furthermore, it could return statuses of adjusted reservations and booking statuses.\n\nInputs: The input format could be a specifically structured JSON request. The JSON request should have fields specifying the type of reservation (flight, ho