In [14]:
import openai
import json
import random
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from getpass import getpass
import os

from IPython.display import HTML, display
import pdfkit
import html
from copy import deepcopy

import threading
import queue
import time  # Used for simulating API call
import pickle

In [2]:
# setting up an OpenAI template on the run
OPENAI_API_KEY = getpass()

os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

········


### Setup Model

In [3]:
model = ChatOpenAI(model="gpt-4-0613")
model.temperature = 0.8

### Prompts

In [4]:
environment_prompt_template = '''
You will simulate a software enviroment that can be used by a user or agent. You will simulate the following software environment:
{}

The following information explains the expectations about inputs and outputs of the environment:
{}

You should do your absolute best to interact with the user as if you are the given environment. You should NOT act as if you are chatGPT, GPT-4, or any other AI agent. The goal is to make a convincing simulation. Below you are given information about the state of the environment:
{}

When acting as the environment, you may make up any data or information you need, as long as it is consistent with the state and the user actions. You should prefer realistic data and responses over “example” data that is generic (“Matthew Harris” is better than “John Doe”, “Carter Products” is better than “Acme Products”). Your outputs should ALWAYS be consistent with the expectations about outputs!

In any situation where the inputs from the user are incomplete, unrecognized, or not as per the expected format for the software environment, you should return an error and generic information about how inputs of the given type may be presented. 

Please start with an opening message from the software environment to the user with a basic explanation of how to begin using the software environment.
'''


agent_prompt_template = '''
Interact with a software environment to solve a task. Imagine you are an intelligent agent working for a user and your target is to perform actions to complete the task goal. At the beginning of your interactions, you will be given a detailed description of the current environment and your goal to accomplish. For each of your turns, you will be given a list of actions which you can choose one to perform in this turn. You should provide two parts of your response: "THOUGHT" and "ACTION". For  "THOUGHT", you should first think about the current condition and plan for your future actions, and then output your "ACTION" in this turn. Your output must strictly follow this format:"THOUGHT: your thoughts.
 ACTION: your next action 
"; For "ACTION", you should directly output the action this turn. Your output must strictly follow this format:"ACTION: your next action
". After your each turn, the environment will respond based on your actions which you may use to plan your next few steps. if the environment output includes an error, that means the previous action is invalid and you should try more options. If you have finished the task, you can call the success function "success([outputs,...])" with any final outputs.
 Reminder: 
1. the action must follow any formats requested
2. Think when necessary, try to act directly more in the process.
If information is requested that you don't have, you may use placeholder information, but please note the information when calling "success()". You may use information you are aware of to help solve the task, but you should not attempt to solve the task without using the software environment. 

Software Environment: {}
Your Task: {}
'''

### Setup Interaction Creation

In [8]:
def create_agent_conversation(task):    
    num_turns = 20
    the_task = task['task']
    the_state = task['state']
    print("Task:", the_task)
    environment_prompt = environment_prompt_template.format(task['environment'], task['io'], task['state'])
    agent_prompt = agent_prompt_template.format(task['environment'], task['task'])
    agent_messages = [
        HumanMessage(content=agent_prompt),
    ]

    environment_messages = [
        HumanMessage(content=environment_prompt)
    ]

    for i in range(num_turns):
        environment_result = model.predict_messages(environment_messages)
        environment_messages.append(environment_result)
        agent_messages.append(HumanMessage(content=environment_result.content))
        agent_response = model.predict_messages(agent_messages)
        agent_result = agent_response.content.split("ACTION:")[1].strip()
        environment_messages.append(HumanMessage(content=agent_result))
        agent_messages.append(agent_response)
        if "success(" in agent_result:
            break
    task["conversation"] = agent_messages
    return task
    

In [9]:
# Worker function
def api_worker(input_queue, output_queue):
    global stop_threads
    while not stop_threads:
        try:
            data = input_queue.get(timeout=1)
            updated_data = create_agent_conversation(data)
            output_queue.put(updated_data)
            
            # Mark task as done
            input_queue.task_done()
        except queue.Empty:
            # No more items in queue
            return
        except KeyboardInterrupt:
            print("Received interrupt in worker. Re-queueing item...")
            input_queue.put(data)
            break
        except Exception as e:
            # Handle other exceptions as required
            print(f"Error processing data: {e}")
            input_queue.put(data)  # Re-insert the item into the queue


In [12]:
def save_queue_to_file(q, filename):
    with open(filename, 'wb') as file:
        pickle.dump(list(q.queue), file)

def load_queue_from_file(filename):
    try:
        q = queue.Queue()
        with open(filename, 'rb') as file:
            the_list = pickle.load(file)
            for item in the_list:
                q.put(item)
        return q
    except (FileNotFoundError, EOFError):
        return queue.Queue()


### Run Multi-threaded Agent Interactions

#### Load Data

In [24]:
# Load existing multi-threaded Agent Interactions file that we will add new interactions to
# Make sure to use the correct path to your .pkl file
file_path = "./synthetic/synthetic_agent_conversations.pkl"

# Open the file and load its contents into resulting_dicts
with open(file_path, 'rb') as file:
    resulting_dicts = pickle.load(file)

# Now resulting_dicts contains the data from the .pkl file
print(len(resulting_dicts))  # Just to verify the contents

42


In [22]:
# The input queue is the list of tasks that haven't yet been turned into agent interactions
input_queue = load_queue_from_file("./synthetic/input_queue.pkl")
output_queue = queue.Queue()

In [21]:
input_queue.qsize()

1483

#### Run Multi-threaded

In [23]:
# Number of threads
NUM_THREADS = 3

In [27]:
# Launch threads, when you want to stop, use Kernal>Interrupt and wait for it to complete current work
threads = []
stop_threads = False
for _ in range(NUM_THREADS):
    t = threading.Thread(target=api_worker, args=(input_queue, output_queue))
    t.start()
    threads.append(t)

try:
    for t in threads:
        t.join()

except KeyboardInterrupt:
    print("\nReceived keyboard interrupt. Signaling threads to terminate...")
    stop_threads = True

    # Now wait for all threads to finish
    for t in threads:
        t.join()

print("All threads have finished.")

Task:Task: Doctor Ava Thomas needs to revise the diagnosis for patient James White who was incorrectly diagnosed with gastroenteritis.
Task: Simulate a chromosomal DNA sequence with a length of 8 million bases, using the Oxford Nanopore sequencing technology with a mutation rate of 0.0003.
 Run an electromagnetic interference analysis for an automotive radar system
Error processing data: list index out of range
Task: Calculate the average transaction amount in USD for all entries in the 'Sales' dataset for the month of July.
Task: Create a unit test for a function 'calculateDiscount' in a pricing module of an e-commerce platform
Task: Analyze the environmental impact of a 1-year project to restore coral reefs in the Great Barrier Reef using lab-grown corals.
Task: Return 'The Hobbit' borrowed by user Richard West.
Task: Simulate a chromosomal DNA sequence with a length of 10 million bases, using the Oxford Nanopore sequencing technology with a mutation rate of 0.0004.


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


Task: Update the property details for a studio apartment in San Francisco listed by broker Lydia Morton, changing the bathroom count from 1 to 1.5.
Task: Try to increase the inventory by finding new items
Task: Simulate the DNA sequence of mitochondrial DNA with a length of 21000 bases, with a region of interest from 10000 to 11000, using the Illumina sequencing technology with a mutation rate of 0.002.
Task: Test the performance of the newly adjusted algorithm for the fractional knapsack problem.
Task: Read the results of a previously executed simulation to understand the impact of lead shielding on gamma-ray doses
Task: Modify the Colpitts oscillator designed by James Kim, by changing the inductor value to 1mH. Run a frequency sweep to observe the changes in resonant frequency.
Task: Introduce a new food stall selling vegan food.
Task: Fetch the current fuel status for Captain Garcia
Task: Close Ticket


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


Task: Suspend the process 'proc2' temporarily
Task: Optimize the design of a dipole antenna for a specific impedance
Task: Follow a pre-determined path with drone X876 through a dense forest using the 'follow_path' command
Task: Calculate the bandwidth of a Yagi-Uda antenna operating at a frequency of 900 MHz

Received keyboard interrupt. Signaling threads to terminate...
All threads have finished.


In [28]:
# Collect results
# resulting_dicts = []
while not output_queue.empty():
    resulting_dicts.append(output_queue.get())

print(len(resulting_dicts))


62


#### Save Files

In [29]:
# Save new agent interactions
with open("./synthetic/synthetic_agent_conversations.pkl", 'wb') as file:
    pickle.dump(resulting_dicts, file)

In [30]:
# save current state of the input queue
save_queue_to_file(input_queue, "./synthetic/input_queue.pkl")

### Print Agent Interactions to PDF for easy viewing

In [31]:
import html

def format_single_chat(chat_data):
    formatted_html = '<div style="border: 1px solid #ddd; padding: 10px; max-width: 1000px; margin-bottom: 20px;">'

    # Add task as a header
    formatted_html += f'<h2>Task: {html.escape(chat_data["task"])}</h2>'

    # Add environment, io, and state with a smaller font on a neutral background
    formatted_html += (
        '<div style="background-color: #f7f7f7; padding: 10px; border-radius: 5px; font-size: 0.9em;">'
        f'<strong>Environment:</strong> {html.escape(chat_data["environment"])}<br>'
        f'<strong>IO:</strong> {html.escape(chat_data["io"])}<br>'
        f'<strong>State:</strong> {html.escape(chat_data["state"])}'
        '</div>'
    )

    # Loop through the conversation
    for message in chat_data["conversation"]:
        # Depending on the message origin, use a different background color
        bg_color = "#f0f0f0" if message.__class__.__name__ == "HumanMessage" else "#d1e7fd"
        escaped_text = html.escape(message.content.strip())

        formatted_html += (
            f'<div style="background-color: {bg_color}; padding: 20px; '
            'border-radius: 5px; margin: 20px 0;">'
            f'<strong>{message.__class__.__name__.replace("Message", "")}:</strong> '
            f'{escaped_text}'
            '</div>'
        )

    formatted_html += '</div>'
    return formatted_html

In [32]:
def save_chats_to_pdf(list_of_chat_histories, filename):
    combined_html = ""
    for chat_history in list_of_chat_histories:
        combined_html += format_single_chat(chat_history)
    
    # Generate PDF from the combined HTML content
    pdfkit.from_string(combined_html, filename)


In [33]:
save_chats_to_pdf(resulting_dicts, "./synthetic/synthetic_agent_sampler2.pdf")