# Custom Sandbox

First of all, create the template using the commands here:
https://e2b.dev/docs/guide/custom-sandbox

In [1]:
# Read the API keys from the environment; they can also be kept in a .env file.
import os

from dotenv import load_dotenv

# Pull variables from a .env file into the process environment.
load_dotenv()

# TODO: Get your OpenAI API key
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# TODO: Get your E2B API key from https://e2b.dev/docs
E2B_API_KEY = os.environ.get("E2B_API_KEY")

In [9]:
from e2b import Sandbox

# Start a sandbox from the custom template created beforehand.
sandbox = Sandbox(template="vicente-sandbox")
try:
    # List the root directory and report which entries are directories.
    content = sandbox.filesystem.list("/")
    for item in content:
        print(f"Is '{item.name}' directory?", item.is_dir)
finally:
    # Close the sandbox even when the listing fails, so it is never left open.
    sandbox.close()
 


Is '.dockerenv' directory? False
Is '.e2b' directory? False
Is 'bin' directory? False
Is 'boot' directory? True
Is 'code' directory? True
Is 'dev' directory? True
Is 'etc' directory? True
Is 'home' directory? True
Is 'lib' directory? False
Is 'lib32' directory? False
Is 'lib64' directory? False
Is 'libx32' directory? False
Is 'lost+found' directory? True
Is 'media' directory? True
Is 'mnt' directory? True
Is 'opt' directory? True
Is 'proc' directory? True
Is 'root' directory? True
Is 'run' directory? True
Is 'sbin' directory? False
Is 'srv' directory? True
Is 'swap' directory? True
Is 'sys' directory? True
Is 'tmp' directory? True
Is 'usr' directory? True
Is 'var' directory? True


# Data Science example

Example of a small data science analysis on an E2B sandbox. I previously built the container according to e2b.DockerFile.
- I send the dataset and a python script
- I request to run the script
- I print and download the results

In [3]:
# Read the API keys from the environment; they can also be kept in a .env file.
import os

from dotenv import load_dotenv

# Pull variables from a .env file into the process environment.
load_dotenv()

# TODO: Get your OpenAI API key
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# TODO: Get your E2B API key from https://e2b.dev/docs
E2B_API_KEY = os.environ.get("E2B_API_KEY")

In [19]:
from e2b import Sandbox

# Start sandbox
# It seems that defining working directory with cwd is not working
# sandbox = Sandbox(template="data-science-sandbox", cwd="/code" )
# Default working directory: /home/user
sandbox = Sandbox(template="data-science-sandbox")
try:
    # Upload the dataset and the script that analyses it. Params: open local file.
    # The listing below shows both files under /home/user, which is also where
    # the relative path in the "python remote_work.py" command is resolved.
    for local_name in ("remote_work.csv", "remote_work.py"):
        with open(local_name, "rb") as f:
            remote_path = sandbox.upload_file(f)

    # Run the script inside the sandbox and wait for it to finish.
    python_exec = sandbox.process.start("python remote_work.py")
    python_exec.wait()
    print(python_exec.stdout)
    # Surface the script's error output too; otherwise a failing script only
    # shows up later as a missing histogram.png.
    # NOTE(review): assumes the process object exposes `stderr` next to
    # `stdout` -- confirm against the installed e2b SDK version.
    if python_exec.stderr:
        print(python_exec.stderr)

    # List the files now present in the sandbox working directory.
    content = sandbox.filesystem.list("/home/user")
    for item in content:
        print(f"Is '{item.name}' directory?", item.is_dir)

    # Download result. Params: path to remote file
    file_in_bytes = sandbox.download_file("/home/user/histogram.png")
    # Save file to local filesystem
    with open("histogram_from_sandbox.png", "wb") as f:
        f.write(file_in_bytes)
finally:
    # Close the sandbox even when a step above fails, so it is never left open.
    sandbox.close()
 


  Employee_ID  Age  ... Sleep_Quality         Region
0     EMP0001   32  ...          Good         Europe
1     EMP0002   40  ...          Good           Asia
2     EMP0003   59  ...          Poor  North America
3     EMP0004   27  ...          Poor         Europe
4     EMP0005   49  ...       Average  North America
5     EMP0006   59  ...       Average  South America
6     EMP0007   31  ...          Poor           Asia
7     EMP0008   42  ...       Average  North America
8     EMP0009   56  ...          Poor         Europe
9     EMP0010   30  ...          Poor  North America

[10 rows x 20 columns]
Figure save: histogram.png
Is '.bash_logout' directory? False
Is '.bashrc' directory? False
Is '.cache' directory? True
Is '.config' directory? True
Is '.profile' directory? False
Is 'histogram.png' directory? False
Is 'remote_work.csv' directory? False
Is 'remote_work.py' directory? False


# Integration with OpenAI

This example creates the Python code using an LLM and then runs that code in a sandbox provided by E2B.

In [2]:
import os

from dotenv import find_dotenv, load_dotenv
from openai import OpenAI

# Pull variables from a .env file into the process environment.
load_dotenv()

# Client used for the chat-completion request; the key comes from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [23]:

# Ask the model for a script that analyses remote_work.csv.
# The user prompt requests an object with "description" and "source_code"
# fields, so the reply is structured text rather than bare Python and has to be
# unpacked before the code can be saved or executed.
completion = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[
        {"role": "system", "content": "You are a python software developper."},
        {"role": "user", "content": """Generate the content of a Python script, give me only the source code. The code must perform the following actions:
        (1) read a file named remote_work.csv
        (2) print columns names and its descriptions
        (3) print the number of samples (rows - 1)
        (4) remove null samples
        (5) plot the distribution by the second column
        Use libraries like pandas, numpy and matplotlib and return the results in this format:
        {
            "description": <your description of code>,
            "source_code": <python code>
        }
        """},
    ],
)

# Keep the text of the first choice in the response.
python_code = completion.choices[0].message.content

In [24]:
import json

# Show the raw model reply for inspection.
print(python_code)

# The reply is expected to be a JSON object with a "source_code" field, and it
# may be wrapped in a Markdown ```json fence. Writing the reply verbatim would
# produce a file that is not valid Python, so take the outermost {...} span,
# decode it and keep only the script text.
json_start = python_code.find("{")
json_end = python_code.rfind("}")
try:
    source_code = json.loads(python_code[json_start:json_end + 1])["source_code"]
except (ValueError, KeyError):
    # Not JSON, or no "source_code" field: assume the reply is already plain code.
    source_code = python_code
if not isinstance(source_code, str):
    # Unexpected field type: fall back to the raw reply rather than fail on write.
    source_code = python_code

# The context manager closes the file even if the write fails.
with open('remote_work_generated.py', 'w') as file:
    file.write(source_code)

```json
{
    "description": "This Python script reads a CSV file named 'remote_work.csv', prints the column names and their descriptions, prints the number of non-header samples, removes any rows with null values, and plots the distribution based on the second column of the dataset. It utilizes pandas for data manipulation, numpy for handling numerical operations, and matplotlib for plotting the distribution.",
    "source_code": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Read the 'remote_work.csv' file\ndf = pd.read_csv('remote_work.csv')\n\n# Print column names and descriptions (assuming the file includes descriptions)\n# This example assumes descriptions are in the second row, which is unusual and might need modification\ncolumn_descriptions = df.iloc[0]\nfor column, description in column_descriptions.items():\n    print(f'Column: {column}, Description: {description}')\n\n# Print the number of samples (excluding the header row)\n# If descriptions