In [None]:
from src.nodes.understand_element_nodes import agent_preprocessor, agent_code_generation, agent_extract_code, agent_code_review
from src.nodes.understand_element_nodes import conditional_should_continue_after_extraction, conditional_should_continue_after_code_review
import os, io, json, docker, tarfile

In [None]:
# Reference copy of the AgentState schema, kept as an inert string literal.
"""class AgentState(TypedDict):
    file_path: str
    first_few_lines: str
    user_info_about_data: str
    first_few_elements_path_name: str
    extracted_python_code: str
    code_review_result: CodeReviewResult
    generated_code: str
    instructions_to_code: str
    code_extraction_status: str
    code_review_status: str"""

# Starting state for the agent nodes: only the two file names are known up
# front; every other field begins as an empty string and is filled in later.
init_state = dict(
    file_path="data.json",
    first_few_lines="",
    user_info_about_data="",
    first_few_elements_path_name="elements.json",
    extracted_python_code="",
    code_review_result="",
    generated_code="",
    instructions_to_code="",
    code_extraction_status="",
    code_review_status="",
)

In [None]:
# data.json is very large, so only a small leading chunk of it is loaded.
def load_data(file_path, num_chars=1024):
    """Return the leading characters of a text file without reading all of it.

    Args:
        file_path: Path of the file to sample.
        num_chars: Maximum number of characters to read from the start of the
            file. Defaults to 1024.

    Returns:
        A string holding at most ``num_chars`` characters; shorter if the file
        itself is shorter.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # Open the path that was actually requested rather than a fixed file name.
    with open(file_path, 'r') as file:
        return file.read(num_chars)

In [None]:
# Store the leading chunk of the data file (as returned by load_data) in the state.
init_state["first_few_lines"] = load_data(init_state["file_path"])

In [None]:
# Free-text description of the dataset, stored in the state for the agent nodes.
init_state["user_info_about_data"] = "This is a json file. It contains a list of dictionaries. Elements of list are minecraft events triggered by players in minecraft server."

In [None]:
# Run the preprocessor node and replace the state with whatever it returns.
init_state = agent_preprocessor(init_state)

In [None]:
# Run code generation on a shallow copy of the state; init_state itself is
# only reassigned in the following cell.
temp = init_state.copy()
temp = agent_code_generation(temp)

In [None]:
# Accept the code-generation result as the current state.
init_state = temp

In [None]:
# Run code extraction on a shallow copy of the state; init_state itself is
# only reassigned in the following cell.
temp = init_state.copy()
temp = agent_extract_code(temp)

In [None]:
# Accept the extraction result, then take a fresh shallow copy for the next step.
init_state = temp
temp = init_state.copy()

In [None]:
# Run the code-review node on the copy.
# NOTE(review): the result stays in `temp`; none of the visible cells assigns
# it back to init_state — confirm that this is intended.
temp = agent_code_review(temp)

In [None]:
import tempfile, shutil

In [None]:
def process_with_docker(init_state):
    """Run the extracted Python script in a Docker container and return its JSON output.

    A fresh working directory is created under ``temp/``. The data file named
    by ``init_state['file_path']`` is copied into it as ``data.json`` and
    ``init_state['extracted_python_code']`` is written next to it as
    ``temp_script.py``. The directory is bind-mounted at ``/usr/src/app`` in a
    ``python`` container, the script is executed there, and
    ``/usr/src/app/elements.json`` is fetched back and parsed.

    The container is always removed before returning. The working directory
    under ``temp/`` is left in place so its contents can be inspected.

    Args:
        init_state: Mapping that provides the keys ``'file_path'`` and
            ``'extracted_python_code'``.

    Returns:
        The parsed content of ``elements.json``, or ``None`` if the archive
        returned by Docker contains no ``.json`` member.

    Raises:
        OSError: If the data file cannot be copied or the script cannot be written.
        docker.errors.APIError: If the Docker daemon reports an error, e.g. when
            the script did not create ``elements.json``.
    """
    # Prepare the host-side working directory that will be mounted into the container.
    custom_temp_base_dir = "temp"
    os.makedirs(custom_temp_base_dir, exist_ok=True)
    temp_dir = tempfile.mkdtemp(dir=custom_temp_base_dir)
    print(f"Temporary directory created at: {temp_dir}")

    # Copy the input data next to the script under the name the script expects.
    destination_data_path = os.path.join(temp_dir, "data.json")
    shutil.copy(init_state['file_path'], destination_data_path)
    print(f"Copied data.json to: {destination_data_path}")

    # Write the Python script
    temp_script_path = os.path.join(temp_dir, "temp_script.py")
    with open(temp_script_path, "w") as f:
        f.write(init_state['extracted_python_code'])
    print(f"Written script to: {temp_script_path}")

    client = docker.from_env()
    container = None
    json_data = None
    try:
        # Without an explicit long-running command the image's default
        # `python3` sees EOF on stdin and exits at once, leaving nothing to
        # exec into; `sleep infinity` keeps the container alive.
        container = client.containers.run(
            image="python",
            command="sleep infinity",
            # Bind mounts require an absolute host path.
            volumes={os.path.abspath(temp_dir): {'bind': '/usr/src/app', 'mode': 'rw'}},
            working_dir="/usr/src/app",
            detach=True,
            mem_limit="5g",
        )
        print(f"Container started: {container.id}")

        # The script is generated code, so it is deliberately NOT run privileged.
        exit_code, output = container.exec_run("python temp_script.py", workdir="/usr/src/app")
        print("Python script output:", output.decode("utf-8"))
        if exit_code != 0:
            print(f"Python script exited with code {exit_code}")

        # Debug: List files in the container
        _, listing = container.exec_run("ls -l /usr/src/app")
        print("Files in container:", listing.decode("utf-8"))

        # Retrieve elements.json; get_archive yields the file wrapped in a TAR stream.
        output_file_path = "/usr/src/app/elements.json"
        tar_stream, _ = container.get_archive(output_file_path)
        tar_bytes = io.BytesIO(b"".join(tar_stream))

        # Extract JSON from TAR
        with tarfile.open(fileobj=tar_bytes, mode="r") as tar:
            for member in tar.getmembers():
                if member.name.endswith(".json"):
                    json_file = tar.extractfile(member)
                    json_data = json.loads(json_file.read().decode("utf-8"))
                    print("Extracted JSON data:", json_data)
    finally:
        # Always tear the container down, even if the script or retrieval failed.
        if container is not None:
            container.remove(force=True)
        client.close()

    return json_data


In [None]:
# Execute the extracted script in Docker; the return value is the cell's result.
process_with_docker(init_state)

In [None]:
# Manual, step-by-step version of the working-directory setup, run at cell
# level so the resulting paths stay available to the following cells.
# NOTE(review): this repeats the setup done inside process_with_docker, except
# that it also pre-creates an empty elements.json.
custom_temp_base_dir = "temp"
os.makedirs(custom_temp_base_dir, exist_ok=True)
# Unique sub-directory under temp/ for this run.
temp_dir = tempfile.mkdtemp(dir=custom_temp_base_dir)
print(f"Temporary directory created at: {temp_dir}")

container = None
json_data = None

# Copy the input data file into the working directory as data.json
data_json_path = init_state['file_path']
destination_data_path = os.path.join(temp_dir, "data.json")
shutil.copy(data_json_path, destination_data_path)
print(f"Copied data.json to: {destination_data_path}")

# Create elements.json containing an empty JSON list
destination_elements_path = os.path.join(temp_dir, "elements.json")
with open(destination_elements_path, "w") as f:
    json.dump([], f)
print(f"Created empty elements.json at: {destination_elements_path}")

# Write the extracted Python code from the state to temp_script.py
temp_script_path = os.path.join(temp_dir, "temp_script.py")
with open(temp_script_path, "w") as f:
    f.write(init_state['extracted_python_code'])
print(f"Written script to: {temp_script_path}")

In [None]:
# Docker client built from the environment's Docker configuration.
client = docker.from_env()

In [None]:
# Create a container with the temporary directory bind-mounted as its working
# directory and run the script as the container's command.
# temp_dir is a relative path ("temp/..."); Docker only accepts absolute host
# paths as bind-mount sources, so it is resolved with abspath first.
container = client.containers.run(
    image="python:3.9-slim",
    volumes={os.path.abspath(temp_dir): {'bind': '/usr/src/app', 'mode': 'rw'}},
    working_dir="/usr/src/app",
    detach=True,
    mem_limit="6g",
    command="python temp_script.py",
)

In [None]:
from crewai_tools import CodeInterpreterTool

In [None]:
# Instantiate the crewai code-interpreter tool with no arguments.
tool = CodeInterpreterTool()

In [None]:
# Run a small extraction snippet through the tool's Docker runner.
# NOTE(review): the embedded snippet builds output_file_path but only prints
# the sliced elements; nothing is written to elements.json.
tool.run_code_in_docker(code="""
import json
import os

def extract_and_save_elements(input_filename, output_filename, num_elements):
    base_path = os.getcwd()
    input_file_path = os.path.join(base_path, input_filename)
    output_file_path = os.path.join(base_path, output_filename)

    with open(input_file_path, 'r') as file:
        data = json.load(file)

    extracted_elements = data[:num_elements]

    print(extracted_elements)

extract_and_save_elements('data.json', 'elements.json', 5)
""", libraries_used=['json', 'os'])

In [2]:
import json

In [4]:
# Parse the whole JSON document at temp/data.json into memory as `elements`.
with open('temp/data.json', 'r') as file:
    elements = json.load(file)

In [5]:


def flatten_json(nested_json, parent_key='', sep='.'):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent's key with ``sep``
    (``parent_key`` is the prefix for the top level). A list made up only of
    strings is stored as one comma-separated string; any other list, and every
    non-dict value, is stored unchanged.
    """
    def _walk(node, prefix):
        # Yield (flattened_key, value) pairs in the order the keys are met.
        for name, item in node.items():
            path = name if not prefix else f"{prefix}{sep}{name}"
            if isinstance(item, dict):
                # Descend, using the path built so far as the new prefix.
                yield from _walk(item, path)
            elif isinstance(item, list) and all(isinstance(entry, str) for entry in item):
                yield path, ', '.join(map(str, item))
            else:
                yield path, item

    return dict(_walk(nested_json, parent_key))

def flatten_large_json(json_obj, limit=100):
    """
    Flatten the first ``limit`` events of a mapping of event id -> event data.

    Each event is flattened with ``flatten_json`` and its id is stored in the
    result under the key ``'event.id'``. Events are taken in the mapping's
    iteration order.
    """
    flattened_list = []
    for position, (event_id, event_data) in enumerate(json_obj.items()):
        # Stop once `limit` events have been collected.
        if position >= limit:
            break
        record = flatten_json(event_data)
        record['event.id'] = event_id
        flattened_list.append(record)
    return flattened_list

# Example usage:
def main():
    """Flatten the first 50 events of temp/data.json and save them to flattened_events.json.

    The first five flattened events are also printed for a quick visual check.
    """
    # Read the complete source document.
    with open("temp/data.json", "r") as source:
        raw_events = json.load(source)

    # Keep only the first 50 events, each flattened to a single level.
    flattened_data = flatten_large_json(raw_events, limit=50)

    # Persist the flattened events for the later cells.
    with open("flattened_events.json", "w") as sink:
        json.dump(flattened_data, sink, indent=2)

    # Show a handful of events for inspection.
    for event in flattened_data[:5]:
        print(event)


In [6]:
# Run the flattening step; it writes flattened_events.json and prints a few events.
main()

{'event_type': 'block_break', 'player_id': 'player_6254', 'timestamp': '2024-11-19T23:37:22Z', 'location.x': -27, 'location.y': 63, 'location.z': -36, 'details.block_type': 'sand', 'details.tool_used': 'wooden_axe', 'details.dropped_items': 'stone', 'event.id': 'event_001'}
{'event_type': 'player_join', 'player_id': 'player_2731', 'timestamp': '2024-11-19T08:30:46Z', 'location.x': 218, 'location.y': 60, 'location.z': -238, 'details.message': 'Player joined the game', 'event.id': 'event_002'}
{'event_type': 'player_death', 'player_id': 'player_2195', 'timestamp': '2024-11-12T12:47:27Z', 'location.x': -478, 'location.y': 60, 'location.z': 495, 'details.cause': 'drowned', 'details.killer': 'player_1863', 'details.items_dropped': 'iron_sword, sand', 'event.id': 'event_003'}
{'event_type': 'chat_message', 'player_id': 'player_4048', 'timestamp': '2024-11-20T06:13:17Z', 'location.x': -498, 'location.y': 63, 'location.z': 248, 'details.message': 'Does anyone have spare food?', 'details.chat_c

In [7]:
# Reload the flattened events written by main() as `flattened_data`.
with open("flattened_events.json", "r") as file:
    flattened_data = json.load(file)

In [8]:
# Collect every distinct field name that occurs in any flattened event.
unique_fields = {
    field
    for flattened_event in flattened_data
    for field in flattened_event.keys()
}

In [12]:
from src.states.understand_element_state import FieldInfo

In [13]:
def get_element_with_field_name(field_name, events=None, limit=5):
    """Return the first events that contain ``field_name``.

    Args:
        field_name: Key to look for in each event.
        events: Iterable of event dictionaries to search. When omitted, the
            notebook-level ``flattened_data`` is searched.
        limit: Maximum number of matching events to return. Defaults to 5.

    Returns:
        A list with at most ``limit`` events, in their original order. The
        list holds the event objects themselves, not copies.
    """
    if events is None:
        events = flattened_data
    if limit <= 0:
        return []

    matches = []
    for event in events:
        if field_name in event:
            matches.append(event)
            # Stop scanning as soon as enough examples were found.
            if len(matches) >= limit:
                break
    return matches

In [14]:
# Names of value types whose string form can become very large.
_LARGE_VALUE_TYPES = ('list', 'set', 'tuple', 'dict', 'bytes', 'str', 'bytearray')
# Maximum number of characters kept for any single stored value.
_MAX_VALUE_CHARS = 1000

# Build one summary per field: its data type, example values, and the example
# events those values were taken from.
field_info_list = []
for field_name in unique_fields:
    field_elements = get_element_with_field_name(field_name)
    # The data type is taken from the first example event only.
    field_data_type = type(field_elements[0][field_name]).__name__
    field_values = [event[field_name] for event in field_elements]

    # If any example value is oversized, store all of them as strings cut to
    # the maximum length; field_data_type keeps naming the original type.
    if field_data_type in _LARGE_VALUE_TYPES:
        temp_field_values = [str(value) for value in field_values]
        if any(len(value) > _MAX_VALUE_CHARS for value in temp_field_values):
            field_values = [value[:_MAX_VALUE_CHARS] for value in temp_field_values]

    # Truncate oversized values on shallow copies of the events. The events are
    # shared with flattened_data and with other fields' examples; editing them
    # in place would make a later field's detected data type depend on the
    # (arbitrary) iteration order of unique_fields.
    truncated_elements = []
    for element in field_elements:
        element_copy = dict(element)
        for element_field_name, element_field_value in element.items():
            if type(element_field_value).__name__ in _LARGE_VALUE_TYPES:
                element_field_text = str(element_field_value)
                if len(element_field_text) > _MAX_VALUE_CHARS:
                    element_copy[element_field_name] = element_field_text[:_MAX_VALUE_CHARS]
        truncated_elements.append(element_copy)

    field_info = {
        "field_name": field_name,
        "field_value_info" : {
            "data_type": field_data_type,
            "example_values": field_values
        },
        "field_elements": truncated_elements
    }
    field_info_list.append(field_info)

In [16]:
# Write the collected per-field summaries to field_info.json.
with open("field_info.json", "w") as file:
    json.dump(field_info_list, file, indent=2)

In [22]:
from langchain_openai import ChatOpenAI
from src.states.understand_element_state import corrected_field_names_spelling_list

# Chat model client; the API key is read from the OPENAI_API_KEY environment variable.
model = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"), streaming=True)

# Wrap the model so that its replies follow the corrected_field_names_spelling_list schema.
# NOTE(review): the recorded output of this cell is an ImportError for
# corrected_field_names_spelling_list, so the cell did not get this far —
# confirm the schema's actual name in src.states.understand_element_state.
correct_field_name_spelling = model.with_structured_output(corrected_field_names_spelling_list)

ImportError: cannot import name 'corrected_field_names_spelling_list' from 'src.states.understand_element_state' (/Volumes/RenzovPersonal/packages/LiveOpsFrontDashboardMain/backend-testing/agentic-trials/src/states/understand_element_state.py)