In [None]:
# Address (IP or hostname) of the remote machine. The next cell builds the API
# URL as http://{IP}:5000, so fill this in before running the rest of the notebook.
IP = ""

In [None]:
import logging
logging.basicConfig(level=logging.INFO)
from cua_bench.computers.remote import RemoteDesktopSession

session = RemoteDesktopSession(
    api_url=f"http://{IP}:5000",
    os_type="windows",
)
await session.start()

In [None]:
size = await session.interface.get_screen_size()
width = size["width"]
height = size["height"]
print(width, height)

1024 768


## Mouse Operations

In [None]:
# Pointer actions on the remote desktop. Every coordinate used in this cell
# falls inside the 1024x768 screen reported by the screen-size cell above;
# adjust them if the remote display has a different resolution.

# Left click at coordinates
await session.click(500, 300)

# Right click at coordinates
await session.right_click(600, 400)

# Double click at coordinates
await session.double_click(500, 300)

# Move cursor to coordinates (without clicking)
await session.move_to(700, 500)

# Drag from one position to another (start and end points are given explicitly)
await session.drag(from_x=100, from_y=100, to_x=400, to_y=400)

## Keyboard Operations

In [None]:
# Type text
await session.type("Hello, this is a test!")

# Press a single key
await session.key("enter")
await session.key("backspace")
await session.key("tab")
await session.key("escape")

# Press key combinations (hotkeys)
await session.hotkey(["ctrl", "a"])  # Select all
await session.hotkey(["ctrl", "c"])  # Copy
await session.hotkey(["ctrl", "v"])  # Paste
await session.hotkey(["alt", "tab"])  # Switch windows

## Scrolling

In [None]:
# Scroll down
await session.scroll(direction="down", amount=300)

# Scroll up
await session.scroll(direction="up", amount=300)

## Screenshot & Desktop State

In [None]:
# Capture a screenshot (returns PNG bytes)
screenshot_bytes = await session.screenshot()
print(f"Screenshot size: {len(screenshot_bytes)} bytes")

# Save screenshot to file
with open("screenshot.png", "wb") as f:
    f.write(screenshot_bytes)

# Display screenshot in notebook (using PIL)
from PIL import Image
import io
img = Image.open(io.BytesIO(screenshot_bytes))
display(img)

In [None]:
# Get desktop state snapshot (active windows info)
snapshot = await session.get_snapshot()
print(f"Windows: {len(snapshot.windows)}")
for win in snapshot.windows:
    print(f"  - {win.title}: {win.width}x{win.height} at ({win.x}, {win.y})")

## Shell Command Execution

In [None]:
# Execute a shell command (Windows PowerShell example)
result = await session.run_command("powershell -Command \"Get-Date\"")
print(f"Success: {result['success']}")
print(f"Output: {result['stdout']}")

# Execute command without raising on error (check=False)
result = await session.run_command("powershell -Command \"Get-Process\"", check=False)
print(f"Return code: {result['return_code']}")

## File Operations

In [None]:
# Write a text file to the remote environment
await session.write_file(r"C:\Users\User\Desktop\test.txt", "Hello from Python!")

# Read a text file from the remote environment
content = await session.read_file(r"C:\Users\User\Desktop\test.txt")
print(f"File content: {content}")

# Write binary data to a file
await session.write_bytes(r"C:\Users\User\Desktop\binary.dat", b"\x00\x01\x02\x03")

# Read binary data from a file
binary_data = await session.read_bytes(r"C:\Users\User\Desktop\binary.dat")
print(f"Binary data: {binary_data}")

In [None]:
# Check if a path exists
exists = await session.exists(r"C:\Users\User\Desktop")
print(f"Desktop exists: {exists}")

# List directory contents
files = await session.list_dir(r"C:\Users\User\Desktop")
print(f"Desktop files: {files}")

# Create a directory (with parents)
await session.makedirs(r"C:\Users\User\Desktop\new_folder\subfolder")

# Copy a file
await session.copy_file(r"C:\Users\User\Desktop\test.txt", r"C:\Users\User\Desktop\test_copy.txt")

# Move/rename a file
await session.move_file(r"C:\Users\User\Desktop\test_copy.txt", r"C:\Users\User\Desktop\renamed.txt")

# Remove a file
await session.remove_file(r"C:\Users\User\Desktop\renamed.txt")

## Application Management

In [None]:
# Launch an application by the path of its executable
await session.launch_application(r"C:\Windows\System32\notepad.exe")

# Open a file with its default application (e.g., open a .lnk shortcut)
# NOTE(review): assumes an Excel.lnk shortcut exists on the remote Desktop — confirm or change the path
await session.run_file(r"C:\Users\User\Desktop\Excel.lnk")

# Install a registered app (if supported in app registry)
# await session.install_app("firefox", with_shortcut=True)

# Launch a registered app
# await session.launch_app("firefox", url="https://example.com")

## Low-Level Interface Operations

Access the underlying Computer SDK interface for more granular control:

In [None]:
# Access the low-level interface directly
interface = session.interface

# Type text using interface
await interface.type_text("Direct interface typing")

# Press key using interface
await interface.press_key("enter")

# Mouse operations using interface
await interface.left_click(500, 300)
await interface.right_click(600, 400)
await interface.double_click(500, 300)
await interface.move_cursor(700, 500)
# drag_to takes only a destination; the cursor was just moved to (700, 500) on the line above
await interface.drag_to(800, 600)  # Drag from current position

# Scroll using interface (x, y, clicks - positive=up, negative=down)
# (512, 384) is the midpoint of the 1024x768 screen reported earlier in this notebook
await interface.scroll(512, 384, -3)  # Scroll down 3 clicks at center

# Get screen size
# NOTE: this rebinds `size`, which the screen-size cell near the top also assigns
size = await interface.get_screen_size()
print(f"Screen: {size['width']}x{size['height']}")

## Session Status & Health

In [None]:
# Check if the environment is responsive
# NOTE(review): presumably returns a boolean — confirm against check_status()
is_ready = await session.check_status()
print(f"Environment is ready: {is_ready}")

# Wait until environment is ready (useful after restarts)
# ready = await session.wait_until_ready(timeout=60, poll_interval=2.0)


## Using Action Types

For programmatic control, you can use Action types directly:

In [None]:
from cua_bench.types import (
    ClickAction,
    RightClickAction,
    DoubleClickAction,
    DragAction,
    ScrollAction,
    TypeAction,
    KeyAction,
    HotkeyAction,
    WaitAction,
    MoveToAction,
)

# Execute actions using the step/execute_action method
await session.execute_action(ClickAction(x=500, y=300))
await session.execute_action(TypeAction(text="Hello from action!"))
await session.execute_action(KeyAction(key="enter"))
await session.execute_action(HotkeyAction(keys=["ctrl", "s"]))
await session.execute_action(ScrollAction(x=512, y=384, direction="down", amount=300))
await session.execute_action(WaitAction(seconds=1.0))
await session.execute_action(DragAction(from_x=100, from_y=100, to_x=300, to_y=300))

# Or use step() which is an alias for execute_action()
await session.step(ClickAction(x=500, y=300))