# Browser Use

In [None]:
import os
# Placeholder credentials for the LLM providers imported below; replace them
# with real keys before running.
# NOTE(review): these are assigned before load_dotenv() is called further down.
# python-dotenv does not override already-set variables by default, so these
# placeholders presumably win over values in a .env file — confirm intended.
os.environ["OPENAI_API_KEY"] = "your_openai_key_here"
os.environ["GEMINI_API_KEY"] = "your_gemini_key_here"
os.environ["GOOGLE_API_KEY"] = "your_google_key_here"
os.environ["DEEPSEEK_API_KEY"] = "your_deepseek_key_here"

from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_deepseek import ChatDeepSeek

from browser_use import Agent, BrowserSession
from pydantic import SecretStr
from dotenv import load_dotenv

from urllib.parse import urlparse
import datetime
import logging

import asyncio
import nest_asyncio

# Read environment variables from a .env file, if one is found.
load_dotenv()
# Patch asyncio so an event loop can be re-entered; the notebook kernel already
# runs a loop, and the agent below is awaited from inside it.
nest_asyncio.apply()


# ISP-style logger (used per topic)
def setup_isp_logger(path):
    """Return the shared ``"isp_logger"`` logger, reconfigured to write only to *path*.

    Handlers left over from an earlier call are detached and closed, then a
    single ``FileHandler`` opened in ``"w"`` mode (an existing file is
    truncated) is attached, using the format ``"%(levelname)s | %(message)s"``.

    Args:
        path: Destination log file. Missing parent directories are created.

    Returns:
        The configured ``logging.Logger``. The same logger object is returned
        on every call, so a later call redirects it to the new file.
    """
    logger = logging.getLogger("isp_logger")
    logger.setLevel(logging.INFO)
    # Keep these records away from the root logger's handlers.
    logger.propagate = False

    # Iterate over a copy: removeHandler() mutates logger.handlers, and removing
    # from the live list while iterating skips every other handler. Closing
    # each handler releases the previous log file's descriptor.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # FileHandler raises FileNotFoundError when the target directory is absent.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    handler = logging.FileHandler(path, mode="w")
    handler.setFormatter(logging.Formatter("%(levelname)s | %(message)s"))
    logger.addHandler(handler)
    return logger

# # Load prompts from XML/text file
# def parse_topic_file(path):
#     import re
#     with open(path, "r", encoding="utf-8") as f:
#         content = f.read()
#     topics = re.findall(r"<topic num='(\d+)'>\s*<desc>(.*?)</desc>", content, re.DOTALL)
#     return [(int(num), desc.strip()) for num, desc in topics]

def parse_topic_file(filename):
    """Read one topic per non-blank line and number the topics from 1.

    Args:
        filename: Path to a UTF-8 text file with one prompt per line.

    Returns:
        A list of ``(topic_number, text)`` tuples. Each line is stripped of
        surrounding whitespace; blank lines are dropped and do not consume a
        number. An empty file yields an empty list.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere, and
    # prompts may contain non-ASCII punctuation such as dashes or curly quotes.
    with open(filename, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return list(enumerate((text for text in stripped if text), start=1))

async def run_all_browser_use_topics():
    """Run one headless browser-use agent on a fixed prompt and log the URLs it visited.

    Despite the name, this variant runs a single hard-coded session (a research
    brief about Swahili home cooking) with ``ChatOpenAI(model="gpt-4o")``.
    When the agent finishes, each URL in its history whose domain does not
    contain "bing.com" is written to the log file as one ``[ISP] ...`` line
    with the domain, a best-effort resolved destination IP and the full URL.

    The timestamp on each line is taken when the line is written (after the
    run), not when the page was visited.

    Side effects: creates/truncates the log file via ``setup_isp_logger`` and
    launches a browser session, which is always closed, even if the agent fails.
    """
    # Local import: the notebook cell never imports `socket`, so the original
    # lookup raised NameError, which the broad `except Exception` swallowed,
    # leaving every DstIP as "unknown".
    import socket

    isp_log_path = "browseruse_log/rewritten/gpt4o-inst-SESSION14-1.txt"
    isp_logger = setup_isp_logger(isp_log_path)

    browser_session = BrowserSession(user_data_dir=None, headless=True)
    await browser_session.start()

    full_prompt = """Instructions for Researcher
        
Objective:  
I want to learn about traditional Swahili dishes and how to cook them, so I can surprise my friend from Kenya. Please find relevant web pages that focus specifically on Swahili home cooking.
Instructions:

1. **Focus on Swahili Cuisine**  
   - Prioritize dishes that are traditional to the Swahili-speaking coastal regions of Kenya and East Africa.
   - Ensure the dishes you find are genuinely Swahili rather than general Kenyan or East African food unless they are widely recognized as part of Swahili home cuisine.
   
2. **Type of Resources**  
   - Seek out web pages that provide recipes, step-by-step instructions, or cooking guides, ideally from home cooks, food bloggers, or reputable culinary sites.
   - Prioritize resources that suit home cooking rather than professional/restaurant preparation.
   
3. **Format of Output**  
   - Structure your findings as a short report with clear headers:
     1. **Overview of Swahili Cuisine**  
     2. **List of Popular Swahili Dishes**  
     3. **Cooking Instructions/Resources**  
     4. **References and Links**  
   - Under each dish, include a brief description and the direct web page link to the cooking instructions.
   
4. **Web Page Selection**  
   - Prefer web pages with detailed and easy-to-follow instructions, pictures, or videos.
   - Give special consideration to resources published by Kenyans, Swahili communities, or sites focusing on authentic regional cuisine.
   
5. **Presentation**  
   - Include a table listing at least 5 traditional Swahili dishes, with columns for: Dish Name, Brief Description, and Link to Cooking Resource.
   - Indicate if any dishes have special significance (e.g., eaten at celebrations or daily meals).
   
6. **Open-Ended Preferences**  
   - No specific ingredient restrictions or dietary requirements were provided, so include all common Swahili dishes.
   - No language requirement is stated; prioritize English language resources, but include notable Swahili-language resources if especially authentic or instructive.
   
7. **Sources**  
   - Prefer official or reputable food sites, blogs by authentic home cooks, YouTube channels from Kenyan or Swahili creators, and cultural organizations.
   - Avoid aggregator or SEO-heavy copycat blogs.
   
8. **References and Linking**  
   - For each dish/resource, provide a direct link to the web page, not just the homepage.
   
Expected Output:  
A clearly formatted report with the above structure and a table cataloguing at least 5 Swahili dishes and direct links to recipes, followed by a references section with all sources linked.
If you need information about a particular dish’s popularity or special significance and can’t determine it from the recipe page, make a note of that and suggest further research.

Before concluding or summarizing, please collaborate with the MultimodalWebSurfer to explore relevant websites and gather concrete information. Use Bing as a search engine.

To complete the task, you must:
1. Click and visit pages—not just summarize search results or rely on prior knowledge.
2. Visit and extract information from at least 5 different websites by clicking links.
3. Only use information found on pages you actually opened.
4. Once you have gathered concrete details from at least 5 pages, write a brief summary and say: "Done with task."
5. If you encounter reCAPTCHA, switch to Bing search. 

Do not infer or assume anything unless it is directly stated on a visited webpage.
"""

    # From here on the browser is running; make sure it is closed on any exit.
    try:
        agent = Agent(
            enable_memory=False,
            task=full_prompt,
            initial_actions=[{"open_tab": {"url": "https://www.bing.com"}}],
            llm=ChatOpenAI(model="gpt-4o"),
            # llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash', api_key=api_key),
            # llm=ChatDeepSeek(base_url='https://api.deepseek.com/v1', model='deepseek-reasoner', api_key=SecretStr(api_key))
            browser_session=browser_session,
        )

        history = await agent.run()

        # Simulate ISP logs from the agent's navigation history.
        for url in history.urls():
            parsed = urlparse(url)
            domain = parsed.netloc or "unknown"

            # Search-engine traffic is left out of the log; skip it before
            # paying for a DNS lookup.
            if "bing.com" in domain:
                continue

            # Same naive-UTC ISO format that utcnow() produced, without using
            # the deprecated call.
            timestamp = (
                datetime.datetime.now(datetime.timezone.utc)
                .replace(tzinfo=None)
                .isoformat()
            )

            try:
                # netloc may carry a port or userinfo; resolve the bare host.
                ip = socket.gethostbyname(parsed.hostname or domain)
            except (OSError, ValueError):
                # Resolution is best-effort: DNS failures (OSError) and
                # malformed host names (ValueError) are logged as "unknown".
                ip = "unknown"

            isp_logger.info(
                f"[ISP] {timestamp} | Domain: {domain} | SrcIP: local | DstIP: {ip} | Size: unknown | [NAVIGATED] URL: {url}"
            )
    finally:
        await browser_session.close()

# Run the single hard-coded session defined above.
# Top-level `await` works in an IPython/Jupyter cell; a plain script would need
# asyncio.run(run_all_browser_use_topics()) instead.
await run_all_browser_use_topics()


In [None]:
import os
# Placeholder credentials for the LLM providers imported below; replace them
# with real keys before running.
# NOTE(review): these are assigned before load_dotenv() is called further down.
# python-dotenv does not override already-set variables by default, so these
# placeholders presumably win over values in a .env file — confirm intended.
os.environ["OPENAI_API_KEY"] = "your_openai_key_here"
os.environ["GEMINI_API_KEY"] = "your_gemini_key_here"
os.environ["GOOGLE_API_KEY"] = "your_google_key_here"
os.environ["DEEPSEEK_API_KEY"] = "your_deepseek_key_here"

from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_deepseek import ChatDeepSeek

from browser_use import Agent, BrowserSession
from pydantic import SecretStr
from dotenv import load_dotenv

from urllib.parse import urlparse
import datetime
import logging

import asyncio
import nest_asyncio

# Read environment variables from a .env file, if one is found.
load_dotenv()
# Patch asyncio so an event loop can be re-entered; the notebook kernel already
# runs a loop, and the agent below is awaited from inside it.
nest_asyncio.apply()


# ISP-style logger (used per topic)
def setup_isp_logger(path):
    """Return the shared ``"isp_logger"`` logger, reconfigured to write only to *path*.

    Handlers left over from an earlier call are detached and closed, then a
    single ``FileHandler`` opened in ``"w"`` mode (an existing file is
    truncated) is attached, using the format ``"%(levelname)s | %(message)s"``.

    Args:
        path: Destination log file. Missing parent directories are created.

    Returns:
        The configured ``logging.Logger``. The same logger object is returned
        on every call, so a later call redirects it to the new file.
    """
    logger = logging.getLogger("isp_logger")
    logger.setLevel(logging.INFO)
    # Keep these records away from the root logger's handlers.
    logger.propagate = False

    # Iterate over a copy: removeHandler() mutates logger.handlers, and removing
    # from the live list while iterating skips every other handler. Closing
    # each handler releases the previous log file's descriptor.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # FileHandler raises FileNotFoundError when the target directory is absent.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    handler = logging.FileHandler(path, mode="w")
    handler.setFormatter(logging.Formatter("%(levelname)s | %(message)s"))
    logger.addHandler(handler)
    return logger

# # Load prompts from XML/text file
# def parse_topic_file(path):
#     import re
#     with open(path, "r", encoding="utf-8") as f:
#         content = f.read()
#     topics = re.findall(r"<topic num='(\d+)'>\s*<desc>(.*?)</desc>", content, re.DOTALL)
#     return [(int(num), desc.strip()) for num, desc in topics]

def parse_topic_file(filename):
    """Read one topic per non-blank line and number the topics from 1.

    Args:
        filename: Path to a UTF-8 text file with one prompt per line.

    Returns:
        A list of ``(topic_number, text)`` tuples. Each line is stripped of
        surrounding whitespace; blank lines are dropped and do not consume a
        number. An empty file yields an empty list.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere, and
    # prompts may contain non-ASCII punctuation such as dashes or curly quotes.
    with open(filename, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return list(enumerate((text for text in stripped if text), start=1))

async def run_all_browser_use_topics():
    """Run one headless browser-use agent per topic and log the URLs each one visited.

    Topics are read from ``topictext-5prompt.txt`` with ``parse_topic_file``.
    For each topic a fresh browser session is started, the topic text plus a
    fixed set of browsing instructions is given to an agent backed by
    ``ChatOpenAI(model="gpt-4o")``, and every URL in the agent's history whose
    domain does not contain "bing.com" is written as one ``[ISP] ...`` line to
    ``browseruse_log/gemini-2.0/agent_traffic_<topic_num>.txt``.

    A topic whose log file already exists is skipped, so the batch can be
    resumed. The log file is only created after the agent has finished, so a
    crashed run is retried on the next invocation instead of being skipped.

    The timestamp on each line is taken when the line is written (after the
    run), not when the page was visited.
    """
    # Local import: the notebook cell never imports `socket`, so the original
    # lookup raised NameError, which the broad `except Exception` swallowed,
    # leaving every DstIP as "unknown".
    import socket

    # topics = parse_topic_file("topictext-890.txt")
    topics = parse_topic_file("topictext-5prompt.txt")

    for topic_num, desc in topics:
        # NOTE(review): the directory is named "gemini-2.0" but the agent below
        # uses gpt-4o — confirm the intended output location.
        isp_log_path = f"browseruse_log/gemini-2.0/agent_traffic_{topic_num}.txt"

        if os.path.exists(isp_log_path):
            print(f"Skipping topic {topic_num} (already completed).")
            continue

        print(f"\nRunning topic {topic_num}...")

        # Fail fast on an unwritable location before spending an agent run.
        os.makedirs(os.path.dirname(isp_log_path), exist_ok=True)

        browser_session = BrowserSession(user_data_dir=None, headless=True)
        await browser_session.start()

        full_prompt = f"""{desc}

Before concluding or summarizing, please collaborate with the MultimodalWebSurfer to explore relevant websites and gather concrete information. Use Bing as a search engine.

To complete the task, you must:
1. Click and visit pages—not just summarize search results or rely on prior knowledge.
2. Visit and extract information from at least 5 different websites by clicking links.
3. Only use information found on pages you actually opened.
4. Once you have gathered concrete details from at least 5 pages, write a brief summary and say: "Done with task."
5. If you encounter reCAPTCHA, switch to Bing search. 

Do not infer or assume anything unless it is directly stated on a visited webpage.
"""

        # From here on the browser is running; make sure it is closed on any
        # exit so a failing topic does not leak a browser process.
        try:
            agent = Agent(
                enable_memory=False,
                task=full_prompt,
                initial_actions=[{"open_tab": {"url": "https://www.bing.com"}}],
                llm=ChatOpenAI(model="gpt-4o"),
                # llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash', api_key=api_key),
                # llm=ChatDeepSeek(base_url='https://api.deepseek.com/v1', model='deepseek-reasoner', api_key=SecretStr(api_key))
                browser_session=browser_session,
            )

            history = await agent.run()

            # Create the log only now: the existence of this file is what marks
            # a topic as completed in the skip check above.
            isp_logger = setup_isp_logger(isp_log_path)

            # Simulate ISP logs from the agent's navigation history.
            for url in history.urls():
                parsed = urlparse(url)
                domain = parsed.netloc or "unknown"

                # Search-engine traffic is left out of the log; skip it before
                # paying for a DNS lookup.
                if "bing.com" in domain:
                    continue

                # Same naive-UTC ISO format that utcnow() produced, without
                # using the deprecated call.
                timestamp = (
                    datetime.datetime.now(datetime.timezone.utc)
                    .replace(tzinfo=None)
                    .isoformat()
                )

                try:
                    # netloc may carry a port or userinfo; resolve the bare host.
                    ip = socket.gethostbyname(parsed.hostname or domain)
                except (OSError, ValueError):
                    # Resolution is best-effort: DNS failures (OSError) and
                    # malformed host names (ValueError) are logged as "unknown".
                    ip = "unknown"

                isp_logger.info(
                    f"[ISP] {timestamp} | Domain: {domain} | SrcIP: local | DstIP: {ip} | Size: unknown | [NAVIGATED] URL: {url}"
                )
        finally:
            await browser_session.close()

# Run the full batch (one agent session per topic).
# Top-level `await` works in an IPython/Jupyter cell; a plain script would need
# asyncio.run(run_all_browser_use_topics()) instead.
await run_all_browser_use_topics()


In [None]:
import os
import re
from collections import OrderedDict

import json

# Traffic logs are expected at <log_dir>/<log_prefix><i><log_suffix>
# for i = 1 .. num_files.
log_dir = "browseruse_log"
log_prefix = "agent_traffic_"
log_suffix = ".txt"
num_files = 60

# Captures the host name that follows "Domain:" on an ISP log line.
domain_pattern = re.compile(r"Domain:\s*([\w\.-]+)")

# Result: {"prompt_<i>": [domain, ...]} with each domain listed once, in the
# order it first appears in that session's log.
all_domain_seqs = {}

for i in range(1, num_files + 1):
    path = os.path.join(log_dir, f"{log_prefix}{i}{log_suffix}")

    # A dict keeps insertion order, so its keys double as an ordered,
    # duplicate-free sequence of domains.
    first_seen = {}
    with open(path, "r") as f:
        for line in f:
            # Only ISP entries that carry a Domain field are of interest.
            if "[ISP]" not in line or "Domain:" not in line:
                continue
            match = domain_pattern.search(line)
            if match is None:
                continue
            first_seen.setdefault(match.group(1), None)

    all_domain_seqs[f"prompt_{i}"] = list(first_seen)

# Persist every session's domain sequence as one pretty-printed JSON document.
with open("browseruse_domain_sequences.json", "w") as f:
    json.dump(all_domain_seqs, f, indent=2)

print("✅ Extracted domain sequences from all sessions.")
