[Bug]: BrowserContext.add_init_script: Target page, context or browser has been closed #997

@alllexx88

Description

crawl4ai version

0.5.0.post8

Expected Behavior

Crawl with BFSDeepCrawl without errors.

Current Behavior

During a BFSDeepCrawl, some pages fail to scrape with a BrowserContext.add_init_script: Target page, context or browser has been closed Playwright error. It appears to happen at random, but when crawling enough pages it occurs fairly consistently. With the code from Code snippets below, it happens for me on every run.

Is this reproducible?

Yes

Inputs Causing the Bug

Steps to Reproduce

Code snippets

import asyncio
from pathlib import Path

from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig
from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
from crawl4ai.browser_manager import BrowserManager
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.deep_crawling.filters import FilterChain, URLPatternFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator

original_close = AsyncPlaywrightCrawlerStrategy.close


async def patched_close(self):
    """
    Patched close method that resets the Playwright instance after cleanup.

    This fixes the issue where subsequent crawl requests fail with:
    "BrowserType.launch: Target page, context or browser has been closed"
    """

    # Call the original close method
    await original_close(self)

    # Reset the static Playwright instance
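    # NOTE: _playwright_instance is a private crawl4ai attribute; resetting it
    # here is a workaround and may break in future releases.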
    BrowserManager._playwright_instance = None

    return


AsyncPlaywrightCrawlerStrategy.close = patched_close


async def main(cat, filter_type):
    async with AsyncWebCrawler() as crawler:
        url_filter = URLPatternFilter(
            patterns=[f"https://www.lcwaikiki.ua/uk-UA/UA/product/*/{filter_type}/*"]
        )
        strategy = BFSDeepCrawlStrategy(
            max_depth=100,  # Crawl initial page + 100 levels deep
            include_external=False,  # Stay within the same domain
            max_pages=100,  # Maximum number of pages to crawl (optional)
            filter_chain=FilterChain([url_filter]),
            # score_threshold=0.3,  # Minimum score for URLs to be crawled (optional)
        )
        config = CrawlerRunConfig(
            deep_crawl_strategy=strategy,
            scraping_strategy=LXMLWebScrapingStrategy(),
            markdown_generator=DefaultMarkdownGenerator(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
            js_code="window.scrollTo(0, document.body.scrollHeight);",
            magic=True,
            simulate_user=True,
            override_navigator=True,
        )

        results = await crawler.arun(
            f"https://www.lcwaikiki.ua/uk-UA/UA/category/{cat}",
            config=config,
        )

        print(f"Crawled {len(results)} pages in total")

        uniq_urls = set()
        uniq_results = []
        for result in results:
            if result.url.removesuffix("/") not in uniq_urls and result.markdown:
                i = len(uniq_results)
                uniq_urls.add(result.url.removesuffix("/"))
                uniq_results.append(result)
                (Path(cat) / f"item_{i:02}.md").write_text(result.markdown.raw_markdown)
                (Path(cat) / f"item_{i:02}.url").write_text(result.url)
        print(f"Unique URLs: {len(uniq_urls)}")


if __name__ == "__main__":
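    # Maps category slug -> Ukrainian product-path segment used in the URL
    # filter (e.g. "Чоловікам/Сорочка" = "Men/Shirt").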
    categories = {
        "men/shirts-c2": "Чоловікам/Сорочка",
        "women/coats-c56": "Жінкам/Пальто",
        "women/tunics-c50": "Жінкам/Туніка---сорочка",
        "women/leggings-c58": "Жінкам/Легінси",
        "women/sweatshirts-c48": "Жінкам/Толстовка",
        "women/trousers-c67": "Жінкам/Штани",
        "men/t-shirts-and-tops-c1": "Чоловікам/Футболка",
        "men/jackets-c11": "Чоловікам/Піджак",
        "men/shorts-c8": "Чоловікам/Шорти",
        "men/trousers-c16": "Чоловікам/Штани",
    }
    for directory in categories:
        Path(directory).mkdir(parents=True, exist_ok=True)
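    # A single event loop is reused across all main() calls (note that
    # asyncio.get_event_loop() is deprecated on 3.12+ with no running loop).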
    loop = asyncio.get_event_loop()
    for cat, filter_type in categories.items():
        loop.run_until_complete(main(cat, filter_type))
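
Until this is fixed, re-crawling the failed pages individually can serve as a stopgap. Below is a minimal sketch (reusing the imports from the script above); it assumes CrawlResult exposes .success and .error_message, as it does in 0.5.x, and retry_failed is a hypothetical helper, not part of crawl4ai:

async def retry_failed(crawler, results):
    # Re-crawl pages that failed with the add_init_script error, this time
    # without the deep-crawl strategy so each URL gets a fresh single fetch.
    retry_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
    recovered = []
    for result in results:
        if not result.success and "add_init_script" in (result.error_message or ""):
            recovered.append(await crawler.arun(result.url, config=retry_config))
    return recovered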

OS

Linux

Python version

3.13.3

Browser

No response

Browser version

No response

Error logs & Screenshots (if applicable)

[ERROR]... × https://www.lcwaikiki.ua/uk-UA/UA/product/XSIDE/Чо... | Error: 
┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ × Unexpected error in _crawl_web at line 528 in wrap_api_call (.pixi/envs/default/lib/python3.13/site-                │
│ packages/playwright/_impl/_connection.py):                                                                            │
│   Error: BrowserContext.add_init_script: Target page, context or browser has been closed                              │
│                                                                                                                       │
│   Code context:                                                                                                       │
│   523           parsed_st = _extract_stack_trace_information_from_stack(st, is_internal)                              │
│   524           self._api_zone.set(parsed_st)                                                                         │
│   525           try:                                                                                                  │
│   526               return await cb()                                                                                 │
│   527           except Exception as error:                                                                            │
│   528 →             raise rewrite_error(error, f"{parsed_st['apiName']}: {error}") from None                          │
│   529           finally:                                                                                              │
│   530               self._api_zone.set(None)                                                                          │
│   531                                                                                                                 │
│   532       def wrap_api_call_sync(                                                                                   │
│   533           self, cb: Callable[[], Any], is_internal: bool = False                                                │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
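
For reference, the error itself just means add_init_script was invoked on a BrowserContext that had already been closed; it can be reproduced with Playwright alone (a minimal sketch, outside crawl4ai):

import asyncio
from playwright.async_api import async_playwright

async def main():
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        context = await browser.new_context()
        await context.close()
        # Raises: BrowserContext.add_init_script: Target page, context or
        # browser has been closed
        await context.add_init_script("window.__probe = 1;")

asyncio.run(main())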


Labels

🐞 Bug (Something isn't working), 🩺 Needs Triage (Needs attention of maintainers)
