Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docs/examples/scraping_strategies_performance.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
-import time, re
-from crawl4ai.content_scraping_strategy import WebScrapingStrategy, LXMLWebScrapingStrategy
-import time
 import functools
+import time
 from collections import defaultdict

+from crawl4ai.content_scraping_strategy import WebScrapingStrategy, LXMLWebScrapingStrategy

class TimingStats:
def __init__(self):
self.stats = defaultdict(lambda: defaultdict(lambda: {"calls": 0, "total_time": 0}))
Expand Down Expand Up @@ -95,7 +95,7 @@ def test_scraping():

# Time the scraping
print("\nStarting scrape...")
-start_time = time.time()
+start_time = time.perf_counter()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

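Use time.perf_counter() in the decorator for consistency.
The timing_decorator still uses time.time(), which reads the wall clock: it has lower resolution and can jump forwards or backwards when the system clock is adjusted. time.perf_counter() is a monotonic, high-resolution clock intended for measuring short durations. To stay consistent with the timer now used in test_scraping, switch the decorator to time.perf_counter() as well: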

 def wrapper(*args, **kwargs):
-    start = time.time()
+    start = time.perf_counter()
     result = func(*args, **kwargs)
-    elapsed = time.time() - start
+    elapsed = time.perf_counter() - start
     timing_stats.add(strategy_name, func.__name__, elapsed)
     return result


kwargs = {
"url": "http://example.com",
Expand All @@ -117,7 +117,7 @@ def test_scraping():
timing_stats.report()

# Print stats of LXML output
-print("\Turbo Output:")
+print("\nTurbo Output:")
print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
print(f"Extracted images: {len(result_selected.media.images)}")
print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
Expand Down