unclecode
diff --git a/‎README.md‎
Lines changed: 38 additions & 2 deletions b/‎README.md‎
Lines changed: 38 additions & 2 deletions
diff --git a/‎crawl4ai/__init__.py‎
Lines changed: 9 additions & 0 deletions b/‎crawl4ai/__init__.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎crawl4ai/__version__.py‎
Lines changed: 1 addition & 1 deletion b/‎crawl4ai/__version__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎crawl4ai/async_configs.py‎
Lines changed: 21 additions & 0 deletions b/‎crawl4ai/async_configs.py‎
Lines changed: 21 additions & 0 deletions
@@ -27,9 +27,11 @@
 
 Crawl4AI turns the web into clean, LLM ready Markdown for RAG, agents, and data pipelines. Fast, controllable, battle tested by a 50k+ star community.
 
-[✨ Check out latest update v0.7.3](#-recent-updates)
+[✨ Check out latest update v0.7.4](#-recent-updates)
 
-✨ New in v0.7.3: Undetected Browser Support, Multi-URL Configurations, Memory Monitoring, Enhanced Table Extraction, GitHub Sponsors. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.3.md)
+✨ New in v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
+
+✨ Recent v0.7.3: Undetected Browser Support, Multi-URL Configurations, Memory Monitoring, Enhanced Table Extraction, GitHub Sponsors. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.3.md)
 
 <details>
   <summary>🤓 <strong>My Personal Story</strong></summary>
@@ -542,6 +544,40 @@ async def test_news_crawl():
 
 ## ✨ Recent Updates
 
+<details>
+<summary><strong>Version 0.7.4 Release Highlights - The Intelligent Table Extraction & Performance Update</strong></summary>
+
+- **🚀 LLMTableExtraction**: Revolutionary table extraction with intelligent chunking for massive tables:
+  ```python
+  from crawl4ai import LLMTableExtraction, LLMConfig, CrawlerRunConfig
+  
+  # Configure intelligent table extraction
+  table_strategy = LLMTableExtraction(
+      llm_config=LLMConfig(provider="openai/gpt-4.1-mini"),
+      enable_chunking=True,           # Handle massive tables
+      chunk_token_threshold=5000,     # Smart chunking threshold
+      overlap_threshold=100,          # Maintain context between chunks
+      extraction_type="structured"    # Get structured data output
+  )
+  
+  config = CrawlerRunConfig(table_extraction=table_strategy)
+  result = await crawler.arun("https://complex-tables-site.com", config=config)
+  
+  # Tables are automatically chunked, processed, and merged
+  for table in result.tables:
+      print(f"Extracted table: {len(table['data'])} rows")
+  ```
+
+- **⚡ Dispatcher Bug Fix**: Fixed sequential processing bottleneck in arun_many for fast-completing tasks
+- **🧹 Memory Management Refactor**: Consolidated memory utilities into main utils module for cleaner architecture
+- **🔧 Browser Manager Fixes**: Resolved race conditions in concurrent page creation by using thread-safe locking
+- **🔗 Advanced URL Processing**: Better handling of raw:// URLs and base tag link resolution
+- **🛡️ Enhanced Proxy Support**: Flexible proxy configuration supporting both dict and string formats
+
+[Full v0.7.4 Release Notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
+
+</details>
+
 <details>
 <summary><strong>Version 0.7.3 Release Highlights - The Multi-Config Intelligence Update</strong></summary>
 
 
@@ -29,6 +29,12 @@
 )
 from .chunking_strategy import ChunkingStrategy, RegexChunking
 from .markdown_generation_strategy import DefaultMarkdownGenerator
+from .table_extraction import (
+    TableExtractionStrategy,
+    DefaultTableExtraction,
+    NoTableExtraction,
+    LLMTableExtraction,
+)
 from .content_filter_strategy import (
     PruningContentFilter,
     BM25ContentFilter,
@@ -156,6 +162,9 @@
     "ChunkingStrategy",
     "RegexChunking",
     "DefaultMarkdownGenerator",
+    "TableExtractionStrategy",
+    "DefaultTableExtraction",
+    "NoTableExtraction",
     "RelevantContentFilter",
     "PruningContentFilter",
     "BM25ContentFilter",
 
@@ -1,7 +1,7 @@
 # crawl4ai/__version__.py
 
 # This is the version that will be used for stable releases
-__version__ = "0.7.3"
+__version__ = "0.7.4"
 
 # For nightly builds, this gets set during build process
 __nightly_version__ = None
 
@@ -20,6 +20,7 @@
 from .markdown_generation_strategy import MarkdownGenerationStrategy, DefaultMarkdownGenerator
 from .content_scraping_strategy import ContentScrapingStrategy, LXMLWebScrapingStrategy
 from .deep_crawling import DeepCrawlStrategy
+from .table_extraction import TableExtractionStrategy, DefaultTableExtraction
 
 from .cache_context import CacheMode
 from .proxy_strategy import ProxyRotationStrategy
@@ -448,6 +449,10 @@ def __init__(
             self.chrome_channel = ""
         self.proxy = proxy
         self.proxy_config = proxy_config
+        if isinstance(self.proxy_config, dict):
+            self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
+        if isinstance(self.proxy_config, str):
+            self.proxy_config = ProxyConfig.from_string(self.proxy_config)
 
 
         self.viewport_width = viewport_width
@@ -978,6 +983,8 @@ class CrawlerRunConfig():
                                          Default: False.
         table_score_threshold (int): Minimum score threshold for processing a table.
                                      Default: 7.
+        table_extraction (TableExtractionStrategy): Strategy to use for table extraction.
+                                     Default: DefaultTableExtraction with table_score_threshold.
 
         # Virtual Scroll Parameters
         virtual_scroll_config (VirtualScrollConfig or dict or None): Configuration for handling virtual scroll containers.
@@ -1104,6 +1111,7 @@ def __init__(
         image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
         image_score_threshold: int = IMAGE_SCORE_THRESHOLD,
         table_score_threshold: int = 7,
+        table_extraction: TableExtractionStrategy = None,
         exclude_external_images: bool = False,
         exclude_all_images: bool = False,
         # Link and Domain Handling Parameters
@@ -1159,6 +1167,11 @@ def __init__(
         self.parser_type = parser_type
         self.scraping_strategy = scraping_strategy or LXMLWebScrapingStrategy()
         self.proxy_config = proxy_config
+        if isinstance(proxy_config, dict):
+            self.proxy_config = ProxyConfig.from_dict(proxy_config)
+        if isinstance(proxy_config, str):
+            self.proxy_config = ProxyConfig.from_string(proxy_config)
+
         self.proxy_rotation_strategy = proxy_rotation_strategy
 
         # Browser Location and Identity Parameters
@@ -1215,6 +1228,12 @@ def __init__(
         self.exclude_external_images = exclude_external_images
         self.exclude_all_images = exclude_all_images
         self.table_score_threshold = table_score_threshold
+        
+        # Table extraction strategy (default to DefaultTableExtraction if not specified)
+        if table_extraction is None:
+            self.table_extraction = DefaultTableExtraction(table_score_threshold=table_score_threshold)
+        else:
+            self.table_extraction = table_extraction
 
         # Link and Domain Handling Parameters
         self.exclude_social_media_domains = (
@@ -1486,6 +1505,7 @@ def from_kwargs(kwargs: dict) -> "CrawlerRunConfig":
                 "image_score_threshold", IMAGE_SCORE_THRESHOLD
             ),
             table_score_threshold=kwargs.get("table_score_threshold", 7),
+            table_extraction=kwargs.get("table_extraction", None),
             exclude_all_images=kwargs.get("exclude_all_images", False),
             exclude_external_images=kwargs.get("exclude_external_images", False),
             # Link and Domain Handling Parameters
@@ -1594,6 +1614,7 @@ def to_dict(self):
             "image_description_min_word_threshold": self.image_description_min_word_threshold,
             "image_score_threshold": self.image_score_threshold,
             "table_score_threshold": self.table_score_threshold,
+            "table_extraction": self.table_extraction,
             "exclude_all_images": self.exclude_all_images,
             "exclude_external_images": self.exclude_external_images,
             "exclude_social_media_domains": self.exclude_social_media_domains,