# FinSight Data Collector Testing Notebook

This notebook tests all components of the enhanced data collector including:
- Basic stock data collection
- HuggingFace sentiment analysis
- Actual SEC filing downloads
- Earnings call data collection
- Competitor SEC filings analysis

The system works with defaults. For enhanced features, optionally set API keys as environment variables or in a .env file.


In [1]:
import sys
import os
import json
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add the project root to Python path
sys.path.append('.')

# Import our enhanced data collector
from agents.data_collector import DataCollectorTools, DataRequest
from config.config import FinSightConfig
from shared_memory.memory_manager import SharedMemoryManager

print("✅ Imports successful")


✅ Imports successful


## 1. Initialize Data Collector Tools


In [2]:
# Initialize configuration
config = FinSightConfig()
print(f"📁 Charts output path: {config.charts_output_path}")
print(f"🔑 Finnhub API key configured: {'Yes' if config.finnhub_api_key else 'No'}")
print(f"🏢 Edgar user agent: {config.edgar_company_name} {config.edgar_email}")

# Initialize data collector tools
tools = DataCollectorTools(config)
print("\nData Collector Tools initialized")
print(f"Sentiment model loaded: {'Yes' if tools.sentiment_model else 'No (using fallback)'}")


📁 Charts output path: ./outputs/charts
🔑 Finnhub API key configured: Yes
🏢 Edgar user agent: FinSight user@finsight.ai


Device set to use cpu



Data Collector Tools initialized
Sentiment model loaded: Yes


## 2. Test Basic Stock Data Collection


In [3]:
# Test stock data collection
symbol = "AAPL"  # You can change this to any stock symbol

print(f"📈 Testing stock data collection for {symbol}...")
stock_result = tools.get_stock_data(symbol)

if stock_result["success"]:
    data = stock_result["data"]
    print(f"\n✅ Stock data collected successfully!")
    print(f"📊 Current Price: ${data['current_price']:.2f}")
    print(f"📊 Market Cap: {data['market_cap']:,}" if data['market_cap'] else "📊 Market Cap: N/A")
    print(f"📊 P/E Ratio: {data['pe_ratio']:.2f}" if data['pe_ratio'] else "📊 P/E Ratio: N/A")
    print(f"📊 52-Week High: ${data['52_week_high']:.2f}")
    print(f"📊 52-Week Low: ${data['52_week_low']:.2f}")
    print(f"📊 Beta: {data['beta']:.2f}" if data['beta'] else "📊 Beta: N/A")
    print(f"📊 Historical data points: {len(data['historical_data']['dates'])}")
else:
    print(f"❌ Error: {stock_result['error']}")


📈 Testing stock data collection for AAPL...

✅ Stock data collected successfully!
📊 Current Price: $196.58
📊 Market Cap: 2,936,079,646,720
📊 P/E Ratio: 30.62
📊 52-Week High: $260.10
📊 52-Week Low: $169.21
📊 Beta: 1.21
📊 Historical data points: 502


## 3. Test Financial Statements Collection


In [4]:
print(f"💰 Testing financial statements collection for {symbol}...")
financials_result = tools.get_company_financials(symbol)

if financials_result["success"]:
    data = financials_result["data"]
    print(f"\n✅ Financial statements collected successfully!")
    
    # Show key metrics
    metrics = data['key_metrics']
    print(f"\n📊 Key Financial Metrics:")
    print(f"   • Revenue Growth: {metrics.get('revenue_growth', 'N/A')}")
    print(f"   • Profit Margin: {metrics.get('profit_margin', 'N/A')}")
    print(f"   • Operating Margin: {metrics.get('operating_margin', 'N/A')}")
    print(f"   • Return on Equity: {metrics.get('return_on_equity', 'N/A')}")
    print(f"   • Current Ratio: {metrics.get('current_ratio', 'N/A')}")
    print(f"   • Debt to Equity: {metrics.get('debt_to_equity', 'N/A')}")
    
    # Show available statement periods
    print(f"\n📄 Financial Statements Available:")
    print(f"   • Income Statement periods: {len(data['income_statement'])}")
    print(f"   • Balance Sheet periods: {len(data['balance_sheet'])}")
    print(f"   • Cash Flow periods: {len(data['cash_flow'])}")
else:
    print(f"❌ Error: {financials_result['error']}")


💰 Testing financial statements collection for AAPL...

✅ Financial statements collected successfully!

📊 Key Financial Metrics:
   • Revenue Growth: 0.051
   • Profit Margin: 0.24301
   • Operating Margin: 0.31028998
   • Return on Equity: 1.38015
   • Current Ratio: 0.821
   • Debt to Equity: 146.994

📄 Financial Statements Available:
   • Income Statement periods: 5
   • Balance Sheet periods: 5
   • Cash Flow periods: 5


## 4. Test Enhanced News Collection with HuggingFace Sentiment Analysis


In [3]:
symbol = "AAPL"
print(f"📰 Testing news collection with sentiment analysis for {symbol}...")
print("📋 This will test both Finnhub and Marketaux APIs with fallback mechanism")

news_result = tools.get_company_news(symbol, days_back=7)

if news_result["success"]:
    data = news_result["data"]
    articles = data['articles']
    print(f"\n✅ News collection successful!")
    print(f"📰 Found {len(articles)} articles from the last 7 days")
    
    if articles:
        print(f"\n📊 Sentiment Analysis Results:")
        sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
        model_counts = {}
        source_counts = {}
        
        for i, article in enumerate(articles[:5]):
            sentiment = article['sentiment']
            sentiment_counts[sentiment] += 1
            score = article['sentiment_score']
            model = article['sentiment_model']
            source = article['source']
            model_counts[model] = model_counts.get(model, 0) + 1
            source_counts[source] = source_counts.get(source, 0) + 1
            
            print(f"\n📄 Article {i+1}:")
            print(f"   📰 Headline: {article['headline'][:100]}...")
            print(f"   🎯 Sentiment: {sentiment.upper()} (score: {score:.3f})")
            print(f"   🤖 Model used: {model}")
            print(f"   🔗 Source: {source}")
            
            # Show entity information if available (Marketaux specific)
            if 'entities' in article and article['entities']:
                print(f"   🏢 Related Entities:")
                for entity in article['entities'][:2]:  # Show first 2 entities
                    print(f"      • {entity.get('name', 'Unknown')} ({entity.get('symbol', 'N/A')})")
                    if 'sentiment_score' in entity:
                        print(f"        Entity Sentiment: {entity.get('sentiment_score', 0):.3f}")
        
        print(f"\n📈 Overall Sentiment Distribution:")
        total = len(articles)
        for sentiment, count in sentiment_counts.items():
            percentage = (count / total) * 100
            print(f"   • {sentiment.capitalize()}: {count} articles ({percentage:.1f}%)")
        
        print(f"\n🤖 Sentiment Analysis Models Used:")
        for model, count in model_counts.items():
            percentage = (count / total) * 100
            print(f"   • {model}: {count} articles ({percentage:.1f}%)")
        
        print(f"\n📰 News Sources Distribution:")
        for source, count in source_counts.items():
            percentage = (count / total) * 100
            print(f"   • {source}: {count} articles ({percentage:.1f}%)")
        
        print(f"\n🔄 API Sources Used:")
        if any('entities' in article for article in articles):
            print("   • Marketaux API: Entity-aware financial news")
        if any('entities' not in article for article in articles):
            print("   • Finnhub API: General financial news")
    else:
        print("📰 No recent articles found")
        
    # Show API endpoints used
    print(f"\n🔗 API Endpoints Used:")
    print("   Finnhub API:")
    print("   📰 Company News: /company/news")
    print("\n   Marketaux API:")
    print("   📰 News Feed: /v1/news/all")
    print("      Parameters: symbols, filter_entities, language, countries")
else:
    print(f"❌ Error: {news_result['error']}")
    print("\n💡 Tips:")
    print("   • Ensure FINNHUB_API_KEY is set in your .env file")
    print("   • Ensure MARKETAUX_API_KEY is set in your .env file")
    print("   • Check if the symbol is correct")
    print("   • Try a well-known company (e.g., AAPL, MSFT)")
    print("   • Some companies may have limited news coverage")


📰 Testing news collection with sentiment analysis for AAPL...
📋 This will test both Finnhub and Marketaux APIs with fallback mechanism


Error getting news from Finnhub for AAPL: FinnhubAPIException(status_code: 401): Invalid API key, trying Marketaux fallback



✅ News collection successful!
📰 Found 3 articles from the last 7 days

📊 Sentiment Analysis Results:

📄 Article 1:
   📰 Headline: Stock Market Today: Payment Giants Lead Declines as Tech Shows Mixed Performance...
   🎯 Sentiment: POSITIVE (score: 0.077)
   🤖 Model used: marketaux
   🔗 Source: thestockmarketwatch.com
   🏢 Related Entities:
      • Apple Inc. (AAPL)
        Entity Sentiment: 0.077

📄 Article 2:
   📰 Headline: Garmin unveils flagship Apollo RA800 marine stereo...
   🎯 Sentiment: POSITIVE (score: 0.672)
   🤖 Model used: marketaux
   🔗 Source: gurufocus.com
   🏢 Related Entities:
      • Apple Inc. (AAPL)
        Entity Sentiment: 0.672

📄 Article 3:
   📰 Headline: Wall Street Breakfast Podcast: Fed Sees Tariff Inflation Coming (undefined:US10Y)...
   🎯 Sentiment: NEGATIVE (score: -0.296)
   🤖 Model used: marketaux
   🔗 Source: seekingalpha.com
   🏢 Related Entities:
      • Apple Inc. (AAPL)
        Entity Sentiment: -0.296

📈 Overall Sentiment Distribution:
   • Positive

In [5]:
print(f"📰 Testing news collection with sentiment analysis for {symbol}...")
news_result = tools.get_company_news(symbol, days_back=7)

if news_result["success"]:
    data = news_result["data"]
    articles = data['articles']
    print(f"\n✅ News collection successful!")
    print(f"📰 Found {len(articles)} articles from the last 7 days")
    
    if articles:
        print(f"\n📊 Sentiment Analysis Results:")
        sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
        
        for i, article in enumerate(articles[:5]):
            sentiment = article['sentiment']
            sentiment_counts[sentiment] += 1
            score = article['sentiment_score']
            model = article['sentiment_model']
            
            print(f"\n📄 Article {i+1}:")
            print(f"   📰 Headline: {article['headline'][:100]}...")
            print(f"   🎯 Sentiment: {sentiment.upper()} (score: {score:.3f})")
            print(f"   🤖 Model used: {model}")
            print(f"   🔗 Source: {article['source']}")
        
        print(f"\n📈 Overall Sentiment Distribution:")
        total = len(articles)
        for sentiment, count in sentiment_counts.items():
            percentage = (count / total) * 100
            print(f"   • {sentiment.capitalize()}: {count} articles ({percentage:.1f}%)")
    else:
        print("📰 No recent articles found")
else:
    print(f"❌ Error: {news_result['error']}")


📰 Testing news collection with sentiment analysis for AAPL...

✅ News collection successful!
📰 Found 20 articles from the last 7 days

📊 Sentiment Analysis Results:

📄 Article 1:
   📰 Headline: Apple Just Erased a $300 Million Problem--But a $500M One Still Lurks...
   🎯 Sentiment: NEGATIVE (score: 0.531)
   🤖 Model used: huggingface
   🔗 Source: Yahoo

📄 Article 2:
   📰 Headline: Credit Card Annual Fees Are Going Up. What It Means for the Stocks....
   🎯 Sentiment: POSITIVE (score: 0.851)
   🤖 Model used: huggingface
   🔗 Source: Yahoo

📄 Article 3:
   📰 Headline: The Trump phone probably won’t be built in America (and may never be)...
   🎯 Sentiment: NEUTRAL (score: 0.938)
   🤖 Model used: huggingface
   🔗 Source: Yahoo

📄 Article 4:
   📰 Headline: Apple: Quiet After WWDC...
   🎯 Sentiment: NEUTRAL (score: 0.856)
   🤖 Model used: huggingface
   🔗 Source: SeekingAlpha

📄 Article 5:
   📰 Headline: Apple Just Snatched Back the Top Spot in China Smartphone Sales. Should You Buy AAPL Stoc

## 5. Test ESG Data Collection with FMP API and yfinance Fallback

This section tests the enhanced ESG data collection that uses both FMP API and yfinance sustainability data as a fallback source.


In [6]:
symbol = "AAPL"
print(f"🌱 Testing ESG data collection for {symbol} using FMP API with yfinance fallback...")
print("📋 This will test both FMP ESG endpoints and yfinance sustainability data")

esg_result = tools.get_esg_data(symbol)

if esg_result["success"]:
    data = esg_result["data"]
    print(f"\n✅ ESG data collection successful!")
    print(f"📊 Source: {data.get('source', 'Unknown')}")
    
    # Show ESG Scores
    print(f"\n🌱 ESG Scores:")
    print(f"   📊 Overall ESG Score: {data.get('esg_score', 'N/A')}")
    print(f"   🌍 Environment Score: {data.get('environment_score', 'N/A')}")
    print(f"   👥 Social Score: {data.get('social_score', 'N/A')}")
    print(f"   🏛️  Governance Score: {data.get('governance_score', 'N/A')}")
    
    # Show ESG Ratings
    print(f"\n📈 ESG Ratings:")
    print(f"   ⚠️  ESG Risk Rating: {data.get('esgrisk_rating', 'N/A')}")
    print(f"   🏭 Industry: {data.get('industry', 'N/A')}")
    print(f"   📊 Industry Rank: {data.get('industry_rank', 'N/A')}")
    print(f"   📅 Fiscal Year: {data.get('fiscal_year', 'N/A')}")
    
    # Show additional metadata
    print(f"\n📋 Additional Information:")
    print(f"   🏢 Company Name: {data.get('company_name', 'N/A')}")
    print(f"   🆔 CIK: {data.get('cik', 'N/A')}")
    print(f"   📅 ESG Data Date: {data.get('date', 'N/A')}")
    print(f"   📅 Rating Date: {data.get('rating_date', 'N/A')}")
    
    # Show yfinance-specific data if available
    if 'yfinance_esg_data' in data:
        yf_data = data['yfinance_esg_data']
        print(f"\n📊 Additional yfinance ESG Data:")
        print(f"   🎯 ESG Performance: {yf_data.get('esg_performance', 'N/A')}")
        print(f"   👥 Peer Group: {yf_data.get('peer_group', 'N/A')}")
        print(f"   📊 Peer Count: {yf_data.get('peer_count', 'N/A')}")
        print(f"   📈 Percentile: {yf_data.get('percentile', 'N/A')}")
        
        # Show peer performance if available
        if 'peer_esg_score_performance' in yf_data:
            print(f"\n📊 Peer ESG Performance:")
            peer_perf = yf_data['peer_esg_score_performance']
            print(f"   Min: {peer_perf.get('min', 'N/A')}")
            print(f"   Avg: {peer_perf.get('avg', 'N/A')}")
            print(f"   Max: {peer_perf.get('max', 'N/A')}")
    
    # Show data availability status
    available_fields = sum(1 for field in ['esg_score', 'environment_score', 'social_score', 'governance_score', 'esgrisk_rating'] 
                          if data.get(field) is not None)
    print(f"\n📊 Data Availability: {available_fields}/5 main ESG fields populated")
    
    if available_fields == 0:
        print("⚠️  No ESG data available. This could mean:")
        print("   • Symbol not covered by FMP or yfinance ESG data")
        print("   • FMP API key not configured")
        print("   • Company too small for ESG tracking")
        
else:
    print(f"❌ ESG Error: {esg_result['error']}")
    print("💡 Tips:")
    print("   • Ensure FMP_API_KEY is set in your .env file")
    print("   • Try a larger, more established company (e.g., AAPL, MSFT)")
    print("   • Check if the symbol is correct")


🌱 Testing ESG data collection for AAPL using FMP API...
📋 This will test the new FMP ESG endpoints for ESG scores and ratings


ESG Score API returned status 402 for AAPL


<Response [402]>


ESG Rating API returned status 402 for AAPL



✅ ESG data collection successful!
📊 Source: financial_modeling_prep

🌱 ESG Scores:
   📊 Overall ESG Score: None
   🌍 Environment Score: None
   👥 Social Score: None
   🏛️  Governance Score: None

📈 ESG Ratings:
   ⚠️  ESG Risk Rating: None
   🏭 Industry: None
   📊 Industry Rank: None
   📅 Fiscal Year: None

📋 Additional Information:
   🏢 Company Name: N/A
   🆔 CIK: N/A
   📅 ESG Data Date: N/A
   📅 Rating Date: N/A

📊 Data Availability: 0/5 main ESG fields populated
⚠️  No ESG data available from FMP for this symbol. This could mean:
   • Symbol not covered by FMP ESG data
   • FMP API key not configured
   • Company too small for ESG tracking


## 6. Test Earnings Call Transcript Collection with FMP API and API Ninjas Fallback

This section tests the enhanced earnings call transcript collection that uses both FMP API and API Ninjas as a fallback source.

In [7]:
print(f"💼 Testing earnings call transcript collection for {symbol} using FMP API with API Ninjas fallback...")
print("📋 This will test both FMP earnings transcript endpoints and API Ninjas fallback")

earnings_result = tools.get_earnings_call_data(symbol)

if earnings_result["success"]:
    data = earnings_result["data"]
    print(f"\n✅ Earnings transcript collection successful!")
    print(f"📊 Source: {data.get('source', 'Unknown')}")
    print(f"🏢 Symbol: {data.get('symbol', 'N/A')}")
    
    # Show earnings call metadata
    print(f"\n📅 Earnings Call Information:")
    fiscal_year = data.get('fiscal_year')
    quarter = data.get('quarter')
    date = data.get('date')
    
    print(f"   📅 Fiscal Year: {fiscal_year if fiscal_year else 'N/A'}")
    print(f"   📊 Quarter: Q{quarter if quarter else 'N/A'}")
    print(f"   📅 Date: {date if date else 'N/A'}")
    
    # Show transcript content
    transcript = data.get('transcript')
    if transcript and len(transcript.strip()) > 0:
        print(f"\n📝 Transcript Content:")
        print(f"   📊 Content Length: {len(transcript):,} characters")
        print(f"   📊 Word Count: ~{len(transcript.split()) if transcript else 0:,} words")
        
        # Show preview of transcript content
        preview_length = 500
        if len(transcript) > preview_length:
            print(f"\n📄 Transcript Preview (first {preview_length} characters):")
            print(f"   {transcript[:preview_length]}...")
            print(f"   [...content truncated...]")
        else:
            print(f"\n📄 Full Transcript Content:")
            print(f"   {transcript}")
        
        # Analyze transcript sections if possible
        sections = transcript.lower().split('operator') if transcript else []
        if len(sections) > 1:
            print(f"\n📊 Transcript Structure Analysis:")
            print(f"   📞 Operator segments: {len(sections) - 1}")
            print(f"   📝 Average segment length: {len(transcript) // len(sections):,} chars")
        
        
        # Perform sentiment analysis on transcript
        if len(transcript) > 100:
            print(f"\n🤖 Analyzing transcript sentiment...")
            # Take a sample from the middle of the transcript for sentiment analysis
            sample_start = len(transcript) // 4
            sample_end = sample_start + 1000  # 1000 character sample
            transcript_sample = transcript[sample_start:sample_end]
            
            sentiment_result = tools._analyze_sentiment(transcript_sample)
            print(f"   🎯 Overall Sentiment: {sentiment_result['label'].upper()}")
            print(f"   📊 Confidence Score: {sentiment_result['score']:.3f}")
            print(f"   🤖 Analysis Model: {sentiment_result['model']}")
            
    else:
        print(f"\n⚠️  No transcript content available")
        print("   This could mean:")
        print("   • No transcript available for the latest quarter")
        print("   • FMP API key not configured")
        print("   • API Ninjas key not configured")
        print("   • Symbol not covered by either service")
        print("   • Both APIs returned empty content")
    
    # Show API endpoints used
    print(f"\n🔗 API Endpoints Used:")
    print(f"   FMP API:")
    print(f"   📅 Transcript Dates: /stable/earning-call-transcript-dates")
    print(f"   📝 Transcript Content: /stable/earning-call-transcript")
    print(f"\n   API Ninjas:")
    print(f"   📝 Earnings Transcript: /v1/earningscall")
        
else:
    print(f"❌ Earnings Error: {earnings_result['error']}")
    print("💡 Tips:")
    print("   • Ensure FMP_API_KEY is set in your .env file")
    print("   • Try a well-known company with regular earnings calls (e.g., AAPL, MSFT)")
    print("   • Check if the symbol is correct")
    print("   • Some companies may not have recent transcripts available")


💼 Testing earnings call transcript collection for AAPL using FMP API...
📋 This will test the new FMP earnings transcript endpoints


Earnings calendar API returned status 402
Transcript list API returned status 402 for AAPL



✅ Earnings transcript collection successful!
📊 Source: financial_modeling_prep
🏢 Symbol: AAPL

📅 Earnings Call Information:
   📅 Fiscal Year: N/A
   📊 Quarter: QN/A
   📅 Date: N/A

⚠️  No transcript content available
   This could mean:
   • No transcript available for the latest quarter
   • FMP API key not configured
   • Symbol not covered by FMP transcript service
   • API returned empty content

🔗 FMP API Endpoints Used:
   📅 Transcript Dates: /stable/earning-call-transcript-dates
   📝 Transcript Content: /stable/earning-call-transcript


## 7. Test SEC Filings Download (Actual Files)


In [5]:
symbol = "AAPL"  # You can change this to any stock symbol

print(f"📋 Testing SEC filings download for {symbol}...")
print("⚠️  This may take a few minutes as we download actual SEC filings")
print("🔄 System will try SEC-API.IO first (for PDFs), then fallback to direct EDGAR download")

sec_result = tools.get_sec_filings(symbol, filing_types=["10-K", "10-Q"], max_filings=2)

if sec_result["success"]:
    data = sec_result["data"]
    print(f"\n✅ SEC filings download successful!")
    print(f"📁 Download path: {data['download_path']}")
    print(f"📋 Total filings downloaded: {len(data['filings_downloaded'])}")
    print(f"🔄 Download method: {data.get('source', 'Unknown')}")
    
    # Show API usage statistics if available
    if 'api_calls_made' in data:
        print(f"📊 SEC-API.IO calls made: {data['api_calls_made']}")
        print(f"♻️  Files reused: {data['files_reused']}")
    
    # Show filing summaries
    if data['filing_summaries']:
        print(f"\n📄 Filing Details:")
        for i, filing in enumerate(data['filing_summaries']):
            if 'error' not in filing:
                print(f"\n📋 Filing {i+1}:")
                print(f"   📝 Type: {filing['filing_type']}")
                print(f"   📅 Date: {filing.get('date', 'Unknown')}")
                print(f"   📄 File: {filing.get('file_path', 'N/A')}")
                print(f"   🔄 Method: {filing.get('download_method', 'Unknown')}")
                
                # Show accession number if available
                if 'accession_number' in filing:
                    print(f"   🆔 Accession: {filing['accession_number']}")
                
                # Show text preview for non-PDF files
                text_summary = filing.get('text_summary', '')
                if text_summary:
                    print(f"   📝 Preview: {text_summary[:200]}...")
            else:
                print(f"\n❌ {filing['filing_type']}: {filing['error']}")
    
    # Show actual downloaded files structure
    if data['filings_downloaded']:
        print(f"\n📁 Downloaded File Structure:")
        for filing in data['filings_downloaded'][:2]:
            print(f"   📋 {filing['filing_type']} - {filing.get('filing_date', 'Unknown date')}")
            
            # Handle both PDF and HTML/TXT files
            file_path = filing.get('filing_path') or filing.get('file_path')
            if file_path:
                print(f"       📄 File path: {file_path}")
                
                # Check if file actually exists and show details
                if os.path.exists(file_path):
                    file_size = os.path.getsize(file_path)
                    file_ext = os.path.splitext(file_path)[1].lower()
                    
                    if file_ext == '.pdf':
                        print(f"       ✅ PDF file exists ({file_size:,} bytes) - Ready for multimodal analysis")
                    else:
                        print(f"       ✅ File exists ({file_size:,} bytes)")
                        print(f"       📊 Financial tables: {len(filing.get('tables_found', []))}") 
                        print(f"       📝 Text length: {len(filing.get('text_summary', ''))} characters")
                else:
                    print(f"       ❌ File not found")
            else:
                print(f"       ❌ No file path available")
                
            # Show download method
            method = filing.get('download_method', 'Unknown')
            if method == 'sec-api.io':
                print(f"       🚀 Downloaded as optimized PDF via SEC-API.IO")
            elif method == 'reused_existing':
                print(f"       ♻️  Reused existing file (saved API call)")
            elif method == 'edgar_direct':
                print(f"       📋 Downloaded via direct EDGAR access")
else:
    print(f"❌ Error: {sec_result['error']}")
    print("💡 Tip: If SEC-API.IO failed, check if SEC_API_KEY is set in your .env file")

📋 Testing SEC filings download for AAPL...
⚠️  This may take a few minutes as we download actual SEC filings
🔄 System will try SEC-API.IO first (for PDFs), then fallback to direct EDGAR download

✅ SEC filings download successful!
📁 Download path: outputs/sec_filings\AAPL
📋 Total filings downloaded: 0
🔄 Download method: sec-api.io
📊 SEC-API.IO calls made: 0
♻️  Files reused: 0


## 8. Test Competitor SEC Filings Analysis


In [None]:
print(f"🏢 Testing competitor SEC filings analysis for {symbol}...")
print("⚠️  This may take several minutes as we download competitor filings")

competitor_result = tools.get_competitor_sec_filings(symbol, filing_types=["10-K"], max_filings=1)

if competitor_result["success"]:
    data = competitor_result["data"]
    print(f"\n✅ Competitor analysis successful!")
    print(f"🏢 Primary symbol: {data['primary_symbol']}")
    print(f"🏢 Competitors analyzed: {', '.join(data['competitors_analyzed'])}")
    
    # Show competitor filing results
    competitor_filings = data['competitor_filings']
    print(f"\n📋 Competitor Filing Results:")
    
    for competitor, filing_data in competitor_filings.items():
        print(f"\n🏢 {competitor}:")
        if 'error' in filing_data:
            print(f"   ❌ Error: {filing_data['error']}")
        else:
            filings_downloaded = filing_data.get('filings_downloaded', [])
            print(f"   📋 Filings downloaded: {len(filings_downloaded)}")
            
            if filings_downloaded:
                for filing in filings_downloaded[:1]:
                    print(f"   📄 {filing['filing_type']} - {filing.get('filing_date', 'Unknown')}")
                    print(f"       📊 Tables found: {len(filing.get('tables_found', []))}")
                    print(f"       📝 Text preview: {filing.get('text_summary', '')[:100]}...")
else:
    print(f"❌ Error: {competitor_result['error']}")


## 9. Summary and Data Structure Overview


In [None]:
print("📋 ENHANCED DATA COLLECTOR TESTING SUMMARY WITH FMP INTEGRATION")
print("=" * 70)

# Test all components one more time and show structure
test_symbol = "AAPL"
print(f"🔍 Testing all components for {test_symbol}:")

components = [
    ("Stock Data", lambda: tools.get_stock_data(test_symbol)),
    ("Financial Statements", lambda: tools.get_company_financials(test_symbol)),
    ("News with Sentiment", lambda: tools.get_company_news(test_symbol, 3)),
    ("ESG Data (FMP)", lambda: tools.get_esg_data(test_symbol)),
    ("Earnings Transcripts (FMP)", lambda: tools.get_earnings_call_data(test_symbol)),
    ("Market Trends", lambda: tools.get_market_trends(test_symbol))
]

results_summary = {}

for component_name, test_func in components:
    try:
        result = test_func()
        status = "✅ Success" if result.get("success") else "❌ Failed"
        results_summary[component_name] = status
        print(f"   {status}: {component_name}")
    except Exception as e:
        results_summary[component_name] = f"❌ Error: {str(e)[:50]}..."
        print(f"   ❌ Error: {component_name} - {str(e)[:50]}...")

print(f"\n📊 COMPONENT STATUS SUMMARY:")
for component, status in results_summary.items():
    print(f"   {component}: {status}")

print(f"\n🎯 KEY ENHANCEMENTS IMPLEMENTED:")
print(f"   🤖 HuggingFace FinBERT sentiment analysis")
print(f"   📋 Actual SEC filing downloads and parsing")
print(f"   🌱 FMP ESG data collection with scores and ratings")
print(f"   💼 FMP earnings call transcript collection")
print(f"   🏢 Competitor SEC filings analysis")
print(f"   📊 Enhanced data structures for multimodal analysis")

print(f"\n🔗 FMP API INTEGRATIONS:")
print(f"   📊 ESG Endpoints: /stable/esg-disclosures, /stable/esg-ratings")
print(f"   💼 Earnings Endpoints: /stable/earning-call-transcript-dates, /stable/earning-call-transcript")
print(f"   🔑 Requires: FMP_API_KEY environment variable")

print(f"\n📁 File Storage Locations:")
print(f"   📋 SEC Filings: {config.sec_filings_path}")
print(f"   📊 Charts: {config.charts_output_path}")
print(f"   💾 Vector DB: {config.vector_db_path}")
print(f"   🗄️  Shared Memory: {config.shared_memory_path}")

print(f"\n📋 NEW TESTING SECTIONS ADDED:")
print(f"   🌱 Section 7: ESG Data Collection with FMP API")
print(f"   💼 Section 8: Earnings Call Transcript Collection with FMP API")
print(f"   🔬 Section 9: Multi-symbol ESG and Earnings Testing")

print(f"\n✅ ENHANCED TESTING COMPLETE!")
print(f"📝 All components including new FMP integrations have been tested.")
print(f"🚀 The system is ready for comprehensive financial research with ESG and earnings data!")
