In [2]:
# ./scraper/notebooks/scraper_test.ipynb

import sys
import os
import json
import random
from datetime import datetime, timedelta
import traceback
from IPython.display import display, HTML

# Add the src directory to the path
sys.path.append('../src')

from hotel_scraper import KayakHotelScraper

def print_header(text, level=1):
    """Print a banner line made of ``=`` rules, an emoji and ``text``.

    The emoji is picked from the first whitespace-separated word of ``text``
    (case-insensitive). An exact keyword match wins; otherwise the first
    keyword that the word starts with is used, so that a header such as
    "Initialize Scraper" resolves to the ``init`` entry; otherwise a generic
    fallback marker is used.

    Args:
        text: Header text to display. May be empty or whitespace-only, in
            which case the fallback marker is used.
        level: Header depth; each side of the banner is ``level * 2``
            ``=`` characters wide.
    """
    emoji_map = {
        'start': 'üöÄ',
        'init': '‚öôÔ∏è',
        'progress': 'üìä',
        'success': '‚úÖ',
        'error': '‚ùå',
        'info': '‚ÑπÔ∏è',
        'warning': '‚ö†Ô∏è',
        'save': 'üíæ',
        'complete': 'üèÅ'
    }
    fallback_emoji = 'üìå'

    prefix = '=' * (level * 2)

    # An empty or whitespace-only header has no first word; indexing [0]
    # unconditionally would raise IndexError.
    words = text.lower().split()
    first_word = words[0] if words else ''

    emoji = emoji_map.get(first_word)
    if emoji is None and first_word:
        # Keywords are abbreviations ("init", "info", ...), so also accept
        # longer words that begin with one of them.
        emoji = next(
            (symbol for keyword, symbol in emoji_map.items()
             if first_word.startswith(keyword)),
            None,
        )
    if emoji is None:
        emoji = fallback_emoji

    print(f"\n{prefix} {emoji} {text} {prefix}")

def print_section(text):
    """Print ``text`` on its own line, framed by two 20-character ``=`` rules.

    A blank line is emitted first so the separator stands apart from
    whatever was printed before it.
    """
    rule = '=' * 20
    print(f"\n{rule} {text} {rule}")

def print_hotel_details(hotel):
    """Print a human-readable report for one scraped hotel.

    Sections are printed in this order: basic information, ratings and
    reviews, description, amenities, room options, then a dashed rule.
    Missing scalar fields are shown as ``N/A``; the description, amenities
    and room sections are skipped when the hotel has nothing to show there.

    Args:
        hotel: Mapping of hotel fields. The keys read are ``name``,
            ``location``, ``full_address``, ``stars``, ``price``,
            ``rating``, ``reviews``, ``description`` (text), ``amenities``
            (mapping of category name to a list of items) and ``rooms``
            (list of mappings with ``provider``, ``price``, ``bed_config``,
            ``board_type``, ``cancellation_policy`` and ``conditions``).
    """
    max_description_chars = 200
    max_items_per_category = 5

    print("\nüìç Basic Information:")
    print(f"  ‚Ä¢ Name: {hotel.get('name', 'N/A')}")
    print(f"  ‚Ä¢ Location: {hotel.get('location', 'N/A')}")
    print(f"  ‚Ä¢ Address: {hotel.get('full_address', 'N/A')}")
    print(f"  ‚Ä¢ Stars: {hotel.get('stars', 'N/A')}")
    print(f"  ‚Ä¢ Base Price: {hotel.get('price', 'N/A')}")

    print("\n‚≠ê Ratings & Reviews:")
    print(f"  ‚Ä¢ Rating: {hotel.get('rating', 'N/A')}")
    print(f"  ‚Ä¢ Reviews: {hotel.get('reviews', 'N/A')}")

    description = hotel.get('description')
    if description:
        print("\nüìù Description:")
        # Only show the ellipsis when text was actually cut off, so a short
        # description is not presented as if it had been truncated.
        ellipsis = '...' if len(description) > max_description_chars else ''
        print(f"  {description[:max_description_chars]}{ellipsis}")

    amenities = hotel.get('amenities')
    if amenities:
        # Drop empty categories up front so the section header is not
        # printed with nothing underneath it.
        populated = [
            (category, items)
            for category, items in amenities.items()
            if items
        ]
        if populated:
            print("\nüéØ Amenities:")
            for category, items in populated:
                print(f"  ‚Ä¢ {category.title()}:")
                for item in items[:max_items_per_category]:
                    print(f"    - {item}")

    if hotel.get('rooms'):
        print("\nüõèÔ∏è Room Options:")
        for i, room in enumerate(hotel['rooms'], 1):
            print(f"\n  Room Option {i}:")
            print(f"    ‚Ä¢ Provider: {room.get('provider', 'N/A')}")
            print(f"    ‚Ä¢ Price: {room.get('price', 'N/A')}")
            print(f"    ‚Ä¢ Bed Configuration: {room.get('bed_config', 'N/A')}")
            print(f"    ‚Ä¢ Board Type: {room.get('board_type', 'N/A')}")
            print(f"    ‚Ä¢ Cancellation: {room.get('cancellation_policy', 'N/A')}")
            if room.get('conditions'):
                print("    ‚Ä¢ Special Conditions:")
                for condition in room['conditions']:
                    print(f"      - {condition}")

    print("\n" + "-"*50)

def test_scraper(limit=5):
    """Run an end-to-end smoke test of ``KayakHotelScraper`` with verbose output.

    The test builds a scraper for a fixed city and date range, scrapes up to
    ``limit`` hotels, asks the scraper to save its results, prints every
    hotel in detail and finally re-reads the JSON results file as a sanity
    check. Progress and failures are reported on stdout; nothing is returned.

    Errors raised while constructing the scraper, scraping or saving are
    caught and printed together with a traceback. Errors from creating the
    data directory are not caught and propagate to the caller.

    Args:
        limit: Maximum number of hotels passed to ``scrape_hotels``.
    """
    # NOTE(review): save_results() is assumed to write to this path -- the
    # location is not passed in here; confirm against KayakHotelScraper.
    results_path = '/app/data/hotel_data.json'

    print_header("Start Scraper Test")

    # Create data directory if it doesn't exist
    os.makedirs('/app/data', exist_ok=True)

    # Set search parameters
    city = "Errachidia"
    check_in = datetime(2025, 1, 10)
    check_out = datetime(2025, 1, 14)

    # Print test configuration
    print("\nüåç Testing configuration:")
    print(f"  ‚Ä¢ City: {city}")
    print(f"  ‚Ä¢ Check-in: {check_in.date()}")
    print(f"  ‚Ä¢ Check-out: {check_out.date()}")
    print(f"  ‚Ä¢ Hotel limit: {limit}")

    # Bound before the try block so the cleanup code can tell whether the
    # scraper was ever constructed; otherwise a failing constructor would
    # surface as a NameError in the finally clause.
    scraper = None
    try:
        print_header("Initialize Scraper", level=2)
        scraper = KayakHotelScraper(
            city=city,
            check_in_date=check_in,
            check_out_date=check_out
        )

        print_header("Start Scraping", level=2)
        hotels = scraper.scrape_hotels(limit=limit)

        if hotels:
            print_header("Success", level=2)
            print(f"Successfully scraped {len(hotels)} hotels")

            # Save results
            scraper.save_results()
            print(f"\nüíæ Results saved to: {results_path}")

            # Print detailed results
            print_header("Detailed Results", level=2)
            for idx, hotel in enumerate(hotels, 1):
                print_section(f"Hotel {idx}: {hotel.get('name', 'Unknown')}")
                print_hotel_details(hotel)

            # Data file verification: a failure here is reported but does
            # not count as a scraping failure.
            try:
                with open(results_path, 'r', encoding='utf-8') as f:
                    saved_data = json.load(f)
                print(f"\n‚úÖ Verified saved data: {len(saved_data)} hotels in JSON file")
            except Exception as e:
                print(f"\n‚ö†Ô∏è Error verifying saved data: {str(e)}")

        else:
            print_header("Error", level=2)
            print("No hotels were scraped")

    except Exception as e:
        # Top-level boundary of the test run: report everything and carry on
        # to cleanup instead of crashing the notebook cell.
        print_header("Error", level=2)
        print("An error occurred during scraping:")
        print(str(e))
        print("\nFull traceback:")
        traceback.print_exc()

    finally:
        # Only attempt to close a scraper that was actually created.
        if scraper is not None:
            try:
                scraper.close()
                print("\nüîí Scraper closed successfully")
            except Exception:
                # Best-effort cleanup; KeyboardInterrupt/SystemExit are
                # deliberately not swallowed.
                print("\n‚ö†Ô∏è Error closing scraper")
        print_header("Complete", level=2)

if __name__ == "__main__":
    # Entry point: run the scraper smoke test with its default hotel limit
    # when this code is executed directly rather than imported.
    test_scraper()


== üöÄ Start Scraper Test ==

üåç Testing configuration:
  ‚Ä¢ City: Errachidia
  ‚Ä¢ Check-in: 2025-01-10
  ‚Ä¢ Check-out: 2025-01-14
  ‚Ä¢ Hotel limit: 5

==== üìå Initialize Scraper ====


2024-12-29 21:52:13,543 [INFO] WebDriver initialized successfully
2024-12-29 21:52:13,546 [INFO] Loading URL: https://www.kayak.com/hotels/errachidia-c52508/2025-01-10/2025-01-14/2adults?sort=rank_a



==== üöÄ Start Scraping ====


2024-12-29 21:52:32,979 [INFO] Page loaded successfully
2024-12-29 21:52:33,087 [INFO] Found 28 hotels
2024-12-29 21:52:33,089 [INFO] Processing first 5 hotels
2024-12-29 21:52:33,095 [INFO] Processing hotel 1/5
2024-12-29 21:52:33,341 [INFO] Fetching details from: https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ehFkDwprPY&pm=daybase#overview
2024-12-29 21:52:33,346 [INFO] Loading URL: https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ehFkDwprPY&pm=daybase#overview
2024-12-29 21:52:47,964 [INFO] Page loaded successfully
2024-12-29 21:52:58,581 [INFO] Successfully processed: Ighiz Inn Resort
2024-12-29 21:52:59,701 [INFO] Processing hotel 2/5
2024-12-29 21:53:03,595 [INFO] Processing hotel 3/5
2024-12-29 21:53:07,408 [INFO] Processing hotel 4/5
2024-12-29 21:53:11,105 [INFO] Processing hotel 5/5
2024-12-29 21:53:14,426 [INFO] Successfully processed 1 ho


==== ‚úÖ Success ====
Successfully scraped 1 hotels

üíæ Results saved to: /app/data/hotel_data.json

==== üìå Detailed Results ====


üìç Basic Information:
  ‚Ä¢ Name: Ighiz Inn Resort
  ‚Ä¢ Location: Nearby - Forkan Mosque, Garden Boutalamine
  ‚Ä¢ Address: Zone touristique, Route nationale N¬∞ 10, Errachidia 53000
  ‚Ä¢ Stars: N/A
  ‚Ä¢ Base Price: $40

‚≠ê Ratings & Reviews:
  ‚Ä¢ Rating: 8.0
  ‚Ä¢ Reviews: Very good (379)

üìù Description:
  Hotel in Errachidia with free breakfast and a seasonal outdoor pool This smoke-free hotel features a restaurant, a coffee shop/cafe, and a seasonal outdoor pool. Free buffet breakfast, free WiFi in pu...

üéØ Amenities:

üõèÔ∏è Room Options:

  Room Option 1:
    ‚Ä¢ Provider: Booking.com
    ‚Ä¢ Price: $40
    ‚Ä¢ Bed Configuration: N/A
    ‚Ä¢ Board Type: N/A
    ‚Ä¢ Cancellation: N/A

  Room Option 2:
    ‚Ä¢ Provider: KAYAK
    ‚Ä¢ Price: $40
    ‚Ä¢ Bed Configuration: N/A
    ‚Ä¢ Board Type: N/A
    ‚Ä¢ Cancellation: N/A

  Room O

2024-12-29 21:53:14,668 [INFO] WebDriver closed successfully



üîí Scraper closed successfully

==== üèÅ Complete ====
