In [2]:
# ./scraper/notebooks/scraper_test.ipynb

import sys
import os
import json
import random
from datetime import datetime, timedelta
import traceback
from IPython.display import display, HTML

# Add the src directory to the path
sys.path.append('../src')

from hotel_scraper import KayakHotelScraper

def print_header(text, level=1):
    """Print a banner line for ``text`` framed by ``=`` characters.

    The emoji is selected from the first word of ``text``, compared
    case-insensitively against a fixed keyword table. An exact keyword match
    wins; otherwise the first keyword that the word starts with is used, so
    that "Initialize Scraper" resolves to the 'init' entry. When nothing
    matches, or ``text`` is empty/blank, a pushpin emoji is used.

    Args:
        text: Header text to display.
        level: Nesting level; each side of the banner is made of
            ``level * 2`` ``=`` characters.
    """
    emoji_map = {
        'start': '🚀',
        'init': '⚙️',
        'progress': '📊',
        'success': '✅',
        'error': '❌',
        'info': 'ℹ️',
        'warning': '⚠️',
        'save': '💾',
        'complete': '🏁'
    }
    fallback = '📌'

    # Blank text has no first word; indexing [0] directly would raise
    # IndexError, so fall back to an empty keyword instead.
    words = text.lower().split()
    keyword = words[0] if words else ''

    emoji = emoji_map.get(keyword)
    if emoji is None:
        # Prefix match lets longer word forms ("initialize", "errors") reuse
        # the short keywords of the table.
        emoji = next(
            (symbol for key, symbol in emoji_map.items()
             if keyword.startswith(key)),
            fallback,
        )

    prefix = '=' * (level * 2)
    print(f"\n{prefix} {emoji} {text} {prefix}")

def print_section(text):
    """Print a blank line, then ``text`` centred between two 20-char ``=`` rules."""
    rule = '=' * 20
    print(f"\n{rule} {text} {rule}")

def print_hotel_details(hotel):
    """Print a human-readable report for one scraped hotel.

    Args:
        hotel: Mapping holding the scraped fields. Keys read here are
            'name', 'location', 'full_address', 'stars', 'price', 'rating',
            'reviews', 'description', 'amenities' (mapping of category name
            to a list of items) and 'rooms' (list of mappings). Missing
            scalar keys are shown as 'N/A'; missing or empty optional
            sections are skipped entirely.
    """
    max_description_chars = 200
    max_items_per_category = 5

    print("\n📍 Basic Information:")
    print(f"  • Name: {hotel.get('name', 'N/A')}")
    print(f"  • Location: {hotel.get('location', 'N/A')}")
    print(f"  • Address: {hotel.get('full_address', 'N/A')}")
    print(f"  • Stars: {hotel.get('stars', 'N/A')}")
    print(f"  • Base Price: {hotel.get('price', 'N/A')}")

    print("\n⭐ Ratings & Reviews:")
    print(f"  • Rating: {hotel.get('rating', 'N/A')}")
    print(f"  • Reviews: {hotel.get('reviews', 'N/A')}")

    description = hotel.get('description')
    if description:
        print("\n📝 Description:")
        # Only mark the text as truncated when something was actually cut.
        suffix = "..." if len(description) > max_description_chars else ""
        print(f"  {description[:max_description_chars]}{suffix}")

    amenities = hotel.get('amenities')
    if amenities:
        # Filter out empty categories first so the heading is not printed
        # with nothing underneath it.
        populated = {
            category: items
            for category, items in amenities.items()
            if items
        }
        if populated:
            print("\n🎯 Amenities:")
            for category, items in populated.items():
                print(f"  • {category.title()}:")
                for item in items[:max_items_per_category]:
                    print(f"    - {item}")

    rooms = hotel.get('rooms')
    if rooms:
        print("\n🛏️ Room Options:")
        for i, room in enumerate(rooms, 1):
            print(f"\n  Room Option {i}:")
            print(f"    • Provider: {room.get('provider', 'N/A')}")
            print(f"    • Price: {room.get('price', 'N/A')}")
            print(f"    • Bed Configuration: {room.get('bed_config', 'N/A')}")
            print(f"    • Board Type: {room.get('board_type', 'N/A')}")
            print(f"    • Cancellation: {room.get('cancellation_policy', 'N/A')}")
            if room.get('conditions'):
                print("    • Special Conditions:")
                for condition in room['conditions']:
                    print(f"      - {condition}")

    print("\n" + "-"*50)

def test_scraper(limit=5):
    """Run the Kayak hotel scraper end to end and print a detailed report.

    Scrapes a fixed city and date range, saves the results through the
    scraper, prints every hotel, and re-reads the saved JSON file to verify
    it. Errors during scraping are printed with a traceback rather than
    propagated, and the scraper is closed on the way out if it was created.

    Args:
        limit: Maximum number of hotels passed to ``scrape_hotels``.
    """
    results_path = '/app/data/hotel_data.json'

    print_header("Start Scraper Test")

    # Create data directory if it doesn't exist
    os.makedirs('/app/data', exist_ok=True)

    # Set search parameters
    city = "Errachidia"
    check_in = datetime(2025, 1, 10)
    check_out = datetime(2025, 1, 14)

    # Print test configuration
    print("\n🌍 Testing configuration:")
    print(f"  • City: {city}")
    print(f"  • Check-in: {check_in.date()}")
    print(f"  • Check-out: {check_out.date()}")
    print(f"  • Hotel limit: {limit}")

    # Bound before the try block so the cleanup below can tell whether the
    # constructor succeeded; otherwise a failed init surfaces as a swallowed
    # NameError reported as a close failure.
    scraper = None

    try:
        print_header("Initialize Scraper", level=2)
        scraper = KayakHotelScraper(
            city=city,
            check_in_date=check_in,
            check_out_date=check_out
        )

        print_header("Start Scraping", level=2)
        hotels = scraper.scrape_hotels(limit=limit)

        if hotels:
            print_header("Success", level=2)
            print(f"Successfully scraped {len(hotels)} hotels")

            # Save results
            scraper.save_results()
            print(f"\n💾 Results saved to: {results_path}")

            # Print detailed results
            print_header("Detailed Results", level=2)
            for idx, hotel in enumerate(hotels, 1):
                print_section(f"Hotel {idx}: {hotel.get('name', 'Unknown')}")
                print_hotel_details(hotel)

            # Data file verification
            try:
                with open(results_path, 'r', encoding='utf-8') as f:
                    saved_data = json.load(f)
                print(f"\n✅ Verified saved data: {len(saved_data)} hotels in JSON file")
            except Exception as e:
                print(f"\n⚠️ Error verifying saved data: {str(e)}")

        else:
            print_header("Error", level=2)
            print("No hotels were scraped")

    except Exception as e:
        # Top-level boundary of the test run: report everything, never raise.
        print_header("Error", level=2)
        print("An error occurred during scraping:")
        print(str(e))
        print("\nFull traceback:")
        traceback.print_exc()

    finally:
        if scraper is None:
            print("\n⚠️ Scraper was never initialized; nothing to close")
        else:
            try:
                scraper.close()
                print("\n🔒 Scraper closed successfully")
            except Exception as close_error:
                # Best-effort cleanup: report the cause but keep going.
                print(f"\n⚠️ Error closing scraper: {close_error}")
        print_header("Complete", level=2)

if __name__ == "__main__":
    # Run the scraper test with its default hotel limit
    test_scraper()


== 🚀 Start Scraper Test ==

🌍 Testing configuration:
  • City: Errachidia
  • Check-in: 2025-01-10
  • Check-out: 2025-01-14
  • Hotel limit: 5

==== 📌 Initialize Scraper ====


2024-12-29 21:52:13,543 [INFO] WebDriver initialized successfully
2024-12-29 21:52:13,546 [INFO] Loading URL: https://www.kayak.com/hotels/errachidia-c52508/2025-01-10/2025-01-14/2adults?sort=rank_a



==== 🚀 Start Scraping ====


2024-12-29 21:52:32,979 [INFO] Page loaded successfully
2024-12-29 21:52:33,087 [INFO] Found 28 hotels
2024-12-29 21:52:33,089 [INFO] Processing first 5 hotels
2024-12-29 21:52:33,095 [INFO] Processing hotel 1/5
2024-12-29 21:52:33,341 [INFO] Fetching details from: https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ehFkDwprPY&pm=daybase#overview
2024-12-29 21:52:33,346 [INFO] Loading URL: https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ehFkDwprPY&pm=daybase#overview
2024-12-29 21:52:47,964 [INFO] Page loaded successfully
2024-12-29 21:52:58,581 [INFO] Successfully processed: Ighiz Inn Resort
2024-12-29 21:52:59,701 [INFO] Processing hotel 2/5
2024-12-29 21:53:03,595 [INFO] Processing hotel 3/5
2024-12-29 21:53:07,408 [INFO] Processing hotel 4/5
2024-12-29 21:53:11,105 [INFO] Processing hotel 5/5
2024-12-29 21:53:14,426 [INFO] Successfully processed 1 ho


==== ✅ Success ====
Successfully scraped 1 hotels

💾 Results saved to: /app/data/hotel_data.json

==== 📌 Detailed Results ====


📍 Basic Information:
  • Name: Ighiz Inn Resort
  • Location: Nearby - Forkan Mosque, Garden Boutalamine
  • Address: Zone touristique, Route nationale N° 10, Errachidia 53000
  • Stars: N/A
  • Base Price: $40

⭐ Ratings & Reviews:
  • Rating: 8.0
  • Reviews: Very good (379)

📝 Description:
  Hotel in Errachidia with free breakfast and a seasonal outdoor pool This smoke-free hotel features a restaurant, a coffee shop/cafe, and a seasonal outdoor pool. Free buffet breakfast, free WiFi in pu...

🎯 Amenities:

🛏️ Room Options:

  Room Option 1:
    • Provider: Booking.com
    • Price: $40
    • Bed Configuration: N/A
    • Board Type: N/A
    • Cancellation: N/A

  Room Option 2:
    • Provider: KAYAK
    • Price: $40
    • Bed Configuration: N/A
    • Board Type: N/A
    • Cancellation: N/A

  Room Option 3:
    • Provider: Priceline
    • Price: $40
    • B

2024-12-29 21:53:14,668 [INFO] WebDriver closed successfully



🔒 Scraper closed successfully

==== 🏁 Complete ====
