# Hotel Search Results 
Target Website: Booking.com 

Location: Cowes, VIC

Period: Next 30 days

In [32]:
# install library
# !pip install bs4 requests selenium

In [33]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
import datetime
import time
import matplotlib.pyplot as plt

In [34]:
# set chrome diver path
CHROMEDRIVER_PATH = r"C:<path>\chromedriver.exe"

In [35]:
# automatically scroll the page
def autoLoadPage(driver):
    scroll_pause_time = 3 # pause between each scroll
    screen_height = driver.execute_script("return window.screen.height;") # Browser window height
    tmp_count = 1
    while True:
        # scroll down
        driver.execute_script(f"window.scrollTo(0, {screen_height*tmp_count*tmp_count});")
        driver.execute_script("window.scrollTo(0, window.scrollY - 200)")
        tmp_count +=1
        time.sleep(scroll_pause_time)

        # check if reaching the end of the page
        scroll_height = driver.execute_script("return document.body.scrollHeight;")
        
        if (screen_height*tmp_count*tmp_count)> scroll_height:
            break

In [36]:
# get request
def getRequest(target_url):
    # send HTTP request
    response = requests.get(target_url)

    # Check if request was successful
    if response.status_code == 200:
        print('> Success Retrieve Data - Response: {}'.format(response.status_code))
        # Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup
    else:
        print('> Failed Retrieve Data - Response: {}'.format(response.status_code))
        return []

In [37]:
def getBookingComSearchResults(search_city,checkin_date,checkout_date,
                               currency,adults_no,children_no,rooms_no):
    start = time.process_time()
    print("> scraping Booking.com search results")

    target_url = "https://www.booking.com/searchresults.en-gb.html?ss="+search_city+"&dest_type=city&checkin="+checkin_date+"&checkout="+checkout_date+"&group_adults="+str(adults_no)+"&group_children="+str(children_no)+"&no_rooms="+str(rooms_no)+"&selected_currency="+currency+"&order=popularity"
    print("  ..get url: {}".format(target_url))
    
    soup = getRequest(target_url)

    # driver process
    service = Service(executable_path=CHROMEDRIVER_PATH)
    options = webdriver.ChromeOptions()
    try:
        driver = webdriver.Chrome(service=service, options=options)
    except:
        print("> ERROR: check driver version!!")
    driver.get(target_url)
    time.sleep(3)

    # close ads
    print("  ..check ads")
    try:
        
        tmp_ad_element = driver.find_element(By.XPATH,"/html/body/div[25]/div/div/div/div[1]/div[1]/div/button")
        tmp_ad_element.click()
        print("    ..close ads")
    except:
        try:
            
            tmp_ad_element = driver.find_element(By.XPATH,"/html/body/div[26]/div/div/div/div[1]/div[1]/div/button")
            tmp_ad_element.click()
            print("    ..close ads")
        except:
            try:
                driver.find_element(By.CLASS_NAME,"dba1b3bddf e99c25fd33 aabf155f9a f42ee7b31a a86bcdb87f b02ceec9d7").click()
                print("    ..close ads by CLASS_NAME")
            except:
                print("    ..no ads")
            

    for i in range(10):
        autoLoadPage(driver)

        html_content = driver.page_source
        soup = BeautifulSoup(html_content,'html.parser')

        # get number of properties that website found
        try:
            web_found = soup.find("h1",{"aria-live":"assertive"}).text.split(" ")[2]
            web_found = int(web_found)
        except:
            try:
                web_found = soup.find("h1",{"aria-live":"assertive"}).text.split(" ")[1]
                web_found = int(web_found)
            except:
                if web_found == "No":
                    print("> No properties found!")
                    allData = []
                    web_found = 0
                    break

        print("  ..web found: {}" .format(web_found))

        allData = soup.find_all("div",{"data-testid":"property-card"})
        print("  ..round: {} found: {} from {} items".format(str(i+1), str(len(allData)), str(web_found)))

        # click load more results
        try:
            buttons = driver.find_elements(By.TAG_NAME, "button")
            for button in buttons:
                if button.text == "Load more results":
                    button.click()
                    time.sleep(1)
        except:
            print("  ..no more results")
        try:
            if len(allData) >= int(web_found)-1:
                break
        except:
            break

    hotel_list = []
    for i in range(len(allData)):
        hotel_info = {}
        hotel_info["arrivalDate"] = checkin_date
        hotel_info["departureDate"] = checkout_date
        
        # extract the hotel name
        try:
            hotel_info["name"] = allData[i].find('div',{'data-testid':'title'}).text
        except:
            hotel_info["name"] = None

        # extract the hotel location
        try: 
            hotel_info["location"] = allData[i].find('span',{'class':'cf35c10683 d57d1b7d64','data-testid':'address'}).text
        except:
            hotel_info["location"] = None
            
        # extract the hotel star
        try:
            hotel_info["stars"] = allData[i].find('div',{'class':'f97c3d5c2f'}).attrs['aria-label']  # get value in aira-label
        except:
            hotel_info["stars"] = None
        
        # extract the hotel rating
        try:
            hotel_info["rating"] = allData[i].find('div',{'class':'a3b8729ab1 e6208ee469 cb2cbb3ccb'}).text
        except:
            hotel_info["rating"] = None
        
        # extract the hotel score
        try:
            hotel_info["score"] = allData[i].find('div',{'class':'a3b8729ab1 d86cee9b25'}).text
        except:
            hotel_info["score"] = None
            
        # extract the hotel price
        try:
            hotel_info["price"] = allData[i].find('span',{'class':'f6431b446c fbfd7c1165 e84eb96b1f'}).text.replace(u'\xa0',' ')
        except:
            hotel_info["price"] = None
        
        # extract the recommended room
        try:
            hotel_info["recommendedRoom"] = allData[i].find('h4',{'role': 'link'}).text
        except:
            hotel_info["recommendedRoom"] = None

        # extract the number of reviews
        try:
            hotel_info['reviews'] = allData[i].find('div', {'class': 'abf093bdfe f45d8e4c32 d935416c47'}).text
        except:
            hotel_info['reviews'] = None

        # extract the details
        try:
            hotel_info['details'] = allData[i].find('li',class_='a6a38de85e').text
        except:
            hotel_info['details'] = None

        hotel_list.append(hotel_info)

    driver.close()

    print("  ..Time spends: {}".format(time.process_time() - start))
    return hotel_list

In [38]:
# set variable
search_city = "Cowes"
nights = 1
currency = "AUD"
adults_no = 2
children_no = 0
rooms_no = 1 
next_days = 30

In [39]:
# main
## print info setting
print("> Hotel Search Results")
print("  ..night(s): {}".format(nights))
print("  ..search city: {}".format(search_city))
print("  ..currency: {}".format(currency))

booking_list = []
for i in range(next_days):
    checkin_date = str(datetime.date.today()+ datetime.timedelta(days=i))
    checkout_date = str(datetime.date.today()+ datetime.timedelta(days=i+nights))
    print(">check-in: {}".format(checkin_date))
    
    booking_cur = getBookingComSearchResults(search_city,checkin_date,checkout_date,
                                            currency,adults_no,children_no,rooms_no)
    booking_list = booking_list+booking_cur

> Hotel Search Results
  ..night(s): 1
  ..search city: Cowes
  ..currency: AUD
>check-in: 2024-10-22
> scraping Booking.com search results
  ..get url: https://www.booking.com/searchresults.en-gb.html?ss=Cowes&dest_type=city&checkin=2024-10-22&checkout=2024-10-23&group_adults=2&group_children=0&no_rooms=1&selected_currency=AUD&order=popularity
> Success Retrieve Data - Response: 200
  ..check ads
    ..no ads
  ..web found: 6
  ..round: 1 found: 14 from 6 items
  ..Time spends: 0.4375
>check-in: 2024-10-23
> scraping Booking.com search results
  ..get url: https://www.booking.com/searchresults.en-gb.html?ss=Cowes&dest_type=city&checkin=2024-10-23&checkout=2024-10-24&group_adults=2&group_children=0&no_rooms=1&selected_currency=AUD&order=popularity
> Success Retrieve Data - Response: 200
  ..check ads
    ..close ads
  ..web found: 33
  ..round: 1 found: 33 from 33 items
  ..Time spends: 1.046875
>check-in: 2024-10-24
> scraping Booking.com search results
  ..get url: https://www.bookin

In [40]:
booking_df = pd.DataFrame(booking_list)
booking_df

Unnamed: 0,arrivalDate,departureDate,name,location,stars,rating,score,price,recommendedRoom,reviews,details
0,2024-10-22,2024-10-23,Coachman Motel and Holiday Units,,,Good,Scored 7.9 7.9,AUD 153,Premium Twin Room,"1,175 reviews","2 beds (1 single, 1 large double)"
1,2024-10-22,2024-10-23,The Nature Resort Villas,,,Very good,Scored 8.2 8.2,AUD 430,Studio with Spa Bath,313 reviews,Entire studio • 1 bathroom • 1 kitchen • 46m²1...
2,2024-10-22,2024-10-23,Twin Beaches Retreat Cowes,,,Superb,Scored 9.4 9.4,AUD 732,Four-Bedroom House,46 reviews,Entire holiday home • 4 bedrooms • 1 living ro...
3,2024-10-22,2024-10-23,Hollydene House - Cowes,,,Very good,Scored 8.2 8.2,AUD 875,Five-Bedroom House,6 reviews,Entire holiday home • 5 bedrooms • 1 living ro...
4,2024-10-22,2024-10-23,The Sandpiper Cove Retreat,,,Exceptional,Scored 10 10,AUD 589,Five-Bedroom House,3 reviews,Entire holiday home • 5 bedrooms • 2 bathrooms...
...,...,...,...,...,...,...,...,...,...,...,...
1197,2024-11-20,2024-11-21,"Seafoam, Cowes, New Home, Fast NBN WIFI",,,Exceptional,Scored 9.5 9.5,AUD 499,Three-Bedroom House,7 reviews,Entire holiday home • 3 bedrooms • 2 living ro...
1198,2024-11-20,2024-11-21,"Pet friendly, Family home close to town",,,Fabulous,Scored 8.8 8.8,AUD 501,Three-Bedroom House,25 reviews,Entire holiday home • 3 bedrooms • 1 bathroom4...
1199,2024-11-20,2024-11-21,Kiana Escape @Millowl,,,Exceptional 10,,AUD 329,Four-Bedroom House,24 external reviews,Entire holiday home • 4 bedrooms • 2 bathrooms...
1200,2024-11-20,2024-11-21,"Homey Beach Cottage with Spa, walk to beach",,,Review score,Scored 5.7 5.7,AUD 501,Three-Bedroom House,4 reviews,Entire holiday home • 3 bedrooms • 2 bathrooms...
