In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Scrape the name and "about" description of every hotel listed in
# hotelUrls.csv (one page per row, address taken from the 'url' column) and
# write the input rows plus two new columns to hotelData.csv.

# Headless Chrome with the sandbox and /dev/shm usage disabled.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# NOTE(review): passing the driver path positionally is only accepted by older
# Selenium releases; newer ones expect a Service object - confirm the
# installed version before changing this call.
driver = webdriver.Chrome('chromedriver', options=chrome_options)

defaultTimeout = 30   # seconds each WebDriverWait may block
getPageRetryMax = 5   # attempts per hotel before a blank row is recorded
minTextLength = 5     # scraped text shorter than this is stored as ""

# Everything that uses the browser runs inside try/finally so the Chrome
# process is shut down even if reading the input, scraping, or writing the
# output raises (or the run is interrupted).
try:
    hotelData = pd.read_csv('hotelUrls.csv')

    totalHotelsToScrape = len(hotelData)
    print(f"Preparing to scrape {totalHotelsToScrape} hotels")

    # Exactly one entry is appended to each list per input row, so both stay
    # aligned with hotelData and can be attached as columns afterwards.
    hotelNames = []
    hotelDescriptions = []

    # currentHotelIndex is the 0-based position of the row (used for progress
    # output); index is the DataFrame label of the row.
    for currentHotelIndex, (index, row) in enumerate(hotelData.iterrows()):
        for currentTry in range(1, getPageRetryMax + 1):
            try:
                currentPageUrl = row['url']
                driver.get(currentPageUrl)

                # Get hotel name from page, it has an ID
                hotelNameElement = WebDriverWait(driver, defaultTimeout).until(
                    EC.presence_of_element_located((By.ID, "HEADING"))
                )
                hotelName = hotelNameElement.text

                aboutElement = WebDriverWait(driver, defaultTimeout).until(
                    EC.presence_of_element_located((By.ID, "ABOUT_TAB"))
                )

                # Search for the description inside the "about" element only,
                # by waiting on that element instead of on the whole page.
                descriptionElement = WebDriverWait(aboutElement, defaultTimeout).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='pIRBV _T']"))
                )
                descriptionText = descriptionElement.text.replace('\n', " ")
            except Exception as error:
                # Best effort: any failure while loading or reading the page
                # triggers another attempt. The exception type is reported so
                # repeated failures can be diagnosed.
                print(f"Failed to get data from page at index {index} ({type(error).__name__}), retrying...")
            else:
                # Results are recorded outside the retried section: a failure
                # after the appends must never cause a retry that appends a
                # second time and misaligns the columns.
                # Blank/very short values are stored as "" to keep the columns
                # aligned with hotelData.
                hotelNames.append(hotelName if len(hotelName) >= minTextLength else "")
                hotelDescriptions.append(
                    descriptionText if len(descriptionText) >= minTextLength else ""
                )

                print(f"Scraped hotel {currentHotelIndex + 1} of {totalHotelsToScrape}, hotelName: {hotelName}, len(descriptionText): {len(descriptionText)}")
                break
        else:
            # Every attempt failed: record blanks so the row count still matches.
            print(f"Failed to retrieve information from hotel at index {index}, skipping")
            hotelNames.append("")
            hotelDescriptions.append("")

    hotelData['hotelNames'] = hotelNames
    hotelData['hotelDescriptions'] = hotelDescriptions

    hotelData.to_csv("hotelData.csv", index=False, header=True)
finally:
    driver.quit()