In [1]:
import os
import glob
import time

import requests
from bs4 import BeautifulSoup
import pandas as pd
from loguru import logger

import config

In [4]:
# Scrape every review page of one Booking.com property into `review_details_df`.
#
# The cell pages through the review-list endpoint `rows` reviews at a time and
# stops at the first page that contains no review blocks (or that comes back
# with an HTTP error status).

url_review = "https://www.booking.com/reviewlist.html"

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

# Query parameters shared by every page request; "offset" is added per page.
payload_template = {
    "cc1": "ch",
    "pagename": "badrutt-s-palace-st-moritz",
    "type": "total",
    "sort": "f_recent_desc",
    "time_of_year": "",
    "dist": "1",
    "rows": "25",
}

# requests waits forever by default; bound each call so a stalled connection
# cannot hang the kernel.
REQUEST_TIMEOUT_S = 30
# Pause between consecutive page requests.
PAGE_DELAY_S = 1.5

# Register the file sink only once per kernel session: every `logger.add` call
# attaches an additional sink, so re-running this cell would otherwise write
# each message to the log file several times.
if "_scraping_log_sink_id" not in globals():
    _scraping_log_sink_id = logger.add("scraping_log.log", rotation="500 MB", level="INFO")


def _text_or_empty(node, css):
    """Return the stripped text of the first element under `node` matching `css`, or "" if none."""
    match = node.select_one(css)
    return match.text.strip() if match is not None else ""


def _parse_review(review_box, hotel_name):
    """Turn one review block element into a flat record.

    Args:
        review_box: parsed element for a single `.review_list_new_item_block`.
        hotel_name: value stored in the record's "hotel_name" field.

    Returns:
        dict with one entry per output column; fields whose element is absent
        are "" ("lang" is None when the body exists but has no `lang` attribute).
    """
    # Look the review body up once; both the text and the language come from it.
    body = review_box.select_one(".c-review__body")
    if body is not None:
        review_text = "".join(body.stripped_strings)
        lang = body.get("lang")
    else:
        review_text = ""
        lang = ""

    # NOTE(review): this selector requires a `.c-review__body` nested inside
    # another `.c-review__body`; the recorded output shows this column empty
    # for every displayed row — confirm the selector against the live markup.
    negative_review_element = review_box.select_one(".c-review__body span.c-review__body[lang='en-us']")
    negative_review = negative_review_element.text.strip() if negative_review_element is not None else ""

    return {
        "hotel_name": hotel_name,
        "review_id": review_box.get("data-review-url"),
        "review_score": _text_or_empty(review_box, ".bui-review-score__badge"),
        "review_title": _text_or_empty(review_box, ".c-review-block__title"),
        "review_date": _text_or_empty(review_box, ".c-review-block__date"),
        "user_name": _text_or_empty(review_box, ".bui-avatar-block__title"),
        "user_country": _text_or_empty(review_box, ".bui-avatar-block__subtitle"),
        "room_type": _text_or_empty(review_box, ".c-review-block__room-link .bui-list__body"),
        "stay_date": _text_or_empty(review_box, ".c-review-block__stay-date .c-review-block__date"),
        "travel_type": _text_or_empty(review_box, ".review-panel-wide__traveller_type .bui-list__body"),
        "stay_night": _text_or_empty(review_box, ".c-review-block__stay-date .bui-list__body"),
        "review_text": review_text,
        "lang": lang,
        "negative_review": negative_review,
        "response_review": _text_or_empty(review_box, ".c-review-block__response__body"),
    }


# Query parameters actually sent, one dict per request (kept for inspection).
params_list = []

# Accumulated review records across all pages.
data_list = []

# Loop invariants: page size and the hotel slug both come from the template so
# they cannot drift apart from what is actually requested.
rows_per_page = int(payload_template["rows"])
hotel_name = payload_template["pagename"]

page = 1
while True:
    payload = payload_template.copy()
    payload["offset"] = (page - 1) * rows_per_page  # zero-based offset of this page

    response = requests.get(url_review, headers=headers, params=payload, timeout=REQUEST_TIMEOUT_S)
    params_list.append(payload)

    # An error page contains no review blocks and would otherwise look exactly
    # like "no more reviews"; make the early stop visible in the log instead.
    if not response.ok:
        logger.warning(
            f"Page {page} returned HTTP {response.status_code}; "
            f"stopping with {len(data_list)} reviews collected."
        )
        break

    soup = BeautifulSoup(response.text, "html.parser")
    review_boxes = soup.select(".review_list_new_item_block")
    if not review_boxes:
        break  # past the last page: nothing left to scrape

    parsed = [_parse_review(review_box, hotel_name) for review_box in review_boxes]
    data_list.extend(parsed)

    logger.info(f"Page {page} processed. Total reviews: {len(parsed)}")

    page += 1
    time.sleep(PAGE_DELAY_S)  # be polite to the server between pages

# Build the frame once from the accumulated records.
review_details_df = pd.DataFrame(data_list)

# Display the DataFrame as the cell's result.
review_details_df

[32m2023-11-26 11:05:20.911[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 1 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:23.627[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 2 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:26.350[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 3 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:28.946[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 4 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:31.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 5 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:34.207[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage 6 processed. Total reviews: 25[0m
[32m2023-11-26 11:05:36.936[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m88[0m - [1mPage

Unnamed: 0,hotel_name,review_id,review_score,review_title,review_date,user_name,user_country,room_type,stay_date,travel_type,stay_night,review_text,lang,negative_review,response_review
0,badrutt-s-palace-st-moritz,bc051b18071bac93,10,Exceptional,September 2023,Loraine,United Kingdom,Deluxe Double Room lake view,September 2023,Couple,2 nights · \n\nSeptember 2023,There are no comments available for this review,en,,
1,badrutt-s-palace-st-moritz,8708b90a07f808db,8.0,Very good,August 2023,Oliver,Switzerland,Signature Double Room lake view with balcony,August 2023,Couple,2 nights · \n\nAugust 2023,There are no comments available for this review,de,,
2,badrutt-s-palace-st-moritz,b9ba3e40b989254d,10,Exceptional,September 2023,Bruno,Switzerland,Deluxe Double Room Village Side,September 2023,Couple,1 night · \n\nSeptember 2023,Everything was exceptional from the staff to t...,en-us,,"Dear Bruno,\nThank you for sharing your fantas..."
3,badrutt-s-palace-st-moritz,d72c01c6b5b16e3f,10,Exceptional,September 2023,Sarah,Lebanon,Superior Single Room Village Side,September 2023,Solo traveller,2 nights · \n\nSeptember 2023,There are no comments available for this review,en-us,,
4,badrutt-s-palace-st-moritz,1308007b91ba9a53,9.0,Superb,August 2023,Pascal,Switzerland,Deluxe Double Room Village Side,August 2023,Couple,3 nights · \n\nAugust 2023,There are no comments available for this review,it,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,badrutt-s-palace-st-moritz,40041e748bdd2e1d,10,Exceptional,December 2020,Schalal,Monaco,Deluxe Double Room Village Side,December 2020,Family,1 night · \n\nDecember 2020,There are no comments available for this review,de,,
183,badrutt-s-palace-st-moritz,882919bf01f654bd,10,Gut,December 2020,Bieri,Switzerland,Signature Double Room lake view with balcony,December 2020,Family,7 nights · \n\nDecember 2020,There are no comments available for this review,de,,
184,badrutt-s-palace-st-moritz,9c9c7f64184525de,10,Exceptional,December 2020,Elio,Switzerland,Superior Double Room Village Side,December 2020,Group,1 night · \n\nDecember 2020,There are no comments available for this review,de,,
185,badrutt-s-palace-st-moritz,afb5edf8a453d479,8.0,Very good,December 2020,Anonymous,Italy,,December 2020,Couple,3 nights · \n\nDecember 2020,There are no comments available for this review,it,,


In [5]:
# Preview the first five scraped reviews as a sanity check on the parsed columns.
review_details_df.head(n=5)

Unnamed: 0,hotel_name,review_id,review_score,review_title,review_date,user_name,user_country,room_type,stay_date,travel_type,stay_night,review_text,lang,negative_review,response_review
0,badrutt-s-palace-st-moritz,bc051b18071bac93,10.0,Exceptional,September 2023,Loraine,United Kingdom,Deluxe Double Room lake view,September 2023,Couple,2 nights · \n\nSeptember 2023,There are no comments available for this review,en,,
1,badrutt-s-palace-st-moritz,8708b90a07f808db,8.0,Very good,August 2023,Oliver,Switzerland,Signature Double Room lake view with balcony,August 2023,Couple,2 nights · \n\nAugust 2023,There are no comments available for this review,de,,
2,badrutt-s-palace-st-moritz,b9ba3e40b989254d,10.0,Exceptional,September 2023,Bruno,Switzerland,Deluxe Double Room Village Side,September 2023,Couple,1 night · \n\nSeptember 2023,Everything was exceptional from the staff to t...,en-us,,"Dear Bruno,\nThank you for sharing your fantas..."
3,badrutt-s-palace-st-moritz,d72c01c6b5b16e3f,10.0,Exceptional,September 2023,Sarah,Lebanon,Superior Single Room Village Side,September 2023,Solo traveller,2 nights · \n\nSeptember 2023,There are no comments available for this review,en-us,,
4,badrutt-s-palace-st-moritz,1308007b91ba9a53,9.0,Superb,August 2023,Pascal,Switzerland,Deluxe Double Room Village Side,August 2023,Couple,3 nights · \n\nAugust 2023,There are no comments available for this review,it,,
