## Hotels

In [1]:
import sys
import os
import dotenv
import requests
import pandas as pd
import numpy as np
import random as rand
from calendar import monthrange
import asyncio
import aiohttp


sys.path.append("..")

dotenv.load_dotenv()

True

In [32]:
tk1 = os.getenv('booking_token')

In [49]:
def extract_hotel_info(hotel_data):
    result = dict(hotel_name = [], price = [], rating = [], distance_from_center = [], acc_type = [], city = [])
    
    for hotel in hotel_data:
        hotel_name = hotel.get("hotel_name", np.nan)
        price = hotel.get("min_total_price", np.nan)
        rating = hotel.get("review_score", np.nan)
        distance_from_center = hotel.get("distance_to_cc", np.nan)
        acc_type = hotel.get("accommodation_type_name", np.nan)
        city = hotel.get("city_trans", np.nan)

        result["hotel_name"].append(hotel_name)
        result["price"].append(price)
        result["rating"].append(rating)
        result["distance_from_center"].append(distance_from_center)
        result["acc_type"].append(acc_type)
        result["city"].append(city)

        df_hotel = pd.DataFrame(result)
    return df_hotel

In [33]:
destinations = ["budapest", "milan"]

url_loc = "https://booking-com.p.rapidapi.com/v1/hotels/locations"

headers = {
	"x-rapidapi-key": tk1,
	"x-rapidapi-host": "booking-com.p.rapidapi.com"
}

loc_ids = []

for loc in destinations:
    querystring = {"locale":"es","name":loc}
    response = requests.get(url_loc, headers=headers, params=querystring)
    loc_ids.append(response.json()[0]["dest_id"])

In [35]:
loc_ids

['-850553', '-121726']

In [None]:
def extract_hotel_info_loc():
    url = "https://booking-com.p.rapidapi.com/v1/hotels/search"
    headers = {
        "x-rapidapi-key": tk1,
        "x-rapidapi-host": "booking-com.p.rapidapi.com"
    }

    list_df_hotel = []

    children_n = 1
    children_ages = ",".join(rand.choices([str(a) for a in range(1,11)], k = children_n))

    adult_n = 2
    room_n = 1

    for loc_id in loc_ids:



        page = 1
        trip_duration = 10
        year = 2025
        month = 1
        day_in = 1
        day_out = day_in + trip_duration-1

        
        while day_out <= monthrange(year,month)[1]:
            date_in = pd.to_datetime(f"{year}-{month}-{day_in}").date()
            date_out = pd.to_datetime(f"{year}-{month}-{day_out}").date()
            print(date_in)

            querystring = {"children_ages":children_ages,"page_number":page,"adults_number":adult_n,"children_number":children_n,
                        "room_number":room_n,"include_adjacency":"true","units":"metric",
                        "categories_filter_ids":"class::3,class::4,class::5","checkout_date":date_out,
                        "dest_id":loc_id,"filter_by_currency":"EUR","dest_type":"city","checkin_date":date_in,
                        "order_by":"popularity","locale":"en-gb"}

            response = requests.get(url, headers=headers, params=querystring)
            print(response.status_code)
            data = response.json()["result"]
            hotel_info = extract_hotel_info(data)

            dias = ["Lunes", "Martes", "Miercoles", "Jueves", "Viernes", "Sabado", "Domingo"]

            hotel_info["date_in"] = date_in
            hotel_info["date_out"] = date_out
            hotel_info["day_in"] = dias[date_in.weekday()-1]
            hotel_info["day_out"] = dias[date_out.weekday()-1]

            list_df_hotel.append(hotel_info)
            print(children_ages)
            day_in += 1
            day_out = day_in + trip_duration-1

In [None]:
async def main(loc_ids, headers):
    children_n = 1
    children_ages = ",".join(rand.choices([str(a) for a in range(1, 11)], k=children_n))
    adult_n = 2
    room_n = 1
    trip_duration = 10
    year = 2025  # You can adjust the year as needed

    async with aiohttp.ClientSession() as session:
        tasks = []
        for loc_id in loc_ids:
            task = extract_hotel_info_loc(session, loc_id, children_ages, adult_n, room_n, trip_duration, year)
            tasks.append(task)
        results = await asyncio.gather(*tasks)
        
        # Flatten the results into a single list of dataframes
        list_df_hotel = [item for sublist in results for item in sublist]
        
        return list_df_hotel

In [170]:
pd.concat(list_df_hotel)[pd.concat(list_df_hotel)["acc_type"] == "Hotel"].groupby(["city", "day_in"])[["price", "rating"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,price,rating
city,day_in,Unnamed: 2_level_1,Unnamed: 3_level_1
Budapest,Domingo,1106.304393,8.522727
Budapest,Jueves,1035.950286,8.607692
Budapest,Lunes,1005.103657,8.5125
Budapest,Martes,1298.115414,8.584615
Budapest,Miercoles,1165.512577,8.745455
Budapest,Sabado,1024.794801,8.535294
Budapest,Viernes,994.167278,8.55
Milan,Domingo,1626.768667,8.093333
Milan,Jueves,1842.277692,8.257692
Milan,Lunes,1845.928667,8.126667
