In [21]:
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd

# Scrape AirDNA market-overview pages for Ile-de-France into a DataFrame.
#
# Flow: start Chrome, restore a session from cookies.json, collect every link
# on the region page whose href ends with "overview", visit each one, read the
# headline metrics, and save the result to 'Final_file.csv'.

# Keys present in the exported cookie file that are stripped before each
# cookie is handed to Selenium.
UNSUPPORTED_COOKIE_KEYS = (
    'sameSite', 'storeId', 'session', 'hostOnly',
    'httpOnly', 'path', 'expirationDate',
)

# Output column -> XPath of the element whose text holds that value.
# Insertion order defines the column order of the resulting DataFrame.
FIELD_XPATHS = {
    "City": "//span[contains(@class, 'MuiBox-root') and text()]",
    "Market Score": "//h1[contains(@class, 'MuiTypography-titleXL')]",
    "Annual Revenue": "//h6[text()='Annual Revenue']/following-sibling::div//h3",
    "Occupancy Rate": "//h6[text()='Occupancy Rate']/following-sibling::div//h3",
    "Average Daily Rate": "//h6[text()='Average Daily Rate']/following-sibling::div//h3",
    "Seasonality": "//p[contains(@class, 'MuiTypography-body2') and contains(text(), 'Seasonality')]/preceding-sibling::p",
}


def _text_or_default(browser, xpath, default="N/A"):
    """Return the text of the first element matching ``xpath``.

    Args:
        browser: Selenium WebDriver positioned on the page to inspect.
        xpath: XPath expression locating the element.
        default: Value returned when the lookup fails for any reason.

    Returns:
        The element's ``.text``, or ``default`` if the element could not be read.
    """
    try:
        return browser.find_element(By.XPATH, xpath).text
    except Exception:
        # Deliberately best-effort: a missing field must not drop the whole row.
        # `Exception` (not a bare except) so Ctrl-C still interrupts the run.
        return default


# Initialize WebDriver
driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and compatible

data = []

try:
    # Navigate to the website's domain to set up the context for the cookies
    # NOTE(review): cookies are added on app.airdna.co but scraping happens on
    # www.airdna.co — confirm the cookies are scoped to the parent domain.
    driver.get('https://app.airdna.co')  # Adjust URL to match the domain of the cookies

    # Load cookies from the cookies.json file
    with open('cookies.json', 'r') as cookiesfile:
        cookies = json.load(cookiesfile)

    # Add each cookie to the Selenium browser session
    for cookie in cookies:
        for key in UNSUPPORTED_COOKIE_KEYS:
            cookie.pop(key, None)

        try:
            driver.add_cookie(cookie)
        except Exception as e:
            # .get() so a cookie without a 'name' cannot raise inside the handler
            print(f"Could not add cookie {cookie.get('name')}: {e}")

    # Refresh the page to apply the cookies
    driver.refresh()
    time.sleep(3)

    # Navigate to the specific page
    driver.get('https://www.airdna.co/vacation-rental-data/app/fr/ile-de-france')
    time.sleep(3)

    # Find all links that end with 'overview'; read each href exactly once,
    # since every get_attribute call is a round-trip to the browser.
    hrefs = (
        link.get_attribute('href')
        for link in driver.find_elements(By.CSS_SELECTOR, "a")
    )
    overview_urls = [href for href in hrefs if href and href.endswith("overview")]

    # Loop through each overview URL
    for url in overview_urls:
        try:
            driver.get(url)
            time.sleep(5)  # fixed wait for the page to render before reading it

            record = {
                column: _text_or_default(driver, xpath)
                for column, xpath in FIELD_XPATHS.items()
            }
            data.append(record)

            print(f"Data extracted for {record['City']}")

        except Exception as e:
            # A failure on one URL is reported and the loop moves on.
            print(f"Error processing URL {url}: {e}")
finally:
    # Close the browser even if setup or scraping raised
    driver.quit()

# Create a DataFrame
df = pd.DataFrame(data)
print(df)
# Save the DataFrame to a CSV file
df.to_csv('Final_file.csv', index=False)
print("Data saved to 'Final_file.csv'.")


Data extracted for Ablis
Data extracted for Ablon Sur Seine
Data extracted for Acheres La Foret
Data extracted for Acheres
Data extracted for Aincourt
Data extracted for Alfortville
Data extracted for Andilly
Data extracted for Andresy
Data extracted for Angerville
Data extracted for Annet Sur Marne
Data extracted for Antony
Data extracted for Arbonne La Foret
Data extracted for Arcueil
Data extracted for Argenteuil
Data extracted for Arnouville Les Mantes
Data extracted for Arnouville
Data extracted for Arpajon
Data extracted for Asnieres Sur Oise
Data extracted for Asnieres Sur Seine
Data extracted for Athis Mons
Data extracted for Aubergenville
Data extracted for Aubervilliers
Data extracted for Auffargis
Data extracted for Aulnay Sous Bois
Data extracted for Aulnoy
Data extracted for Auvers Sur Oise
Data extracted for Avon
Data extracted for Bagneaux Sur Loing
Data extracted for Bagneux
Data extracted for Bagnolet
Data extracted for Baillet En France
Data extracted for Bailly Romai

Data extracted for Jouy Le Moutier
Data extracted for Jouy Sur Morin
Data extracted for Juvisy Sur Orge
Data extracted for Juziers
Data extracted for L Etang La Ville
Data extracted for L Hay Les Roses
Data extracted for L Ile Saint Denis
Data extracted for L Isle Adam
Data extracted for La Boissiere Ecole
Data extracted for La Celle Saint Cloud
Data extracted for La Chapelle La Reine
Data extracted for La Chapelle Moutils
Data extracted for La Courneuve
Data extracted for La Ferte Alais
Data extracted for La Ferte Gaucher
Data extracted for La Ferte Sous Jouarre
Data extracted for La Frette Sur Seine
Data extracted for La Garenne Colombes
Data extracted for La Genevraye
Data extracted for La Grande Paroisse
Data extracted for La Houssaye En Brie
Data extracted for La Queue En Brie
Data extracted for La Roche Guyon
Data extracted for La Rochette
Data extracted for La Verriere
Data extracted for La Ville Du Bois
Data extracted for Lagny Sur Marne
Data extracted for Larchant
Data extract

Data extracted for Saint Hilarion
Data extracted for Saint Illiers La Ville
Data extracted for Saint Jean Les Deux Jumeaux
Data extracted for Saint Leu La Foret
Data extracted for Saint Loup De Naud
Data extracted for Saint Mammes
Data extracted for Saint Mande
Data extracted for Saint Mard
Data extracted for Saint Martin En Biere
Data extracted for Saint Martin La Garenne
Data extracted for Saint Maur Des Fosses
Data extracted for Saint Maurice
Data extracted for Saint Michel Sur Orge
Data extracted for Saint Nom La Breteche
Data extracted for Saint Ouen L Aumone
Data extracted for Saint Ouen
Data extracted for Saint Pathus
Data extracted for Saint Pierre Du Perray
Data extracted for Saint Pierre Les Nemours
Data extracted for Saint Prix
Data extracted for Saint Remy L Honore
Data extracted for Saint Remy Les Chevreuse
Data extracted for Saint Sauveur Sur Ecole
Data extracted for Saint Thibault Des Vignes
Data extracted for Saint Witz
Data extracted for Sainte Genevieve Des Bois
Data 

In [22]:
# Preview the first rows of the scraped table as a quick sanity check.
# NOTE(review): the recorded output below has an 'Unnamed: 0' column and no
# 'Seasonality' column, so it appears to come from an earlier run — confirm.
df.head()

Unnamed: 0,City,Market Score,Annual Revenue,Occupancy Rate,Average Daily Rate
0,Ablis,2,$6.2K,40%,$82.1
1,Ablon Sur Seine,18,$3.1K,41%,$83.3
2,Acheres La Foret,94,$31.9K,52%,$312.7
3,Acheres,96,$5.2K,43%,$104.9
4,Aincourt,19,$7.8K,29%,$289.6


In [23]:
# Write the scraped table to disk, leaving out the positional index column
df.to_csv(path_or_buf='airdna_data.csv', index=False)
print("Data exported to airdna_data.csv")


Data exported to airdna_data.csv


In [24]:
# Print a structural summary of the table: index range, per-column non-null
# counts and dtypes, and memory usage.
# NOTE(review): the recorded output lists 5 columns, all of dtype object and
# without 'Seasonality' — presumably from an earlier run; confirm.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 650 entries, 0 to 649
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   City                650 non-null    object
 1   Market Score        650 non-null    object
 2   Annual Revenue      650 non-null    object
 3   Occupancy Rate      650 non-null    object
 4   Average Daily Rate  650 non-null    object
dtypes: object(5)
memory usage: 25.5+ KB
