In [None]:
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import pandas as pd
import re, time, random

# ===========================
# Load the input CSV
# ===========================
# Postal codes are read as text from the start: letting pandas infer an
# integer column and converting to str afterwards drops leading zeros
# ("01000" -> "1000"), which would corrupt the "<city> (<postal code>)"
# search strings built from this column.
# nrows=50 keeps the first 50 rows without creating a slice of a larger
# frame, so no column is ever assigned on a slice.
df_data_villes_clean = pd.read_csv(
    'data_test.csv',
    dtype={'Code_postal': str},
    nrows=50,
)
# Kept from the original so missing postal codes still render as "nan".
df_data_villes_clean['Code_postal'] = df_data_villes_clean['Code_postal'].astype(str)

# ===========================
# Helper to clean up numbers
# ===========================
def extract_number(text):
    """Parse a French-formatted amount such as "3 456 €" into a float.

    Every whitespace character is removed (regular, no-break and narrow
    no-break spaces are all used as thousands separators), a decimal comma
    is read as a decimal point, and the first numeric token is converted.
    Currency symbols or unit suffixes around the figure are ignored.

    Args:
        text: Raw text containing one amount.

    Returns:
        The amount as a float.

    Raises:
        ValueError: If ``text`` contains no number, or the numeric token is
            malformed (for example several decimal points).
    """
    # str.split() with no argument splits on any Unicode whitespace, which
    # covers "\u202f" and "\xa0" as well as plain spaces.
    compact = "".join(text.split()).replace(",", ".")
    # Take the whole dotted token so that a malformed value like "1.234.56"
    # makes float() fail loudly instead of being silently truncated.
    match = re.search(r"-?\d+(?:\.\d+)*", compact)
    if match is None:
        raise ValueError(f"no number found in {text!r}")
    return float(match.group())

# ===========================
# Scrape the price pages with Playwright
# ===========================
def _parse_price_summary(soup):
    """Extract the price/rent figures from a parsed result page.

    Args:
        soup: BeautifulSoup document of the page currently displayed.

    Returns:
        A dict holding the town name under "ville" plus a main/min/max
        triple for each ``ul.prices-summary__price-range`` list found, or
        ``None`` when the page contains no such list.

    Raises:
        AttributeError, IndexError, ValueError: If the page does not have
            the expected structure (no <title>, a list with fewer than
            three <li>, a range with fewer than two numbers, or an amount
            that cannot be parsed).
    """
    price_lists = soup.find_all("ul", class_="prices-summary__price-range")
    if not price_lists:
        return None

    title_text = soup.find("title").get_text().strip()
    ville = title_text.split("Prix immobilier")[1].split("(")[0].strip()
    data_dict = {"ville": ville}

    for ul in price_lists:
        li_items = ul.find_all("li")
        label = li_items[0].get_text().strip().lower()
        main_value = extract_number(li_items[1].get_text())
        range_text = (
            li_items[2].get_text()
            .replace("\u202f", "")
            .replace("\xa0", "")
            .replace(",", ".")
        )
        # Match well-formed numbers only: the looser "[\d\.]+" also captured
        # a lone "." (e.g. sentence punctuation), which made float() raise
        # and discarded the whole city.
        bounds = [float(n) for n in re.findall(r"\d+(?:\.\d+)?", range_text)]

        # The first list seen for a given label is stored as the apartment
        # figures, any later one as the house figures.
        if "prix" in label:
            kind = "maison" if "prix_appartement" in data_dict else "appartement"
            data_dict[f"prix_{kind}"] = main_value
            data_dict[f"min_{kind}"] = bounds[0]
            data_dict[f"max_{kind}"] = bounds[1]
        elif "loyer" in label:
            kind = "maison" if "loyer_appartement" in data_dict else "appartement"
            data_dict[f"loyer_{kind}"] = main_value
            data_dict[f"loyer_min_{kind}"] = bounds[0]
            data_dict[f"loyer_max_{kind}"] = bounds[1]

    return data_dict


# Bound before the browser starts so the name exists for the save step even
# if the browser session fails early.
all_data = []

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)  # visible browser, for testing
    context = browser.new_context(
        locale='fr-FR',
        user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/116.0.0.0 Safari/537.36'
    )

    page = context.new_page()
    page.goto("https://www.meilleursagents.com/prix-immobilier/")

    for _, row in df_data_villes_clean.iterrows():
        city = f"{row['Lib_MA']} ({row['Code_postal']})"
        try:
            # Pauses go through page.wait_for_timeout (milliseconds) rather
            # than time.sleep: the Playwright docs advise against
            # time.sleep with the sync API because it blocks the loop
            # Playwright relies on internally.
            page.wait_for_timeout(random.uniform(2, 4) * 1000)

            # Random scroll
            page.evaluate(f"window.scrollBy(0, {random.randint(100, 500)});")
            page.wait_for_timeout(random.uniform(0.5, 1.5) * 1000)

            # Type the city into the search box, one character at a time
            # with a random per-key delay.
            search_box = page.locator("input[name='q']")
            search_box.fill("")  # clear
            for char in city:
                search_box.type(char, delay=random.randint(30, 70))
            search_box.press("Enter")

            page.wait_for_timeout(random.uniform(3, 5) * 1000)

            soup = BeautifulSoup(page.content(), 'html.parser')
            data_dict = _parse_price_summary(soup)

            if data_dict is None:
                # No price block on the page: report it instead of skipping
                # the city silently, then reload before the next search.
                print(f"Aucune donnée trouvée pour la ville : {city}")
                page.reload()
                page.wait_for_timeout(random.uniform(2, 4) * 1000)
                continue

            data_dict["city"] = city
            all_data.append(data_dict)
            print(f"Datas ajoutées pour la ville : {data_dict['ville']}")

        except Exception as e:
            # Per-city boundary: one failing page must not abort the run.
            print(f"Erreur pour la ville {city}: {e}")

    browser.close()

# ===========================
# Write the results to CSV
# ===========================
# One row per scraped city; the DataFrame index is left out of the file.
df_meilleur_agent = pd.DataFrame(data=all_data)
df_meilleur_agent.to_csv(path_or_buf='data_test_clean.csv', index=False)
print("Scraping terminé ! CSV enregistré.")


NotImplementedError: 