In [None]:

import os, time, warnings, sys
from urllib.parse import urljoin
import requests
from requests.exceptions import SSLError
from bs4 import BeautifulSoup

# Index page that lists the PDFs; also the base used to resolve relative links.
BASE_URL = "https://max.de.wilde.web.ulb.be/camille/"
# Local directory the PDFs are written to (created by main() if missing).
SAVE_DIR = "pdfs"
# Headers sent with every request issued through safe_get().
HEADERS = {"User-Agent": "Mozilla/5.0", "Referer": BASE_URL}

def _raise_for_status_or_close(resp):
    """Raise for a 4xx/5xx status, closing *resp* first so it is not leaked.

    With ``stream=True`` the body has not been read yet, so an unclosed
    response keeps its connection checked out of the pool.
    """
    try:
        resp.raise_for_status()
    except Exception:
        resp.close()
        raise


def safe_get(url, stream=False, timeout=30, **kwargs):
    """GET *url* with the module HEADERS, retrying without TLS verification.

    The first attempt verifies certificates against certifi's CA bundle when
    certifi is importable, otherwise against requests' default. If that
    attempt fails with ``SSLError``, the request is retried once with
    ``verify=False`` and a notice is printed.

    NOTE(security): the fallback disables certificate validation, so the
    server's identity is not checked on that retry. Kept because it is the
    block's existing, deliberate behavior.

    Args:
        url: Address to fetch.
        stream: Passed to ``requests.get``; when true the caller must consume
            or close the returned response.
        timeout: Timeout in seconds passed to ``requests.get``.
        **kwargs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The ``requests.Response`` of the successful attempt.

    Raises:
        requests.exceptions.HTTPError: On a 4xx/5xx status (the response is
            closed before the exception propagates).
        requests.exceptions.RequestException: On other request failures.
    """
    # Best effort: any problem locating certifi's bundle falls back to the
    # default verification behavior of requests.
    try:
        import certifi
        verify_opt = certifi.where()
    except Exception:
        verify_opt = True

    try:
        resp = requests.get(url, headers=HEADERS, timeout=timeout, verify=verify_opt, stream=stream, **kwargs)
    except SSLError:
        try:
            from requests.packages.urllib3.exceptions import InsecureRequestWarning
        except ImportError:
            from urllib3.exceptions import InsecureRequestWarning
        # Silence the warning only for this retry instead of permanently
        # altering the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", InsecureRequestWarning)
            resp = requests.get(url, headers=HEADERS, timeout=timeout, verify=False, stream=stream, **kwargs)
        _raise_for_status_or_close(resp)
        print("⚠️ Fallback SSL: verify=False activé (problème TLS local).")
        return resp

    _raise_for_status_or_close(resp)
    return resp

def _stream_to_file(resp, dest):
    """Write the body of *resp* to *dest*, then close *resp*.

    The data is streamed to ``dest + ".part"`` and moved to *dest* with
    ``os.replace`` only once fully written, so an interrupted download never
    leaves a truncated file under the final name.
    """
    partial = dest + ".part"
    try:
        with open(partial, "wb") as fh:
            for chunk in resp.iter_content(8192):
                if chunk:
                    fh.write(chunk)
        os.replace(partial, dest)
    except BaseException:
        # Covers Ctrl-C as well: remove the incomplete file, then re-raise.
        try:
            os.remove(partial)
        except OSError:
            pass
        raise
    finally:
        resp.close()


def main():
    """Download every PDF linked from the BASE_URL index page into SAVE_DIR.

    Fetches the index page, collects the ``href`` of each ``<a>`` tag ending
    in ``.pdf`` (case-insensitive), and saves each target under SAVE_DIR using
    the basename of its href. Progress is printed per file, followed by a
    final count. Any request or I/O error propagates and stops the run.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    # 1) Fetch and parse the index page.
    r = safe_get(BASE_URL, stream=False, timeout=30)
    soup = BeautifulSoup(r.content, "html.parser")

    # 2) Collect the links that point to PDFs.
    links = [a.get('href') for a in soup.find_all('a') if a.get('href')]
    pdfs = [h for h in links if h.lower().endswith('.pdf')]
    if not pdfs:
        print("Aucun PDF détecté.")
        return

    # 3) Download each PDF.
    count = 0
    for href in pdfs:
        full = urljoin(BASE_URL, href)
        out = os.path.join(SAVE_DIR, os.path.basename(href))
        print("↓", out)
        _stream_to_file(safe_get(full, stream=True, timeout=60), out)
        count += 1
        print("✅", out)
        # Short pause between requests to avoid hammering the server.
        time.sleep(0.15)

    print(f"Terminé — {count} fichiers téléchargés.")
    # Sanity check against the expected size of the collection.
    if count == 51:
        print("✅ Contrôle OK: 51 PDFs récupérés.")

# Run the download only when executed directly, not when imported.
if __name__ == "__main__":
    main()


⚠️ Fallback SSL: verify=False activé (problème TLS local).
↓ pdfs/KB_JB230_1892-08-07_01-0003.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB230_1892-08-07_01-0003.pdf
↓ pdfs/KB_JB427_1920-01-10_01-00004.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB427_1920-01-10_01-00004.pdf
↓ pdfs/KB_JB555_1836-02-08_01-00002.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB555_1836-02-08_01-00002.pdf
↓ pdfs/KB_JB638_1860-05-21_01-00002.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB638_1860-05-21_01-00002.pdf
↓ pdfs/KB_JB773_1918-11-30_01-00002.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB773_1918-11-30_01-00002.pdf
↓ pdfs/KB_JB838_1887-12-28_01-00003.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_JB838_1887-12-28_01-00003.pdf
↓ pdfs/KB_JB230_1903-10-16_01-0002.pdf
⚠️ Fallback SSL: verify=False activé (problème TLS local).
✅ pdfs/KB_J