# Download Spain

In [9]:
import requests
import xml.etree.ElementTree as ET
import zipfile
import io
import geopandas as gpd
import pandas as pd
import os
import time
import fiona

In [10]:
# Top-level ATOM feed of the Catastro INSPIRE buildings service; this is the
# first feed handed to get_feed_entries.
BASE_URL = "https://www.catastro.hacienda.gob.es/INSPIRE/buildings/ES.SDGC.BU.atom.xml"
# Folder that receives the building GML files extracted from the Catastro ZIPs.
OUT_DIR_MAIN = "/data/uscuni-ulce/extension/spain"
# Base address of the Catastro INSPIRE service.
# NOTE(review): not referenced by any of the cells shown here — confirm it is still needed.
CAT_DOMAIN = "https://www.catastro.hacienda.gob.es/INSPIRE/"

In [None]:
def get_feed_entries(url):
    """Download an ATOM feed and return its entries as plain dicts.

    Parameters
    ----------
    url : str
        Address of the ATOM feed.

    Returns
    -------
    list of dict
        One dict per usable ``<entry>`` with the keys ``'title'``, ``'link'``
        and ``'id'``. ``'link'`` is the ``href`` of the entry's first
        ``<link>`` element. ``'title'`` and ``'id'`` are ``None`` when the
        corresponding element is missing. Entries without a link ``href`` are
        skipped because there is nothing to download for them.

        An empty list is returned when the feed cannot be downloaded or is
        not well-formed XML; the problem is printed, never raised, so a long
        crawl is not aborted by a single bad feed.
    """
    try:
        r = requests.get(url, timeout=30)  # timeout avoids hanging forever
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Skipping feed {url} due to error: {e}")
        return []

    # Servers sometimes answer 200 with an HTML error page; treat an
    # unparsable body the same way as a failed download.
    try:
        root = ET.fromstring(r.content)
    except ET.ParseError as e:
        print(f"Skipping feed {url}: response is not valid XML ({e})")
        return []

    ns = {"atom": "http://www.w3.org/2005/Atom"}

    entries = []
    for entry in root.findall("atom:entry", ns):
        link_elt = entry.find("atom:link", ns)
        href = link_elt.get("href") if link_elt is not None else None
        if not href:
            # Without a target URL the entry is useless to every caller.
            continue

        title_elt = entry.find("atom:title", ns)
        id_elt = entry.find("atom:id", ns)
        entries.append({
            "title": title_elt.text if title_elt is not None else None,
            "link": href,
            "id": id_elt.text if id_elt is not None else None,
        })
    return entries

# Read the top-level feed. Each returned entry is treated as one province
# whose "link" points to a further feed listing that province's municipalities.
provinces = get_feed_entries(BASE_URL)
print(provinces)

In [None]:
# Create the target folder up front; without it every open(..., "wb") below
# fails and is merely reported as a per-municipality processing error.
os.makedirs(OUT_DIR_MAIN, exist_ok=True)

for p_idx, prov in enumerate(provinces):

    print(f"\nProvince {p_idx}: {prov['title']}")

    # Step 2: get municipalities in this province
    municipalities = get_feed_entries(prov["link"])
    print(f"  -> {len(municipalities)} municipalities")

    for m_idx, muni in enumerate(municipalities):
        # Label used in log messages; falls back to a positional name when
        # the feed entry carries no id.
        muni_id = muni["id"] if muni["id"] else f"muni_{p_idx:02d}_{m_idx:04d}"

        # Polite delay before every request, so it also applies after a
        # failed download (the failure path skips the rest of the iteration).
        time.sleep(0.2)

        # Step 3: download the municipality ZIP into memory
        try:
            r = requests.get(muni["link"], timeout=120)
            r.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"    Download failed for {muni_id}: {e}")
            continue

        # Step 4: extract only the building layer(s) from the archive.
        # Best-effort on purpose: a broken archive or a failed write is
        # reported and the crawl moves on to the next municipality.
        try:
            with zipfile.ZipFile(io.BytesIO(r.content)) as z:
                building_files = [m for m in z.namelist() if m.lower().endswith("building.gml")]
                for member in building_files:
                    # Save using just the GML filename; dropping any folder
                    # part of the archive member keeps the file inside
                    # OUT_DIR_MAIN.
                    out_path = os.path.join(OUT_DIR_MAIN, os.path.basename(member))
                    with z.open(member) as src, open(out_path, "wb") as dst:
                        dst.write(src.read())
                    print(f"Saved {out_path}")

        except Exception as e:
            print(f"    Failed processing {muni_id}: {e}")


## Download Basque Country separately

In [11]:
# ATOM feeds of the building datasets published on the araba.eus, bizkaia.eus,
# gipuzkoa.eus and navarra.es hosts.
# NOTE(review): presumably these territories are not part of the BASE_URL feed
# and therefore need their own download — confirm.
# NOTE(review): this list is not iterated by the cells shown here; the cells
# below hard-code the Gipuzkoa feed URL and the Bizkaia ZIP URL pattern.
special_provinces = [
    "https://geo.araba.eus/atom/BU/Buildings.atom",
    "https://apli.bizkaia.eus/apps/Danok/INSPIRE/buildings.xml",
    "https://b5m.gipuzkoa.eus/inspire/download/buildings.xml",
    "https://filescartografia.navarra.es/2_CARTOGRAFIA_TEMATICA/2_7_CATASTRO/2_7_3_INSPIRE_ATOM/2_7_3_2_BU/Buildings_ServiceATOM_Navarra.xml"
]

# Output folder for the separately downloaded files (used by the Bizkaia
# download cell).
OUT_DIR_BASQUE = "/data/uscuni-ulce/extension/basque"

In [15]:
# Example ATOM feed for Gipuzkoa
url = "https://b5m.gipuzkoa.eus/inspire/download/buildings.xml"

# Make sure the target folder exists before anything is written to it.
os.makedirs(OUT_DIR_BASQUE, exist_ok=True)

# The feed itself is required: fail loudly if it cannot be fetched or parsed.
r = requests.get(url, timeout=120)
r.raise_for_status()
root = ET.fromstring(r.content)
ns = {"atom": "http://www.w3.org/2005/Atom"}

for entry in root.findall("atom:entry", ns):
    # Only entries that expose an "alternate" link are downloaded.
    alt_link = entry.find("atom:link[@rel='alternate']", ns)
    if alt_link is None:
        continue

    zip_url = alt_link.attrib["href"]
    print(f"Downloading {zip_url} ...")

    # A single unreachable or corrupt archive is reported and skipped, in
    # line with the other download loops in this notebook.
    try:
        r2 = requests.get(zip_url, timeout=120)
        r2.raise_for_status()
        archive = zipfile.ZipFile(io.BytesIO(r2.content))
    except (requests.exceptions.RequestException, zipfile.BadZipFile) as e:
        print(f"  -> Failed to download {zip_url}: {e}")
        continue

    with archive as z:
        for gml_file in z.namelist():
            if gml_file.lower().endswith(".gml"):
                # Written to OUT_DIR_BASQUE: the name OUT_DIR is not defined
                # anywhere in this notebook and raised NameError on a fresh
                # kernel.
                outpath = os.path.join(OUT_DIR_BASQUE, os.path.basename(gml_file))
                with z.open(gml_file) as src, open(outpath, "wb") as dst:
                    dst.write(src.read())
                print(f"Saved {outpath}")

In [1]:
# List the output folder (same path as OUT_DIR_BASQUE) to see what has been
# downloaded so far.
!ls "/data/uscuni-ulce/extension/basque"

ES.BFA.BU.013.gfs  ES.GFA.BU.gml  ES.GFA.BUO.gml  araba_alava  gipuzkoa


In [15]:
# Bizkaia ZIPs are addressed by a zero-padded three-digit index.
# Loop over indices 1 to 915 inclusive: range() excludes its stop value,
# hence the "+ 1" (a stop of 915 would silently skip index 915).
BIZKAIA_LAST_IDX = 915

# Make sure the target folder exists before anything is written to it.
os.makedirs(OUT_DIR_BASQUE, exist_ok=True)

for idx in range(1, BIZKAIA_LAST_IDX + 1):
    # Make URL with leading zeros (e.g., 001, 002)
    code = f"{idx:03d}"
    zip_url = f"https://apli.bizkaia.eus/apps/Danok/INSPIRE/ES.BFA.BU.{code}.zip"
    print(f"Downloading {zip_url} ...")

    # Indices without a downloadable ZIP are reported and skipped.
    try:
        r = requests.get(zip_url, timeout=60)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"  -> Failed to download {zip_url}: {e}")
        continue

    # Open ZIP in memory; a corrupt archive must not abort the whole run.
    try:
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            # Find GML file
            gml_members = [f for f in z.namelist() if f.lower().endswith(".gml")]
            if not gml_members:
                print(f"  -> No GML found in {zip_url}")
                continue

            # Only the first GML of the archive is kept.
            gml_member = gml_members[0]

            # Extract GML and save with unique name
            new_gml_name = f"ES.BFA.BU.{code}.gml"
            gml_path = os.path.join(OUT_DIR_BASQUE, new_gml_name)

            with z.open(gml_member) as gml_file, open(gml_path, "wb") as f_out:
                f_out.write(gml_file.read())

            print(f"  -> Saved {gml_path}")
    except zipfile.BadZipFile as e:
        print(f"  -> Invalid ZIP at {zip_url}: {e}")
        continue

Downloading https://apli.bizkaia.eus/apps/Danok/INSPIRE/ES.BFA.BU.020.zip ...
  -> Saved /data/uscuni-ulce/extension/basque/ES.BFA.BU.020.gml
