# Preprocess Spain and Basque Country buildings

In [2]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import os

## Spain

In [3]:
# Folder containing the Spain building GML files
folder = Path("/data/uscuni-ulce/extension/spain/")

# Find all .gml files. Sort them: glob yields entries in filesystem order, and
# the row order (and therefore the index) of the combined frame follows the
# order of this list.
gml_files = sorted(folder.glob("*.gml"))
if not gml_files:
    # Fail here with a clear message rather than later inside pd.concat
    # ("No objects to concatenate").
    raise FileNotFoundError(f"No .gml files found in {folder}")

# Read only the needed columns from each file and reproject to EPSG:3035.
# The per-file frames are concatenated in the next cell.
gdfs = [
    gpd.read_file(
        f,
        columns=["geometry", "beginning", "end", "value", "currentUse"],
        use_arrow=True,
    ).to_crs("EPSG:3035")
    for f in gml_files
]

In [10]:
# Stack the per-file frames into a single GeoDataFrame, then explode it.
# explode() is called with its defaults, so rows produced from the same source
# row keep the same index label (the index is therefore not unique).
spain_raw = (
    pd.concat(gdfs, ignore_index=True)
    .pipe(gpd.GeoDataFrame)
    .explode()
)
spain_raw

Unnamed: 0,beginning,end,currentUse,value,geometry
0,1944-01-01T00:00:00,1944-01-01T00:00:00,4_3_publicServices,55,"POLYGON ((3328786.428 2102374.993, 3328777.707..."
1,1944-01-01T00:00:00,1944-01-01T00:00:00,3_industrial,76,"POLYGON ((3328723.849 2101563.942, 3328724.862..."
2,1900-01-01T00:00:00,2000-01-01T00:00:00,4_3_publicServices,119,"POLYGON ((3324585.802 2104952.066, 3324588.398..."
3,2000-01-01T00:00:00,2000-01-01T00:00:00,2_agriculture,1047,"POLYGON ((3325928.207 2102926.23, 3325920.92 2..."
3,2000-01-01T00:00:00,2000-01-01T00:00:00,2_agriculture,1047,"POLYGON ((3326027.383 2102950.242, 3326036.09 ..."
...,...,...,...,...,...
12474033,1943-01-01T00:00:00,2000-01-01T00:00:00,1_residential,588,"POLYGON ((3325293.988 2132331.395, 3325291.564..."
12474034,1943-01-01T00:00:00,1943-01-01T00:00:00,3_industrial,612,"POLYGON ((3325301.591 2132309.053, 3325303.907..."
12474035,1943-01-01T00:00:00,1943-01-01T00:00:00,1_residential,198,"POLYGON ((3325315.231 2132297.456, 3325321.111..."
12474036,1943-01-01T00:00:00,2003-01-01T00:00:00,1_residential,414,"POLYGON ((3325329.104 2132292.959, 3325328.34 ..."


In [24]:
# Footprint area of each row. The frames were reprojected to EPSG:3035, whose
# axes are in metres, so .area is in square metres.
area_mask = spain_raw.area > 50000

# currentUse is public services or missing
use_mask = (spain_raw["currentUse"] == "4_3_publicServices") | (spain_raw["currentUse"].isna())

# Remove only the rows matching both conditions. Select with the boolean mask
# instead of drop(<index labels>): index labels repeat after explode(), so a
# label-based drop would also delete rows that share a label with a matching
# row but do not match the conditions themselves.
selected = spain_raw.loc[~(area_mask & use_mask)]

In [29]:
# Replace the generic "value" column name with "floor_area".
selected = selected.rename(columns={"value": "floor_area"})

In [30]:
# Display the filtered frame after the column rename.
selected

Unnamed: 0,beginning,end,currentUse,floor_area,geometry
0,1944-01-01T00:00:00,1944-01-01T00:00:00,4_3_publicServices,55,"POLYGON ((3328786.428 2102374.993, 3328777.707..."
1,1944-01-01T00:00:00,1944-01-01T00:00:00,3_industrial,76,"POLYGON ((3328723.849 2101563.942, 3328724.862..."
2,1900-01-01T00:00:00,2000-01-01T00:00:00,4_3_publicServices,119,"POLYGON ((3324585.802 2104952.066, 3324588.398..."
3,2000-01-01T00:00:00,2000-01-01T00:00:00,2_agriculture,1047,"POLYGON ((3325928.207 2102926.23, 3325920.92 2..."
3,2000-01-01T00:00:00,2000-01-01T00:00:00,2_agriculture,1047,"POLYGON ((3326027.383 2102950.242, 3326036.09 ..."
...,...,...,...,...,...
12474033,1943-01-01T00:00:00,2000-01-01T00:00:00,1_residential,588,"POLYGON ((3325293.988 2132331.395, 3325291.564..."
12474034,1943-01-01T00:00:00,1943-01-01T00:00:00,3_industrial,612,"POLYGON ((3325301.591 2132309.053, 3325303.907..."
12474035,1943-01-01T00:00:00,1943-01-01T00:00:00,1_residential,198,"POLYGON ((3325315.231 2132297.456, 3325321.111..."
12474036,1943-01-01T00:00:00,2003-01-01T00:00:00,1_residential,414,"POLYGON ((3325329.104 2132292.959, 3325328.34 ..."


In [31]:
# Write the filtered Spain buildings (EPSG:3035) to parquet.
selected.to_parquet(
    path="/data/uscuni-ulce/extension/spain/clean_3035.parquet",
)

## Araba/Alava

In [3]:
# List the contents of the Araba/Alava input folder.
!ls "/data/uscuni-ulce/extension/basque/araba_alava"

ES.AFA.BU.0101_3042.gml  ES.AFA.BU.0123_3042.gml  ES.AFA.BU.0147_3042.gml
ES.AFA.BU.0102_3042.gml  ES.AFA.BU.0126_3042.gml  ES.AFA.BU.0149_3042.gml
ES.AFA.BU.0103_3042.gml  ES.AFA.BU.0127_3042.gml  ES.AFA.BU.0151_3042.gml
ES.AFA.BU.0104_3042.gml  ES.AFA.BU.0128_3042.gml  ES.AFA.BU.0152_3042.gml
ES.AFA.BU.0106_3042.gml  ES.AFA.BU.0130_3042.gml  ES.AFA.BU.0153_3042.gml
ES.AFA.BU.0108_3042.gml  ES.AFA.BU.0131_3042.gml  ES.AFA.BU.0154_3042.gml
ES.AFA.BU.0109_3042.gml  ES.AFA.BU.0132_3042.gml  ES.AFA.BU.0155_3042.gml
ES.AFA.BU.0110_3042.gml  ES.AFA.BU.0133_3042.gml  ES.AFA.BU.0156_3042.gml
ES.AFA.BU.0111_3042.gml  ES.AFA.BU.0134_3042.gml  ES.AFA.BU.0157_3042.gml
ES.AFA.BU.0113_3042.gml  ES.AFA.BU.0135_3042.gml  ES.AFA.BU.0158_3042.gml
ES.AFA.BU.0114_3042.gml  ES.AFA.BU.0136_3042.gml  ES.AFA.BU.0159_3042.gml
ES.AFA.BU.0116_3042.gml  ES.AFA.BU.0137_3042.gml  ES.AFA.BU.0160_3042.gml
ES.AFA.BU.0117_3042.gml  ES.AFA.BU.0139_3042.gml  ES.AFA.BU.0161_3042.gml
ES.AFA.BU.0118_3042.gml  ES.AFA.BU.014

In [11]:
# Folder containing the Araba/Alava building GML files
folder = Path("/data/uscuni-ulce/extension/basque/araba_alava")

# Find all .gml files. Sort them: glob yields entries in filesystem order, and
# the row order (and therefore the index) of the combined frame follows the
# order of this list.
gml_files = sorted(folder.glob("*.gml"))
if not gml_files:
    # Fail here with a clear message rather than later inside pd.concat
    # ("No objects to concatenate").
    raise FileNotFoundError(f"No .gml files found in {folder}")

# Read only the needed columns from each file and reproject to EPSG:3035.
# The per-file frames are concatenated in the next cell.
gdfs = [
    gpd.read_file(
        f,
        columns=["geometry", "anyPoint", "value"],
        use_arrow=True,
    ).to_crs("EPSG:3035")
    for f in gml_files
]

In [12]:
# Stack the per-file Araba/Alava frames into one GeoDataFrame with a fresh
# 0..n-1 index.
araba_alava = pd.concat(gdfs, ignore_index=True).pipe(gpd.GeoDataFrame)
araba_alava

Unnamed: 0,anyPoint,value,geometry
0,1997-12-31T00:00:00,3,"POLYGON ((3311074.159 2265588.406, 3311069.784..."
1,2009-12-31T00:00:00,0,"POLYGON ((3310798.778 2265145.178, 3310798.903..."
2,1970-12-31T00:00:00,3,"POLYGON ((3311051.897 2265045.697, 3311071.174..."
3,1997-12-31T00:00:00,6,"POLYGON ((3311071.662 2265047.478, 3311072.765..."
4,2001-12-31T00:00:00,3,"POLYGON ((3311198.001 2265026.355, 3311216.489..."
...,...,...,...
81257,2004-12-31T00:00:00,3,"POLYGON ((3301872.91 2257088.536, 3301870.612 ..."
81258,2008-12-31T00:00:00,6,"POLYGON ((3302975.665 2258112.935, 3302974.023..."
81259,1800-12-31T00:00:00,6,"POLYGON ((3300315.89 2254941.917, 3300316.288 ..."
81260,1800-12-31T00:00:00,6,"POLYGON ((3300260.191 2254767.308, 3300261.799..."


In [15]:
# Rename the source columns: "anyPoint" becomes "beginning" (the name the Spain
# frame uses) and "value" becomes "height". Then show the CRS of the frame.
araba_alava = araba_alava.rename(
    columns={
        "value": "height",
        "anyPoint": "beginning",
    }
)
araba_alava.crs

<Projected CRS: EPSG:3035>
Name: ETRS89-extended / LAEA Europe
Axis Info [cartesian]:
- Y[north]: Northing (metre)
- X[east]: Easting (metre)
Area of Use:
- name: Europe - European Union (EU) countries and candidates. Europe - onshore and offshore: Albania; Andorra; Austria; Belgium; Bosnia and Herzegovina; Bulgaria; Croatia; Cyprus; Czechia; Denmark; Estonia; Faroe Islands; Finland; France; Germany; Gibraltar; Greece; Hungary; Iceland; Ireland; Italy; Kosovo; Latvia; Liechtenstein; Lithuania; Luxembourg; Malta; Monaco; Montenegro; Netherlands; North Macedonia; Norway including Svalbard and Jan Mayen; Poland; Portugal including Madeira and Azores; Romania; San Marino; Serbia; Slovakia; Slovenia; Spain including Canary Islands; Sweden; Switzerland; Türkiye (Turkey); United Kingdom (UK) including Channel Islands and Isle of Man; Vatican City State.
- bounds: (-35.58, 24.6, 44.83, 84.73)
Coordinate Operation:
- name: Europe Equal Area 2001
- method: Lambert Azimuthal Equal Area
Datum: Eur

In [16]:
# Write the Araba/Alava buildings (EPSG:3035) to parquet.
araba_alava.to_parquet(
    path="/data/uscuni-ulce/extension/basque/araba_alava/clean_3035.parquet",
)

## Bizkaia

In [8]:
# Read a single Bizkaia GML file with every column and display it.
sample = gpd.read_file(
    filename="/data/uscuni-ulce/extension/basque/bizkaia/ES.BFA.BU.001.gml",
)
sample

Unnamed: 0,gml_id,identifier,beginLifespanVersion,anyPoint,beginning,end,end_,elevation,endLifespanVersion,informationSystem,...,percentage,numberOfDwellings,numberOfBuildingUnits,numberOfFloorsAboveGround,parts,referenceGeometry,horizontalGeometryEstimatedAccuracy,horizontalGeometryEstimatedAccuracy_uom,verticalGeometryEstimatedAccuracy,geometry
0,BuildingS.1,https://geo.bizkaia.eus/arcgisserverinspire/re...,2011-07-14T15:28:37,,,1986-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,,16,3.0,,True,1.0,m,,"POLYGON ((-2.61664 43.16855, -2.61664 43.16859..."
1,BuildingS.2,https://geo.bizkaia.eus/arcgisserverinspire/re...,2013-12-19T08:45:15,,,1963-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,8.0,11,6.0,,True,1.0,m,,"POLYGON ((-2.61212 43.16836, -2.61212 43.16839..."
2,BuildingS.3,https://geo.bizkaia.eus/arcgisserverinspire/re...,2013-12-19T08:45:15,,,1960-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,18.0,23,7.0,,True,1.0,m,,"POLYGON ((-2.61222 43.16837, -2.61221 43.16838..."
3,BuildingS.4,https://geo.bizkaia.eus/arcgisserverinspire/re...,2013-12-19T08:45:15,,,1963-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,8.0,13,6.0,,True,1.0,m,,"POLYGON ((-2.612 43.16836, -2.612 43.16834, -2..."
4,BuildingS.6,https://geo.bizkaia.eus/arcgisserverinspire/re...,2017-05-30T12:02:58,,,1970-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,12.0,15,5.0,,True,1.0,m,,"POLYGON ((-2.61165 43.16818, -2.61166 43.1681,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1351,BuildingS.682840,https://geo.bizkaia.eus/arcgisserverinspire/re...,2025-05-28T08:02:03,,,1975-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,1.0,1,3.0,,True,1.0,m,,"POLYGON ((-2.62217 43.12445, -2.62235 43.12458..."
1352,BuildingS.682971,https://geo.bizkaia.eus/arcgisserverinspire/re...,2025-06-06T10:47:52,,,1920-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,3.0,3,4.0,,True,1.0,m,,"POLYGON ((-2.60901 43.15284, -2.60888 43.15292..."
1353,BuildingS.683219,https://geo.bizkaia.eus/arcgisserverinspire/re...,2025-06-26T10:41:00,,,2016-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,1.0,1,3.0,,True,1.0,m,,"POLYGON ((-2.60976 43.1502, -2.60976 43.1502, ..."
1354,BuildingS.683522,https://geo.bizkaia.eus/arcgisserverinspire/re...,2025-07-11T13:11:16,,,1800-12-31T00:00:00,,,,https://appsec.ebizkaia.eus/O4GC000C/vistas/vi...,...,,1.0,1,3.0,,True,1.0,m,,"POLYGON ((-2.60113 43.14278, -2.60106 43.14274..."
