# Standardise building footprints

Ensure that all the building footprints are stored in a standardised form.

In [3]:
import geopandas as gpd
import pandas as pd

# Directory holding the per-region parquet files; it ends with "/" so file
# names can be appended to it directly.
DATADIR: str = "/data/uscuni-ulce/buildings_standardised/"

In [4]:
# Country-level parquet file names, relative to DATADIR. The cells below pick
# entries by position, so the order must stay fixed.
regions: list[str] = [
    "buildings_austria.pq",  # regions[0]
    "buildings_slovakia.pq",  # regions[1]
    "buildings_poland.pq",  # regions[2]
    "buildings_czechia.pq",  # regions[3]
]

### Germany

In [5]:
# Keys of the German state files; each is interpolated into a parquet file
# name further down. The cells below index this list by position, so the
# order must stay fixed. The spellings are part of the file names on disk.
ger_regions: list[str] = [
    "bavaria",  # 0
    "brandeburg",  # 1
    "bw",  # 2
    "hessen",  # 3
    "mv",  # 4
    "nrw",  # 5
    "rlp",  # 6
    "saarland",  # 7
    "sachsen",  # 8
    "sachsen-anhalt",  # 9
    "sh",  # 10
    "th",  # 11
    "ni",  # 12
    "bremen",  # 13
    "hamburg",  # 14
    "berlin",  # 15
]

#### Bavaria

In [6]:
# Load the footprints for Bavaria (ger_regions[0] is "bavaria"). The path is
# built from DATADIR so the data location is configured in one place.
bavaria = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[0]}.pq")

In [9]:
# No source attribute is mapped for this region: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
bavaria["currentUse"] = bavaria["buildingNature"] = bavaria["heightAboveGround"] = None

#### Brandenburg

In [12]:
# Load the footprints for Brandenburg (ger_regions[1] is "brandeburg" — the
# key is spelled that way in the file name). The path is built from DATADIR
# so the data location is configured in one place.
bb = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[1]}.pq")

In [15]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
bb["currentUse"], bb["buildingNature"], bb["heightAboveGround"] = (
    bb["funktion"],
    bb["gebnutzbez"],
    None,
)

#### BW

In [17]:
# Load the footprints for the "bw" state file (ger_regions[2]). The path is
# built from DATADIR so the data location is configured in one place.
bw = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[2]}.pq")

In [20]:
# No source attribute is mapped for this region: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
bw["currentUse"] = bw["buildingNature"] = bw["heightAboveGround"] = None

#### Hessen

In [22]:
# Load the footprints for Hessen (ger_regions[3] is "hessen"). The path is
# built from DATADIR so the data location is configured in one place.
hessen = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[3]}.pq")

In [25]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
hessen["currentUse"], hessen["buildingNature"], hessen["heightAboveGround"] = (
    hessen["funktion"],
    hessen["gebnutzbez"],
    None,
)

#### MV

In [27]:
# Load the footprints for the "mv" state file (ger_regions[4]). The path is
# built from DATADIR so the data location is configured in one place.
mv = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[4]}.pq")

In [30]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
mv["currentUse"], mv["buildingNature"], mv["heightAboveGround"] = (
    mv["funktion"],
    mv["gebnutzbez"],
    None,
)

#### NRW

In [33]:
# Load the footprints for the "nrw" state file (ger_regions[5]). The path is
# built from DATADIR so the data location is configured in one place.
nrw = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[5]}.pq")

In [36]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
nrw["currentUse"], nrw["buildingNature"], nrw["heightAboveGround"] = (
    nrw["funktion"],
    nrw["gebnutzbez"],
    None,
)

#### RLP

In [38]:
# Load the footprints for the "rlp" state file (ger_regions[6]). The path is
# built from DATADIR so the data location is configured in one place.
rlp = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[6]}.pq")

In [41]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
rlp["currentUse"], rlp["buildingNature"], rlp["heightAboveGround"] = (
    rlp["funktion"],
    rlp["gebnutzbez"],
    None,
)

#### Saarland

Lots of random buildings; this doesn't look like official data.

In [44]:
# Load the footprints for Saarland (ger_regions[7] is "saarland"). The path is
# built from DATADIR so the data location is configured in one place.
saarland = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[7]}.pq")

In [47]:
# No source attribute is mapped for this region: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
saarland["currentUse"] = saarland["buildingNature"] = saarland["heightAboveGround"] = None

#### Sachsen

In [49]:
# Load the footprints for Sachsen (ger_regions[8] is "sachsen"). The path is
# built from DATADIR so the data location is configured in one place.
sachsen = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[8]}.pq")

In [52]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
sachsen["currentUse"], sachsen["buildingNature"], sachsen["heightAboveGround"] = (
    sachsen["funktion"],
    sachsen["gebnutzbez"],
    None,
)

#### Sachsen-Anhalt

In [54]:
# Load the footprints for Sachsen-Anhalt (ger_regions[9] is "sachsen-anhalt").
# The directory comes from DATADIR so the data location is configured in one
# place.
# NOTE(review): this file name has no "germany_" prefix, unlike every other
# state file read in this notebook. It is kept as-is; confirm it matches the
# file on disk.
sa = gpd.read_parquet(f"{DATADIR}buildings_{ger_regions[9]}.pq")

In [57]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
sa["currentUse"], sa["buildingNature"], sa["heightAboveGround"] = (
    sa["funktion"],
    sa["gebnutzbez"],
    None,
)

#### Schleswig-Holstein

In [59]:
# Load the footprints for the "sh" state file (ger_regions[10]). The path is
# built from DATADIR so the data location is configured in one place.
sh = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[10]}.pq")

In [62]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
sh["currentUse"], sh["buildingNature"], sh["heightAboveGround"] = (
    sh["funktion"],
    sh["gebnutzbez"],
    None,
)

#### Thuringia

In [64]:
# Load the footprints for the "th" state file (ger_regions[11]). The path is
# built from DATADIR so the data location is configured in one place.
th = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[11]}.pq")

In [67]:
# Fill the standardised columns: "funktion" -> currentUse,
# "gebnutzbez" -> buildingNature; no height column is mapped, so
# heightAboveGround is None.
th["currentUse"], th["buildingNature"], th["heightAboveGround"] = (
    th["funktion"],
    th["gebnutzbez"],
    None,
)

#### Lower Saxony (Niedersachsen)

In [69]:
# Load the footprints for the "ni" state file (ger_regions[12]). The path is
# built from DATADIR so the data location is configured in one place.
ni = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[12]}.pq")

In [72]:
# Only buildingNature has a source column here ("gfk__bez"); currentUse and
# heightAboveGround are filled with None.
ni["currentUse"], ni["buildingNature"], ni["heightAboveGround"] = (
    None,
    ni["gfk__bez"],
    None,
)

#### Bremen

In [75]:
# Load the footprints for Bremen (ger_regions[13] is "bremen") and reproject
# them to EPSG:3035. The path is built from DATADIR so the data location is
# configured in one place.
bremen = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[13]}.pq")
bremen = bremen.to_crs(epsg=3035)

In [78]:
# Fill the standardised columns; this frame's source columns are upper-case:
# "FUNKTION" -> currentUse, "GEBNUTZBEZ" -> buildingNature. No height column
# is mapped, so heightAboveGround is None.
bremen["currentUse"], bremen["buildingNature"], bremen["heightAboveGround"] = (
    bremen["FUNKTION"],
    bremen["GEBNUTZBEZ"],
    None,
)

#### Hamburg

In [81]:
# Load the footprints for Hamburg (ger_regions[14] is "hamburg") and reproject
# them to EPSG:3035. The path is built from DATADIR so the data location is
# configured in one place.
hamburg = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[14]}.pq")
hamburg = hamburg.to_crs(epsg=3035)

In [84]:
# No source attribute is mapped for this region: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
hamburg["currentUse"] = hamburg["buildingNature"] = hamburg["heightAboveGround"] = None

#### Berlin

In [88]:
# Load the footprints for Berlin (ger_regions[15] is "berlin"). The path is
# built from DATADIR so the data location is configured in one place.
berlin = gpd.read_parquet(f"{DATADIR}buildings_germany_{ger_regions[15]}.pq")

In [91]:
# No source attribute is mapped for this region: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
berlin["currentUse"] = berlin["buildingNature"] = berlin["heightAboveGround"] = None

#### Merge Germany

In [93]:
# Echo the state keys as one comma-separated line. Reuse ger_regions rather
# than repeating the literal list, so the two cannot drift apart.
print(",".join(ger_regions))

bavaria,brandeburg,bw,hessen,mv,nrw,rlp,saarland,sachsen,sachsen-anhalt,sh,th,ni,bremen,hamburg,berlin


In [94]:
# Standardised output schema: the three attribute columns plus the geometry.
subset = [
    "currentUse",
    "buildingNature",
    "heightAboveGround",
    "geometry",
]

In [95]:
# All German state frames, in the same order as the keys in ger_regions; the
# merge below stacks them in this order.
all_gdfs = [
    bavaria, bb, bw, hessen,
    mv, nrw, rlp, saarland,
    sachsen, sa, sh, th,
    ni, bremen, hamburg, berlin,
]

In [96]:
%%time
# Keep only the standardised columns of every state frame and stack them into
# one frame with a fresh consecutive index.
all_germany = pd.concat([frame[subset] for frame in all_gdfs], ignore_index=True)

CPU times: user 23.2 s, sys: 1.04 s, total: 24.2 s
Wall time: 24.2 s


In [97]:
# Persist the merged German footprints in the same directory as the inputs.
# The path is built from DATADIR so the data location is configured in one place.
all_germany.to_parquet(DATADIR + "merged_germany.pq")

### Austria

In [99]:
# Load the Austrian footprints (regions[0] is "buildings_austria.pq"), then
# reproject them to EPSG:3035.
austria = gpd.read_parquet(DATADIR + regions[0])
austria = austria.to_crs(epsg=3035)

In [102]:
# Column inventory kept for reference. It is a bare string literal, so its only
# effect is being echoed as the cell output.
# NOTE(review): these names differ from the columns actually read in the
# Austria mapping cell ("BAUWERKSFUNKTION", "AGWR_TYP", "HOEHE_OBJEKT_MEDIAN").
# The list looks like a translated copy; confirm against austria.columns.
"""
'F_CODE', 'F_NAME', 'STRUCTURE FUNCTION', 'AGWR_OBJECT NUMBER', 'AGWR_TYPE',
'VERIFICATION_LB', 'DEL_FLAG', 'FLIGHT_YEAR', 'AS_DATE',
'RECORD_TYPE', 'GROUND_HEIGHT_MIN', 'RECORD_TYPE_GROUND_MIN',
'GROUND_HEIGHT_MEDIAN', 'RECORD_TYPE_GROUND_HEIGHT_MEDIAN',
'GROUND_HEIGHT_MAX', 'GROUND_TYPE_GROUND_MAX', 'OBJECT_HEIGHT_MEDIAN',
'OBJECT_TYPE_HEIGHT_MEDIAN', 'HEIGHT_OBJECT_MAX',
'RECORD_TYPE_HEIGHT_OBJECT_MAX', 'HEIGHT_OBJECT_EAVES',
'RECORD_TYPE_HEIGHT_OBJECT_EAVES', 'DATA SOURCE', 'DATA SOURCE_EXT_ID',
'CREATE_DATE', 'EDIT_DATE', 'OBJECTID', 'GLOBALID', 'SHAPE_AREA',
'geometry'
"""

"\n'F_CODE', 'F_NAME', 'STRUCTURE FUNCTION', 'AGWR_OBJECT NUMBER', 'AGWR_TYPE',\n'VERIFICATION_LB', 'DEL_FLAG', 'FLIGHT_YEAR', 'AS_DATE',\n'RECORD_TYPE', 'GROUND_HEIGHT_MIN', 'RECORD_TYPE_GROUND_MIN',\n'GROUND_HEIGHT_MEDIAN', 'RECORD_TYPE_GROUND_HEIGHT_MEDIAN',\n'GROUND_HEIGHT_MAX', 'GROUND_TYPE_GROUND_MAX', 'OBJECT_HEIGHT_MEDIAN',\n'OBJECT_TYPE_HEIGHT_MEDIAN', 'HEIGHT_OBJECT_MAX',\n'RECORD_TYPE_HEIGHT_OBJECT_MAX', 'HEIGHT_OBJECT_EAVES',\n'RECORD_TYPE_HEIGHT_OBJECT_EAVES', 'DATA SOURCE', 'DATA SOURCE_EXT_ID',\n'CREATE_DATE', 'EDIT_DATE', 'OBJECTID', 'GLOBALID', 'SHAPE_AREA',\n'geometry'\n"

In [106]:
# Fill the standardised columns from the Austrian source columns:
# "BAUWERKSFUNKTION" -> currentUse, "AGWR_TYP" -> buildingNature,
# "HOEHE_OBJEKT_MEDIAN" -> heightAboveGround.
austria["currentUse"], austria["buildingNature"], austria["heightAboveGround"] = (
    austria["BAUWERKSFUNKTION"],
    austria["AGWR_TYP"],
    austria["HOEHE_OBJEKT_MEDIAN"],
)

### Slovakia

In [109]:
# Load the Slovak footprints (regions[1] is "buildings_slovakia.pq"), then
# reproject them to EPSG:3035.
# NOTE(review): none of the standardised columns are assigned for Slovakia in
# this notebook, yet the final merge selects them. Presumably the file already
# contains them; confirm.
slovakia = gpd.read_parquet(DATADIR + regions[1])
slovakia = slovakia.to_crs(epsg=3035)

### Poland

In [117]:
# Load the Polish footprints (regions[2] is "buildings_poland.pq").
# NOTE(review): unlike Austria and Slovakia, no to_crs() call is made here.
# Presumably the file is already in EPSG:3035; confirm.
poland = gpd.read_parquet(f"{DATADIR}{regions[2]}")

In [120]:
# No source attribute is mapped for Poland: create the three standardised
# columns (assigned left to right, so in this order), all filled with None.
poland["currentUse"] = poland["buildingNature"] = poland["heightAboveGround"] = None

### Czechia

In [124]:
# Load the Czech footprints (regions[3] is "buildings_czechia.pq").
# NOTE(review): unlike Austria and Slovakia, no to_crs() call is made here.
# Presumably the file is already in EPSG:3035; confirm.
czechia = gpd.read_parquet(f"{DATADIR}{regions[3]}")

In [129]:
# Only currentUse is set (to None) for Czechia.
# NOTE(review): buildingNature and heightAboveGround are not assigned in this
# notebook, yet the final merge selects them. Presumably the file already
# contains them; confirm.
czechia["currentUse"] = None

### Merge all data

In [132]:
# Columns kept from every country frame in the final merge: the three
# standardised attributes plus the geometry.
subset = [
    "currentUse",
    "buildingNature",
    "heightAboveGround",
    "geometry",
]

In [133]:
# Country-level frames to merge; the merge below stacks them in this order.
all_gdfs = [
    all_germany,
    poland,
    austria,
    czechia,
    slovakia,
]

In [134]:
%%time
# Keep only the standardised columns of every country frame and stack them
# into one frame with a fresh consecutive index.
all_buildings = pd.concat([frame[subset] for frame in all_gdfs], ignore_index=True)



CPU times: user 29.1 s, sys: 1.36 s, total: 30.4 s
Wall time: 30.3 s


In [136]:
# Persist the merged footprints in the same directory as the inputs. The path
# is built from DATADIR so the data location is configured in one place.
all_buildings.to_parquet(DATADIR + "merged_ce_buildings.pq")