In [1]:
import requests
from glob import glob
import geopandas as gpd
from io import BytesIO
import re
import pandas as pd

In [2]:
DATADIR = '/data/uscuni-ulce/cadastre_buildings_raw/'

### Download german building footprints

In [3]:
ger_regions = ['bavaria', 'brandeburg', 'bw', 'hessen', 'mv',
       'nrw', 'rlp', 'saarland', 'sachsen', 'sachsen-anhalt', 'sg', 
       'th', 'ni', 'bremen', 'hamburg', 'berlin']

In [4]:
def process_wfs(region_name, size, url, params, count, start=0,):
    params = params.copy()
    i = start
    for i in range(start, size+count, count):
        
        print(i)
        params['count'] = count,
        params['startIndex'] = i,

        # Make the request
        response = requests.get(url, params=params)
        
        if response.status_code != 200:
            print(i, count)
            print(response.content)
            break

        try:
            gdf = gpd.read_file(BytesIO(response.content))
        except Exception as e:
            print(e)
            break
            
        gdf.to_parquet(DATADIR + f"buildings_{region_name}_{i}.pq")

In [5]:
def get_size(url, params):
    params = params.copy()
    params['resultType'] = 'hits'
    response = requests.get(url, params=params)
    if response.status_code == 200:
        size = re.findall(r'numberMatched="[0-9]+"', str(response.content))
        size = int(size[0].split('=')[-1][1:-1])
        return size

    return 'unknown'

### brandenburg

In [None]:
%%time

url = "https://isk.geobasis-bb.de/ows/alkis_vereinf_wfs"

# Parameters for the GetFeature request

params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "ave:GebaeudeBauwerk",
        "SRSNAME": "EPSG:4326",
        "VERSION": '2.0.0',
    }

size = get_size(url, params)
print(size)


In [None]:
count = 100_000
process_wfs("germany_brandeburg", size, url, params, count)

In [17]:
region = ger_regions[1]
region

'brandeburg'

In [18]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [19]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1200000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1300000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1400000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1500000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1600000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_brandeburg_1700000.pq']

In [20]:
len(files)

25

In [21]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [22]:
gdf.shape

(2482380, 13)

In [25]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### Sachsen

In [None]:
#sachsen
i = 0
params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "ave:GebaeudeBauwerk",
        "SRSNAME": "EPSG:25833",
        "VERSION": '2.0.0',
    }
url = 'https://geodienste.sachsen.de/aaa/public_alkis/vereinf/wfs'

size = get_size(url, params)
print(size)

In [None]:
count = 10000
process_wfs("germany_sachsen", size, url, params, count)

In [40]:
region = ger_regions[8]
region

'sachsen'

In [41]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [42]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen-anhalt.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_10000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1010000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1020000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1030000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1040000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen_1050000.pq']

In [43]:
len(files)

229

In [30]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [31]:
gdf.shape

(4018680, 12)

In [44]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### baddwen_wurteburd

In [None]:
#
params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "bu-core2d:Building",
        "SRSNAME": "EPSG:25833",
        "VERSION": '2.0.0',
    }
url = 'https://owsproxy.lgl-bw.de/owsproxy/wfs/WFS_INSP_BW_Gebaeude_ALKIS'

size = get_size(url, params)
print(size)

In [None]:

count = 10000
process_wfs("germany_bw", size, url, params, count)

In [47]:
region = ger_regions[2]
region

'bw'

In [48]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [49]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_10000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1010000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1020000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1030000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1040000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1050000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bw_1060000.pq']

In [50]:
len(files)

645

In [51]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [52]:
gdf.shape

(6434424, 24)

In [None]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### mv


In [None]:
params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "ave:GebaeudeBauwerk",
        "SRSNAME": "EPSG:5650",
        "VERSION": '2.0.0',
    }
url = 'https://www.geodaten-mv.de/dienste/alkis_wfs_einfach'
size = get_size(url, params)
print(size)

In [None]:
count = 30_000
process_wfs("germany_mv", size, url, params, count)

In [5]:
region = ger_regions[4]
region

'mv'

In [6]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [7]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1020000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1050000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1080000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1110000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1140000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1170000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_120000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1200000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_mv_1230000.pq']

In [8]:
len(files)

51

In [9]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [10]:
gdf.shape

(1511902, 13)

In [13]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### RLP


In [None]:

# # https://www.geoportal.rlp.de/mapbender/php/mod_inspireDownloadFeed.php?id=30cc0a9a-cbad-66e3-68a5-7b75544814d8&type=DATASET&generateFrom=wfs&wfsid=519&featuretypeid=2713
# #rlp
# i = 0
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "SRSNAME": "EPSG:25832",
#         "VERSION": '2.0.0',
#     }
# url = 'https://www.geoportal.rlp.de/registry/wfs/519'
# size = get_size(url, params)
# print(size)


# # count = 100000
# # process_wfs("germany_rlp", url, params, count)


for i in range(0, 3320444, 4000):
    url = f'https://www.geoportal.rlp.de/spatial-objects/519/collections/ave:GebaeudeBauwerk/items?offset={i}&limit=4000&f=json'
    gdf = gpd.read_file(url)
    gdf.to_parquet(DATADIR + f'buildings_germany_rlp_{i}.pq')
    print(i)

In [73]:
region = ger_regions[6]
region

'rlp'

In [74]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [75]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1004000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1008000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1012000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1016000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1020000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1024000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_rlp_1028000.pq']

In [76]:
len(files)

831

In [77]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [78]:
gdf.shape

(3320444, 13)

In [79]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### saarland


In [None]:

# # i = 0
# # params = {
# #         "SERVICE": "WFS",
# #         "REQUEST": "GetFeature",
# #         "TYPENAMES": "bu-core2d:Building",
# #         "SRSNAME": "EPSG:4326",
# #         "VERSION": '2.0.0',
# #     }
# # url = 'https://geoportal.saarland.de/gdi-sl/inspirewfs_2D_Gebaeude_ALKIS'
# # size = get_size(url, params)
# # print(size)

# # params['count'] = size
# # response = requests.get(url, params=params)


for i in range(0, 622841, 500):
    url = f'https://geoportal.saarland.de/spatial-objects/406/collections/bu-core2d:Building/items?offset={i}&limit=500&f=json'
    gdf = gpd.read_file(url)
    gdf.to_parquet(DATADIR + f'buildings_germany_saarland_{i}.pq')
    print(i)

In [84]:
region = ger_regions[7]
region

'saarland'

In [85]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [86]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_1000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_10000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_100500.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_101000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_101500.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_102000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_102500.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_saarland_103000.pq']

In [87]:
len(files)

1246

In [88]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [89]:
gdf.shape

(622841, 16)

In [90]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### nrw

In [None]:

# i = 0
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "SRSNAME": "EPSG:4326",
#         "VERSION": '2.0.0',
#     }
# url = 'https://www.wfs.nrw.de/geobasis/wfs_nw_inspire-gebaeude-2d_alkis'

# size = get_size(url, params)
# size
# count = 10000
# process_wfs("germany_nrw", url, params, count, start=)

step = 5_000

for i in range(0, 12646244, step):
    url = f'https://ogc-api.nrw.de/lika/v1/collections/gebaeude_bauwerk/items?offset={i}&limit={step}&f=json'
    gdf = gpd.read_file(url)
    gdf.to_parquet(DATADIR + f'buildings_germany_nrw_{i}.pq')
    print(i, gdf.shape)

In [64]:
region = ger_regions[5]
region

'nrw'

In [65]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [66]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10005000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10010000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10015000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10020000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_nrw_10025000.pq']

In [67]:
len(files)

2530

In [68]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [71]:
gdf.shape

(12646244, 11)

In [72]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### NI

In [None]:
# https://ni-lgln-opengeodata.hub.arcgis.com/apps/1cadea38ffb341bc83019b3d7f787dd3/explore


# #niedersachsen
# i = 0
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "VERSION": '2.0.0',
#     }
# url = 'https://www.inspire.niedersachsen.de/doorman/noauth/alkis-dls-bu-core2d?'

# size = get_size(url, params)
# print(size)

# count = 10000
# process_wfs("germany_niedersachsen", url, params, count)

In [None]:
links = gpd.read_file('https://arcgis-geojson.s3.eu-de.cloud-object-storage.appdomain.cloud/alkis-vektor/lgln-opengeodata-alkis-vektor.geojson')

In [None]:
import urllib.parse
for link in links.zip.values:
    link = urllib.parse.quote(link, safe=':/')
    print(link)
    gdf = gpd.read_file(link, layer='gebaeude')
    gdf.to_parquet(DATADIR + f'buildings_germany_ni_{link.split('/')[-1][:-9]}.pq')

In [98]:
region = ger_regions[12]
region

'ni'

In [99]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [100]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03101_Stadt_Braunschweig_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03102_Stadt_Salzgitter_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03103_Stadt_Wolfsburg_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03151_Gifhorn_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03153_Goslar_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03154_Helmstedt_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03155_Northeim_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03157_Peine_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03158_Wolfenb%C3%BCttel_kon.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_ni_lkr_03159_G%C3%B6ttingen_kon.pq']

In [101]:
len(files)

45

In [102]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [103]:
gdf.shape

(6442316, 10)

In [104]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### hessen


In [7]:
# %%capture cap
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "SRSNAME": "EPSG:4258",
#         "VERSION": '2.0.0',
#     }
# url = 'https://inspire-hessen.de/ows/services/org.2.29e99e26-ab08-474a-92c4-9e8f6bc679ea_wfs?'
# size = get_size(url, params)
# print(size)

# count = 10_000
# process_wfs("germany_hessen", size, url, params, count)

In [53]:
# %%capture cap
# step = 500

# import time
# for i in range(0, 4997363, step):
#     url = f'https://www.geoportal.hessen.de/spatial-objects/711/collections/bu-core2d:Building/items?offset={i}&limit={step}&f=json'
    
#     try:
#         gdf = gpd.read_file(url)
#         gdf.to_parquet(DATADIR + f'buildings_germany_hessen_{i}.pq')
#     except Exception as e:
#         time.sleep(5)
#         gdf = gpd.read_file(url)
#         gdf.to_parquet(DATADIR + f'buildings_germany_hessen_{i}.pq')
#         print(e)
#     print(i, gdf.shape)

In [8]:
%%capture cap
params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "ave:GebaeudeBauwerk",
        "SRSNAME": "EPSG:25832",
        "VERSION": '2.0.0',
    }
url = 'https://www.gds.hessen.de/wfs2/aaa-suite/cgi-bin/alkis/vereinf/wfs'
size = get_size(url, params)
print(size)

count = 100_000
process_wfs("germany_hessen", size, url, params, count)

In [12]:
region = ger_regions[3]
region

'hessen'

In [13]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [14]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1000000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1200000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1300000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1400000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1500000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1600000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hessen_1700000.pq']

In [15]:
len(files)

53

In [16]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [17]:
gdf.shape

(5211807, 13)

In [18]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### Thuringen

In [None]:
# #thuringen
# i = 0
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "ave:GebaeudeBauwerk",
#         "SRSNAME": "EPSG:5650",
#         "VERSION": '2.0.0',
#     }

# url = 'https://www.geoproxy.geoportal-th.de/geoproxy/services/adv_alkis_wfs'
# size = get_size(url, params)
# print(size)

In [None]:
# count = 100_000
# process_wfs("germany_th", size, url, params, count)

In [None]:
#!wget https://geoportal.geoportal-th.de/dienste/atom_th_alkis?type=dataset&amp;id=33c5e445-e8eb-426b-a174-d77284d76dc9

In [None]:
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB51_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB52_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB53_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB54_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB55_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB56_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB57_shp.zip
!wget https://geoportal.geoportal-th.de/ALKIS/Shape/ALKIS_KB58_shp.zip

In [None]:
%%capture
!unzip ALKIS_KB51_shp.zip -d temp/
!unzip ALKIS_KB52_shp.zip -d temp/
!unzip ALKIS_KB53_shp.zip -d temp/
!unzip ALKIS_KB54_shp.zip -d temp/
!unzip ALKIS_KB55_shp.zip -d temp/
!unzip ALKIS_KB56_shp.zip -d temp/
!unzip ALKIS_KB57_shp.zip -d temp/
!unzip ALKIS_KB58_shp.zip -d temp/

In [None]:
files= glob.glob('temp/*_shp.zip')
len(files)

In [None]:
%%time
frames = []
for f in files:
    layers = gpd.list_layers(f)
    layer = layers[layers['name'].str.contains('GebaeudeBauwerk')]['name']
    if layer.shape[0]:
        layer = layer.values[0]
    else:
        continue
    gdf = gpd.read_file(f, layer=layer)
    if gdf.shape[0]:
        frames.append(gdf.set_crs(epsg=25832).to_crs(epsg=3035))


In [None]:
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
elif 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [None]:
gdf.to_parquet(DATADIR + 'buildings_germany_th.pq')

In [None]:
!rm -r temp/

In [None]:
!rm ALKIS_KB*

In [None]:
!cp /data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_th.pq /data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_th.pq

### sachsen-anhalt

In [None]:
!wget https://www.geodatenportal.sachsen-anhalt.de/gfds_webshare/download/LVermGeo/Geodatenportal/externedaten/GBIS_Gebaeude.zip

In [None]:
!unzip GBIS_Gebaeude.zip

In [None]:
files = glob('GBIS_Gebaeude_*')

In [None]:
frames = []
for f in files:
    gdf = gpd.read_file(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]

In [None]:
gdf.to_parquet(DATADIR + 'buildings_germany_sachsen-anhalt.pq')

In [None]:
!rm GBIS_Gebaeude*

In [None]:
!cp /data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sachsen-anhalt.pq /data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_sachsen-anhalt.pq

In [None]:
# #sachsen-anhalt
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "SRSNAME": "EPSG:25833",
#         "VERSION": '2.0.0',
#     }
# url = 'https://www.geodatenportal.sachsen-anhalt.de/wss/service/INSPIRE_LVermGeo_WFS_ALKIS_BU/guest'
# size = get_size(url, params)
# print(size)


In [None]:
# count = 10_000
# process_wfs("germany_sachsen-anhalt", size, url, params, count)

### schlesig hollstein

In [None]:
# ## schlesig hollstein
# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         "SRSNAME": "EPSG:4326",
#         "VERSION": '2.0.0',
#     }
# url = 'https://service.gdi-sh.de/SH_INSPIREDOWNLOAD_AIII_BU?service=WFS&version=2.0.0'

# size = get_size(url, params)
# print(size)



# count = 10_000
# process_wfs("germany_sh", url, params, count)

In [None]:
!wget https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_ALKIS_vereinf_OpenGBD/Atom_SH_ALKIS_vereinf_OpenGBD.xml

In [None]:
import xml.etree.ElementTree as ET

# Load the XML file
tree = ET.parse('Atom_SH_ALKIS_vereinf_OpenGBD.xml')
root = tree.getroot()


# Define the namespaces
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'georss': 'http://www.georss.org/georss',
    'inspire_dls': 'http://inspire.ec.europa.eu/schemas/inspire_dls/1.0',
    'lang': 'ger'
}

# Find all elements with the tag 'entry'
entries = root.findall('atom:entry', namespaces)
codes = []

# Extract and print the 'inspire_dls:spatial_dataset_identifier_code' attribute for each entry
for entry in entries:
    # Find the 'inspire_dls:spatial_dataset_identifier_code' element within the entry
    spatial_code = entry.find('inspire_dls:spatial_dataset_identifier_code', namespaces)
    codes.append(spatial_code.text)

In [None]:
for code in codes:
    url = f'https://dienste.gdi-sh.de/WFS_SH_ALKIS_vereinf_OpenGBD?Request=GetFeature&Service=WFS&Version=2.0.0&gemeindeschluessel={code}&StoredQuery_ID=GetGebByGemeinde'
    gdf = gpd.read_file(url, driver='GML')
    gdf.to_parquet(DATADIR + f'buildings_germany_sg_{code}.pq')

In [None]:
!rm Atom_SH_ALKIS_vereinf_OpenGBD.xml

In [91]:
region = ger_regions[10]
region

'sg'

In [92]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [93]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01001000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01002000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01003000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01004000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051001.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051002.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051003.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051004.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051005.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_sg_01051006.pq']

In [94]:
len(files)

1106

In [95]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [96]:
gdf.shape

(2539436, 13)

In [97]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_sh.pq')

### bavaria

In [None]:
import requests 
def download_url(url, save_path, chunk_size=256):
    r = requests.get(url, stream=True, allow_redirects=True)
    print(r.content)
    with open(save_path, 'wb') as fd:
        for chunk in r.iter_content(chunk_size=chunk_size):
            fd.write(chunk)

In [None]:
# bavaria - https://geodaten.bayern.de/opengeodata/OpenDataDetail.html?pn=hausumringe

links = """https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/096_Unterfranken_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/094_Oberfranken_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/095_Mittelfranken_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/093_Oberpfalz_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/097_Schwaben_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/091_Oberbayern_Hausumringe.zip
https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/092_Niederbayern_Hausumringe.zip"""

In [None]:
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/096_Unterfranken_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/094_Oberfranken_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/095_Mittelfranken_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/093_Oberpfalz_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/097_Schwaben_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/091_Oberbayern_Hausumringe.zip
!wget https://geodaten.bayern.de/odd/m/3/daten/hausumringe/bezirk/data/092_Niederbayern_Hausumringe.zip

In [None]:
%%time
for i, dl_link in enumerate(links.split('\n')):
    region_name = dl_link.split('_')[-2].lower()
    region_file = dl_link.split('/')[-1]
    gpd.read_file(region_file).to_parquet(DATADIR + f'buildings_germany_bavaria_{region_name}.pq')

In [None]:
!rm *_Hausumringe.zip

In [9]:
files = glob(f'data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bavaria*')

In [13]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [14]:
gdf.shape

(10131375, 2)

In [16]:
gdf.to_parquet('/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_bavaria.pq')

### Berlin

In [None]:
params = {
        "SERVICE": "WFS",
        "REQUEST": "GetFeature",
        "TYPENAMES": "bu2d_alkis:Building",
        "SRSNAME": "EPSG:25832",
        "VERSION": '2.0.0',
    }
url = 'https://gdi.berlin.de/services/wms/bu2d_alkis'
size = get_size(url, params)
print(size)

In [None]:
count = 1000
process_wfs("germany_berlin", size, url, params, count, start=283000)

In [109]:
region = ger_regions[15]
region

'berlin'

In [110]:
files = glob(f'{DATADIR}buildings_germany_{region}*')

In [111]:
sorted(files)[:10]

['/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_0.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_1000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_10000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_100000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_101000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_102000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_103000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_104000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_105000.pq',
 '/data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_berlin_106000.pq']

In [112]:
len(files)

652

In [113]:
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)
if 'gml_id' in gdf.columns:
    gdf = gdf[~gdf['gml_id'].duplicated()]
if 'oid' in gdf.columns:
    gdf = gdf[~gdf['oid'].duplicated()]

In [114]:
gdf.shape

(651811, 12)

In [117]:
gdf.to_parquet(f'/data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_{region}.pq')

### Hamburg

In [None]:

# params = {
#         "SERVICE": "WFS",
#         "REQUEST": "GetFeature",
#         "TYPENAMES": "bu-core2d:Building",
#         # "SRSNAME": "EPSG:4326",
#         "VERSION": '2.0.0',
#         # "count": 10000,
#         # "startIndex": i
#     }
# url = 'https://geodienste.hamburg.de/HH_WFS_INSPIRE_Gebaeude_2D_ALKIS'

# response = requests.get(url, params=params)

# gdf = gpd.read_file(response.content)
# gdf

In [None]:


hamburg_url = 'https://daten-hamburg.de/inspire/hh_inspire_gebaeude_2d_alkis/INSPIRE_HH_Gebaeude_2D_ALKIS_2020-07-15.zip'

In [None]:
!wget https://daten-hamburg.de/inspire/hh_inspire_gebaeude_2d_alkis/INSPIRE_HH_Gebaeude_2D_ALKIS_2020-07-15.zip

In [None]:
!unzip INSPIRE_HH_Gebaeude_2D_ALKIS_2020-07-15.zip

In [None]:
import geopandas as gpd

In [None]:
gpd.list_layers('result.gml')

In [None]:
gpd.read_file('result.gml', layer='building').set_crs(epsg=25832).to_parquet(DATADIR + 'buildings_germany_hamburg.pq')

In [None]:
## cleanup
!rm result.gml
!rm result.gfs
!rm wfs_bu_alkis_2020-07-15.xsd 
!rm INSPIRE_HH_Gebaeude_2D_ALKIS_2020-07-15.zip

In [None]:
!cp /data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_hamburg.pq /data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_hamburg.pq

### Bremen

In [None]:
bremen = 'https://gdi2.geo.bremen.de/inspire/download/ADV-Shape/data/ALKIS_AdV_SHP_2024_04_HB.zip'

In [None]:
!wget https://gdi2.geo.bremen.de/inspire/download/ADV-Shape/data/ALKIS_AdV_SHP_2024_04_HB.zip

In [None]:
!unzip ALKIS_AdV_SHP_2024_04_HB.zip

In [None]:
gpd.read_file('Bremen/GebaeudeBauwerk.shp').to_parquet(DATADIR + 'buildings_germany_bremen.pq')

In [None]:
!rm -r Bremen/

In [None]:
!rm -r ALKIS_AdV_SHP_2024_04_HB.zip

In [None]:
!cp /data/uscuni-ulce/cadastre_buildings_raw/buildings_germany_bremen.pq /data/uscuni-ulce/cadastre_buildings_standardised/buildings_germany_bremen.pq

## Download Poland

In [None]:
# https://mapy.geoportal.gov.pl/iMapLite/atomExplorer/

In [None]:

links  = """http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=34ad9f34a0b7dc8205a933bb058e91c1&name=podkarpackie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=78896afe993bcb34365fe7b8a7f9966f&name=pomorskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=be4bfa1478a6eeedd67faffea96eafda&name=opolskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=c7881c0de4e3d86548ba0e97c50b490a&name=lodzkie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=79dcc64d4c0c4e8873365f04c55a4940&name=lubuskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=ac47d220ce942cc3668f703fe4bd7a60&name=mazowieckie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=9e87962dd94adddb2838f6321778f04a&name=kujawskopomorskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=8818a617772a6c9dad222dd52acecc29&name=slaskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=4b636d1174ceff606fb1c9c95c145915&name=dolnoslaskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=99c8bdcfc3173a3a7c2d18bf675f4315&name=malopolskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=cb81449840b96ff21fba1a42e094c074&name=lubelskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=9bb1518bbf0d8e22ec338c80dcc83f61&name=zachodniopomorskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=0217fcdf1da066af11a62067530786a9&name=warminskomazurskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=67eb749e1ae54cb7aa9a8aa1d90d8dd7&name=wielkopolskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=fc9fe6a16296acf6a4072a08f5be1342&name=podlaskie_bu.zip
http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=e968c6db383a80a34b702866ede92cb1&name=swietorzyskie_bu.zip"""

In [None]:
# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=34ad9f34a0b7dc8205a933bb058e91c1&name=podkarpackie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=78896afe993bcb34365fe7b8a7f9966f&name=pomorskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=be4bfa1478a6eeedd67faffea96eafda&name=opolskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=c7881c0de4e3d86548ba0e97c50b490a&name=lodzkie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=79dcc64d4c0c4e8873365f04c55a4940&name=lubuskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=ac47d220ce942cc3668f703fe4bd7a60&name=mazowieckie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=9e87962dd94adddb2838f6321778f04a&name=kujawskopomorskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=8818a617772a6c9dad222dd52acecc29&name=slaskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=4b636d1174ceff606fb1c9c95c145915&name=dolnoslaskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=99c8bdcfc3173a3a7c2d18bf675f4315&name=malopolskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=cb81449840b96ff21fba1a42e094c074&name=lubelskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=9bb1518bbf0d8e22ec338c80dcc83f61&name=zachodniopomorskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=0217fcdf1da066af11a62067530786a9&name=warminskomazurskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=67eb749e1ae54cb7aa9a8aa1d90d8dd7&name=wielkopolskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=fc9fe6a16296acf6a4072a08f5be1342&name=podlaskie_bu.zip

# !wget http://mapy.geoportal.gov.pl/wss/service/ATOM/httpauth/download/?fileId=e968c6db383a80a34b702866ede92cb1&name=swietorzyskie_bu.zip

In [None]:
links = links.split('\n')

In [None]:

for i, dl_link in enumerate(links):
    dl_name = dl_link.split('=')[-1]
    download_url(dl_link, dl_name)
    print(dl_name)

In [None]:
for i, dl_link in enumerate(links[5:]):
    dl_name = dl_link.split('=')[-1]
    if dl_name == 'mazowieckie_bu.zip':
        continue
    gdf = gpd.read_file(dl_name)
    gdf.to_parquet(DATADIR + f'buildings_poland_{dl_name[:-4]}.pq')

In [None]:
!unzip mazowieckie_bu.zip

In [None]:
gpd.list_layers('mazowieckie_bu1.gml')

In [None]:
gpd.list_layers('mazowieckie_bu2.gml')

In [None]:
gdf = gpd.read_file('mazowieckie_bu1.gml')
gdf.to_parquet(DATADIR + f'buildings_poland_mazowieckie_bu1.pq')

gdf = gpd.read_file('mazowieckie_bu2.gml')
gdf.to_parquet(DATADIR + f'buildings_poland_mazowieckie_bu2.pq')

In [None]:
!rm mazowieckie_bu1.gml
!rm mazowieckie_bu2.gml

In [None]:
files = glob('/data/uscuni-ulce/cadastre_buildings_raw/buildings_poland*')
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    print(f)

    # swap lon lat
    coords = shapely.get_coordinates(gdf.geometry)
    coords = coords[:, [1, 0]]
    sizes = shapely.get_num_coordinates(gdf.geometry).values
    polygon_id = np.repeat(np.arange(gdf.shape[0]), sizes)
    new_geom = pd.DataFrame(coords).groupby(polygon_id).apply(lambda x: shapely.Polygon(x.values))
    gdf['geometry'] = new_geom
    
    frames.append(gdf.to_crs(epsg=3035))
    
polish_gdf = pd.concat(frames, ignore_index=True)

In [None]:
polish_gdf.to_parquet('/data/uscuni-ulce/cadastre_buildings_standardised/buildings_poland.pq')

## Download Czechia

In [None]:
import httpx
import pyogrio
import geopandas as gpd
from io import BytesIO
import time

url_adm = "https://services.cuzk.cz/gml/inspire/au/epsg-4258/1.zip"
units = pyogrio.read_dataframe(url_adm, layer="AdministrativeUnit")
obce = units[units.LocalisedCharacterString == "Obec"]

In [None]:
obce

In [None]:
def process_czech_unit(code):
    url_building = f"https://services.cuzk.cz/gml/inspire/bu/epsg-4258/{code}.zip"
    r = httpx.get(url_building, verify=False)

    gdf = gpd.read_file(BytesIO(r.content), layer='Building')

    gdf.to_parquet(DATADIR + f'buildings_czechia_{code}.pq')

In [None]:
import warnings

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    
    for code in obce["nationalCode"]:
        try:
            process_czech_unit(code)
        except:
            time.sleep(5)
            process_czech_unit(code)

In [None]:
files = glob('/data/uscuni-ulce/cadastre_buildings_raw/buildings_czechia*')
frames = []
for f in files:
    gdf = gpd.read_parquet(f)
    if gdf.shape[0]:
        frames.append(gdf.to_crs(epsg=3035))
gdf = pd.concat(frames, ignore_index=True)

In [None]:
gdf.to_parquet('/data/uscuni-ulce/cadastre_buildings_standardised/buildings_czechia.pq')

## Download Slovakia

In [None]:
!wget https://opendata.skgeodesy.sk/static/INSPIRE/Buildings/inspire_bu_gpkg.zip --no-check-certificate

In [None]:
gdf = gpd.read_file('zip://inspire_bu_gpkg.zip!INSPIRE_BU.gpkg')

In [None]:
gdf.to_parquet('/data/uscuni-ulce/cadastre_buildings_standardised/buildings_slovakia.pq')

In [None]:
!rm inspire_bu_gpkg.zip

## Download Austria

In [None]:
austria = 'https://data.bev.gv.at/download/DLM/DLM_20230125/DLM_8000_BAUWERK_20230125.gpkg'

In [None]:
%%time
download_url(austria, '../data/DLM_8000_BAUWERK_20230125.gpkg')

In [None]:
gpd.list_layers( '../data/DLM_8000_BAUWERK_20230125.gpkg')

In [None]:
import pyogrio

In [None]:
pyogrio.read_info('../data/DLM_8000_BAUWERK_20230125.gpkg', layer='BWK_8100_BAUWERK_F')

In [None]:
gpd.read_file('../data/DLM_8000_BAUWERK_20230125.gpkg', layer='BWK_8100_BAUWERK_F').to_parquet('/data/uscuni-ulce/cadastre_buildings_standardised/buildings_austria.pq')

In [None]:
# !rm ../data/DLM_8000_BAUWERK_20230125.gpkg