In [1]:
from src.utils import *


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc

In [38]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'batch1')
if not os.path.exists(outdata_f):
    # Create the folder if it doesn't exist
    os.makedirs(outdata_f)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client
client = Client()

# Define engines
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True}
          
}


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51074 instead


In [102]:
# 1 READ URBAN CENTRES
# Read shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG.shp')
# Read the GeoPackage file
uc = gpd.read_file(uc_file_path)
uc['HDENS_NAME'].fillna('aaa', inplace=True)


In [153]:
uc.head()

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode
0,GEOSTAT21_018,Stockholm,GEOSTAT11_018,1462910.0,SE,1,342000000.0,162000.0,"POLYGON ((4775000.000 4038000.000, 4774000.000...",Stockholm
1,GEOSTAT21_019,Haninge,GEOSTAT11_019,94485.0,SE,1,36000000.0,38000.0,"POLYGON ((4787000.000 4033000.000, 4786000.000...",Haninge
2,GEOSTAT21_020,Södertälje,GEOSTAT11_020,74475.0,SE,1,22000000.0,24000.0,"POLYGON ((4756000.000 4031000.000, 4755000.000...",Sodertalje
3,GEOSTAT21_021,Tartu,GEOSTAT11_021,84168.0,EE,2,24000000.0,24000.0,"POLYGON ((5292000.000 4030000.000, 5290000.000...",Tartu
4,GEOSTAT21_022,Örebro,GEOSTAT11_022,92695.0,SE,1,27000000.0,26000.0,"POLYGON ((4620000.000 4027000.000, 4618000.000...",Orebro


In [115]:
# Add unicode names
uc_unicode_ls = []
for uc_raw_name in uc_names_ls:
    uc_unicode = unidecode(uc_raw_name)
    uc_unicode_ls.append(uc_unicode)
uc['cityLocalName_unicode'] = uc_unicode_ls

In [40]:
# 2 READ NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the GeoPackage file
agls = gpd.read_file(agls_file_path, layer = 'dbo.DF15_AgglomerationSource_Valid_LatestDelivery', 
                     **engines['pyogrio+arrow'],columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'])



In [41]:
# Read table to list the cities to process
intable = pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Deliveries\M1\Batch1_100_agglomerations.csv')
# replace input #############
##########################
cities_ls = intable.HDENS_CLST.tolist()

In [42]:
# Read table with HDENS Urban centres information and Agglomerations link
HDENS_AGGL_tbl = pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\UrbanCentres_Agglomerations_csv.csv')

In [22]:
HDENS_AGGL_tbl.head()

Unnamed: 0,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,agglomerationId_identifier,agglomerationName_nameEng,size,numberOfInhabitants,countryCode,Batch
0,A Coruña,GEOSTAT11_610,282450,ES,AG_ES_11_15030,A Coruña,37.54,247046,ES,
1,Aachen,GEOSTAT11_360,224750,DE,AG_DE_NW_13,Aachen,161.0,246443,DE,
2,Aalborg,GEOSTAT11_031,149041,DK,AG_DK_00_4,Aalborg,66.0,138581,DK,1.0
3,Aguadulce / La Gangosa,,55615,ES,AG_ES_61_04079,Roquetas de Mar,20.82,106147,ES,
4,Aix-en-Provence,GEOSTAT11_676,101708,FR,AG_FR_00_23,Marseille,3149.22,1886800,FR,


In [152]:
counter= 1
agl_error_ls = []


# Loop through test cities
for cityLocalName in cities_ls:
    print(str(cityLocalName))
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    cityLocalName_unicode = unidecode(cityLocalName)        
    uc_city = uc.query(f'cityLocalName_unicode == "{cityLocalName_unicode}"')
    if uc_city.empty:
        print ('error in agl city')
        agl_error_ls.append(cityLocalName_unicode)
    
    else:
        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]
        print(ctry_code)
        cityLocalName_unicode = unidecode(cityLocalName)
        print(f'loading aglomeration city {cityLocalName_unicode}')
        agglomerationId_identifier = HDENS_AGGL_tbl.query(f'HDENS_NAME == "{cityLocalName}"').agglomerationId_identifier.values.astype(str)
        if agglomerationId_identifier.size == 0:
            agglomerationId_identifier = 'NotAvailable'
            print ("agglomerationId_identifier")
        else:
            for i in range(len(agglomerationId_identifier)):
                print(agglomerationId_identifier[i])
                print ("agglomerationId_identifier")        
            
                agl_city = agls.query(f'agglomerationId_identifier == "{agglomerationId_identifier[i]}"')
                if agl_city.empty:
                    agglomerationId_identifier = 'NotAvailable'
                    print ("agglomerationId_identifier")
                else:
                    output_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_{agglomerationId_identifier[i]}_GQA_centroids.shp')
                    if not os.path.exists(output_path):
                        # Check noise contour maps GeoPackage file
                        ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export.gpkg')
                        layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
                        #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                        #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                        #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                        ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                            engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                        print ("ncm")

                        # Perform spatial overlay (intersection) 
                        ncm_agl = gpd.overlay(ncm, agl_city, how='intersection')
                        print ("ncm_agl")

                        # Aggregate the area with lower band values (quieter bands)
                        ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                        ncm_agl_city.category.fillna(0)

                        # Select a subset of columns of interest
                        ncm_dis = ncm_agl_city[['category', 'geometry']]
                        
                        # Define the list of noisy classes
                        noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

                        # Create a condition based on the category column
                        condition = ncm_dis['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

                        # Specify the condition and create a new category column based on the condition
                        ncm_dis['noisy'] = 0
                        ncm_dis.loc[condition, 'noisy'] = 1
                        ncm_dis = ncm_dis[['noisy', 'geometry']]
                        ncm_dis_dg = dg.from_geopandas(ncm_dis, npartitions=10)
                        ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                        print ("ncm_dis")

                        # 3 READ UA DATA        
                        # Load GeoPackage info
                        data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'
                        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0] 
                        city_unicodeName_upper = unidecode(cityLocalName).upper()
                        folder_path = glob.glob(os.path.join(data_f, f'{ctry_code}*{city_unicodeName_upper}*'))
                        ua_file_path =  glob.glob(os.path.join(folder_path[0], 'Data', f'{ctry_code}*{city_unicodeName_upper}*.gpkg'))
                        layers_ls = fiona.listlayers(ua_file_path[0])
                        print ("layers_ls")

                        # Read the GeoPackage file
                        ua = gpd.read_file(ua_file_path[0], layer= layers_ls[0], 
                                        columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], engine='pyogrio', 
                                        use_arrow=True, bbox= tuple(uc_city.total_bounds))
                        print ("loaded ua in urban city")

                        # Select 'green' classes
                        uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                        
                        # 4 SELECT UA INTERSECTING UC
                        # Perform spatial overlay (intersection)
                        uagreen_urbc = gpd.overlay(uagreen, uc_city, how='intersection')

                        # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                        # Perform spatial overlay (intersection)
                        nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                        not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                        # Filter out empty polygons(not empty polygons)
                        green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                        # save to shapefile
                        file_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_{agglomerationId_identifier[i]}_green_not_covered_by_ncm.shp')
                        green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                        print ("green_not_covered_by_ncm")

                        # 6 IDENTIFY QUIET/NOISY AREAS
                        ## for statistics need to calculate area again
                        # Calculate the area for each geometry and create a new column 'area'
                        nqgreen['area_m2'] = nqgreen['geometry'].area
                        nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                        nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                        nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                        nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                        nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                        # 7 EXPORT GREEN QUIET AREAS (GQA)
                        nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                        GQA = nqgreen.query('noisy == 0')
                        GNA = nqgreen.query('noisy == 1')

                        # Export to shapefile
                        file_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_{agglomerationId_identifier[i]}_GQA.shp')
                        GQA.to_file(file_path, driver='ESRI Shapefile')
                        print ("GQA")

                        # 8 CREATE CENTROIDS FOR GQA POLYGONS
                        # Create a new GeoDataFrame with centroids as points
                        GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                        GQA_pts['oid'] = GQA.index
                        GQA_pts['fua_name'] = GQA.fua_name
                        GQA_pts['fua_code'] = GQA.fua_code
                        GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                        # Export to shapefile
                        file_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_{agglomerationId_identifier[i]}_GQA_centroids.shp')
                        GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                        print ("GQA_pts")
                
                        # Calculate the duration
                        end_time = datetime.now()
                        processing_time = end_time - start_time

                        print ("str(processing_time)")
                        
                        ## write output values into log file
                        uc_km2 = round(uc_city.area.sum()/1000000,2)
                        agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                        ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                        ua_km2 = round(ua.area.sum()/1000000,2)
                        uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                        uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                        nqgreen_m2 = round(nqgreen.area.sum(),2)
                        green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                        GQA_m2 = round(GQA.area.sum(),2)
                        GNA_m2 = round(GNA.area.sum(),2)
                        processing_duration = str(processing_time)

                        log_entry = create_log_entry(cityLocalName, agglomerationId_identifier, uc_km2, agl_city_km2, 
                                                ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                        write_log(log_path, log_entry)
                        # Clean up intermediate variables to free memory
                        del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
    counter= counter+1

print(agl_error_ls)

Aalborg
1
2024-07-15 09:43:08.335212
DK
loading aglomeration city Aalborg
AG_DK_00_4
agglomerationId_identifier
Aarhus
2
2024-07-15 09:43:08.356207
error in agl city
AggloLux
3
2024-07-15 09:43:08.361198
error in agl city
Agglomeration Amsterdam-Haarlem
4
2024-07-15 09:43:08.365198
error in agl city
Agglomeration Rotterdam-Dordrecht
5
2024-07-15 09:43:08.371196
error in agl city
Agglomeration Utrecht
6
2024-07-15 09:43:08.382199
error in agl city
Agglomeration Zwolle
7
2024-07-15 09:43:08.392197
error in agl city
AggloSud
8
2024-07-15 09:43:08.398198
error in agl city
AMADORA
9
2024-07-15 09:43:08.402198
error in agl city
Antwerp
10
2024-07-15 09:43:08.405199
error in agl city
Basel
11
2024-07-15 09:43:08.410198
CH-DE
loading aglomeration city Basel
AG_CH_00_5
agglomerationId_identifier


DataLayerError: Layer 'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_CH-DE' could not be opened

In [151]:
CNTR_CODE

NameError: name 'CNTR_CODE' is not defined

In [149]:
nqgreen 

Unnamed: 0,code_2018,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,cityLocalName_unicode,noisy,geometry,area_m2,area_ha,area_km2
0,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,1,"POLYGON ((4314825.103 3772662.917, 4314839.580...",13615.396081,1.36,0.01
1,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,1,"POLYGON ((4320012.522 3768572.875, 4320078.561...",17879.839167,1.79,0.02
2,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,1,"POLYGON ((4313432.265 3768417.155, 4313404.075...",9323.289365,0.93,0.01
3,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,1,"POLYGON ((4314000.000 3772486.379, 4314000.000...",879.274017,0.09,0.00
4,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,1,"POLYGON ((4315304.802 3772479.787, 4315304.696...",10616.202135,1.06,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,0,"POLYGON ((4320592.038 3768837.488, 4320601.592...",9471.949732,0.95,0.01
231,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,0,"POLYGON ((4320428.214 3768777.119, 4320428.189...",2301.686242,0.23,0.00
232,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,0,"POLYGON ((4320757.196 3768986.065, 4320749.557...",18966.463687,1.90,0.02
233,14100,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,Aalborg,0,"POLYGON ((4320428.507 3769185.324, 4320426.847...",1091.826572,0.11,0.00


In [143]:
agglomerationId_identifier

array(['AG_DK_00_4'], dtype='<U10')

In [108]:
uc_names_ls = uc.HDENS_NAME.to_list()

In [None]:
def is_name_in_list(name):
    return name.lower() in [n.lower() for n in names_list]

In [110]:
uc_names_ls

['Stockholm',
 'Haninge',
 'Södertälje',
 'Tartu',
 'Örebro',
 'Stavanger',
 'Norrköping',
 'Linköping',
 'Kristiansand',
 'Jönköping / Huskvarna',
 'Göteborg',
 'Borås',
 'Rīga',
 'Aalborg',
 'Växjö',
 'Liepāja',
 'Daugavpils',
 'Panevėžys',
 'Klaipėda',
 'Århus',
 'Helsingborg',
 'København',
 'Lund',
 'Kaunas',
 'Vilnius',
 'Malmö',
 'Odense',
 'Gdynia',
 'Flensburg',
 'Suwałki',
 'Galway',
 'Gdańsk',
 'Słupsk',
 'Dublin',
 'Elbląg',
 'Blanchardstown',
 'Ełk',
 'Kiel',
 'Koszalin',
 'Olsztyn',
 'Lütten Klein',
 'Greifswald, Hansestadt',
 'Rostock',
 'Limerick',
 'Neumünster',
 'Lübeck',
 'Białystok',
 'Grudziądz',
 'Łomża',
 'Hamburg',
 'Bremerhaven',
 'Szczecin',
 'Neu Wulmstorf / Neugraben-Fischbek / Hausbruch',
 'Harburg',
 'Kasztelanka',
 'Bydgoszcz',
 'Piła',
 'Cork',
 'Toruń',
 'Groningen',
 'Leeuwarden',
 'Lüneburg',
 'Oldenburg (Oldenburg)',
 'Gröpelingen / Vegesack',
 'Bremen',
 'Inowrocław',
 'Delmenhorst',
 'Assen',
 'Włocławek',
 'Płock',
 'Legionowo',
 'Kobyłka-Kobylak 

Stockholm
Stockholm
Haninge
Haninge
Södertälje
Sodertalje
Tartu
Tartu
Örebro
Orebro
Stavanger
Stavanger
Norrköping
Norrkoping
Linköping
Linkoping
Kristiansand
Kristiansand
Jönköping / Huskvarna
Jonkoping / Huskvarna
Göteborg
Goteborg
Borås
Boras
Rīga
Riga
Aalborg
Aalborg
Växjö
Vaxjo
Liepāja
Liepaja
Daugavpils
Daugavpils
Panevėžys
Panevezys
Klaipėda
Klaipeda
Århus
Arhus
Helsingborg
Helsingborg
København
Kobenhavn
Lund
Lund
Kaunas
Kaunas
Vilnius
Vilnius
Malmö
Malmo
Odense
Odense
Gdynia
Gdynia
Flensburg
Flensburg
Suwałki
Suwalki
Galway
Galway
Gdańsk
Gdansk
Słupsk
Slupsk
Dublin
Dublin
Elbląg
Elblag
Blanchardstown
Blanchardstown
Ełk
Elk
Kiel
Kiel
Koszalin
Koszalin
Olsztyn
Olsztyn
Lütten Klein
Lutten Klein
Greifswald, Hansestadt
Greifswald, Hansestadt
Rostock
Rostock
Limerick
Limerick
Neumünster
Neumunster
Lübeck
Lubeck
Białystok
Bialystok
Grudziądz
Grudziadz
Łomża
Lomza
Hamburg
Hamburg
Bremerhaven
Bremerhaven
Szczecin
Szczecin
Neu Wulmstorf / Neugraben-Fischbek / Hausbruch
Neu Wulmstorf / N

In [114]:
uc_unicode_ls

['Stockholm',
 'Haninge',
 'Sodertalje',
 'Tartu',
 'Orebro',
 'Stavanger',
 'Norrkoping',
 'Linkoping',
 'Kristiansand',
 'Jonkoping / Huskvarna',
 'Goteborg',
 'Boras',
 'Riga',
 'Aalborg',
 'Vaxjo',
 'Liepaja',
 'Daugavpils',
 'Panevezys',
 'Klaipeda',
 'Arhus',
 'Helsingborg',
 'Kobenhavn',
 'Lund',
 'Kaunas',
 'Vilnius',
 'Malmo',
 'Odense',
 'Gdynia',
 'Flensburg',
 'Suwalki',
 'Galway',
 'Gdansk',
 'Slupsk',
 'Dublin',
 'Elblag',
 'Blanchardstown',
 'Elk',
 'Kiel',
 'Koszalin',
 'Olsztyn',
 'Lutten Klein',
 'Greifswald, Hansestadt',
 'Rostock',
 'Limerick',
 'Neumunster',
 'Lubeck',
 'Bialystok',
 'Grudziadz',
 'Lomza',
 'Hamburg',
 'Bremerhaven',
 'Szczecin',
 'Neu Wulmstorf / Neugraben-Fischbek / Hausbruch',
 'Harburg',
 'Kasztelanka',
 'Bydgoszcz',
 'Pila',
 'Cork',
 'Torun',
 'Groningen',
 'Leeuwarden',
 'Luneburg',
 'Oldenburg (Oldenburg)',
 'Gropelingen / Vegesack',
 'Bremen',
 'Inowroclaw',
 'Delmenhorst',
 'Assen',
 'Wloclawek',
 'Plock',
 'Legionowo',
 'Kobylka-Kobylak 

In [None]:

# Sample data creation
data = {
    'name': ['Café', 'naïve', 'résumé', 'Alice', 'Bob', 'Charlie'],
    'geometry': [None, None, None, None, None, None]  # Placeholder geometries
}
gdf = gpd.GeoDataFrame(data)

# Define the list of names to check
names_list = ['Cafe', 'naive', 'resume', 'Alice', 'Bob']

# Define the function to unidecode the name and check if it is in the list
def is_unidecoded_name_in_list(name):
    unidecoded_name = unidecode(name)
    return unidecoded_name in names_list

# Apply the function to create a new column
gdf['is_name_in_list'] = gdf['name'].apply(is_unidecoded_name_in_list)

# Optionally, inspect the result
print(gdf[['name', 'is_name_in_list']])


In [94]:
def is_unidecoded_name_in_list(name):
    unidecoded_name = unidecode(name)
    return unidecoded_name in uc_names_ls

In [103]:
uc.HDENS_NAME.apply(is_unidecoded_name_in_list)


0       True
1       True
2      False
3       True
4      False
       ...  
692     True
693     True
694     True
695     True
696    False
Name: HDENS_NAME, Length: 697, dtype: bool

In [135]:
counter= 1
agl_error_ls = []
# Loop through test cities
for cityLocalName in cities_ls:
    print(str(cityLocalName))
    print(counter)
    start_time = datetime.now()
    #print(str(start_time))

    cityLocalName_unicode = unidecode(cityLocalName)        
    uc_city = uc.query(f'cityLocalName_unicode == "{cityLocalName_unicode}"')
    if uc_city.empty:
        print ('error in agl city')
        agl_error_ls.append(cityLocalName_unicode)
    else:
        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]
        print(ctry_code)
    counter = counter+ 1

Aalborg
1
DK
Aarhus
2
error in agl city
AggloLux
3
error in agl city
Agglomeration Amsterdam-Haarlem
4
error in agl city
Agglomeration Rotterdam-Dordrecht
5
error in agl city
Agglomeration Utrecht
6
error in agl city
Agglomeration Zwolle
7
error in agl city
AggloSud
8
error in agl city
AMADORA
9
error in agl city
Antwerp
10
error in agl city
Basel
11
CH-DE
Bergen
12
NO
Berlin
13
DE
Bern
14
CH
Bilbao
15
ES
Bonn
16
DE
Bordeaux
17
FR
Bremen
18
DE
Brest
19
FR
Brno
20
CZ
Brussels-Capital
21
error in agl city
Caen
22
FR
City of Rijeka
23
error in agl city
City of Split
24
error in agl city
City of Zagreb
25
error in agl city
Clermont-Ferrand
26
FR
Cologne
27
error in agl city
Copenhagen
28
error in agl city
Cork
29
IE
Cracow
30
error in agl city
Darmstadt
31
DE
Dublin
32
IE
Espoo
33
error in agl city
Fredrikstad/Sarpsborg
34
error in agl city
GDAŃSK
35
error in agl city
Geneva
36
error in agl city
Ghent
37
error in agl city
Gothenburg
38
error in agl city
Graz
39
AT
Hamburg
40
DE
Helsingborg

In [None]:
counter= 1
agl_error_ls = []
# Loop through test cities
for cityLocalName in cities_ls:
    print(str(cityLocalName))
    print(counter)
    start_time = datetime.now()
    #print(str(start_time))

    cityLocalName_unicode = unidecode(cityLocalName)        
    uc_city = uc.query(f'cityLocalName_unicode == "{cityLocalName_unicode}"')
    if uc_city.empty:
        print ('error in agl city')
        agl_error_ls.append(cityLocalName_unicode)
    else:
        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]
        print(ctry_code)
    counter = counter+ 1

In [137]:
len(agl_error_ls)

45

In [121]:
cityLocalName_unicode

'Aarhus'

In [120]:
uc.query(f'cityLocalName_unicode == "{cityLocalName_unicode}"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode


In [73]:
cityLocalName_unicode

'Aarhus'

In [72]:
uc.query(f'HDENS_NAME == "{cityLocalName_unicode}"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry


In [69]:
uc.query(f'HDENS_NAME == "{cityLocalName_unicode}"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry
13,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,"POLYGON ((4320000.000 3767000.000, 4318000.000..."


In [65]:
uc_city

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry


In [83]:
uc.query('CNTR_CODE=="DK"')[1:2].HDENS_NAME.values.astype(str)[0]

'Århus'

In [87]:
from unidecode import unidecode

In [88]:
unidecode(uc.query('CNTR_CODE=="DK"')[1:2].HDENS_NAME.values.astype(str)[0])

'Arhus'

In [123]:
uc.head()

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode
0,GEOSTAT21_018,Stockholm,GEOSTAT11_018,1462910.0,SE,1,342000000.0,162000.0,"POLYGON ((4775000.000 4038000.000, 4774000.000...",Stockholm
1,GEOSTAT21_019,Haninge,GEOSTAT11_019,94485.0,SE,1,36000000.0,38000.0,"POLYGON ((4787000.000 4033000.000, 4786000.000...",Haninge
2,GEOSTAT21_020,Södertälje,GEOSTAT11_020,74475.0,SE,1,22000000.0,24000.0,"POLYGON ((4756000.000 4031000.000, 4755000.000...",Sodertalje
3,GEOSTAT21_021,Tartu,GEOSTAT11_021,84168.0,EE,2,24000000.0,24000.0,"POLYGON ((5292000.000 4030000.000, 5290000.000...",Tartu
4,GEOSTAT21_022,Örebro,GEOSTAT11_022,92695.0,SE,1,27000000.0,26000.0,"POLYGON ((4620000.000 4027000.000, 4618000.000...",Orebro


In [124]:
uc.query('CNTR_CODE=="DK"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode
13,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,"POLYGON ((4320000.000 3767000.000, 4318000.000...",Aalborg
19,GEOSTAT21_047,Århus,GEOSTAT11_047,245448.0,DK,1,72000000.0,58000.0,"POLYGON ((4334000.000 3666000.000, 4334000.000...",Arhus
21,GEOSTAT21_903,København,,1360930.0,DK,1,299000000.0,130000.0,"POLYGON ((4463000.000 3607000.000, 4463000.000...",Kobenhavn
26,GEOSTAT21_060,Odense,GEOSTAT11_060,135431.0,DK,1,48000000.0,42000.0,"POLYGON ((4347000.000 3584000.000, 4346000.000...",Odense


In [128]:
uc.query(f'cityLocalName_unicode=="Aarhus"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode


In [127]:
uc.query(f'cityLocalName_unicode=="{cityLocalName_unicode}"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry,cityLocalName_unicode


In [125]:
cityLocalName_unicode

'Aarhus'

In [None]:
uc.query('CNTR_CODE=="DK"')

In [None]:
uc['cityLocalName_unicode'] = uc_unicode_ls

In [61]:
ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]

In [57]:
cities_ls[0]

'Aarhus'

In [55]:
cityLocalName

'Aarhus'

In [52]:
uc.query(f'HDENS_NAME == "Aalborg"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,MBRS_CODE_,SHAPE_AREA,SHAPE_LEN,geometry
13,GEOSTAT21_031,Aalborg,GEOSTAT11_031,149041.0,DK,1,48000000.0,34000.0,"POLYGON ((4320000.000 3767000.000, 4318000.000..."


In [54]:
uc.query(f'HDENS_NAME == "Aalborg"').CNTR_CODE.values.astype(str)

array(['DK'], dtype='<U2')

In [139]:
counter= 1
agl_error_ls = []

# Loop through test cities
for cityLocalName in cities_ls:
    print(str(cityLocalName))
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    cityLocalName_unicode = unidecode(cityLocalName)        
    uc_city = uc.query(f'cityLocalName_unicode == "{cityLocalName_unicode}"')
    if uc_city.empty:
        print ('error in agl city')
        agl_error_ls.append(cityLocalName_unicode)
    
    else:
        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]
        cityLocalName_unicode = unidecode(cityLocalName)

        output_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_GQA_centroids.shp')
        if not os.path.exists(output_path):
            print(f'loading aglomeration city {cityLocalName_unicode}')
            agglomerationId_identifier = HDENS_AGGL_tbl.query(f'HDENS_NAME == "{cityLocalName}"').agglomerationId_identifier.values.astype(str)
            agl_city = agls.query(f'agglomerationId_identifier == "{agglomerationId_identifier}"')

Aalborg
1
2024-07-14 12:34:24.681303
loading aglomeration city Aalborg
Aarhus
1
2024-07-14 12:34:24.696298
error in agl city
AggloLux
1
2024-07-14 12:34:24.699294
error in agl city
Agglomeration Amsterdam-Haarlem
1
2024-07-14 12:34:24.702292
error in agl city
Agglomeration Rotterdam-Dordrecht
1
2024-07-14 12:34:24.705297
error in agl city
Agglomeration Utrecht
1
2024-07-14 12:34:24.709293
error in agl city
Agglomeration Zwolle
1
2024-07-14 12:34:24.711293
error in agl city
AggloSud
1
2024-07-14 12:34:24.714294
error in agl city
AMADORA
1
2024-07-14 12:34:24.717293
error in agl city
Antwerp
1
2024-07-14 12:34:24.722293
error in agl city
Basel
1
2024-07-14 12:34:24.730293
loading aglomeration city Basel
Bergen
1
2024-07-14 12:34:24.744307
loading aglomeration city Bergen
Berlin
1
2024-07-14 12:34:24.760298
loading aglomeration city Berlin
Bern
1
2024-07-14 12:34:24.772299
loading aglomeration city Bern
Bilbao
1
2024-07-14 12:34:24.785311
loading aglomeration city Bilbao
Bonn
1
2024-07-14

ValueError: multi-line expressions are only valid in the context of data, use DataFrame.eval

In [141]:
for i in range(len(agglomerationId_identifier)):
    print(agglomerationId_identifier[i])

AG_ES_30_28006
AG_ES_30_28058
AG_ES_30_28065
AG_ES_30_28074
AG_ES_30_28079
