In [3]:
from src.utils import *

# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from shapely.errors import TopologicalError
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc
import re


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [4]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'step1_GQA')
if not os.path.exists(outdata_f):
    # Create the folder if it doesn't exist
    os.makedirs(outdata_f)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1_231024.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client
client = Client()

# Define engines
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True}
          
}


In [5]:
# 1 READ URBAN CENTRES
# Read shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG_InputUpdateB2B3B4Copy.shp')
# Read the GeoPackage file
uc = gpd.read_file(uc_file_path)
uc['CNTR_CODE'].fillna('AA', inplace=True)

# Select cities for processing in this batch
###uc_sel = uc.query('Batch==1.0 & CNTR_CODE != "SE"')
uc_sel = uc.query('Batch>0.0 and CNTR_CODE=="DE"')
uc_sel = uc_sel.sort_values(by='CNTR_CODE')

# Read table to list the cities to process using urban centre code
cities_ls = uc_sel.HDENS_CLST.tolist()
len(cities_ls)

64

In [6]:
## uc_2remove from conflict folder
uc_2remove_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\oct_run\GQA_conflictives'
shps = glob.glob(os.path.join(uc_2remove_path, '*.shp'))
for i in shps:
    code = str(os.path.basename(i)[:-7][:-6])
    print(code)
    cities_ls.remove(str(code))

len(cities_ls)

GEOSTAT21_248


ValueError: list.remove(x): x not in list

In [23]:
# Final GQAs
QGA_Final_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\step1_GQA'
# Read table with HDENS Urban centres information and Agglomerations link
HDENS_AGGL_tbl = pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\UrbanCentres_Agglomerations_csv.csv')
# Join uc code field to this table
HDENS_AGGL_tbl = HDENS_AGGL_tbl.merge(uc[['POPL_2021', 'HDENS_CLST']], on='POPL_2021')

# 1 UA DATA FOLDER
ua_data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'

# 2 READ NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the GeoPackage file
agls = gpd.read_file(agls_file_path, layer = 'dbo.DF15_AgglomerationSource_Valid_LatestDelivery', 
                     **engines['pyogrio+arrow'],columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'])

# 3 TRANSLATOR TABLE
# Crosswalk table containing the different codes from input sources
codes_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\Codes.csv'
codes = pd.read_csv(codes_path)

In [60]:
codes.query('HDENS_CLST == "GEOSTAT21_327"')

Unnamed: 0,FREQUENCY,HDENS_CLST,HDENS_NAME,HDENS_2011,agglomerationId_identifier,agglomerationName_nameEng,UA2018
135,1,GEOSTAT21_327,Düsseldorf,GEOSTAT11_327,AG_DE_NW_2,Duesseldorf,DE011L1_DUSSELDORF_UA2018_v013
136,1,GEOSTAT21_327,Düsseldorf,GEOSTAT11_327,AG_DE_NW_21,Neuss,DE011L1_DUSSELDORF_UA2018_v013


In [61]:
codes_simpl = codes[['HDENS_CLST', 'HDENS_NAME', 'UA2018']]

In [64]:
codes_simpl = codes_simpl.drop_duplicates()

In [65]:
codes_simpl.query('HDENS_CLST == "GEOSTAT21_327"')

Unnamed: 0,HDENS_CLST,HDENS_NAME,UA2018
135,GEOSTAT21_327,Düsseldorf,DE011L1_DUSSELDORF_UA2018_v013
136,GEOSTAT21_327,Düsseldorf,DE011L1_DUSSELDORF_UA2018_v013


In [66]:
codes_grouped = codes_simpl.groupby(['HDENS_CLST']).size().reset_index(name='count')

In [67]:
codes_grouped

Unnamed: 0,HDENS_CLST,count
0,GEOSTAT21_002,1
1,GEOSTAT21_003,1
2,GEOSTAT21_004,1
3,GEOSTAT21_005,1
4,GEOSTAT21_006,1
...,...,...
394,GEOSTAT21_943,1
395,GEOSTAT21_945,1
396,GEOSTAT21_950,1
397,GEOSTAT21_958,1


In [59]:
codes_grouped.query('HDENS_CLST == "GEOSTAT21_327"')

Unnamed: 0,HDENS_CLST,count
116,GEOSTAT21_327,2


In [58]:
codes_grouped.query('count>1')

Unnamed: 0,HDENS_CLST,count
79,GEOSTAT21_222,2
102,GEOSTAT21_297,2
116,GEOSTAT21_327,2
121,GEOSTAT21_334,2
140,GEOSTAT21_363,2
162,GEOSTAT21_405,2
163,GEOSTAT21_408,2
176,GEOSTAT21_429,2
218,GEOSTAT21_511,2
334,GEOSTAT21_773,2


In [22]:
codes_grouped.query('FREQUENCY > 1')

Unnamed: 0_level_0,Unnamed: 1_level_0,FREQUENCY,HDENS_NAME,HDENS_2011,agglomerationId_identifier,agglomerationName_nameEng
UA2018,HDENS_CLST,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ES001L3_MADRID_UA2018_v013,GEOSTAT21_773,3,3,3,3,3
ES019L3_BILBAO_UA2018_v013,GEOSTAT21_650,2,2,2,2,2
FR001L1_PARIS_UA2018_v013,GEOSTAT21_906,5,5,5,5,5
DE001L1_BERLIN_UA2018_v013,GEOSTAT21_188,2,2,2,2,2
DE004L1_KOLN_UA2018_v013,GEOSTAT21_338,2,2,2,2,2
DE004L1_KOLN_UA2018_v013,GEOSTAT21_346,3,3,3,3,3
DE005L1_FRANKFURT_AM_MAIN_UA2018_v013,GEOSTAT21_401,2,2,2,2,2
DE038L1_RUHRGEBIET_UA2018_v013,GEOSTAT21_297,2,2,2,2,2
DE038L1_RUHRGEBIET_UA2018_v013,GEOSTAT21_299,10,10,10,10,10
DE038L1_RUHRGEBIET_UA2018_v013,GEOSTAT21_302,3,3,3,3,3


In [10]:
cities_ls = sorted(cities_ls)

In [12]:
cities_ls[-1:]

['GEOSTAT21_925']

In [21]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls[-1:]:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")
    
    else:
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                aglo_name = aglo_name.split(' ')[0]

                if len(ctry_code.split('-'))>1:
                    print(F'>1 countries {ctry_code}')
                    ctry_code = ctry_code.split('-')[0]
                
                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                print(city_agl_cd)
                if ctry_code=='DE':
                    print('Loading agglomeration boundary for selected city')      
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        agglomerationId_identifier = 'NotAvailable'
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                        if not os.path.exists(GQA_uc_aglo_path):
                            try:
                                # Check noise contour maps GeoPackage file
                                ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export.gpkg')
                                layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
                                #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                                #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                                #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                                ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                                print ("ncm")

                                ncm = gpd.clip(ncm, agl_city)

                                # Define the list of noisy classes
                                noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']
                                print(noisy_classes)
                                # Create a condition based on the category column
                                condition = ncm['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

                                # Specify the condition and create a new category column based on the condition
                                ncm['noisy'] = 0
                                ncm.loc[condition, 'noisy'] = 1
                                ncm = ncm[['noisy', 'geometry']]
                                print(ncm)
                                ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
                                ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                                print ("ncm_dis")

                                # Perform spatial overlay (intersection) 
                                ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
                                print ("ncm_agl")

                                # Aggregate the area with lower band values (quieter bands)
                                ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                                print ("union")

                                ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                                print ("fillna")

                                # Select a subset of columns of interest
                                ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                                print(ncm_dis)
                                

                                # 3 READ URBAN ATLAS DATA       
                                file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
                                # Read the GeoPackage file
                                ua = gpd.read_file(file_path, layer= ua_path[:-5], 
                                            columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], 
                                            engine='pyogrio', 
                                            use_arrow=True, bbox= tuple(urban_center.total_bounds))
                                print ("loaded ua in urban city")                        

                                # Select 'green' classes
                                uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                                
                                # 4 SELECT UA INTERSECTING UC
                                # Perform spatial overlay (intersection)
                                uagreen_urbc = gpd.overlay(uagreen, urban_center, how='intersection')

                                # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                                # Perform spatial overlay (intersection)
                                nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                                not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                                # Filter out empty polygons(not empty polygons)
                                green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                                # save to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                                #green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                                print ("green_not_covered_by_ncm")

                                # 6 IDENTIFY QUIET/NOISY AREAS
                                ## for statistics need to calculate area again
                                # Calculate the area for each geometry and create a new column 'area'
                                nqgreen['area_m2'] = nqgreen['geometry'].area
                                nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                                nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                                nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                                nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                                nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                                # 7 EXPORT GREEN QUIET AREAS (GQA)
                                nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                                GQA = nqgreen.query('noisy == 0')
                                GNA = nqgreen.query('noisy == 1')

                                # Export to shapefile
                                print ('Export to shapefile')
                                
                                print (GQA_uc_aglo_path)
                                GQA.to_file(GQA_uc_aglo_path, driver='ESRI Shapefile')
                                print ("GQA")

                                # 8 CREATE CENTROIDS FOR GQA POLYGONS
                                # Create a new GeoDataFrame with centroids as points
                                GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                                GQA_pts['oid'] = GQA.index
                                GQA_pts['fua_name'] = GQA.fua_name
                                GQA_pts['fua_code'] = GQA.fua_code
                                GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                                # Export to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                                GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                                print ("GQA_pts")
                        
                                # Calculate the duration
                                end_time = datetime.now()
                                processing_time = end_time - start_time

                                print ("str(processing_time)")
                                
                                ## write output values into log file
                                uc_km2 = round(uc_city.area.sum()/1000000,2)
                                agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                                ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                                ua_km2 = round(ua.area.sum()/1000000,2)
                                uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                                uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                                nqgreen_m2 = round(nqgreen.area.sum(),2)
                                green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                                GQA_m2 = round(GQA.area.sum(),2)
                                GNA_m2 = round(GNA.area.sum(),2)
                                processing_duration = str(processing_time)

                                log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2, 
                                                        ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                        green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                                write_log(log_path, log_entry)

                                # Clean up intermediate variables to free memory
                                del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                            except:
                                print("Error " + city_agl_cd)
                                agl_error_ls.append(city_agl_cd +" Topological error")
    counter= counter+1

print(agl_error_ls)

1
2024-10-31 12:48:03.413600
DE-GEOSTAT21_925-AG_DE_NW_19-Solingen
Loading agglomeration boundary for selected city
DE-GEOSTAT21_925-AG_DE_NW_7-Wuppertal
Loading agglomeration boundary for selected city
[]


In [22]:
GQA_uc_aglo_path

'P:\\Environment and Health\\Noise\\ServiceContract\\2024_ServiceContract\\QuietAreas\\OutputData\\step1_GQA\\DE-GEOSTAT21_925-AG_DE_NW_7-Wuppertal_GQA.shp'

In [19]:
agl_city

Unnamed: 0,agglomerationId_identifier,agglomerationName_nameEng,geometry
205,AG_DE_NW_7,Wuppertal,"MULTIPOLYGON (((4114455.585 3131062.616, 41144..."


In [18]:
ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export.gpkg')
layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
#ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
#layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
#layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))

In [20]:
ncm = gpd.clip(ncm, agl_city)

In [None]:
ncm

In [15]:
agl_city

Unnamed: 0,agglomerationId_identifier,agglomerationName_nameEng,geometry
205,AG_DE_NW_7,Wuppertal,"MULTIPOLYGON (((4114455.585 3131062.616, 41144..."


In [6]:
agl_error_ls

['DE-GEOSTAT21_305-AG_DE_NW_16-Hagen Topological error',
 'DE-GEOSTAT21_302-AG_DE_NW_2-Duesseldorf Topological error',
 'DE-GEOSTAT21_302-AG_DE_NW_5-Duisburg Topological error',
 'DE-GEOSTAT21_302-AG_DE_NW_17-Muelheim Topological error',
 'DE-GEOSTAT21_297-AG_DE_NW_5-Duisburg Topological error',
 'DE-GEOSTAT21_297-AG_DE_NW_14-Krefeld Topological error',
 'DE-GEOSTAT21_297-AG_DE_NW_26-Moers Topological error',
 'DE-GEOSTAT21_334-AG_DE_NW_2-Duesseldorf Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_6-Bochum Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_22-Bottrop Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_3-Dortmund Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_5-Duisburg Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_4-Essen Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_11-Gelsenkirchen Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_20-Herne Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_17-Muelheim Topological error',
 'DE-GEOSTAT21_299-AG_DE_NW_15-Oberhausen Topological

In [8]:
import geopandas as gpd
from shapely.geometry import MultiPolygon

# Assuming you have a GeoDataFrame with measured geometry
ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_DE_update.gpkg')
layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_DE' 
ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], engine='pyogrio', use_arrow=True)
print ("ncm")

gdf = ncm

# Function to remove the M dimension
def strip_m_dimension(geometry):
    # Convert to WKT (Well-Known Text) to remove M
    if geometry.has_z:
        return MultiPolygon([geom for geom in geometry.geoms])  # Preserve Z if needed
    else:
        return geometry  # If it's already a standard geometry

# Apply the function to your GeoDataFrame
gdf['geometry'] = gdf['geometry'].apply(strip_m_dimension)

  return next(self.gen)


ncm


In [9]:
ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_DE_update_v2.gpkg')
gdf.to_file(ncm_file_path, driver='GPKG', layer='ncm_DE_upd')  

In [10]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")
    
    else:
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                aglo_name = aglo_name.split(' ')[0]

                if len(ctry_code.split('-'))>1:
                    print(F'>1 countries {ctry_code}')
                    ctry_code = ctry_code.split('-')[0]
                
                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                print(city_agl_cd)
                if ctry_code=='DE':
                    print('Loading agglomeration boundary for selected city')      
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        agglomerationId_identifier = 'NotAvailable'
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        if not os.path.exists(output_path):
                            try:
                                # Check noise contour maps GeoPackage file
                                ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_DE_update_v2.gpkg')
                                layerName = f'ncm_DE_upd'           
                                #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                                #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                                #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                                ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                                print ("ncm")

                                # Define the list of noisy classes
                                noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

                                # Create a condition based on the category column
                                condition = ncm['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

                                # Specify the condition and create a new category column based on the condition
                                ncm['noisy'] = 0
                                ncm.loc[condition, 'noisy'] = 1
                                ncm = ncm[['noisy', 'geometry']]
                                ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
                                ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                                print ("ncm_dis")

                                # Perform spatial overlay (intersection) 
                                ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
                                print ("ncm_agl")

                                # Aggregate the area with lower band values (quieter bands)
                                ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                                print ("union")

                                ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                                print ("fillna")

                                # Select a subset of columns of interest
                                ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                                print(ncm_dis)
                                

                                # 3 READ URBAN ATLAS DATA       
                                file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
                                # Read the GeoPackage file
                                ua = gpd.read_file(file_path, layer= ua_path[:-5], 
                                            columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], 
                                            engine='pyogrio', 
                                            use_arrow=True, bbox= tuple(urban_center.total_bounds))
                                print ("loaded ua in urban city")                        

                                # Select 'green' classes
                                uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                                
                                # 4 SELECT UA INTERSECTING UC
                                # Perform spatial overlay (intersection)
                                uagreen_urbc = gpd.overlay(uagreen, urban_center, how='intersection')

                                # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                                # Perform spatial overlay (intersection)
                                nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                                not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                                # Filter out empty polygons(not empty polygons)
                                green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                                # save to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                                #green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                                print ("green_not_covered_by_ncm")

                                # 6 IDENTIFY QUIET/NOISY AREAS
                                ## for statistics need to calculate area again
                                # Calculate the area for each geometry and create a new column 'area'
                                nqgreen['area_m2'] = nqgreen['geometry'].area
                                nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                                nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                                nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                                nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                                nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                                # 7 EXPORT GREEN QUIET AREAS (GQA)
                                nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                                GQA = nqgreen.query('noisy == 0')
                                GNA = nqgreen.query('noisy == 1')

                                # Export to shapefile
                                print ('Export to shapefile')
                                GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                                print (GQA_uc_aglo_path)
                                GQA.to_file(GQA_uc_aglo_path, driver='ESRI Shapefile')
                                print ("GQA")

                                # 8 CREATE CENTROIDS FOR GQA POLYGONS
                                # Create a new GeoDataFrame with centroids as points
                                GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                                GQA_pts['oid'] = GQA.index
                                GQA_pts['fua_name'] = GQA.fua_name
                                GQA_pts['fua_code'] = GQA.fua_code
                                GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                                # Export to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                                GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                                print ("GQA_pts")
                        
                                # Calculate the duration
                                end_time = datetime.now()
                                processing_time = end_time - start_time

                                print ("str(processing_time)")
                                
                                ## write output values into log file
                                uc_km2 = round(uc_city.area.sum()/1000000,2)
                                agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                                ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                                ua_km2 = round(ua.area.sum()/1000000,2)
                                uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                                uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                                nqgreen_m2 = round(nqgreen.area.sum(),2)
                                green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                                GQA_m2 = round(GQA.area.sum(),2)
                                GNA_m2 = round(GNA.area.sum(),2)
                                processing_duration = str(processing_time)

                                log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2, 
                                                        ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                        green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                                write_log(log_path, log_entry)

                                # Clean up intermediate variables to free memory
                                del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                            except:
                                print("Error " + city_agl_cd)
                                agl_error_ls.append(city_agl_cd +" Topological error")
    counter= counter+1

print(agl_error_ls)

1
2024-10-23 13:14:49.146948
2
2024-10-23 13:14:49.162948
DE-GEOSTAT21_305-AG_DE_NW_16-Hagen
Loading agglomeration boundary for selected city
ncm
