In [2]:
from src.utils import *


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from shapely.errors import TopologicalError
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc
import re

In [4]:
# CONFIGURATION
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'batch1_allprocessed', 'step1')
# exist_ok=True creates the folder (and missing parents) only when needed,
# without a separate check-then-create step
os.makedirs(outdata_f, exist_ok=True)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client.
# Reuse the client that is already registered in this kernel when the cell is
# re-executed; Client.current() raises ValueError when there is none yet.
# Creating a new Client() on every run would start an additional local cluster.
try:
    client = Client.current()
except ValueError:
    client = Client()

# Keyword arguments for gpd.read_file, keyed by the reading engine to use
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True},
}


Perhaps you already have a cluster running?
Hosting the HTTP server on port 49271 instead


In [5]:
# 1 COMMON SOURCES FOR ALL DATA
# URBAN CENTRES
# Read the urban centres shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG_Input.shp')
uc = gpd.read_file(uc_file_path)
# Missing country codes get the placeholder 'AA'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# the column selection is deprecated and does not update `uc` once pandas
# Copy-on-Write is active.
uc['CNTR_CODE'] = uc['CNTR_CODE'].fillna('AA')

# Select cities for processing in this batch
uc_sel = uc.query('Batch==1.0')
uc_sel = uc_sel.sort_values(by='CNTR_CODE')

# List the cities to process using the urban centre code
cities_ls = uc_sel.HDENS_CLST.tolist()

# NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the GeoPackage layer, keeping only the columns used later on
agls = gpd.read_file(agls_file_path, layer = 'dbo.DF15_AgglomerationSource_Valid_LatestDelivery', 
                     **engines['pyogrio+arrow'],columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'])

# URBAN ATLAS
# Read table with HDENS Urban centres information and Agglomerations link
# (path derived from indata_f instead of repeating the absolute folder)
HDENS_AGGL_tbl = pd.read_csv(os.path.join(indata_f, 'Processing', 'UrbanCentres_Agglomerations_csv.csv'))
# Join uc code field to this table
# NOTE(review): the join key is the population column POPL_2021; two urban
# centres with the same population value would produce duplicated rows -
# confirm that the values are unique.
HDENS_AGGL_tbl = HDENS_AGGL_tbl.merge(uc[['POPL_2021', 'HDENS_CLST']], on='POPL_2021')

# TRANSLATOR TABLE
# Crosswalk table containing the different codes from input sources
codes_path = os.path.join(indata_f, 'Processing', 'Codes.csv')
codes = pd.read_csv(codes_path)

In [6]:
# Scratch folder for the intermediate shapefiles written by the processing loop
temp_f = os.path.join(indata_f, 'Processing', 'temp')
# Make sure the folder exists: the loop exports into it and a missing folder
# would make every export fail.
os.makedirs(temp_f, exist_ok=True)

In [7]:
# Number of urban centres selected for processing in this batch
len(cities_ls)

91

In [8]:
# STEP 1 PROCESSING LOOP
# For every urban centre in `cities_ls` and every agglomeration linked to it:
#   - dissolve the road noise contours into noisy / not-noisy zones and clip
#     them to the agglomeration,
#   - intersect the Urban Atlas green classes with the urban centre and with
#     those zones,
#   - export the green quiet areas (GQA), their centroids, the green areas not
#     covered by noise contours, and one QC log entry.
# Agglomerations that are skipped or fail are recorded in `agl_error_ls`.
agl_error_ls = []

# Loop-invariant settings (hoisted out of the loops)
noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']
ncm_file_path = os.path.join(indata_f, 'NoiseData', 'Noise_20202025_export.gpkg')
data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'

# `counter` numbers the urban centres (1-based) for the progress output
for counter, uc_city_code in enumerate(cities_ls, start=1):
    print(counter)
    # NOTE: the duration logged for each agglomeration is measured from this
    # point, i.e. from the start of its urban centre.
    start_time = datetime.now()
    print(str(start_time))
    # Urban centre boundary delineation
    uc_city = uc_sel.query(f"HDENS_CLST=='{uc_city_code}'")

    HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
    agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
    for agl_id in agl_id_city_ls:
        HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
        ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
        aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
        city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
        print(city_agl_cd)

        # A country code such as "CH-DE" means the urban centre spans several
        # countries; these are not processed here.
        if len(ctry_code.split('-')) > 1:
            print('>1 countries')
            agl_error_ls.append(city_agl_cd + " bordering countries")
            continue
        print('1 country for this urban centre')

        # Load agglomeration boundary for selected city
        agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
        if agl_city.empty:
            print("agglomerationId_identifier Not Available")
            agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
            continue

        # The centroids file is the last output written, so its presence marks
        # an agglomeration that has already been processed.
        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
        if os.path.exists(output_path):
            continue

        try:
            # 2 READ NOISE CONTOUR MAPS (per-country layer, limited to the agglomeration extent)
            layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
            ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'],
                                engine='pyogrio', use_arrow=True, bbox=tuple(agl_city.total_bounds))
            print("ncm")

            # Flag the contours whose category is one of the noisy classes
            condition = ncm['category'].isin(noisy_classes)
            ncm['noisy'] = 0
            ncm.loc[condition, 'noisy'] = 1
            ncm = ncm[['noisy', 'geometry']]
            ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
            ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
            print("ncm_dis")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_dis.shp')
            ncm_dis.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_dis")

            # Perform spatial overlay (intersection)
            ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
            print("ncm_agl")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_agl.shp')
            ncm_agl.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_agl")

            # Aggregate the area with lower band values (quieter bands):
            # the parts of the agglomeration without a contour get noisy = 0
            ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
            print("union")

            ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
            print("fillna")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_agl_city.shp')
            ncm_agl_city.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_agl_city")

            # Select a subset of columns of interest
            ncm_dis = ncm_agl_city[['noisy', 'geometry']]
            print(ncm_dis)

            # 3 READ UA DATA
            # Look up the Urban Atlas dataset name for this urban centre
            city_codes = codes.query(f'HDENS_CLST == "{uc_city_code}"')
            # A missing code must not be turned into the text 'nan' and used
            # as a search pattern.
            if city_codes.empty or pd.isna(city_codes.UA2018.values[0]):
                agl_error_ls.append(city_agl_cd + " ua not found")
                continue
            ua_city_string = str(city_codes.UA2018.values[0])
            print(ua_city_string)
            folder_path = glob.glob(os.path.join(data_f, f'*{ua_city_string}*'))
            ua_file_path = (glob.glob(os.path.join(folder_path[0], 'Data', f'*{ua_city_string}*.gpkg'))
                            if folder_path else [])
            layers_ls = fiona.listlayers(ua_file_path[0]) if ua_file_path else []
            if not layers_ls:
                agl_error_ls.append(city_agl_cd + " ua not found")
                continue
            print(f"layers: {ua_file_path[0]}")

            # Read the GeoPackage file (first layer, limited to the urban centre extent)
            ua = gpd.read_file(ua_file_path[0], layer=layers_ls[0],
                               columns=['country', 'fua_name', 'fua_code', 'code_2018', 'class_2018', 'geometry'],
                               engine='pyogrio', use_arrow=True, bbox=tuple(uc_city.total_bounds))
            print("loaded ua in urban city")

            # Select 'green' classes
            uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')

            # 4 SELECT UA INTERSECTING UC
            # Perform spatial overlay (intersection)
            uagreen_urbc = gpd.overlay(uagreen, uc_city, how='intersection')
            file_path = os.path.join(temp_f, f'{city_agl_cd}_uagreen_urbc.shp')
            uagreen_urbc.to_file(file_path, driver='ESRI Shapefile')

            # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
            # Perform spatial overlay (intersection)
            nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection')  # noisy/quiet green
            file_path = os.path.join(temp_f, f'{city_agl_cd}_nqgreen.shp')
            nqgreen.to_file(file_path, driver='ESRI Shapefile')

            not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
            file_path = os.path.join(temp_f, f'{city_agl_cd}_not_covered.shp')
            not_covered.to_file(file_path, driver='ESRI Shapefile')

            # Keep only the non-empty geometries
            green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

            # Save to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
            green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
            print("green_not_covered_by_ncm")

            # 6 IDENTIFY QUIET/NOISY AREAS
            # The overlay changed the geometries, so the area is computed again
            nqgreen['area_m2'] = nqgreen['geometry'].area
            nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
            nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
            nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
            nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
            nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

            # 7 EXPORT GREEN QUIET AREAS (GQA)
            nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
            GQA = nqgreen.query('noisy == 0')
            GNA = nqgreen.query('noisy == 1')

            # Export to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
            GQA.to_file(file_path, driver='ESRI Shapefile')
            print("GQA")

            # 8 CREATE CENTROIDS FOR GQA POLYGONS
            # Create a new GeoDataFrame with centroids as points
            GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
            GQA_pts['oid'] = GQA.index
            GQA_pts['fua_name'] = GQA.fua_name
            GQA_pts['fua_code'] = GQA.fua_code
            GQA_pts['HDENS_2011'] = GQA.HDENS_2011

            # Export to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
            GQA_pts.to_file(file_path, driver='ESRI Shapefile')
            print("GQA_pts")

            # Calculate the duration and print its value
            end_time = datetime.now()
            processing_time = end_time - start_time
            processing_duration = str(processing_time)
            print(processing_duration)

            # Write output values into the log file
            uc_km2 = round(uc_city.area.sum()/1000000,2)
            agl_city_km2 = round(agl_city.area.sum()/1000000,2)
            ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
            ua_km2 = round(ua.area.sum()/1000000,2)
            uagreen_km2 = round(uagreen.area.sum()/1000000,2)
            uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
            nqgreen_m2 = round(nqgreen.area.sum(),2)
            green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
            GQA_m2 = round(GQA.area.sum(),2)
            GNA_m2 = round(GNA.area.sum(),2)

            log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2,
                                         ncm_agl_city_km2, ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2,
                                         green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
            write_log(log_path, log_entry)

            # Clean up intermediate variables to free memory
            del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
        except Exception as exc:
            # Catch Exception rather than using a bare except, so that
            # interrupting the kernel still stops the batch, and keep the
            # reason so that failures can be diagnosed from the error list.
            print("Error " + city_agl_cd)
            agl_error_ls.append(f"{city_agl_cd} Error: {type(exc).__name__}: {exc}")

print(agl_error_ls)

1
2024-07-24 08:48:43.478102
AT-GEOSTAT21_520-AG_AT_00_5-Innsbruck
1 country for this urban centre
2
2024-07-24 08:48:43.510097
AT-GEOSTAT21_522-AG_AT_00_2-Graz
1 country for this urban centre
3
2024-07-24 08:48:43.521096
AT-GEOSTAT21_495-AG_AT_00_4-Salzburg
1 country for this urban centre
4
2024-07-24 08:48:43.533094
AT-GEOSTAT21_479-AG_AT_00_3-Linz
1 country for this urban centre
5
2024-07-24 08:48:43.545099
AT-GEOSTAT21_481-AG_AT_00_1-Wien
1 country for this urban centre
6
2024-07-24 08:48:43.565091
BE-GEOSTAT21_317-AG_BE_FL_1-Antwerp
1 country for this urban centre
7
2024-07-24 08:48:43.577094
BE-GEOSTAT21_300-AG_BE_FL_1-Antwerp
1 country for this urban centre
8
2024-07-24 08:48:43.600075
BE-GEOSTAT21_332-AG_BE_FL_2-Ghent
1 country for this urban centre
9
2024-07-24 08:48:43.614083
BE-GEOSTAT21_344-AG_BE_FL_4-Leuven
1 country for this urban centre
10
2024-07-24 08:48:43.627097
BE-GEOSTAT21_364-AG_BE_WA_2-Liege
1 country for this urban centre
ncm
ncm_dis
exported ncm_dis
ncm_agl


  ncm_agl.to_file(file_path, driver='ESRI Shapefile')


exported ncm_agl
union
fillna


  ncm_agl_city.to_file(file_path, driver='ESRI Shapefile')


exported ncm_agl
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4005039.585 3068923.430, 40050...
1    1.0  MULTIPOLYGON (((4012818.401 3066339.161, 40128...
2    0.0  MULTIPOLYGON (((4015078.585 3071540.455, 40150...
BE005L2_LIEGE_UA2018_v013
layers: A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018\BE005L2_LIEGE_UA2018_v013\Data\BE005L2_LIEGE_UA2018_v013.gpkg
loaded ua in urban city


  return lib.intersection(a, b, **kwargs)


green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
11
2024-07-24 08:53:45.000175
BE-GEOSTAT21_349-AG_BE_BR_1-Brussels-Capital
1 country for this urban centre
12
2024-07-24 08:53:45.041158
CH-GEOSTAT21_521-AG_CH_00_2-Zurich
1 country for this urban centre
13
2024-07-24 08:53:45.058166
CH-GEOSTAT21_542-AG_CH_00_9-Lausanne
1 country for this urban centre
14
2024-07-24 08:53:45.080176
CH-GEOSTAT21_528-AG_CH_00_3-Bern
1 country for this urban centre
15
2024-07-24 08:53:45.098160
CH-DE-GEOSTAT21_510-AG_CH_00_5-Basel
>1 countries
16
2024-07-24 08:53:45.104170
CH-FR-GEOSTAT21_549-AG_CH_00_10-Geneva
>1 countries
17
2024-07-24 08:53:45.111171
CY-GEOSTAT21_843-AG_CY_00_1-Nicosia
1 country for this urban centre
ncm
ncm_dis
exported ncm_dis
ncm_agl


  ncm_agl.to_file(file_path, driver='ESRI Shapefile')


exported ncm_agl
union
fillna


  ncm_agl_city.to_file(file_path, driver='ESRI Shapefile')


exported ncm_agl
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((6436644.388 1673293.309, 64366...
1    1.0  MULTIPOLYGON (((6443078.618 1665461.218, 64430...
2    0.0  MULTIPOLYGON (((6443084.843 1665458.371, 64430...
CY001L2_LEFKOSIA_UA2018_v013
layers: A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018\CY001L2_LEFKOSIA_UA2018_v013\Data\CY001L2_LEFKOSIA_UA2018_v013.gpkg
loaded ua in urban city
green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
18
2024-07-24 08:55:28.711561
CY-GEOSTAT21_845-AG_CY_00_4-Larnaca
1 country for this urban centre
ncm
ncm_dis
exported ncm_dis


# Processing SE: re-run step 1 for the urban centres with country code SE

In [7]:


# Keep only the batch 1 urban centres whose country code is SE
uc_sel = (
    uc.query('Batch==1.0 & CNTR_CODE == "SE"')
      .sort_values(by='CNTR_CODE')
)

# Urban centre codes of the cities to process
cities_ls = uc_sel['HDENS_CLST'].tolist()



In [8]:
# Urban centre codes (HDENS_CLST) selected for processing
cities_ls

['GEOSTAT21_018',
 'GEOSTAT21_024',
 'GEOSTAT21_025',
 'GEOSTAT21_029',
 'GEOSTAT21_048',
 'GEOSTAT21_049',
 'GEOSTAT21_054',
 'GEOSTAT21_013']

In [9]:
# Output folder for the step 1 results.
# NOTE(review): same value as the earlier definition of outdata_f in this notebook.
outdata_f = os.path.join(indata_f, 'OutputData', 'batch1_allprocessed', 'step1')

In [1]:
# STEP 1 PROCESSING LOOP
# For every urban centre in `cities_ls` and every agglomeration linked to it:
#   - dissolve the road noise contours into noisy / not-noisy zones and clip
#     them to the agglomeration,
#   - intersect the Urban Atlas green classes with the urban centre and with
#     those zones,
#   - export the green quiet areas (GQA), their centroids, the green areas not
#     covered by noise contours, and one QC log entry.
# Agglomerations that are skipped or fail are recorded in `agl_error_ls`.
agl_error_ls = []

# Loop-invariant settings (hoisted out of the loops)
noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']
ncm_file_path = os.path.join(indata_f, 'NoiseData', 'Noise_20202025_export.gpkg')
data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'

# `counter` numbers the urban centres (1-based) for the progress output
for counter, uc_city_code in enumerate(cities_ls, start=1):
    print(counter)
    # NOTE: the duration logged for each agglomeration is measured from this
    # point, i.e. from the start of its urban centre.
    start_time = datetime.now()
    print(str(start_time))
    # Urban centre boundary delineation
    uc_city = uc_sel.query(f"HDENS_CLST=='{uc_city_code}'")

    HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
    agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
    for agl_id in agl_id_city_ls:
        HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
        ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
        aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
        city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
        print(city_agl_cd)

        # A country code such as "CH-DE" means the urban centre spans several
        # countries; these are not processed here.
        if len(ctry_code.split('-')) > 1:
            print('>1 countries')
            agl_error_ls.append(city_agl_cd + " bordering countries")
            continue
        print('1 country for this urban centre')

        # Load agglomeration boundary for selected city
        agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
        if agl_city.empty:
            print("agglomerationId_identifier Not Available")
            agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
            continue

        # The centroids file is the last output written, so its presence marks
        # an agglomeration that has already been processed.
        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
        if os.path.exists(output_path):
            continue

        try:
            # 2 READ NOISE CONTOUR MAPS (per-country layer, limited to the agglomeration extent)
            layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
            ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'],
                                engine='pyogrio', use_arrow=True, bbox=tuple(agl_city.total_bounds))
            print("ncm")

            # Flag the contours whose category is one of the noisy classes
            condition = ncm['category'].isin(noisy_classes)
            ncm['noisy'] = 0
            ncm.loc[condition, 'noisy'] = 1
            ncm = ncm[['noisy', 'geometry']]
            ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
            ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
            print("ncm_dis")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_dis.shp')
            ncm_dis.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_dis")

            # Perform spatial overlay (intersection)
            ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
            print("ncm_agl")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_agl.shp')
            ncm_agl.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_agl")

            # Aggregate the area with lower band values (quieter bands):
            # the parts of the agglomeration without a contour get noisy = 0
            ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
            print("union")

            ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
            print("fillna")

            # Export to shapefile
            file_path = os.path.join(temp_f, f'{city_agl_cd}_ncm_agl_city.shp')
            ncm_agl_city.to_file(file_path, driver='ESRI Shapefile')
            print("exported ncm_agl_city")

            # Select a subset of columns of interest
            ncm_dis = ncm_agl_city[['noisy', 'geometry']]
            print(ncm_dis)

            # 3 READ UA DATA
            # Look up the Urban Atlas dataset name for this urban centre
            city_codes = codes.query(f'HDENS_CLST == "{uc_city_code}"')
            # A missing code must not be turned into the text 'nan' and used
            # as a search pattern.
            if city_codes.empty or pd.isna(city_codes.UA2018.values[0]):
                agl_error_ls.append(city_agl_cd + " ua not found")
                continue
            ua_city_string = str(city_codes.UA2018.values[0])
            print(ua_city_string)
            folder_path = glob.glob(os.path.join(data_f, f'*{ua_city_string}*'))
            ua_file_path = (glob.glob(os.path.join(folder_path[0], 'Data', f'*{ua_city_string}*.gpkg'))
                            if folder_path else [])
            layers_ls = fiona.listlayers(ua_file_path[0]) if ua_file_path else []
            if not layers_ls:
                agl_error_ls.append(city_agl_cd + " ua not found")
                continue
            print(f"layers: {ua_file_path[0]}")

            # Read the GeoPackage file (first layer, limited to the urban centre extent)
            ua = gpd.read_file(ua_file_path[0], layer=layers_ls[0],
                               columns=['country', 'fua_name', 'fua_code', 'code_2018', 'class_2018', 'geometry'],
                               engine='pyogrio', use_arrow=True, bbox=tuple(uc_city.total_bounds))
            print("loaded ua in urban city")

            # Select 'green' classes
            uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')

            # 4 SELECT UA INTERSECTING UC
            # Perform spatial overlay (intersection)
            uagreen_urbc = gpd.overlay(uagreen, uc_city, how='intersection')
            file_path = os.path.join(temp_f, f'{city_agl_cd}_uagreen_urbc.shp')
            uagreen_urbc.to_file(file_path, driver='ESRI Shapefile')

            # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
            # Perform spatial overlay (intersection)
            nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection')  # noisy/quiet green
            file_path = os.path.join(temp_f, f'{city_agl_cd}_nqgreen.shp')
            nqgreen.to_file(file_path, driver='ESRI Shapefile')

            not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
            file_path = os.path.join(temp_f, f'{city_agl_cd}_not_covered.shp')
            not_covered.to_file(file_path, driver='ESRI Shapefile')

            # Keep only the non-empty geometries
            green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

            # Save to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
            green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
            print("green_not_covered_by_ncm")

            # 6 IDENTIFY QUIET/NOISY AREAS
            # The overlay changed the geometries, so the area is computed again
            nqgreen['area_m2'] = nqgreen['geometry'].area
            nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
            nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
            nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
            nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
            nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

            # 7 EXPORT GREEN QUIET AREAS (GQA)
            nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
            GQA = nqgreen.query('noisy == 0')
            GNA = nqgreen.query('noisy == 1')

            # Export to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
            GQA.to_file(file_path, driver='ESRI Shapefile')
            print("GQA")

            # 8 CREATE CENTROIDS FOR GQA POLYGONS
            # Create a new GeoDataFrame with centroids as points
            GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
            GQA_pts['oid'] = GQA.index
            GQA_pts['fua_name'] = GQA.fua_name
            GQA_pts['fua_code'] = GQA.fua_code
            GQA_pts['HDENS_2011'] = GQA.HDENS_2011

            # Export to shapefile
            file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
            GQA_pts.to_file(file_path, driver='ESRI Shapefile')
            print("GQA_pts")

            # Calculate the duration and print its value
            end_time = datetime.now()
            processing_time = end_time - start_time
            processing_duration = str(processing_time)
            print(processing_duration)

            # Write output values into the log file
            uc_km2 = round(uc_city.area.sum()/1000000,2)
            agl_city_km2 = round(agl_city.area.sum()/1000000,2)
            ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
            ua_km2 = round(ua.area.sum()/1000000,2)
            uagreen_km2 = round(uagreen.area.sum()/1000000,2)
            uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
            nqgreen_m2 = round(nqgreen.area.sum(),2)
            green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
            GQA_m2 = round(GQA.area.sum(),2)
            GNA_m2 = round(GNA.area.sum(),2)

            log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2,
                                         ncm_agl_city_km2, ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2,
                                         green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
            write_log(log_path, log_entry)

            # Clean up intermediate variables to free memory
            del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
        except Exception as exc:
            # Catch Exception rather than using a bare except, so that
            # interrupting the kernel still stops the batch. The entry records
            # the actual exception type instead of labelling every failure as
            # a topological error.
            print("Error " + city_agl_cd)
            agl_error_ls.append(f"{city_agl_cd} Error: {type(exc).__name__}: {exc}")

print(agl_error_ls)

NameError: name 'cities_ls' is not defined