In [1]:
from src.utils import *


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from shapely.errors import TopologicalError
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc
import re

In [3]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'step1_GQA')
if not os.path.exists(outdata_f):
    # Create the folder if it doesn't exist
    os.makedirs(outdata_f)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1_231024.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client
client = Client()

# Define engines
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True}
          
}


In [4]:
# 1 READ URBAN CENTRES
# Read shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG_InputUpdateB2B3B4.shp')
# Read the GeoPackage file
uc = gpd.read_file(uc_file_path)
uc['CNTR_CODE'].fillna('AA', inplace=True)

# Select cities for processing in this batch
###uc_sel = uc.query('Batch==1.0 & CNTR_CODE != "SE"')
uc_sel = uc.query('Batch>0.0')
uc_sel = uc_sel.sort_values(by='CNTR_CODE')

# Read table to list the cities to process using urban centre code
cities_ls = uc_sel.HDENS_CLST.tolist()
len(cities_ls)

230

In [5]:
## uc_2remove from conflict folder
uc_2remove_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\oct_run\GQA_conflictives'
shps = glob.glob(os.path.join(uc_2remove_path, '*.shp'))
for i in shps:
    code = str(os.path.basename(i)[:-7][:-6])
    print(code)
    cities_ls.remove(str(code))

len(cities_ls)

GEOSTAT21_402


229

In [6]:
# Final GQAs
QGA_Final_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\GQA'

In [7]:
# 1 UA DATA FOLDER
ua_data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'

# 2 READ NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the GeoPackage file
agls = gpd.read_file(agls_file_path, layer = 'dbo.DF15_AgglomerationSource_Valid_LatestDelivery', 
                     **engines['pyogrio+arrow'],columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'])

# 3 TRANSLATOR TABLE
# Crosswalk table containing the different codes from input sources
codes_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\Codes.csv'
codes = pd.read_csv(codes_path)

In [8]:
# Read table with HDENS Urban centres information and Agglomerations link
HDENS_AGGL_tbl = pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\UrbanCentres_Agglomerations_csv.csv')
# Join uc code field to this table
HDENS_AGGL_tbl = HDENS_AGGL_tbl.merge(uc[['POPL_2021', 'HDENS_CLST']], on='POPL_2021')

In [9]:
ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_DE_update.gpkg')
layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_DE' 
ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], engine='pyogrio', use_arrow=True)
print ("ncm")

  return next(self.gen)


ncm


In [10]:
import geopandas as gpd
from shapely.geometry import MultiPolygon

# Assuming you have a GeoDataFrame with measured geometry
gdf = ncm

# Function to remove the M dimension
def strip_m_dimension(geometry):
    # Convert to WKT (Well-Known Text) to remove M
    if geometry.has_z:
        return MultiPolygon([geom for geom in geometry.geoms])  # Preserve Z if needed
    else:
        return geometry  # If it's already a standard geometry

# Apply the function to your GeoDataFrame
gdf['geometry'] = gdf['geometry'].apply(strip_m_dimension)

# Now the geometries are standard and usable in GeoPandas


In [11]:
ncm = gdf

In [11]:
gdf.head()

Unnamed: 0,category,geometry
0,Lden5559,MULTIPOLYGON Z (((4047217.697 3074411.678 0.00...


In [12]:
ncm_DE = gdf

In [13]:
# Define the list of noisy classes
noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

# Create a condition based on the category column
condition = ncm_DE['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

# Specify the condition and create a new category column based on the condition
ncm_DE['noisy'] = 0
ncm_DE.loc[condition, 'noisy'] = 1
ncm_DE = ncm_DE[['noisy', 'geometry']]

In [14]:
ncm_DE

Unnamed: 0,noisy,geometry
0,1,MULTIPOLYGON Z (((4047217.697 3074411.678 0.00...


In [11]:
import geopandas as gpd
from shapely.ops import transform

# Function to drop the Z coordinate
def drop_z(geom):
    return transform(lambda x, y, z=None: (x, y), geom)

In [10]:


ncm['geometry'] = ncm['geometry'].apply(drop_z)
ncm.head()



Unnamed: 0,category,geometry
0,Lden5559,"MULTIPOLYGON (((4047217.697 3074411.678, 40471..."


In [14]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")
    
    else:
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                
                if len(ctry_code.split('-'))>1:
                    ctry_code = ctry_code.split('-')[0]
                if ctry_code =='DE':
                    print(city_agl_cd)    
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        agglomerationId_identifier = 'NotAvailable'
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        if not os.path.exists(output_path):
                            try:
                                print ("ncm_agl")
                                # Perform spatial overlay (intersection) 
                                ncm_agl = gpd.overlay(ncm_DE, agl_city, how='intersection')
                                print ("ncm_agl")

                                # Aggregate the area with lower band values (quieter bands)
                                ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                                print ("union")

                                ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                                print ("fillna")

                                # Select a subset of columns of interest
                                ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                                print(ncm_dis)
                                

                                # 3 READ URBAN ATLAS DATA                                        
                                print (ua_path) 
                                file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
                                # Read the GeoPackage file
                                ua = gpd.read_file(file_path, layer= ua_path[:-5], 
                                            columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], 
                                            engine='pyogrio', 
                                            use_arrow=True, bbox= tuple(urban_center.total_bounds))
                                print ("loaded ua in urban city")                        

                                # Select 'green' classes
                                uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                                
                                # 4 SELECT UA INTERSECTING UC
                                # Perform spatial overlay (intersection)
                                uagreen_urbc = gpd.overlay(uagreen, urban_center, how='intersection')

                                # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                                # Perform spatial overlay (intersection)
                                nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                                not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                                # Filter out empty polygons(not empty polygons)
                                green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                                # save to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                                #green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                                print ("green_not_covered_by_ncm")

                                # 6 IDENTIFY QUIET/NOISY AREAS
                                ## for statistics need to calculate area again
                                # Calculate the area for each geometry and create a new column 'area'
                                nqgreen['area_m2'] = nqgreen['geometry'].area
                                nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                                nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                                nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                                nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                                nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                                # 7 EXPORT GREEN QUIET AREAS (GQA)
                                nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                                GQA = nqgreen.query('noisy == 0')
                                GNA = nqgreen.query('noisy == 1')

                                # Export to shapefile
                                print ('Export to shapefile')
                                GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                                print (GQA_uc_aglo_path)
                                GQA.to_file(GQA_uc_aglo_path, driver='ESRI Shapefile')
                                print ("GQA")

                                # 8 CREATE CENTROIDS FOR GQA POLYGONS
                                # Create a new GeoDataFrame with centroids as points
                                GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                                GQA_pts['oid'] = GQA.index
                                GQA_pts['fua_name'] = GQA.fua_name
                                GQA_pts['fua_code'] = GQA.fua_code
                                GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                                # Export to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                                GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                                print ("GQA_pts")
                        
                                # Calculate the duration
                                end_time = datetime.now()
                                processing_time = end_time - start_time

                                print ("str(processing_time)")
                                
                                ## write output values into log file
                                uc_km2 = round(uc_city.area.sum()/1000000,2)
                                agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                                ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                                ua_km2 = round(ua.area.sum()/1000000,2)
                                uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                                uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                                nqgreen_m2 = round(nqgreen.area.sum(),2)
                                green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                                GQA_m2 = round(GQA.area.sum(),2)
                                GNA_m2 = round(GNA.area.sum(),2)
                                processing_duration = str(processing_time)

                                log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2, 
                                                        ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                        green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                                write_log(log_path, log_entry)

                                # Clean up intermediate variables to free memory
                                del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                            except:
                                print("Error " + city_agl_cd)
                                agl_error_ls.append(city_agl_cd +" Topological error")
    counter= counter+1

print(agl_error_ls)

1
2024-10-23 12:43:50.024614
2
2024-10-23 12:43:50.042617
3
2024-10-23 12:43:50.046511
4
2024-10-23 12:43:50.060518
5
2024-10-23 12:43:50.067506
6
2024-10-23 12:43:50.071508
7
2024-10-23 12:43:50.074508
8
2024-10-23 12:43:50.077508
9
2024-10-23 12:43:50.080509
10
2024-10-23 12:43:50.083511
11
2024-10-23 12:43:50.086511
12
2024-10-23 12:43:50.090513
13
2024-10-23 12:43:50.093530
14
2024-10-23 12:43:50.099509
15
2024-10-23 12:43:50.101515
16
2024-10-23 12:43:50.113514
17
2024-10-23 12:43:50.118542
18
2024-10-23 12:43:50.122514
19
2024-10-23 12:43:50.126533
20
2024-10-23 12:43:50.137533
21
2024-10-23 12:43:50.138519
22
2024-10-23 12:43:50.149548
23
2024-10-23 12:43:50.156515
24
2024-10-23 12:43:50.161504
25
2024-10-23 12:43:50.166503
26
2024-10-23 12:43:50.171503
27
2024-10-23 12:43:50.175503
28
2024-10-23 12:43:50.178544
29
2024-10-23 12:43:50.181504
30
2024-10-23 12:43:50.190506
31
2024-10-23 12:43:50.199537
32
2024-10-23 12:43:50.207532
33
2024-10-23 12:43:50.228533
34
2024-10-23 12:43

In [9]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")
    
    else:
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                print(city_agl_cd)
                if len(ctry_code.split('-'))>1:
                    print(F'>1 countries {ctry_code}')
                    ctry_code = ctry_code.split('-')[0]
                else:
                    print('1 country for this urban centre')      
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        agglomerationId_identifier = 'NotAvailable'
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        if not os.path.exists(output_path):
                            try:
                                # Check noise contour maps GeoPackage file
                                ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_DE_update.gpkg')
                                layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
                                #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                                #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                                #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                                ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                                print ("ncm")
                                print(ncm.head())

                            except:
                                print("Error " + city_agl_cd)
                            





1
2024-10-23 11:57:58.558052
1
2024-10-23 11:57:58.564057
1
2024-10-23 11:57:58.568050
1
2024-10-23 11:57:58.570054
1
2024-10-23 11:57:58.573049
1
2024-10-23 11:57:58.576052
1
2024-10-23 11:57:58.579062
1
2024-10-23 11:57:58.582053
1
2024-10-23 11:57:58.584050
1
2024-10-23 11:57:58.589052
1
2024-10-23 11:57:58.594051
1
2024-10-23 11:57:58.597048
1
2024-10-23 11:57:58.599054
1
2024-10-23 11:57:58.602051
1
2024-10-23 11:57:58.604066
1
2024-10-23 11:57:58.607050
1
2024-10-23 11:57:58.608054
1
2024-10-23 11:57:58.612050
1
2024-10-23 11:57:58.619051
CH-GEOSTAT21_523-AG_CH_00_13-Biel
1 country for this urban centre
1
2024-10-23 11:57:58.647047
1
2024-10-23 11:57:58.652048
1
2024-10-23 11:57:58.661053
1
2024-10-23 11:57:58.665053
1
2024-10-23 11:57:58.669048
1
2024-10-23 11:57:58.676054
1
2024-10-23 11:57:58.680057
1
2024-10-23 11:57:58.684054
1
2024-10-23 11:57:58.688056
1
2024-10-23 11:57:58.692051
CZ-GEOSTAT21_403-AG_CZ_00_003-Ostrava
1 country for this urban centre
1
2024-10-23 11:57:58.7

  return next(self.gen)


ncm
   category                                           geometry
0  Lden5559  MULTIPOLYGON Z (((4047217.697 3074411.678 0.00...
DE-GEOSTAT21_334-AG_DE_NW_19-Solingen
1 country for this urban centre


  return next(self.gen)


Error DE-GEOSTAT21_334-AG_DE_NW_19-Solingen
1
2024-10-23 12:00:37.015041
1
2024-10-23 12:00:37.029036
1
2024-10-23 12:00:37.045040
1
2024-10-23 12:00:37.056036
1
2024-10-23 12:00:37.067037
1
2024-10-23 12:00:37.076037
DE-GEOSTAT21_297-AG_DE_NW_5-Duisburg
1 country for this urban centre


  return next(self.gen)


In [38]:
agl_error_ls

['CH-GEOSTAT21_941-AG_CH_00_7-Baden - Brugg Topological error',
 'CH-GEOSTAT21_943-AG_CH_00_12-Zug Topological error',
 'CH-GEOSTAT21_523-AG_CH_00_13-Biel/Bienne Topological error',
 'CH-GEOSTAT21_531-AG_CH_00_11-Fribourg Topological error',
 'CZ-GEOSTAT21_398-AG_CZ_00_003-Ostrava Topological error',
 'CZ-GEOSTAT21_403-AG_CZ_00_003-Ostrava Topological error',
 'CZ-GEOSTAT21_404-AG_CZ_00_003-Ostrava Topological error',
 'DE-GEOSTAT21_243-AG_DE_NW_10-Muenster Topological error',
 'DE-GEOSTAT21_360-AG_DE_NW_13-Aachen Topological error',
 'DE-GEOSTAT21_354-AG_DE_NW_1-Cologne Topological error',
 'DE-GEOSTAT21_919-AG_DE_HH_1-Hamburg Topological error',
 'DE-GEOSTAT21_121-AG_DE_HH_1-Hamburg Topological error',
 'DE-GEOSTAT21_140-AG_DE_HB_1-Bremen Topological error',
 'DE-GEOSTAT21_141-AG_DE_NI_4-Oldenburg Topological error',
 'ES-GEOSTAT21_800-AG_ES_52_12040-Castellón de la Plana Topological error',
 'ES-GEOSTAT21_768-AG_ES_30_28148-Torrejón de Ardoz Topological error',
 'ES-GEOSTAT21_774-AG

In [39]:
len(agl_error_ls)

33

In [46]:
HDENS_AGGL_tbl.sort_values('CNTR_CODE').query('CNTR_CODE=="FR-BE"')

Unnamed: 0,HDENS_NAME,HDENS_2011,POPL_2021,CNTR_CODE,agglomerationId_identifier,agglomerationName_nameEng,size,numberOfInhabitants,countryCode,Batch,HDENS_CLST
239,Lille,GEOSTAT11_361,956633,FR-BE,AG_FR_00_21,Lille,647.78,1154100,FR,,GEOSTAT21_361


In [46]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))
    print(uc_city_code)

    inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
    if not os.path.exists(inGQA):
        urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
        print(urban_center)
        HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
        ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
        print (ua_path) 
        file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
        agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
        for agl_id in agl_id_city_ls:
            HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
            ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
            aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
            print(aglo_name)
            aglo_name = aglo_name.split('/')[0]
            aglo_name = aglo_name.split(' ')[0]
            city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
            GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
            print(city_agl_cd)

            print (GQA_uc_aglo_path)
        if ua_path == 'not available':
            agl_error_ls.append(uc_city_code +" UA not available")

1
2024-10-22 16:54:05.522222
GEOSTAT21_495
1
2024-10-22 16:54:05.531204
GEOSTAT21_479
1
2024-10-22 16:54:05.534201
GEOSTAT21_481
1
2024-10-22 16:54:05.538199
GEOSTAT21_520
1
2024-10-22 16:54:05.540191
GEOSTAT21_522
1
2024-10-22 16:54:05.542207
GEOSTAT21_332
1
2024-10-22 16:54:05.543200
GEOSTAT21_304
1
2024-10-22 16:54:05.545213
GEOSTAT21_349
1
2024-10-22 16:54:05.546190
GEOSTAT21_317
1
2024-10-22 16:54:05.548187
GEOSTAT21_379
1
2024-10-22 16:54:05.549190
GEOSTAT21_344
1
2024-10-22 16:54:05.550187
GEOSTAT21_364
1
2024-10-22 16:54:05.552181
GEOSTAT21_542
1
2024-10-22 16:54:05.554191
GEOSTAT21_941
        HDENS_CLST                                         HDENS_NAME  \
318  GEOSTAT21_941  Gebenstorf / Turgi / Obersiggenthal / Baden / ...   

    HDENS_2011  POPL_2021 CNTR_CODE  MBRS_CODE_  SHAPE_AREA  SHAPE_LEN  Batch  \
318       None    63585.0        CH           9  23000000.0    34000.0    2.0   

     Area_ha                                           geometry  
318   2300.0  POLYGON 

In [18]:
agl_error_ls

['GEOSTAT21_943 UA not available']

In [14]:
codes.query(f'HDENS_CLST=="{uc_city_code}"')



Unnamed: 0,FREQUENCY,HDENS_CLST,HDENS_NAME,HDENS_2011,agglomerationId_identifier,agglomerationName_nameEng,UA2018
473,1,GEOSTAT21_943,Cham / Zug,,AG_CH_00_12,Zug,not available


In [16]:
codes

Unnamed: 0,FREQUENCY,HDENS_CLST,HDENS_NAME,HDENS_2011,agglomerationId_identifier,agglomerationName_nameEng,UA2018
0,1,GEOSTAT21_002,Oulu / Uleåborg,GEOSTAT11_002,AG_FI_00_6,Oulu,FI004L4_OULU_UA2018_v013
1,1,GEOSTAT21_003,Umeå,GEOSTAT11_003,SE_a_ag2480,Umea,SE005L1_UMEA_UA2018_v013
2,1,GEOSTAT21_004,Trondheim,GEOSTAT11_004,AG_NO_00_3,Trondheim,NO003L1_TRONDHEIM_UA2018_v013
3,1,GEOSTAT21_005,Jyväskylä,GEOSTAT11_005,AG_FI_00_9,Jyvaskyla,FI009L2_JYVASKYLA_UA2018_v013
4,1,GEOSTAT21_006,Tampere / Tammerfors,GEOSTAT11_006,AG_FI_00_3,Tampere,FI002L3_TAMPERE_UA2018_v013
...,...,...,...,...,...,...,...
473,1,GEOSTAT21_943,Cham / Zug,,AG_CH_00_12,Zug,not available
474,1,GEOSTAT21_945,Saint-Priest / Mions,,AG_FR_00_22,Lyon,FR003L2_LYON_UA2018_v013
475,1,GEOSTAT21_950,Fonte Nuova / Colleverde / Setteville / Case R...,,AG_IT_00_00009,Rome,IT001L3_ROMA_UA2018_v013
476,1,GEOSTAT21_958,Alcantarilla,,AG_ES_62_30030,Murcia,ES007L2_MURCIA_UA2018_v013


In [19]:
len(agl_error_ls)

32

In [20]:
agl_error_ls

['GEOSTAT21_941 UA not available',
 'GEOSTAT21_943 UA not available',
 'CH-GEOSTAT21_523-AG_CH_00_13-Biel Topological error',
 'GEOSTAT21_531 UA not available',
 'CZ-GEOSTAT21_403-AG_CZ_00_003-Ostrava Topological error',
 'CZ-GEOSTAT21_404-AG_CZ_00_003-Ostrava Topological error',
 'DE-GEOSTAT21_243-AG_DE_NW_10-Muenster Topological error',
 'DE-GEOSTAT21_360-AG_DE_NW_13-Aachen Topological error',
 'DE-GEOSTAT21_354-AG_DE_NW_1-Cologne Topological error',
 'DE-GEOSTAT21_919-AG_DE_HH_1-Hamburg Topological error',
 'DE-GEOSTAT21_121-AG_DE_HH_1-Hamburg Topological error',
 'DE-GEOSTAT21_140-AG_DE_HB_1-Bremen Topological error',
 'DE-GEOSTAT21_141-AG_DE_NI_4-Oldenburg Topological error',
 'ES-GEOSTAT21_800-AG_ES_52_12040-Castellón de la Plana Topological error',
 'ES-GEOSTAT21_768-AG_ES_30_28148-Torrejón de Ardoz Topological error',
 'ES-GEOSTAT21_774-AG_ES_30_28058-Fuenlabrada Topological error',
 'ES-GEOSTAT21_774-AG_ES_30_28074-Leganes Topological error',
 'NL-GEOSTAT21_181-AG_NL_00_04-Agg

In [22]:
uc.columns.to_list()

['HDENS_CLST',
 'HDENS_NAME',
 'HDENS_2011',
 'POPL_2021',
 'CNTR_CODE',
 'MBRS_CODE_',
 'SHAPE_AREA',
 'SHAPE_LEN',
 'Batch',
 'Area_ha',
 'geometry']

In [23]:
qc_tbl = uc[['HDENS_CLST', 
 'HDENS_NAME','CNTR_CODE','Batch']]

In [24]:
qc_tbl

Unnamed: 0,HDENS_CLST,HDENS_NAME,CNTR_CODE,Batch
0,GEOSTAT21_018,Stockholm,SE,1.0
1,GEOSTAT21_019,Haninge,SE,
2,GEOSTAT21_020,Södertälje,SE,
3,GEOSTAT21_021,Tartu,EE,1.0
4,GEOSTAT21_022,Örebro,SE,2.0
...,...,...,...,...
692,GEOSTAT21_868,Puerto de la Cruz,ES,
693,GEOSTAT21_870,Arrecife,ES,
694,GEOSTAT21_871,Las Palmas,ES,
695,GEOSTAT21_872,Telde,ES,


In [25]:
qc_tbl['GQA1'] = 0
qc_tbl['GQATotal'] = 0
qc_tbl['SA'] = 0
qc_tbl['Pop'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qc_tbl['GQA1'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qc_tbl['GQATotal'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qc_tbl['SA'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the

In [35]:
qc_tbl = qc_tbl.query('Batch >= 1').sort_values('HDENS_CLST').reset_index()

In [36]:
qc_tbl.to_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\Processing_ControlSheet.csv', sep=',')

In [9]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")
    
    else:
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                aglo_name = aglo_name.split(' ')[0]
                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                print(city_agl_cd)
                if len(ctry_code.split('-'))>1:
                    print(F'>1 countries {ctry_code}')
                    ctry_code = ctry_code.split('-')[0]
                else:
                    print('1 country for this urban centre')      
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        agglomerationId_identifier = 'NotAvailable'
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        if not os.path.exists(output_path):
                            try:
                                # Check noise contour maps GeoPackage file
                                ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export.gpkg')
                                layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
                                #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                                #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                                #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                                ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                                print ("ncm")

                                # Define the list of noisy classes
                                noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

                                # Create a condition based on the category column
                                condition = ncm['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

                                # Specify the condition and create a new category column based on the condition
                                ncm['noisy'] = 0
                                ncm.loc[condition, 'noisy'] = 1
                                ncm = ncm[['noisy', 'geometry']]
                                ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
                                ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                                print ("ncm_dis")

                                # Perform spatial overlay (intersection) 
                                ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
                                print ("ncm_agl")

                                # Aggregate the area with lower band values (quieter bands)
                                ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                                print ("union")

                                ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                                print ("fillna")

                                # Select a subset of columns of interest
                                ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                                print(ncm_dis)
                                

                                # 3 READ URBAN ATLAS DATA        
                                
                                print (ua_path) 
                                file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
                                # Read the GeoPackage file
                                ua = gpd.read_file(file_path, layer= ua_path[:-5], 
                                            columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], 
                                            engine='pyogrio', 
                                            use_arrow=True, bbox= tuple(urban_center.total_bounds))
                                print ("loaded ua in urban city")                        

                                # Select 'green' classes
                                uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                                
                                # 4 SELECT UA INTERSECTING UC
                                # Perform spatial overlay (intersection)
                                uagreen_urbc = gpd.overlay(uagreen, urban_center, how='intersection')

                                # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                                # Perform spatial overlay (intersection)
                                nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                                not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                                # Filter out empty polygons(not empty polygons)
                                green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                                # save to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                                #green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                                print ("green_not_covered_by_ncm")

                                # 6 IDENTIFY QUIET/NOISY AREAS
                                ## for statistics need to calculate area again
                                # Calculate the area for each geometry and create a new column 'area'
                                nqgreen['area_m2'] = nqgreen['geometry'].area
                                nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                                nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                                nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                                nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                                nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                                # 7 EXPORT GREEN QUIET AREAS (GQA)
                                nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                                GQA = nqgreen.query('noisy == 0')
                                GNA = nqgreen.query('noisy == 1')

                                # Export to shapefile
                                print ('Export to shapefile')
                                GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                                print (GQA_uc_aglo_path)
                                GQA.to_file(GQA_uc_aglo_path, driver='ESRI Shapefile')
                                print ("GQA")

                                # 8 CREATE CENTROIDS FOR GQA POLYGONS
                                # Create a new GeoDataFrame with centroids as points
                                GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                                GQA_pts['oid'] = GQA.index
                                GQA_pts['fua_name'] = GQA.fua_name
                                GQA_pts['fua_code'] = GQA.fua_code
                                GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                                # Export to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                                GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                                print ("GQA_pts")
                        
                                # Calculate the duration
                                end_time = datetime.now()
                                processing_time = end_time - start_time

                                print ("str(processing_time)")
                                
                                ## write output values into log file
                                uc_km2 = round(uc_city.area.sum()/1000000,2)
                                agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                                ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                                ua_km2 = round(ua.area.sum()/1000000,2)
                                uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                                uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                                nqgreen_m2 = round(nqgreen.area.sum(),2)
                                green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                                GQA_m2 = round(GQA.area.sum(),2)
                                GNA_m2 = round(GNA.area.sum(),2)
                                processing_duration = str(processing_time)

                                log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2, 
                                                        ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                        green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                                write_log(log_path, log_entry)

                                # Clean up intermediate variables to free memory
                                del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                            except:
                                print("Error " + city_agl_cd)
                                agl_error_ls.append(city_agl_cd +" Topological error")
    counter= counter+1

print(agl_error_ls)

1
2024-10-22 16:57:12.271003
2
2024-10-22 16:57:12.277003
3
2024-10-22 16:57:12.284003
4
2024-10-22 16:57:12.294020
5
2024-10-22 16:57:12.311015
6
2024-10-22 16:57:12.316020
7
2024-10-22 16:57:12.320028
8
2024-10-22 16:57:12.328001
9
2024-10-22 16:57:12.335009
10
2024-10-22 16:57:12.344007
11
2024-10-22 16:57:12.354006
12
2024-10-22 16:57:12.363003
13
2024-10-22 16:57:12.367004
14
2024-10-22 16:57:12.369004
15
2024-10-22 16:57:12.372006
16
2024-10-22 16:57:12.374006
CH-GEOSTAT21_523-AG_CH_00_13-Biel
1 country for this urban centre
17
2024-10-22 16:57:12.392004
18
2024-10-22 16:57:12.407007
19
2024-10-22 16:57:12.408005
20
2024-10-22 16:57:12.417008
21
2024-10-22 16:57:12.423007
22
2024-10-22 16:57:12.427007
23
2024-10-22 16:57:12.432014
24
2024-10-22 16:57:12.435007
25
2024-10-22 16:57:12.438006
26
2024-10-22 16:57:12.441005
27
2024-10-22 16:57:12.443004
28
2024-10-22 16:57:12.446008
29
2024-10-22 16:57:12.449006
CZ-GEOSTAT21_398-AG_CZ_00_003-Ostrava
1 country for this urban centre
30


In [39]:
counter= 1
agl_error_ls = []

# Loop through test cities
for uc_city_code in cities_ls[0:300]:

    start_time = datetime.now()

    print(uc_city_code)

    inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
    if not os.path.exists(inGQA):
        urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')

        HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
        ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()

        file_path = os.path.join(ua_data_f, f'{ua_path}\Data\{ua_path}.gpkg')
        agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
        for agl_id in agl_id_city_ls:
            HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
            ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
            aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
            print(aglo_name)
            aglo_name = aglo_name.split('/')[0]
            aglo_name = aglo_name.split(' ')[0]
            print(aglo_name)

GEOSTAT21_495
GEOSTAT21_520
GEOSTAT21_479
GEOSTAT21_522
GEOSTAT21_481
GEOSTAT21_317
GEOSTAT21_304
GEOSTAT21_332
GEOSTAT21_349
GEOSTAT21_344
GEOSTAT21_364
GEOSTAT21_379
GEOSTAT21_941
Baden - Brugg
Baden
GEOSTAT21_514
GEOSTAT21_943
Zug
Zug
GEOSTAT21_523
Biel/Bienne
Biel
GEOSTAT21_528
GEOSTAT21_531
Fribourg
Fribourg
GEOSTAT21_557
GEOSTAT21_542
GEOSTAT21_521
GEOSTAT21_510
GEOSTAT21_549
GEOSTAT21_843
GEOSTAT21_856
GEOSTAT21_362
GEOSTAT21_358
GEOSTAT21_430
GEOSTAT21_398
Ostrava
Ostrava
GEOSTAT21_397
GEOSTAT21_403
Ostrava
Ostrava
GEOSTAT21_414
GEOSTAT21_416
GEOSTAT21_404
Ostrava
Ostrava
GEOSTAT21_454
GEOSTAT21_339
GEOSTAT21_408
GEOSTAT21_331
GEOSTAT21_925
Solingen
Solingen
Wuppertal
Wuppertal
GEOSTAT21_327
GEOSTAT21_435
GEOSTAT21_237
GEOSTAT21_441
GEOSTAT21_346
GEOSTAT21_447
GEOSTAT21_448
GEOSTAT21_243
Muenster
Muenster
GEOSTAT21_455
GEOSTAT21_417
GEOSTAT21_413
GEOSTAT21_445
GEOSTAT21_307
Krefeld
Krefeld
GEOSTAT21_405
GEOSTAT21_312
GEOSTAT21_488
GEOSTAT21_295
GEOSTAT21_487
GEOSTAT21_907
GEOST

#### Run DE update