In [1]:
from src.utils import *


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from shapely.errors import TopologicalError
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc
import re

In [3]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'batch1_6080')
if not os.path.exists(outdata_f):
    # Create the folder if it doesn't exist
    os.makedirs(outdata_f)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client
client = Client()

# Define engines
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True}
          
}


Perhaps you already have a cluster running?
Hosting the HTTP server on port 60530 instead


In [4]:
# 1 READ URBAN CENTRES
# Read shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG_Input.shp')
# Read the GeoPackage file
uc = gpd.read_file(uc_file_path)
uc['CNTR_CODE'].fillna('AA', inplace=True)

# Select cities for processing in this batch
uc_sel = uc.query('Batch==1.0 & CNTR_CODE != "SE"')
uc_sel = uc_sel.sort_values(by='CNTR_CODE')

# Read table to list the cities to process using urban centre code
cities_ls = uc_sel.HDENS_CLST.tolist()


In [5]:
# 2 READ NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the GeoPackage file
agls = gpd.read_file(agls_file_path, layer = 'dbo.DF15_AgglomerationSource_Valid_LatestDelivery', 
                     **engines['pyogrio+arrow'],columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'])



In [6]:
# Read table with HDENS Urban centres information and Agglomerations link
HDENS_AGGL_tbl = pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\UrbanCentres_Agglomerations_csv.csv')
# Join uc code field to this table
HDENS_AGGL_tbl = HDENS_AGGL_tbl.merge(uc[['POPL_2021', 'HDENS_CLST']], on='POPL_2021')

In [7]:
counter= 1
agl_error_ls = []


# Loop through test cities
for uc_city_code in cities_ls[60:80]:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
    agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
    for agl_id in agl_id_city_ls:
        HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
        ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
        aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
        city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
        print(city_agl_cd)
        if len(ctry_code.split('-'))>1:
               print('>1 countries')
               agl_error_ls.append(city_agl_cd + " bordering countries")
        else:
            print('1 country for this urban centre')      
            # Load agglomeration boundary for selected city
            agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
            if agl_city.empty:
                agglomerationId_identifier = 'NotAvailable'
                print ("agglomerationId_identifier Not Available")
                agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
            else:
                output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                if not os.path.exists(output_path):
                    try:
                        # Check noise contour maps GeoPackage file
                        ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export.gpkg')
                        layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'           
                        #ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
                        #layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                        #layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
                        ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'], 
                                            engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                        print ("ncm")

                        # Define the list of noisy classes
                        noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

                        # Create a condition based on the category column
                        condition = ncm['category'].isin(noisy_classes)  # Replace 'category_column' with the actual column name

                        # Specify the condition and create a new category column based on the condition
                        ncm['noisy'] = 0
                        ncm.loc[condition, 'noisy'] = 1
                        ncm = ncm[['noisy', 'geometry']]
                        ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
                        ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                        print ("ncm_dis")

                        # Perform spatial overlay (intersection) 
                        ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
                        print ("ncm_agl")

                        # Aggregate the area with lower band values (quieter bands)
                        ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                        print ("union")

                        ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                        print ("fillna")

                        # Select a subset of columns of interest
                        ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                        print(ncm_dis)
                        

                        # 3 READ UA DATA        
                        # Load GeoPackage info
                        data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'
                        uc_city = uc_sel.query(f'HDENS_CLST == "{uc_city_code}"')
                        cityname = uc_city.HDENS_NAME.values.astype(str)[0]
                        #city_unicodeName_upper = unidecode(aglo_name).upper()
                        city_unicodeName_upper = unidecode(cityname).upper()
                        print(city_unicodeName_upper)
                        folder_path = glob.glob(os.path.join(data_f, f'{ctry_code}*{city_unicodeName_upper}*'))
                        ua_file_path =  glob.glob(os.path.join(folder_path[0], 'Data', f'{ctry_code}*{city_unicodeName_upper}*.gpkg'))
                        layers_ls = fiona.listlayers(ua_file_path[0])
                        print ("layers_ls")

                        # Read the GeoPackage file
                        ua = gpd.read_file(ua_file_path[0], layer= layers_ls[0], 
                                        columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'], engine='pyogrio', 
                                        use_arrow=True, bbox= tuple(uc_city.total_bounds))
                        print ("loaded ua in urban city")

                        # Select 'green' classes
                        uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
                        
                        # 4 SELECT UA INTERSECTING UC
                        # Perform spatial overlay (intersection)
                        uagreen_urbc = gpd.overlay(uagreen, uc_city, how='intersection')

                        # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                        # Perform spatial overlay (intersection)
                        nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                        not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                        # Filter out empty polygons(not empty polygons)
                        green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                        # save to shapefile
                        file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                        green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                        print ("green_not_covered_by_ncm")

                        # 6 IDENTIFY QUIET/NOISY AREAS
                        ## for statistics need to calculate area again
                        # Calculate the area for each geometry and create a new column 'area'
                        nqgreen['area_m2'] = nqgreen['geometry'].area
                        nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                        nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                        nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                        nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                        nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                        # 7 EXPORT GREEN QUIET AREAS (GQA)
                        nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                        GQA = nqgreen.query('noisy == 0')
                        GNA = nqgreen.query('noisy == 1')

                        # Export to shapefile
                        file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                        GQA.to_file(file_path, driver='ESRI Shapefile')
                        print ("GQA")

                        # 8 CREATE CENTROIDS FOR GQA POLYGONS
                        # Create a new GeoDataFrame with centroids as points
                        GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                        GQA_pts['oid'] = GQA.index
                        GQA_pts['fua_name'] = GQA.fua_name
                        GQA_pts['fua_code'] = GQA.fua_code
                        GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                        # Export to shapefile
                        file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                        print ("GQA_pts")
                
                        # Calculate the duration
                        end_time = datetime.now()
                        processing_time = end_time - start_time

                        print ("str(processing_time)")
                        
                        ## write output values into log file
                        uc_km2 = round(uc_city.area.sum()/1000000,2)
                        agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                        ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                        ua_km2 = round(ua.area.sum()/1000000,2)
                        uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                        uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                        nqgreen_m2 = round(nqgreen.area.sum(),2)
                        green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                        GQA_m2 = round(GQA.area.sum(),2)
                        GNA_m2 = round(GNA.area.sum(),2)
                        processing_duration = str(processing_time)

                        log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2, 
                                                ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2, 
                                                green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                        write_log(log_path, log_entry)

                        # Clean up intermediate variables to free memory
                        del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                    except:
                        print("Error " + city_agl_cd)
                        agl_error_ls.append(city_agl_cd +" Topological error")
        counter= counter+1

print(agl_error_ls)

1
2024-07-16 11:26:56.571514
IE-GEOSTAT21_132-AG_IE_00_2-Cork
1 country for this urban centre
ncm
ncm_dis
Error IE-GEOSTAT21_132-AG_IE_00_2-Cork
2
2024-07-16 11:37:16.479902
IE-GEOSTAT21_095-AG_IE_00_3-Limerick
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((3070538.504 3442444.792, 30705...
1    0.0  MULTIPOLYGON (((3084396.082 3443015.945, 30843...
LIMERICK
layers_ls
loaded ua in urban city
green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
3
2024-07-16 11:47:17.915768
LT-GEOSTAT21_051-LT_a_ag0002-Kaunas
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    0.0  MULTIPOLYGON (((5218939.449 3622262.047, 52189...
1    1.0  MULTIPOLYGON (((5218719.688 3622243.389, 52187...
2    0.0  MULTIPOLYGON (((5218942.838 3622267.526, 52189...
KAUNAS
layers_ls
loaded ua in urban city
green_not_covered_by_ncm
GQA
GQA

  ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')


union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4337262.627 4469653.178, 43372...
1    0.0  MULTIPOLYGON (((4353354.561 4472171.182, 43533...
2    0.0  MULTIPOLYGON (((4352620.124 4465458.314, 43523...
TRONDHEIM
layers_ls
loaded ua in urban city
green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
14
2024-07-16 12:33:04.906963
NO-GEOSTAT21_023-AG_NO_00_4-Stavanger/Sandnes
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4097960.505 3980102.986, 40979...
1    0.0  MULTIPOLYGON (((4097979.692 3980114.160, 40979...
2    0.0  MULTIPOLYGON (((4144712.031 4010984.711, 41431...
STAVANGER
layers_ls
loaded ua in urban city
Error NO-GEOSTAT21_023-AG_NO_00_4-Stavanger/Sandnes
15
2024-07-16 12:35:54.205739
PL-GEOSTAT21_318-AG_PL_02_64-Wroclaw
1 country for this urban centre
ncm
Error PL-GEOSTAT21_318-AG_PL_02_64-Wroclaw
16
2024-07-16 12:35:59

  ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')


union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4950353.596 3062052.967, 49503...
1    0.0  MULTIPOLYGON (((4954111.076 3066837.438, 49541...
GORNOSLASKI ZWIAZEK METROPOLITALNY
Error PL-GEOSTAT21_376-AG_PL_24_62-BYTOM
PL-GEOSTAT21_376-AG_PL_24_63-Chorzow
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4961107.818 3058603.407, 49611...
1    0.0  MULTIPOLYGON (((4960942.256 3058847.123, 49609...
GORNOSLASKI ZWIAZEK METROPOLITALNY
Error PL-GEOSTAT21_376-AG_PL_24_63-Chorzow
PL-GEOSTAT21_376-AG_PL_24_65-Dabrowa Gornicza
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4975414.398 3073636.087, 49754...
1    0.0  MULTIPOLYGON (((4989054.187 3066376.255, 49890...
2    0.0  MULTIPOLYGON (((4993731.941 3070696.049, 49937...
GORNOSLASKI ZWIAZEK 

  ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')


union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4949102.286 3051724.100, 49491...
1    0.0  MULTIPOLYGON (((4956592.148 3054209.342, 49566...
GORNOSLASKI ZWIAZEK METROPOLITALNY
Error PL-GEOSTAT21_376-AG_PL_24_72-Ruda Slaska
PL-GEOSTAT21_376-AG_PL_24_75-Sosnowiec
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4981510.107 3058463.049, 49815...
1    0.0  MULTIPOLYGON (((4980105.185 3062157.192, 49800...
2    0.0  MULTIPOLYGON (((4980025.837 3062203.031, 49800...
GORNOSLASKI ZWIAZEK METROPOLITALNY
Error PL-GEOSTAT21_376-AG_PL_24_75-Sosnowiec
PL-GEOSTAT21_376-AG_PL_24_78-ZABRZE
1 country for this urban centre
ncm
ncm_dis
ncm_agl


  ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')


union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4941553.380 3057721.577, 49415...
1    1.0  POLYGON ((4942883.482 3056871.487, 4942883.482...
2    0.0  MULTIPOLYGON (((4948931.592 3063767.802, 49489...
GORNOSLASKI ZWIAZEK METROPOLITALNY
Error PL-GEOSTAT21_376-AG_PL_24_78-ZABRZE
24
2024-07-16 13:03:29.570514
PL-GEOSTAT21_233-AG_PL_08_62-Zielona Gora
1 country for this urban centre
ncm
Error PL-GEOSTAT21_233-AG_PL_08_62-Zielona Gora
25
2024-07-16 13:03:31.282525
PL-GEOSTAT21_171-AG_PL_14_65-Warsaw
1 country for this urban centre
ncm
ncm_dis
ncm_agl


  ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')


union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((5061280.297 3293409.294, 50612...
1    0.0  MULTIPOLYGON (((5061124.525 3285714.116, 50611...
WARSZAWA
layers_ls
loaded ua in urban city


  return lib.intersection(a, b, **kwargs)


green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
26
2024-07-16 13:08:50.296827
PL-GEOSTAT21_068-AG_PL_22_61-GDAŃSK
1 country for this urban centre
ncm
ncm_dis


  ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')


ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((4893356.079 3505839.340, 48933...
1    0.0  MULTIPOLYGON (((4877049.364 3513377.342, 48770...
GDANSK
layers_ls
loaded ua in urban city


  return lib.intersection(a, b, **kwargs)


green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
27
2024-07-16 13:10:36.235201
PL-GEOSTAT21_385-AG_PL_12_61-Cracow
1 country for this urban centre
ncm
ncm_dis
ncm_agl
union
fillna
   noisy                                           geometry
0    1.0  MULTIPOLYGON (((5028977.349 3033360.079, 50289...
1    0.0  MULTIPOLYGON (((5047946.483 3047006.871, 50479...
KRAKOW
layers_ls
loaded ua in urban city
green_not_covered_by_ncm
GQA
GQA_pts
str(processing_time)
['IE-GEOSTAT21_132-AG_IE_00_2-Cork Topological error', 'MT-GEOSTAT21_867-AG_MT_00_1-Malta Noise Agglomeration Topological error', 'NL-GEOSTAT21_190-AG_NL_00_04-Agglomeration Amsterdam-Haarlem Topological error', 'NL-GEOSTAT21_240-AG_NL_00_18-Agglomeration Rotterdam-Dordrecht Topological error', 'NL-GEOSTAT21_905-AG_NL_00_20-Agglomeration Utrecht Topological error', 'NL-GEOSTAT21_177-AG_NL_00_21-Agglomeration Zwolle Topological error', 'NO-GEOSTAT21_023-AG_NO_00_4-Stavanger/Sandnes Topological error', 'PL-GEOSTAT21_318-AG_PL_02

In [8]:
agl_error_ls

['IE-GEOSTAT21_132-AG_IE_00_2-Cork Topological error',
 'MT-GEOSTAT21_867-AG_MT_00_1-Malta Noise Agglomeration Topological error',
 'NL-GEOSTAT21_190-AG_NL_00_04-Agglomeration Amsterdam-Haarlem Topological error',
 'NL-GEOSTAT21_240-AG_NL_00_18-Agglomeration Rotterdam-Dordrecht Topological error',
 'NL-GEOSTAT21_905-AG_NL_00_20-Agglomeration Utrecht Topological error',
 'NL-GEOSTAT21_177-AG_NL_00_21-Agglomeration Zwolle Topological error',
 'NO-GEOSTAT21_023-AG_NO_00_4-Stavanger/Sandnes Topological error',
 'PL-GEOSTAT21_318-AG_PL_02_64-Wroclaw Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_62-BYTOM Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_63-Chorzow Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_65-Dabrowa Gornicza Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_66-Gliwice Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_69-Katowice Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_72-Ruda Slaska Topological error',
 'PL-GEOSTAT21_376-AG_PL_24_75-Sosnowiec Topological error',
 '