In [2]:
from src.utils import *


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# Load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import fiona
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from shapely.ops import unary_union
from shapely.errors import TopologicalError
from unidecode import unidecode
import glob
import csv
from datetime import datetime
import dask.dataframe as dd
import dask_geopandas as dg
from dask.distributed import Client
import gc
import re

In [4]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'batch1_0020')
# exist_ok=True creates the folder only when it is missing; this avoids the
# check-then-create race and keeps the cell safe to re-run
os.makedirs(outdata_f, exist_ok=True)

# 0 PREPARE A LOG FILE FOR QC
log_file = 'log_GQA_Step1.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client (used by the dask_geopandas dissolve in the main loop)
client = Client()

# Keyword-argument presets for gpd.read_file, keyed by I/O engine name
engines = {
    'fiona': {'engine': 'fiona'},
    'pyogrio': {'engine': 'pyogrio'},
    'pyogrio+arrow': {'engine': 'pyogrio', 'use_arrow': True}
}


Perhaps you already have a cluster running?
Hosting the HTTP server on port 55043 instead


In [16]:
# 1 READ URBAN CENTRES
# Read the urban centres shapefile
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG_InputUpdateB2B3B4.shp')
uc = gpd.read_file(uc_file_path)
# Fill missing country codes with the placeholder 'AA'. Assign the result back:
# a chained `uc[col].fillna(..., inplace=True)` acts on a temporary object and
# is deprecated / a no-op under pandas copy-on-write.
uc['CNTR_CODE'] = uc['CNTR_CODE'].fillna('AA')

# Select cities for processing in this batch, ordered by country code
# (earlier selection, kept for reference: 'Batch==1.0 & CNTR_CODE != "SE"')
uc_sel = uc.query('Batch>0.0').sort_values(by='CNTR_CODE')

# List the cities to process using the urban centre code
cities_ls = uc_sel.HDENS_CLST.tolist()
len(cities_ls)


230

In [17]:
## Drop from cities_ls every urban centre that has a shapefile in the conflict folder
uc_2remove_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\oct_run\GQA_conflictives'
shps = glob.glob(os.path.join(uc_2remove_path, '*.shp'))
for shp_path in shps:
    # The urban centre code is the file name minus its last 13 characters
    # (same result as the former [:-7][:-6] double slice)
    code = os.path.basename(shp_path)[:-13]
    print(code)
    # list.remove raises ValueError for a missing item; guard it so the cell
    # can be re-run and tolerates codes that are not part of this batch
    if code in cities_ls:
        cities_ls.remove(code)
    else:
        print(f'{code} not in cities_ls, nothing to remove')

len(cities_ls)

GEOSTAT21_402


229

In [18]:
# Folder with the final GQA shapefiles; the main loop looks for
# '<urban centre code>_finalGQA.shp' here to decide whether a city is done
QGA_Final_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\OutputData\GQA'

# Root folder of the Urban Atlas 2018 data
ua_data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'

# Crosswalk (translator) table containing the different codes from the input sources
codes_path = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\Codes.csv'
codes = pd.read_csv(codes_path)

# Noise data: agglomeration delineations, restricted to the id, English name
# and geometry columns and read through pyogrio with Arrow enabled
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')
agls = gpd.read_file(
    agls_file_path,
    layer='dbo.DF15_AgglomerationSource_Valid_LatestDelivery',
    columns=['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry'],
    engine='pyogrio',
    use_arrow=True,
)

In [19]:
# Link table between HDENS urban centres and noise agglomerations, extended
# with the urban centre code (HDENS_CLST) taken from `uc`.
# NOTE(review): the join key is POPL_2021; rows sharing the same value would be
# matched many-to-many -- confirm the key is unique per urban centre.
HDENS_AGGL_tbl = (
    pd.read_csv(r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas\Processing\UrbanCentres_Agglomerations_csv.csv')
    .merge(uc[['POPL_2021', 'HDENS_CLST']], on='POPL_2021')
)

#### Run FR (France) update

In [20]:
# Sanity check: number of urban centres queued for the processing loop below
len(cities_ls)

229

In [21]:
# Main processing loop.
# For every urban centre in cities_ls that has Urban Atlas data and no final GQA
# shapefile yet, each linked agglomeration with country code 'FR' is processed:
#   noise contours -> noisy/quiet dissolve -> clip to agglomeration ->
#   intersect with green Urban Atlas classes inside the urban centre ->
#   export quiet green areas (GQA) and their centroids -> append a QC log entry.
# Failures are collected in agl_error_ls and printed at the end.
counter = 1
agl_error_ls = []

# Noise categories flagged as noisy (noisy = 1); all other categories get noisy = 0
noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

# Loop through test cities
for uc_city_code in cities_ls:
    print(counter)
    start_time = datetime.now()
    print(str(start_time))

    # Urban Atlas dataset name for this urban centre, from the crosswalk table
    ua_path = codes.query(f'HDENS_CLST=="{uc_city_code}"').UA2018.values[0].strip()
    if ua_path == 'not available':
        agl_error_ls.append(uc_city_code +" UA not available")

    else:
        # Skip urban centres that already have a final GQA shapefile
        inGQA = os.path.join(QGA_Final_path, '{}_finalGQA.shp'.format(uc_city_code))
        if not os.path.exists(inGQA):
            urban_center = uc.query(f'HDENS_CLST=="{uc_city_code}"')
            HDENS_AGGL_city = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}'")
            agl_id_city_ls = HDENS_AGGL_city.agglomerationId_identifier.values.astype(str).tolist()
            for agl_id in agl_id_city_ls:
                HDENS_AGGL_city_aglid = HDENS_AGGL_tbl.query(f"HDENS_CLST=='{uc_city_code}' & agglomerationId_identifier=='{agl_id}'")
                ctry_code = HDENS_AGGL_city_aglid.CNTR_CODE.values.astype(str)[0]
                # Keep only the first token of the English name (before any '/' or space)
                aglo_name = HDENS_AGGL_city_aglid.agglomerationName_nameEng.values.astype(str)[0]
                aglo_name = aglo_name.split('/')[0]
                aglo_name = aglo_name.split(' ')[0]

                # Multi-country codes look like 'XX-YY'; only the first one is used
                if len(ctry_code.split('-'))>1:
                    print(F'>1 countries {ctry_code}')
                    ctry_code = ctry_code.split('-')[0]

                city_agl_cd = f"{ctry_code}-{uc_city_code}-{agl_id}-{aglo_name}"
                print(city_agl_cd)
                # This run only (re)processes French agglomerations
                if ctry_code=='FR':
                    print('Loading agglomeration boundary for selected city')
                    # Load agglomeration boundary for selected city
                    agl_city = agls.query(f'agglomerationId_identifier == "{agl_id}"')
                    if agl_city.empty:
                        print ("agglomerationId_identifier Not Available")
                        agl_error_ls.append(city_agl_cd + " agglomerationId_identifier Not Available")
                    else:
                        # The centroids file is the last output written, so its
                        # presence marks the agglomeration as already processed
                        output_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                        if not os.path.exists(output_path):
                            try:
                                # 2 READ NOISE CONTOUR MAPS (updated FR export),
                                # limited to the agglomeration bounding box.
                                # The regular per-country export is
                                # f'Noise_20202025_export_{ctry_code}.gpkg'.
                                ncm_file_path = os.path.join(indata_f, 'NoiseData', 'Noise_20202025_export_FR_update.gpkg')
                                layerName = f'dbo.DF48_agg_NoiseContours_roadsInAgglomeration_Lden_Valid_LatestDelivery_Poly_{ctry_code}'
                                ncm = gpd.read_file(ncm_file_path, layer=layerName, columns=['category', 'geometry'],
                                                    engine='pyogrio', use_arrow=True, bbox= tuple(agl_city.total_bounds))
                                print ("ncm")

                                # Flag each contour polygon as noisy (1) or quiet (0)
                                condition = ncm['category'].isin(noisy_classes)
                                ncm['noisy'] = 0
                                ncm.loc[condition, 'noisy'] = 1
                                ncm = ncm[['noisy', 'geometry']]
                                # Dissolve by the noisy flag with dask_geopandas
                                ncm_dis_dg = dg.from_geopandas(ncm, npartitions=10)
                                ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
                                print ("ncm_dis")

                                # Perform spatial overlay (intersection)
                                ncm_agl = gpd.overlay(ncm_dis, agl_city, how='intersection')
                                print ("ncm_agl")

                                # Union with the agglomeration so that parts without
                                # noise contours are kept; they get noisy = 0 below
                                ncm_agl_city = gpd.overlay(ncm_agl, agl_city, how='union')
                                print ("union")

                                ncm_agl_city['noisy'] = ncm_agl_city.noisy.fillna(0)
                                print ("fillna")

                                # Select a subset of columns of interest
                                ncm_dis = ncm_agl_city[['noisy', 'geometry']]
                                print(ncm_dis)


                                # 3 READ URBAN ATLAS DATA
                                # Build the path from components; the former
                                # f'{ua_path}\Data\{ua_path}.gpkg' relied on invalid
                                # escape sequences ('\D', '\{')
                                file_path = os.path.join(ua_data_f, ua_path, 'Data', f'{ua_path}.gpkg')
                                # Read the GeoPackage file (layer name = dataset name minus its last 5 characters)
                                ua = gpd.read_file(file_path, layer= ua_path[:-5],
                                            columns= ['country', 'fua_name', 'fua_code','code_2018', 'class_2018', 'geometry'],
                                            engine='pyogrio',
                                            use_arrow=True, bbox= tuple(urban_center.total_bounds))
                                print ("loaded ua in urban city")

                                # Select 'green' classes
                                uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')

                                # 4 SELECT UA INTERSECTING UC
                                # Perform spatial overlay (intersection)
                                uagreen_urbc = gpd.overlay(uagreen, urban_center, how='intersection')

                                # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
                                # Perform spatial overlay (intersection)
                                nqgreen = gpd.overlay(uagreen_urbc, ncm_dis, how='intersection') #noisy/quiet green
                                not_covered = uagreen_urbc.geometry.difference(uagreen_urbc.geometry.intersection(nqgreen.geometry.unary_union))
                                # Keep only the non-empty geometries
                                green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

                                # Export of the not-covered areas is currently disabled
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_green_not_covered_by_ncm.shp')
                                #green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
                                print ("green_not_covered_by_ncm")

                                # 6 IDENTIFY QUIET/NOISY AREAS
                                # Areas must be recalculated after the overlays
                                nqgreen['area_m2'] = nqgreen['geometry'].area
                                nqgreen['area_ha'] = round(nqgreen['area_m2']* 0.0001,2)
                                nqgreen['area_km2'] = round(nqgreen['area_ha']* 0.01,2)
                                nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
                                nqgreen_area['area_ha'] = round(nqgreen_area['area_m2']* 0.0001,2)
                                nqgreen_area['area_km2'] = round(nqgreen_area['area_ha']* 0.01,2)

                                # 7 EXPORT GREEN QUIET AREAS (GQA)
                                nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy',  'area_m2', 'area_ha', 'area_km2', 'geometry']]
                                GQA = nqgreen.query('noisy == 0')
                                GNA = nqgreen.query('noisy == 1')

                                # Export to shapefile
                                print ('Export to shapefile')
                                GQA_uc_aglo_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA.shp')
                                print (GQA_uc_aglo_path)
                                GQA.to_file(GQA_uc_aglo_path, driver='ESRI Shapefile')
                                print ("GQA")

                                # 8 CREATE CENTROIDS FOR GQA POLYGONS
                                # Create a new GeoDataFrame with centroids as points
                                GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
                                GQA_pts['oid'] = GQA.index
                                GQA_pts['fua_name'] = GQA.fua_name
                                GQA_pts['fua_code'] = GQA.fua_code
                                GQA_pts['HDENS_2011'] = GQA.HDENS_2011

                                # Export to shapefile
                                file_path = os.path.join(outdata_f, f'{city_agl_cd}_GQA_centroids.shp')
                                GQA_pts.to_file(file_path, driver='ESRI Shapefile')

                                print ("GQA_pts")

                                # Calculate the duration.
                                # NOTE(review): start_time is taken once per urban
                                # centre, so with several agglomerations the later
                                # ones include the time of the earlier ones.
                                end_time = datetime.now()
                                processing_time = end_time - start_time

                                # Print the actual duration (previously the literal
                                # text "str(processing_time)" was printed)
                                print (str(processing_time))

                                ## write output values into log file
                                # urban_center is the urban centre geometry; the
                                # former name `uc_city` is not defined in this
                                # notebook and made every run end in the except branch
                                uc_km2 = round(urban_center.area.sum()/1000000,2)
                                agl_city_km2 = round(agl_city.area.sum()/1000000,2)
                                ncm_agl_city_km2 = round(ncm_agl_city.area.sum()/1000000,2)
                                ua_km2 = round(ua.area.sum()/1000000,2)
                                uagreen_km2 = round(uagreen.area.sum()/1000000,2)
                                uagreen_urbc_km2 = round(uagreen_urbc.area.sum()/1000000,2)
                                nqgreen_m2 = round(nqgreen.area.sum(),2)
                                green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(),2)
                                GQA_m2 = round(GQA.area.sum(),2)
                                GNA_m2 = round(GNA.area.sum(),2)

                                # NOTE(review): the timedelta itself is passed, as
                                # before; confirm create_log_entry does not expect
                                # a string instead
                                log_entry = create_log_entry(aglo_name, agl_id, uc_km2, agl_city_km2,
                                                        ncm_agl_city_km2,ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2,
                                                        green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
                                write_log(log_path, log_entry)

                                # Clean up intermediate variables to free memory
                                del agl_city, ncm, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
                            except Exception as exc:
                                # Catch Exception (not a bare except) so Ctrl-C still
                                # interrupts the run, and record the real error type
                                # instead of labelling everything "Topological error"
                                print(f"Error {city_agl_cd}: {exc!r}")
                                agl_error_ls.append(f"{city_agl_cd} {type(exc).__name__}: {exc}")
    counter= counter+1

print(agl_error_ls)

1
2024-10-23 11:28:45.146250
2
2024-10-23 11:28:45.158257
3
2024-10-23 11:28:45.163254
4
2024-10-23 11:28:45.167252
5
2024-10-23 11:28:45.170258
6
2024-10-23 11:28:45.173255
7
2024-10-23 11:28:45.176255
8
2024-10-23 11:28:45.179257
9
2024-10-23 11:28:45.182250
10
2024-10-23 11:28:45.185252
11
2024-10-23 11:28:45.196251
12
2024-10-23 11:28:45.200258
13
2024-10-23 11:28:45.202257
14
2024-10-23 11:28:45.205252
15
2024-10-23 11:28:45.206253
16
2024-10-23 11:28:45.210259
17
2024-10-23 11:28:45.211262
18
2024-10-23 11:28:45.216263
19
2024-10-23 11:28:45.223266
CH-GEOSTAT21_523-AG_CH_00_13-Biel
20
2024-10-23 11:28:45.237256
21
2024-10-23 11:28:45.238255
22
2024-10-23 11:28:45.241256
23
2024-10-23 11:28:45.251263
24
2024-10-23 11:28:45.264254
25
2024-10-23 11:28:45.280261
26
2024-10-23 11:28:45.283262
27
2024-10-23 11:28:45.296255
28
2024-10-23 11:28:45.302254
29
2024-10-23 11:28:45.311255
CZ-GEOSTAT21_403-AG_CZ_00_003-Ostrava
30
2024-10-23 11:28:45.322260
CZ-GEOSTAT21_404-AG_CZ_00_003-Ostrava