In [1]:
from src.utils import *

In [None]:
# Path to data folders
indata_f = r'P:\Environment and Health\Noise\ServiceContract\2024_ServiceContract\QuietAreas'
outdata_f = os.path.join(indata_f, 'OutputData', 'test0207')
# Create the output folder (and any missing parents). exist_ok=True replaces
# the separate existence check and avoids the check-then-create race.
os.makedirs(outdata_f, exist_ok=True)

# 0 PREPARE A LOG FILE FOR QC
# Only the path is built here; the file itself is written later via write_log.
log_file = 'log_GQA_Step1.csv'
log_path = os.path.join(outdata_f, log_file)

# Initialize Dask client
# NOTE(review): Client is expected to come from the star-import of src.utils
# (presumably dask.distributed.Client) - confirm.
client = Client()

# 1 READ URBAN CENTRES
# Read the urban centres shapefile into a GeoDataFrame
uc_file_path = os.path.join(indata_f, 'UrbanCentres', 'HDC2021_RG.shp')
uc = gpd.read_file(uc_file_path)


# 2 READ NOISE DATA
# Load agglomerations delineations
agls_file_path = os.path.join(indata_f, 'NoiseData', 'DF1_5_Agglomerations_20240429.gpkg')

# Read the agglomeration layer from the GeoPackage file
agls = gpd.read_file(agls_file_path, layer='dbo.DF15_AgglomerationSource_Valid_LatestDelivery')

cities_ls = ['Kaunas', 'København', 'Girona', 'Bordeaux']

# Noise contour categories flagged as noisy; every other category value
# (including areas without a contour) ends up with noisy == 0.
noisy_classes = ['Lden5559', 'Lden6064', 'Lden6569', 'Lden7074', 'LdenGreaterThan75']

# Loop through test cities (the [:1] slice restricts the run to the first one).
# For each city the loop intersects the road-noise contours with the
# agglomeration outline, classifies the result as noisy/quiet, intersects the
# Urban Atlas green classes with the urban centre and the noise layer, exports
# the green quiet areas (polygons + centroids) and appends one QC log entry.
# The Dask client is shut down exactly once, after the loop, so that later
# cities can still use it and it is released on the skip/error paths as well.
try:
    for cityLocalName in cities_ls[:1]:
        print(str(cityLocalName))
        start_time = datetime.now()
        print(str(start_time))

        uc_city = uc.query(f'HDENS_NAME == "{cityLocalName}"')
        if uc_city.empty:
            # Without a matching urban centre there is no country code to
            # build the file names from; skip instead of failing on [0].
            print(f'no urban centre found for {cityLocalName}, skipping')
            continue
        ctry_code = uc_city.CNTR_CODE.values.astype(str)[0]
        cityLocalName_unicode = unidecode(cityLocalName)

        # The centroid shapefile is the last export of a run, so its presence
        # is used as the "already processed" marker.
        output_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_GQA_centroids.shp')
        if os.path.exists(output_path):
            continue

        print(f'loading aglomeration city {cityLocalName_unicode}')
        agl_city = agls.query(f'agglomerationName_localName == "{cityLocalName}"')
        # Keep only subset of columns
        agl_city = agl_city[['agglomerationId_identifier', 'agglomerationName_nameEng', 'geometry']]
        if agl_city.empty:
            # No agglomeration delineation for this city: nothing to process.
            agglomerationId_identifier = 'NotAvailable'
            print ("agglomerationId_identifier")
            continue

        agglomerationId_identifier = agl_city.agglomerationId_identifier.values.astype(str)[0]
        print ("agglomerationId_identifier")

        # Check noise contour maps GeoPackage file
        ncm_file_path = os.path.join(indata_f, 'NoiseData', f'Noise_20202025_export_{ctry_code}.gpkg')
        layerName = f'dbodf48_agg_noisecontours_roadsinagglomeration_lden_valid_latestdelivery_poly_{ctry_code}'
        ncm = gpd.read_file(ncm_file_path, layer=layerName)
        print ("ncm")

        # subset columns
        ncm_gdf = ncm[['category', 'geometry']]

        # Convert GeoDataFrames to Dask GeoDataFrames. The Dask versions get
        # their own names so that agl_city stays a plain GeoDataFrame and its
        # area can be summed eagerly for the log further down.
        # NOTE(review): dg is expected to come from the star-import of
        # src.utils (presumably dask_geopandas) - confirm it provides overlay.
        ncm_dg = dg.from_geopandas(ncm_gdf, npartitions=10)
        agl_city_dg = dg.from_geopandas(agl_city, npartitions=10)

        # Perform spatial overlay (intersection): noise contours inside the agglomeration
        dask_overlay = dg.overlay(ncm_dg, agl_city_dg, how='intersection')
        ncm_agl = dask_overlay.compute()
        print ("ncm_agl")

        # Union with the agglomeration outline so that parts of the
        # agglomeration without any contour are kept; those parts have no
        # category and are filled with 0 (fillna returns a new Series, so the
        # result has to be assigned back).
        ncm_agl_city = dg.overlay(ncm_agl, agl_city_dg, how='union')
        ncm_agl_city = ncm_agl_city.compute()
        ncm_agl_city['category'] = ncm_agl_city['category'].fillna(0)

        # Select a subset of columns of interest; copy() so the new column
        # below is written to an independent frame, not a view of ncm_agl_city.
        ncm_dis = ncm_agl_city[['category', 'geometry']].copy()

        # Flag noisy polygons (1) versus everything else (0)
        condition = ncm_dis['category'].isin(noisy_classes)
        ncm_dis['noisy'] = 0
        ncm_dis.loc[condition, 'noisy'] = 1
        ncm_dis = ncm_dis[['noisy', 'geometry']]

        # Dissolve to one geometry per noisy value
        ncm_dis_dg = dg.from_geopandas(ncm_dis, npartitions=10)
        ncm_dis = ncm_dis_dg.dissolve(by='noisy').compute().reset_index()
        print ("ncm_dis")

        # 3 READ UA DATA
        # Locate the Urban Atlas GeoPackage of the city by country code and
        # upper-cased ASCII city name.
        data_f = r'A:\Copernicus\UrbanAtlas\UrbanAtlas\UA2018'
        city_unicodeName_upper = cityLocalName_unicode.upper()
        folder_path = glob.glob(os.path.join(data_f, f'{ctry_code}*{city_unicodeName_upper}*'))
        if not folder_path:
            raise FileNotFoundError(
                f'no Urban Atlas folder matching {ctry_code}*{city_unicodeName_upper}* in {data_f}')
        ua_file_path = glob.glob(os.path.join(folder_path[0], 'Data', f'{ctry_code}*{city_unicodeName_upper}*.gpkg'))
        if not ua_file_path:
            raise FileNotFoundError(
                f'no Urban Atlas GeoPackage matching {ctry_code}*{city_unicodeName_upper}*.gpkg in {folder_path[0]}')
        layers_ls = fiona.listlayers(ua_file_path[0])
        print ("layers_ls")

        # Read the first layer of the GeoPackage file
        ua = gpd.read_file(ua_file_path[0], layer=layers_ls[0])
        print ("ua")

        # Select 'green' classes (code_2018 values 14100 and 31000)
        uagreen = ua.query('code_2018 == "14100" or code_2018 == "31000"')
        uagreen_dg = dg.from_geopandas(uagreen, npartitions=10)

        # 4 SELECT UA INTERSECTING UC
        # Perform spatial overlay (intersection)
        uc_city_dg = dg.from_geopandas(uc_city, npartitions=10)
        uagreen_urbc = dg.overlay(uagreen_dg, uc_city_dg, how='intersection').compute()

        # 5 IDENTIFY GREEN AREAS EXCLUDED (NOT COVERED BY NCM)
        # Perform spatial overlay (intersection)
        nqgreen = dg.overlay(uagreen_urbc, ncm_dis, how='intersection').compute()  # noisy/quiet green
        # Green area minus the part of it that overlaps any noisy/quiet polygon
        nqgreen_union = nqgreen.geometry.unary_union
        covered = uagreen_urbc.geometry.intersection(nqgreen_union)
        not_covered = uagreen_urbc.geometry.difference(covered)
        # Keep only the non-empty remainders
        green_not_covered_by_ncm = not_covered[~not_covered.is_empty]

        # save to shapefile
        file_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_green_not_covered_by_ncm.shp')
        green_not_covered_by_ncm.to_file(file_path, driver='ESRI Shapefile')
        print ("green_not_covered_by_ncm")

        # 6 IDENTIFY QUIET/NOISY AREAS
        # Areas have to be recalculated after the overlay.
        # NOTE(review): the m2/ha/km2 names assume a projected CRS in metres - confirm.
        nqgreen['area_m2'] = nqgreen['geometry'].area
        nqgreen['area_ha'] = (nqgreen['area_m2'] * 0.0001).round(2)
        nqgreen['area_km2'] = (nqgreen['area_ha'] * 0.01).round(2)
        # Area totals per green class and noisy flag (kept for inspection only)
        nqgreen_area = nqgreen.groupby(['code_2018', 'noisy'])['area_m2'].sum().reset_index()
        nqgreen_area['area_ha'] = (nqgreen_area['area_m2'] * 0.0001).round(2)
        nqgreen_area['area_km2'] = (nqgreen_area['area_ha'] * 0.01).round(2)

        # 7 EXPORT GREEN QUIET AREAS (GQA)
        nqgreen = nqgreen[['country', 'fua_name', 'fua_code', 'HDENS_2011', 'code_2018', 'class_2018', 'noisy', 'area_m2', 'area_ha', 'area_km2', 'geometry']]
        GQA = nqgreen.query('noisy == 0')
        GNA = nqgreen.query('noisy == 1')

        # Export to shapefile
        file_path = os.path.join(outdata_f, f'{ctry_code}_{cityLocalName_unicode}_GQA.shp')
        GQA.to_file(file_path, driver='ESRI Shapefile')
        print ("GQA")

        # 8 CREATE CENTROIDS FOR GQA POLYGONS
        # Create a new GeoDataFrame with centroids as points, carrying over
        # the GQA index and identifying attributes
        GQA_pts = gpd.GeoDataFrame(geometry=GQA['geometry'].centroid)
        GQA_pts['oid'] = GQA.index
        GQA_pts['fua_name'] = GQA.fua_name
        GQA_pts['fua_code'] = GQA.fua_code
        GQA_pts['HDENS_2011'] = GQA.HDENS_2011

        # Export to shapefile (this is the file checked at the top of the loop)
        GQA_pts.to_file(output_path, driver='ESRI Shapefile')
        print ("GQA_pts")

        # Calculate the duration
        end_time = datetime.now()
        processing_time = end_time - start_time
        print(str(processing_time))

        ## write output values into log file
        uc_km2 = round(uc_city.area.sum() / 1000000, 2)
        agl_city_km2 = round(agl_city.area.sum() / 1000000, 2)
        ncm_agl_city_km2 = round(ncm_agl_city.area.sum() / 1000000, 2)
        ua_km2 = round(ua.area.sum() / 1000000, 2)
        uagreen_km2 = round(uagreen.area.sum() / 1000000, 2)
        uagreen_urbc_km2 = round(uagreen_urbc.area.sum() / 1000000, 2)
        nqgreen_m2 = round(nqgreen.area.sum(), 2)
        green_not_covered_by_ncm_m2 = round(green_not_covered_by_ncm.area.sum(), 2)
        GQA_m2 = round(GQA.area.sum(), 2)
        GNA_m2 = round(GNA.area.sum(), 2)

        log_entry = create_log_entry(cityLocalName, agglomerationId_identifier, uc_km2, agl_city_km2,
                                     ncm_agl_city_km2, ua_km2, uagreen_km2, uagreen_urbc_km2, nqgreen_m2,
                                     green_not_covered_by_ncm_m2, GQA_m2, GNA_m2, processing_time)
        write_log(log_path, log_entry)

        # Clean up intermediate variables to free memory before the next city
        del agl_city, agl_city_dg, ncm, ncm_dg, ncm_agl, ncm_agl_city, ncm_dis, ua, uagreen, uagreen_urbc, nqgreen, green_not_covered_by_ncm, GQA, GNA, GQA_pts
finally:
    # Shut down the Dask client once all cities are done (or on failure)
    client.shutdown()


            