In [None]:
from google.colab import drive

# Mounting Google Drive is interactive: Colab asks for authorization first.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
! pip install rasterio -q

In [None]:
!apt install gdal-bin -q

In [None]:
import os
import glob
import requests
import zipfile
from osgeo import gdal, osr, ogr
import rasterio
import pandas as pd
import geopandas as gpd
import numpy as np
import subprocess
from rasterio.warp import reproject, Resampling
from rasterio.windows import Window
from concurrent.futures import ThreadPoolExecutor


In [None]:


# Google Drive folder that receives the extracted archive contents
extract_dir = "/content/drive/MyDrive/GHS_POP_DATA"

# Make sure the folder is there before anything is extracted into it
if not os.path.isdir(extract_dir):
    os.makedirs(extract_dir, exist_ok=True)

# Function to download, unzip, and delete zip file
def download_and_extract(url, extract_to):
    """Download the zip archive at ``url``, extract it into ``extract_to``, then delete the archive.

    The archive is saved in the current working directory under the last path
    segment of ``url`` and is removed again whether or not extraction succeeds.

    Args:
        url: HTTP(S) address of a zip archive.
        extract_to: Directory that receives the archive contents.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    local_filename = url.split("/")[-1]
    try:
        # Stream to disk in chunks; response.content would hold the whole
        # archive in memory before a single byte is written.
        with requests.get(url, stream=True, timeout=60) as response:
            # Fail here rather than saving an HTML error page as a ".zip".
            response.raise_for_status()
            with open(local_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        # Unzip the file
        with zipfile.ZipFile(local_filename, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    finally:
        # Delete the zip file, including partial downloads left by a failure
        if os.path.exists(local_filename):
            os.remove(local_filename)

# One archive per five-year epoch: 1975, 1980, ..., 2025
epochs = range(1975, 2030, 5)
for year in epochs:
    url = f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2023A/GHS_POP_E{year}_GLOBE_R2023A_4326_30ss/V1-0/GHS_POP_E{year}_GLOBE_R2023A_4326_30ss_V1_0.zip"
    download_and_extract(url, extract_dir)

# Show what ended up in the extraction folder
extracted_files = os.listdir(extract_dir)
print(extracted_files)


In [None]:
#build vrt
folder_pop = "/content/drive/MyDrive/GHS_POP_DATA"
# glob returns matches in arbitrary order, but calculate_exposure picks the
# band as (year - 1975) // 5 + 1, so the inputs must be stacked chronologically.
# Sorting by name does that as long as the .tif names embed the epoch the same
# way the downloaded archives do (GHS_POP_E{year}_...) -- TODO confirm.
tif_files = sorted(glob.glob(os.path.join(folder_pop, '*.tif')))
if not tif_files:
    raise FileNotFoundError(f"No .tif files found in {folder_pop}")
os.chdir(folder_pop)
vrt_path ='GHS_POP.vrt'  # path to vrt to build
# separate=True puts every input file into its own band of the VRT
ds = gdal.BuildVRT(vrt_path, tif_files, options=gdal.BuildVRTOptions(separate=True,srcNodata=-200, VRTNodata=np.nan))
# With GDAL exceptions disabled (the default), failure is signalled by None
if ds is None:
    raise RuntimeError(f"gdal.BuildVRT failed to create {vrt_path}")
ds.FlushCache()
ds = None  # release the dataset so the VRT is closed on disk

In [None]:
###### Statistics section: start running from here

In [None]:
# WPP2022 tables: estimate rows first, prediction rows appended after them
df_wpp_estimate = pd.read_csv("/content/drive/MyDrive/CCRI/WPP2022_estimate.csv")
df_wpp_prediction = pd.read_csv("/content/drive/MyDrive/CCRI/WPP2022_prediction.csv")
df_wpp = pd.concat([df_wpp_estimate, df_wpp_prediction], ignore_index=True)

# Country boundaries, with all polygons sharing an ISO3 code merged into one
# row and 'iso3' restored as a regular column
country_bnd = (
    gpd.read_file('/content/drive/MyDrive/CCRI/global_bnd_adm0.geojson')
    .dissolve(by='iso3')
    .reset_index()
)

In [None]:
# Inputs: the population VRT and the two rasters compared against each other
# (original note: 1km resolution)
pop_vrt_path = "/content/drive/MyDrive/GHS_POP_DATA/GHS_POP.vrt"
file_1960s = "/content/drive/MyDrive/hwi_stats/dgca/average_hwi_1960s.tif"
file_2020s = "/content/drive/MyDrive/hwi_stats/dgca/average_hwi_2020s.tif"

# Output folder for the per-country intermediates and the final CSV
output_dir = "/content/drive/MyDrive/POP_stat"
if not os.path.isdir(output_dir):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
import os
import rasterio
import numpy as np
import pandas as pd
from shapely.geometry import mapping
from rasterio.mask import mask
from concurrent.futures import ProcessPoolExecutor
from shapely.geometry import MultiPolygon
import tempfile
import fiona


def _remove_files(paths):
    """Delete every file in ``paths``, skipping the ones that were never created."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def _clip_raster(input_path, output_path, cutline_path):
    """Clip ``input_path`` to the polygons in ``cutline_path`` and write an LZW-compressed GeoTIFF."""
    warp_options = gdal.WarpOptions(
        cutlineDSName=cutline_path,
        cropToCutline=True,
        dstAlpha=True,
        format='GTiff',
        creationOptions=["COMPRESS=LZW"]
    )
    clipped = gdal.Warp(destNameOrDestDS=output_path, srcDSOrSrcDSTab=input_path, options=warp_options)
    # With GDAL exceptions disabled (the default), a failed warp returns None
    # instead of raising, so turn that into an error the caller can handle.
    if clipped is None:
        raise RuntimeError(f"gdal.Warp produced no output for {input_path}")
    # Dropping the reference closes the dataset so the file is complete on disk.
    clipped = None


def _read_metric_bands(file_path):
    """Read bands 1-4 of ``file_path`` into a dict keyed by metric name.

    'hw_count' is band 1, 'hw_days' is band 2 divided by band 1 (0 where band 1
    is 0), 'hw_temp_diff' is band 3 and 'high_temp_degree_days' is band 4.
    A read error is printed and whatever was read before it is returned.
    """
    data = {}
    try:
        with rasterio.open(file_path) as src:
            hw_count = src.read(1)
            data['hw_count'] = hw_count
            hw_days = src.read(2)
            with np.errstate(divide='ignore', invalid='ignore'):
                data['hw_days'] = np.where(hw_count == 0, 0, hw_days / hw_count)
            data['hw_temp_diff'] = src.read(3)
            data['high_temp_degree_days'] = src.read(4)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
    return data


def _percentage_increase(baseline, current):
    """Per-pixel percentage change from ``baseline`` to ``current``.

    Pixels that are 0 in both arrays give 0; pixels with a 0 baseline and a
    non-zero, non-NaN current value give +inf; everything else is
    (current - baseline) / baseline * 100.
    """
    increase = np.zeros_like(baseline, dtype=float)
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN != 0 is True, so NaN has to be excluded explicitly; otherwise a
        # pixel with missing current data would count as an infinite increase.
        appeared = (baseline == 0) & (current != 0) & ~np.isnan(current)
        increase[appeared] = np.inf

        nonzero = baseline != 0
        increase[nonzero] = ((current[nonzero] - baseline[nonzero]) / baseline[nonzero]) * 100
    return increase


def calculate_exposure(iso3, pop_vrt_path, file_1960s, file_2020s, df_wpp, year, output_dir):
    """Compute child-population exposure metrics for one country.

    The population raster and both comparison rasters are clipped to the
    country's boundary (taken from the module-level ``country_bnd``). For each
    metric, the child population is summed over pixels whose percentage
    increase from ``file_1960s`` to ``file_2020s`` exceeds 50, 100 and 200.
    Temporary clip files are written to ``output_dir`` and removed before
    returning, on success and on failure alike.

    Args:
        iso3: ISO3 code selecting the rows of ``country_bnd`` and ``df_wpp``.
        pop_vrt_path: Multi-band population raster; band ``(year - 1975) // 5 + 1`` is used.
        file_1960s: Baseline raster, read by band number (1-4).
        file_2020s: Comparison raster with the same band layout.
        df_wpp: DataFrame with 'ISO3 Alpha-code', 'Year' and '0-17' columns;
            '0-17' is used as a percentage of the total population.
        year: Selects both the ``df_wpp`` row and the population band.
        output_dir: Directory for the temporary clip files.

    Returns:
        A dict with 'iso3', 'total_pop', 'child_pop', 'child_percent' and one
        'exposure_{metric}_{threshold}' entry per metric and threshold, or
        None when there is no usable ``df_wpp`` row, clipping fails, or the
        clipped population raster cannot be read.
    """
    print(f"Processing {iso3}")
    child_percent = df_wpp.loc[(df_wpp['ISO3 Alpha-code'] == iso3) & (df_wpp['Year'] == year), '0-17']
    if child_percent.empty:
        print(f"No population data for {iso3} in {year}")
        return None
    # Validate before any raster work so a bad table value costs nothing.
    try:
        child_percent = float(child_percent.values[0])
    except (TypeError, ValueError) as e:
        print(f"Invalid child percentage for {iso3} in {year}: {e}")
        return None

    # All temporary files live in output_dir and are named per country, so
    # concurrent calls for different countries do not collide.
    filtered_geojson_path = os.path.join(output_dir, f"filtered_{iso3}.geojson")
    subset_pop_path = os.path.join(output_dir, f"{iso3}_pop_subset.tif")
    subset_T1_path = os.path.join(output_dir, f"{iso3}_T1_subset.tif")
    subset_T2_path = os.path.join(output_dir, f"{iso3}_T2_subset.tif")
    temp_paths = [subset_pop_path, subset_T1_path, subset_T2_path, filtered_geojson_path]

    try:
        # Write this country's boundary to disk to use as the warp cutline.
        filtered_gdf = country_bnd[country_bnd['iso3'] == iso3]
        filtered_gdf.to_file(filtered_geojson_path, driver='GeoJSON')

        # Clip each input raster to the subset paths
        try:
            _clip_raster(pop_vrt_path, subset_pop_path, filtered_geojson_path)
            _clip_raster(file_1960s, subset_T1_path, filtered_geojson_path)
            _clip_raster(file_2020s, subset_T2_path, filtered_geojson_path)
        except Exception as e:
            print(f"Error during raster clipping: {e}")
            return None

        pop_band = (year - 1975) // 5 + 1
        try:
            with rasterio.open(subset_pop_path) as pop_src:
                pop_data = pop_src.read(pop_band)
        except Exception as e:
            print(f"Error reading {subset_pop_path}: {e}")
            return None
        total_pop = np.nansum(pop_data)
        child_pop = pop_data * (child_percent / 100)

        data_T1 = _read_metric_bands(subset_T1_path)
        data_T2 = _read_metric_bands(subset_T2_path)

        results = {'iso3': iso3}
        results["total_pop"] = total_pop
        results["child_pop"] = np.nansum(child_pop)
        results["child_percent"] = child_percent

        for key, baseline in data_T1.items():
            if key not in data_T2:
                # The comparison raster could only be read partially; skip the
                # metric instead of failing the whole country with a KeyError.
                print(f"Skipping {key} for {iso3}: not available in {subset_T2_path}")
                continue
            # NOTE(review): indexing child_pop with this mask assumes the clipped
            # rasters share the population raster's grid and shape -- confirm.
            percentage_increase = _percentage_increase(baseline, data_T2[key])
            with np.errstate(invalid='ignore'):
                for threshold in [50, 100, 200]:
                    mask_thresh = percentage_increase > threshold
                    results[f"exposure_{key}_{threshold}"] = np.nansum(child_pop[mask_thresh])

        return results
    finally:
        # Clean up the temporary files on every exit path.
        _remove_files(temp_paths)


In [None]:
# Parallel processing setup
results_list = []
iso3_codes = country_bnd['iso3'].unique()
year = 2025
max_workers = 3
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(calculate_exposure, iso3, pop_vrt_path, file_1960s, file_2020s, df_wpp, year, output_dir): iso3 for iso3 in iso3_codes}

    # Collect in submission order so the CSV rows follow iso3_codes.
    for future, iso3_code in futures.items():
        try:
            result = future.result()
        except Exception as e:
            # An exception in one country must not abort the loop and throw
            # away the results already gathered for all the others.
            print(f"Error processing {iso3_code}: {e}")
            continue
        if result is not None:
            results_list.append(result)

# Combine results and write to a single CSV
# NOTE(review): the file name says 1975 while `year` is 2025 -- confirm which is intended.
all_results_df = pd.DataFrame(results_list)
all_results_df.to_csv(os.path.join(output_dir, "all_exposure_results_high_res_1975.csv"), index=False)