In [1]:
import pandas as pd
import numpy as np
from itertools import product
import pickle

In [2]:
# Covariate column names. The hazard cell uses this list both to drop rows
# with missing values and to build the matrix passed to each model, so the
# order of the names is significant.
cols = [
    'xcoord', 'ycoord', 'DALYs', 'min_dist', 'num',
    'best', 'duration', 'min_dist_type1', 'num_type1', 'best_type1',
    'duration_type1', 'mean_ps', 'unemployment', 'cisi', 'pop',
    'gdp', 'gpc', 'hdi', 'cropland', 'forest',
    'urban', 'migration', 'dem_mean', 'dem_std', 'mou_cover',
    'dist2coast', 'ttime_mean', 'water_gc', 'ttime_sd', 'landarea',
    'bdist2', 'bdist3', 'capdist', 'excluded', 'road_dens',
    'gdmhz', 'mining', 'infant_mortality', 'hfp', 'ndvi',
    'min_dist_geopko', 'num_geopko', 'SPI', 'hist_pre', 'STI',
    'hist_tmp',
]
len(cols)

46

# hazard

In [None]:
# Load the pickled bundle and keep its 'models' entry; the hazard cell indexes
# it as models[0] .. models[19].
# NOTE(review): unpickling can execute arbitrary code, so only load a file
# whose origin is trusted.
file = './models/models_final.pkl'
with open(file, 'rb') as f:
    data = pickle.load(f)
models = data['models']

In [3]:
# Identifiers that are combined (together with a year) into the input and
# output file names of the hazard and risk cells.
GCMs = [
    'CanESM5',
    'GFDL-ESM4',
    'MPI-ESM1-2-LR',
    'BCC-CSM2-MR',
    'IPSL-CM6A-LR',
]
SSPs = ['ssp%s' % code for code in ('126', '245', '370', '585')]

In [5]:
# Every (ssp, gcm, year) combination for 2024..2060 inclusive, ordered with
# the scenario varying slowest and the year varying fastest.
years = np.arange(2024, 2060 + 1)
choices = [(ssp, gcm, year) for ssp in SSPs for gcm in GCMs for year in years]
num = len(choices)

740

In [None]:
# For every (ssp, gcm, year) combination: read the covariate table, drop rows
# with a missing covariate, score the remaining rows with each of the 20
# models and write the per-model and mean hazard to ./risk/hazard/.
#
# The loop walks `choices` directly rather than `range(num)`: `num` is
# re-bound to len(years) in the risk section further down, so re-running this
# cell afterwards would otherwise stop after the first 37 combinations.
for ssp, gcm, year in choices:
    file = './all_data/%s_%s_%s.csv' % (ssp, gcm, year)
    all_covars = pd.read_csv(file)

    # Keep only rows in which every covariate is present.
    complete = all_covars[cols].notna().all(axis=1)
    all_covars = all_covars[complete].reset_index(drop=True)
    X = all_covars[cols].values

    hazard_cols = ['hazard_%02d' % k for k in range(20)]
    for k, col in enumerate(hazard_cols):
        # Column 1 of predict_proba is stored as that model's hazard score.
        all_covars[col] = models[k].predict_proba(X)[:, 1]
    all_covars['hazard'] = all_covars[hazard_cols].mean(axis=1)

    # Column order of the output: hazard_00..hazard_19, gid, hazard.
    hazard_cols += ['gid', 'hazard']
    # The output name puts gcm before ssp (the input name has ssp first).
    file = './risk/hazard/%s_%s_%s.csv' % (gcm, ssp, year)
    all_covars[hazard_cols].to_csv(file, index=False)

# exposure

In [6]:
# Re-binds SSPs to the labels used in the ./data/SSP_POP and ./data/SSP_GDP
# file names read by the exposure cells.
SSPs = ['SSP%d' % n for n in (1, 2, 3, 5)]

In [None]:
# Bounds used to min-max scale population when the exposure files are written.
# pop_max is the largest gridded value over all scenarios and all years
# 2000..2060; pop_min is fixed at 0 and never updated.
#
# NOTE: this re-binds the global `cols` (until now the covariate list) to the
# year labels '2000'..'2060'.
cols = [str(i) for i in range(2000, 2061)]

# The historical table is the same for every scenario, so read it once.
file1 = './data/WorldPop/pop_2000_2020.csv'
df1 = pd.read_csv(file1)

pop_max = 0
pop_min = 0
for ssp in SSPs:
    ## pop
    file2 = './data/SSP_POP/%s_2020_2060.csv' % (ssp)
    df2 = pd.read_csv(file2)
    # Columns present in both tables keep the historical value under the
    # plain name; the scenario copy gets a trailing '_'.
    df_pop = pd.merge(df1, df2, on='gid', how='left', suffixes=['', '_'])
    ssp_max = df_pop[cols].max().max()
    if ssp_max > pop_max:
        pop_max = ssp_max
pop_max, pop_min

In [None]:
# Bounds used to min-max scale GDP when the exposure files are written.
# gdp_max is the largest gridded value over all scenarios and all years
# 2000..2060; gdp_min is fixed at 0 and never updated.
#
# The year labels are defined here instead of being read from the global
# `cols`, whose content depends on which cell happened to run last.
year_cols = [str(i) for i in range(2000, 2061)]

gdp_max = 0
gdp_min = 0
for ssp in SSPs:
    ## GDP
    file = './data/SSP_GDP/%s_2000_2060.csv' % (ssp)
    df_gdp = pd.read_csv(file)
    ssp_max = df_gdp[year_cols].max().max()
    if ssp_max > gdp_max:
        gdp_max = ssp_max
gdp_max, gdp_min

In [None]:
# Write one exposure table per (scenario, year) to ./risk/exposure/.
# Each table holds gid, the raw GDP and population of that year, per-capita
# GDP ('gpc'), min-max scaled 'gdp' and 'pop', and
# exposure = 0.5 * gdp + 0.5 * pop.
#
# Requires pop_min/pop_max and gdp_min/gdp_max from the two cells above.
#
# NOTE(review): the inputs are read from the same ./data/... locations that
# the bounds cells use; this cell previously pointed at absolute
# '/data/01 Datasets/...' paths, so scaling bounds and scaled data could come
# from different files. Confirm ./data/ is the intended layout.
# The loop iterates the year labels directly (no progress bar): tqdm is not
# imported anywhere in this notebook.
cols = [str(i) for i in range(2000, 2061)]

# The historical population table does not depend on the scenario.
file1 = './data/WorldPop/pop_2000_2020.csv'
df1 = pd.read_csv(file1)

for ssp in SSPs:
    ## pop
    file2 = './data/SSP_POP/%s_2020_2060.csv' % (ssp)
    df2 = pd.read_csv(file2)
    df_pop = pd.merge(df1, df2, on='gid', how='left', suffixes=['', '_'])
    ## GDP
    file = './data/SSP_GDP/%s_2000_2060.csv' % (ssp)
    df_gdp = pd.read_csv(file)
    for col in cols:
        col_ = ['gid', col]
        gdp_col = '%s_gdp' % col
        pop_col = '%s_pop' % col
        temp = pd.merge(df_gdp[col_],
                        df_pop[col_],
                        on='gid',
                        how='left',
                        suffixes=['_gdp', '_pop'])
        temp['gpc'] = temp[gdp_col] / temp[pop_col]
        # Cells with zero population get a per-capita GDP of 0 rather than
        # the inf/NaN produced by the division.
        temp.loc[temp[pop_col] == 0, 'gpc'] = 0
        temp['gdp'] = (temp[gdp_col] - gdp_min) / (gdp_max - gdp_min)
        temp['pop'] = (temp[pop_col] - pop_min) / (pop_max - pop_min)
        temp['exposure'] = temp['gdp'] * 0.5 + temp['pop'] * 0.5
        file = './risk/exposure/%s_%s.csv' % (ssp, col)
        temp.to_csv(file, index=False)

# vulnerability

In [7]:
# Switch SSPs back to the lower-case scenario ids and map each one to the
# upper-case label used in the exposure / HDI / per-capita-GDP file names.
SSPs = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
ssp_dict = dict(zip(SSPs, ['SSP1', 'SSP2', 'SSP3', 'SSP5']))

In [None]:
# Collect, per scenario, the max and min over the years 2000..2060 of HDI,
# per-capita GDP and the urban table. The next cell reduces hdi_list /
# gpc_list / urban_list to a single overall range per indicator.
hdi_list = []
gpc_list = []
urban_list = []

cols = [str(i) for i in range(2000, 2061)]

# The historical urban table is shared by all scenarios, so read it once.
file1 = './data/historical_urban.csv'
df1 = pd.read_csv(file1)

for ssp in ssp_dict:
    ## HDI(-)
    file = './data/%sHDI.csv' % ssp_dict[ssp]
    df_hdi = pd.read_csv(file)
    hdi_list.append(df_hdi[cols].max().max())
    hdi_list.append(df_hdi[cols].min().min())
    ## per capita GDP(-)
    # Relative './data/' path, matching the cell that later re-reads this
    # file for scaling (it was the absolute '/data/' here).
    file = './data/%s_GDP_PPP_per_capita.csv' % ssp_dict[ssp]
    df_gpc = pd.read_csv(file)
    gpc_list.append(df_gpc[cols].max().max())
    gpc_list.append(df_gpc[cols].min().min())
    ## urban(-)
    file2 = './data/future_urban_%s.csv' % ssp
    df2 = pd.read_csv(file2)
    df_urban = pd.merge(df1, df2, on='gid', how='left', suffixes=['', '_'])
    urban_list.append(df_urban[cols].max().max())
    urban_list.append(df_urban[cols].min().min())

In [None]:
# Reduce the per-scenario extremes to one overall (max, min) pair per
# indicator; these are the bounds used to scale the vulnerability inputs.
hdi_max = np.array(hdi_list).max()
hdi_min = np.array(hdi_list).min()
gpc_max = np.array(gpc_list).max()
gpc_min = np.array(gpc_list).min()
urban_max = np.array(urban_list).max()
urban_min = np.array(urban_list).min()

# A cell only echoes its final expression, so show all three ranges in one
# value instead of three separate bare tuples (only the last was displayed).
{
    'hdi': (hdi_max, hdi_min),
    'gpc': (gpc_max, gpc_min),
    'urban': (urban_max, urban_min),
}

In [None]:
# Write one table per (scenario, year) to ./risk/vulnerability/ with the
# columns gid, urban, hdi, gpc. Each indicator is min-max scaled with the
# overall bounds computed above and then inverted (1 - scaled value).
#
# NOTE(review): the risk cell reads './risk/vulnerability/<ssp>_2000_2060.csv'
# and expects 'year' and 'vulnerability' columns; neither that file nor a
# 'vulnerability' column is produced here. The step that combines these
# per-year tables is not part of this notebook.
# NOTE(review): assumes 'gid' is unique per row in the urban table; the
# original merged the gid column with itself first, which would multiply
# rows for any duplicated gid.
# The year loop uses plain range(): trange is not imported in this notebook.
cols = [str(i) for i in range(2000, 2061)]

# The historical urban table is shared by all scenarios, so read it once.
file1 = './data/historical_urban.csv'
df1 = pd.read_csv(file1)

for ssp in ssp_dict:
    ## HDI(-)
    file = './data/%sHDI.csv' % ssp_dict[ssp]
    df_hdi = pd.read_csv(file)
    df_hdi[cols] = (df_hdi[cols] - hdi_min) / (hdi_max - hdi_min)
    df_hdi[cols] = 1 - df_hdi[cols]
    ## per capita GDP(-)
    file = './data/%s_GDP_PPP_per_capita.csv' % ssp_dict[ssp]
    df_gpc = pd.read_csv(file)
    df_gpc[cols] = (df_gpc[cols] - gpc_min) / (gpc_max - gpc_min)
    df_gpc[cols] = 1 - df_gpc[cols]
    ## urban(-)
    file2 = './data/future_urban_%s.csv' % ssp
    df2 = pd.read_csv(file2)
    df_urban = pd.merge(df1, df2, on='gid', how='left', suffixes=['', '_'])
    df_urban[cols] = (df_urban[cols] - urban_min) / (urban_max - urban_min)
    df_urban[cols] = 1 - df_urban[cols]
    ## vulnerability
    for year in range(2000, 2061):
        col = str(year)
        # Start from the urban column of this year, then left-join the same
        # year of the other two indicators on gid.
        all_covars = df_urban[['gid', col]].rename(columns={col: 'urban'})
        for name, frame in (('hdi', df_hdi), ('gpc', df_gpc)):
            yearly = frame[['gid', col]].rename(columns={col: name})
            all_covars = pd.merge(all_covars, yearly, on='gid', how='left')
        file = './risk/vulnerability/%s_%s.csv' % (ssp, year)
        all_covars.to_csv(file, index=False)

# risk

In [10]:
# Projection years 2024..2060 inclusive. Note that `num` is re-bound here to
# the number of years (37), not to the number of entries in `choices`.
years = np.arange(2024, 2060 + 1)
num = years.size

In [11]:
# Rebuild the (ssp, gcm, year) combinations with the current SSPs / GCMs /
# years and echo how many there are.
choices = [(ssp, gcm, year) for ssp in SSPs for gcm in GCMs for year in years]
len(choices)

740

In [12]:
# Names of the 20 per-model hazard columns: hazard_00 .. hazard_19.
hazard_cols = ['hazard_%02d' % k for k in range(20)]
hazard_cols

['hazard_00',
 'hazard_01',
 'hazard_02',
 'hazard_03',
 'hazard_04',
 'hazard_05',
 'hazard_06',
 'hazard_07',
 'hazard_08',
 'hazard_09',
 'hazard_10',
 'hazard_11',
 'hazard_12',
 'hazard_13',
 'hazard_14',
 'hazard_15',
 'hazard_16',
 'hazard_17',
 'hazard_18',
 'hazard_19']

In [None]:
# Combine hazard, exposure and vulnerability into risk for every
# (ssp, gcm, year) combination and write one table per combination to ./risk/.
#
# risk = hazard**a * exposure**b * vulnerability**c, computed once per model
# (columns '<gcm>_00' .. '<gcm>_19', summarised by their mean and std) and
# once from the mean hazard ('risk_all_<gcm>'), plus the three pairwise
# products.
#
# The loop walks `choices` itself. It used to run over range(num), but `num`
# is len(years) == 37 at this point, so only the first 37 of the 740
# combinations were processed.
#
# NOTE(review): `study_area` is not defined anywhere in this notebook; it must
# provide at least 'gid', 'region' and 'country'.
# NOTE(review): './risk/vulnerability/<ssp>_2000_2060.csv' with 'year' and
# 'vulnerability' columns is not written by any cell of this notebook.
a = 0.5
b = 0.25
c = 0.25

# `choices` is ordered by scenario first, so the per-scenario vulnerability
# table only needs to be re-read when the scenario changes.
loaded_ssp = None
df_vulnerability_all = None

for ssp, gcm, year in choices:
    ## hazard
    file = './risk/hazard/%s_%s_%s.csv' % (gcm, ssp, year)
    df_hazard = pd.read_csv(file)
    ## exposure
    file = './risk/exposure/%s_%s.csv' % (ssp_dict[ssp], year)
    df_exposure = pd.read_csv(file)
    ## vulnerability
    if ssp != loaded_ssp:
        file = './risk/vulnerability/%s_2000_2060.csv' % (ssp)
        df_vulnerability_all = pd.read_csv(file)
        loaded_ssp = ssp
    this_year = df_vulnerability_all['year'] == year
    df_vulnerability = df_vulnerability_all[this_year].reset_index(drop=True)

    # Left-join everything onto the study area so its rows are all kept.
    cols = ['gid', 'vulnerability']
    result = study_area.merge(df_vulnerability[cols],
                              on='gid',
                              suffixes=['', '_'],
                              how='left')
    cols = ['gid', 'exposure']
    result = result.merge(df_exposure[cols],
                          on='gid',
                          suffixes=['', '_'],
                          how='left')
    result = result.merge(df_hazard, on='gid', suffixes=['', '_'], how='left')

    # Exposure and vulnerability factors are shared by every risk variant.
    exposure_term = result['exposure']**b
    vulnerability_term = result['vulnerability']**c

    # One risk column per model.
    member_cols = []
    for k in range(20):
        col = '%s_%02d' % (gcm, k)
        result[col] = (result['hazard_%02d' % k]**a) * exposure_term * vulnerability_term
        member_cols.append(col)
    result['risk_std'] = result[member_cols].std(axis=1)
    result['risk_%s' % gcm] = result[member_cols].mean(axis=1)

    # Risk from the mean hazard, and the three two-factor products.
    hazard_term = result['hazard']**a
    result['risk_all_%s' % gcm] = hazard_term * exposure_term * vulnerability_term
    result['risk_he_%s' % gcm] = hazard_term * exposure_term
    result['risk_hv_%s' % gcm] = hazard_term * vulnerability_term
    result['risk_ev_%s' % gcm] = exposure_term * vulnerability_term

    final_cols = [
        'gid', 'region', 'country', 'exposure', 'vulnerability', 'hazard'
    ] + hazard_cols + member_cols + [
        'risk_all_%s' % gcm,
        'risk_%s' % gcm, 'risk_std',
        'risk_he_%s' % gcm,
        'risk_hv_%s' % gcm,
        'risk_ev_%s' % gcm
    ]
    file = './risk/%s_%s_%s.csv' % (ssp, gcm, year)
    result[final_cols].to_csv(file, index=False)