In [1]:
import os

import warnings 
warnings.simplefilter('ignore')

import geopandas as gpd
import pandas as pd
import numpy as np

In [38]:
# Input locations used by the loading cells. The '../Data' entries are relative
# paths, so they resolve against the notebook's working directory.
# GeoJSON read into `geocensus`; must provide an 'index' column plus every selected parameter column.
geocensus_path = '../Data/Mumbai_Wards.geojson'
# GeoJSON files from which the 'index' + 'lst_mean' and 'index' + 'ndvi_mean' columns are taken.
zs_lst_path = '../Data/ZS_LST_Test.geojson'
zs_ndvi_path = '../Data/ZS_NDVI_Test.geojson'
vul_para_path = '../Data/VulnerabilityParameters.xlsx' # has list of selected parameters as entries in column Parameters
# Remote CSV of census variable definitions (read directly from this URL, so network access is required).
census_var_def_path = 'https://cities-socio-economic-vulnerability.s3.eu-west-3.amazonaws.com/data/dev/india_2/census/2011/census_variables_definition.csv'

In [68]:
# Load the census variable definitions and forward-fill missing cells from the
# row above. `.ffill()` is used instead of `fillna(method='ffill')` because the
# `method` argument of `fillna` is deprecated in recent pandas releases; the
# result is the same.
census_var_def = pd.read_csv(census_var_def_path).ffill()

In [69]:
# Load the census GeoJSON; the 'index' column and the selected parameter
# columns are sliced out of this frame in a later cell.
geocensus = gpd.read_file(geocensus_path)

In [70]:
# Load the LST GeoJSON, then keep only the join key and the mean LST value
lst_keep_cols = ['index', 'lst_mean']
lst_zs_gdf = gpd.read_file(zs_lst_path)
lst_zs = lst_zs_gdf[lst_keep_cols]

In [71]:
# Load the NDVI GeoJSON, then keep only the join key and the mean NDVI value
ndvi_keep_cols = ['index', 'ndvi_mean']
ndvi_zs_gdf = gpd.read_file(zs_ndvi_path)
ndvi_zs = ndvi_zs_gdf[ndvi_keep_cols]

In [72]:
# Load the workbook of selected parameters and collect the entries of its
# 'Parameters' column into a plain Python list
vul_para = pd.read_excel(vul_para_path)
vul_para_list = [parameter for parameter in vul_para['Parameters']]

In [74]:
# Keep only the join key and the selected parameter columns
selected_cols = ['index', *vul_para_list]
census_vul = geocensus[selected_cols]

In [75]:
# Attach the mean LST and mean NDVI to every census row with two left joins on
# 'index', then drop the key so only the variables to correlate remain.
with_lst = census_vul.merge(lst_zs, how='left', on='index')
with_lst_ndvi = with_lst.merge(ndvi_zs, how='left', on='index')
df = with_lst_ndvi.drop(columns='index')

In [76]:
# Pairwise correlation matrix of all columns (Pearson is the pandas default,
# spelled out here for the reader).
# NOTE(review): recent pandas versions raise if `df` holds a non-numeric
# column - confirm every selected parameter is numeric.
df_corr = df.corr(method='pearson')

In [77]:
# Take the 'lst_mean' column of the correlation matrix (as a one-column frame)
# and remove the row where lst_mean is correlated with itself.
lst_only = df_corr[['lst_mean']]
not_self = lst_only.index != 'lst_mean'
corr_lst = lst_only[not_self]

In [78]:
# Move the variable names out of the index into a regular column named 'index'
corr_lst = corr_lst.reset_index(drop=False)

In [79]:
# Give the variable-name column the same name it has in the definitions table,
# so the two frames can be merged on it
corr_lst = corr_lst.rename(columns={'index': 'variable_name_processed'})

In [80]:
# Right join: keep every correlated variable, adding definition columns where
# a matching 'variable_name_processed' exists (unmatched rows get NaN there)
corr_lst = pd.merge(
    census_var_def,
    corr_lst,
    how='right',
    on='variable_name_processed',
)

In [84]:
# Fill in the descriptive columns by hand for the 'ndvi_mean' row; all other
# rows keep their existing values.
is_ndvi = corr_lst['variable_name_processed'] == 'ndvi_mean'
corr_lst['variable_category'] = np.where(
    is_ndvi, 'Vegetation', corr_lst['variable_category']
)
corr_lst['variable_name_processed_viz'] = np.where(
    is_ndvi, 'NDVI', corr_lst['variable_name_processed_viz']
)

In [85]:
# Show the labelled correlation table (one row per variable, with its
# correlation against lst_mean)
corr_lst

Unnamed: 0,variable_category,variable_name_processed,variable_name_processed_viz,lst_mean
0,Pop-Basics,p_tot,Pop,0.186180
1,Pop-Basics,p_tot_m,Male pop,0.195948
2,Pop-Basics,p_tot_f,Female pop,0.174107
3,Pop-Basics,prop_m_f,Male/female ratio,0.338557
4,Pop-Basics,p_0to6,Pop aged 0 - 6 yrs,0.192644
...,...,...,...,...
79,Households-assets,hh_prop_mtrcl,Pct of households with scooter/ motorcycle / m...,-0.056435
80,Households-assets,hh_prop_ncar,Pct of households without car/ jeep / van,0.469747
81,Households-assets,hh_prop_allgoods,"Pct of households with TV, computer/laptop, te...",-0.432875
82,Households-assets,hh_prop_ngoods,Pct of households with none of the assets spec...,-0.119398


In [83]:
# Save the correlation table as an Excel workbook, without the row index
corr_lst.to_excel(excel_writer='../Data/Mumbai_Correlation.xlsx', index=False)

In [87]:
# Row(s) whose correlation with lst_mean equals the maximum
corr_lst.loc[corr_lst['lst_mean'].eq(corr_lst['lst_mean'].max())]

Unnamed: 0,variable_category,variable_name_processed,variable_name_processed_viz,lst_mean
21,Pop-Employment,p_prop_nwkrs_f,Non-wkrs female pop,0.611172


In [66]:
# Row(s) whose correlation with lst_mean equals the minimum
corr_lst.loc[corr_lst['lst_mean'].eq(corr_lst['lst_mean'].min())]

Unnamed: 0,variable_category,variable_name_processed,variable_name_processed_viz,lst_mean
68,Households-size,hh_prop_3rms,Pct of households with 3 rooms,-0.5517


In [67]:
# Row holding the correlation between NDVI and lst_mean
corr_lst.loc[corr_lst['variable_name_processed'].eq('ndvi_mean')]

Unnamed: 0,variable_category,variable_name_processed,variable_name_processed_viz,lst_mean
83,NDVI,ndvi_mean,NDVI,-0.488203
