In [34]:
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio import mask

import shapely
from shapely.geometry import Polygon, Point
from shapely.ops import cascaded_union
import shapely.speedups

# Turn on Shapely's optional speedups for the geometry operations used below.
# NOTE(review): the speedups module is deprecated in Shapely 2.x — confirm the
# installed Shapely version still needs (and still ships) this call.
shapely.speedups.enable()

In [12]:
# Boundary shapefile; the file name suggests GADM 4.0 level-2 units for India.
DISTRICT_SHAPEFILE = 'gadm40_IND_shp/gadm40_IND_2.shp'

# Load the boundary polygons into a GeoDataFrame.
df_district_boundaries = gpd.read_file(DISTRICT_SHAPEFILE)

In [13]:
# Number of distinct NAME_2 values (missing values excluded). This counts names,
# so identically named units in different NAME_1 groups are counted once.
df_district_boundaries['NAME_2'].nunique(dropna=True)

659

In [15]:
# Open the population raster. The dataset handle is left open on purpose:
# a later cell passes it to get_population_count for per-polygon masking.
india_worldpop_raster_2020 = rasterio.open('ind_ppp_2020_1km_Aggregated_UNadj.tif')
print('No. of bands:', india_worldpop_raster_2020.count)

# Read band 1 into memory and blank out negative cells.
# Assigning None into a float array stores NaN.
india_worldpop_raster_2020_tot = india_worldpop_raster_2020.read(1)
india_worldpop_raster_2020_tot[india_worldpop_raster_2020_tot < 0] = None

# Keep strictly positive cells only (NaN compares False) and add them up.
india_worldpop_raster_2020_nonzero = india_worldpop_raster_2020_tot[
    india_worldpop_raster_2020_tot > 0
]
population_worldpop = india_worldpop_raster_2020_nonzero.sum()
print(
    'Total population - India (2020): ',
    round(population_worldpop / 1_000_000_000, 2),
    'billion',
)

No. of bands: 1
Total population - India (2020):  1.38 billion


In [19]:
def get_population_count(vector_polygon,raster_layer):
    """Sum the positive first-band raster values clipped to one polygon.

    Parameters
    ----------
    vector_polygon
        Geometry handed to ``rasterio.mask.mask`` as the only shape.
    raster_layer
        Open raster dataset to mask and crop.

    Returns
    -------
    The sum of all first-band cells greater than zero in the masked,
    cropped raster, rounded to two decimals.
    """
    clipped_bands, _ = rasterio.mask.mask(raster_layer, [vector_polygon], crop=True)
    first_band = clipped_bands[0]
    # Strictly positive cells only: drops zeros and any negative values.
    positive_cells = first_band[first_band > 0]
    return positive_cells.sum().round(2)

In [20]:
%%time
df_district_boundaries['population_count_wp'] = df_district_boundaries['geometry'].apply(get_population_count,raster_layer=india_worldpop_raster_2020)


CPU times: user 1.91 s, sys: 63.4 ms, total: 1.97 s
Wall time: 1.98 s


In [45]:
# Total population per (NAME_2, NAME_1) pair, rounded to whole numbers and
# ordered from least to most populous.
district_population = (
    df_district_boundaries
    .groupby(['NAME_2', 'NAME_1'])['population_count_wp']
    .sum()
    .round()
    .reset_index()
    .sort_values(by='population_count_wp')
)


In [31]:
# Relative wealth index table; the file name suggests it covers India and Pakistan.
RWI_CSV_PATH = 'ind_pak_relative_wealth_index.csv'

relative_wealth_data = pd.read_csv(RWI_CSV_PATH)

In [36]:
def convert_Point(facebook_relative_wealth):
    """Build a ``Point`` from one record's coordinates.

    ``facebook_relative_wealth`` must support key lookup for ``'longitude'``
    and ``'latitude'``; longitude is used as x and latitude as y.
    """
    x_coord = facebook_relative_wealth['longitude']
    y_coord = facebook_relative_wealth['latitude']
    return Point(x_coord, y_coord)

# Build every point geometry in one vectorised call instead of a Python-level
# row-wise apply (x = longitude, y = latitude, the same order convert_Point uses).
# NOTE(review): no CRS is assigned here; the coordinates look like WGS84 degrees —
# confirm and pass crs='EPSG:4326' if a later step needs a CRS.
relative_wealth_data = gpd.GeoDataFrame(
    relative_wealth_data,
    geometry=gpd.points_from_xy(
        relative_wealth_data['longitude'],
        relative_wealth_data['latitude'],
    ),
)

In [37]:
# Preview the first two rows to check the geometry column.
relative_wealth_data.head(n=2)

Unnamed: 0,latitude,longitude,rwi,error,geometry
0,33.897776,70.037842,-0.074,0.624,POINT (70.03784 33.89778)
1,31.118794,66.807861,-0.569,0.368,POINT (66.80786 31.11879)


In [39]:
def get_rwi_mean(vector_polygon,vector_layer):
    """Mean ``rwi`` of the rows of ``vector_layer`` lying within a polygon.

    Parameters
    ----------
    vector_polygon
        Geometry passed to ``vector_layer.within``.
    vector_layer
        Table exposing ``within(...)`` (returning a boolean row mask) and
        an ``'rwi'`` column.

    Returns
    -------
    The mean of ``rwi`` over the selected rows, rounded to two decimals
    (NaN when no row is selected).
    """
    is_inside = vector_layer.within(vector_polygon)
    rwi_inside = vector_layer.loc[is_inside, 'rwi']
    return round(rwi_inside.mean(), 2)

In [41]:
def get_rwi_median(vector_polygon,vector_layer):
    """Median ``rwi`` of the rows of ``vector_layer`` lying within a polygon.

    Parameters
    ----------
    vector_polygon
        Geometry passed to ``vector_layer.within``.
    vector_layer
        Table exposing ``within(...)`` (returning a boolean row mask) and
        an ``'rwi'`` column.

    Returns
    -------
    The median of ``rwi`` over the selected rows, rounded to two decimals
    (NaN when no row is selected).
    """
    is_inside = vector_layer.within(vector_polygon)
    rwi_inside = vector_layer.loc[is_inside, 'rwi']
    median_val = rwi_inside.median()
    return round(median_val, 2)

In [40]:
# Mean RWI of the points that fall within each boundary polygon.
df_district_boundaries['rwi_mean'] = df_district_boundaries['geometry'].apply(
    lambda district_geom: get_rwi_mean(district_geom, vector_layer=relative_wealth_data)
)


In [47]:
# Average of the per-row mean RWI for each (NAME_2, NAME_1) pair, lowest first.
district_average_rwi = (
    df_district_boundaries
    .groupby(['NAME_2', 'NAME_1'])['rwi_mean']
    .mean()
    .reset_index()
    .sort_values(by='rwi_mean')
)


In [46]:
# Preview the two least-populous entries (the frame is sorted ascending).
district_population.head(n=2)

Unnamed: 0,NAME_2,NAME_1,population_count_wp
166,Dibang Valley,Arunachal Pradesh,6660.0
25,Anjaw,Arunachal Pradesh,23516.0


In [49]:
# Inner-join mean RWI and population on the (NAME_2, NAME_1) key.
df_combined = district_average_rwi.merge(
    district_population,
    how='inner',
    on=['NAME_2', 'NAME_1'],
)

In [50]:
# Population-scaled score: population count multiplied by mean RWI.
df_combined['weighted'] = df_combined['population_count_wp'].mul(df_combined['rwi_mean'])

In [52]:
# Write the table, ordered by ascending weighted score, to an Excel workbook.
(
    df_combined
    .sort_values(by='weighted')
    .to_excel('rwi_average.xlsx')
)

In [53]:
# Median RWI of the points that fall within each boundary polygon.
df_district_boundaries['rwi_median'] = df_district_boundaries['geometry'].apply(
    lambda district_geom: get_rwi_median(district_geom, vector_layer=relative_wealth_data)
)



In [55]:
# Average of the per-row median RWI for each (NAME_2, NAME_1) pair, lowest first.
district_median_rwi = (
    df_district_boundaries
    .groupby(['NAME_2', 'NAME_1'])['rwi_median']
    .mean()
    .reset_index()
    .sort_values(by='rwi_median')
)



In [56]:
# Inner-join median RWI and population on the (NAME_2, NAME_1) key.
# This rebinds df_combined, replacing the mean-based table built earlier.
df_combined = district_median_rwi.merge(
    district_population,
    how='inner',
    on=['NAME_2', 'NAME_1'],
)

In [58]:
# Population-scaled score: population count multiplied by median RWI.
df_combined['weighted'] = df_combined['population_count_wp'].mul(df_combined['rwi_median'])

In [59]:
# Write the table, ordered by ascending weighted score, to an Excel workbook.
(
    df_combined
    .sort_values(by='weighted')
    .to_excel('rwi_median.xlsx')
)