In [None]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio as rio
from rasterio.plot import show
import matplotlib.pyplot as plt
import pyproj
from matplotlib import pyplot
import rioxarray as rxr
from rasterstats import zonal_stats

In [None]:
# Work from the project data folder so the relative 'input', 'process' and 'output' paths used below resolve.
# NOTE(review): this is an absolute, machine-specific Windows path - adjust it before running elsewhere.
os.chdir(r'D:\iMMAP\proj\ASDC\data\pop_admin_layer')

## Preparing and checking data

In [None]:
# Load the vector layers and reproject them to ESRI:53034
# (the CRS named in the population raster's filename)
admin = gpd.read_file(r'input\admin.shp').to_crs('ESRI:53034')
sett = gpd.read_file(r'input\pplp-sample-updated.shp').to_crs('ESRI:53034')
build = gpd.read_file(r'input\buildings-sample.shp').to_crs('ESRI:53034')

# Land cover is loaded as-is here; its CRS is assigned and reprojected in a later cell
lc = gpd.read_file(r'input\land-cover-dis-sample.shp')

# Open the population raster.
# `masked` is not a parameter of rasterio.open (it belongs to DatasetReader.read and
# rioxarray.open_rasterio); passing it here only forwarded an unknown open option to the
# format driver and masked nothing, so it has been removed.
pop = rio.open(r'process\pop-sample-ESRI53034.tif')

In [None]:
# Reproject the land cover layer and write it to disk, one step at a time.
# set_crs only labels the layer with EPSG:32642; to_crs then transforms the coordinates.
lc_utm = lc.set_crs('EPSG:32642')
lc_53034 = lc_utm.to_crs('ESRI:53034')
lc_53034.to_file(r'input\land-cover-dis-sample-53034.shp')

In [None]:
# Load the reprojected land cover written by the previous cell.
# This rebinds `lc`, replacing the originally loaded (un-reprojected) layer.
lc = gpd.read_file(r'input\land-cover-dis-sample-53034.shp')

In [None]:
# Quick visual check of the first raster band
band_1 = pop.read(1)
plt.imshow(band_1, cmap='pink')

In [None]:
# Overlay check: admin boundaries (red outlines), settlements (yellow) and raster band 1
# are all drawn on the same axes
ax = admin.plot(linewidth=2, edgecolor='red')
ax = sett.plot(color='yellow', ax=ax)

show((pop, 1), ax=ax)

## Population

In [None]:
# Pull band 1 as a NumPy array together with the dataset's affine transform;
# both are passed to zonal_stats further down
array, affine = pop.read(1), pop.transform

In [None]:
# Histogram of all cell values (np.histogram flattens its input).
# Only the last expression of a cell is displayed, so the counts are printed explicitly;
# previously the histogram result was computed and silently discarded.
hist_counts, hist_edges = np.histogram(array, bins=100)
print(hist_counts)

# Summary statistics over every cell of the raster.
# The 2-D array is flattened first: describing it directly would produce one set of
# statistics per raster column instead of one for the whole raster.
df_describe = pd.DataFrame({'pop': array.ravel()})
df_describe.describe()

In [None]:
# Replace negative cells with NaN (presumably nodata / fill values - TODO confirm).
# np.where builds a new array and promotes an integer raster to float, whereas assigning
# NaN in place raises on an integer dtype.
array = np.where(array < 0, np.nan, array)

# Check result: summary statistics over every cell (flattened so the whole raster is
# described at once rather than column by column)
df_describe = pd.DataFrame({'pop': array.ravel()})
df_describe.describe()

In [None]:
# Sum the raster cells falling inside each admin polygon
pop_dictionaries = zonal_stats(admin, array, affine=affine, stats=['sum'])

# Convert the per-polygon result dictionaries to a flat list,
# taking the first value of each dictionary
list_of_values = [next(iter(stats.values())) for stats in pop_dictionaries]

In [None]:
# Attach the zonal sums to the admin polygons as column 'pop_sum'.
# The assignment is positional: list_of_values must be in the same row order as `admin`,
# which holds because zonal_stats was called with `admin` itself.
admin['pop_sum'] = list_of_values

## Area

In [None]:
# Polygon area in square kilometres: .area is in squared CRS units
# (assumed to be metres - TODO confirm), so divide by the m2-per-km2 factor
SQ_M_PER_SQ_KM = 1000000
admin['area_km2'] = admin['geometry'].area / SQ_M_PER_SQ_KM

## Settlements

In [None]:
# Join settlements to the admin areas they intersect.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and later removed.
joined_df = gpd.sjoin(
    sett,
    admin,
    how='inner',
    predicate='intersects',
)

# Count the settlements per admin polygon.
# 'vuid' is arbitrary: any non-null column works, we only count the joined rows.
sett_count = (
    joined_df
    .groupby(['ADM2_PCODE'], as_index=False)['vuid']
    .count()
    .rename(columns={'vuid': 'sett_count'})
)

# QA check - admin areas with no settlements.
# An inner join followed by groupby never yields null counts: such areas are simply absent
# from sett_count, so they are found by comparing the codes against the admin layer.
admin_without_sett = admin.loc[
    ~admin['ADM2_PCODE'].isin(sett_count['ADM2_PCODE']),
    'ADM2_PCODE',
]
print(f'Admin areas without settlements: {len(admin_without_sett)}')

# View results
sett_count

In [None]:
# Merge the settlement count back onto the admin dataset.
# Any 'sett_count' column left over from a previous run of this cell is dropped first, so
# re-running the cell does not produce 'sett_count_x' / 'sett_count_y' duplicates.
admin = (
    admin
    .drop(columns=['sett_count'], errors='ignore')
    .merge(sett_count, on='ADM2_PCODE', how='left')
)

# Admin areas without any joined settlement have no row in sett_count and come out of the
# left merge as NaN; for a count, that means zero.
admin['sett_count'] = admin['sett_count'].fillna(0).astype(int)

admin

## Buildings

In [None]:
# Convert building polygons to centroid points, so each building is a single point
centroids = build.geometry.centroid

# Build the GeoDataFrame explicitly: 'geo' holds the centroids as an ordinary column (it is
# the column counted after the spatial join) and the same points become the active geometry.
# This gives the same columns as before without relying on the automatic integer label 0
# that an unnamed series receives, and without an in-place rename.
build_centroids = gpd.GeoDataFrame({'geo': centroids}, geometry=centroids)

In [None]:
# Join building centroids to the admin areas they intersect.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and later removed.
joined_df = gpd.sjoin(
    build_centroids,
    admin,
    how='inner',
    predicate='intersects',
)

# Count the buildings per admin polygon.
# 'geo' is arbitrary: any non-null column works, we only count the joined rows.
build_count = (
    joined_df
    .groupby(['ADM2_PCODE'], as_index=False)['geo']
    .count()
    .rename(columns={'geo': 'build_count'})
)

# QA check - admin areas with no buildings.
# An inner join followed by groupby never yields null counts: such areas are simply absent
# from build_count, so they are found by comparing the codes against the admin layer.
admin_without_build = admin.loc[
    ~admin['ADM2_PCODE'].isin(build_count['ADM2_PCODE']),
    'ADM2_PCODE',
]
print(f'Admin areas without buildings: {len(admin_without_build)}')

# View results
build_count

In [None]:
# Merge the building count back onto the admin dataset.
# Any 'build_count' column left over from a previous run of this cell is dropped first, so
# re-running the cell does not produce 'build_count_x' / 'build_count_y' duplicates.
# Admin areas with no buildings keep NaN here: 'build_count' is used as a divisor for
# people-per-building, and NaN keeps that ratio undefined instead of dividing by zero.
admin = (
    admin
    .drop(columns=['build_count'], errors='ignore')
    .merge(build_count, on='ADM2_PCODE', how='left')
)

## Population Density

In [None]:
# Derived ratios, both taken from the zonal population sum
population = admin['pop_sum']

# People per square kilometre
admin['pop_density'] = population.div(admin['area_km2'])

# People per building
admin['people_building'] = population.div(admin['build_count'])

admin

## Land Cover (to be added later)

In [None]:
# List the distinct land cover classes stored in the 'agg_simpli' column
lc['agg_simpli'].unique()

In [None]:
# Calculate area
# lc['area_m2'] = lc['geometry'].area

## Export results

In [None]:
# Make sure the output folder exists; otherwise the export fails only after all the
# processing above has already run
os.makedirs('output', exist_ok=True)

# Write the enriched admin layer.
# NOTE(review): shapefile field names are limited to 10 characters, so longer column names
# such as 'pop_density', 'build_count' and 'people_building' will be truncated on export -
# confirm the truncated names are acceptable downstream.
admin.to_file(r'output/pop_adm.shp')