# Extracting and Comparing Nighttime Lights and Settlements
Extract settlements from built-area datasets and attribute each settlement with statistics from a series of other datasets (population, nighttime lights, electrification).

0. Clip and extract input data based on country admin file
1. Convert raster of built area into settlements
    1. Convert directly to shapes using rasterio
    2. Apply a small buffer and merge the buffered shapes to simplify them and fix broken geometry
    
2. Extract zonal results for each new feature

In [1]:
import os, sys, logging, glob, importlib
import rasterio
import rasterio.features

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import shape

#Get reference to the GOSTRocks library
#   The parent of the current working directory is pushed to the front of
#   sys.path so that the GOST_Urban package imported below can be found.
#   NOTE(review): this assumes the notebook is run from a folder that sits
#   directly inside the repository root - confirm before running elsewhere.
gostRocks = os.path.dirname(os.getcwd())
sys.path.insert(0, gostRocks)

import GOST_Urban.rasterMisc as rMisc
import GOST_Urban.misc as misc

#This is just for debugging, comment it out otherwise
#importlib.reload(rMisc)
#importlib.reload(misc)

In [2]:
#0. Clip input data
#   All paths are hard-coded for the machine the analysis was run on; the
#   trailing commented-out paths are the equivalents for another country.
inputAOI = r"Q:\AFRICA\MWI\ADMIN\MWI_adm0.shp" #r"Q:\AFRICA\KEN\ADMIN\KEN_adm0.shp" #
outputFolder = r"Q:\AFRICA\MWI\ENERGY\TestingNewNTL" #r"Q:\AFRICA\KEN\Electrification\TestingRates"
buffer = 1 #How much to buffer settlements extracted from settlement raster. Must be set to at least 0 to fix broken geometry
finalOutput = os.path.join(outputFolder, "electrified_settlements_%s.csv" % buffer)

electrifiedFile = r"Q:\AFRICA\MWI\ENERGY\NTL\Malawi_wbnaterate_sets_lit_2017.tif"

#Files written into outputFolder by this notebook
settlementsFile = os.path.join(outputFolder, "settlements.shp")
gufFile = os.path.join(outputFolder, "GUF.tif")
ghslFile = os.path.join(outputFolder, "GHSL.tif")
viirsFile = os.path.join(outputFolder, "VIIRS_2015.tif")
popFile = os.path.join(outputFolder, "popFile.tif")

#Locations of the un-clipped source rasters, as configured in GOST_Urban.misc
inputParams = misc.getUrbanParams()
baseGUF = inputParams['gufVRT']
baseGHSL = inputParams['ghslVRT']
baseVIIRS = inputParams['viirs2015']
basePop = inputParams['worldPopAfrica']

inD = gpd.read_file(inputAOI)
#Clip every source raster to the AOI, skipping any clip that already exists on
#   disk. Each source is opened in a with-block so its file handle is released
#   as soon as the clip has been written (the handles were previously left open).
for baseRaster, clippedFile in [(baseGUF, gufFile),
                                (baseGHSL, ghslFile),
                                (baseVIIRS, viirsFile),
                                (basePop, popFile)]:
    if not os.path.exists(clippedFile):
        with rasterio.open(baseRaster) as inRaster:
            rMisc.clipRaster(inRaster, inD, clippedFile)

                     

In [3]:
#1. Convert the built area dataset to shapes
with rasterio.open(gufFile) as built_dataset:
    src = built_dataset.read()
    #Polygonize the raster and keep only the polygons whose cell value is 255
    #   (the value this notebook treats as built area)
    builtSettlements = [
        shape(geom)
        for geom, val in rasterio.features.shapes(src, transform=built_dataset.transform)
        if val == 255.0
    ]

In [77]:
#1. Convert the list of geometries to a data frame
curDF = gpd.GeoDataFrame(pd.DataFrame(), geometry = builtSettlements)
#Project to metres-based CRS
#   The epsg keyword replaces the deprecated {'init': 'epsg:3857'} dictionary form.
#   NOTE(review): EPSG:3857 (Web Mercator) is not an equal-area projection, so the
#   'area' column is only an approximation - confirm this is acceptable.
curDF.crs = built_dataset.crs
curDF = curDF.to_crs(epsg=3857)
#Limiting size of settlements to speed process
#curDF = curDF[curDF.area > 100000]
#curDF = curDF.reset_index()
#Buffer every shape by `buffer` CRS units, then dissolve the overlapping results
curDF.geometry = curDF.buffer(buffer)
bufferedGeom = curDF.unary_union
#unary_union returns a multi-part geometry when several shapes remain and a single
#   geometry when everything merges into one. Read the parts through .geoms
#   (multi-part geometries are not directly iterable in Shapely 2.0) and wrap a
#   single geometry in a list so both cases are handled.
allGeom = list(getattr(bufferedGeom, "geoms", [bufferedGeom]))
curDF = gpd.GeoDataFrame(pd.DataFrame(), crs=curDF.crs, geometry=allGeom)
#Area is computed once, on the final dissolved settlements
curDF['area'] = curDF.area
curDF.to_file(settlementsFile)

In [78]:
#2. Run zonal statistics against the other baseline data
#   Population and VIIRS are summarized with identical arguments, in that order
popData, viirsData = [
    rMisc.zonalStats(settlementsFile, rasterPath, reProj=True, minVal=0)
    for rasterPath in (popFile, viirsFile)
]
#Summarize the new electrification data from Brian Min
#   Here the helper is asked for raster type 'C' with the unique values 0 and 1
summarizedElec = rMisc.zonalStats(
    settlementsFile,
    electrifiedFile,
    reProj=True,
    rastType='C',
    unqVals=[0,1],
)

  if inVector.crs != curRaster.crs:


In [79]:
#Turn each zonal summary into a named table
popRes = pd.DataFrame(popData)
popRes.columns = ["popSUM", "popMIN", "popMAX", "popMEAN"]
popRes["gID"] = popRes.index  #keep the row number as an explicit settlement ID

viirsRes = pd.DataFrame(viirsData)
viirsRes.columns = ["viirsSUM", "viirsMIN", "viirsMAX", "viirsMEAN"]

elecRes = pd.DataFrame(summarizedElec)
elecRes.columns = ["nonElec","elec"]

#Stitch the tables together on their row index
#   NOTE(review): assumes zonalStats returns one row per settlement, in the
#   same order as the features in curDF - confirm against rMisc.zonalStats
allRes = popRes.join(viirsRes)
allRes = allRes.join(elecRes)
allRes = allRes.join(curDF)

In [81]:
viirsElecThresh = 0.23156 #MWI = 4.78686 #This value is based on Brian Min's calculations to match electrification rates
elecThresh = 0.6 #What percent of the settlement being electrified according to Brian makes it electrified
#Fraction of each settlement's summarized cells that fall in the 'elec' class
allRes['perElec'] = allRes['elec'].div(allRes['nonElec'].add(allRes['elec']))
#Brian Min flag is stored as 0/10 and the VIIRS flag as 0/1, so that their sum
#   (combElec) distinguishes every combination: 0, 1, 10 or 11
allRes['bElec'] = allRes['perElec'].gt(elecThresh).mul(10)
#apply a nighttime lights threshold to the VIIRSmean category to determine electrification status
allRes['viirsElec'] = allRes['viirsMEAN'].gt(viirsElecThresh).mul(1)
allRes['combElec'] = allRes['bElec'].add(allRes['viirsElec'])
allRes.to_csv(finalOutput)

In [82]:
#print(allRes.head())
#Cross-tabulate the two electrification flags: number of settlements, total
#   area and total population for every (bElec, viirsElec) combination.
#   The population sum keeps only finite, strictly positive values. The mask is
#   built in one step with isfinite and & instead of unary minus on a boolean
#   mask followed by a second, misaligned boolean index.
xx = pd.pivot_table(allRes, index=["bElec","viirsElec"], values=['gID','area','popSUM'],
               aggfunc={'gID':'count',
                        'area':'sum',
                        'popSUM':lambda x: x[np.isfinite(x) & (x > 0)].sum()})
print(xx)
#Population totals used for the percentages reported afterwards
#   NOTE(review): these sums do not apply the finite/positive filter used in
#   the pivot table above - confirm popSUM never holds inf or negative values
totalPopulation = allRes['popSUM'].sum()
#combElec: 11 = both sources agree, 1 = VIIRS only, 10 = Brian Min only
agreedElectrified_population = allRes[allRes['combElec'] == 11]['popSUM'].sum()
viirsElectrified_population = allRes[allRes['combElec'] == 1]['popSUM'].sum()
brianElectrified_population = allRes[allRes['combElec'] == 10]['popSUM'].sum()


                         area   gID        popSUM
bElec viirsElec                                  
0     0          8.063125e+07  4269  2.003126e+06
      1          7.274136e+06   473  5.171810e+05
10    0          2.143206e+08  8524  8.847659e+06
      1          1.123411e+09  6780  2.195087e+07


In [83]:
#Report each electrified population as a rounded percentage of the total population
shareMessages = (
    ("%s of the population has agreed electrification", agreedElectrified_population),
    ("%s of the population has VIIRS only electrification", viirsElectrified_population),
    ("%s of the population has Brian only electrification", brianElectrified_population),
)
for messageTemplate, electrifiedPopulation in shareMessages:
    print(messageTemplate % round(electrifiedPopulation/totalPopulation*100))

66.0 of the population has agreed electrification
2.0 of the population has VIIRS only electrification
27.0 of the population has Brian only electrification
