# Evaluate building completeness

In support of an ongoing project in Nigeria, we are investigating and evaluating the results of a building digitization effort. The following analyses will be performed to assess the completeness of the digitization effort.

1. Attribute evaluation  
   a. ensure variables in verification protocol are present  
   b. ensure relationship between parcel owner and building occupants  
2. Coverage evaluation  
   a. Calculate across the area in a consistently sized grid (250m)  
   b. Compare building density to Google Buildings  

In [34]:
import sys, os, importlib
import folium, fiona

import pandas as pd
import geopandas as gpd

sys.path.append(r"C:\WBG\Work\Code\gostrocks\src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.misc as misc

from shapely.geometry import Point, box
from shapely.wkt import loads

from math import ceil
import numpy as np
from shapely.geometry import Polygon

# NOTE(review): the original line was `import bldg_helper import helper`, which is
# a SyntaxError. This assumes `helper` is a name exposed by bldg_helper; if
# bldg_helper is itself the helper module, use `import bldg_helper as helper`.
from bldg_helper import helper

%load_ext autoreload
%autoreload 2

In [2]:
# Input and output locations; everything is derived from in_folder so the
# notebook only needs one path changed to run on another machine.
in_folder = "C:/WBG/Work/NGA_buildings/Delta"
out_folder = os.path.join(in_folder, "Results")
# exist_ok avoids the separate existence check (and the race it implies)
os.makedirs(out_folder, exist_ok=True)
in_buildings_file = os.path.join(in_folder, "Collected", "Building.shp")
in_parcels_file = os.path.join(in_folder, "Collected", "Parcel.shp")
# The Digitize Africa merge and the AOI layer live one level above in_folder
in_da = os.path.join(os.path.dirname(in_folder), "NGA_buildings_merge.shp")
in_aoi = os.path.join(os.path.dirname(in_folder), 'NGA_buildings_aois.shp')

# Define parameters
crs = 3857
m_crs = f'epsg:{crs}' # projection used to calculate metre measurements
res = 250 # resolution of the summary grid (250m, in the units of m_crs)

# Define outputs
# The grid was previously hard-coded to /home/wb411133/projects/NGA_buildings/Delta/,
# i.e. the same NGA_buildings/Delta folder on a different machine; build it from
# in_folder so it agrees with the other paths.
summary_grid = os.path.join(in_folder, f'NGA_summary_grid_{res}.shp')
da_buildings = os.path.join(out_folder, "da_buildings.shp")

In [3]:
# Read every AOI polygon, then keep only those whose Name mentions "Delta"
inAOI = gpd.read_file(in_aoi)
delta_mask = ["Delta" in aoi_name for aoi_name in inAOI['Name']]
inAOI = inAOI.loc[delta_mask]
inAOI

Unnamed: 0,id,Name,geometry
1,2.0,Delta1,"POLYGON ((5.71204 5.99490, 5.76972 6.00310, 5...."
2,3.0,Delta2,"POLYGON ((5.61041 5.63420, 5.79444 5.69433, 5...."
3,4.0,Delta3,"POLYGON ((6.10482 6.32260, 6.31082 6.30076, 6...."
4,5.0,Delta4,"POLYGON ((6.67613 6.35809, 6.75029 6.33625, 6...."


In [4]:
# Load the collected buildings and parcels, reprojecting each to the metre-based CRS
inB = gpd.read_file(in_buildings_file).to_crs(m_crs)
inP = gpd.read_file(in_parcels_file).to_crs(m_crs)

# Spatial indices for both reprojected layers
b_idx, p_idx = inB.sindex, inP.sindex

# Calculate general statistics

In [5]:
# Feature count and summed size attribute for the buildings layer ...
building_count = inB.shape[0]
building_area = inB["BLD_SIZE_M"].sum()
print(f'Total number of buildings: {building_count}')
print(f'Total area of buildings: {building_area}')

# ... and the same pair of figures for the parcels layer
parcel_count = inP.shape[0]
parcel_area = inP["PARCEL_SIZ"].sum()
print(f'Total number of parcels: {parcel_count}')
print(f'Total area of parcels: {parcel_area}')

Total number of buildings: 91386
Total area of buildings: 20981739.8926188
Total number of parcels: 61392
Total area of parcels: 67084133.19206174


# Extract Digitize Africa Buildings

In [20]:
# Reuse the cached DA buildings extract when it exists; otherwise build it from
# the merged DA layer for the AOIs and cache it for the next run.
if os.path.exists(da_buildings):
    inDA = gpd.read_file(da_buildings)
else:
    inDA = helper.extract_da_buildings(inAOI, in_da, m_crs)
    inDA.to_file(da_buildings)

# Assess attribute completeness
1. Size of the parcel (based on polygon)
2. **Number of structures in parcel, number of occupancy units**  
   a. This is not found in the parcel datasets, but can be calculated.
3. The use of each structure and unit
4. The name, ID, and contact details for the owner of the parcel, and the owner or occupier of the structure, or unit


In [None]:
# Report, column by column, how many building records have a missing value
for col in inB.columns:
    missing = inB[col].isna().sum()
    print(f'{col}: {missing}')

In [None]:
# Count of buildings with a null STR_NAME, broken down by BLD_USE
str_name_missing = inB['STR_NAME'].isna()
inB.loc[str_name_missing, 'BLD_USE'].value_counts()

# Summarize buildings within a grid

Create a 250m grid across the study area and summarize buildings and parcels within each cell

In [None]:
# Spatial index of the DA (Digitize Africa) buildings already loaded into inDA;
# nothing is opened here, the layer was read in the extraction cell.
da_idx = inDA.sindex

In [None]:
# Load the summary grid from disk if it was generated before; otherwise
# generate it over the AOIs at `res` resolution and save it for reuse.
if os.path.exists(summary_grid):
    grid = gpd.read_file(summary_grid)
else:
    grid = helper.generate_grid(inAOI, res, m_crs)
    grid.to_file(summary_grid)

In [None]:
# Summarize the DA buildings, collected buildings and parcels within the grid.
# The summary logic lives in helper.summarize_in_grid; its result replaces `grid`.
# NOTE(review): the columns added to the grid are defined by the helper - confirm there.
grid = helper.summarize_in_grid(grid, inDA, inB, inP)

# Compare Buildings and Parcels

In [None]:
# Footprint area of every geometry, in the units of the layer's CRS.
# The vectorised GeoSeries.area replaces the per-row apply(lambda x: x.area).
inB['area'] = inB['geometry'].area
# NOTE(review): the original assigned to `inG`, which is never defined in this
# notebook (NameError). It presumably referred to the comparison buildings layer
# that is now held in inDA - confirm this is the intended layer.
inDA['area'] = inDA['geometry'].area

In [None]:
# Compare the parcels against the collected buildings and the DA buildings.
# The comparison logic lives in helper.compare_buildings_parcels; its result replaces inP.
inP = helper.compare_buildings_parcels(inP, inB, inDA)
# NOTE(review): in_parcels_file is the same path the parcels were originally read
# from, so this overwrites the collected input (now reprojected to m_crs and with
# whatever the helper added). Confirm this is intended rather than writing to out_folder.
inP.to_file(in_parcels_file)

# Print final statistics

In [None]:
# Attribute each building with its parcel relationships; the later cells read the
# 'p_c' and 'p_i' columns from the result. The original line contained a stray
# `def` (`helper.def attribute_buildings_parcels(...)`), which is a SyntaxError.
inB = helper.attribute_buildings_parcels(inB, inP)

In [None]:
# Number of buildings with a positive 'p_c' / 'p_i' value.
# NOTE(review): presumably p_c / p_i are counts of containing / intersecting
# parcels set by helper.attribute_buildings_parcels - confirm against the helper.
contained_buildings = inB['p_c'].gt(0).sum()
intersecting_buildings = inB['p_i'].gt(0).sum()

In [None]:
# Headline building figures: all buildings, those with area above 50, and those
# counted as contained in the previous cell
all_buildings = inB.shape[0]
main_buildings = inB.loc[inB['area'] > 50].shape[0]
print(f'Total Buildings: {all_buildings}')
print(f"Main Buildings: {main_buildings}")
print(f'Contained Buildings: {contained_buildings}')

In [None]:
# Shape of the subset of parcels whose BLDG_M value exceeds one,
# i.e. parcels with more than one 'Main' building
multi_main = inP['BLDG_M'].gt(1)
inP.loc[multi_main].shape

In [None]:
# Final tallies derived from the parcel-relationship counts computed above
building_total = inB.shape[0]
floating = building_total - intersecting_buildings
multi_parcel = (inB["p_i"] > 1).sum()
crossing = intersecting_buildings - contained_buildings

print(f'Total buildings: {building_total}')
print(f'Floating buildings: {floating}')
print(f'Buildings with >1 parcel: {multi_parcel}')
print(f'Buildings crossing bounds: {crossing}')