# Evaluate building completeness

In support of an ongoing project in Nigeria, we are investigating and evaluating the results of a building digitization effort. The following analyses will be performed to assess the completeness of the digitization effort.

1. Attribute evaluation  
   a. ensure variables in verification protocol are present  
   b. ensure relationship between parcel owner and building occupants  
2. Coverage evaluation  
   a. Calculate across the area in a consistently sized grid (250m)  
   b. Compare building density to Google Buildings  

In [12]:
import sys, os, importlib
import folium, fiona

import pandas as pd
import geopandas as gpd

sys.path.append(r"C:\WBG\Work\Code\gostrocks\src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.misc as misc

from shapely.geometry import Point, box
from shapely.wkt import loads

from math import ceil
import numpy as np
from shapely.geometry import Polygon

import bldg_helper as helper

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Input locations: the Kogi project folder, plus shared layers in its parent folder
in_folder = r"C:\WBG\Work\NGA_buildings\Kogi"
out_folder = os.path.join(in_folder, "Results")
# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable
os.makedirs(out_folder, exist_ok=True)
in_buildings_file = os.path.join(in_folder, "To Transfer", "building footprint_and_roadnetwork", "building_footprint.shp")
in_buildings_attr = os.path.join(in_folder, "BLDG.csv")
in_unit_attr = os.path.join(in_folder, "APRT.csv")
in_aoi = os.path.join(os.path.dirname(in_folder), 'NGA_buildings_aois.shp')
in_da = os.path.join(os.path.dirname(in_folder), "NGA_buildings_merge.shp")

# Define parameters
crs = 3857
m_crs = f'epsg:{crs}' # projection used to calculate metre measurements
res = 250 # resolution (cell size) of the summary grid, in the units of m_crs

# Define outputs
summary_grid = os.path.join(out_folder, f"KOGI_summary_grid_{res}.shp")
da_buildings = os.path.join(out_folder, "da_buildings.shp")


In [3]:
# Load the digitized building footprints from the shapefile set in the configuration cell
inB = gpd.read_file(in_buildings_file)

def broken_geometry(x):
    """Return True when ``x`` is a usable geometry, False otherwise.

    NOTE: despite its name, this function returns True for *good* geometries.
    The notebook uses the result as a keep-mask (``inB.loc[broken]``), so
    True means "keep this row".

    Parameters
    ----------
    x : geometry-like or None
        Any object exposing an ``is_valid`` attribute (e.g. a shapely geometry).

    Returns
    -------
    bool
        False if ``x`` is falsy (e.g. None), if ``x.is_valid`` is falsy, or if
        evaluating either check raises an ordinary exception; True otherwise.
    """
    try:
        if not x:
            return False
        if not x.is_valid:
            return False
    except Exception:
        # A geometry that cannot even be inspected is treated as unusable.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long-running apply can still be stopped.
        return False
    return True

# Flag usable geometries. Despite the names, `broken` is True for rows whose
# geometry passed the checks in broken_geometry (i.e. the rows to keep).
broken = inB['geometry'].apply(broken_geometry)

In [4]:
# Report the table shape before and after dropping unusable geometries
print(inB.shape)
inB = inB.loc[broken]  # `broken` is True for geometries that passed the checks
print(inB.shape)

# Record the extent in geographic coordinates (EPSG:4326), then project to metres
wgs_bounds = inB.to_crs('epsg:4326').total_bounds
inB = inB.to_crs(m_crs)

(45424, 8)
(45254, 8)


In [5]:
# Number of buildings dropped for a missing or invalid geometry. Derived from the
# keep-mask instead of hard-coded row counts, so it stays correct if the input changes.
int((~broken).sum())

170

In [6]:
# Load the attribute tables configured above: BLDG.csv (building attributes)
# and APRT.csv (unit attributes)
inB_attr = pd.read_csv(in_buildings_attr)
inB_unit_attr = pd.read_csv(in_unit_attr)

In [10]:
# Load the areas of interest and keep only those whose Name mentions "Kogi".
# str.contains with na=False treats a missing Name as "no match" instead of
# raising TypeError, and regex=False keeps this a plain substring test.
inAOI = gpd.read_file(in_aoi)
inAOI = inAOI.loc[inAOI['Name'].str.contains("Kogi", na=False, regex=False)]
inAOI

Unnamed: 0,id,Name,geometry
5,6.0,Kogi,"POLYGON ((6.70645 7.88021, 6.72504 7.87468, 6...."


# Calculate general statistics

In [7]:
# Preview the building footprints after the geometry filter and reprojection
inB.head()

Unnamed: 0,gid,id,bldg_no,status,bsn,bldg_size,new_bsn,geometry
0,66,,81.0,,66,35.685,2466,"POLYGON ((750544.870 865634.261, 750553.408 86..."
1,119,,134.0,,119,42.213,24119,"POLYGON ((744531.665 871789.611, 744528.157 87..."
3,557,,572.0,,557,114.241,24557,"POLYGON ((743322.141 871521.198, 743330.409 87..."
4,596,,611.0,,596,48.304,24596,"POLYGON ((743458.970 871449.642, 743465.316 87..."
5,664,,679.0,,664,248.655,24664,"POLYGON ((743197.603 871232.358, 743218.967 87..."


In [8]:
# Headline totals for the cleaned footprints. The area is summed from the
# `bldg_size` attribute, not recomputed from the geometries.
building_count = inB.shape[0]
building_area = inB["bldg_size"].sum()
print(f'Total number of buildings: {building_count}')
print(f'Total area of buildings: {building_area}')

Total number of buildings: 45254
Total area of buildings: 7180622.51


# Extract and process Digitize Africa buildings

In [13]:
# Reuse the cached extract when it exists; otherwise build it from the source
# layer for the area of interest and cache it for the next run
if os.path.exists(da_buildings):
    inDA = gpd.read_file(da_buildings)
else:
    inDA = helper.extract_da_buildings(inAOI, in_da, m_crs)
    inDA.to_file(da_buildings)

NameError: name 'm_crs' is not defined

# Assess attribute completeness
1. Size of the parcel (based on polygon)
2. **Number of structures in parcel, number of occupancy units**  
   a. This is not found in the parcel datasets, but can be calculated.
3. The use of each structure and unit
4. The name, ID, and contact details for the owner of the parcel, and the owner or occupier of the structure, or unit


In [None]:
# Preview the building attribute table loaded from BLDG.csv
inB_attr.head()

In [None]:
# Count the missing values in every column of the building attribute table
null_counts = inB_attr.isna().sum()
for col, n_missing in null_counts.items():
    print(f'{col}: {n_missing}')

# Summarize buildings within a grid

Create a 250m grid across the study area and summarize buildings and parcels within each cell.

In [None]:
# Load the cached summary grid when present; otherwise generate it over the
# area of interest at `res` resolution and cache it for the next run
if os.path.exists(summary_grid):
    grid = gpd.read_file(summary_grid)
else:
    grid = helper.generate_grid(inAOI, res, m_crs)
    grid.to_file(summary_grid)

In [None]:
# Summarize the Digitize Africa buildings (inDA), the collected buildings (inB)
# and inP within the grid.
# NOTE(review): `inP` is not defined in any cell visible in this notebook —
# presumably the parcel layer; confirm where it is loaded before running.
# NOTE(review): this overwrites `grid` with the helper's result, so re-running the
# cell feeds the already-summarized grid back in — confirm that is safe.
grid = helper.summarize_in_grid(grid, inDA, inB, inP)

# RETIRED

In [None]:
# Summarize buildings in parcels
# For every row of inP, count the collected buildings (inB) and the Google
# buildings (inG) that intersect its geometry (BLDG_I / BLDG_I_G) and that its
# geometry contains (BLDG_C / BLDG_C_G).
# NOTE(review): inP, inG, b_idx and g_idx are not defined in any cell visible in
# this notebook — presumably a parcel layer, a Google buildings layer and spatial
# indexes over inB / inG; confirm before re-enabling this retired section.
inP['BLDG_I'] = 0
inP['BLDG_C'] = 0
inP['BLDG_I_G'] = 0
inP['BLDG_C_G'] = 0
for idx, row in inP.iterrows():
    # Summarize collected buildings
    # Candidates are the inB rows whose labels b_idx returns for the parcel's
    # bounding box (presumably a spatial-index prefilter — TODO confirm)
    potential_buildings = inB.loc[list(b_idx.intersection(row['geometry'].bounds))]
    # m_bld: candidates that do NOT intersect the parcel; it is not used in the
    # counts below, only by the plotting cell that follows
    m_bld = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    # i_bld: candidates that intersect the parcel
    i_bld = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    # c_bld: candidates that the parcel geometry contains
    c_bld = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I'] = i_bld.shape[0]
    inP.loc[idx, 'BLDG_C'] = c_bld.shape[0]
    
    # Summarize buildings in Google
    # NOTE: this reassigns potential_buildings, so after the loop that name refers
    # to the Google candidates of the last processed parcel, while m_bld / i_bld /
    # c_bld still refer to the collected buildings
    potential_buildings = inG.loc[list(g_idx.intersection(row['geometry'].bounds))]
    m_bld_g = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld_g = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld_g = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I_G'] = i_bld_g.shape[0]
    inP.loc[idx, 'BLDG_C_G'] = c_bld_g.shape[0]
    
    # The triple-quoted string below is a no-op expression used as a disabled
    # early exit; it has no effect while it remains quoted
    ''' # uncomment this section to stop the loop add a specific index in order to run plotting below
    if idx > 3:
        break
    '''

In [None]:
# Debug map for the last parcel processed by the loop above. It relies on the
# variables that loop leaves behind: row, potential_buildings, m_bld, i_bld, c_bld.
# NOTE(review): folium expects latitude/longitude (EPSG:4326) coordinates, while
# inB is reprojected to m_crs earlier in this notebook — confirm the CRS of the
# layers drawn here.
centre = potential_buildings.unary_union.centroid  # computed once, used for y and x
m = folium.Map(location=[centre.y, centre.x], zoom_start=18)

# One (label, data factory, style) entry per overlay, in drawing order. The data
# is built lazily inside the try block so that a failure in one layer (including
# a missing variable) is reported and does not stop the remaining layers.
overlays = [
    ('parcel',
     lambda: row['geometry'],
     {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}),
    ('candidate buildings not intersecting the parcel',
     lambda: m_bld.to_json(),
     {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}),
    ('buildings intersecting the parcel',
     lambda: i_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}),
    ('buildings contained in the parcel',
     lambda: c_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#68ed76', 'stroke': False}),
    ('all candidate buildings (outline only)',
     lambda: potential_buildings.to_json(),
     {'fillOpacity':0,'fillColor': '#ebab15'}),
]
for label, get_data, style in overlays:
    try:
        folium.GeoJson(
            get_data(),
            # bind this layer's style now and hand folium a fresh dict on every call
            style_function=lambda x, style=style: dict(style),
        ).add_to(m)
    except Exception as err:
        # Still best-effort, but no longer silent: say which layer was skipped and why
        print(f"Skipped map layer '{label}': {err!r}")

m
