# Evaluate building completeness

In support of an ongoing project in Nigeria, we are investigating and evaluating the results of a building digitization effort. The following analyses will be performed to assess the completeness of the digitization effort.

1. Attribute evaluation  
   a. ensure variables in verification protocol are present  
   b. ensure relationship between parcel owner and building occupants  
2. Coverage evaluation  
   a. Calculate across the area in a consistently sized grid (250m)  
   b. Compare building density to Google Buildings  

In [1]:
import sys, os, importlib
import folium

import pandas as pd
import geopandas as gpd
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.misc as misc

from shapely.geometry import Point, box
from shapely.wkt import loads

from math import ceil
import numpy as np
from shapely.geometry import Polygon

import bldg_helper as helper

%load_ext autoreload
%autoreload 2

In [2]:
# Root folder holding the Ondo inputs; results are written to a sub-folder of it
in_folder = "/home/wb411133/projects/NGA_buildings/Ondo"
out_folder = os.path.join(in_folder, "Results")
# exist_ok avoids the check-then-create race and keeps the cell safe to re-run
os.makedirs(out_folder, exist_ok=True)

# Define inputs (both live one level above in_folder)
in_da = os.path.join(os.path.dirname(in_folder), "NGA_buildings_merge.shp")
in_aoi = os.path.join(os.path.dirname(in_folder), 'NGA_buildings_aois.shp')

# Define parameters
crs = 3857
m_crs = f'epsg:{crs}' # projection used to calculate metre measurements
res = 250 # resolution of the summary grid (passed to helper.generate_grid and used in the grid file name)

# Define outputs
out_buildings   = os.path.join(out_folder, "Ondo_buildings.geojson")
summary_grid    = os.path.join(out_folder, f"Ondo_summary_grid_{res}.shp")
out_google_csv  = os.path.join(out_folder, "google_buildings.csv")
out_google_bldg = os.path.join(out_folder, "google_buildings.geojson")
da_buildings    = os.path.join(out_folder, "da_buildings.shp")

In [3]:
# Load the AOI polygons and keep only those whose Name mentions "Ondo".
inAOI = gpd.read_file(in_aoi)
# na=False treats a missing Name as "no match" (the previous `"Ondo" in x`
# raised TypeError on NaN); regex=False keeps this a plain substring test.
inAOI = inAOI.loc[inAOI['Name'].str.contains("Ondo", na=False, regex=False)]
inAOI

Unnamed: 0,id,Name,geometry
0,1.0,Ondo,"POLYGON ((4.67311 7.22671, 4.70199 7.32221, 4...."


In [4]:
# Report the shape of every Excel workbook delivered for Ondo and the total row count.
excel_folder = os.path.join(in_folder, 'WB Data Request', 'WB Data Request', 'WB Data Request')
# Skip '~'-prefixed files (presumably Excel temporary/lock files - TODO confirm);
# sort because os.listdir returns entries in arbitrary order.
excel_files = sorted(
    name for name in os.listdir(excel_folder)
    if name.endswith("xlsx") and not name.startswith('~')
)
total = 0
for name in excel_files:
    workbook = pd.read_excel(os.path.join(excel_folder, name))
    print(f'{name}: {workbook.shape}')
    total += workbook.shape[0]
print(total)

AKOKO NORTH EAST.xlsx: (6217, 23)


  xx = pd.read_excel(os.path.join(excel_folder, x))


Akoko South East.xlsx: (1964, 32)
Akoko South West.xlsx: (1515, 23)
AKURE NORTH.xlsx: (16335, 24)
Ile oluji Okeigbo.xlsx: (2961, 24)
OWO.xlsx: (7712, 24)
36704


In [5]:
# Convert the Excel records to point geometries; the result is cached as GeoJSON in out_buildings
if not os.path.exists(out_buildings):
    excel_folder = os.path.join(in_folder, 'WB Data Request', 'WB Data Request', 'WB Data Request')
    # Sort because os.listdir returns entries in arbitrary order
    excel_files = sorted(x for x in os.listdir(excel_folder) if x.endswith("xlsx") and not x.startswith('~'))
    # Do NOT call this list `res`: that name holds the grid resolution defined in
    # the configuration cell and is passed to helper.generate_grid further down.
    excel_frames = [pd.read_excel(os.path.join(excel_folder, x)) for x in excel_files]
    # ignore_index gives unique row labels, matching the RangeIndex that
    # gpd.read_file produces in the cached branch below.
    all_vals = pd.concat(excel_frames, ignore_index=True)
    # Drop the "Unnamed..." columns; str() guards against non-string column labels
    all_vals = all_vals.loc[:, ["Unnamed" not in str(x) for x in all_vals.columns]]
    # Non-numeric coordinates become NaN instead of raising
    all_vals['Longitude'] = pd.to_numeric(all_vals['Longitude'], errors='coerce')
    all_vals['Latitude'] = pd.to_numeric(all_vals['Latitude'], errors='coerce')

    all_vals_geom = all_vals.apply(lambda x: Point(x.Longitude, x.Latitude), axis=1)
    inB = gpd.GeoDataFrame(all_vals, geometry=all_vals_geom, crs='epsg:4326')

    inB.to_file(out_buildings, driver='GeoJSON')
else:
    inB = gpd.read_file(out_buildings)


# Calculate general statistics

In [None]:
# Preview the first rows of the building records loaded above
inB.head()

In [None]:
# Row count of the building records
building_count = inB.shape[0]
print(f'Total number of buildings: {building_count}')

# "Land Size" is coerced to numeric (unparseable values become NaN) before summing
inB["Land Size"] = pd.to_numeric(inB["Land Size"], errors='coerce')
land_size_total = inB["Land Size"].sum()
print(f'Total area of buildings: {land_size_total}')

# Extract buildings from Digitize Africa


In [6]:
# Pick up any edits made to the helper module
importlib.reload(helper)

# Reuse the cached Digitize Africa extract when it exists; otherwise build and cache it
if os.path.exists(da_buildings):
    inDA = gpd.read_file(da_buildings)
else:
    inDA = helper.extract_da_buildings(inAOI, in_da, m_crs)
    inDA.to_file(da_buildings)

# Move the current index into a regular column and renumber the rows
inDA = inDA.reset_index()

# Assess attribute completeness
1. Size of the parcel (based on polygon)
2. **Number of structures in parcel, number of occupancy units**  
   a. This is not found in the parcel datasets, but can be calculated.
3. The use of each structure and unit
4. The name, ID, and contact details for the owner of the parcel, and the owner or occupier of the structure, or unit


In [None]:
# Report, for every column of the buildings dataset, how many values are missing
record_count = inB.shape[0]
print(f'Total Records: {record_count}')
for col in inB.columns:
    missing = inB[col].isna().sum()
    print(f'{col}: {missing}')

# Summarize buildings within a grid

Create a 250m grid across the study area and summarize buildings and parcels within each cell

In [7]:
# Keep a handle to the spatial index of the Digitize Africa buildings.
# NOTE(review): da_idx is not referenced by any later cell visible here.
da_idx = inDA.sindex

In [8]:
# Load the cached grid when it is already on disk; otherwise generate it
# over the AOI at resolution `res` in `m_crs` and cache it.
if os.path.exists(summary_grid):
    grid = gpd.read_file(summary_grid)
else:
    grid = helper.generate_grid(inAOI, res, m_crs)
    grid.to_file(summary_grid)

In [19]:
# Bring the building points into the grid's CRS before summarizing.
# Compare the CRS objects themselves: to_epsg() returns None for a CRS that has
# no matching EPSG code, so comparing EPSG codes could skip a needed reprojection.
if inB.crs != grid.crs:
    inB = inB.to_crs(grid.crs)

# Summarize the Digitize Africa buildings and the collected buildings per grid cell
# (no parcel layer is supplied).
grid = helper.summarize_in_grid(grid, inDA, inB, inP=None)

In [20]:
# Preview the per-cell summary columns added by helper.summarize_in_grid
grid.head()

Unnamed: 0,rowIdx,colIdx,per_b,per_p,per_g,geometry,per_da
0,525193.23487,799815.007437,1.0,0.0,15.0,"POLYGON ((525193.235 799815.007, 525193.235 80...",6.0
1,525193.23487,800315.007437,4.0,0.0,16.0,"POLYGON ((525193.235 800315.007, 525193.235 80...",13.0
2,525193.23487,800565.007437,3.0,0.0,6.0,"POLYGON ((525193.235 800565.007, 525193.235 80...",3.0
3,525193.23487,801065.007437,6.0,0.0,27.0,"POLYGON ((525193.235 801065.007, 525193.235 80...",23.0
4,525193.23487,801315.007437,1.0,0.0,9.0,"POLYGON ((525193.235 801315.007, 525193.235 80...",8.0


In [21]:
# Save the summarized grid. This writes to summary_grid, the same path the
# grid-generation cell reads back as its cache, so a later run reloads a grid
# that already carries these summary columns.
grid.to_file(summary_grid)

# RETIRED

In [None]:
# Summarize buildings in parcels
# RETIRED: this cell uses inP, inG, b_idx and g_idx, none of which are defined in
# the notebook cells visible here, so it will not run on a fresh kernel.
# For every parcel it stores four counts on inP:
#   BLDG_I / BLDG_C     - collected buildings (inB) intersecting / contained in the parcel
#   BLDG_I_G / BLDG_C_G - the same two counts for the Google buildings (inG)
inP['BLDG_I'] = 0
inP['BLDG_C'] = 0
inP['BLDG_I_G'] = 0
inP['BLDG_C_G'] = 0
for idx, row in inP.iterrows():
    # Summarize collected buildings
    # Candidate buildings come from a bounding-box query on b_idx
    # (presumably inB.sindex - TODO confirm; if it returns row positions,
    # .loc is only correct while inB has a default integer index).
    potential_buildings = inB.loc[list(b_idx.intersection(row['geometry'].bounds))]
    # m_bld: candidates that do NOT intersect the parcel; i_bld: candidates that do;
    # c_bld: candidates the parcel contains.
    m_bld = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I'] = i_bld.shape[0]
    inP.loc[idx, 'BLDG_C'] = c_bld.shape[0]
    
    # Summarize buildings in Google
    # Same three subsets for the Google buildings; note that potential_buildings
    # is overwritten here, so after the loop it holds the Google candidates.
    potential_buildings = inG.loc[list(g_idx.intersection(row['geometry'].bounds))]
    m_bld_g = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld_g = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld_g = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I_G'] = i_bld_g.shape[0]
    inP.loc[idx, 'BLDG_C_G'] = c_bld_g.shape[0]
    
    # The string below is an inert expression statement (disabled early exit).
    # The plotting cell that follows reads row, m_bld, i_bld, c_bld and
    # potential_buildings as left behind by the last iteration executed.
    ''' # uncomment this section to stop the loop add a specific index in order to run plotting below
    if idx > 3:
        break
    '''

In [None]:
def _fixed_style(style):
    """Return a folium style_function that yields a copy of `style` for every feature."""
    def style_function(feature):
        return dict(style)
    return style_function


# Centre the map on the candidate buildings left over from the parcel loop;
# the union is computed once instead of once per coordinate.
map_centre = potential_buildings.unary_union.centroid
m = folium.Map(location=[map_centre.y, map_centre.x], zoom_start=18)

# (label, data factory, style) for each overlay, in drawing order.
# The factories defer evaluation so that a failure building one layer
# (including a missing variable) is caught for that layer alone.
layers = [
    ('parcel', lambda: row['geometry'],
     {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}),
    ('non-intersecting buildings', lambda: m_bld.to_json(),
     {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}),
    ('intersecting buildings', lambda: i_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}),
    ('contained buildings', lambda: c_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#68ed76', 'stroke': False}),
    ('candidate building outlines', lambda: potential_buildings.to_json(),
     {'fillOpacity':0,'fillColor': '#ebab15'}),
]

for label, get_data, style in layers:
    try:
        folium.GeoJson(get_data(), style_function=_fixed_style(style)).add_to(m)
    except Exception as err:
        # Plotting stays best-effort, but a skipped layer is reported instead of
        # silently swallowed (and KeyboardInterrupt is no longer caught).
        print(f'Skipping layer "{label}": {err!r}')

m
