# Evaluate building completeness

In support of an ongoing project in Nigeria, we are investigating and evaluating the results of a building digitization effort. The following analyses will be performed to assess the completeness of the digitization effort.

1. Attribute evaluation  
   a. ensure variables in verification protocol are present  
   b. ensure relationship between parcel owner and building occupants  
2. Coverage evaluation  
   a. Calculate across the area in a consistently sized grid (250m)  
   b. Compare building density to Google Buildings  

In [1]:
import sys, os, importlib
import folium

import pandas as pd
import geopandas as gpd
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.misc as misc

from shapely.geometry import Point, box
from shapely.wkt import loads

from math import ceil
import numpy as np
from shapely.geometry import Polygon

In [2]:
# NOTE(review): these are absolute, user-specific paths; adjust them when running elsewhere.
in_folder =  "/home/wb411133/projects/NGA_buildings/Kogi/"
out_folder = "/home/wb411133/projects/NGA_buildings/Kogi/Results"
# Create the results folder when missing; exist_ok avoids the check-then-create race.
os.makedirs(out_folder, exist_ok=True)
in_buildings_file = os.path.join(in_folder, "To Transfer", "building footprint_and_roadnetwork", "building_footprint.shp")
in_buildings_attr = os.path.join(in_folder, "BLDG.csv")
in_unit_attr = os.path.join(in_folder, "APRT.csv")

# Define parameters
crs = 3857
m_crs = f'epsg:{crs}' # projection used to calculate metre measurements
res = 250 # resolution of the summary grid (also embedded in the output file name)

# Define outputs
summary_grid = os.path.join(out_folder, f"KOGI_summary_grid_{res}.shp")
out_google = os.path.join(out_folder, "google_buildings.csv")


In [3]:
# Load the digitized building footprints from the shapefile configured above
inB = gpd.read_file(in_buildings_file)

def broken_geometry(x):
    """Report whether ``x`` is a usable geometry.

    Despite the name, the return value is True for geometries to KEEP and
    False for broken ones, so the result can be used directly as a row mask.

    Parameters
    ----------
    x : object
        Value to inspect; expected to expose an ``is_valid`` attribute.

    Returns
    -------
    bool
        False when ``x`` is falsy (e.g. None), when ``x.is_valid`` is falsy,
        or when inspecting ``x`` raises an ordinary exception; True otherwise.
    """
    try:
        return bool(x) and bool(x.is_valid)
    except Exception:
        # Only ordinary errors mark the geometry as broken; KeyboardInterrupt
        # and SystemExit propagate so a long-running apply can be stopped.
        return False

# Flag broken geometries: the mask is True for rows that pass the check
# in broken_geometry (i.e. rows to keep) and False for rows to drop.
broken = inB['geometry'].apply(broken_geometry)

In [4]:
# Show the table size before and after keeping only the rows flagged True in `broken`
print(inB.shape)
inB = inB.loc[broken]
print(inB.shape)

# Project to metres
# The lon/lat extent is captured first; it is used later to subset the Google
# buildings table by its longitude/latitude columns.
wgs_bounds = inB.to_crs('epsg:4326').total_bounds
inB = inB.to_crs(m_crs)

(45424, 8)
(45254, 8)


In [8]:
# Number of records removed by the geometry check, computed from the mask
# instead of hand-typed row counts so it stays correct if the input changes.
int((~broken).sum())

170

In [5]:
# Load the building-level and unit-level attribute tables in one pass
inB_attr, inB_unit_attr = (
    pd.read_csv(csv_path) for csv_path in (in_buildings_attr, in_unit_attr)
)

# Calculate general statistics

In [None]:
# Preview the first rows of the filtered, reprojected footprints
inB.head()

In [None]:
# Headline totals for the collected footprints.
# NOTE(review): the units of the bldg_size column are not shown in this notebook — confirm before reporting.
print(f'Total number of buildings: {inB.shape[0]}')
print(f'Total area of buildings: {inB["bldg_size"].sum()}')

# Find, download, and process Google Buildings

In [None]:
if not os.path.exists(out_google):
    # TODO: the tile file below is hard-coded. The tile extents in
    # /home/public/Data/GLOBAL/Buildings/google_tiles.geojson could be intersected
    # with the study area to pick (and, if needed, download) the right tile(s).
    google_buildings = '/home/public/Data/GLOBAL/Buildings/105_buildings.csv'
    inG = pd.read_csv(google_buildings)

    # Keep only rows strictly inside the lon/lat extent of the collected buildings.
    xmin, ymin, xmax, ymax = wgs_bounds
    in_extent = (
        (inG['longitude'] > xmin) & (inG['longitude'] < xmax)
        & (inG['latitude'] > ymin) & (inG['latitude'] < ymax)
    )
    # Reset the index and write without it so that the freshly built table and
    # the table re-read from the cache have the same columns and the same 0..n-1
    # index (writing the index adds an "Unnamed: 0" column on reload).
    selG = inG.loc[in_extent].reset_index(drop=True)
    selG.to_csv(out_google, index=False)
    inG = selG
else:
    # Re-use the subset cached by a previous run.
    inG = pd.read_csv(out_google)

In [None]:
# Parse the WKT text column into geometries, wrap the table as a lon/lat
# GeoDataFrame and reproject it to the metric CRS in a single chain.
inG_geom = inG['geometry'].apply(loads)
inG = gpd.GeoDataFrame(inG, geometry=inG_geom, crs="epsg:4326").to_crs(m_crs)

In [None]:
# Save the selected Google footprints to the results folder
inG.to_file(os.path.join(out_folder, "google_buildings_sel.shp"))

In [None]:
# Save the collected footprints to the results folder
inB.to_file(os.path.join(out_folder, "collected_buildings.shp"))

In [None]:
# Preview the first rows of the Google footprints
inG.head()

# Assess attribute completeness
1. Size of the parcel (based on polygon)
2. **Number of structures in parcel, number of occupancy units**  
   a. This is not found in the parcel datasets, but can be calculated.
3. The use of each structure and unit
4. The name, ID, and contact details for the owner of the parcel, and the owner or occupier of the structure, or unit


In [6]:
# Preview the building attribute table.
# NOTE(review): the rendered rows include field officer names — consider clearing this output before sharing.
inB_attr.head()

Unnamed: 0,sn,entrydate,drainage,road_surface,rd_condition,rd_type,rdcarriage,streetname,the_location,the_area,...,contact_person,contact_email,contact_phone,road_sn,field_officer,agency_id,date_uploaded,coordinates,x_axis,y_axis
0,19,Wed May 19 14:34:11 GMT 2021,Earth,0,Bad,Close,Single Carriage,Na,Zango Daji,Zango,...,,,,2866,odoma attah sunday,11,Wed May 19 12:31:06 GMT 2021,"7.80910004591279971, 6.6328150590388999",7.8091,6.632815
1,20,Wed Jun 23 13:09:47 GMT 2021,,0,Fair,Street,Single Carriage,Unity Community street,500 Housing Unity,Back of Texaco,...,,,,24187,SAMUEL FAITH,11,Thu Jun 24 11:52:03 GMT 2021,"7.74905149741459987, 6.73626764082469975",7.749051,6.736268
2,21,Thu Jun 10 10:20:36 GMT 2021,Good,0,Fair,Street,Single Carriage,Sub Meme street,Otokiti opposite army barracks,Otokiti housing estate,...,,,,24680,aaze aishat,11,Thu Jun 10 10:27:12 GMT 2021,"7.80685855298460041, 6.66841536887259956",7.806859,6.668415
3,22,Thu Jun 24 11:49:00 GMT 2021,,0,Bad,Street,Single Carriage,Destine Star ministry,Ganaja,Ganaja,...,,,,242173,SHAIBU ABDULLAHI,11,Thu Jun 24 11:57:46 GMT 2021,"7.74655708436729995, 6.7370398818885997",7.746557,6.73704
4,23,Thu Jun 17 09:47:22 GMT 2021,Damaged,0,Fair,Street,Single Carriage,Street o3,Otokiti estate,Otokiti,...,,,,241866,Danladi Garba,11,Thu Jun 17 10:03:30 GMT 2021,"7.80505193702270006, 6.6873732072102996",7.805052,6.687373


In [7]:
# Count missing values per column of the buildings attribute table
for col, n_missing in inB_attr.isna().sum().items():
    print(f'{col}: {n_missing}')

sn: 0
entrydate: 0
drainage: 0
road_surface: 0
rd_condition: 0
rd_type: 0
rdcarriage: 0
streetname: 69
the_location: 0
the_area: 0
the_lga: 0
thestate: 0
thestateid: 0
Unnamed: 13: 42963
bsn: 3
type_of_owner: 0
part_occupied: 0
site_condi,C,254: 0
bldg_type: 0
electrification: 0
no_of_floors: 0
water_supply: 0
refuse_disposal: 0
no_of_bq: 0
contact_person: 42963
contact_email: 42963
contact_phone: 42963
road_sn: 0
field_officer: 1
agency_id: 0
date_uploaded: 0
coordinates: 0
x_axis: 0
y_axis: 0


In [9]:
# Count missing values per column of the unit attribute table
for col, n_missing in inB_unit_attr.isna().sum().items():
    print(f'{col}: {n_missing}')

entityid: 0
prycategory: 0
secategory: 2
contactperson: 23052
contactemail: 23052
contactphone: 23052
meter_available: 0
meter_type: 0
meter_condition: 0
meternumber: 23052
signage: 0
photopath: 23052
entity_name: 1188
bsn: 3953
fieldofficer_id: 23052


# Summarize buildings within a grid

Create a 250m grid across the study area and summarize buildings and parcels within it.

In [None]:
def make_grid(bounds, cell_size, crs):
    """Build a regular square grid covering a bounding box.

    Parameters
    ----------
    bounds : sequence of four numbers
        (xmin, ymin, xmax, ymax) in the units of ``crs``.
    cell_size : number
        Side length of each cell, in the units of ``crs``.
    crs : any CRS accepted by geopandas
        Coordinate reference system assigned to the grid.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cell, ordered row by row from the lower-left corner. The
        last row/column may extend past ``bounds`` so every cell is full size.
    """
    xmin, ymin, xmax, ymax = bounds
    # At least one cell in each direction, even for a degenerate extent.
    n_cols = max(1, ceil((xmax - xmin) / cell_size))
    n_rows = max(1, ceil((ymax - ymin) / cell_size))
    cells = [
        box(xmin + c * cell_size, ymin + r * cell_size,
            xmin + (c + 1) * cell_size, ymin + (r + 1) * cell_size)
        for r in range(n_rows)
        for c in range(n_cols)
    ]
    return gpd.GeoDataFrame(geometry=cells, crs=crs)

# `grid` is not created by any other cell in this notebook, so a fresh
# "Restart & Run All" would fail here. Build it from the extent of the collected
# buildings at the configured resolution unless one is already loaded.
if 'grid' not in globals():
    grid = make_grid(inB.total_bounds, res, inB.crs)
grid.head()

In [None]:
# Bring the grid and the Google footprints into the CRS of the collected
# buildings; layers that already match are left untouched.
grid = grid.to_crs(inB.crs) if grid.crs != inB.crs else grid
inG = inG.to_crs(inB.crs) if inG.crs != inB.crs else inG


In [None]:
# Debug aid: list the attributes exposed by the spatial index object
dir(inG.sindex)

In [None]:
# Row and column count of the collected buildings table
inB.shape

In [None]:
# Spatial indexes for the Google and collected footprints, built once for the loops below.
# NOTE(review): index queries are expected to return row positions rather than index labels — confirm before using them with .loc.
g_idx = inG.sindex
b_idx = inB.sindex

In [None]:
def _covered_fraction(cell, footprints):
    """Return the share of ``cell``'s area covered by the union of ``footprints``.

    Parameters
    ----------
    cell : shapely geometry
        The grid cell polygon.
    footprints : geopandas.GeoDataFrame
        Footprints intersecting the cell, in the same CRS as ``cell``.

    Returns
    -------
    float
        Covered area divided by cell area; 0.0 when there are no footprints or
        when the overlay fails (the failure is printed, not silently hidden).
    """
    if footprints.empty:
        return 0.0
    try:
        return cell.intersection(footprints.unary_union).area / cell.area
    except Exception as err:
        # Best effort, as before: one bad geometry must not stop the whole run.
        print(f'coverage calculation failed ({err}); recording 0')
        return 0.0


grid['per_b'] = 0.
grid['per_p'] = 0.  # parcel coverage is no longer calculated; column kept for the output schema
grid['per_g'] = 0.
for idx, row in grid.iterrows():
    print(idx)
    cell = row['geometry']

    # identify intersecting google buildings
    # The spatial index returns row POSITIONS, so candidates are taken with
    # .iloc; .loc would treat them as index labels and can pick the wrong rows.
    potential_google = inG.iloc[sorted(g_idx.intersection(cell.bounds))]
    i_g = potential_google.loc[potential_google.intersects(cell)]
    c_g = potential_google.loc[potential_google.within(cell)]

    # identify intersecting buildings
    # Pre-filter with the spatial index instead of testing every building per cell.
    potential_buildings = inB.iloc[sorted(b_idx.intersection(cell.bounds))]
    i_bld = potential_buildings.loc[potential_buildings.intersects(cell)]
    c_bld = potential_buildings.loc[potential_buildings.within(cell)]

    # calculate percent building
    per_google = _covered_fraction(cell, i_g)
    per_building = _covered_fraction(cell, i_bld)

    grid.loc[idx, 'per_b'] = per_building
    grid.loc[idx, 'per_g'] = per_google

    # uncomment this section to stop the loop at a specific index in order to run plotting below
    # if idx > 3 and potential_buildings.shape[0] > 0:
    #    break
    

In [None]:
# Write the grid, including the per_b / per_p / per_g columns, to the configured output path
grid.to_file(summary_grid)

In [None]:
# Map the most recently looped grid cell
m = folium.Map(location=[potential_buildings.unary_union.centroid.y, potential_buildings.unary_union.centroid.x], zoom_start=18)
try:
    folium.GeoJson(row['geometry'], 
             style_function=lambda x: {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}).add_to(m)
except:
    pass
try:
    folium.GeoJson(i_par.to_json(), 
               style_function=lambda x: {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}).add_to(m)
except:
    pass
try:
    folium.GeoJson(data=i_bld.to_json(), 
              style_function=lambda x: {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}).add_to(m)
except:
    pass
m

In [None]:
# Debug aid: overall extent of the collected buildings in their current CRS
inB.total_bounds

In [None]:
# Debug aid: extent reported by the buildings spatial index
b_idx.bounds

# RETIRED

In [None]:
# Summarize buildings in parcels
inP['BLDG_I'] = 0
inP['BLDG_C'] = 0
inP['BLDG_I_G'] = 0
inP['BLDG_C_G'] = 0
for idx, row in inP.iterrows():
    # Summarize collected buildings
    potential_buildings = inB.loc[list(b_idx.intersection(row['geometry'].bounds))]
    m_bld = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I'] = i_bld.shape[0]
    inP.loc[idx, 'BLDG_C'] = c_bld.shape[0]
    
    # Summarize buildings in Google
    potential_buildings = inG.loc[list(g_idx.intersection(row['geometry'].bounds))]
    m_bld_g = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld_g = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld_g = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I_G'] = i_bld_g.shape[0]
    inP.loc[idx, 'BLDG_C_G'] = c_bld_g.shape[0]
    
    ''' # uncomment this section to stop the loop add a specific index in order to run plotting below
    if idx > 3:
        break
    '''

In [None]:
# RETIRED: map of the last parcel processed by the retired loop, with the
# candidate buildings coloured by their relation to it.
# NOTE(review): the layers appear to be in the projected CRS at this point, whereas
# folium presumably expects lat/lon — confirm and reproject before relying on this map.
# NOTE(review): each bare `except: pass` below hides any failure (including
# undefined variables), so a missing layer gives no indication of why.
m = folium.Map(location=[potential_buildings.unary_union.centroid.y, potential_buildings.unary_union.centroid.x], zoom_start=18)
# Parcel outline
try:
    folium.GeoJson(row['geometry'], 
             style_function=lambda x: {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}).add_to(m)
except:
    pass
# Candidates that do not intersect the parcel
try:
    folium.GeoJson(data=m_bld.to_json(), 
              style_function=lambda x: {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}).add_to(m)
except:
    pass
# Buildings intersecting the parcel
try:
    folium.GeoJson(i_bld.to_json(), 
               style_function=lambda x: {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}).add_to(m)
except:
    pass
# Buildings contained by the parcel
try:
    folium.GeoJson(c_bld.to_json(), 
               style_function=lambda x: {'fillOpacity':1, 'fillColor': '#68ed76', 'stroke': False}).add_to(m)
except:
    pass

# Outlines of all candidates from the most recent index query
try:
    folium.GeoJson(data=potential_buildings.to_json(), 
              style_function=lambda x: {'fillOpacity':0,'fillColor': '#ebab15'}).add_to(m)
except:
    pass

m
