# Evaluate building completeness

In support of an ongoing project in Nigeria, we are investigating and evaluating the results of a building digitization effort. The following analyses will be performed to assess the completeness of the digitization effort.

1. Attribute evaluation  
   a. ensure variables in verification protocol are present  
   b. ensure relationship between parcel owner and building occupants  
2. Coverage evaluation  
   a. Calculate across the area in a consistently sized grid (250m)  
   b. Compare building density to Google Buildings  

In [None]:
import sys, os, importlib
import folium

import pandas as pd
import geopandas as gpd
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.misc as misc

from shapely.geometry import Point, box
from shapely.wkt import loads

from math import ceil
import numpy as np
from shapely.geometry import Polygon

In [None]:
in_folder = "/home/public/Data/COUNTRY/NGA"
out_folder = "/home/wb411133/projects/NGA_buildings"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
in_buildings_file = os.path.join(in_folder, "Building.shp")
in_parcels_file = os.path.join(in_folder, "Parcel.shp")

# Define outputs
summary_grid = os.path.join(out_folder, f"NGA_summary_grid_{res}.shp")

# Define paramters
crs = 3857
m_crs = f'epsg:{crs}' # projection used to calculate metre measurements
res = 250 # resolution of 


In [None]:
# Load the digitized buildings (inB) and parcels (inP), in that order
inB, inP = (gpd.read_file(src) for src in (in_buildings_file, in_parcels_file))

# Spatial indices used later for fast bounding-box lookups
b_idx, p_idx = inB.sindex, inP.sindex

# Calculate general statistics

In [None]:
# Buildings: feature count, then the sum of the BLD_SIZE_M attribute
building_count = inB.shape[0]
print(f'Total number of buildings: {building_count}')
building_area = inB["BLD_SIZE_M"].sum()
print(f'Total area of buildings: {building_area}')

# Parcels: feature count, then the sum of the PARCEL_SIZ attribute
parcel_count = inP.shape[0]
print(f'Total number of parcels: {parcel_count}')
parcel_area = inP["PARCEL_SIZ"].sum()
print(f'Total area of parcels: {parcel_area}')

# Find, download, and process Google Buildings

In [None]:
# read in the google buildings extents file to determine file to download
google_extents = gpd.read_file('/home/public/Data/GLOBAL/Buildings/google_tiles.geojson')

# Keep only the tiles that intersect the combined geometry of the digitized buildings
study_footprint = inB.unary_union
overlaps_study = google_extents.intersects(study_footprint)
sel_extents = google_extents[overlaps_study]
sel_extents

In [None]:
# may need to download buildings
# NOTE(review): the tile file name (105) is hard-coded; presumably it was picked from
# the tiles listed in sel_extents — confirm it matches before re-running elsewhere.
google_buildings = '/home/public/Data/GLOBAL/Buildings/105_buildings.csv'
# Read the tile as a plain (non-spatial) pandas table and preview it
inG = pd.read_csv(google_buildings)
inG.head()

In [None]:
# Bounding box of the digitized buildings: (minx, miny, maxx, maxy)
b = inB.total_bounds

# Keep Google records whose longitude/latitude fall strictly inside that box
lon = inG['longitude']
lat = inG['latitude']
inside_lon = (lon > b[0]) & (lon < b[2])
inside_lat = (lat > b[1]) & (lat < b[3])
selG = inG.loc[inside_lon & inside_lat]

In [None]:
# Save the Google records that fall inside the bounding box of the digitized buildings
selG.to_csv(os.path.join(out_folder, "google_buildings.csv"))

In [None]:
# Build a spatial index over the Google buildings.
# inG is read with pd.read_csv, and a plain DataFrame has neither a geometry column
# of shapes nor a `sindex`, so it is promoted to a GeoDataFrame first.
# NOTE(review): assumes the CSV stores footprints as WKT text in a 'geometry' column
# and that coordinates are WGS84 (EPSG:4326) — confirm against the downloaded tile.
if not isinstance(inG, gpd.GeoDataFrame):
    inG = gpd.GeoDataFrame(
        inG.assign(geometry=inG['geometry'].apply(loads)),
        geometry='geometry',
        crs='epsg:4326',
    )
g_idx = inG.sindex

# Assess attribute completeness
1. Size of the parcel (based on polygon)
2. **Number of structures in parcel, number of occupancy units**  
   a. This is not found in the parcel datasets, but can be calculated.
3. The use of each structure and unit
4. The name, ID, and contact details for the owner of the parcel, and the owner or occupier of the structure, or unit


In [None]:
# Preview the first rows of the buildings table to inspect its attribute columns
inB.head()

In [None]:
# Preview the first rows of the parcels table to inspect its attribute columns
inP.head()

In [None]:
# Report, for every column of the buildings dataset, how many values are missing
for col in inB.columns:
    n_missing = inB[col].isna().sum()
    print(f'{col}: {n_missing}')

In [None]:
# Get count of null values in STR_NAME grouped by BLD_USE
missing_name = inB['STR_NAME'].isna()
inB.loc[missing_name, 'BLD_USE'].value_counts()

# Summarize buildings within a grid

Create a 250m grid across the study area and summarize buildings and parcels within each cell.

In [None]:
# Reproject the parcels to the metre-based CRS so the grid can be laid out in metres
tempP = inP.to_crs(m_crs)
# Merge all parcels into one geometry; used to keep only grid cells that intersect a parcel
allP = tempP.unary_union

In [None]:
# Generate the grid
# The grid is cached on disk: it is only built when the summary file does not exist yet.
if not os.path.exists(summary_grid):
    # Extent and cell size, all expressed in the metre-based CRS of tempP
    xmin, ymin, xmax, ymax = tempP.total_bounds
    gridWidth = gridHeight = res
    crsNum = crs

    # Lower-left corner coordinates of every candidate cell; the stop value is
    # padded by one cell because np.arange excludes its end point
    cols = list(np.arange(xmin, xmax + gridWidth, gridWidth))
    rows = list(np.arange(ymin, ymax + gridHeight, gridHeight))
    all_res = []
    for x in cols:
        for y in rows:
            poly = Polygon([(x,y), (x+gridWidth, y), (x+gridWidth, y+gridHeight), (x, y+gridHeight)])
            # Only keep cells that intersect at least one parcel
            if poly.intersects(allP):
                all_res.append([x,y,poly])
    # NOTE: despite their names, 'rowIdx' holds the cell's lower-left x coordinate and
    # 'colIdx' its lower-left y coordinate; the names are kept so previously written
    # summary files stay readable with the same schema.
    grid = gpd.GeoDataFrame(pd.DataFrame(all_res, columns=['rowIdx', 'colIdx', 'geometry']), geometry='geometry', crs=f'epsg:{crsNum}')
    grid.to_file(summary_grid)
else:
    grid = gpd.read_file(summary_grid)

In [None]:
# Reproject the grid to the CRS of the buildings so both use the same coordinates
# NOTE(review): inP and inG are not reprojected here — presumably they share inB's CRS; confirm.
if grid.crs != inB.crs:
    grid = grid.to_crs(inB.crs)

In [None]:
def _covered_fraction(cell, features, label, cell_id):
    """Return the fraction of ``cell``'s area covered by the union of ``features``.

    cell: shapely geometry of the grid cell.
    features: GeoDataFrame of features intersecting the cell.
    label, cell_id: only used to identify the layer and cell in the warning message.

    Returns 0.0 when there are no features, and also (after printing a message)
    when the geometric calculation fails, so a single bad geometry does not
    abort the whole run.
    """
    if features.empty:
        return 0.0
    try:
        return cell.intersection(features.unary_union).area / cell.area
    except Exception as err:
        print(f'Could not calculate {label} coverage for grid cell {cell_id}: {err}')
        return 0.0


# Share of each grid cell covered by digitized buildings (per_b), parcels (per_p)
# and Google buildings (per_g)
grid['per_b'] = 0.
grid['per_p'] = 0.
grid['per_g'] = 0.
for idx, row in grid.iterrows():
    cell = row['geometry']
    cell_bounds = cell.bounds

    # The spatial index returns row positions (not labels), hence .iloc; it only
    # compares bounding boxes, so an exact intersects test follows each lookup.

    # identify intersecting Google buildings
    potential_google = inG.iloc[list(g_idx.intersection(cell_bounds))]
    i_g = potential_google.loc[potential_google.intersects(cell)]

    # identify intersecting buildings
    potential_buildings = inB.iloc[list(b_idx.intersection(cell_bounds))]
    i_bld = potential_buildings.loc[potential_buildings.intersects(cell)]

    # identify intersecting parcels
    potential_parcels = inP.iloc[list(p_idx.intersection(cell_bounds))]
    i_par = potential_parcels.loc[potential_parcels.intersects(cell)]

    # calculate percent Google, percent built and percent parcel
    per_google = _covered_fraction(cell, i_g, 'Google building', idx)
    per_building = _covered_fraction(cell, i_bld, 'building', idx)
    per_parcel = _covered_fraction(cell, i_par, 'parcel', idx)

    grid.loc[idx, 'per_b'] = per_building
    grid.loc[idx, 'per_p'] = per_parcel
    grid.loc[idx, 'per_g'] = per_google

    # Uncomment this section to stop the loop at a specific index in order to run
    # the plotting below on that grid cell
    # if idx > 3:
    #     break

In [None]:
# Preview the grid with the per_b / per_p / per_g coverage columns
grid.head()

In [None]:
# Write the grid back to the summary file, replacing the version saved when the grid was generated
grid.to_file(summary_grid)

In [None]:
# Map the most recently looped grid cell
# Centre the map on the candidate buildings of that cell (union computed once)
map_center = potential_buildings.unary_union.centroid
m = folium.Map(location=[map_center.y, map_center.x], zoom_start=18)

# Layers in drawing order: (label, callable returning the layer data, style).
# The data is produced lazily inside the try block so that a failure in one
# layer is reported and skipped without preventing the others from being drawn.
map_layers = [
    ('grid cell', lambda: row['geometry'],
     {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}),
    ('parcels', lambda: i_par.to_json(),
     {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}),
    ('buildings', lambda: i_bld.to_json(),
     {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}),
]
for layer_name, get_data, layer_style in map_layers:
    try:
        # bind the style as a default argument so each layer keeps its own style
        folium.GeoJson(get_data(),
                       style_function=lambda x, style=layer_style: style).add_to(m)
    except Exception as err:
        print(f'Skipping {layer_name} layer: {err}')
m

# RETIRED

In [None]:
# Summarize buildings in parcels
# For every parcel, count the digitized buildings that intersect it (BLDG_I) or lie
# wholly inside it (BLDG_C), and the same two counts for Google buildings
# (BLDG_I_G / BLDG_C_G).
inP['BLDG_I'] = 0
inP['BLDG_C'] = 0
inP['BLDG_I_G'] = 0
inP['BLDG_C_G'] = 0
for idx, row in inP.iterrows():
    # Summarize collected buildings
    # candidates: buildings whose bounding box overlaps the parcel's bounding box
    potential_buildings = inB.loc[list(b_idx.intersection(row['geometry'].bounds))]
    # m_bld: candidates that do not intersect the parcel; i_bld: candidates that do;
    # c_bld: candidates fully contained by the parcel
    m_bld = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I'] = i_bld.shape[0]
    inP.loc[idx, 'BLDG_C'] = c_bld.shape[0]
    
    # Summarize buildings in Google
    # NOTE(review): potential_buildings is reassigned here, so after the loop it holds
    # the Google candidates of the last parcel, not the digitized ones.
    # NOTE(review): requires inG to be a GeoDataFrame with a 'geometry' column — confirm.
    potential_buildings = inG.loc[list(g_idx.intersection(row['geometry'].bounds))]
    m_bld_g = potential_buildings.loc[~potential_buildings.intersects(row['geometry'])]
    i_bld_g = potential_buildings.loc[potential_buildings.intersects(row['geometry'])]
    c_bld_g = potential_buildings.loc[potential_buildings['geometry'].apply(lambda x: row['geometry'].contains(x))]    
    inP.loc[idx, 'BLDG_I_G'] = i_bld_g.shape[0]
    inP.loc[idx, 'BLDG_C_G'] = c_bld_g.shape[0]
    
    # The string literal below is a disabled early-exit used for debugging; it has no effect as written.
    ''' # uncomment this section to stop the loop add a specific index in order to run plotting below
    if idx > 3:
        break
    '''

In [None]:
# Map the most recently looped parcel and the buildings around it
# Centre the map on the current candidate set (union computed once)
map_center = potential_buildings.unary_union.centroid
m = folium.Map(location=[map_center.y, map_center.x], zoom_start=18)

# Layers in drawing order: (label, callable returning the layer data, style).
# The data is produced lazily inside the try block so that a failure in one
# layer is reported and skipped without preventing the others from being drawn.
map_layers = [
    ('parcel', lambda: row['geometry'],
     {'fillOpacity':0.5, 'fillColor': 'red', 'color':'#520f0a'}),
    ('non-intersecting buildings', lambda: m_bld.to_json(),
     {'fillOpacity':1,'fillColor': '#ebab15', 'stroke': False}),
    ('intersecting buildings', lambda: i_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#6878ed', 'stroke': False}),
    ('contained buildings', lambda: c_bld.to_json(),
     {'fillOpacity':1, 'fillColor': '#68ed76', 'stroke': False}),
    ('candidate outlines', lambda: potential_buildings.to_json(),
     {'fillOpacity':0,'fillColor': '#ebab15'}),
]
for layer_name, get_data, layer_style in map_layers:
    try:
        # bind the style as a default argument so each layer keeps its own style
        folium.GeoJson(get_data(),
                       style_function=lambda x, style=layer_style: style).add_to(m)
    except Exception as err:
        print(f'Skipping {layer_name} layer: {err}')

m
