# Mapping unelectrified households

This notebook focuses on identifying households in electrified settlements that are not yet electrified. The proposed methodology is as follows:

1. Use the settlements from the [Global Electrification Platform](https://electrifynow.energydata.info/explore/kh-2) to identify settlements that are partially electrified.  
  a. Each of these settlements has a number of new connections to make to reach universal electrification  
2. Map the building footprints to the GEP settlements; those buildings that do not intersect a settlement are mapped to the nearest settlement
3. Calculate the distance from the buildings to the MV network (the GridFinder grid lines are used as the network layer)
4. For each GEP settlement, identify the individual buildings to be electrified to satisfy the number of new connections required; within a settlement, the buildings farthest from the grid are selected first  
5. [TODO] Connect the buildings to the license areas and tabulate distances to grids

In [7]:
import os

import pandas as pd
import geopandas as gpd

#import GOSTrocks.rasterMisc as rMisc
#import GOSTrocks.dataMisc as dMisc
#import GOSTrocks.mapMisc as mapMisc
from GOSTrocks.misc import tPrint

In [114]:
# --- Configuration: country, projected CRS and all input/output paths ---
iso3 = 'KHM'
m_crs = 32648  # EPSG code passed to to_crs() before distance calculations
out_folder = "C:/WBG/Work/KHM_Energy/data"

wsf_file = os.path.join(out_folder, "WSF", "wsf.tif")
ghsl_file = os.path.join(out_folder, "GHSL", "ghsl.tif")
overture_buildings = os.path.join(out_folder, "overture", "overture_download_2024_03_29.csv")
overture_raster = os.path.join(out_folder, "overture", "overture_download_2024_03_29.tif")
overture_raster_points = os.path.join(out_folder, "overture", "overture_download_2024_03_29_points.tif")
ghs_smod = os.path.join(out_folder, "URBAN", "GHS_SMOD.tif")
ghs_ucbd = os.path.join(out_folder, "URBAN", "GHS_UCBD.gpkg")
ntl_folder = os.path.join(out_folder, "NTL", "VIIRS_KHM")
google_buildings = os.path.join(out_folder, "Google_Buildings", "GOB_cambodia.shp")
gep_folder = os.path.join(out_folder, "GEP")
gep_settlements = os.path.join(gep_folder, "final_clusters.shp")
gep_attributes  = os.path.join(gep_folder, "kh-2-0_0_0_0_1_0.csv")
google_buildings_attributed_GEP = os.path.join(gep_folder, "GOB_gep.csv")
gridfinder_grid = os.path.join(out_folder, "GridFinder", "grid.gpkg")

# Make sure the folders that hold these rasters exist.
# exist_ok=True replaces the separate exists() check and is safe to re-run.
for file in [wsf_file, ghsl_file, ghs_smod]:
    os.makedirs(os.path.dirname(file), exist_ok=True)

# get country extent from geopandas
# NOTE(review): gpd.datasets is deprecated in recent GeoPandas releases and
# removed in GeoPandas 1.0; on newer versions point this at a local copy of
# the Natural Earth countries layer instead.
world_filepath = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(world_filepath)
# .copy() gives an independent frame, so assigning the buffered geometry below
# no longer triggers pandas' SettingWithCopyWarning.
country = world[world.iso_a3 == iso3].copy()
# Pad the country outline by 0.1 (units of the layer's CRS) so that features
# right at the border are kept when the extent is used as a filter.
country['geometry'] = country.buffer(0.1)

  world_filepath = gpd.datasets.get_path('naturalearth_lowres')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [4]:
# --- Load inputs -----------------------------------------------------------
# GEP settlement geometries and the matching scenario attribute table
gep_d = gpd.read_file(gep_settlements)
gep_a = pd.read_csv(gep_attributes)

# Google building footprints
build_d = gpd.read_file(google_buildings)

# Spatial indexes for both layers; the building-to-settlement matching
# relies on them to pre-filter candidates by bounding box
gep_sindex, build_sindex = gep_d.sindex, build_d.sindex

In [31]:
# Attach a GEP settlement id ('gep_id') and the distance to that settlement
# ('gep_dist') to every building. The result is cached as a CSV because the
# matching is slow; if the cache exists it is loaded instead.
# NOTE(review): distances and the 0.1 search step are in the units of the
# layers' native CRS, and both layers are assumed to share that CRS - confirm.
if not os.path.exists(google_buildings_attributed_GEP):
    # --- Step 1: buildings that intersect a settlement ----------------------
    build_d['gep_id'] = None
    n_settlements_total = gep_d.shape[0]
    for cnt, (idx, row) in enumerate(gep_d.iterrows(), start=1):
        settlement_geom = row['geometry']
        # sindex.intersection() returns integer *positions*, so select with
        # .iloc; .loc only works by accident when the index is 0..n-1.
        candidate_pos = list(build_sindex.intersection(settlement_geom.bounds))
        candidate_buildings = build_d.iloc[candidate_pos]
        # The bounding-box hits are only candidates; keep true intersections.
        sel_buildings = candidate_buildings[candidate_buildings.intersects(settlement_geom)]
        build_d.loc[sel_buildings.index, 'gep_id'] = row['id']
        if cnt % 1000 == 0:
            tPrint(round(cnt / n_settlements_total, 2))

    # --- Step 2: remaining buildings go to the nearest settlement -----------
    build_d['gep_dist'] = 0.0
    missing_buildings = build_d.loc[build_d['gep_id'].isnull()]
    n_missing = missing_buildings.shape[0]
    if n_missing > 0 and gep_d.shape[0] == 0:
        # Without this guard the expanding search below would never terminate.
        raise ValueError("No GEP settlements available to match buildings against")
    for cnt, (idx, row) in enumerate(missing_buildings.iterrows(), start=1):
        building_geom = row['geometry']
        # Grow the search window in 0.1 steps until at least one settlement
        # bounding box is hit.
        buffer_dist = 0.0
        candidate_pos = []
        while len(candidate_pos) == 0:
            candidate_pos = list(gep_sindex.intersection(building_geom.buffer(buffer_dist).bounds))
            buffer_dist += 0.1
        dist_calc = gep_d.iloc[candidate_pos].distance(building_geom)

        # The first bounding-box hits need not contain the true nearest
        # settlement. Any closer settlement must lie within the best distance
        # found so far, so re-query with a window padded by that distance and
        # take the minimum over the combined candidates.
        search_radius = dist_calc.min()
        minx, miny, maxx, maxy = building_geom.bounds
        window = (minx - search_radius, miny - search_radius,
                  maxx + search_radius, maxy + search_radius)
        refined_pos = {int(p) for p in candidate_pos}
        refined_pos.update(int(p) for p in gep_sindex.intersection(window))
        dist_calc = gep_d.iloc[sorted(refined_pos)].distance(building_geom)

        nearest_id = gep_d.loc[dist_calc.idxmin(), "id"]
        nearest_dist = dist_calc.min()
        build_d.loc[idx, 'gep_id'] = nearest_id
        build_d.loc[idx, 'gep_dist'] = nearest_dist
        if cnt % 1000 == 0:
            tPrint(round(cnt / n_missing, 2))

    # Cache only the attributes needed to rebuild the link (index is written too).
    pd.DataFrame(build_d).loc[:,['gep_id','gep_dist','full_plus_']].to_csv(google_buildings_attributed_GEP)
else:
    # index_col=0 restores the building index that to_csv() wrote, so the
    # assignments below align on the same labels as build_d.
    attributed_builds = pd.read_csv(google_buildings_attributed_GEP, index_col=0)
    build_d['gep_id'] = attributed_builds['gep_id']
    build_d['gep_dist'] = attributed_builds['gep_dist']
    

14:14:34	0.01
14:14:40	0.02
14:14:42	0.03
14:14:43	0.04
14:15:10	0.05
14:15:14	0.06
14:15:17	0.07
14:15:21	0.08
14:15:22	0.1
14:15:29	0.11
14:15:39	0.12
14:15:40	0.13
14:15:44	0.14
14:15:49	0.15
14:15:51	0.16
14:15:53	0.17
14:15:54	0.18
14:16:03	0.19
14:16:04	0.2
14:16:05	0.21
14:16:26	0.22
14:16:27	0.23
14:16:28	0.24
14:17:11	0.25
14:17:18	0.26
14:17:27	0.28
14:17:28	0.29
14:17:29	0.3
14:17:32	0.31
14:17:35	0.32
14:17:40	0.33
14:17:43	0.34
14:17:45	0.35
14:17:46	0.36
14:17:47	0.37
14:17:49	0.38
14:17:50	0.39
14:17:51	0.4
14:17:52	0.41
14:17:53	0.42
14:17:55	0.43
14:17:59	0.44
14:18:00	0.46
14:18:02	0.47
14:18:06	0.48
14:18:50	0.49
14:18:52	0.5
14:18:54	0.51
14:18:56	0.52
14:18:58	0.53
14:19:09	0.54
14:19:28	0.55
14:19:31	0.56
14:19:32	0.57
14:19:34	0.58
14:19:37	0.59
14:19:38	0.6
14:19:39	0.61
14:19:40	0.62
14:19:41	0.64
14:19:42	0.65
14:19:43	0.66
14:19:44	0.67
14:19:45	0.68
14:19:47	0.69
14:19:48	0.7
14:19:51	0.71
14:19:52	0.72
14:19:53	0.73
14:19:55	0.74
14:19:56	0.75
14:19:57	0.76

In [67]:
# Prepare the grid layer for the building-to-grid distance calculation:
# read the GridFinder lines, keep those inside the country's bounding box,
# and reproject them to the metric CRS.
grid_d = gpd.read_file(gridfinder_grid)
min_x, min_y, max_x, max_y = country.total_bounds
sel_grid = grid_d.cx[min_x:max_x, min_y:max_y].to_crs(m_crs)


Exception ignored in: <function tqdm.__del__ at 0x000002D86B9C6700>
Traceback (most recent call last):
  File "c:\wbg\Anaconda3\envs\urban_test\Lib\site-packages\tqdm\std.py", line 1148, in __del__
    self.close()
  File "c:\wbg\Anaconda3\envs\urban_test\Lib\site-packages\tqdm\notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
    ^^^^^^^^^
AttributeError: 'tqdm_notebook' object has no attribute 'disp'
Exception ignored in: <function tqdm.__del__ at 0x000002D86B9C6700>
Traceback (most recent call last):
  File "c:\wbg\Anaconda3\envs\urban_test\Lib\site-packages\tqdm\std.py", line 1148, in __del__
    self.close()
  File "c:\wbg\Anaconda3\envs\urban_test\Lib\site-packages\tqdm\notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
    ^^^^^^^^^
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


Unnamed: 0,source,geometry
0,gridfinder,"LINESTRING (128.88542 71.73125, 128.87708 71.7..."
1,gridfinder,"LINESTRING (128.88542 71.73125, 128.89375 71.7..."
2,gridfinder,"LINESTRING (128.88542 71.70208, 128.88125 71.7..."
3,openstreetmap,"LINESTRING (128.88958 71.70208, 128.88542 71.7..."
4,openstreetmap,"LINESTRING (128.88542 71.69792, 128.88542 71.7..."


In [74]:
# Keep only the GEP attributes used downstream
gep_columns = ['id',"Pop","Pop2030","NumPeoplePerHH","IsUrban","NightLights","CurrentMVLineDist","PlannedMVLineDist",
               "FinalElecCode2020","FinalElecCode2030","NewConnections2025",'NewConnections2030']
sel_gep_a = gep_a.loc[:,gep_columns].copy()
# Connections per settlement = NewConnections2030 / NumPeoplePerHH.
# Computed as a vectorised column division instead of a row-wise apply().
# NOTE(review): presumably NewConnections2030 counts people, so this yields
# households - confirm against the GEP documentation.
sel_gep_a['nConnections'] = sel_gep_a['NewConnections2030'] / sel_gep_a['NumPeoplePerHH']

In [89]:
# Flag GEP settlements that are fully electrified: a settlement counts as
# electrified (1) when it needs at most one new connection, otherwise 0
sel_gep_a['electrified'] = (sel_gep_a['nConnections'] <= 1).astype('int64')
partially_electrified = sel_gep_a[sel_gep_a['electrified'] == 0]

# Extract all buildings that belong to partially electrified settlements,
# reproject them to the metric CRS, and measure each building's distance to
# the grid so the buildings furthest from it can be identified later
partially_electrified_buildings = (
    build_d[build_d['gep_id'].isin(partially_electrified['id'])]
    .to_crs(m_crs)
)
partially_electrified_buildings['grid_dist'] = partially_electrified_buildings.distance(
    sel_grid.unary_union
)

KeyError: 'gep_id'

In [121]:
# For every partially electrified settlement, pick the buildings furthest from
# the grid, as many as the settlement has connections still to make.
# The result is a flat list of building codes ('full_plus_').

# Look-up of connections per settlement id, built once instead of scanning
# sel_gep_a for every group. drop_duplicates keeps the first row per id, which
# matches taking the first match of a boolean filter.
unique_settlements = sel_gep_a.drop_duplicates('id')
connections_by_id = dict(zip(unique_settlements['id'], unique_settlements['nConnections']))

buildings_to_electrify = []
for gep_id, settlement_buildings in partially_electrified_buildings.groupby('gep_id'):
    # NOTE(review): int() raises if nConnections is NaN or infinite for a
    # settlement - confirm the input cannot contain such rows.
    n_buildings = int(connections_by_id[gep_id])
    # Sort a new frame rather than the groupby slice in place.
    ranked = settlement_buildings.sort_values('grid_dist', ascending=False)
    # extend() appends in place; repeated list concatenation is quadratic.
    buildings_to_electrify.extend(ranked['full_plus_'].values[:n_buildings])

In [122]:
# Turn the list of selected building codes into a GeoDataFrame of those
# buildings in the metric CRS.
# Caveat: the name buildings_to_electrify is rebound from the list of codes to
# the frame, so this cell is meant to run once after the selection loop.
buildings_to_electrify = (
    build_d.loc[build_d['full_plus_'].isin(buildings_to_electrify)]
    .copy()
    .to_crs(m_crs)
)

# Distance from each selected building to the merged grid geometry
sel_grid = sel_grid.to_crs(m_crs)
buildings_to_electrify['grid_dist'] = buildings_to_electrify.distance(
    sel_grid.unary_union
)
buildings_to_electrify.head()

Unnamed: 0,latitude,longitude,area_in_me,confidence,full_plus_,geometry,gep_id,gep_dist,grid_dist
0,10.358102,104.32013,49.1494,0.7171,7P26985C+63R2,"POLYGON ((425567.516 1145081.526, 425566.894 1...",26039,0.0,13626.801841
5,10.432327,104.32765,78.3979,0.8026,7P26C8JH+W3J5,"POLYGON ((426407.396 1153285.444, 426404.700 1...",45171,0.0,5591.271323
6,10.432619,104.327663,33.047,0.7397,7P26C8MH+23VJ,"POLYGON ((426411.761 1153319.214, 426409.143 1...",45171,0.0,5561.117718
7,10.432621,104.327603,79.2408,0.8625,7P26C8MH+22XV,"POLYGON ((426405.938 1153316.508, 426400.761 1...",45171,0.0,5556.074614
8,10.432755,104.327326,7.5354,0.6908,7P26C8MG+4W43,"POLYGON ((426374.500 1153337.000, 426374.500 1...",45171,0.0,5539.140116


In [128]:
# Write the selected buildings (code, settlement id and both distances) to CSV
export_columns = ['full_plus_','gep_id','gep_dist', 'grid_dist']
export_path = os.path.join(out_folder, "buildings_to_electrify.csv")
buildings_to_electrify[export_columns].to_csv(export_path)