In [1]:
from google.colab import drive
drive.mount('/content/drive')

program_location = '/content/drive/MyDrive/Colab Notebooks/PTI'

Mounted at /content/drive


---
# **1. SET-UP**

##### These packages are used across the range of Sahel PTI indicator preparations. Not all packages may be used in present script.

In [2]:
!pip install geopandas import_ipynb pyshp pycrs pyogrio rasterio

Collecting import_ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting pycrs
  Downloading PyCRS-1.0.2.tar.gz (36 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyogrio
  Downloading pyogrio-0.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.0/22.0 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rasterio
  Downloading rasterio-1.3.9-cp310-cp310-manylinux2014_x86_64.whl (20.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.6/20.6 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Collecting jedi>=0.16 (from IPython->import_ipynb)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [3]:
# Colab default packages
import io, os, sys, glob, re, time, string, types, json
from os.path import exists

import pandas as pd
import importlib
import numpy as np

'''
from shapely import geometry
from shapely.geometry import Point, LineString, Polygon, shape, MultiPoint, mapping
from shapely.ops import cascaded_union
from shapely.validation import make_valid
import shapely.wkt
'''

# Temporarily installed packages
import geopandas as gpd
import pyogrio
import pycrs
import import_ipynb

# Import external files
os.chdir(program_location)
!pwd
import config

sys.path.append(program_location)

import tools
importlib.reload(tools)

/content/drive/MyDrive/Colab Notebooks/PTI
importing Jupyter notebook from config.ipynb


<module 'tools' from '/content/drive/MyDrive/Colab Notebooks/PTI/tools.py'>

In [4]:
data_loc = os.path.join(os.getcwd(), 'data', config.ISO)
print(data_loc)

fil_AMD = os.path.join(data_loc, 'ADM', 'Source')

/content/drive/MyDrive/Colab Notebooks/PTI/data/CPV


---
# **2. ADM1,2,3 Integration**

In [5]:
# Read admin SHPs

src_ADM1 = os.path.join(fil_AMD, config.src_adm1)
src_ADM2 = os.path.join(fil_AMD, config.src_adm2)
src_ADM3 = os.path.join(fil_AMD, config.src_adm3)

gdf_ADM1 = tools.vec_import(config.RW_engine, src_ADM1)
gdf_ADM2 = tools.vec_import(config.RW_engine, src_ADM2)
gdf_ADM3 = tools.vec_import(config.RW_engine, src_ADM3)

Vector import complete.
GDF size:9
GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],ID["EPSG",6326]],PRIMEM["Greenwich",0,ANGLEUNIT["Degree",0.0174532925199433]],CS[ellipsoidal,3],AXIS["longitude",east,ORDER[1],ANGLEUNIT["Degree",0.0174532925199433]],AXIS["latitude",north,ORDER[2],ANGLEUNIT["Degree",0.0174532925199433]],AXIS["ellipsoidal height (h)",up,ORDER[3],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]
Vector import complete.
GDF size:22
GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],ID["EPSG",6326]],PRIMEM["Greenwich",0,ANGLEUNIT["Degree",0.0174532925199433]],CS[ellipsoidal,3],AXIS["longitude",east,ORDER[1],ANGLEUNIT["Degree",0.0174532925199433]],AXIS["latitude",north,ORDER[2],ANGLEUNIT["Degree",0.0174532925199433]],AXIS["ellipsoidal height (h)",up,ORDER[3],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]
Vector import complete.
GDF size:32
EPSG:4326


In [6]:
# Generate psudo ADM codes:

# ADM0 & ADM1
gdf_ADM1['ADM0_CODE'] = config.ISO
gdf_ADM1['ADM1_CODE'] = gdf_ADM1.index
gdf_ADM1['ADM1_CODE'] = gdf_ADM1['ADM1_CODE'].astype("str")
gdf_ADM1['ADM1_CODE'] = gdf_ADM1['ADM1_CODE'].str.zfill(3)
gdf_ADM1['ADM1_CODE'] = gdf_ADM1['ADM0_CODE'] + gdf_ADM1['ADM1_CODE']

# ADM2
gdf_ADM2['ADM2_CODE'] = gdf_ADM2.index
gdf_ADM2['ADM2_CODE'] = gdf_ADM2['ADM2_CODE'].astype("str")
gdf_ADM2['ADM2_CODE'] = gdf_ADM2['ADM2_CODE'].str.zfill(3)

# ADM3
gdf_ADM3['ADM3_CODE'] = gdf_ADM3.index
gdf_ADM3['ADM3_CODE'] = gdf_ADM3['ADM3_CODE'].astype("str")
gdf_ADM3['ADM3_CODE'] = gdf_ADM3['ADM3_CODE'].str.zfill(4)


# Check
print(gdf_ADM1['ADM0_CODE'].nunique())
print(gdf_ADM1['ADM1_CODE'].nunique())
print(gdf_ADM2['ADM2_CODE'].nunique())
print(gdf_ADM3['ADM3_CODE'].nunique())

1
9
22
32


Generate centroids

In [7]:
gdf_ADM3_pt = gdf_ADM3.copy()

gdf_ADM3_pt['centroid'] = gdf_ADM3_pt.geometry.centroid
gdf_ADM3_pt = gdf_ADM3_pt.set_geometry('centroid').drop(['geometry'], axis=1)

gdf_ADM3_pt.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   COD_ILHA    32 non-null     int32   
 1   NOME_ILHA   32 non-null     object  
 2   COD_CONCEL  32 non-null     int32   
 3   NOME_CONCE  32 non-null     object  
 4   COD_FREGUE  32 non-null     int32   
 5   NOME_FREGU  32 non-null     object  
 6   Forsplit    32 non-null     object  
 7   ADM3_CODE   32 non-null     object  
 8   centroid    32 non-null     geometry
dtypes: geometry(1), int32(3), object(5)
memory usage: 2.0+ KB



  gdf_ADM3_pt['centroid'] = gdf_ADM3_pt.geometry.centroid


Spatial join with the point version of GRID3 for a one-to-one result.

In [8]:
# Rename the ADM3 shape name (e.g., name of manucipality)
gdf_ADM3_pt.rename(columns={config.adm3_name: 'ADM3_NAME'}, inplace=True)

# Simplyfy the GDFs as temp
gdf_pt_temp = gdf_ADM3_pt[['ADM3_NAME', 'ADM3_CODE', 'centroid']].copy()
gdf_adm_temp = gdf_ADM1[['ADM0_CODE', 'ADM1_CODE', 'geometry']]


# First merge (ADM1)
gdf_pt_temp = gpd.sjoin(gdf_pt_temp, gdf_adm_temp, how='left', predicate='intersects')
gdf_pt_temp.drop('index_right', axis= 1, inplace = True)


# Simplyfy the GDF to be merged
gdf_adm_temp = gdf_ADM2[['ADM2_CODE', 'geometry']]


# Second merge (ADM2)
gdf_pt_temp = gpd.sjoin(gdf_pt_temp, gdf_adm_temp, how='left', predicate='intersects')
gdf_pt_temp.drop('index_right', axis= 1, inplace = True)


gdf_pt_temp.sample(10)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: GEOGCRS["WGS 84",DATUM["World Geodetic System 1984 ...

  gdf_pt_temp = gpd.sjoin(gdf_pt_temp, gdf_adm_temp, how='left', predicate='intersects')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: GEOGCRS["WGS 84",DATUM["World Geodetic System 1984 ...

  gdf_pt_temp = gpd.sjoin(gdf_pt_temp, gdf_adm_temp, how='left', predicate='intersects')


Unnamed: 0,ADM3_NAME,ADM3_CODE,centroid,ADM0_CODE,ADM1_CODE,ADM2_CODE
25,S. Miguel Arcângelo,25,POINT (-23.63909 15.19248),CPV,CPV006,14
20,S. João Baptista,20,POINT (-24.69060 14.85482),CPV,CPV008,21
8,Santa Isabel,8,POINT (-22.85516 16.08277),CPV,CPV004,7
10,S. Francisco de Assis,10,POINT (-24.35837 16.59460),CPV,CPV002,5
2,Santo Crucifixo,2,POINT (-25.11577 17.13026),CPV,CPV000,0
27,S. Salvador do Mundo,27,POINT (-23.62897 15.08888),CPV,CPV006,15
22,S. Nicolau Tolentino,22,POINT (-23.56773 15.01530),CPV,CPV006,13
19,Nossa Senhora do Monte,19,POINT (-24.72304 14.84693),CPV,CPV008,21
16,Nossa Senhora de Conceição,16,POINT (-24.42958 14.88767),CPV,CPV007,19
29,Santiago Maior,29,POINT (-23.55277 15.11059),CPV,CPV006,11


In [9]:
# Merge df_pt_complete and gdf_ADM3 to generate gdf_ADM3_complete
df_pt_complete = gdf_pt_temp[['ADM3_NAME', 'ADM0_CODE', 'ADM1_CODE', 'ADM2_CODE', 'ADM3_CODE']]
gdf_ADM3_complete = gdf_ADM3.merge(df_pt_complete, how='left', on='ADM3_CODE')


# Generate true codes
gdf_ADM3_complete['ADM2_CODE'] = gdf_ADM3_complete['ADM1_CODE'] + gdf_ADM3_complete['ADM2_CODE']
gdf_ADM3_complete['ADM3_CODE'] = gdf_ADM3_complete['ADM2_CODE'] + gdf_ADM3_complete['ADM3_CODE']

# **3. EMPTY ROW CHECK**

In [10]:
# Empty row check
gdf_ADM3_complete.query('ADM1_CODE.isnull()', engine='python')

Unnamed: 0,COD_ILHA,NOME_ILHA,COD_CONCEL,NOME_CONCE,COD_FREGUE,NOME_FREGU,Forsplit,geometry,ADM3_CODE,ADM3_NAME,ADM0_CODE,ADM1_CODE,ADM2_CODE


---
Special tretment for any island and enclave land shapes (these were not spatial joined. So, there must be rows with empty records. You need to handle them manually)

In [None]:
tar_row = 172

replace_ADM1 = config.ISO + str('007')
replace_ADM2 = replace_ADM1 + str('034')
replace_ADM3 = replace_ADM2 + str('0172')


gdf_ADM3_complete.loc[tar_row, 'ADM0_CODE'] = config.ISO
gdf_ADM3_complete.loc[tar_row, 'ADM1_CODE'] = replace_ADM1
gdf_ADM3_complete.loc[tar_row, 'ADM2_CODE'] = replace_ADM2
gdf_ADM3_complete.loc[tar_row, 'ADM3_CODE'] = replace_ADM3

print(replace_ADM1)
print(replace_ADM2)
print(replace_ADM3)

MRT007
MRT007034
MRT0070340172


---

In [11]:
gdf_ADM3toSHP = gdf_ADM3_complete[['ADM3_NAME', 'ADM0_CODE', 'ADM1_CODE', 'ADM2_CODE', 'ADM3_CODE', 'geometry']]
gdf_ADM3toSHP.sample(20)

# Final check
print('ADM0 = {}, dif from the original ADM0 = {}'.format(gdf_ADM3toSHP['ADM0_CODE'].nunique(), gdf_ADM3toSHP['ADM0_CODE'].nunique() - gdf_ADM1['ADM0_CODE'].nunique()))
print('ADM1 = {}, dif from the original ADM1 = {}'.format(gdf_ADM3toSHP['ADM1_CODE'].nunique(), gdf_ADM3toSHP['ADM1_CODE'].nunique() - gdf_ADM1['ADM1_CODE'].nunique()))
print('ADM2 = {}, dif from the original ADM2 = {}'.format(gdf_ADM3toSHP['ADM2_CODE'].nunique(), gdf_ADM3toSHP['ADM2_CODE'].nunique() - gdf_ADM2['ADM2_CODE'].nunique()))
print('ADM3 = {}, dif from the original ADM3 = {}'.format(gdf_ADM3toSHP['ADM3_CODE'].nunique(), gdf_ADM3toSHP['ADM3_CODE'].nunique() - gdf_ADM3['ADM3_CODE'].nunique()))

ADM0 = 1, dif from the original ADM0 = 0
ADM1 = 9, dif from the original ADM1 = 0
ADM2 = 22, dif from the original ADM2 = 0
ADM3 = 32, dif from the original ADM3 = 0


In [12]:
# Export
src_out = os.path.join(data_loc, 'ADM', 'Source', 'complete_ADM3.shp')
tools.vec_export(config.RW_engine, gdf_ADM3toSHP, src_out)

Vector export complete.
