In [25]:
import json
import os
from urllib.parse import quote_plus

import geopandas as gpd
import pandas as pd
import psycopg2 # required for exporting to postgis
from sqlalchemy import create_engine

In [2]:
# Switch the working directory to the settlement data folder; the shapefile
# export later in this notebook is written with a relative file name.
SETTLEMENT_DIR = r'D:\iMMAP\proj\ASDC\data\settlement'
os.chdir(SETTLEMENT_DIR)
print(os.getcwd())

D:\iMMAP\proj\ASDC\data\settlement


In [26]:
# Load the database connection settings from a JSON file kept outside the notebook.
# The keys read below are: username, password, host, port, database.
with open(r'D:\iMMAP\code\db_config\hsdc_local_db_config.json', 'r') as f:
    config = json.load(f)

# Percent-encode the credentials before embedding them in the URL; otherwise
# reserved characters such as '@', ':' or '/' in the password corrupt the URL.
# NOTE(review): assumes the config stores raw (not already percent-encoded) values.
db_user = quote_plus(str(config['username']))
db_password = quote_plus(str(config['password']))
db_url = f"postgresql://{db_user}:{db_password}@{config['host']}:{config['port']}/{config['database']}"

# Build the SQLAlchemy engine that the PostGIS export uses.
con = create_engine(db_url)

In [3]:
# Source layers: the settlement points and the admin-level-2 boundary polygons.
settlement_points_path = r'D:\iMMAP\data\Afghanistan\HSDC-Official\afg_pplp.shp'
admin2_polygons_path = r'D:\iMMAP\data\Afghanistan\HSDC-Official\afg_admbnda_adm2.shp'

# Read both shapefiles into GeoDataFrames.
points = gpd.read_file(settlement_points_path)
polygons = gpd.read_file(admin2_polygons_path)

In [4]:
# Record the attribute names of the raw points layer and display them.
point_cols_original = list(points.columns)
point_cols_original

['OBJECTID',
 'SOURCE',
 'VIL_UID',
 'CNTR_CODE',
 'AFG_UID',
 'LANGUAGE_',
 'LANG_CODE',
 'ELEVATION',
 'LAT_Y',
 'LON_X',
 'Note',
 'Edited_by',
 'Name_EN',
 'Type_Settl',
 'DIST_CODE',
 'DIST_NA_EN',
 'PROV_NA_EN',
 'PROV_CODE_',
 'UNIT_TYPE',
 'DIST_NA_DA',
 'PROV_NA_DA',
 'REG_UNAMA_',
 'DIST_NA_PS',
 'REG_UNAM_1',
 'VUID_Area_',
 'VUIDNear',
 'VUID_Build',
 'VUID_Popul',
 'VUID_Pop_p',
 'VUID',
 'Name_Local',
 'Name_Loc_1',
 'Name_Alter',
 'class_AS',
 'geometry']

In [5]:
# Point attributes to leave out of the filtered settlement layer.
# 'index_right' is not one of the raw point columns (it only appears after the
# spatial join), so listing it here has no effect on the pre-join filtering.
# Each name appears exactly once ('DIST_CODE' was previously listed twice).
point_columns_to_drop = [
    'index_right',
    'VUID',
    'VUID_Pop_p',
    'VUID_Popul',
    'VUID_Build',
    'VUIDNear',
    'VUID_Area_',
    'REG_UNAM_1',
    'DIST_NA_PS',
    'REG_UNAMA_',
    'PROV_NA_DA',
    'DIST_NA_DA',
    'UNIT_TYPE',
    'PROV_CODE_',
    'PROV_NA_EN',
    'DIST_NA_EN',
    'DIST_CODE',
    'Note',
    'AFG_UID',
]



In [6]:
# Filter out the unwanted point attributes: keep every original column that is
# not on the drop list, preserving the original column order.
point_cols_keep = [col for col in point_cols_original if col not in point_columns_to_drop]

point_cols_keep

['OBJECTID',
 'SOURCE',
 'VIL_UID',
 'CNTR_CODE',
 'LANGUAGE_',
 'LANG_CODE',
 'ELEVATION',
 'LAT_Y',
 'LON_X',
 'Edited_by',
 'Name_EN',
 'Type_Settl',
 'Name_Local',
 'Name_Loc_1',
 'Name_Alter',
 'class_AS',
 'geometry']

In [7]:
# Subset the points layer to the retained columns (the keep-list includes 'geometry').
points_filtered = points[point_cols_keep]

In [8]:
# Record the attribute names of the raw polygon layer and display them.
label_list = list(polygons.columns)
label_list

['id',
 'shape_leng',
 'shape_area',
 'adm2_en',
 'adm2_da',
 'adm2_pcode',
 'adm2_ref',
 'adm2alt1en',
 'adm2alt2en',
 'adm2alt1da',
 'adm2alt2da',
 'adm1_en',
 'adm1_da',
 'adm1_pcode',
 'adm0_en',
 'adm0_da',
 'adm0_pcode',
 'date',
 'validon',
 'validto',
 'reg_en',
 'reg_da',
 'reg_pcode',
 'geometry']

In [9]:
# Polygon attributes that should not be carried over to the settlements.
strings_to_remove = [
    'id', 'shape_leng', 'shape_area',
    'adm2_ref', 'adm2alt1en', 'adm2alt2en', 'adm2alt1da', 'adm2alt2da',
    'date', 'validon', 'validto',
]

# Keep the remaining polygon columns in their original order.
new_label_list = [
    label
    for label in label_list
    if label not in strings_to_remove
]
print(new_label_list)

['adm2_en', 'adm2_da', 'adm2_pcode', 'adm1_en', 'adm1_da', 'adm1_pcode', 'adm0_en', 'adm0_da', 'adm0_pcode', 'reg_en', 'reg_da', 'reg_pcode', 'geometry']


In [10]:
# Restrict the polygon layer to the attributes that are carried over to the points.
polygons_filtered = polygons[new_label_list]

# Spatial join: attach the attributes of the intersecting polygon to each point.
# how='inner' keeps only points that intersect a polygon; the result also gains
# an 'index_right' column alongside the polygon attributes.
settlement_with_admin = gpd.sjoin(
    points_filtered,
    polygons_filtered,
    how='inner',
    predicate='intersects',
)

In [11]:
# Point attributes after the spatial join.
list(settlement_with_admin.columns)

['OBJECTID',
 'SOURCE',
 'VIL_UID',
 'CNTR_CODE',
 'LANGUAGE_',
 'LANG_CODE',
 'ELEVATION',
 'LAT_Y',
 'LON_X',
 'Edited_by',
 'Name_EN',
 'Type_Settl',
 'Name_Local',
 'Name_Loc_1',
 'Name_Alter',
 'class_AS',
 'geometry',
 'index_right',
 'adm2_en',
 'adm2_da',
 'adm2_pcode',
 'adm1_en',
 'adm1_da',
 'adm1_pcode',
 'adm0_en',
 'adm0_da',
 'adm0_pcode',
 'reg_en',
 'reg_da',
 'reg_pcode']

In [23]:
# Raise the column display limit so the wide joined table is not elided,
# then preview the first 50 rows.
pd.options.display.max_columns = 500
settlement_with_admin.head(50)

Unnamed: 0,OBJECTID,SOURCE,VIL_UID,CNTR_CODE,LANGUAGE_,LANG_CODE,ELEVATION,LAT_Y,LON_X,Edited_by,Name_EN,Type_Settl,Name_Local,Name_Loc_1,Name_Alter,class_AS,geometry,index_right,adm2_en,adm2_da,adm2_pcode,adm1_en,adm1_da,adm1_pcode,adm0_en,adm0_da,adm0_pcode,reg_en,reg_da,reg_pcode
0,1.0,Yale POP_MASTER,SBK-047,0.0,Pashto,2.0,547.452393,32.1407,61.4445,,Rom Sufla Do Qala,Sub,روم,Low,Rom,Sub,POINT (61.44450 32.14070),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
1,2.0,Yale POP_MASTER,SBK-042,0.0,Dari,1.0,559.284424,31.9691,61.6937,,Lakari Kalay,,لکړی بند کلی,Moderate,Lakari Band Kelay,Other,POINT (61.69370 31.96910),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
2,3.0,Yale POP_MASTER,SBK-011,0.0,Dari,1.0,514.669189,32.0486,61.2555,,Chashma Darazak,,چشمۀ پا درازک,Low,Chashmah-ye Pa Darazak,Other,POINT (61.25550 32.04860),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
3,4.0,Yale POP_MASTER,SBK-037,0.0,Dari,1.0,519.916382,32.0553,61.2836,,Karaiz Paishak,,پژک,Low,Pazhak,Other,POINT (61.28360 32.05530),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
4,5.0,USAID AIDVillageView,SBK-005,0.0,Dari,1.0,517.7948,32.0825,61.2111,,Khash,,خاش,Very High,Khash,Other,POINT (61.21110 32.08250),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
5,6.0,Yale POP_MASTER,SBK-040,0.0,Dari,1.0,527.208801,32.0831,61.2945,,Kashta Goh,,کوه کشا,Low,Koh-e Kasha,Other,POINT (61.29450 32.08310),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
6,7.0,USAID AIDVillageView,SBK-004,0.0,Dari,1.0,539.976379,32.0923,61.4156,,Kareze Kerta,,کارېزک,Low,Karezak,Other,POINT (61.41560 32.09230),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
7,8.0,Yale POP_MASTER,SBK-029,0.0,Pashto,2.0,531.174011,32.1082,61.3467,,Karaiz Chaplate,,چپلاتی,Very High,Chaplati,Other,POINT (61.34670 32.10820),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
8,9.0,Yale POP_MASTER,SBK-022,0.0,Dari,1.0,540.074829,32.1093,61.4127,,Hussain Abad,,آزاد,Low,Azad,Other,POINT (61.41270 32.10930),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR
9,10.0,USAID AIDVillageView,SBK-002,0.0,Dari,1.0,537.434021,32.1154,61.3832,,Ghulamabad,,غلام آباد,Very High,Ghulamabad,Other,POINT (61.38320 32.11540),192,Shibkoh,شیب کوه,AF3305,Farah,فراه,AF33,Afghanistan,افغانستان,AF,Western,لویدیځه حوزه,WR


In [30]:
# Distinct values present in the 'class_AS' column.
settlement_with_admin.loc[:, 'class_AS'].unique()

array(['Sub', 'Other', 'District Capital', 'Province Capital', 'Capital'],
      dtype=object)

In [19]:
# Write the joined layer as a shapefile (relative path, i.e. into the current working directory).
# NOTE(review): 'index_right' is 11 characters long, which exceeds the 10-character
# shapefile field-name limit, so that name is presumably shortened on write — confirm.
settlement_with_admin.to_file(
    filename='points_with_attributes2.shp',
    driver='ESRI Shapefile',
)

  settlement_with_admin.to_file('points_with_attributes2.shp')


In [27]:
# Export the joined layer to the 'afg_pplp' table through the engine `con`;
# if_exists='replace' overwrites an existing table of that name.
settlement_with_admin.to_postgis(
    name='afg_pplp',
    con=con,
    if_exists='replace',
)

In [20]:
# Read the exported shapefile back in so it can be compared with the pre-join points.
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (a non-raw literal triggers invalid-escape warnings).
new_sett = gpd.read_file(r"D:\iMMAP\proj\ASDC\data\settlement\points_with_attributes2.shp")
original_sett = points_filtered

In [21]:
# Number of columns in the re-loaded, joined layer.
len(new_sett.columns)

30

In [22]:
# Number of columns in the filtered points layer before the join.
len(original_sett.columns)

17