In [66]:
import sys, os, time
import pycountry

import pandas as pd
import geopandas as gpd

from tqdm.notebook import tqdm
from shapely.geometry import Point

sys.path.append(r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.dataMisc as dMisc
from GOSTrocks.misc import tPrint

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# --- Input layers ------------------------------------------------------------
FUA_file = r"C:\WBG\Work\data\URBAN\GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"
admin_bounds = r"C:\WBG\Work\AdminBounds\Admin0_Polys.shp"

# --- Output folder (created the first time the cell runs) ----------------------
out_folder = r"C:\WBG\Work\MENA_Urban\RESULTS\ACLED"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)

# Read both layers, then reproject each to CRS 4326 -- the CRS the ACLED points
# are assigned further down -- so the spatial tests compare like with like.
inFUA = gpd.read_file(FUA_file)
inAdmin = gpd.read_file(admin_bounds)
inFUA, inAdmin = inFUA.to_crs(4326), inAdmin.to_crs(4326)

# --- ACLED API credentials, fetched through the GOSTrocks helper ---------------
alced_creds = dMisc.get_acled_creds()
acled_email, acled_key = alced_creds['email'], alced_creds['api_key']

In [59]:
# Join the FUA to the admin boundaries to find multi-national cities.
# The inner join yields one row per (FUA, admin polygon) pair, so an FUA that
# touches several admin polygons appears several times.
inFUA_joined = gpd.sjoin(inFUA, inAdmin, how='inner')

# Flag every row whose eFUA_ID occurs more than once in the join result
is_multi_iso = inFUA_joined.duplicated(subset='eFUA_ID', keep=False)
inFUA_joined['MultiISO'] = is_multi_iso

# Create two datasets - simple FUAs are contained within one country, complicated FUAs are multi-national.
# Both are built as independent frames (no inplace edits on a slice of inFUA_joined),
# which avoids chained-assignment warnings and keeps inFUA_joined untouched.
simple_FUA = inFUA_joined.loc[~is_multi_iso].copy()
complicated_FUA = (
    inFUA_joined
    .loc[is_multi_iso]
    .drop_duplicates(subset='eFUA_ID', keep='first')  # keep a single row per multi-national FUA
)

In [64]:
# Preview the first five single-country FUA rows
simple_FUA.iloc[:5]

Unnamed: 0,eFUA_ID,UC_num,UC_IDs,eFUA_name,Commuting,Cntry_ISO,Cntry_name,FUA_area,UC_area,FUA_p_2015,...,WB_ADM0_NA,Shape_Leng,Shape_Area,ISO3,UN_m49,Region,incomeG,lendingC,FID_100,MultiISO
0,1.0,1.0,5959,Herat,1.0,AFG,Afghanistan,139.0,60.0,888811.175807,...,Afghanistan,7132529.0,641837.888397,AFG,4,South Asia,Low income,IDA,0,False
1,192.0,1.0,5964,Guzarah,1.0,AFG,Afghanistan,32.0,16.0,169489.573231,...,Afghanistan,7132529.0,641837.888397,AFG,4,South Asia,Low income,IDA,0,False
2,354.0,1.0,5968,Shindand,0.0,AFG,Afghanistan,12.0,12.0,147553.40387,...,Afghanistan,7132529.0,641837.888397,AFG,4,South Asia,Low income,IDA,0,False
3,505.0,1.0,5970,Qala i Naw,0.0,AFG,Afghanistan,3.0,3.0,79809.722656,...,Afghanistan,7132529.0,641837.888397,AFG,4,South Asia,Low income,IDA,0,False
4,648.0,1.0,5973,Farah,1.0,AFG,Afghanistan,32.0,16.0,131508.79706,...,Afghanistan,7132529.0,641837.888397,AFG,4,South Asia,Low income,IDA,0,False


In [101]:
# For simple FUAs, we will query the ACLED database with the ISO codes.
# One CSV per country is written to out_folder. A country whose CSV already exists
# is skipped, so the cell can be re-run to resume an interrupted job. Note that a
# country with no matching events gets no CSV and is therefore queried again next run.
for iso3, curData in simple_FUA.groupby('Cntry_ISO'):
    out_file = os.path.join(out_folder, '%s_ACLED.csv' % iso3)
    if os.path.exists(out_file):
        continue

    # acled_search's `iso3` keyword is given the ISO numeric code, looked up from the alpha-3 code.
    try:
        country_coding = pycountry.countries.get(alpha_3=iso3).numeric
    except (AttributeError, LookupError):
        # Only a failed lookup is handled here (a None result or a lookup error, depending
        # on the pycountry version); anything else, e.g. KeyboardInterrupt, propagates.
        country_coding = 0 #For Kosovo and other potentially unrecognized states
    acled_data = dMisc.acled_search(acled_key, acled_email, iso3 = country_coding,
                                    fields = ['longitude','latitude','event_type','geo_precision'])
    if acled_data.shape[0] == 0:
        continue

    # Build the event points in one vectorised call instead of a Python-level loop
    acled_geom = gpd.points_from_xy(acled_data['longitude'], acled_data['latitude'])
    acled_geo = gpd.GeoDataFrame(acled_data, geometry = acled_geom, crs = 4326)

    country_res = []
    for idx, row in tqdm(curData.iterrows(), total=curData.shape[0], desc='Processing ACLED data for %s' % iso3):
        # Select the events intersecting this FUA polygon. This is the same predicate
        # sjoin applies by default, without rebuilding a one-row GeoDataFrame per FUA.
        sel_acled = acled_geo.loc[acled_geo.intersects(row['geometry'])]
        if sel_acled.shape[0] == 0:
            continue

        # Summarize conflict type and geoprecision: count events per
        # (event_type, geo_precision) pair, then pivot the counts into a single
        # row whose columns are labelled "<event_type>_<geo_precision>".
        acled_summary = sel_acled.groupby(['event_type', 'geo_precision']).size().reset_index(name='counts')
        acled_summary['Label'] = acled_summary['event_type'] + '_' + acled_summary['geo_precision'].astype(str)
        acled_summary = acled_summary.loc[:,['Label', 'counts']].T
        acled_summary.columns = acled_summary.loc['Label']
        acled_summary = acled_summary.drop('Label')
        acled_summary['eFUA_ID'] = row['eFUA_ID']
        country_res.append(acled_summary)

    # pd.concat raises on an empty list, so only write a file when at least one FUA had events
    if len(country_res) > 0:
        country_total = pd.concat(country_res)
        country_total.to_csv(out_file)




Processing ACLED data for XKO:   0%|          | 0/7 [00:00<?, ?it/s]



Processing ACLED data for XNC:   0%|          | 0/1 [00:00<?, ?it/s]

ValueError: No objects to concatenate

In [99]:
# Debug check: look up the first country name of the current group in pycountry
first_country_name = curData['Cntry_name'].iloc[0]
pycountry.countries.get(name=first_country_name)

In [100]:
# Debug check: country name on the first row of the current group
curData.loc[:, 'Cntry_name'].iat[0]

'Kosovo'

Processing ACLED data for AFG:   0%|          | 0/28 [00:00<?, ?it/s]

In [92]:
# Stack the per-FUA summary rows currently held in country_res into one table
pd.concat(country_res, axis=0)

Label,Battles_1,Battles_2,Battles_3,Explosions/Remote violence_1,Explosions/Remote violence_2,Explosions/Remote violence_3,Protests_1,Protests_2,Protests_3,Riots_1,Riots_3,Strategic developments_1,Strategic developments_2,Strategic developments_3,Violence against civilians_1,Violence against civilians_2,Violence against civilians_3,eFUA_ID,Riots_2
counts,158.0,54.0,101.0,49.0,15.0,17.0,47.0,1.0,28.0,5.0,1.0,21.0,2.0,29.0,96.0,20.0,26.0,1.0,
counts,1.0,72.0,1.0,,5.0,,,,,,,,5.0,,,10.0,,192.0,
counts,12.0,336.0,,3.0,120.0,,,1.0,,,,,9.0,,1.0,21.0,,354.0,
counts,60.0,39.0,94.0,27.0,11.0,51.0,5.0,1.0,6.0,1.0,1.0,5.0,1.0,13.0,12.0,4.0,2.0,505.0,
counts,258.0,97.0,113.0,95.0,33.0,51.0,6.0,,6.0,1.0,,12.0,1.0,12.0,35.0,11.0,13.0,648.0,
counts,24.0,12.0,46.0,5.0,1.0,23.0,9.0,,9.0,1.0,,4.0,2.0,9.0,6.0,7.0,13.0,784.0,
counts,43.0,192.0,167.0,18.0,61.0,44.0,23.0,1.0,9.0,2.0,,12.0,4.0,13.0,26.0,12.0,13.0,916.0,
counts,56.0,28.0,70.0,30.0,8.0,9.0,4.0,1.0,7.0,1.0,,2.0,3.0,8.0,18.0,4.0,11.0,1040.0,
counts,67.0,24.0,47.0,38.0,7.0,12.0,5.0,,2.0,,,9.0,,11.0,12.0,3.0,7.0,1154.0,
counts,404.0,90.0,129.0,221.0,39.0,57.0,25.0,1.0,21.0,2.0,,18.0,7.0,24.0,54.0,8.0,7.0,1263.0,


In [42]:
# Query ACLED once per FUA using the FUA's bounding box, keep only the events that
# fall within the FUA polygon, and store a one-row summary per FUA in all_res
# (keyed by the FUA's index label in inFUA).
all_res = {}
for idx, row in tqdm(inFUA.iterrows(), total = inFUA.shape[0]):
    acled_data = dMisc.acled_search(
        acled_key, acled_email, row.geometry.bounds,
        fields = ['longitude','latitude','event_type','geo_precision'],
    )
    if acled_data.shape[0] == 0:
        continue  # nothing returned for this bounding box

    # Turn the returned coordinates into point geometries
    acled_geom = [Point(x, y) for x, y in zip(acled_data['longitude'], acled_data['latitude'])]
    acled_geo = gpd.GeoDataFrame(acled_data, geometry = acled_geom, crs = 4326)

    # The bounding box is larger than the FUA itself: drop events outside the polygon
    inside_fua = acled_geo.within(row.geometry)
    acled_geo = acled_geo.loc[inside_fua]
    if acled_geo.shape[0] == 0:
        continue  # events in the box, but none within the FUA

    # Tabulate results: event counts per (event_type, geo_precision) pair ...
    acled_summary = (
        acled_geo
        .groupby(['event_type', 'geo_precision'])
        .size()
        .reset_index(name='counts')
    )
    acled_summary['Label'] = acled_summary['event_type'] + '_' + acled_summary['geo_precision'].astype(str)
    # ... pivoted into a single 'counts' row with one column per Label
    acled_summary = acled_summary[['Label', 'counts']].T
    acled_summary.columns = acled_summary.loc['Label']
    acled_summary = acled_summary.drop('Label')
    acled_summary['eFUA_ID'] = row['eFUA_ID']
    all_res[idx] = acled_summary
    


  0%|          | 0/9031 [00:00<?, ?it/s]



KeyboardInterrupt: 

In [54]:
# Collect the per-FUA summary frames (dict insertion order) and stack them into one table
all_dfs = list(all_res.values())
pd.concat(all_dfs)

Label,Battles_1,Battles_2,Battles_3,Explosions/Remote violence_1,Explosions/Remote violence_2,Explosions/Remote violence_3,Protests_1,Protests_2,Protests_3,Riots_1,Riots_3,Strategic developments_1,Strategic developments_2,Strategic developments_3,Violence against civilians_1,Violence against civilians_2,Violence against civilians_3,eFUA_ID,Riots_2
counts,158,54,101,49,15,17,47,1,28,5,1,21,2,29,96,20,26,1.0,
counts,1,72,1,,5,,,,,,,,5,,,10,,192.0,
counts,12,336,,3,120,,,1,,,,,9,,1,21,,354.0,
counts,60,39,94,27,11,51,5,1,6,1,1,5,1,13,12,4,2,505.0,
counts,258,97,113,95,33,51,6,,6,1,,12,1,12,35,11,13,648.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
counts,,,,,,,,,,3,,,,,1,,,1910.0,
counts,1,,,,,,,,,1,,,,,4,1,,1989.0,
counts,3,,,1,,,1,,,,,1,,,,,1,2064.0,
counts,8,,,,,,,,,,,2,,,1,,,2213.0,


In [49]:
pd.DataFrame(all_res

Label,Battles_1,Battles_2,Battles_3,Explosions/Remote violence_2,Strategic developments_2,Violence against civilians_2,eFUA_ID
counts,1,72,1,5,5,10,192.0
