In [None]:
import sys, os, time
import pycountry

import pandas as pd
import geopandas as gpd

from tqdm.notebook import tqdm
from shapely.geometry import Point

sys.path.append(r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.dataMisc as dMisc
from GOSTrocks.misc import tPrint

%load_ext autoreload
%autoreload 2

In [None]:
# Input layers and output folder (absolute, machine-specific paths).
FUA_file = r"C:\WBG\Work\data\URBAN\GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"
admin_bounds = r"C:\WBG\Work\AdminBounds\Admin0_Polys.shp"

out_folder = r"C:\WBG\Work\MENA_Urban\RESULTS\ACLED"
# exist_ok=True creates the folder when it is missing and is a no-op otherwise,
# so no separate (race-prone) existence check is needed.
os.makedirs(out_folder, exist_ok=True)

# Read both layers and reproject them to EPSG:4326, the CRS assigned to the
# ACLED points that are built from longitude/latitude further down.
inFUA = gpd.read_file(FUA_file).to_crs(4326)
inAdmin = gpd.read_file(admin_bounds).to_crs(4326)

# ACLED API credentials are fetched through the GOSTrocks helper; the result is
# indexed by 'email' and 'api_key'.
# NOTE: the variable name `alced_creds` (sic) is kept as-is in case other cells use it.
alced_creds = dMisc.get_acled_creds()
acled_email = alced_creds['email']
acled_key = alced_creds['api_key']

In [None]:
# Join the FUA to the admin boundaries to find multi-national cities.
# With how='inner', an FUA that matches several admin polygons appears once per match.
inFUA_joined = gpd.sjoin(inFUA, inAdmin, how='inner')

# An eFUA_ID that occurs more than once was matched to more than one admin polygon.
inFUA_joined['MultiISO'] = inFUA_joined.duplicated(subset='eFUA_ID', keep=False)

# Create two datasets - simple FUAs are contained within one country, complicated FUAs are multi-national.
# Both are built without in-place mutation of a slice of inFUA_joined, so re-running
# the cell is idempotent and does not raise SettingWithCopyWarning.
simple_FUA = inFUA_joined.loc[~inFUA_joined['MultiISO']].copy()
complicated_FUA = (
    inFUA_joined.loc[inFUA_joined['MultiISO']]
    .drop_duplicates(subset='eFUA_ID', keep='first')  # keep one row per multi-match FUA
)

In [None]:
# Preview the first rows of simple_FUA (the FUAs flagged MultiISO == False).
simple_FUA.head()

In [None]:
def summarize_acled_events(events, fua_id):
    """Build a one-row table of ACLED event counts for a single FUA.

    Args:
        events: DataFrame of ACLED events with 'event_type' and
            'geo_precision' columns.
        fua_id: Value written to the 'eFUA_ID' column of the result.

    Returns:
        DataFrame with a single row labelled 'counts', one column per
        '<event_type>_<geo_precision>' combination holding the number of
        matching events, plus an 'eFUA_ID' column.
    """
    counts = events.groupby(['event_type', 'geo_precision']).size().reset_index(name='counts')
    counts['Label'] = counts['event_type'] + '_' + counts['geo_precision'].astype(str)
    # Transpose so that every label becomes a column of the single 'counts' row.
    summary = counts.loc[:, ['Label', 'counts']].T
    summary.columns = summary.loc['Label']
    summary = summary.drop('Label')
    summary['eFUA_ID'] = fua_id
    return summary


# For simple FUAs, we will query the ACLED database with the ISO codes
for iso3, curData in simple_FUA.groupby('Cntry_ISO'):
    out_file = os.path.join(out_folder, '%s_ACLED.csv' % iso3)
    if os.path.exists(out_file):
        # A CSV for this country already exists; skip the query.
        continue
    try:
        country_coding = pycountry.countries.get(alpha_3=iso3).numeric
    except (AttributeError, LookupError):
        # Unknown codes either come back as None (AttributeError on .numeric)
        # or raise a LookupError/KeyError, depending on the pycountry version.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        # NOTE(review): how acled_search treats iso3=0 is not visible here - confirm.
        country_coding = 0 #For Kosovo and other potentially unrecognized states
    acled_data = dMisc.acled_search(acled_key, acled_email, iso3 = country_coding,
                                    fields = ['longitude','latitude','event_type','geo_precision'])
    if acled_data.shape[0] == 0:
        # No events returned for this country.
        continue
    acled_geom = [Point(x, y) for x, y in zip(acled_data['longitude'], acled_data['latitude'])]
    acled_geo = gpd.GeoDataFrame(acled_data, geometry = acled_geom, crs = 4326)
    country_res = []
    for idx, row in tqdm(curData.iterrows(), total=curData.shape[0], desc='Processing ACLED data for %s' % iso3):
        # Select the events that intersect this FUA polygon. This matches the
        # default 'intersects' predicate of gpd.sjoin without building a one-row
        # GeoDataFrame per FUA, and avoids re-joining a frame that still carries
        # the 'index_right' column from the earlier admin join.
        sel_acled = acled_geo.loc[acled_geo.intersects(row['geometry'])]
        if sel_acled.shape[0] > 0:
            # Summarize conflict type and geoprecision
            country_res.append(summarize_acled_events(sel_acled, row['eFUA_ID']))
    if len(country_res) > 0:
        # Countries without any event inside an FUA get no CSV and are re-queried next run.
        country_total = pd.concat(country_res)
        country_total.to_csv(out_file)


In [None]:
# Scratch check: look up the first 'Cntry_name' value of curData in pycountry by name.
# Relies on `curData` still being set from the per-country loop (hidden kernel state).
pycountry.countries.get(name=curData['Cntry_name'].iloc[0])

In [None]:
# Scratch check: show the first 'Cntry_name' value of the leftover loop variable `curData`.
curData['Cntry_name'].iloc[0]

In [None]:
# Scratch check: stack the per-FUA summaries left in `country_res` by the per-country loop.
# pd.concat raises ValueError if that list is empty.
pd.concat(country_res)            

In [None]:
# Query ACLED once per FUA using the bounding box of its geometry, keep the
# events that fall within the FUA polygon, and store a one-row count summary
# per FUA in all_res (keyed by the inFUA row index).
all_res = {}
for idx, row in tqdm(inFUA.iterrows(), total = inFUA.shape[0]):
    acled_data = dMisc.acled_search(
        acled_key,
        acled_email,
        row.geometry.bounds,
        fields = ['longitude','latitude','event_type','geo_precision'],
    )
    if acled_data.shape[0] == 0:
        # Nothing returned for this bounding box.
        continue

    lon_lat_pairs = zip(acled_data['longitude'], acled_data['latitude'])
    acled_geom = [Point(x, y) for x, y in lon_lat_pairs]
    acled_geo = gpd.GeoDataFrame(acled_data, geometry = acled_geom, crs = 4326)

    # The bounding box is only a coarse filter; keep points within the polygon itself.
    inside_fua = acled_geo.within(row.geometry)
    acled_geo = acled_geo.loc[inside_fua]
    if acled_geo.shape[0] == 0:
        continue

    # Tabulate results: count events per (event_type, geo_precision) pair ...
    grouped_events = acled_geo.groupby(['event_type', 'geo_precision'])
    acled_summary = grouped_events.size().reset_index(name='counts')
    label_suffix = acled_summary['geo_precision'].astype(str)
    acled_summary['Label'] = acled_summary['event_type'] + '_' + label_suffix
    # ... then pivot so each label is a column of a single 'counts' row.
    acled_summary = acled_summary[['Label', 'counts']].T
    acled_summary.columns = acled_summary.loc['Label']
    acled_summary = acled_summary.drop('Label')
    acled_summary['eFUA_ID'] = row['eFUA_ID']
    all_res[idx] = acled_summary
    


In [None]:
# Collect the per-FUA summary frames (in insertion order) and stack them into one table.
all_dfs = list(all_res.values())
pd.concat(all_dfs)

In [None]:
pd.DataFrame(all_res