# Step 2 - Layer Generation

In this script, we match the output we generated in Step 1 onto a world admin boundary shapefile.

In [1]:
import geopandas as gpd
import pandas as pd
import os, sys
import time
from shapely.wkt import loads
import numpy as np

Define settings, per usual. 

In [17]:
# All inputs and outputs live under one project folder. Define it once so the
# notebook can be pointed at another machine by setting the SAFEGRAPH_DIR
# environment variable instead of editing three separate paths.
BASE_DIR = os.environ.get('SAFEGRAPH_DIR', r'C:\Users\charl\Documents\GOST\SafeGraph')

# Folder holding the admin boundary shapefile directory.
data_path = BASE_DIR
# File name of the boundary shapefile and the sub-folder it sits in.
shapefile = r'Artemis_countries.shp'
shp = 'Artemis_Countries'
# Folder the final shapefile is written to.
workspace = BASE_DIR
# CSV produced by Step 1, and the folder it is read from.
data_file = r'homeRange_analysis_output2.csv'
outs = BASE_DIR

Import the administrative boundary shapefile as a GeoDataFrame

In [19]:
# Build the full path <data_path>/<shp>/<shapefile>, then load it as a GeoDataFrame.
shapefile_path = os.path.join(data_path, shp, shapefile)
shape = gpd.read_file(shapefile_path)

Import the data layer we want to match onto the boundary file, discarding everything except the records where a migration was detected.

In [19]:
# Load the Step 1 output table from <outs>/<data_file>.
results_csv = os.path.join(outs, data_file)
res = pd.read_csv(results_csv)

# Keep only the rows whose status marks a detected migration.
res = res[res['status'] == 'migration detected']

We create start_loc and end_loc as geometry objects for the start and end points in the migration. Then we generate corresponding GeoDataFrames for each.

In [None]:
# Keep only the columns used downstream. The explicit copy makes `res` an
# independent frame, so the column assignments below never write into a slice
# of the filtered table (which is what triggers SettingWithCopyWarning).
res = res[['start_loc', 'status', 'end_loc']].copy()

# Parse the text in each location column into shapely geometry objects
# using shapely.wkt.loads.
res['start_loc'] = res['start_loc'].apply(loads)
res['end_loc'] = res['end_loc'].apply(loads)

# Build one GeoDataFrame per end of the migration. Each gets its own copy of
# the data so setting the active geometry on one cannot affect the other.
# The CRS is given as an authority string; the {'init': ...} dict form is
# deprecated in current pyproj/geopandas.
res_start = gpd.GeoDataFrame(res.copy(), crs='EPSG:4326', geometry='start_loc')
res_end = gpd.GeoDataFrame(res.copy(), crs='EPSG:4326', geometry='end_loc')

Cut the admin boundary shapefile to just the columns we care about:

In [20]:
# Admin names at levels 0-2 plus the polygon geometry are all we need from the boundaries.
admin_columns = ['adm0_name', 'adm1_name', 'adm2_name', 'geometry']
shape_short = shape[admin_columns]

Now, we spatially intersect the migration entries with the shapefile to 'load' the migration info onto the polygons they intersect. See the inline comments for details.

In [21]:
def _count_intersecting(points, points_sindex, polygon):
    """Count the rows of ``points`` whose active geometry intersects ``polygon``.

    Parameters
    ----------
    points : GeoDataFrame
        Migration points (``res_start`` or ``res_end``).
    points_sindex : spatial index
        The ``.sindex`` of ``points``; passed in so it is built only once.
    polygon : shapely geometry or None
        One admin boundary geometry.

    Returns
    -------
    int
        Number of intersecting points; 0 for a missing or empty polygon.
    """
    # A missing or empty geometry has no usable bounding box and cannot
    # intersect anything, so report zero instead of failing on `.bounds`.
    if polygon is None or polygon.is_empty:
        return 0

    # Cheap pass: positions of the points that fall inside the polygon's bounding box.
    candidate_positions = list(points_sindex.intersection(polygon.bounds))
    candidates = points.iloc[candidate_positions]

    # Exact pass: keep only candidates that truly intersect the polygon.
    return int(candidates.intersects(polygon).sum())


# Create spatial indexes of the migration start and end points
spatial_index_out = res_start.sindex
spatial_index_in = res_end.sindex

# create a summary bag: one dict per admin polygon
summary = []

# Iterate through each polygon in the shapefile and count the migrations that
# start or end in that territory. Iterating over the column values directly
# (rather than `.loc[i]`) keeps this correct even if the index is not 0..n-1.
boundary_rows = zip(shape_short.geometry,
                    shape_short.adm0_name,
                    shape_short.adm1_name,
                    shape_short.adm2_name)

for i, (polygon, adm0_name, adm1_name, adm2_name) in enumerate(boundary_rows):

    # migrations STARTING in this territory
    migr_out = _count_intersecting(res_start, spatial_index_out, polygon)

    # migrations ENDING in this territory
    migr_in = _count_intersecting(res_end, spatial_index_in, polygon)

    # A polygon with no matches simply gets zero counts, so no separate
    # "no matches" branch is needed.
    summary.append({'i': i,
                    'geometry': polygon,
                    'adm0_name': adm0_name,
                    'adm1_name': adm1_name,
                    'adm2_name': adm2_name,
                    'TotalMigration': migr_out + migr_in,
                    'OutMigration': migr_out,
                    'InMigration': migr_in})

# rebuild the original admin boundary table, now carrying the migration counts
df = pd.DataFrame(summary)

Convert the summary DataFrame back into a GeoDataFrame and write it to a shapefile.

In [22]:
# Wrap the summary table as a GeoDataFrame using its 'geometry' column.
# The CRS is given as an authority string; the {'init': ...} dict form is
# deprecated in current pyproj/geopandas.
res_gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Write the result to <workspace>/output.shp.
res_gdf.to_file(os.path.join(workspace, 'output.shp'))