In [1]:
# Remote (S3) locations: where the test inputs are fetched from and where the
# results are uploaded to.
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/test/testGpd/"
S3_OUTPUT_PATH = "s3://wri-projects/Aqueduct30/test/output/"

# Local working directories on the EC2 volume that mirror the S3 paths.
EC2_INPUT_PATH = "/volumes/data/temp/"
EC2_OUTPUT_PATH = "/volumes/data/temp/output/"

In [2]:
!mkdir -p {EC2_INPUT_PATH}
!mkdir -p {EC2_OUTPUT_PATH}

In [3]:
!aws s3 cp {S3_INPUT_PATH} {EC2_INPUT_PATH} --recursive --quiet

In [4]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib notebook
import os
import folium
from shapely.wkt import loads
from shapely.geometry import Point

In [5]:
# Read the two test shapefiles from the local input directory.
# The paths are built from EC2_INPUT_PATH (the directory the S3 download writes
# to) rather than repeating that directory as a string literal, so changing the
# constant cannot leave these reads pointing at a stale location.
gdfFAO = gpd.read_file(os.path.join(EC2_INPUT_PATH, 'FAO', 'faoBuffered.shp'))
gdfHybas = gpd.read_file(
    os.path.join(EC2_INPUT_PATH, 'Hybas', 'hybas_lev06_v1c_merged_fiona_Cropped_V01.shp')
)

In [6]:
# Index the basins by their PFAF_ID column.
# The guard makes this cell safe to re-run: once PFAF_ID has become the index it
# is no longer a column, and calling set_index('PFAF_ID') again would raise a
# KeyError.
if 'PFAF_ID' in gdfHybas.columns:
    gdfHybas = gdfHybas.set_index('PFAF_ID')

In [7]:
# Preview the first five basins, now indexed by PFAF_ID.
gdfHybas.head(5)

Unnamed: 0_level_0,COAST,DIST_MAIN,DIST_SINK,ENDO,HYBAS_ID,MAIN_BAS,NEXT_DOWN,NEXT_SINK,ORDER,SORT,SUB_AREA,UP_AREA,geometry
PFAF_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
232260,0,227.1,227.1,0,2060499090,2060021030,2060502710,2060021030,2,526,13664.2,13664.2,"POLYGON ((1.133333333333358 47.35833333333336,..."
232270,0,227.3,227.3,0,2060498990,2060021030,2060502710,2060021030,1,527,10041.2,42572.4,"POLYGON ((2.87916666666669 46.73750000000003, ..."
232405,0,275.6,275.6,0,2060455290,2060022150,2060446440,2060022150,1,542,1088.5,44526.2,"POLYGON ((2.079166666666683 48.98750000000003,..."
232404,0,275.7,275.7,0,2060455180,2060022150,2060446440,2060022150,2,543,16783.0,16783.0,"POLYGON ((3.770833333333363 49.19166666666669,..."
232407,0,353.6,353.6,0,2060459800,2060022150,2060455290,2060022150,1,544,7203.8,12783.1,"POLYGON ((2.90416666666669 49.13750000000003, ..."


In [8]:
# Shrink every basin polygon inward: a negative distance makes buffer() erode
# the shape instead of growing it. The distance is expressed in the layer's
# coordinate units (the preview coordinates look like lon/lat degrees —
# TODO confirm the CRS).
gsHybasBuffer = gdfHybas['geometry'].buffer(
    distance=-0.005,
    resolution=16,
)

In order to use `merge`, I needed to convert the GeoSeries to a GeoDataFrame.

In [9]:
# Wrap the buffered GeoSeries in a GeoDataFrame so that it can be merged.
# The frame's only column is the buffered geometry; the PFAF_ID index carries
# over from the series.
gdfHybasBuffer = gpd.GeoDataFrame(geometry=gsHybasBuffer)

The old (unbuffered) geometry column is dropped here; it will be replaced by the new, buffered geometry when the two tables are merged.

In [10]:
# Remove the original (unbuffered) geometry so that only the attribute columns
# remain. `axis` is passed by keyword: the positional form drop('geometry', 1)
# was deprecated in pandas 1.x and is rejected by pandas 2.x, where every
# argument after `labels` is keyword-only.
gdfHybas = gdfHybas.drop('geometry', axis=1)

In [11]:
# Preview the buffered geometries (index: PFAF_ID).
gdfHybasBuffer.head(5)

Unnamed: 0_level_0,geometry
PFAF_ID,Unnamed: 1_level_1
232260,POLYGON ((0.4802056493726044 47.33676526482385...
232270,POLYGON ((0.3946555531847661 47.49782138220293...
232405,"POLYGON ((1.985697993274555 48.88870001490769,..."
232404,"POLYGON ((1.642712429100236 49.6413141345426, ..."
232407,"POLYGON ((2.414370437159644 48.81750000000003,..."


In [12]:
# Preview the attribute table after the geometry column has been removed.
gdfHybas.head(5)

Unnamed: 0_level_0,COAST,DIST_MAIN,DIST_SINK,ENDO,HYBAS_ID,MAIN_BAS,NEXT_DOWN,NEXT_SINK,ORDER,SORT,SUB_AREA,UP_AREA
PFAF_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
232260,0,227.1,227.1,0,2060499090,2060021030,2060502710,2060021030,2,526,13664.2,13664.2
232270,0,227.3,227.3,0,2060498990,2060021030,2060502710,2060021030,1,527,10041.2,42572.4
232405,0,275.6,275.6,0,2060455290,2060022150,2060446440,2060022150,1,542,1088.5,44526.2
232404,0,275.7,275.7,0,2060455180,2060022150,2060446440,2060022150,2,543,16783.0,16783.0
232407,0,353.6,353.6,0,2060459800,2060022150,2060455290,2060022150,1,544,7203.8,12783.1


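By default, `merge` joins on the columns the two tables have in common, not on the index; this is true for pandas as well as geopandas. Therefore I copy the indices to a new column (`PFAF_ID2`) and merge on that column. Alternatively, passing `left_index=True, right_index=True` to `merge` (or using `join`) joins on the index directly and keeps it.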

In [13]:
# Expose each frame's PFAF_ID index as an ordinary column named PFAF_ID2, so
# that both frames share a key column to merge on.
for frame in (gdfHybas, gdfHybasBuffer):
    frame['PFAF_ID2'] = frame.index

In [14]:
# Outer-join the buffered geometries (left) with the attribute table (right)
# on the shared PFAF_ID2 key column.
gdfHybasNew = gdfHybasBuffer.merge(
    right=gdfHybas,
    on="PFAF_ID2",
    how="outer",
)

In [15]:
gdfHybasNew.head()

Unnamed: 0,geometry,PFAF_ID2,COAST,DIST_MAIN,DIST_SINK,ENDO,HYBAS_ID,MAIN_BAS,NEXT_DOWN,NEXT_SINK,ORDER,SORT,SUB_AREA,UP_AREA
0,POLYGON ((0.4802056493726044 47.33676526482385...,232260,0,227.1,227.1,0,2060499090,2060021030,2060502710,2060021030,2,526,13664.2,13664.2
1,POLYGON ((0.3946555531847661 47.49782138220293...,232270,0,227.3,227.3,0,2060498990,2060021030,2060502710,2060021030,1,527,10041.2,42572.4
2,"POLYGON ((1.985697993274555 48.88870001490769,...",232405,0,275.6,275.6,0,2060455290,2060022150,2060446440,2060022150,1,542,1088.5,44526.2
3,"POLYGON ((1.642712429100236 49.6413141345426, ...",232404,0,275.7,275.7,0,2060455180,2060022150,2060446440,2060022150,2,543,16783.0,16783.0
4,"POLYGON ((2.414370437159644 48.81750000000003,...",232407,0,353.6,353.6,0,2060459800,2060022150,2060455290,2060022150,1,544,7203.8,12783.1


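Note that the index was not preserved: merging on a column (rather than on the index) returns a new default integer index, which is standard pandas behaviour and not specific to geopandas. The original index can be restored with `set_index('PFAF_ID2')`.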

In [16]:
# Write the merged layer (buffered geometry + Hybas attribute columns) to the
# local output directory as a shapefile.
# gdfHybasNew is saved rather than gdfHybasBuffer: the latter only holds the
# geometry and the PFAF_ID2 key, so every attribute column would be missing
# from the output and the merge result would go unused.
gdfHybasNew.to_file(os.path.join(EC2_OUTPUT_PATH, 'output.shp'))

In [17]:
!aws s3 cp {EC2_OUTPUT_PATH} {S3_OUTPUT_PATH} --recursive --quiet