In [41]:
import sys, os, importlib

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import Point, LineString


sys.path.append('../')
sys.path.append('../../gostrocks/src/')

from infrasap import process_flows
from GOSTRocks.misc import tPrint
from infrasap import geocode

# Geocode CSV file

In [None]:
# Load the CSV of new major ports that still need coordinates.
# NOTE(review): absolute, user-specific path -- only resolves on the original machine.
new_ports_file = "/home/wb411133/data/Projects/INFRA/PORTS/new_major_ports.csv"
inP = pd.read_csv(new_ports_file)
inP.head()

In [None]:
# Geocode every row with the query "<Port>, <Country>" (one getLocation call per row).
locations = inP.apply(
    lambda row: geocode.getLocation("%s, %s" % (row['Port'], row['Country'])),
    axis=1,
)

In [None]:
# Pull (lat, lng) out of each geocoder response, store them as columns and
# write the result next to the input file with a "_geocoded" suffix.
lat_lng_pairs = [(hit['location']['lat'], hit['location']['lng']) for hit in locations]
inP['Lat'] = [lat for lat, _ in lat_lng_pairs]
inP['Lon'] = [lng for _, lng in lat_lng_pairs]
geocoded_file = new_ports_file.replace(".csv", "_geocoded.csv")
inP.to_csv(geocoded_file)

# MAP National Port Flows

In [52]:
# Input: port-to-port flow table; outputs of this section are written to out_folder.
# NOTE(review): both paths are absolute and machine-specific.
port_data = "/home/public/Data/GLOBAL/INFRA/PORTS/Port_flow_data_Q42020.csv"
out_folder = "/home/wb411133/temp"
inD = pd.read_csv(port_data)

In [53]:
# Read the country polygons and collapse each one to its centroid; the
# centroids are used below as the map anchor for every country.
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
inB = gpd.read_file(global_bounds)
# Vectorised centroid (instead of a per-row lambda); missing geometries stay
# missing rather than raising AttributeError.
inB['geometry'] = inB['geometry'].centroid
inB.head()

Unnamed: 0,OBJECTID,ISO_A2,WB_ADM0_CO,WB_ADM0_NA,Shape_Leng,Shape_Area,ISO3,UN_m49,Region,incomeG,lendingC,FID_100,geometry
0,1,AF,1,Afghanistan,7132529.0,641837.9,AFG,4,South Asia,Low income,IDA,0,POINT (7354476.159919892 4016689.864814219)
1,2,AL,3,Albania,1743971.0,28681.77,ALB,8,Europe & Central Asia,Upper middle income,IBRD,100,POINT (2233635.449581742 5034755.452888602)
2,3,DZ,4,Algeria,8933841.0,2309321.0,DZA,12,Middle East & North Africa,Upper middle income,IBRD,200,POINT (297712.9092418809 3302083.668946038)
3,4,AS,5,American Samoa (U.S.),174457.3,211.0162,ASM,16,East Asia & Pacific,Upper middle income,,300,POINT (-18967447.86540093 -1597359.862465858)
4,5,SD,6,Sudan,8852111.0,1844887.0,SDN,736,Sub-Saharan Africa,Lower middle income,IDA,400,POINT (3337126.248401281 1817745.065660827)


In [59]:
# Inspect column types: several numeric TEU columns are read as object (text).
inD.dtypes

Date                                 object
Port1                                object
Port2                                object
No of services                        int64
Average annual frequency              int64
Average number of ships               int64
Maximum number of ships               int64
Minimum number of ships               int64
No of operators                       int64
No of Alliances                       int64
Average ship size (TEU)              object
Min ship size (TEU)                  object
Max ship size (TEU)                  object
StDevP ship size (TEU)               object
Variance ship size (TEU)             object
Average ship age (months)             int64
Min ship age (months)                 int64
Max ship age (months)                 int64
StDevP ship age (months)              int64
Variance ship age (months)           object
Annual deployed capacity (TEU)       object
Quarterly deployed capacity (TEU)    object
ISO2_FROM                       

In [61]:
# Column -> aggregation function used when collapsing port pairs to country pairs.
agg = {
    'No of services': 'sum',
    'Average ship size (TEU)': 'mean',
    'Quarterly deployed capacity (TEU)': 'sum',
}

def tryFloat(x):
    """Convert ``x`` to ``float``, tolerating comma thousands separators.

    Args:
        x: Any value; typically a number or a string such as ``"1,234.5"``.

    Returns:
        The value as a ``float``, or ``None`` when it cannot be converted
        (non-numeric text, ``None``, unsupported types).
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures fall through; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        pass
    try:
        # Second attempt: strip comma thousands separators ("1,234" -> "1234").
        return float(x.replace(",", ""))
    except (AttributeError, TypeError, ValueError, OverflowError):
        return None
        

# Coerce every column that will be aggregated from text to float.
for column in agg:
    inD[column] = inD[column].apply(tryFloat)

In [62]:
# Get a list of country codes to extract
inD['ISO2_FROM'] = inD['Port1'].apply(lambda x: x[:2])
inD['ISO2_TO'] = inD['Port2'].apply(lambda x: x[:2])
nRes = inD.groupby(['ISO2_FROM', 'ISO2_TO']).aggregate(agg).reset_index()
internal_trade = nRes.loc[nRes['ISO2_FROM'] == nRes['ISO2_TO']]

In [63]:
# Attach each country's centroid to its domestic-flow row and export as GeoJSON points.
# NOTE(review): internal_trade is rebuilt from itself, so re-running this cell
# without re-running the previous one merges the centroid columns a second time.
country_centroids = inB.loc[:, ['ISO_A2', 'geometry']]
internal_trade = internal_trade.merge(country_centroids, left_on="ISO2_FROM", right_on="ISO_A2")
internal_trade = gpd.GeoDataFrame(internal_trade, geometry='geometry', crs=inB.crs)
internal_flows_file = os.path.join(out_folder, "PORT_NATIONAL_INTERNAL_FLOWS.geojson")
internal_trade.to_file(internal_flows_file, driver="GeoJSON")

In [64]:
# Cross-border flows: every country pair whose origin differs from its destination.
is_cross_border = nRes['ISO2_FROM'] != nRes['ISO2_TO']
flows = nRes.loc[is_cross_border]

# Join the centroid twice: the first merge supplies the origin point
# (geometry_x), the second the destination point (geometry_y).
country_centroids = inB.loc[:, ['ISO_A2', 'geometry']]
flows = flows.merge(country_centroids, left_on="ISO2_FROM", right_on="ISO_A2")
flows = flows.merge(country_centroids, left_on="ISO2_TO", right_on="ISO_A2")

# Draw a straight line from origin centroid to destination centroid.
flows['geometry'] = flows.apply(
    lambda row: LineString([row['geometry_x'], row['geometry_y']]),
    axis=1,
)
flows = flows.drop(['ISO_A2_x','geometry_x','ISO_A2_y','geometry_y'], axis=1)
flows = gpd.GeoDataFrame(flows, geometry="geometry", crs=inB.crs)
external_flows_file = os.path.join(out_folder, "PORT_NATIONAL_EXTERNAL_FLOWS.geojson")
flows.to_file(external_flows_file, driver="GeoJSON")

# Map the ports that appear in the port-flow data

In [3]:
# Output location for the port datasets, plus a sub-folder that caches the raw
# per-country LOCODE tables.
# NOTE(review): absolute, user-specific path.
output_folder = '/home/wb411133/data/Global/INFRA/PORTS'
raw_folder = os.path.join(output_folder, "RAW_LOCODES")
# exist_ok=True replaces the check-then-create pattern and is safe to re-run.
os.makedirs(output_folder, exist_ok=True)
os.makedirs(raw_folder, exist_ok=True)

In [6]:
# Get a list of country codes to extract
inD['ISO2'] = inD['Port1'].apply(lambda x: x[:2])
countries = inD['ISO2'].unique()
countries

array(['AE', 'AG', 'AI', 'AL', 'AN', 'AO', 'AR', 'AS', 'AU', 'AW', 'BB',
       'BD', 'BE', 'BG', 'BH', 'BJ', 'BM', 'BN', 'BQ', 'BR', 'BS', 'BZ',
       'CA', 'CD', 'CG', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CU',
       'CV', 'CW', 'CX', 'CY', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC',
       'EE', 'EG', 'ER', 'ES', 'FI', 'FJ', 'FM', 'FO', 'FR', 'GA', 'GB',
       'GD', 'GE', 'GF', 'GH', 'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR',
       'GT', 'GU', 'GW', 'GY', 'HK', 'HN', 'HR', 'HT', 'ID', 'IE', 'IL',
       'IN', 'IQ', 'IR', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KH', 'KI',
       'KM', 'KN', 'KR', 'KW', 'KY', 'LB', 'LC', 'LK', 'LR', 'LT', 'LV',
       'LY', 'MA', 'MD', 'ME', 'MG', 'MH', 'MM', 'MP', 'MQ', 'MR', 'MS',
       'MT', 'MU', 'MV', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NF', 'NG', 'NI',
       'NL', 'NO', 'NR', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK',
       'PL', 'PR', 'PT', 'PW', 'PY', 'QA', 'RE', 'RO', 'RU', 'SA', 'SB',
       'SC', 'SD', 'SE', 'SG', 'SI', 'SL', 'SN', 'S

In [7]:
def convert_coords(x):
    """Convert a "<lat> <lon>" degrees-minutes string into a shapely ``Point``.

    Each token is read as degrees, then two minute digits, then a hemisphere
    letter, e.g. ``"4230N 07131W"`` -> latitude 42.5, longitude -71.5167.

    Args:
        x: Coordinate string with latitude first and longitude second,
            separated by whitespace.

    Returns:
        ``Point(longitude, latitude)`` in decimal degrees.

    Raises:
        IndexError: If fewer than two whitespace-separated tokens are present.
        ValueError: If the degree or minute digits are not numeric.
    """
    def get_number(y):
        # Layout: [degrees...][2 minute digits][hemisphere letter]
        hemisphere = y[-1]
        degrees = float(y[:-3])
        minutes = float(y[-3:-1])
        num = degrees + minutes / 60
        # West and South are negative in decimal degrees.
        if hemisphere in ('W', 'S'):
            num = num * -1
        return num

    # split() with no argument tolerates doubled, leading/trailing and
    # non-breaking whitespace, which split(" ") does not.
    tokens = x.split()
    lat = tokens[0]
    lon = tokens[1]
    # shapely expects (x, y) == (longitude, latitude).
    return Point(get_number(lon), get_number(lat))

In [8]:
# Distinct origin port codes in the flow data; read as a global by process_ports.
ports = inD['Port1'].unique()

In [9]:
def process_ports(country, country_data, flow_ports=None):
    """Load one country's LOCODE table and split it by coordinate availability.

    The table is downloaded from the UNECE website the first time and cached
    at ``country_data``; later calls read the cached CSV.

    Args:
        country: Two-letter country code used to build the UNECE URL.
        country_data: Path of the cached CSV for this country.
        flow_ports: Optional collection of port codes that appear in the flow
            data. Defaults to the notebook-level ``ports`` variable.

    Returns:
        dict with three entries:
            ``'ALL'``  - the full table (DataFrame);
            ``'GOOD'`` - rows with coordinates, as a GeoDataFrame in EPSG:4326;
            ``'BAD'``  - rows without coordinates whose LOCODE is in ``flow_ports``.
    """
    if flow_ports is None:
        # Backward-compatible default: fall back to the notebook global.
        flow_ports = ports

    if not os.path.exists(country_data):
        # Read data from the UN website, save for future processing
        tempD = pd.read_html('https://service.unece.org/trade/locode/%s.htm' % country.lower())
        # The third table on the page is used -- presumably the LOCODE listing;
        # verify if the page layout changes.
        curD = tempD[2]
        # The first row holds the column names.
        curD.columns = curD.iloc[0]
        curD = curD.drop(0)
        curD.to_csv(country_data)
    else:
        curD = pd.read_csv(country_data, index_col=0)
    curD['LOCODE'] = curD['LOCODE'].apply(lambda x: x.replace(" ",""))

    # Split on missing coordinates. isna() is used rather than ``x is np.nan``
    # because NaN values are not guaranteed to be the np.nan singleton
    # (e.g. when the whole column is float NaN).
    no_coords_idx = curD['Coordinates'].isna()
    badD = curD.loc[no_coords_idx]
    # Copy so later column assignments do not target a slice of curD.
    goodD = curD.loc[~no_coords_idx].copy()
    # Keep only the coordinate-less ports that matter for the flow data.
    important_ports = badD.loc[badD['LOCODE'].isin(flow_ports)].copy()

    curD_geom = goodD['Coordinates'].apply(convert_coords)
    # "EPSG:4326" replaces the deprecated {'init': 'epsg:4326'} dict form.
    curgpd = gpd.GeoDataFrame(goodD, geometry=curD_geom, crs="EPSG:4326")
    return({'ALL':curD, 'GOOD':curgpd,'BAD':important_ports})

# Smoke test on a single country: print the shape of each returned table.
country = "CA"
country_file = os.path.join(raw_folder, "%s.csv" % country)
res = process_ports(country, country_file)
for subset in ('ALL', 'GOOD', 'BAD'):
    print(res[subset].shape)

(3194, 11)
(2422, 12)
(7, 11)


In [None]:
# Process every country and stack the results:
#   final         - all ports with coordinates
#   missing_ports - flow-data ports that have no coordinates
# Fresh lists on every run make the cell idempotent (no `del` of old state).
good_frames = []
bad_frames = []
failed_countries = {}  # country code -> error text, for later inspection

for country in countries:
    print(country)
    try:
        res = process_ports(country, os.path.join(raw_folder, "%s.csv" % country))
    except Exception as err:
        print("ERROR")
        failed_countries[country] = repr(err)
        # Skip this country: without this, the previous country's `res`
        # would be appended a second time.
        continue
    bad_frames.append(res['BAD'])
    good_frames.append(res['GOOD'])

# Concatenate once at the end instead of growing the frames inside the loop
# (DataFrame.append is quadratic here and was removed in pandas 2.0).
# pd.concat raises ValueError if every country failed.
missing_ports = pd.concat(bad_frames)
final = pd.concat(good_frames)


In [None]:
#Add country name to missing ports
import pycountry
importlib.reload(geocode)
#pycountry.countries.get(alpha_2='CA')

missing_ports['Country'] = missing_ports['LOCODE'].apply(lambda x: pycountry.countries.get(alpha_2=x[:2]).name)
locations = missing_ports.apply(lambda x: geocode.getLocation("%s, %s" % (x['NameWoDiacritics'], x['Country'])), axis=1)

In [None]:
def get_point(x):
    """Turn one geocoder response into a shapely ``Point``.

    Args:
        x: Geocoder response, expected to look like
            ``{'location': {'lng': ..., 'lat': ...}}``.

    Returns:
        ``Point(lng, lat)`` when both coordinates parse as numbers and fall
        inside the valid ranges (longitude -180..180, latitude -90..90);
        ``np.nan`` for a missing, malformed or out-of-range response.
    """
    try:
        lng = float(x['location']['lng'])
        lat = float(x['location']['lat'])
    except (TypeError, KeyError, IndexError, ValueError):
        # None response, missing keys or non-numeric values.
        return(np.nan)
    # Written as a positive range test so NaN coordinates are rejected as well.
    if not (-180 <= lng <= 180 and -90 <= lat <= 90):
        return(np.nan)
    return(Point(lng, lat))

# One geometry (or NaN) per geocoder response, attached to the table.
geoms = locations.apply(get_point)
missing_ports['geometry'] = geoms

# Classify rows by whether a shapely Point was produced. Both masks are built
# before missing_ports is rebound, so they describe the same set of rows.
located = missing_ports['geometry'].apply(lambda geom: type(geom) == Point)
unlocated = missing_ports['geometry'].apply(lambda geom: type(geom) != Point)
still_missing_ports = missing_ports.loc[unlocated]
missing_ports = missing_ports.loc[located]

In [None]:
# Save the ports that could not be geocoded, for manual follow-up.
still_missing_ports.to_csv(os.path.join(output_folder, "still_missing_ports.csv"))

In [None]:
# Preview the ports that were located by the geocoder.
missing_ports.head()

In [None]:
# Preview the ports whose coordinates came from the LOCODE tables.
final.head()

In [None]:
# Add the country name to every port. A lookup dict is used instead of
# `pycountry.countries.get(...).name` per row, which fails for codes pycountry
# does not know; those rows keep the two-letter code as their country.
iso2_to_name = {entry.alpha_2: entry.name for entry in pycountry.countries}
final_iso2 = final['LOCODE'].str[:2]
final['Country'] = final_iso2.map(iso2_to_name).fillna(final_iso2)

In [None]:
# Row/column count before the geocoded ports are added.
print(final.shape)

In [None]:
# Add the geocoded ports to the main table.
# missing_ports is a plain DataFrame with shapely Points in 'geometry'; promote
# it to a GeoDataFrame in the same CRS so the concatenation stays a GeoDataFrame.
missing_ports_geo = gpd.GeoDataFrame(missing_ports, geometry='geometry', crs=final.crs)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# reset_index() keeps the old index as an 'index' column (dropped later on).
final = pd.concat([final, missing_ports_geo]).reset_index()

In [None]:
# Save all data to file
final.to_file(os.path.join(output_folder, "all_ports.shp"))

In [None]:
# Row/column count after the geocoded ports were added.
final.shape

In [None]:
# Keep only the located ports that occur as an origin (Port1) in the flow data.
# NOTE: inP is reused here; earlier it held the "new ports" CSV.
ports = inD['Port1'].unique()
is_flow_port = final['LOCODE'].isin(ports)
inP = final.loc[is_flow_port]
inP.shape

In [None]:
# Number of distinct origin ports in the flow data (compare with inP.shape above).
len(ports)

In [None]:
# Join traffic attributes to ports dataset
inP.to_file(os.path.join(output_folder, "selected_ports.shp"))

In [None]:
# Join traffic attributes (deployed capacity out of / into each port) to the ports.
agg = {"Quarterly deployed capacity (TEU)":"sum"}

# Outflows: capacity summed over rows where the port is the origin (Port1).
inD_agg = inD.groupby(['Port1'])
outflows = inD_agg.agg(agg).reset_index()
outflows.columns = ['Port1', 'outflows']

# Inflows: capacity summed over rows where the port is the destination (Port2).
# This must group by Port2; reusing the Port1 groupby makes inflows an exact
# copy of outflows.
inflows = inD.groupby(['Port2']).agg(agg).reset_index()
inflows.columns = ['Port2', 'inflows']

combo = pd.merge(inP, outflows, left_on='LOCODE', right_on='Port1')
# Left join so a port with no inbound services is kept, with inflows of 0.
combo = pd.merge(combo, inflows, left_on='LOCODE', right_on='Port2', how='left')
combo['inflows'] = combo['inflows'].fillna(0)

# Drop bookkeeping columns that are not needed in the exported layer.
combo = combo.drop(['SubDiv','Coordinates','Date','index','Ch','IATA','Remarks','Port1','Port2'], axis=1)

combo.to_file(os.path.join(output_folder, "attributed_ports.shp"))

In [None]:
# Show where the shapefiles above were written.
output_folder

In [None]:
# Preview the flow table, including the ISO2 columns added above.
inD.head()

In [None]:
# Number of distinct origin ports in the flow data (compare with combo.shape below).
len(ports)

In [None]:
# Row/column count of the attributed ports layer.
combo.shape