In [9]:
import sys, os, importlib

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import Point


# Extend the import path with two relative directories so the project-local
# packages imported below can be found (paths are relative to the notebook's
# working directory).
sys.path.append('../')
sys.path.append('../../gostrocks/src/')

from infrasap import process_flows
# Fixed: this line previously read "froGOSTRocks.misc import tPrint", which is a SyntaxError.
from GOSTRocks.misc import tPrint
from infrasap import geocode

In [11]:
# Geocode new ports: load the list of new major ports from CSV and preview it.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run elsewhere without editing it.
new_ports_file = "/home/wb411133/data/Projects/INFRA/PORTS/new_major_ports.csv"
inP = pd.read_csv(new_ports_file)
inP.head()

Unnamed: 0,Port,Country,Region
0,Ambon,Indonesia,East Asia & Pacific
1,Apia,Samoa,East Asia & Pacific
2,Bangkok,Thailand,East Asia & Pacific
3,Bar,Montenegro,Europe & Central Asia
4,Bitung,Indonesia,East Asia & Pacific


In [12]:
# Geocode each row with a "<Port>, <Country>" query string; `locations` holds
# one geocode.getLocation result per row of inP.
locations = inP.apply(
    lambda port_row: geocode.getLocation("%s, %s" % (port_row['Port'], port_row['Country'])),
    axis=1,
)

Gulhifalhu, Maldives was not found
Hazira (Adani), India was not found
Krishnapatnam (Adani), India was not found
Mundra (DP World), India was not found
Pipavav (APMT/Maersk), India was not found
Port Via, Vanuatu was not found
Tibar Bay, Timor Leste was not found


In [20]:
# Copy the geocoded coordinates into Lat/Lon columns (positionally, in the
# order of `locations`), then write the table next to the input file with a
# "_geocoded" suffix.
for column, key in (('Lat', 'lat'), ('Lon', 'lng')):
    inP[column] = [loc['location'][key] for loc in locations]
inP.to_csv(new_ports_file.replace(".csv", "_geocoded.csv"))

In [None]:
# NOTE: this cell contained only a stray, incomplete `in` token, which is a
# SyntaxError when executed; it has been emptied.

In [None]:
# Folder for all outputs of this notebook, and a sub-folder that caches the
# raw per-country LOCODE tables.
output_folder = 'P:/data/Global/INFRA/PORTS'
raw_folder = os.path.join(output_folder, "RAW_LOCODES")
# makedirs creates any missing parents (so output_folder is created as well);
# exist_ok=True makes the cell safe to re-run.
os.makedirs(raw_folder, exist_ok=True)

In [None]:
# Load the port flow records used by the rest of the notebook.
# NOTE(review): this reads from drive J: while outputs are written under P: —
# confirm that both locations are intended.
port_data = "J:/Data/GLOBAL/INFRA/PORTS/Port_flow_data_Q22020.csv"
inD = pd.read_csv(port_data)

In [None]:
# Get a list of country codes to extract.
# The first two characters of each Port1 code are taken as the country code.
# The vectorised .str accessor leaves missing codes as NaN instead of raising,
# and dropna() keeps NaN out of the list of countries to process.
inD['ISO2'] = inD['Port1'].str[:2]
countries = inD['ISO2'].dropna().unique()
countries

In [None]:
def convert_coords(x):
    """Convert a "<lat> <lon>" coordinate string into a shapely Point(lon, lat).

    Each token is parsed as: everything except the last three characters is
    whole degrees, the two characters before the last are minutes, and the
    final character is the hemisphere letter. 'W' and 'S' make the value
    negative. For example "4230N 00131E" gives latitude 42.5 and longitude
    1 + 31/60.

    Parameters
    ----------
    x : str
        Coordinate string with the latitude token first and the longitude
        token second, separated by whitespace.

    Returns
    -------
    shapely.geometry.Point
        Point built from (longitude, latitude) in decimal degrees.

    Raises
    ------
    IndexError
        If fewer than two tokens are present.
    ValueError
        If the degree or minute part of a token is not numeric.
    """
    def to_decimal_degrees(token):
        # Degrees plus minutes/60, negated for the western/southern hemispheres.
        value = float(token[:-3]) + (float(token[-3:-1]) / 60)
        if token[-1] in ('W', 'S'):
            value = value * -1
        return value

    # split() without an argument tolerates repeated, leading or trailing
    # whitespace; split(" ") produced empty tokens in those cases.
    tokens = x.split()
    lat_token = tokens[0]
    lon_token = tokens[1]
    return Point(to_decimal_degrees(lon_token), to_decimal_degrees(lat_token))

In [None]:
# Unique Port1 codes from the flow data. process_ports reads this module-level
# name to decide which coordinate-less LOCODE records are worth keeping.
ports = inD['Port1'].unique()

In [None]:
def process_ports(country, country_data, port_codes=None):
    """Load one country's LOCODE table and split it by coordinate availability.

    If `country_data` does not exist, the table is read from the UNECE LOCODE
    web page for `country` and saved to `country_data`; otherwise the saved
    CSV is read back.

    Parameters
    ----------
    country : str
        Country code; lower-cased to build the UNECE page URL.
    country_data : str
        Path of the per-country CSV cache.
    port_codes : iterable of str, optional
        LOCODEs of interest. Records without coordinates are only reported in
        'BAD' when their LOCODE is in this collection. Defaults to the
        notebook-level `ports` variable, which was the original behaviour.

    Returns
    -------
    dict
        'ALL'  : the full table (LOCODE values with spaces removed),
        'GOOD' : GeoDataFrame of the records that have coordinates,
        'BAD'  : records without coordinates whose LOCODE is in `port_codes`.
    """
    if port_codes is None:
        # Fall back to the module-level list so existing two-argument calls
        # behave exactly as before.
        port_codes = ports

    if not os.path.exists(country_data):
        # Read data from UN website, save for future processing
        tempD = pd.read_html('https://service.unece.org/trade/locode/%s.htm' % country.lower())
        # NOTE(review): relies on the LOCODE listing being the third table on
        # the page — confirm if the page layout changes.
        curD = tempD[2]
        # The first row of the table carries the column names.
        curD.columns = curD.iloc[0]
        curD = curD.drop(0)
        curD.to_csv(country_data)
    else:
        curD = pd.read_csv(country_data, index_col=0)
    curD['LOCODE'] = curD['LOCODE'].str.replace(" ", "", regex=False)

    # Drop records with no coordinates.
    # isna() is used instead of `x is np.nan`: the identity test is False for
    # NaN values held in a float64 column (e.g. a country with no coordinates
    # at all), which let missing values through to convert_coords.
    no_coords_idx = curD['Coordinates'].isna()
    # Copies avoid chained-assignment warnings when callers add columns later.
    badD = curD.loc[no_coords_idx]
    goodD = curD.loc[~no_coords_idx].copy()
    # If the records without coordinates are in the list of port flows, keep those
    important_ports = badD.loc[badD['LOCODE'].isin(port_codes)].copy()

    curD_geom = goodD['Coordinates'].apply(convert_coords)
    # NOTE(review): the {'init': ...} CRS form is deprecated in recent
    # pyproj/geopandas in favour of "EPSG:4326" — update once the minimum
    # geopandas version in use is confirmed.
    curgpd = gpd.GeoDataFrame(goodD, geometry=curD_geom, crs={'init':'epsg:4326'})
    return {'ALL': curD, 'GOOD': curgpd, 'BAD': important_ports}

# Smoke-run process_ports for one country and print the shape of each result.
country = "CA"
country_csv = os.path.join(raw_folder, "%s.csv" % country)
res = process_ports(country, country_csv)
for subset in ('ALL', 'GOOD', 'BAD'):
    print(res[subset].shape)

In [None]:
# Process every country and collect the results.
#   final         : ports with coordinates (concatenated 'GOOD' results)
#   missing_ports : ports in the flow data without coordinates ('BAD' results)
# Results are gathered in lists and concatenated once. This replaces the
# del/try/except accumulator pattern and DataFrame.append, which was removed
# in pandas 2.0 and is quadratic when used in a loop.
good_frames = []
bad_frames = []
failed_countries = []
for country in countries:
    print(country)
    try:
        res = process_ports(country, os.path.join(raw_folder, "%s.csv" % country))
    except Exception as err:
        # Skip this country. Previously execution fell through and appended
        # the stale `res` of the previous country a second time.
        print("ERROR", repr(err))
        failed_countries.append(country)
        continue
    good_frames.append(res['GOOD'])
    bad_frames.append(res['BAD'])

if failed_countries:
    print("Countries that could not be processed:", failed_countries)

# pd.concat raises ValueError if no country was processed successfully.
final = pd.concat(good_frames)
missing_ports = pd.concat(bad_frames)


In [None]:
#Add country name to missing ports
import pycountry
importlib.reload(geocode)

# Map 2-letter country codes to names once. Codes that pycountry does not know
# fall back to the code itself; previously `.get(...).name` raised
# AttributeError when the lookup returned None.
iso2_to_name = {c.alpha_2: c.name for c in pycountry.countries}
missing_iso2 = missing_ports['LOCODE'].str[:2]
# assign() returns a new frame, avoiding chained-assignment warnings when
# missing_ports is a slice of another table.
missing_ports = missing_ports.assign(Country=missing_iso2.map(iso2_to_name).fillna(missing_iso2))

# Geocode each port with a "<NameWoDiacritics>, <Country>" query string.
locations = missing_ports.apply(lambda x: geocode.getLocation("%s, %s" % (x['NameWoDiacritics'], x['Country'])), axis=1)

In [None]:
def get_point(x):
    """Build a shapely Point(lng, lat) from a geocoding result, or NaN.

    Parameters
    ----------
    x : mapping
        Geocoding result; expected to hold x['location']['lng'] and
        x['location']['lat'] (presumably as returned by geocode.getLocation —
        verify against that helper).

    Returns
    -------
    shapely.geometry.Point or float
        A Point when both coordinates are present, numeric and within valid
        longitude/latitude bounds; np.nan otherwise.
    """
    try:
        lng = float(x['location']['lng'])
        lat = float(x['location']['lat'])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing result, missing keys, or non-numeric coordinates.
        return np.nan
    # Reject out-of-range values on both axes. The original only rejected
    # lng < -180. NaN coordinates fail these comparisons and are rejected too.
    if not (-180 <= lng <= 180 and -90 <= lat <= 90):
        return np.nan
    return Point(lng, lat)

# Attach the geocoded geometry (a Point or NaN) to the missing ports.
geoms = locations.apply(get_point)
missing_ports['geometry'] = geoms

# Split once on "is exactly a Point": rows without a Point are still missing,
# the rest are kept as missing_ports.
has_point = missing_ports['geometry'].apply(lambda geom: type(geom) == Point)
still_missing_ports = missing_ports.loc[~has_point]
missing_ports = missing_ports.loc[has_point]

In [None]:
# Write the ports that still have no Point geometry to a CSV in output_folder.
still_missing_ports.to_csv(os.path.join(output_folder, "still_missing_ports.csv"))

In [None]:
# Preview the formerly-missing ports that now carry a geocoded Point geometry.
missing_ports.head()

In [None]:
# Preview the ports whose geometry came from the LOCODE coordinate strings.
final.head()

In [None]:
# Add the country name derived from the first two characters of the LOCODE.
# A dictionary lookup with a fallback to the raw code replaces
# `pycountry.countries.get(...).name`, which raised AttributeError whenever the
# lookup returned None for a code pycountry does not know.
final_iso2 = final['LOCODE'].str[:2]
final['Country'] = final_iso2.map({c.alpha_2: c.name for c in pycountry.countries}).fillna(final_iso2)

In [None]:
# Size of `final` before the geocoded missing ports are added to it.
print(final.shape)

In [None]:
# Add the geocoded missing ports to the ports that had LOCODE coordinates.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# reset_index() keeps the old index as an 'index' column, as before.
# NOTE: this cell is not idempotent — re-running it adds missing_ports again.
final = pd.concat([final, missing_ports]).reset_index()

In [None]:
# Save all data to file
final.to_file(os.path.join(output_folder, "all_ports.shp"))

In [None]:
# Rows and columns of the combined ports table.
final.shape

In [None]:
# Keep only the ports whose LOCODE appears as Port1 in the flow data.
# NOTE: `inP` is reused here; earlier in the notebook it held the new-ports table.
ports = inD['Port1'].unique()
in_flow_data = final['LOCODE'].isin(ports)
inP = final.loc[in_flow_data]
inP.shape

In [None]:
# Number of unique Port1 codes in the flow data, to compare with the number of
# selected ports.
len(ports)

In [None]:
# Join traffic attributes to ports dataset
inP.to_file(os.path.join(output_folder, "selected_ports.shp"))

In [None]:
# Join traffic attributes to the selected ports: total quarterly deployed
# capacity (TEU) per port, summed separately as outflows and inflows.
agg = {"Quarterly deployed capacity (TEU)":"sum"}

# Outflows: capacity summed over records grouped by Port1.
outflows = inD.groupby(['Port1']).agg(agg).reset_index()
outflows.columns = ['Port1', 'outflows']

# Inflows: capacity summed over records grouped by Port2. The original reused
# the Port1 grouping here, so 'inflows' was an exact copy of 'outflows'.
# NOTE(review): assumes inD has a 'Port2' column holding the other end of each
# flow — confirm against the flow file.
inflows = inD.groupby(['Port2']).agg(agg).reset_index()
inflows.columns = ['Port2', 'inflows']

combo = pd.merge(inP, outflows, left_on='LOCODE', right_on='Port1')
# Left join plus fill: a port that never appears as Port2 keeps its row with
# zero inflow instead of being dropped by an inner join.
combo = pd.merge(combo, inflows, how='left', left_on='LOCODE', right_on='Port2')
combo['inflows'] = combo['inflows'].fillna(0)

# Remove LOCODE bookkeeping columns and the merge keys before writing.
combo = combo.drop(['SubDiv','Coordinates','Date','index','Ch','IATA','Remarks','Port1','Port2'], axis=1)

combo.to_file(os.path.join(output_folder, "attributed_ports.shp"))

In [None]:
# Display the folder the outputs were written to.
output_folder

In [None]:
# Preview the port flow records.
inD.head()

In [None]:
# Number of unique Port1 codes in the flow data.
len(ports)

In [None]:
# Rows and columns of the attributed ports table.
combo.shape