# Processing COMTRADE flow data

This script attempts to process the COMTRADE flow data for energy, coal, oil, and gas. The goal is to extract the import, export, and flow data as geospatial information; the tools for doing so are found in the infra_tools library.

In [None]:
import sys, os, importlib

import pandas as pd
import geopandas as gpd

sys.path.append('../')

from infra_tools import process_flows

In [None]:
# UN COMTRADE flow extracts, one CSV per commodity group.
input_file_energy = "/home/wb411133/data/Projects/INFRA/FLOWS/UN_comtrade_energy_flows.csv"
input_file_coal = "/home/wb411133/data/Projects/INFRA/FLOWS/UN_comtrade_coal_flows.csv"
input_file_gas = "/home/wb411133/data/Projects/INFRA/FLOWS/UN_comtrade_gas_flows.csv"
input_file_oil = "/home/wb411133/data/Projects/INFRA/FLOWS/UN_comtrade_oil_flows.csv"
# Starts out empty; other cells reassign it before writing output.
out_folder = ""
# Country layer (national centroids, going by the file name).
global_boundaries = "/home/wb411133/data/Projects/INFRA/FLOWS/national_centroids.shp"

# Read the country layer and bring it into EPSG:4326 if it is stored in
# any other coordinate reference system.
wgs84 = {'init':'epsg:4326'}
inB = gpd.read_file(global_boundaries)
if inB.crs != wgs84:
    inB = inB.to_crs(wgs84)



In [None]:
# Re-execute the process_flows module so that edits made to it are used
# without restarting the kernel.
importlib.reload(process_flows)
# Build the flow object for the energy extract.
energy_flows = process_flows.comtrade_flow(input_file_energy, "Energy")
# NOTE(review): the meaning of [3] is defined by comtrade_flow.initialize,
# which is not visible here - confirm against infra_tools/process_flows.
energy_flows.initialize([3], inB)
# Write the result once per output format, each into its own folder.
energy_flows.save("/home/wb411133/data/Projects/INFRA/FLOWS/ENERGY/CSV", "CSV")
energy_flows.save("/home/wb411133/data/Projects/INFRA/FLOWS/ENERGY/SHP", "SHP")

In [None]:
# Show the column names of data_flows.raw_data.
# NOTE(review): data_flows is only assigned in other cells, so this cell
# depends on notebook execution order.
data_flows.raw_data.columns

In [None]:
# Build and initialise a flow object from a single file definition.
# NOTE(review): file_def is only assigned in other cells (the processing
# loop and the great-circle run), so this cell reuses whatever value the
# kernel currently holds.
data_flows = process_flows.comtrade_flow(file_def[0], file_def[1])
data_flows.initialize(file_def[2], inB)
    

In [None]:
# Processed flow objects keyed by commodity label; filled by the
# processing loop.
data_res = {}

In [None]:
# Pick up any edits made to the process_flows module since it was imported.
importlib.reload(process_flows)
base_out_folder = "/home/wb411133/data/Projects/INFRA/FLOWS"

# Every entry is [input CSV, commodity label, argument for initialize()].
# The gas and oil entries are switched off.
for file_def in (
    [input_file_energy, "Energy", [3]],
    [input_file_coal, "Coal", [8]],
    #[input_file_gas, "Gas", [8,1]],
    #[input_file_oil, "Oil", [8]]
):
    print("Processing {}".format(file_def[1]))
    data_flows = process_flows.comtrade_flow(file_def[0], file_def[1])
    data_flows.initialize(file_def[2], inB)
    # Output goes to <base_out_folder>/<label>/<file type>.
    select_folder = os.path.join(base_out_folder, file_def[1])
    for file_type in ("SHP",):
        out_folder = os.path.join(select_folder, file_type)
        data_flows.save(out_folder, file_type)
    # Keep the processed object so later cells can look it up by label.
    data_res[file_def[1]] = data_flows

In [None]:
# Show which commodity labels have been processed so far.
data_res.keys()

# Scratch work below


In [None]:
# Pull one record out of the processed energy flows to experiment with.
inD = data_res['Energy']
# Despite its name, from_pt is the whole record selected with label 1,
# not a single point.
from_pt = inD.complete_data.loc[1,]
# Display the reporter-side point of that record.
from_pt['Reporter_Pt']

In [None]:

def generate_great_circle(from_pt, to_pt, interim_steps=15):
    '''Build a geodesic (great-circle) line between two points.

    ``Geodesic``, ``Point`` and ``LineString`` must already be in scope;
    this cell does not import them.
    NOTE(review): ``Geodesic`` looks like geographiclib.geodesic.Geodesic and
    ``Point`` like shapely.geometry.Point - confirm.

    Args:
        from_pt: start point; expected to carry longitude in ``x`` and
            latitude in ``y`` (the boundary layer is kept in EPSG:4326).
        to_pt: end point, same convention.
        interim_steps: number of equal-length pieces the geodesic is cut
            into. Values below 2 yield a plain two-point line.

    Returns:
        A LineString that starts at ``from_pt``, follows the WGS84 geodesic
        and ends at ``to_pt``.
    '''
    geod = Geodesic.WGS84
    # The geodesic solver takes latitude before longitude, while the points
    # store longitude in x and latitude in y.
    g = geod.Inverse(from_pt.y, from_pt.x, to_pt.y, to_pt.x)
    line = geod.Line(g['lat1'], g['lon1'], g['azi1'])
    list_of_points = [from_pt]
    # Only interior samples are generated: the exact end points are added
    # explicitly, so sampling at distance 0 and at s12 would repeat them.
    for i in range(1, interim_steps):
        pos = line.Position(i * g['s12'] / interim_steps)
        list_of_points.append(Point(pos['lon2'], pos['lat2']))
    list_of_points.append(to_pt)
    return LineString(list_of_points)

# Try the function on the reporter and partner points of the sample record.
generate_great_circle(from_pt['Reporter_Pt'], from_pt['Partner_Pt'])

In [None]:
# Re-run the energy extract, this time passing line_type='great' to
# initialize().
importlib.reload(process_flows)
# Same [input CSV, label, initialize argument] layout as the entries of the
# processing loop.
file_def = [input_file_energy, "Energy", [3]]
data_flows = process_flows.comtrade_flow(file_def[0], file_def[1])
# NOTE(review): line_type='great' presumably selects great-circle
# geometries - confirm in process_flows.
data_flows.initialize(file_def[2], inB, line_type='great')
# Written to the same ENERGY/SHP folder as the first energy run.
data_flows.save("/home/wb411133/data/Projects/INFRA/FLOWS/ENERGY/SHP", "SHP")

In [None]:
# Preview the first rows of the country-to-country flows.
data_flows.country_flows.head()

In [None]:
# Reload the module, then rebuild every flow geometry with its
# generate_great_circle helper from the reporter and partner points.
importlib.reload(process_flows)
data_flows.country_flows['geometry'] = data_flows.country_flows.apply(
    lambda row: process_flows.generate_great_circle(row['Reporter_Pt'], row['Partner_Pt']),
    axis=1,
)
data_flows.country_flows.head()

In [None]:
# For every processed flow set, print its label followed by the number of
# raw rows per commodity.
for label, flow_set in data_res.items():
    print(label)
    commodity_counts = flow_set.raw_data['Commodity'].value_counts()
    print(commodity_counts)

In [None]:
# Count raw rows per commodity for a single flow object.
# NOTE(review): curD is not assigned in any visible cell.
curD.raw_data['Commodity'].value_counts()

In [None]:
# Collapse every boundary geometry to its centroid and write the layer out.
# The target is the same file that global_boundaries is read from, so
# running this cell overwrites that input.
inB['geometry'] = inB['geometry'].apply(lambda geom: geom.centroid)
inB.to_file("/home/wb411133/data/Projects/INFRA/FLOWS/national_centroids.shp")

In [None]:
#Assign origin and destination centroids
def get_centroid(iso,boundaries):
    '''Return the centroid of the boundary feature whose ISO3 code is ``iso``.

    Args:
        iso: ISO3 code to look up in the ``ISO3`` column.
        boundaries: table with ``ISO3`` and ``geometry`` columns; a
            ``Shape_Area`` column is needed only when a code matches more
            than one row.

    Returns:
        The ``centroid`` of the matching geometry. When several rows match,
        the one with the largest ``Shape_Area`` is used. ``None`` when
        nothing matches or the lookup fails.
    '''
    try:
        selected_country = boundaries.loc[boundaries['ISO3'] == iso]
        if selected_country.shape[0] == 0:
            return None
        if selected_country.shape[0] > 1:
            # Several features share the code: keep the largest one.
            selected_country = selected_country.sort_values('Shape_Area', ascending=False)
        return selected_country.iloc[0]['geometry'].centroid
    except Exception:
        # Best-effort lookup: a bad table or geometry yields None, while
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
#get_centroid('LSO', inB)
# Look up a point for each side of every trade record; codes that
# get_centroid cannot resolve end up as None.
inD['Reporter_Pt'] = inD['Reporter ISO'].apply(lambda iso: get_centroid(iso, inB))
inD['Partner_Pt'] = inD['Partner ISO'].apply(lambda iso: get_centroid(iso, inB))

In [None]:
# Keep only the columns of interest, then split the rows on the partner
# code: "WLD" rows form country_summary, all other rows form country_flows.
# NOTE(review): good_columns is not assigned in any visible cell, and "WLD"
# is presumably the world aggregate - confirm.
selected_D = inD.loc[:,good_columns]
# .copy() makes both subsets independent frames, so later cells can add
# columns or reset the index without a SettingWithCopyWarning.
country_flows = selected_D.loc[selected_D['Partner ISO'] != "WLD"].copy()
country_summary = selected_D.loc[selected_D['Partner ISO'] == "WLD"].copy()

In [None]:
# Order the summary rows by reporter and write them to CSV.
# sort_values returns a new frame, so the result has to be assigned back;
# otherwise the file is written unsorted.
country_summary = country_summary.sort_values(['Reporter ISO'])
# The previous index is kept as a column, as before.
country_summary.reset_index(inplace=True)
# out_folder holds whatever value the kernel last assigned to it.
country_summary.to_csv(os.path.join(out_folder, "Country_Summaries.csv"))

In [None]:
from shapely.geometry import LineString

def generate_line_string(row):
    '''Build the straight line for one trade record, in the direction of flow.

    Args:
        row: record with ``Trade Flow``, ``Reporter_Pt`` and ``Partner_Pt``
            entries.

    Returns:
        A LineString from reporter to partner when ``Trade Flow`` is
        "Export", and from partner to reporter for any other value.
        ``None`` when the line cannot be built; the offending record is
        printed in that case.
    '''
    try:
        if row['Trade Flow'] == "Export":
            start, end = row['Reporter_Pt'], row['Partner_Pt']
        else:
            start, end = row['Partner_Pt'], row['Reporter_Pt']
        return LineString([start, end])
    except Exception:
        # Best-effort: report the bad record and carry on, but let
        # KeyboardInterrupt/SystemExit through so a long apply() can be
        # stopped.
        print(row)
        return None
# Give every country-to-country record its line geometry, then write the
# table to CSV in the current out_folder.
country_flows['geometry'] = country_flows.apply(generate_line_string, axis=1)
flows_csv = os.path.join(out_folder, "Country_Flows.csv")
country_flows.to_csv(flows_csv)