# Charts

In [1]:
import pandas as pd
from os.path import join, expanduser
import sys
from datetime import datetime

In [2]:
# Root folder of the emissions extracts. It is resolved from the current user's
# home directory rather than a fixed account name so the notebook runs for
# anyone who has the same folder layout under their profile.
# (assumes the 'WBG' folder sits directly under the home directory, as in the
# original hard-coded path — adjust here if it lives elsewhere)
emissions_dir = join(
    expanduser('~'),
    'WBG',
    'EEAPV Pacific Observatory Files - Geospatial and Big Data',
    'AIS',
    'emissions',
)
input_dir = join(emissions_dir, '20240614')
output_dir = join(emissions_dir, 'for_charts')

df = pd.read_csv(join(input_dir, 'Emissions Pacific 201901_202405.csv'))
# Lower-case the header before anything else, so the 'year'/'month' lookup
# below and every later column reference work regardless of the CSV's casing.
df.columns = df.columns.str.lower()
# 'ymd': first day of each (year, month) as a timestamp; 'ym': the same month as a Period.
df['ymd'] = pd.to_datetime(df[['year', 'month']].assign(day=1))
df['ym'] = df['ymd'].dt.to_period('M')

In [3]:
# Rows labelled with any of these three island groups are relabelled as
# 'Kiribati', so later group-bys treat them as a single country.
kir_islands = ['Phoenix Group', 'Gilbert Islands', 'Line Group']
is_kiribati = df['country'].isin(kir_islands)
df.loc[is_kiribati, 'country'] = 'Kiribati'

In [4]:
# df.to_csv(join(output_dir, 'emissions.csv'))

## Vessel Count

In [None]:
# Monthly vessel-activity totals per country, exported wide (one column per country).
group_cols = ['country', 'ymd', 'ym']
data_cols = ['count_vessel', 'count_vessel_day', '_missing_hours', '_total_hours']
df_re = df.groupby(group_cols)[data_cols].sum().reset_index()

# Rows are months, columns are countries, cells are the summed 'count_vessel'.
df_piv = df_re.pivot(index=['ymd', 'ym'], columns='country', values='count_vessel').reset_index()

# Export column order: countries from highest to lowest mean monthly vessel count.
# Only 'count_vessel' is averaged; averaging the whole frame would also try to
# average the 'ymd'/'ym' date columns, which is wasted work and depends on how
# the installed pandas handles means of datetime/Period columns.
order = list(df_re.groupby('country')['count_vessel'].mean().sort_values(ascending=False).index)
df_piv[['ym', 'ymd'] + order].to_csv(join(output_dir, 'vessel_count_by_country.csv'), index=False)

## CO2 Emissions by Type

In [25]:
# Total every pollutant column per country, month and AIS vessel group.
data_cols = ['_ch4_e', '_co_e', '_n2o_e', '_nmvoc_e', '_pm10_e', '_pm25_e', '_nox_e', '_bc_e', '_co2_f', '_sox_f', '_bc_f']
group_cols = ['country', 'ymd', 'ym', '_vessel_group_ais']
df_re = df.groupby(group_cols)[data_cols].sum().reset_index()

# Keep only the '_co2_f' totals and spread the vessel groups into columns:
# one row per (country, month), one column per vessel group.
co2_long = df_re[group_cols + ['_co2_f']]
df_piv = co2_long.pivot(
    index=['country', 'ymd', 'ym'],
    columns='_vessel_group_ais',
    values='_co2_f',
).reset_index()

In [67]:
# Regional total per month across all countries.
# 'country' is dropped before summing: it is a text column, so including it in
# sum() either concatenates the names or drops the column depending on the
# pandas version, and it is replaced by the label 'All' in any case.
df_all = df_piv.drop(columns='country').groupby(['ymd', 'ym']).sum().reset_index()
# Put the label right after the month keys, so the column order is
# ymd, ym, country, <vessel groups>.
df_all.insert(2, 'country', 'All')

# 'All' rows first, then the per-country rows, with a fresh 0..n-1 index.
df_concat = pd.concat([df_all, df_piv], sort=False).reset_index(drop=True)
df_concat

_vessel_group_ais,ymd,ym,country,Cargo,Fishing,Others,Passenger,Tanker
0,2019-01-01,2019-01,All,8.334048e+11,4.255528e+10,7.419351e+10,6.941941e+10,2.137858e+11
1,2019-02-01,2019-02,All,6.575297e+11,3.629167e+10,7.046958e+10,9.684505e+10,1.239919e+11
2,2019-03-01,2019-03,All,7.247566e+11,3.929789e+10,7.268150e+10,7.405272e+10,1.541263e+11
3,2019-04-01,2019-04,All,5.449808e+11,3.535564e+10,5.907037e+10,3.034599e+10,1.301189e+11
4,2019-05-01,2019-05,All,5.679863e+11,3.190150e+10,5.964276e+10,1.161958e+10,1.425866e+11
...,...,...,...,...,...,...,...,...
1035,2024-01-01,2024-01,Vanuatu,4.793609e+09,1.024560e+09,4.567897e+09,6.759454e+09,1.142747e+09
1036,2024-02-01,2024-02,Vanuatu,2.724946e+09,7.289587e+08,4.311742e+09,5.011830e+09,5.768809e+08
1037,2024-03-01,2024-03,Vanuatu,4.399589e+09,6.639224e+08,3.436149e+09,3.156203e+09,1.073155e+09
1038,2024-04-01,2024-04,Vanuatu,3.889955e+09,4.441859e+08,1.950988e+09,2.541087e+09,8.247853e+08


In [24]:
# df_concat.to_csv(join(output_dir, 'co2_emissions_by_vessel_group.csv'))

In [68]:
# Preview the first two rows of the value columns (label slice from 'Cargo' to the last column).
df_concat.loc[:, "Cargo":].head(2)

_vessel_group_ais,Cargo,Fishing,Others,Passenger,Tanker
0,833404800000.0,42555280000.0,74193510000.0,69419410000.0,213785800000.0
1,657529700000.0,36291670000.0,70469580000.0,96845050000.0,123991900000.0


In [69]:
# Row-wise total over the vessel-group columns ('Cargo' through the last column).
vessel_group_values = df_concat.loc[:, "Cargo":]
df_sum = vessel_group_values.sum(axis="columns")

In [70]:
# Each vessel group's share of its row total, in percent.
# The division is aligned on the row index, so every row is divided by its own total.
df_pct = df_concat.loc[:, "Cargo":].div(df_sum, axis="index").mul(100)

In [74]:
# Absolute values next to their shares; the overlapping share columns get a ' pct' suffix.
# NOTE(review): unlike the other exports in this notebook, this one writes the
# row index as a first, unnamed CSV column (no index=False) — confirm the chart
# consuming this file expects that.
co2_with_share = df_concat.join(df_pct, rsuffix=' pct')
co2_with_share.to_csv(join(output_dir, 'co2_emissions_by_vessel_group.csv'))

## CO2 Emissions by Type and Year

In [24]:
# Yearly '_co2_f' total per country and AIS vessel group, kept in long form.
data_cols = ['_co2_f']
group_cols = ['country', 'year', '_vessel_group_ais']
df_re = df.groupby(group_cols)[data_cols].sum().reset_index()

In [28]:
# Presentation names for the four columns produced by the yearly CO2 group-by.
df_re = df_re.rename(
    columns={
        'country': 'Country',
        'year': 'Year',
        '_vessel_group_ais': 'Vessel Type',
        '_co2_f': 'CO2 Emissions',
    }
)

In [38]:
# Total CO2 emissions per country and year (all vessel types combined).
by_country_year = df_re.groupby(['Country', 'Year'])
df_sum = by_country_year[['CO2 Emissions']].sum().reset_index()

In [41]:
# Attach the country-year total to every vessel-type row; the total arrives as 'CO2 Emissions sum'.
df_re = pd.merge(
    df_re,
    df_sum,
    on=['Country', 'Year'],
    suffixes=('', ' sum'),
)

In [42]:
# Vessel type's share of its country-year total, in percent.
df_re['Share'] = df_re['CO2 Emissions'].div(df_re['CO2 Emissions sum']).mul(100)

In [44]:
# The helper total is only needed to compute 'Share'; remove it before export.
df_re = df_re.drop(columns='CO2 Emissions sum')

In [45]:
# Write the yearly emissions and shares per country and vessel type, without the row index.
co2_year_path = join(output_dir, 'co2_emissions_by_vessel_group_and_year.csv')
df_re.to_csv(co2_year_path, index=False)

## Pollutant Time Series

In [73]:
# Monthly pollutant totals per country, with readable column names.
# ('_op_phase' was noted as a possible extra grouping key.)
group_cols = ['country', 'ymd', 'ym']
data_cols = ['_ch4_e', '_co_e', '_n2o_e', '_nmvoc_e', '_pm10_e', '_pm25_e', '_nox_e', '_bc_e', '_co2_f', '_sox_f', '_bc_f']

# Raw column -> label, in export order. '_bc_e' is summed above but not
# exported; '_bc_f' is the column labelled 'Black Carbon'.
pollutant_labels = {
    '_ch4_e': 'Methane',
    '_co_e': 'Carbon Monoxide',
    '_n2o_e': 'Nitrous Oxide',
    '_nmvoc_e': 'Non-Methane Volatile Organic Compounds',
    '_pm10_e': 'Particulate Matter 10',
    '_pm25_e': 'Particulate Matter 2.5',
    '_nox_e': 'Nitrogen Oxide',
    '_bc_f': 'Black Carbon',
    '_co2_f': 'Carbon Dioxide',
    '_sox_f': 'Sulfur Oxide',
}

monthly_totals = df.groupby(group_cols)[data_cols].sum().reset_index()
df_re = monthly_totals[group_cols + list(pollutant_labels)].rename(columns=pollutant_labels)


In [75]:
# Check the column labels after the rename.
df_re.columns

Index(['country', 'ymd', 'ym', 'Methane', 'Carbon Monoxide', 'Nitrous Oxide',
       'Non-Methane Volatile Organic Compounds', 'Particulate Matter 10',
       'Particulate Matter 2.5', 'Nitrogen Oxide', 'Black Carbon',
       'Carbon Dioxide', 'Sulfur Oxide'],
      dtype='object')

In [82]:
# Regional total per month across all countries.
# 'country' is dropped before summing: it is a text column, so including it in
# sum() either concatenates the names or drops the column depending on the
# pandas version, and it is replaced by the label 'All' in any case.
df_all = df_re.drop(columns='country').groupby(['ymd', 'ym']).sum().reset_index()
# Put the label right after the month keys, so the column order is
# ymd, ym, country, <pollutants>.
df_all.insert(2, 'country', 'All')

# 'All' rows first, then the per-country rows, with a fresh 0..n-1 index.
df_concat = pd.concat([df_all, df_re], sort=False).reset_index(drop=True)
df_concat.head()

Unnamed: 0,ymd,ym,country,Methane,Carbon Monoxide,Nitrous Oxide,Non-Methane Volatile Organic Compounds,Particulate Matter 10,Particulate Matter 2.5,Nitrogen Oxide,Black Carbon,Carbon Dioxide,Sulfur Oxide
0,2019-01-01,2019-01,All,450256600.0,1331744000.0,125420600.0,2530910000.0,2910637000.0,2677786000.0,58696930000.0,133056400.0,1233359000000.0,16588820000.0
1,2019-02-01,2019-02,All,187263700.0,1045620000.0,106242300.0,2137815000.0,2497645000.0,2297833000.0,49841740000.0,119474400.0,985127900000.0,13773740000.0
2,2019-03-01,2019-03,All,310920700.0,1135755000.0,110371400.0,2226790000.0,2558231000.0,2353572000.0,51448480000.0,116233200.0,1064915000000.0,14392860000.0
3,2019-04-01,2019-04,All,338161600.0,868804000.0,81042020.0,1635361000.0,1825614000.0,1679565000.0,37566770000.0,85387290.0,799871800000.0,10241750000.0
4,2019-05-01,2019-05,All,373313800.0,900036500.0,82585740.0,1672159000.0,1852027000.0,1703865000.0,38359350000.0,87304730.0,813736700000.0,10392840000.0


In [83]:
# Write the monthly pollutant table (regional 'All' rows plus per-country rows), without the row index.
pollutant_monthly_path = join(output_dir, 'pollutant-by-country-monthly.csv')
df_concat.to_csv(pollutant_monthly_path, index=False)

## Pollutants by Country and Year

In [11]:
# df.columns

In [71]:
# Yearly totals of every pollutant column per country.
# ('ymd', 'ym' and '_op_phase' were noted as alternative grouping keys.)
data_cols = ['_ch4_e', '_co_e', '_n2o_e', '_nmvoc_e', '_pm10_e', '_pm25_e', '_nox_e', '_bc_e', '_co2_f', '_sox_f', '_bc_f']
group_cols = ['country', 'year']
df_re = df.groupby(group_cols)[data_cols].sum().reset_index()

In [72]:
# Keep the identifier columns plus the exported pollutants, then apply
# presentation names. '_bc_e' is not in the mapping, so it is left out.
id_labels = {'country': 'Country', 'year': 'Year'}
pollutant_labels = {
    '_ch4_e': 'Methane',
    '_co_e': 'Carbon Monoxide',
    '_n2o_e': 'Nitrous Oxide',
    '_nmvoc_e': 'Non-Methane Volatile Organic Compounds',
    '_pm10_e': 'Particulate Matter 10',
    '_pm25_e': 'Particulate Matter 2.5',
    '_nox_e': 'Nitrogen Oxide',
    '_bc_f': 'Black Carbon',
    '_co2_f': 'Carbon Dioxide',
    '_sox_f': 'Sulfur Oxide',
}

kept_columns = list(id_labels) + list(pollutant_labels)
df_re = df_re[kept_columns].rename(columns={**id_labels, **pollutant_labels})


In [68]:
# df_re.columns = ['Operational Phase', 'CH4 (Methane)' 'CO (Carbon Monoxide)', 'N2O (Nitrous Oxide)', 'NMVOC (Non-Methane Volatile Organic Compounds)', 'PM10 (Particulate Matter 10)', 'PM2.5 (Particulate Matter 2.5)', 'NOx (Nitrogen Oxide)', 'BC (Black Carbon)', 'CO2 (Methane)', 'SOx (Sulfur Oxide)', 'BC (Black Carbon 2)']
# df_re.columns = ['Country', 'Year', 'Methane', 'Carbon Monoxide', 'Nitrous Oxide', 'Non-Methane Volatile Organic Compounds', 'Particulate Matter 10', 'Particulate Matter 2.5', 'Nitrogen Oxide', 'Black Carbon', 'Carbon Dioxide', 'Sulfur Oxide'] # 'Black Carbon 2']

In [69]:
# Wide -> long: one row per (Country, Year, Pollutant) with its value in 'Emissions'.
df_re = pd.melt(
    df_re,
    id_vars=['Country', 'Year'],
    var_name='Pollutant',
    value_name='Emissions',
)

In [70]:
# Write the long-format yearly pollutant table, without the row index.
pollutant_year_path = join(output_dir, 'pollutant-by-country-year.csv')
df_re.to_csv(pollutant_year_path, index=False)

In [None]:
# Rebuild the vessel-count pivot before exporting. On a top-to-bottom run the
# last live assignment of `df_piv` is the CO2-by-vessel-group pivot, whose
# columns are vessel groups rather than countries, so selecting the country
# columns from it would raise a KeyError.
vessel_counts = df.groupby(['country', 'ymd', 'ym'])['count_vessel'].sum().reset_index()
df_piv = vessel_counts.pivot(index=['ymd', 'ym'], columns='country', values='count_vessel').reset_index()

# Countries from highest to lowest mean monthly vessel count.
order = list(vessel_counts.groupby('country')['count_vessel'].mean().sort_values(ascending=False).index)
df_piv[['ym', 'ymd'] + order].to_csv(join(output_dir, 'vessel_count_by_country.csv'), index=False)

In [46]:
# Grouping keys and the vessel-activity columns to total.
# ('_vessel_group_ais' and '_w_fishing' were noted as alternative grouping keys.)
data_cols = [
    'count_vessel',
    'count_vessel_day',
    '_missing_hours',
    '_total_hours',
]
group_cols = ['country', 'ymd', 'ym']

In [47]:
# Sum the selected columns per grouping key and return the keys as ordinary columns.
df_re = df.groupby(group_cols)[data_cols].sum().reset_index()

In [48]:
# Preview the first two rows of the grouped totals.
df_re.head(2)

Unnamed: 0,country,ymd,ym,count_vessel,count_vessel_day,_missing_hours,_total_hours
0,Cook Islands,2019-01-01,2019-01,1046,2229,17801.0,30445.0
1,Cook Islands,2019-02-01,2019-02,997,1825,18252.0,26144.0


In [51]:
# df_piv = df_re[group_cols+['_co2_f']].pivot(columns='_vessel_group_ais', values='_co2_f', index=['country', 'ymd', 'ym']).reset_index()
# df_piv = df_re[group_cols+['count_vessel']].pivot(columns=['_vessel_group_ais'], values='count_vessel', index=['country', 'ymd', 'ym']).reset_index()
# df_piv = df_re[group_cols+['count_vessel']].pivot(columns=['country'], values='count_vessel', index=['ymd', 'ym']).reset_index()

In [68]:
# Countries from highest to lowest mean monthly vessel count.
# Only 'count_vessel' is averaged; averaging the whole frame would also try to
# average the 'ymd'/'ym' date columns, which is wasted work and depends on how
# the installed pandas handles means of datetime/Period columns.
order = list(df_re.groupby('country')['count_vessel'].mean().sort_values(ascending=False).index)

In [73]:
# Build the months x countries pivot of 'count_vessel' from `df_re` right here.
# The pivot that used to produce `df_piv` for this export is commented out in
# an earlier cell, so without this line `df_piv` is whatever an earlier section
# left behind and the country columns selected below may not exist.
df_piv = df_re.pivot(index=['ymd', 'ym'], columns='country', values='count_vessel').reset_index()
df_piv[['ym', 'ymd'] + order].to_csv(join(output_dir, 'vessel_count_by_country.csv'), index=False)

In [56]:
# group_cols = ['country', 'ymd', 'ym']
# df_re = df.groupby(group_cols)[['_co2_f']].sum()
# df_re.reset_index(inplace=True)
# df_piv = df_re.pivot(columns='country', values='_co2_f', index='ym')