### Load packages

In [1]:
from user_setup import *
from gst_tools import gst_utils as utils
from gst_tools import constants

### Load data

In [2]:
# Data selection
# Pick the raw input file that matches the configured dataset type.
# NOTE(review): `data`, `extrapol` and the *_fname names are not defined in this
# notebook; presumably they come from the star import of `user_setup` -- confirm.

if data == 'emissions':
    # `extrapol` switches between the extrapolated and non-extrapolated PRIMAP file.
    raw_data_fname = primap_extrap_fname if extrapol else primap_no_extrap_fname
elif data == 'energy':
    raw_data_fname = bp_world_energy_panel_fname
else:
    # Fail fast: a bare `else` here used to send any unrecognised value
    # (e.g. a typo) silently down the BP energy path.
    raise ValueError(
        f'Please provide a valid dataset type, either "emissions" or "energy" (got {data!r}).'
    )

In [3]:
# Data loading
# Read the selected raw file from `constants.input_folder`.
# `raw_data_fname` must already have been set by the data-selection cell.

raw_data = utils.load_data(constants.input_folder, raw_data_fname)

Reading input-data/Guetschow-et-al-2021-PRIMAP-hist_v2.3.1_20-Sep_2021.csv


### Reorganise and filter data

In [4]:
# Reorganise dataset for processing
# Apply the renaming helper that matches the configured dataset type.

if data == 'emissions':
    renamed_data = utils.rename_primap(raw_data)
elif data == 'energy':
    renamed_data = utils.rename_bp(raw_data)
else:
    # Raise instead of printing: after a mere print, `renamed_data` stays
    # undefined (or stale from an earlier run) and the following cells fail
    # with a misleading error or silently process old data.
    raise ValueError('Please provide a valid dataset type, either "emissions" or "energy".')

In [5]:
#renamed_data[['Country', 'year', 'electbyfuel_ren_power']].loc[(renamed_data['Country'] == 'Canada')]
#ren_power_twh
#ren_power_twh_net
#

# What is included in "renewables"? `ren_power_twh` does not include hydro.
# Renewables do include "others": geothermal, biomass and other sources.

# The "net" variant is slightly different (slightly smaller).

In [6]:
#print(list(renamed_data.columns))

#Interesting columns
#biodiesel_cons_pj
#biodiesel_prod_pj
#biofuels_cons_ej
#biofuels_cons_pj
#biofuels_prod_pj
#biogeo_ej
#biogeo_twh
#biogeo_twh_net
#coalcons_ej
#coalprod_ej

#elect_twh
#electbyfuel_coal
#electbyfuel_gas
#electbyfuel_hydro
#electbyfuel_nuclear
#electbyfuel_oil
#electbyfuel_other
#electbyfuel_ren_power
#electbyfuel_total

#ethanol_cons_pj
#ethanol_prod_pj
#gascons_ej
#gasprod_ej
#hydro_ej
#hydro_twh
#hydro_twh_net
#nuclear_ej
#nuclear_twh
#nuclear_twh_net
#oilcons_ej

#primary_ej
#ren_power_ej
#ren_power_twh
#ren_power_twh_net
#renewables_ej
#solar_ej
#solar_twh
#solar_twh_net
#wind_ej
#wind_twh
#wind_twh_net

In [7]:
# Filter dataset
# Emissions go through the PRIMAP filter, everything else through the BP filter.
# Both helpers receive the requested countries and the first year of interest.

proc_data = (
    utils.filter_primap(
        renamed_data,
        primap_gas,
        primap_sector,
        primap_scenario,
        countries,
        years_of_interest[0],
        constants.gas_names_variable,
    )
    if data == 'emissions'
    else utils.filter_bp(renamed_data, energy_variable, countries, years_of_interest[0])
)

Not all countries requested were available in the raw data. You are missing the following:
   Solomon Islands
   São Tomé & Príncipe
   Antigua & Barbuda
   Central African Republic
   Liberia
   Vanuatu
   Comoros
   Gambia
   Tonga
   Samoa
   Fiji
   Nauru
   Burkina Faso
   Mauritius
   Armenia
   Haiti
   Togo
   Cambodia
   Djibouti
   Maldives
   Laos
   St. Kitts & Nevis
   Kiribati
   Botswana
   Uganda
   Tuvalu
   Guyana
   Marshall Islands
   Guinea
   Mali
   St. Lucia
   Niue
   Malta
   Palestine
   Zimbabwe
   Dominica
   Burundi
   Micronesia
   Lesotho
   Namibia
   Nepal
   St. Vincent & Grenadines
   Cabo Verde
   Grenada
   Guinea-Bissau
   Seychelles
   Bhutan
   Bahamas
   Cook Islands
---------
First year of data available is now 1990
Last year of data available is 2019
These are the 10 first rows of the processed data:
  category country  gas scenario              source         unit  \
0    1.B.2     AFG  CO2   HISTCR  PRIMAP-hist_v2.3.1  Gg CO2 / yr   
1    1

In [8]:
# Convert the "absolute" dataset to reasonable units
# Only emissions data is converted, and only while the switch below is on.
# NOTE(review): `proc_data` is reassigned from itself, so re-running this cell
# presumably applies the conversion a second time -- confirm in the helper.

convert_from_Gg_to_Mt = True

if convert_from_Gg_to_Mt and data == 'emissions':
    proc_data = utils.convert_from_Gg_to_Mt(proc_data)

*******************
Converting unit from "Gg CO2 / yr" to "MtCO2" using a conversion factor of 0.001
*******************


In [9]:
# Define processed data filename
# The output filename is built by the helper matching the dataset type.

proc_fname = (
    utils.define_primap_proc_fname(proc_data, constants.gas_names_fname, constants.sector_names_fname)
    if data == 'emissions'
    else utils.define_bp_proc_fname(proc_data)
)

In [10]:
# Write filtered dataset to file
# Saves `proc_data` under `constants.proc_data_folder` using the filename
# defined in the previous cell.

utils.write_to_file(proc_data, constants.proc_data_folder, proc_fname)

Processed data written to file! - proc-data/primap-histcr_v2.3.1_CO2_oil_and_gas.csv


### Normalise dataset if necessary

In [11]:
# Normalisation per capita and per USD
# Produces `final_data`, the dataset used for plotting:
#   * 'absolute' data is passed through unchanged;
#   * 'per capita' data is normalised with the World Bank population dataset;
#   * any other data_type is normalised with the World Bank GDP dataset.
# Normalised data is additionally unit-converted and written to its own file.

if data_type == 'absolute':
    # Nothing to normalise.
    final_data = proc_data
elif data == 'energy' and energy_variable != 3:
    # Raise instead of printing: a mere print left `final_data` unassigned, so
    # the plotting cells failed with a NameError or reused a stale value.
    # NOTE(review): 3 is presumably the only energy variable that is not a
    # share -- confirm against user_setup.
    raise ValueError('ERROR: The energy variable selected is a share, so it can be no further normalised.')
else:
    if data_type == 'per capita':
        pop_dset = utils.rearrange_wb_data(constants.input_folder, wb_population_fname)
        final_data = utils.normalise(proc_data, pop_dset, data_type)
    else:
        gdp_dset = utils.rearrange_wb_data(constants.input_folder, wb_gdp_fname)
        final_data = utils.normalise(proc_data, gdp_dset, data_type)

    # Switch for the unit conversion of the normalised values.
    convert_normalised = True

    if convert_normalised:
        final_data = utils.convert_norm(final_data, data, data_type)

    # Same filename as the filtered data, suffixed with the data type
    # (spaces replaced by underscores).
    utils.write_to_file(final_data, constants.proc_data_folder, proc_fname.replace('.csv', '_'+data_type+'.csv').replace(' ', '_'))

### Plot the distribution of the selected variable for each year of interest

In [12]:
# Unpack the three values returned by the helper: the dataset to plot plus
# (presumably) the variable name and its unit, both passed to the histogram below.
dset_to_plot, variable, unit = utils.prepare_for_plotting(final_data)

In [13]:

# TODO: decide how the x-axis should be labelled
# (idea: xaxlabel = variable_name_to_display)

# One histogram per year of interest, with outliers removed.
for selected_year in years_of_interest:
    utils.make_histogram(
        dset_to_plot,
        selected_year,
        unit,
        variable_title=variable,
        remove_outliers=True,
    )

---------
Making plot for: 
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 1990, dtype: float64)
upper outliers are: 
country
AGO     7.21
AUS     6.09
BRA     6.20
CAN    11.70
CHN     4.66
DZA    12.30
EUU    16.40
FRA     4.36
GAB     5.20
GBR     5.78
IDN    11.80
IRN    23.00
IRQ     9.35
ITA     4.05
LBY     8.38
MEX    11.70
NGA    43.80
OMN     5.34
RUS    25.70
SAU     8.08
SYR     3.69
USA    41.80
Name: 1990, dtype: float64
---
bins set to range(0, 5)


