In [1]:
# Import dependencies
import pandas as pd
from pyxlsb import open_workbook

import requests
import json

from config import eia_api_key

# Extract Data Into DataFrames

### Data 1: Irena.org Dataset on Electricity Installed Capacity (MW)

In [2]:
# Import data file
irena_eic_file = "data/IRENA_Stats_Tool.xlsb"

irena_eic_df = []
with open_workbook (irena_eic_file) as wb:
     with wb.get_sheet("Data") as sheet:
        for row in sheet.rows():
            irena_eic_df.append([item.v for item in row])

irena_eic_df = pd.DataFrame(irena_eic_df[8:], columns=irena_eic_df[5])
irena_eic_df.head()

Unnamed: 0,Region,Country,ISO Code,RE or Non-RE,Group Technology,Technology,Poducer Type,Year,Electricity Installed Capacity (MW),Unnamed: 10,...,Unnamed: 12,Unnamed: 13,NaN,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,NaN.7
0,Africa,Algeria,DZA,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,2018.0,21308.0,,...,,,,,,,,,,
1,Africa,Algeria,DZA,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,2019.0,23508.0,,...,,,,,,,,,,
2,Africa,Algeria,DZA,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,2018.0,228.0,,...,,,,,,,,,,
3,Africa,Algeria,DZA,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,2019.0,228.0,,...,,,,,,,,,,
4,Africa,Algeria,DZA,Total Renewable,Solar energy,Concentrated solar power,On-grid,2018.0,25.0,,...,,,,,,,,,,


### Data 2 (1): bp.com Dataset on Energy Consumption (Exajoules)

In [3]:
# Import data file
bp_pe_file = "data/bp-stats-review-2020-all-data.xlsx"

bp_pe = pd.read_excel(bp_pe_file, "Primary Energy Consumption", engine="openpyxl")

# Rename the columns with value from the second row
bp_pe_rename = bp_pe.rename(columns=bp_pe.iloc[1])
bp_pe_rename.head()

Unnamed: 0,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1
0,,,,,,,,,,,...,,,,,,Growth rate per annum,,Share,,
1,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019,,
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,4.924317,5.239095,5.480214,5.836056,6.185953,6.624581,6.831992,7.348642,7.857612,...,13.99365,13.936678,14.109482,14.349534,14.214058,-0.00944118,0.00640355,0.0243434,,
4,Mexico,1.050253,1.111572,1.13247,1.232997,1.353689,1.444834,1.519984,1.680466,1.812695,...,7.6883,7.785419,7.895341,7.833322,7.720742,-0.0143718,0.00905657,0.0132227,,


### Data 2 (2): bp.com Dataset on Renewables Consumption (Exajoules)

In [4]:
# Import data file
bp_rc_file = "data/bp-stats-review-2020-all-data.xlsx"

bp_rc = pd.read_excel(bp_rc_file, "Renewables Consumption - EJ", engine="openpyxl")

# Rename the columns with value from the second row
bp_rc_rename = bp_rc.rename(columns=bp_rc.iloc[1])
bp_rc_rename.head()

Unnamed: 0,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1
0,,,,,,,,,,,...,,,,,,Growth rate per annum,,Share,,
1,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019,,
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.437742,0.488314,0.492008,0.503491,0.523399,0.0395406,0.134528,0.0180629,,
4,Mexico,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00161,...,0.157067,0.170452,0.184147,0.216463,0.345292,0.595155,0.102784,0.0119163,,


 ### Data 3: wesr.unep.org Dataset on GHG Emission

In [5]:
# Import data file
unep_file = "data/wesr_unep_GHGemission.csv"

unep_df = pd.read_csv(unep_file, skiprows=3)
unep_df.head()

Unnamed: 0,INDICATOR NAME (unit),COUNTRY NAME,YEAR,VALUE
0,Greenhouse gas (GHG) emissions from agricultur...,China,1961,254109.75
1,Greenhouse gas (GHG) emissions from agricultur...,China,1962,256115.15
2,Greenhouse gas (GHG) emissions from agricultur...,China,1963,267398.78
3,Greenhouse gas (GHG) emissions from agricultur...,China,1964,288531.02
4,Greenhouse gas (GHG) emissions from agricultur...,China,1965,301524.32


 ### Data 4: eia.gov API Dataset on CO2 Emission

#### China CO2 Emission

In [6]:
# Building query URL
china_co2_query_url = f"http://api.eia.gov/series/?api_key={eia_api_key}&series_id=INTL.4008-8-CHN-MMTCD.A"

In [7]:
# Get the info from JSON response
co2_response = requests.get(china_co2_query_url).json()

# Get the data to display for creating call format
print(json.dumps(co2_response, indent=4, sort_keys=False))

{
    "request": {
        "command": "series",
        "series_id": "INTL.4008-8-CHN-MMTCD.A"
    },
    "series": [
        {
            "series_id": "INTL.4008-8-CHN-MMTCD.A",
            "name": "CO2 emissions, China, Annual",
            "units": "million metric tonnes carbon dioxide",
            "f": "A",
            "copyright": "None",
            "source": "EIA, U.S. Energy Information Administration",
            "geography": "CHN",
            "start": "1980",
            "end": "2018",
            "updated": "2020-12-16T10:43:39-0500",
            "data": [
                [
                    "2018",
                    10633.716109407
                ],
                [
                    "2017",
                    10431.664013179
                ],
                [
                    "2016",
                    10433.034096167
                ],
                [
                    "2015",
                    10457.195827419
                ],
                [


#### Germany CO2 Emission

In [8]:
# Building query URL
germany_co2_query_url = f"http://api.eia.gov/series/?api_key={eia_api_key}&series_id=INTL.4008-8-DEU-MMTCD.A"

In [9]:
# Get the info from JSON response
co2_response_g = requests.get(germany_co2_query_url).json()

# Get the data to display for creating call format
print(json.dumps(co2_response_g, indent=4, sort_keys=False))

{
    "request": {
        "command": "series",
        "series_id": "INTL.4008-8-DEU-MMTCD.A"
    },
    "series": [
        {
            "series_id": "INTL.4008-8-DEU-MMTCD.A",
            "name": "CO2 emissions, Germany, Annual",
            "units": "million metric tonnes carbon dioxide",
            "f": "A",
            "copyright": "None",
            "source": "EIA, U.S. Energy Information Administration",
            "geography": "DEU",
            "start": "1980",
            "end": "2018",
            "updated": "2020-12-02T16:24:23-0500",
            "data": [
                [
                    "2018",
                    798.0084039
                ],
                [
                    "2017",
                    824.833881662
                ],
                [
                    "2016",
                    828.275513747
                ],
                [
                    "2015",
                    826.764602382
                ],
                [
        

#### USA CO2 Emission

In [10]:
# Building query URL
usa_co2_query_url = f"http://api.eia.gov/series/?api_key={eia_api_key}&series_id=INTL.4008-8-USA-MMTCD.A"

In [11]:
# Get the info from JSON response
co2_response_u = requests.get(usa_co2_query_url).json()

# Get the data to display for creating call format
print(json.dumps(co2_response_u, indent=4, sort_keys=False))

{
    "request": {
        "command": "series",
        "series_id": "INTL.4008-8-USA-MMTCD.A"
    },
    "series": [
        {
            "series_id": "INTL.4008-8-USA-MMTCD.A",
            "name": "CO2 emissions, United States, Annual",
            "units": "million metric tonnes carbon dioxide",
            "f": "A",
            "copyright": "None",
            "source": "EIA, U.S. Energy Information Administration",
            "geography": "USA",
            "start": "1949",
            "end": "2018",
            "updated": "2020-12-16T10:43:39-0500",
            "data": [
                [
                    "2018",
                    5284.406529
                ],
                [
                    "2017",
                    5130.576907
                ],
                [
                    "2016",
                    5170.51057
                ],
                [
                    "2015",
                    5262.842024
                ],
                [
         

# Transform Data

### Data 1: Irena.org Dataset on Electricity Installed Capacity (MW)

In [207]:
# Get coloumns 
irena_eic_df.columns

Index([                             'Region',
                                   'Country',
                                  'ISO Code',
                              'RE or Non-RE',
                          'Group Technology',
                                'Technology',
                              'Poducer Type',
                                      'Year',
       'Electricity Installed Capacity (MW)',
                                          '',
                                          '',
                                          '',
                                          '',
                                        None,
                                        None,
                                        None,
                                        None,
                                        None,
                                        None,
                                        None,
                                        None],
      dtype='object')

In [208]:
# Extract wanted columns
eic_cols = ['Country','Year','RE or Non-RE','Group Technology','Technology','Poducer Type','Electricity Installed Capacity (MW)']
eic_countries = irena_eic_df[eic_cols].copy()
eic_countries.head()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
0,Algeria,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,21308.0
1,Algeria,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,23508.0
2,Algeria,2018.0,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,228.0
3,Algeria,2019.0,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,228.0
4,Algeria,2018.0,Total Renewable,Solar energy,Concentrated solar power,On-grid,25.0


In [209]:
# Drop rows with empty values
eic_countries.dropna()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
0,Algeria,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,21308.00
1,Algeria,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,23508.00
2,Algeria,2018.0,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,228.00
3,Algeria,2019.0,Total Renewable,Hydropower (excl. Pumped Storage),Renewable hydropower,On-grid,228.00
4,Algeria,2018.0,Total Renewable,Solar energy,Concentrated solar power,On-grid,25.00
...,...,...,...,...,...,...,...
2968,Venezuela,2019.0,Total Renewable,Solar energy,Off-grid Solar photovoltaic,Off-grid,3.00
2969,Venezuela,2018.0,Total Renewable,Solar energy,On-grid Solar photovoltaic,On-grid,1.00
2970,Venezuela,2019.0,Total Renewable,Solar energy,On-grid Solar photovoltaic,On-grid,2.32
2971,Venezuela,2018.0,Total Renewable,Wind energy,Onshore wind energy,Off-grid,71.28


#### Extracting data from my selected three countries: China, Germany, and United States

In [210]:
# Extracting China's data
# Reset index to "Country"
eic_china = eic_countries[eic_countries["Country"]=="China"]
eic_china.reset_index(drop=True, inplace=True)
eic_china.head()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
0,China,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,1126270.0
1,China,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,1168004.147
2,China,2018.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,44660.0
3,China,2019.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,48740.0
4,China,2018.0,Total Non-Renewable,Other non-renewable energy,Other non-renewable energy,On-grid,4575.0


In [211]:
# Extracting Germany's data
# Reset index to "Country"
eic_germany = eic_countries[eic_countries["Country"]=="Germany"]
eic_china.reset_index(drop=True, inplace=True)
eic_germany.head()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
1755,Germany,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,91415.0
1756,Germany,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,91991.0
1757,Germany,2018.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,10799.0
1758,Germany,2019.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,10799.0
1759,Germany,2018.0,Total Non-Renewable,Other non-renewable energy,Other non-renewable energy,On-grid,2332.0


In [212]:
# Extracting USA's data
# Reset index to "Country"
eic_usa = eic_countries[eic_countries["Country"]=="United States"]
eic_china.reset_index(drop=True, inplace=True)
eic_usa.head()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
2459,United States,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,750619.0
2460,United States,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,743307.0
2461,United States,2018.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,99629.0
2462,United States,2019.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,98266.3
2463,United States,2018.0,Total Non-Renewable,Other non-renewable energy,Other non-renewable energy,On-grid,3292.5


#### Combining these dataframe together as one

In [213]:
# Append the dataframes together as one
eic_two = pd.concat([eic_china, eic_germany], axis=0)
eic_all = pd.concat([eic_two, eic_usa], axis=0)
eic_all.reset_index(drop=True, inplace=True)
eic_all.head()

Unnamed: 0,Country,Year,RE or Non-RE,Group Technology,Technology,Poducer Type,Electricity Installed Capacity (MW)
0,China,2018.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,1126270.0
1,China,2019.0,Total Non-Renewable,Fossil fuels,Fossil fuels,On-grid,1168004.147
2,China,2018.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,44660.0
3,China,2019.0,Total Non-Renewable,Nuclear,Nuclear,On-grid,48740.0
4,China,2018.0,Total Non-Renewable,Other non-renewable energy,Other non-renewable energy,On-grid,4575.0


#### Normalization of the Data

In [214]:
# Create a dataframe for country id
list(eic_all["Country"].unique())

['China', 'Germany', 'United States']

In [215]:
countries_df = pd.DataFrame({"country_id": [1,2,3],"countries": ['China', 'Germany', 'United States']})
countries_df.set_index("country_id", inplace=True)
countries_df

Unnamed: 0_level_0,countries
country_id,Unnamed: 1_level_1
1,China
2,Germany
3,United States


In [216]:
list(eic_all["Year"].unique())

[2018.0, 2019.0]

In [217]:
# Create a dataframe for re or not id
list(eic_all["RE or Non-RE"].unique())

['Total Non-Renewable', 'Total Renewable']

In [218]:
re_non_df = pd.DataFrame({"re_or_not_id": [1,2],"re_or_not": ['Total Non-Renewable', 'Total Renewable']})
re_non_df.set_index("re_or_not_id", inplace=True)
re_non_df

Unnamed: 0_level_0,re_or_not
re_or_not_id,Unnamed: 1_level_1
1,Total Non-Renewable
2,Total Renewable


In [219]:
# Create a dataframe for group technology id
list(eic_all["Group Technology"].unique())

['Fossil fuels',
 'Nuclear',
 'Other non-renewable energy',
 'Pumped storage',
 'Bioenergy',
 'Geothermal energy',
 'Hydropower (excl. Pumped Storage)',
 'Marine energy',
 'Solar energy',
 'Wind energy']

In [220]:
len(eic_all["Group Technology"].unique())

10

In [221]:
group_tech_range = [*range(1,11,1)]

In [222]:
group_tech_df = pd.DataFrame({"group_tech_id": group_tech_range,
                              "group_tech": [
                            'Fossil fuels',
                             'Nuclear',
                             'Other non-renewable energy',
                             'Pumped storage',
                             'Bioenergy',
                             'Geothermal energy',
                             'Hydropower (excl. Pumped Storage)',
                             'Marine energy',
                             'Solar energy',
                             'Wind energy'
                              ]})
group_tech_df.set_index("group_tech_id", inplace=True)
group_tech_df

Unnamed: 0_level_0,group_tech
group_tech_id,Unnamed: 1_level_1
1,Fossil fuels
2,Nuclear
3,Other non-renewable energy
4,Pumped storage
5,Bioenergy
6,Geothermal energy
7,Hydropower (excl. Pumped Storage)
8,Marine energy
9,Solar energy
10,Wind energy


In [223]:
# Create a dataframe for technology id
list(eic_all["Technology"].unique())

['Fossil fuels',
 'Nuclear',
 'Other non-renewable energy',
 'Pumped storage',
 'Biogas',
 'Renewable municipal waste',
 'Solid biofuels',
 'Geothermal energy',
 'Renewable hydropower',
 'Marine energy',
 'Concentrated solar power',
 'Off-grid Solar photovoltaic',
 'On-grid Solar photovoltaic',
 'Offshore wind energy',
 'Onshore wind energy',
 'Liquid biofuels',
 'Mixed Hydro Plants']

In [224]:
len(eic_all["Technology"].unique())

17

In [225]:
tech_range=[*range(1, 18, 1)]
tech_range

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]

In [226]:
tech_df = pd.DataFrame({"tech_id": tech_range,
                        "tech": ['Fossil fuels',
                         'Nuclear',
                         'Other non-renewable energy',
                         'Pumped storage',
                         'Biogas',
                         'Renewable municipal waste',
                         'Solid biofuels',
                         'Geothermal energy',
                         'Renewable hydropower',
                         'Marine energy',
                         'Concentrated solar power',
                         'Off-grid Solar photovoltaic',
                         'On-grid Solar photovoltaic',
                         'Offshore wind energy',
                         'Onshore wind energy',
                         'Liquid biofuels',
                         'Mixed Hydro Plants']})
tech_df.set_index("tech_id", inplace=True)
tech_df

Unnamed: 0_level_0,tech
tech_id,Unnamed: 1_level_1
1,Fossil fuels
2,Nuclear
3,Other non-renewable energy
4,Pumped storage
5,Biogas
6,Renewable municipal waste
7,Solid biofuels
8,Geothermal energy
9,Renewable hydropower
10,Marine energy


In [227]:
list(eic_all["Poducer Type"].unique())

['On-grid', 'Off-grid']

In [228]:
# Create a dataframe for producer id
producer_df = pd.DataFrame({"producer_id": [1,2],"producer": ['On-grid', 'Off-grid']})
producer_df.set_index("producer_id", inplace=True)
producer_df

Unnamed: 0_level_0,producer
producer_id,Unnamed: 1_level_1
1,On-grid
2,Off-grid


#### Replace the values with the ids in the dataframe

In [230]:
# Update the values to their according ids
# Rename the columns
eic_all_new = eic_all.copy()
eic_all_new["Country"] = eic_all_new["Country"].replace(['China', 'Germany', 'United States'],[1,2,3])
eic_all_new["Year"] = eic_all_new["Year"].replace([2018.0, 2019.0],[54,55])
eic_all_new["RE or Non-RE"] = eic_all_new["RE or Non-RE"].replace(['Total Non-Renewable', 'Total Renewable'],[1,2])
eic_all_new["Group Technology"] = eic_all_new["Group Technology"].replace(['Fossil fuels',
                                                                             'Nuclear',
                                                                             'Other non-renewable energy',
                                                                             'Pumped storage',
                                                                             'Bioenergy',
                                                                             'Geothermal energy',
                                                                             'Hydropower (excl. Pumped Storage)',
                                                                             'Marine energy',
                                                                             'Solar energy',
                                                                             'Wind energy'], group_tech_range)

eic_all_new["Technology"] = eic_all_new["Technology"].replace(['Fossil fuels',
                                                                 'Nuclear',
                                                                 'Other non-renewable energy',
                                                                 'Pumped storage',
                                                                 'Biogas',
                                                                 'Renewable municipal waste',
                                                                 'Solid biofuels',
                                                                 'Geothermal energy',
                                                                 'Renewable hydropower',
                                                                 'Marine energy',
                                                                 'Concentrated solar power',
                                                                 'Off-grid Solar photovoltaic',
                                                                 'On-grid Solar photovoltaic',
                                                                 'Offshore wind energy',
                                                                 'Onshore wind energy',
                                                                 'Liquid biofuels',
                                                                 'Mixed Hydro Plants'], tech_range)
eic_all_new["Poducer Type"] = eic_all_new["Poducer Type"].replace(['On-grid', 'Off-grid'],[1,2])
eic_all_new = eic_all_new.rename(columns={"Country":"country_id",
                                          "Year":"year_id",
                                          "RE or Non-RE":"re_or_not_id",
                                          "Group Technology": "group_tech_id",
                                          "Technology":"tech_id",
                                          "Poducer Type":"producer_type_id"
    
})
eic_all_new.set_index("country_id", inplace=True)
eic_all_new.head()

Unnamed: 0_level_0,year_id,re_or_not_id,group_tech_id,tech_id,producer_type_id,Electricity Installed Capacity (MW)
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,54.0,1,1,1,1,1126270.0
1,55.0,1,1,1,1,1168004.147
1,54.0,1,2,2,1,44660.0
1,55.0,1,2,2,1,48740.0
1,54.0,1,3,3,1,4575.0


### Data 2 (1): bp.com Dataset on Energy Consumption (Exajoules)

In [231]:
# Extract wanted columns
bp_ec_countries = bp_pe_rename.copy()
bp_ec_countries.head()

Unnamed: 0,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1
0,,,,,,,,,,,...,,,,,,Growth rate per annum,,Share,,
1,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019,,
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,4.924317,5.239095,5.480214,5.836056,6.185953,6.624581,6.831992,7.348642,7.857612,...,13.99365,13.936678,14.109482,14.349534,14.214058,-0.00944118,0.00640355,0.0243434,,
4,Mexico,1.050253,1.111572,1.13247,1.232997,1.353689,1.444834,1.519984,1.680466,1.812695,...,7.6883,7.785419,7.895341,7.833322,7.720742,-0.0143718,0.00905657,0.0132227,,


In [232]:
# Renaming the same name columns (2019) by creating specific name columns and duplicating the according data row
bp_ec_countries ["2019_share"] = bp_ec_countries.iloc [:,58]
bp_ec_countries ["2019_growth_rate_per_annum"] = bp_ec_countries.iloc [:,56]
bp_ec_countries ["2019_year"] = bp_ec_countries.iloc[:,55]
bp_ec_countries

Unnamed: 0,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1,2019_share,2019_growth_rate_per_annum,2019_year
0,,,,,,,,,,,...,,,Growth rate per annum,,Share,,,Share,Growth rate per annum,
1,Exajoules,1965.000000,1966.000000,1967.000000,1968.000000,1969.000000,1970.000000,1971.000000,1972.000000,1973.000000,...,2018.000000,2019.000000,2019,2008-18,2019,,,2019,2019,2019.000000
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,4.924317,5.239095,5.480214,5.836056,6.185953,6.624581,6.831992,7.348642,7.857612,...,14.349534,14.214058,-0.00944118,0.00640355,0.0243434,,,0.0243434,-0.00944118,14.214058
4,Mexico,1.050253,1.111572,1.132470,1.232997,1.353689,1.444834,1.519984,1.680466,1.812695,...,7.833322,7.720742,-0.0143718,0.00905657,0.0132227,,,0.0132227,-0.0143718,7.720742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,,,,,,,,,,,...,,,,,,,,,,
125,,,,,,,,,,,...,,,,,,,,,,
126,,,,,,,,,,,...,,,,,,,,,,
127,,,,,,,,,,,...,,,,,,,,,,


In [233]:
# # Get a list of the column names and select the wanted columns 
ls = list(bp_ec_countries.columns)
columns=['Exajoules',
 1965.0,
 1966.0,
 1967.0,
 1968.0,
 1969.0,
 1970.0,
 1971.0,
 1972.0,
 1973.0,
 1974.0,
 1975.0,
 1976.0,
 1977.0,
 1978.0,
 1979.0,
 1980.0,
 1981.0,
 1982.0,
 1983.0,
 1984.0,
 1985.0,
 1986.0,
 1987.0,
 1988.0,
 1989.0,
 1990.0,
 1991.0,
 1992.0,
 1993.0,
 1994.0,
 1995.0,
 1996.0,
 1997.0,
 1998.0,
 1999.0,
 2000.0,
 2001.0,
 2002.0,
 2003.0,
 2004.0,
 2005.0,
 2006.0,
 2007.0,
 2008.0,
 2009.0,
 2010.0,
 2011.0,
 2012.0,
 2013.0,
 2014.0,
 2015.0,
 2016.0,
 2017.0,
 2018.0,
 '2019_year', 
 '2019_growth_rate_per_annum',
 '2008-18',
 '2019_share']
bp_ec_countries = bp_ec_countries[columns]
bp_ec_countries

Unnamed: 0,Exajoules,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,2019_year,2019_growth_rate_per_annum,2008-18,2019_share
0,,,,,,,,,,,...,,,,,,,,Growth rate per annum,,Share
1,Exajoules,1965.000000,1966.000000,1967.000000,1968.000000,1969.000000,1970.000000,1971.000000,1972.000000,1973.000000,...,2013.000000,2014.000000,2015.00000,2016.000000,2017.000000,2018.000000,2019.000000,2019,2008-18,2019
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,4.924317,5.239095,5.480214,5.836056,6.185953,6.624581,6.831992,7.348642,7.857612,...,13.881791,14.031964,13.99365,13.936678,14.109482,14.349534,14.214058,-0.00944118,0.00640355,0.0243434
4,Mexico,1.050253,1.111572,1.132470,1.232997,1.353689,1.444834,1.519984,1.680466,1.812695,...,7.738309,7.697182,7.68830,7.785419,7.895341,7.833322,7.720742,-0.0143718,0.00905657,0.0132227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,,,,,,,,,,,...,,,,,,,,,,
125,,,,,,,,,,,...,,,,,,,,,,
126,,,,,,,,,,,...,,,,,,,,,,
127,,,,,,,,,,,...,,,,,,,,,,


In [234]:
# Rename the fist and 2008-2018 column
bp_ec_countries = bp_ec_countries.rename(columns={bp_ec_countries.columns[0]: "Country",
                                                  bp_ec_countries.columns[55]: 2019.0,
                                                bp_ec_countries.columns[57]: "2008-2018_growth_rate_per_annum"})
bp_ec_countries

Unnamed: 0,Country,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,2019.0,2019_growth_rate_per_annum,2008-2018_growth_rate_per_annum,2019_share
0,,,,,,,,,,,...,,,,,,,,Growth rate per annum,,Share
1,Exajoules,1965.000000,1966.000000,1967.000000,1968.000000,1969.000000,1970.000000,1971.000000,1972.000000,1973.000000,...,2013.000000,2014.000000,2015.00000,2016.000000,2017.000000,2018.000000,2019.000000,2019,2008-18,2019
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,4.924317,5.239095,5.480214,5.836056,6.185953,6.624581,6.831992,7.348642,7.857612,...,13.881791,14.031964,13.99365,13.936678,14.109482,14.349534,14.214058,-0.00944118,0.00640355,0.0243434
4,Mexico,1.050253,1.111572,1.132470,1.232997,1.353689,1.444834,1.519984,1.680466,1.812695,...,7.738309,7.697182,7.68830,7.785419,7.895341,7.833322,7.720742,-0.0143718,0.00905657,0.0132227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,,,,,,,,,,,...,,,,,,,,,,
125,,,,,,,,,,,...,,,,,,,,,,
126,,,,,,,,,,,...,,,,,,,,,,
127,,,,,,,,,,,...,,,,,,,,,,


#### Extracting data from my selected three countries: China, Germany, and United States

In [235]:
# Extracting China's data
# Reset index to "Country"
# Rearrange display method
bp_ec_china = bp_ec_countries[bp_ec_countries["Country"]=="China"]
bp_ec_china.reset_index(inplace=True)
bp_ec_china = bp_ec_china.T

# Reset index
bp_ec_china.reset_index(inplace=True)

# Drop the first two rows and rename the columns
bp_ec_china = bp_ec_china.drop([0,1])
bp_ec_china = bp_ec_china.rename(columns={"index":"Year",
                                        0:"China_EJ (input-equiv)"})

# Reset index
bp_ec_china.reset_index(inplace=True)

# Delete the index and set index to country
del bp_ec_china["index"]
bp_ec_china.head()

Unnamed: 0,Year,China_EJ (input-equiv)
0,1965,5.51604
1,1966,5.995
2,1967,5.39134
3,1968,5.44535
4,1969,6.62766


In [236]:
# Extracting Germany's data
# Reset index to "Country"
# Rearrange display method
bp_ec_germany = bp_ec_countries[bp_ec_countries["Country"]=="Germany"]
bp_ec_germany.reset_index(inplace=True)
bp_ec_germany = bp_ec_germany.T

# Reset index
bp_ec_germany.reset_index(inplace=True)

# Drop the first two rows and rename the columns
bp_ec_germany = bp_ec_germany.drop([0,1])
bp_ec_germany = bp_ec_germany.rename(columns={"index":"Year",
                    0:"Germany_EJ (input-equiv)"})

# Reset index
bp_ec_germany.reset_index(inplace=True)

# Delete the index and set index to country
del bp_ec_germany["index"]
bp_ec_germany.head()

Unnamed: 0,Year,Germany_EJ (input-equiv)
0,1965,10.679
1,1966,10.7058
2,1967,10.6789
3,1968,11.4022
4,1969,12.3013


In [237]:
# Extracting USA's data
# Reset index to "Country"
# Rearrange display method
bp_ec_usa = bp_ec_countries[bp_ec_countries["Country"]=="US"]
bp_ec_usa.reset_index(inplace=True)
bp_ec_usa = bp_ec_usa.T

# Reset index
bp_ec_usa.reset_index(inplace=True)

# Drop the first two rows and rename the columns
bp_ec_usa = bp_ec_usa.drop([0,1])
bp_ec_usa = bp_ec_usa.rename(columns={"index":"Year",
                    0:"USA_EJ (input-equiv)"})

# Reset index
bp_ec_usa.reset_index(inplace=True)

# Delete the index and set index to country
del bp_ec_usa["index"]
bp_ec_usa.head()

Unnamed: 0,Year,USA_EJ (input-equiv)
0,1965,52.4338
1,1966,55.383
2,1967,57.3118
3,1968,60.7991
4,1969,64.0496


#### Combining these dataframe together as one

In [238]:
# Append all dataframes togather as one
bp_ec_all = bp_ec_china.copy()
bp_ec_all["Germany_EJ (input-equiv)"] = bp_ec_germany["Germany_EJ (input-equiv)"]
bp_ec_all["USA_EJ (input-equiv)"] = bp_ec_usa ["USA_EJ (input-equiv)"]

#Reset index to Year
bp_ec_all.reset_index(drop=True, inplace=True)
bp_ec_all.head()

Unnamed: 0,Year,China_EJ (input-equiv),Germany_EJ (input-equiv),USA_EJ (input-equiv)
0,1965,5.51604,10.679,52.4338
1,1966,5.995,10.7058,55.383
2,1967,5.39134,10.6789,57.3118
3,1968,5.44535,11.4022,60.7991
4,1969,6.62766,12.3013,64.0496


#### Normalization of the data

In [239]:
# Create a dataframe for year id
list(bp_ec_all["Year"].unique())

[1965.0,
 1966.0,
 1967.0,
 1968.0,
 1969.0,
 1970.0,
 1971.0,
 1972.0,
 1973.0,
 1974.0,
 1975.0,
 1976.0,
 1977.0,
 1978.0,
 1979.0,
 1980.0,
 1981.0,
 1982.0,
 1983.0,
 1984.0,
 1985.0,
 1986.0,
 1987.0,
 1988.0,
 1989.0,
 1990.0,
 1991.0,
 1992.0,
 1993.0,
 1994.0,
 1995.0,
 1996.0,
 1997.0,
 1998.0,
 1999.0,
 2000.0,
 2001.0,
 2002.0,
 2003.0,
 2004.0,
 2005.0,
 2006.0,
 2007.0,
 2008.0,
 2009.0,
 2010.0,
 2011.0,
 2012.0,
 2013.0,
 2014.0,
 2015.0,
 2016.0,
 2017.0,
 2018.0,
 2019.0,
 '2019_growth_rate_per_annum',
 '2008-2018_growth_rate_per_annum',
 '2019_share']

In [240]:
len(bp_ec_all["Year"].unique())

58

In [264]:
# Create a dataframe on year id
year_df = pd.DataFrame({"year_id": [*range(1,61,1)],
                        "year": [1965.0,
 1966.0,
 1967.0,
 1968.0,
 1969.0,
 1970.0,
 1971.0,
 1972.0,
 1973.0,
 1974.0,
 1975.0,
 1976.0,
 1977.0,
 1978.0,
 1979.0,
 1980.0,
 1981.0,
 1982.0,
 1983.0,
 1984.0,
 1985.0,
 1986.0,
 1987.0,
 1988.0,
 1989.0,
 1990.0,
 1991.0,
 1992.0,
 1993.0,
 1994.0,
 1995.0,
 1996.0,
 1997.0,
 1998.0,
 1999.0,
 2000.0,
 2001.0,
 2002.0,
 2003.0,
 2004.0,
 2005.0,
 2006.0,
 2007.0,
 2008.0,
 2009.0,
 2010.0,
 2011.0,
 2012.0,
 2013.0,
 2014.0,
 2015.0,
 2016.0,
 2017.0,
 2018.0,
 2019.0,
 '2019_growth_rate_per_annum',
 '2008-2018_growth_rate_per_annum',
 '2019_share', 2030, 2050]})
year_df.set_index("year_id", inplace=True)
year_df.head()

Unnamed: 0_level_0,year
year_id,Unnamed: 1_level_1
1,1965
2,1966
3,1967
4,1968
5,1969


In [242]:
# Update the year values in the dataframe
# Rename the column
bp_ec_new = bp_ec_all.copy()
bp_ec_new["Year"] = bp_ec_new["Year"].replace([1965.0,
                                                         1966.0,
                                                         1967.0,
                                                         1968.0,
                                                         1969.0,
                                                         1970.0,
                                                         1971.0,
                                                         1972.0,
                                                         1973.0,
                                                         1974.0,
                                                         1975.0,
                                                         1976.0,
                                                         1977.0,
                                                         1978.0,
                                                         1979.0,
                                                         1980.0,
                                                         1981.0,
                                                         1982.0,
                                                         1983.0,
                                                         1984.0,
                                                         1985.0,
                                                         1986.0,
                                                         1987.0,
                                                         1988.0,
                                                         1989.0,
                                                         1990.0,
                                                         1991.0,
                                                         1992.0,
                                                         1993.0,
                                                         1994.0,
                                                         1995.0,
                                                         1996.0,
                                                         1997.0,
                                                         1998.0,
                                                         1999.0,
                                                         2000.0,
                                                         2001.0,
                                                         2002.0,
                                                         2003.0,
                                                         2004.0,
                                                         2005.0,
                                                         2006.0,
                                                         2007.0,
                                                         2008.0,
                                                         2009.0,
                                                         2010.0,
                                                         2011.0,
                                                         2012.0,
                                                         2013.0,
                                                         2014.0,
                                                         2015.0,
                                                         2016.0,
                                                         2017.0,
                                                         2018.0,
                                                         2019.0,
                                                         '2019_growth_rate_per_annum',
                                                         '2008-2018_growth_rate_per_annum',
                                                         '2019_share'], [*range(1,59,1)])
bp_ec_new = bp_ec_new.rename(columns={"Year":"year_id"})
bp_ec_new.set_index("year_id", inplace=True)
bp_ec_new.head()

Unnamed: 0_level_0,China_EJ (input-equiv),Germany_EJ (input-equiv),USA_EJ (input-equiv)
year_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5.51604,10.679,52.4338
2,5.995,10.7058,55.383
3,5.39134,10.6789,57.3118
4,5.44535,11.4022,60.7991
5,6.62766,12.3013,64.0496


### Data 2 (2): bp.com Dataset on Renewables Consumption (Exajoules)

In [243]:
# Extract wanted columns
bp_rc_countries = bp_rc_rename.copy()
bp_rc_countries.head()

Unnamed: 0,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1
0,,,,,,,,,,,...,,,,,,Growth rate per annum,,Share,,
1,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2015.0,2016.0,2017.0,2018.0,2019.0,2019,2008-18,2019,,
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.437742,0.488314,0.492008,0.503491,0.523399,0.0395406,0.134528,0.0180629,,
4,Mexico,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00161,...,0.157067,0.170452,0.184147,0.216463,0.345292,0.595155,0.102784,0.0119163,,


In [244]:
# Renaming the same name columns (2019) by creating specific name columns and duplicating the according data row
bp_rc_countries ["2019_share"] = bp_rc_countries.iloc [:,58]
bp_rc_countries ["2019_growth_rate_per_annum"] = bp_rc_countries.iloc [:,56]
bp_rc_countries ["2019_year"] = bp_rc_countries.iloc[:,55]
bp_rc_countries

Unnamed: 0,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2018.0,2019.0,2019,2008-18,2019.1,NaN,NaN.1,2019_share,2019_growth_rate_per_annum,2019_year
0,,,,,,,,,,,...,,,Growth rate per annum,,Share,,,Share,Growth rate per annum,
1,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.00000,...,2018.000000,2019.000000,2019,2008-18,2019,,,2019,2019,2019.000000
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.503491,0.523399,0.0395406,0.134528,0.0180629,,,0.0180629,0.0395406,0.523399
4,Mexico,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00161,...,0.216463,0.345292,0.595155,0.102784,0.0119163,,,0.0119163,0.595155,0.345292
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,♦ Less than 0.05%.,,,,,,,,,,...,,,,,,,,,,
118,n/a not available.,,,,,,,,,,...,,,,,,,,,,
119,"USSR includesCIS, Georgia, Ukraine and the Bal...",,,,,,,,,,...,,,,,,,,,,
120,"# Excludes Estonia, Latvia and Lithuania pri...",,,,,,,,,,...,,,,,,,,,,


In [245]:
# # Get a list of the column names and select the wanted columns 
rc_ls = list(bp_rc_countries.columns)
rc_columns=[
'Exajoules (input-equivalent)',
 1965.0,
 1966.0,
 1967.0,
 1968.0,
 1969.0,
 1970.0,
 1971.0,
 1972.0,
 1973.0,
 1974.0,
 1975.0,
 1976.0,
 1977.0,
 1978.0,
 1979.0,
 1980.0,
 1981.0,
 1982.0,
 1983.0,
 1984.0,
 1985.0,
 1986.0,
 1987.0,
 1988.0,
 1989.0,
 1990.0,
 1991.0,
 1992.0,
 1993.0,
 1994.0,
 1995.0,
 1996.0,
 1997.0,
 1998.0,
 1999.0,
 2000.0,
 2001.0,
 2002.0,
 2003.0,
 2004.0,
 2005.0,
 2006.0,
 2007.0,
 2008.0,
 2009.0,
 2010.0,
 2011.0,
 2012.0,
 2013.0,
 2014.0,
 2015.0,
 2016.0,
 2017.0,
 2018.0,
'2019_year',
'2019_growth_rate_per_annum',
 '2008-18',
 '2019_share'
]
bp_rc_countries = bp_rc_countries[rc_columns]
bp_rc_countries

Unnamed: 0,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,2019_year,2019_growth_rate_per_annum,2008-18,2019_share
0,,,,,,,,,,,...,,,,,,,,Growth rate per annum,,Share
1,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.00000,...,2013.000000,2014.000000,2015.000000,2016.000000,2017.000000,2018.000000,2019.000000,2019,2008-18,2019
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.290927,0.303300,0.437742,0.488314,0.492008,0.503491,0.523399,0.0395406,0.134528,0.0180629
4,Mexico,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00161,...,0.111551,0.133273,0.157067,0.170452,0.184147,0.216463,0.345292,0.595155,0.102784,0.0119163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,♦ Less than 0.05%.,,,,,,,,,,...,,,,,,,,,,
118,n/a not available.,,,,,,,,,,...,,,,,,,,,,
119,"USSR includesCIS, Georgia, Ukraine and the Bal...",,,,,,,,,,...,,,,,,,,,,
120,"# Excludes Estonia, Latvia and Lithuania pri...",,,,,,,,,,...,,,,,,,,,,


In [246]:
# Rename the fist and 2008-2018 column
bp_rc_countries = bp_rc_countries.rename(columns={bp_rc_countries.columns[0]: "Country",
                                                bp_rc_countries.columns[57]: "2008-2018_growth_rate_per_annum"})
bp_rc_countries

Unnamed: 0,Country,1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.0,...,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,2019_year,2019_growth_rate_per_annum,2008-2018_growth_rate_per_annum,2019_share
0,,,,,,,,,,,...,,,,,,,,Growth rate per annum,,Share
1,Exajoules (input-equivalent),1965.0,1966.0,1967.0,1968.0,1969.0,1970.0,1971.0,1972.0,1973.00000,...,2013.000000,2014.000000,2015.000000,2016.000000,2017.000000,2018.000000,2019.000000,2019,2008-18,2019
2,,,,,,,,,,,...,,,,,,,,,,
3,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.290927,0.303300,0.437742,0.488314,0.492008,0.503491,0.523399,0.0395406,0.134528,0.0180629
4,Mexico,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00161,...,0.111551,0.133273,0.157067,0.170452,0.184147,0.216463,0.345292,0.595155,0.102784,0.0119163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,♦ Less than 0.05%.,,,,,,,,,,...,,,,,,,,,,
118,n/a not available.,,,,,,,,,,...,,,,,,,,,,
119,"USSR includesCIS, Georgia, Ukraine and the Bal...",,,,,,,,,,...,,,,,,,,,,
120,"# Excludes Estonia, Latvia and Lithuania pri...",,,,,,,,,,...,,,,,,,,,,


#### Extracting data from my selected three countries: China, Germany, and United States

In [247]:
# Extracting China's data
# Reset index
# Rearrange display method
bp_rc_china = bp_rc_countries[bp_rc_countries["Country"]=="China"]
bp_rc_china.reset_index(inplace=True)
bp_rc_china = bp_rc_china.T
bp_rc_china.head()

Unnamed: 0,0
index,91
Country,China
1965.0,0
1966.0,0
1967.0,0


In [248]:
# Reset index
bp_rc_china.reset_index(inplace=True)
bp_rc_china.head()

Unnamed: 0,index,0
0,index,91
1,Country,China
2,1965,0
3,1966,0
4,1967,0


In [249]:
# Drop the first two rows and rename the columns
# Reset index
bp_rc_china = bp_rc_china.drop([0,1])
bp_rc_china = bp_rc_china.rename(columns={"index":"Year",
                                        0:"China_EJ (input-equiv)"})

bp_rc_china.reset_index(drop=True,inplace=True)
bp_rc_china.head()

Unnamed: 0,Year,China_EJ (input-equiv)
0,1965,0
1,1966,0
2,1967,0
3,1968,0
4,1969,0


In [250]:
# Extracting Germany's data
# Reset index 
# Rearrange display method
bp_rc_germany = bp_rc_countries[bp_rc_countries["Country"]=="Germany"]
bp_rc_germany.reset_index(inplace=True)
bp_rc_germany = bp_rc_germany.T

# Reset index
bp_rc_germany.reset_index(inplace=True)

# Drop the first two rows and rename the columns
bp_rc_germany = bp_rc_germany.drop([0,1])
bp_rc_germany = bp_rc_germany.rename(columns={"index":"Year",
                    0:"Germany_EJ (input-equiv)"})

# Reset index
bp_rc_germany.reset_index(drop=True, inplace=True)
bp_rc_germany.head()

Unnamed: 0,Year,Germany_EJ (input-equiv)
0,1965,0
1,1966,0
2,1967,0
3,1968,0
4,1969,0


In [251]:
# Extracting USA's data
# Rearrange display method
bp_rc_usa = bp_rc_countries[bp_rc_countries["Country"]=="US"]
bp_rc_usa.reset_index(inplace=True)
bp_rc_usa = bp_rc_usa.T

# Reset index
bp_rc_usa.reset_index(inplace=True)

# Drop the first two rows and rename the columns
bp_rc_usa = bp_rc_usa.drop([0,1])
bp_rc_usa = bp_rc_usa.rename(columns={"index":"Year",
                    0:"USA_EJ (input-equiv)"})

# Reset index
bp_rc_usa.reset_index(drop=True, inplace=True)
bp_rc_usa.head()

Unnamed: 0,Year,USA_EJ (input-equiv)
0,1965,0.133322
1,1966,0.14062
2,1967,0.140736
3,1968,0.15546
4,1969,0.162271


#### Combining these dataframe together as one

In [252]:
# Append all dataframes togather as one
bp_rc_all = bp_rc_china.copy()
bp_rc_all["Germany_EJ (input-equiv)"] = bp_rc_germany["Germany_EJ (input-equiv)"]
bp_rc_all["USA_EJ (input-equiv)"] = bp_rc_usa ["USA_EJ (input-equiv)"]

#Reset index to Year
bp_rc_all.reset_index(drop=True, inplace=True)
bp_rc_all.head()

Unnamed: 0,Year,China_EJ (input-equiv),Germany_EJ (input-equiv),USA_EJ (input-equiv)
0,1965,0,0,0.133322
1,1966,0,0,0.14062
2,1967,0,0,0.140736
3,1968,0,0,0.15546
4,1969,0,0,0.162271


#### Normalization of the data

In [253]:
# Update the year values in the dataframe
# Rename the column
bp_rc_new = bp_rc_all.copy()
bp_rc_new["Year"] = bp_rc_new["Year"].replace([1965.0,
                                                         1966.0,
                                                         1967.0,
                                                         1968.0,
                                                         1969.0,
                                                         1970.0,
                                                         1971.0,
                                                         1972.0,
                                                         1973.0,
                                                         1974.0,
                                                         1975.0,
                                                         1976.0,
                                                         1977.0,
                                                         1978.0,
                                                         1979.0,
                                                         1980.0,
                                                         1981.0,
                                                         1982.0,
                                                         1983.0,
                                                         1984.0,
                                                         1985.0,
                                                         1986.0,
                                                         1987.0,
                                                         1988.0,
                                                         1989.0,
                                                         1990.0,
                                                         1991.0,
                                                         1992.0,
                                                         1993.0,
                                                         1994.0,
                                                         1995.0,
                                                         1996.0,
                                                         1997.0,
                                                         1998.0,
                                                         1999.0,
                                                         2000.0,
                                                         2001.0,
                                                         2002.0,
                                                         2003.0,
                                                         2004.0,
                                                         2005.0,
                                                         2006.0,
                                                         2007.0,
                                                         2008.0,
                                                         2009.0,
                                                         2010.0,
                                                         2011.0,
                                                         2012.0,
                                                         2013.0,
                                                         2014.0,
                                                         2015.0,
                                                         2016.0,
                                                         2017.0,
                                                         2018.0,
                                                         2019.0,
                                                         '2019_growth_rate_per_annum',
                                                         '2008-2018_growth_rate_per_annum',
                                                         '2019_share'], [*range(1,59,1)])
bp_rc_new = bp_rc_new.rename(columns={"Year":"year_id"})
bp_rc_new.set_index("year_id", inplace=True)
bp_rc_new.head()

Unnamed: 0_level_0,China_EJ (input-equiv),Germany_EJ (input-equiv),USA_EJ (input-equiv)
year_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0,0,0.133322
2,0,0,0.14062
3,0,0,0.140736
4,0,0,0.15546
5,0,0,0.162271


### Data 3: wesr.unep.org Dataset on GHG Emission

In [254]:
# Get the column names and rearrange them 
unep_df_countries = unep_df.copy()
col_list = list(unep_df_countries.columns)
new_col = ['COUNTRY NAME','INDICATOR NAME (unit)', 'YEAR', 'VALUE']
unep_df_countries = unep_df_countries[new_col]
unep_df_countries

Unnamed: 0,COUNTRY NAME,INDICATOR NAME (unit),YEAR,VALUE
0,China,Greenhouse gas (GHG) emissions from agricultur...,1961,254109.75
1,China,Greenhouse gas (GHG) emissions from agricultur...,1962,256115.15
2,China,Greenhouse gas (GHG) emissions from agricultur...,1963,267398.78
3,China,Greenhouse gas (GHG) emissions from agricultur...,1964,288531.02
4,China,Greenhouse gas (GHG) emissions from agricultur...,1965,301524.32
...,...,...,...,...
383,China,"Greenhouse gas (GHG) emissions, total (metric ...",2011,12234.29
384,China,"Greenhouse gas (GHG) emissions, total (metric ...",2012,12488.24
385,China,"Greenhouse gas (GHG) emissions, total (metric ...",2013,12982.92
386,China,"Greenhouse gas (GHG) emissions, total (metric ...",2014,13113.21


#### Extracting data from the selected year of 2030 and 2050

In [267]:
# Choose the goal year 2030
# Reset index to "COUNTRY NAME"
unep_df_2030 = unep_df_countries[unep_df_countries["YEAR"]== "2030"]
unep_df_2030.reset_index(drop=True, inplace=True)
unep_df_2030

Unnamed: 0,COUNTRY NAME,INDICATOR NAME (unit),YEAR,VALUE
0,China,Greenhouse gas (GHG) emissions from agricultur...,2030,811210.5
1,United States of America,Greenhouse gas (GHG) emissions from agricultur...,2030,361695.8
2,Germany,Greenhouse gas (GHG) emissions from agricultur...,2030,57838.37


In [268]:
# Choose the goal year 2050
# Drop duplicate country
# Reset index to "COUNTRY NAME"
unep_df_2050 = unep_df_countries[unep_df_countries["YEAR"]== "2050"].copy()
unep_df_2050.drop_duplicates("COUNTRY NAME", inplace=True)
unep_df_2050.reset_index(drop=True, inplace=True)
unep_df_2050

Unnamed: 0,COUNTRY NAME,INDICATOR NAME (unit),YEAR,VALUE
0,China,Greenhouse gas (GHG) emissions from agricultur...,2050,875477.4
1,United States of America,Greenhouse gas (GHG) emissions from agricultur...,2050,366640.75
2,Germany,Greenhouse gas (GHG) emissions from agricultur...,2050,55460.54


#### Combining these dataframe together as one

In [270]:
# Append the dataframes together as one
unep_all = pd.concat([unep_df_2030, unep_df_2050], axis=0)
unep_all.reset_index(drop=True, inplace=True)
unep_all

Unnamed: 0,COUNTRY NAME,INDICATOR NAME (unit),YEAR,VALUE
0,China,Greenhouse gas (GHG) emissions from agricultur...,2030,811210.5
1,United States of America,Greenhouse gas (GHG) emissions from agricultur...,2030,361695.8
2,Germany,Greenhouse gas (GHG) emissions from agricultur...,2030,57838.37
3,China,Greenhouse gas (GHG) emissions from agricultur...,2050,875477.4
4,United States of America,Greenhouse gas (GHG) emissions from agricultur...,2050,366640.75
5,Germany,Greenhouse gas (GHG) emissions from agricultur...,2050,55460.54


#### Normalization of the data: Greenhouse gas (GHG) emissions from agriculture (Thousand tons of CO2 equivalent)

In [275]:
# Update the values to their according ids
# Rename the columns
unep_new = unep_all.copy()
unep_new["COUNTRY NAME"] = unep_new["COUNTRY NAME"].replace(['China', 'United States of America', 'Germany'],[1,3,2])
unep_new["YEAR"] = unep_new["YEAR"].replace([2030, 2050],[59, 60])
del unep_new["INDICATOR NAME (unit)"]
unep_new = unep_new.rename(columns={"COUNTRY NAME":"country_id",
                                   "YEAR":"year_id",
                                   "VALUE":"value"})
unep_new.set_index("country_id", inplace=True)
unep_new

Unnamed: 0_level_0,year_id,value
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2030,811210.5
3,2030,361695.8
2,2030,57838.37
1,2050,875477.4
3,2050,366640.75
2,2050,55460.54


 ### Data 4: eia.gov API Dataset on CO2 Emission, Renewable Electricity Net Generation, & Population

#### China

In [257]:
# Locate the wanted data from the json format output
co2_china = co2_response["series"][0]["data"]
len(co2_china)

39

In [258]:
# Create a list of number for the range of the data
# Create keys to hold the year and emission values
# Loop through the list of data to append the needed info list
range_numbers = [*range(0, 39, 1)]

co2_china_year = []
co2_china_emission = []
country_name = []

for number in range_numbers:
    list_set = co2_response["series"][0]["data"][number]
    year = list_set[0]
    emission = list_set[1]
    name = "China"
    co2_china_year.append(year)
    co2_china_emission.append(emission)
    country_name.append(name)

In [259]:
# Create a data frame to hold the keys
co2_china_emission_dic = {
    "Country":country_name,
    "Year":co2_china_year,
    "Emission":co2_china_emission
}

In [260]:
# Create a data frame
# Change it to ascending order
# Reset index to "Country"
china_co2_info_df = pd.DataFrame(co2_china_emission_dic)
china_co2_info_df = china_co2_info_df.sort_values(by="Year", ascending=True)
china_co2_info_df.set_index("Country", inplace=True)
china_co2_info_df.head()

Unnamed: 0_level_0,Year,Emission
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
China,1980,1595.96603
China,1981,1586.50616
China,1982,1670.253074
China,1983,1784.999566
China,1984,1947.119365


#### Germany

In [261]:
# Locate the wanted data from the json format output
co2_germany = co2_response_g["series"][0]["data"]

# Create a list of number for the range of the data
# Create keys to hold the year and emission values
# Loop through the list of data to append the needed info list
range_numbers = [*range(0, len(co2_germany), 1)]

co2_germany_year = []
co2_germany_emission = []
country_name_g = []

for number in range_numbers:
    list_set = co2_response_g["series"][0]["data"][number]
    year = list_set[0]
    emission = list_set[1]
    name = "Germany"
    co2_germany_year.append(year)
    co2_germany_emission.append(emission)
    country_name_g.append(name)

# Create a data frame to hold the keys
co2_germany_emission_dic = {
    "Country":country_name_g,
    "Year":co2_germany_year,
    "Emission":co2_germany_emission
}

# Create a data frame
# Change it to ascending order
# Reset index to "Country"
germany_co2_info_df = pd.DataFrame(co2_germany_emission_dic)
germany_co2_info_df = germany_co2_info_df.sort_values(by="Year", ascending=True)
germany_co2_info_df.set_index("Country", inplace=True)
germany_co2_info_df.head()

Unnamed: 0_level_0,Year,Emission
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Germany,1980,--
Germany,1981,--
Germany,1982,--
Germany,1983,--
Germany,1984,--


In [262]:
germany_co2_info_df

Unnamed: 0_level_0,Year,Emission
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Germany,1980,--
Germany,1981,--
Germany,1982,--
Germany,1983,--
Germany,1984,--
Germany,1985,--
Germany,1986,--
Germany,1987,--
Germany,1988,--
Germany,1989,--


#### USA

In [263]:
# Locate the wanted data from the json format output
co2_usa = co2_response_u["series"][0]["data"]

# Create a list of number for the range of the data
# Create keys to hold the year and emission values
# Loop through the list of data to append the needed info list
range_numbers = [*range(0, len(co2_usa), 1)]

co2_usa_year = []
co2_usa_emission = []
country_name_u = []

for number in range_numbers:
    list_set = co2_response_u["series"][0]["data"][number]
    year = list_set[0]
    emission = list_set[1]
    name = "USA"
    co2_usa_year.append(year)
    co2_usa_emission.append(emission)
    country_name_u.append(name)

# Create a data frame to hold the keys
co2_usa_emission_dic = {
    "Country":country_name_u,
    "Year":co2_usa_year,
    "Emission":co2_usa_emission
}

# Create a data frame
# Change it to ascending order
# Reset index to "Country"
usa_co2_info_df = pd.DataFrame(co2_usa_emission_dic)
usa_co2_info_df = usa_co2_info_df.sort_values(by="Year", ascending=True)
usa_co2_info_df.set_index("Country", inplace=True)
usa_co2_info_df.head()

Unnamed: 0_level_0,Year,Emission
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
USA,1949,2206.690829
USA,1950,2382.046176
USA,1951,2526.687327
USA,1952,2473.373964
USA,1953,2536.892888
