<a href="https://colab.research.google.com/github/william-toscani/Data_Visualization_Project/blob/main/Data_Viz_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Libraries

In [None]:
!pip install pandasql

In [2]:
import requests
import pandas as pd
import numpy as np
from pandasql import sqldf
sql = lambda q: sqldf(q, globals())

import plotly.express as px
import plotly.graph_objects as go

# Datasets

## Population

In [6]:
pop_raw = pd.read_csv("https://ourworldindata.org/grapher/population-with-un-projections.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#pop_metadata = requests.get("https://ourworldindata.org/grapher/population-with-un-projections.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(pop_raw.head())
#display(pop_metadata)
#display(pop_df.info())
#display(pop_df.describe())
#display(pop_df.columns)
#display(pop_df.shape)
#display(pop_df.dtypes)
#display(pop_df.isnull().sum())

pop_clean = sql("""
SELECT Code, population__sex_all__age_all__variant_medium as population FROM pop_raw
WHERE Code IS NOT NULL AND Code IS NOT "OWID_WRL"
AND year = 2024
ORDER BY population__sex_all__age_all__variant_medium DESC
LIMIT 25
""")

display(pop_clean)

Unnamed: 0,Code,population
0,IND,1450936000.0
1,CHN,1419321000.0
2,USA,345426600.0
3,IDN,283487900.0
4,PAK,251269200.0
5,NGA,232679500.0
6,BRA,211998600.0
7,BGD,173562400.0
8,RUS,144820400.0
9,ETH,132059800.0


## World (connect countries with continent)

In [20]:
world_raw = pd.read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv")
world_raw.rename(columns={'alpha-3': 'Code'}, inplace=True)
#display(world_raw.head())
#display(world_raw.columns)


world_clean = sql("""
SELECT Code, region FROM world_raw
""")

display(world_clean)

Unnamed: 0,Code,region
0,AFG,Asia
1,ALA,Europe
2,ALB,Europe
3,DZA,Africa
4,ASM,Oceania
...,...,...
244,WLF,Oceania
245,ESH,Africa
246,YEM,Asia
247,ZMB,Africa


## GDP

In [23]:
gdp_raw = pd.read_csv("https://ourworldindata.org/grapher/gdp-worldbank.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#gdp_metadata = requests.get("https://ourworldindata.org/grapher/gdp-worldbank.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(gdp_raw.head())
#display(gdp_metadata)
#display(gdp_df.info())

gdp_clean = sql("""
SELECT Code, ny_gdp_mktp_pp_kd as gdp FROM gdp_raw
WHERE Code IS NOT NULL AND Code IS NOT "OWID_WRL"
AND year = 2024
""")

display(gdp_clean)

Unnamed: 0,Code,gdp
0,ALB,51360027803
1,DZA,722912369991
2,AND,5402033635
3,AGO,278239182845
4,ATG,2772069805
...,...,...
179,UZB,379988885832
180,VUT,1038801497
181,VNM,1455759302312
182,ZMB,79207172680


## Energy consumption by source

In [32]:
energy_cons_raw = pd.read_csv("https://ourworldindata.org/grapher/global-energy-substitution.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/global-energy-substitution.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()
columns_to_sum = [col for col in energy_cons_raw.columns if col not in ['Year', 'Code', 'Entity']]
energy_cons_raw['total_consumption'] = energy_cons_raw[columns_to_sum].sum(axis=1)
display(energy_cons_raw.head())


energy_cons_clean = sql("""
SELECT Code, total_consumption FROM energy_cons_raw
""")

display(energy_cons_clean)

Unnamed: 0,Entity,Code,Year,other_renewables__twh_substituted_energy,biofuels__twh_substituted_energy,solar__twh_substituted_energy,wind__twh_substituted_energy,hydropower__twh_substituted_energy,nuclear__twh_substituted_energy,gas__twh_substituted_energy,oil__twh_substituted_energy,coal__twh_substituted_energy,traditional_biomass__twh_substituted_energy,total_consumption
0,World,OWID_WRL,1800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97.0,5556,5653.0
1,World,OWID_WRL,1810,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,5833,5961.0
2,World,OWID_WRL,1820,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,153.0,6111,6264.0
3,World,OWID_WRL,1830,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,264.0,6389,6653.0
4,World,OWID_WRL,1840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,356.0,6944,7300.0


Unnamed: 0,Code,total_consumption
0,OWID_WRL,5653.0000
1,OWID_WRL,5961.0000
2,OWID_WRL,6264.0000
3,OWID_WRL,6653.0000
4,OWID_WRL,7300.0000
...,...,...
72,OWID_WRL,167912.8431
73,OWID_WRL,175926.1128
74,OWID_WRL,178900.5715
75,OWID_WRL,181911.5645


## Co2 emissions

In [None]:
emission_df = pd.read_csv("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
emission_metadata = requests.get("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

display(emission_df.head())

Unnamed: 0,Entity,Code,Year,emissions_total
0,Afghanistan,AFG,1949,14656.0
1,Afghanistan,AFG,1950,84272.0
2,Afghanistan,AFG,1951,91600.0
3,Afghanistan,AFG,1952,91600.0
4,Afghanistan,AFG,1953,106256.0


# Data Analysis

## Research question: top 20 biggest countries in the world: how do they emit and consume energy?

In [None]:
top_20_countries_df = sql("""
SELECT * FROM pop_df
WHERE Code IS NOT NULL AND Code IS NOT "OWID_WRL"
AND population__sex_all__age_all__variant_medium IS NOT NULL
AND year = 2024
ORDER BY population__sex_all__age_all__variant_medium DESC
LIMIT 25

""")

display(top_20_countries_df)

Unnamed: 0,Entity,Code,Year,population__sex_all__age_all__variant_estimates,population__sex_all__age_all__variant_medium
0,India,IND,2024,,1450936000.0
1,China,CHN,2024,,1419321000.0
2,United States,USA,2024,,345426600.0
3,Indonesia,IDN,2024,,283487900.0
4,Pakistan,PAK,2024,,251269200.0
5,Nigeria,NGA,2024,,232679500.0
6,Brazil,BRA,2024,,211998600.0
7,Bangladesh,BGD,2024,,173562400.0
8,Russia,RUS,2024,,144820400.0
9,Ethiopia,ETH,2024,,132059800.0


In [None]:
new_gdp_df = sql("""
SELECT gdp_df.*, top_20_countries_df.population__sex_all__age_all__variant_medium AS TopCountryPopulation
FROM gdp_df
JOIN top_20_countries_df ON gdp_df.Code = top_20_countries_df.Code
WHERE gdp_df.Code IS NOT NULL AND gdp_df.Code IS NOT "OWID_WRL"
AND gdp_df.year = 2024

""")

display(new_gdp_df)

Unnamed: 0,Entity,Code,Year,ny_gdp_mktp_pp_kd,TopCountryPopulation
0,Bangladesh,BGD,2024,1472986459828,173562400.0
1,Brazil,BRA,2024,4165328804627,211998600.0
2,China,CHN,2024,33597882047418,1419321000.0
3,Democratic Republic of Congo,COD,2024,164367209943,109276300.0
4,Egypt,EGY,2024,1957627322504,116538300.0
5,Ethiopia,ETH,2024,380895311983,132059800.0
6,France,FRA,2024,3731760755094,66548530.0
7,Germany,DEU,2024,5246975952526,84552230.0
8,India,IND,2024,14243939416927,1450936000.0
9,Indonesia,IDN,2024,4102194303076,283487900.0


In [None]:
emission_gdp_df = sql("""
SELECT
    new_gdp_df.Entity,
    new_gdp_df.Code,
    new_gdp_df.Year,
    new_gdp_df.TopCountryPopulation AS population,
    CAST(emission_df.emissions_total AS REAL) / new_gdp_df.ny_gdp_mktp_pp_kd AS emission_per_gdp
FROM new_gdp_df
JOIN emission_df ON new_gdp_df.Code = emission_df.Code AND new_gdp_df.Year = emission_df.Year
WHERE emission_df.Code IS NOT NULL AND emission_df.Code IS NOT "OWID_WRL"
AND emission_df.year = 2024

ORDER by population DESC
""")

display(emission_gdp_df)

Unnamed: 0,Entity,Code,Year,population,emission_per_gdp
0,India,IND,2024,1450936000.0,0.000224
1,China,CHN,2024,1419321000.0,0.000366
2,United States,USA,2024,345426600.0,0.000191
3,Indonesia,IDN,2024,283487900.0,0.000198
4,Pakistan,PAK,2024,251269200.0,0.000129
5,Nigeria,NGA,2024,232679500.0,0.000103
6,Brazil,BRA,2024,211998600.0,0.000116
7,Bangladesh,BGD,2024,173562400.0,7.4e-05
8,Russia,RUS,2024,144820400.0,0.000292
9,Ethiopia,ETH,2024,132059800.0,4.7e-05


In [None]:
# Calculate min and max for 'emission_per_gdp'
min_emission = emission_gdp_df['emission_per_gdp'].min()
max_emission = emission_gdp_df['emission_per_gdp'].max()

# Apply min-max normalization
emission_gdp_df['emission_per_gdp_normalized'] = (emission_gdp_df['emission_per_gdp'] - min_emission) / (max_emission - min_emission)

display(emission_gdp_df)

Unnamed: 0,Entity,Code,Year,population,emission_per_gdp,emission_per_gdp_normalized
0,India,IND,2024,1450936000.0,0.000224,0.378354
1,China,CHN,2024,1419321000.0,0.000366,0.662845
2,United States,USA,2024,345426600.0,0.000191,0.311645
3,Indonesia,IDN,2024,283487900.0,0.000198,0.325698
4,Pakistan,PAK,2024,251269200.0,0.000129,0.187797
5,Nigeria,NGA,2024,232679500.0,0.000103,0.134867
6,Brazil,BRA,2024,211998600.0,0.000116,0.160841
7,Bangladesh,BGD,2024,173562400.0,7.4e-05,0.075587
8,Russia,RUS,2024,144820400.0,0.000292,0.515441
9,Ethiopia,ETH,2024,132059800.0,4.7e-05,0.021919


Research question: what are the major countries (in terms of gdp and population) for the electricity consumption via fossil?

In [None]:
fossil = np.array(["oil_generation__twh_chart_electricity_prod_source_stacked","gas_generation__twh_chart_electricity_prod_source_stacked", "coal_generation__twh_chart_electricity_prod_source_stacked"])
renewables = np.array(["other_renewables_excluding_bioenergy_generation__twh_chart_electricity_prod_source_stacked", "bioenergy_generation__twh_chart_electricity_prod_source_stacked",
                       "hydro_generation__twh_chart_electricity_prod_source_stacked", "solar_generation__twh_chart_electricity_prod_source_stacked", "wind_generation__twh_chart_electricity_prod_source_stacked"])

fossil_elec_prod = sql(f"""
SELECT Entity, year, {",".join(fossil)} FROM elec_prod_df
WHERE CODE IS NOT NULL AND year > 1985
ORDER BY year
""")

display(fossil_elec_prod)


Unnamed: 0,Entity,Year,oil_generation__twh_chart_electricity_prod_source_stacked,gas_generation__twh_chart_electricity_prod_source_stacked,coal_generation__twh_chart_electricity_prod_source_stacked
0,Argentina,1986,7.894000,13.249956,1.013000
1,Australia,1986,3.771000,13.784000,92.587000
2,Brazil,1986,10.187736,0.000000,6.184231
3,Canada,1986,6.166000,6.886000,71.627000
4,China,1986,53.678000,1.693000,299.610350
...,...,...,...,...,...
5757,United Kingdom,2024,9.230000,84.160000,2.300000
5758,United States,2024,30.320000,1864.870000,652.760000
5759,Uruguay,2024,0.930000,0.000000,0.000000
5760,Vietnam,2024,0.110000,21.630000,149.350000
