<a href="https://colab.research.google.com/github/william-toscani/Data_Visualization_Project/blob/main/Data_Viz_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Libraries

In [None]:
!pip install pandasql

In [2]:
import requests
import pandas as pd
import numpy as np
from pandasql import sqldf
sql = lambda q: sqldf(q, globals())

import plotly.express as px
import plotly.graph_objects as go

# Datasets

## World (connect countries with continent)

In [71]:
world_raw = pd.read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv")
world_raw.rename(columns={'alpha-3': 'code'}, inplace=True)
#display(world_raw.head())
#display(world_raw.columns)


world_clean = sql("""
SELECT name, code, region FROM world_raw
""")

display(world_clean)

Unnamed: 0,name,code,region
0,Afghanistan,AFG,Asia
1,Åland Islands,ALA,Europe
2,Albania,ALB,Europe
3,Algeria,DZA,Africa
4,American Samoa,ASM,Oceania
...,...,...,...
244,Wallis and Futuna,WLF,Oceania
245,Western Sahara,ESH,Africa
246,Yemen,YEM,Asia
247,Zambia,ZMB,Africa


## Population

In [75]:
pop_raw = pd.read_csv("https://ourworldindata.org/grapher/population-with-un-projections.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#pop_metadata = requests.get("https://ourworldindata.org/grapher/population-with-un-projections.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(pop_raw.head())
#display(pop_metadata)
#display(pop_df.info())
#display(pop_df.describe())
#display(pop_df.columns)
#display(pop_df.shape)
#display(pop_df.dtypes)
#display(pop_df.isnull().sum())

pop_clean = sql("""
SELECT world_clean.code, pop_raw.population__sex_all__age_all__variant_medium as population FROM pop_raw
INNER JOIN world_clean ON pop_raw.Code = world_clean.code
WHERE year = 2024
""")

display(pop_clean)

Unnamed: 0,code,population
0,AFG,42647502.0
1,ALB,2791756.0
2,DZA,46814302.0
3,ASM,46792.0
4,AND,81955.0
...,...,...
231,WLF,11303.0
232,ESH,590515.0
233,YEM,40583173.0
234,ZMB,21314962.0


## GDP

In [22]:
gdp_raw = pd.read_csv("https://ourworldindata.org/grapher/gdp-worldbank.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#gdp_metadata = requests.get("https://ourworldindata.org/grapher/gdp-worldbank.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(gdp_raw.head())
#display(gdp_metadata)
#display(gdp_df.info())

gdp_clean = sql("""
SELECT gdp_raw.Code, gdp_raw.ny_gdp_mktp_pp_kd as gdp FROM gdp_raw
INNER JOIN pop_clean ON gdp_raw.Code = pop_clean.Code
WHERE gdp_raw.Code IS NOT NULL AND gdp_raw.Code IS NOT "OWID_WRL"
AND gdp_raw.year = 2024
""")

display(gdp_clean)

Unnamed: 0,Code,gdp
0,BGD,1472986459828
1,BRA,4165328804627
2,CHN,33597882047418
3,COD,164367209943
4,EGY,1957627322504
5,ETH,380895311983
6,FRA,3731760755094
7,DEU,5246975952526
8,IND,14243939416927
9,IDN,4102194303076


## Energy consumption by source

In [25]:
energy_cons_raw = pd.read_csv("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

columns_to_sum = [col for col in energy_cons_raw.columns if col not in ['Year', 'Code', 'Entity']]
energy_cons_raw['total_consumption'] = energy_cons_raw[columns_to_sum].sum(axis=1)
#display(energy_cons_raw)

energy_cons_clean = sql("""
SELECT energy_cons_raw.Code, total_consumption FROM energy_cons_raw
INNER JOIN pop_clean ON energy_cons_raw.Code = pop_clean.Code
WHERE energy_cons_raw.Code IS NOT NULL AND energy_cons_raw.Code IS NOT "OWID_WRL"
AND energy_cons_raw.year = 2024
""")

display(energy_cons_clean)


Unnamed: 0,Code,total_consumption
0,BGD,528.719584
1,BRA,3825.93604
2,CHN,48476.99049
3,COD,0.0
4,EGY,1121.893978
5,FRA,2481.191396
6,DEU,3157.213447
7,IND,11276.181985
8,IDN,2990.648073
9,IRN,3586.318615


## Co2 emissions

In [33]:
emission_raw = pd.read_csv("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#emission_metadata = requests.get("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()
#display(emission_raw.head())

emission_clean = sql("""
SELECT emission_raw.Code, emission_raw.emissions_total FROM emission_raw
INNER JOIN pop_clean ON emission_raw.Code = pop_clean.Code
WHERE emission_raw.Code IS NOT NULL AND emission_raw.Code IS NOT "OWID_WRL"
AND emission_raw.year = 2024
""")

display(emission_clean)

Unnamed: 0,Code,emissions_total
0,BGD,108317900.0
1,BRA,483011600.0
2,CHN,12289040000.0
3,COD,5904451.0
4,EGY,258367900.0
5,ETH,17837290.0
6,FRA,264155600.0
7,DEU,572319200.0
8,IND,3193478000.0
9,IDN,812220200.0


# No way

In [54]:
share_raw = pd.read_csv("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

fossil_columns = ['gas', 'oil', 'coal']
rinnovabili_columns = ['wind', 'hydro', 'other_renewables', 'nuclear', 'solar', 'biofuels']

new_columns = {}
for col in share_raw.columns:
    if '__pct_direct_primary_energy' in col:
        new_name = col.replace('__pct_direct_primary_energy', '')
        new_columns[col] = new_name
share_raw = share_raw.rename(columns=new_columns)

share_clean= sql(f"""
SELECT share_raw.Code, {",".join(fossil_columns)} FROM share_raw
INNER JOIN pop_clean on share_raw.Code = pop_clean.Code
WHERE share_raw.Code IS NOT NULL AND share_raw.Code IS NOT "OWID_WRL"
AND year = 2024
""")

display(share_clean)

Unnamed: 0,Code,gas,oil,coal
0,BGD,52.016747,30.54752,16.984356
1,BRA,11.097937,50.51955,5.230067
2,CHN,10.150709,20.946898,59.819595
3,EGY,55.16813,40.61497,1.764306
4,FRA,18.694588,45.359646,2.887011
5,DEU,28.93888,43.334034,16.140873
6,IND,6.623613,28.543978,60.125793
7,IDN,16.55144,30.776245,45.914703
8,IRN,69.21415,29.464396,0.520399
9,ITA,39.90213,48.263275,1.891833


# Data Analysis

In [35]:
merged_data = sql("""
SELECT
    ec.Code,
    (CAST(ec.emissions_total AS REAL) / gc.gdp) AS emissions_per_gdp,
    (CAST(ecc.total_consumption AS REAL) / gc.gdp) AS consumption_per_gdp,
    wc.region
FROM
    emission_clean AS ec
INNER JOIN
    energy_cons_clean AS ecc ON ec.Code = ecc.Code
INNER JOIN
    gdp_clean AS gc ON ec.Code = gc.Code
INNER JOIN
    world_clean AS wc ON ec.Code = wc.Code
""")

display(merged_data)

Unnamed: 0,Code,emissions_per_gdp,consumption_per_gdp,region
0,BGD,7.4e-05,3.58944e-10,Asia
1,BRA,0.000116,9.185196e-10,Americas
2,CHN,0.000366,1.442859e-09,Asia
3,COD,3.6e-05,0.0,Africa
4,EGY,0.000132,5.730886e-10,Africa
5,FRA,7.1e-05,6.648849e-10,Europe
6,DEU,0.000109,6.017206e-10,Europe
7,IND,0.000224,7.916477e-10,Asia
8,IDN,0.000198,7.290362e-10,Asia
9,IRN,0.000534,2.414057e-09,Asia


In [38]:
fig = px.scatter(merged_data, x='emissions_per_gdp', y='consumption_per_gdp', color='region', hover_data=['Code'])
fig.update_traces(marker=dict(size=12)) # Increase point size
fig.update_layout(width=800, height=800) # Make the plot square
fig.show()

In [56]:
fig = px.choropleth(emission_clean, locations='Code', color='emissions_total',
                    hover_name='Code', # show country name on hover
                    color_continuous_scale=px.colors.sequential.Greens, # color scale
                    title='CO2 Emissions per Country (2024)')
fig.show()