<a href="https://colab.research.google.com/github/william-toscani/Data_Visualization_Project/blob/main/Data_Viz_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Libraries

In [None]:
!pip install pandasql

In [2]:
import requests
import pandas as pd
import numpy as np
from pandasql import sqldf
sql = lambda q: sqldf(q, globals())

import plotly.express as px
import plotly.graph_objects as go

# Datasets

## World (connect countries with continent)

In [71]:
world_raw = pd.read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv")
world_raw.rename(columns={'alpha-3': 'code'}, inplace=True)
#display(world_raw.head())
#display(world_raw.columns)


world_clean = sql("""
SELECT name, code, region FROM world_raw
""")

display(world_clean)

Unnamed: 0,name,code,region
0,Afghanistan,AFG,Asia
1,Åland Islands,ALA,Europe
2,Albania,ALB,Europe
3,Algeria,DZA,Africa
4,American Samoa,ASM,Oceania
...,...,...,...
244,Wallis and Futuna,WLF,Oceania
245,Western Sahara,ESH,Africa
246,Yemen,YEM,Asia
247,Zambia,ZMB,Africa


## Population

In [75]:
pop_raw = pd.read_csv("https://ourworldindata.org/grapher/population-with-un-projections.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#pop_metadata = requests.get("https://ourworldindata.org/grapher/population-with-un-projections.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(pop_raw.head())
#display(pop_metadata)
#display(pop_df.info())
#display(pop_df.describe())
#display(pop_df.columns)
#display(pop_df.shape)
#display(pop_df.dtypes)
#display(pop_df.isnull().sum())

pop_clean = sql("""
SELECT world_clean.code, pop_raw.population__sex_all__age_all__variant_medium as population FROM pop_raw
INNER JOIN world_clean ON pop_raw.Code = world_clean.code
WHERE year = 2024
""")

display(pop_clean)

Unnamed: 0,code,population
0,AFG,42647502.0
1,ALB,2791756.0
2,DZA,46814302.0
3,ASM,46792.0
4,AND,81955.0
...,...,...
231,WLF,11303.0
232,ESH,590515.0
233,YEM,40583173.0
234,ZMB,21314962.0


## GDP

In [84]:
gdp_raw = pd.read_csv("https://ourworldindata.org/grapher/gdp-worldbank.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#gdp_metadata = requests.get("https://ourworldindata.org/grapher/gdp-worldbank.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(gdp_raw.head())
#display(gdp_metadata)
#display(gdp_df.info())

gdp_clean = sql("""
SELECT gdp_raw.code, ny_gdp_mktp_pp_kd as gdp FROM gdp_raw
INNER JOIN pop_clean ON gdp_raw.Code = pop_clean.code
WHERE year = 2024
""")

display(gdp_clean)

Unnamed: 0,Code,gdp
0,ALB,51360027803
1,DZA,722912369991
2,AND,5402033635
3,AGO,278239182845
4,ATG,2772069805
...,...,...
178,UZB,379988885832
179,VUT,1038801497
180,VNM,1455759302312
181,ZMB,79207172680


## Energy consumption by source

In [86]:
energy_cons_raw = pd.read_csv("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

columns_to_sum = [col for col in energy_cons_raw.columns if col not in ['Year', 'Code', 'Entity']]
energy_cons_raw['total_consumption'] = energy_cons_raw[columns_to_sum].sum(axis=1)
#display(energy_cons_raw)

energy_cons_clean = sql("""
SELECT energy_cons_raw.code, total_consumption FROM energy_cons_raw
INNER JOIN gdp_clean ON energy_cons_raw.Code = gdp_clean.code
WHERE year = 2024
""")

display(energy_cons_clean)


Unnamed: 0,Code,total_consumption
0,DZA,760.020251
1,AGO,0.000000
2,ARG,975.028883
3,AUS,1657.616945
4,AUT,402.558966
...,...,...
93,USA,26349.447510
94,UZB,693.631223
95,VNM,1457.180538
96,ZMB,0.000000


## Co2 emissions

In [157]:
emission_raw = pd.read_csv("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#emission_metadata = requests.get("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()
#display(emission_raw.head())

emission_clean = sql("""
SELECT wc.region, emission_raw.code, emissions_total FROM emission_raw
INNER JOIN energy_cons_clean ON emission_raw.Code = energy_cons_clean.Code
INNER JOIN world_clean AS wc ON energy_cons_clean.Code = wc.Code
WHERE year = 2024
ORDER BY emissions_total
LIMIT 10
""")

display(emission_clean)

Unnamed: 0,region,Code,emissions_total
0,Africa,TCD,2831401.0
1,Europe,ISL,3803011.0
2,Americas,GUY,4510330.0
3,Africa,MDG,4528381.0
4,Africa,GAB,5398794.0
5,Africa,COD,5904451.0
6,Europe,LVA,6461889.0
7,Europe,MKD,6619636.0
8,Africa,GNQ,7010352.0
9,Europe,LUX,7039783.0


## Share

In [131]:
share_raw = pd.read_csv("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

fossil_columns = ['gas', 'oil', 'coal']
renew_cols = ['wind', 'hydro', 'other_renewables', 'nuclear', 'solar', 'biofuels']

new_columns = {}
for col in share_raw.columns:
    if '__pct_direct_primary_energy' in col:
        new_name = col.replace('__pct_direct_primary_energy', '')
        new_columns[col] = new_name
share_raw = share_raw.rename(columns=new_columns)

share_clean= sql(f"""
SELECT  wc.name, wc.region, share_raw.Code, {",".join(fossil_columns)}, {",".join(renew_cols)}, {"+".join(fossil_columns)} as fossil, {"+".join(renew_cols)} as renewable FROM share_raw
INNER JOIN pop_clean on share_raw.Code = pop_clean.Code
INNER JOIN world_clean AS wc ON share_raw.Code = wc.Code
WHERE share_raw.Code IS NOT NULL AND share_raw.Code IS NOT "OWID_WRL"
AND year = 2024

ORDER BY renewable DESC
LIMIT 20
""")

display(share_clean)

fossil_share_clean= sql(f"""
SELECT  wc.name, wc.region, share_raw.Code, {",".join(fossil_columns)}, {",".join(renew_cols)}, {"+".join(fossil_columns)} as fossil, {"+".join(renew_cols)} as renewable FROM share_raw
INNER JOIN pop_clean on share_raw.Code = pop_clean.Code
INNER JOIN world_clean AS wc ON share_raw.Code = wc.Code
WHERE share_raw.Code IS NOT NULL AND share_raw.Code IS NOT "OWID_WRL"
AND year = 2024

ORDER BY fossil DESC
LIMIT 20
""")

display(fossil_share_clean)

Unnamed: 0,name,region,Code,gas,oil,coal,wind,hydro,other_renewables,nuclear,solar,biofuels,fossil,renewable
0,Iceland,Europe,ISL,0.0,35.052288,3.867714,0.03811,41.944,18.426788,0.0,2.5e-05,0.671072,38.920002,61.079995
1,Norway,Europe,NOR,10.875145,34.748486,2.845182,4.689538,45.066917,0.04249,0.0,0.178739,1.553502,48.468813,51.531187
2,Sweden,Europe,SWE,2.805776,40.132954,5.538157,11.96261,19.085928,3.057774,14.942303,1.223991,1.250507,48.476887,51.523113
3,Finland,Europe,FIN,6.143715,42.05808,10.145653,10.021552,6.946723,4.978643,15.82995,0.621486,3.254199,58.347448,41.652553
4,Switzerland,Europe,CHE,15.623207,48.644077,0.386184,0.077479,20.358095,0.713321,10.413437,2.674608,1.109593,64.653468,35.346533
5,Brazil,Americas,BRA,11.097937,50.51955,5.230067,3.838979,14.621363,2.060633,0.558282,2.523628,9.549561,66.847554,33.152447
6,France,Europe,FRA,18.694588,45.359646,2.887011,2.756961,4.146557,0.562129,22.248041,1.35573,1.989337,66.941245,33.058756
7,Austria,Europe,AUT,22.63297,43.752766,9.277962,3.05543,14.861383,1.541471,0.0,3.126327,1.751691,75.663698,24.336303
8,Slovenia,Europe,SVN,14.18565,44.314625,17.229225,0.0113,9.089899,0.458699,9.969601,3.132628,1.608375,75.7295,24.270502
9,Denmark,Europe,DNK,11.302169,62.446857,3.672187,14.208114,0.013487,4.003976,0.0,2.692655,1.660555,77.421213,22.578787


Unnamed: 0,name,region,Code,gas,oil,coal,wind,hydro,other_renewables,nuclear,solar,biofuels,fossil,renewable
0,Turkmenistan,Asia,TKM,76.85086,23.146406,0.0,0.0,0.000851,0.0,0.0,0.00188,0.0,99.997266,0.002731
1,Trinidad and Tobago,Americas,TTO,88.05143,11.944149,0.0,1.7e-05,0.0,0.0,0.0,0.004407,0.0,99.995579,0.004424
2,Kuwait,Asia,KWT,46.96385,52.797966,0.207167,0.006696,0.0,0.0,0.0,0.024326,0.0,99.968983,0.031022
3,Algeria,Africa,DZA,66.60319,33.068714,0.228871,0.002114,0.006744,0.0,0.0,0.090373,0.0,99.900775,0.099231
4,Singapore,Asia,SGP,12.363035,87.101326,0.333909,0.0,0.0,0.080322,0.0,0.121406,0.0,99.79827,0.201729
5,Iraq,Asia,IRQ,26.937363,72.82226,0.0,0.0,0.188941,0.0,0.0,0.051442,0.0,99.759623,0.240384
6,Saudi Arabia,Asia,SAU,37.095886,62.55164,0.052546,0.048624,0.0,0.0,0.0,0.251306,0.0,99.700072,0.29993
7,Qatar,Asia,QAT,71.44886,28.199583,0.023374,0.0,0.0,0.020716,0.0,0.307466,0.0,99.671817,0.328183
8,Bangladesh,Asia,BGD,52.016747,30.54752,16.984356,0.018501,0.189188,0.002231,0.0,0.241458,0.0,99.548623,0.451378
9,Oman,Asia,OMN,71.3906,27.83983,0.309314,0.027421,0.0,0.0,0.0,0.432832,0.0,99.539744,0.460253


# 1 Chi guida la transizione?

In [122]:
fig1 = px.bar(share_clean,
             x='renewable',
             y='name',
             color='region',
             orientation='h',
             title='Top 10 countries for renewable share',
             text='renewable')

fig1.update_traces(texttemplate='%{text:.2s}%', textposition='inside') # Position text inside the bar
fig1.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''}, # Remove y-axis title
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5) # Center the title
fig1.show()

In [128]:
fig2 = px.bar(fossil_share_clean,
             x='fossil',
             y='name',
             color='region',
             orientation='h',
             title='Top 10 countries for renewable share',
             text='renewable')

fig2.update_traces(texttemplate='%{text:.2s}%', textposition='inside') # Position text inside the bar
fig2.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''}, # Remove y-axis title
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5) # Center the title
fig2.show()

# Data Analysis

In [35]:
merged_data = sql("""
SELECT
    ec.Code,
    (CAST(ec.emissions_total AS REAL) / gc.gdp) AS emissions_per_gdp,
    (CAST(ecc.total_consumption AS REAL) / gc.gdp) AS consumption_per_gdp,
    wc.region
FROM
    emission_clean AS ec
INNER JOIN
    energy_cons_clean AS ecc ON ec.Code = ecc.Code
INNER JOIN
    gdp_clean AS gc ON ec.Code = gc.Code
INNER JOIN
    world_clean AS wc ON ec.Code = wc.Code
""")

display(merged_data)

Unnamed: 0,Code,emissions_per_gdp,consumption_per_gdp,region
0,BGD,7.4e-05,3.58944e-10,Asia
1,BRA,0.000116,9.185196e-10,Americas
2,CHN,0.000366,1.442859e-09,Asia
3,COD,3.6e-05,0.0,Africa
4,EGY,0.000132,5.730886e-10,Africa
5,FRA,7.1e-05,6.648849e-10,Europe
6,DEU,0.000109,6.017206e-10,Europe
7,IND,0.000224,7.916477e-10,Asia
8,IDN,0.000198,7.290362e-10,Asia
9,IRN,0.000534,2.414057e-09,Asia


In [38]:
fig = px.scatter(merged_data, x='emissions_per_gdp', y='consumption_per_gdp', color='region', hover_data=['Code'])
fig.update_traces(marker=dict(size=12)) # Increase point size
fig.update_layout(width=800, height=800) # Make the plot square
fig.show()

In [None]:
fig = px.choropleth(emission_clean, locations='Code', color='emissions_total',
                    hover_name='Code', # show country name on hover
                    color_continuous_scale=px.colors.sequential.Greens, # color scale
                    title='CO2 Emissions per Country (2024)')
fig.show()

In [134]:
renew_cols = ['hydro', 'nuclear', 'wind', 'solar', 'other_renewables', 'biofuels']

share_clean_melted = pd.melt(share_clean,
                             id_vars=['name', 'region', 'Code'],
                             value_vars=renew_cols,
                             var_name='renewable_source',
                             value_name='share_value')

share_clean_melted = share_clean_melted[share_clean_melted['share_value'] != 0]

display(share_clean_melted.head())

Unnamed: 0,name,region,Code,renewable_source,share_value
0,Iceland,Europe,ISL,hydro,41.944
1,Norway,Europe,NOR,hydro,45.066917
2,Sweden,Europe,SWE,hydro,19.085928
3,Finland,Europe,FIN,hydro,6.946723
4,Switzerland,Europe,CHE,hydro,20.358095


In [153]:
color_map = {
    'hydro': '#1F77B4',  # Vibrant Blue
    'nuclear': '#9467BD', # Vibrant Purple
    'wind': '#17BECF',    # Vibrant Cyan
    'solar': '#FFD700',   # Vibrant Gold/Yellow
    'other_renewables': '#2CA02C', # Vibrant Green
    'biofuels': '#8C564B' # Vibrant Brown
}

fig3 = px.bar(share_clean_melted,
             x='share_value',
             y='name',
             color='renewable_source',
             orientation='h',
             title='Top Countries for Renewable Share (by Source)',
             category_orders={'renewable_source': ['hydro', 'nuclear', 'wind', 'solar', 'other_renewables', 'biofuels']},
             color_discrete_map=color_map) # Apply custom color map

fig3.update_traces(textposition='none') # Remove text from inside the bar
fig3.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''},
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5,
                    legend_title_text='Renewable Sources') # Add title for the primary legend
fig3.show()

In [155]:
region_pastel_color_map = {
    'Europe': '#CBD5E8',  # Light Blue
    'Oceania': '#B3E2CD',  # Light Green
    'Americas': '#FDCDAC', # Light Orange
    'Asia': '#F4CAE4',    # Light Pink
    'Africa': '#E6F5C9',  # Pale Yellow Green
    None: '#CCCCCC' # Grey for any missing region
}

shapes_to_add = []

# Get the list of countries in the order they appear on the y-axis
# y_categories_ordered and y_category_to_pos are already available in the kernel state

for i, country_name in enumerate(y_categories_ordered):
    # Find the region for the current country
    country_region = share_clean[share_clean['name'] == country_name]['region'].iloc[0]

    # Get the corresponding pastel color
    fill_color = region_pastel_color_map.get(country_region, '#CCCCCC') # Default to grey if region not found

    # Create a rectangle shape for the 'shadow'
    shapes_to_add.append(
        go.layout.Shape(
            type="rect",
            xref="x",
            yref="y",
            x0=0, # Start at the beginning of the x-axis
            y0=i - 0.5, # Slightly below the bar center
            y1=i + 0.5, # Slightly above the bar center
            x1=100, # End at the maximum of the x-axis (total percentage)
            fillcolor=fill_color,
            layer="below", # Place behind the bars
            line_width=0, # No border for the shadow
            opacity=0.6 # Adjust opacity if desired
        )
    )

# Update the layout of fig3 with the new shapes
fig3.update_layout(shapes=shapes_to_add)

# Display the modified figure
fig3.show()

# Quali nazioni guidano la transizione?