<a href="https://colab.research.google.com/github/william-toscani/Data_Visualization_Project/blob/main/Data_Viz_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Libraries

In [None]:
!pip install pandasql

In [2]:
import requests
import pandas as pd
import numpy as np
from pandasql import sqldf
sql = lambda q: sqldf(q, globals())

import plotly.express as px
import plotly.graph_objects as go

# Datasets

## World

In [3]:
world_raw = pd.read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv")
world_raw.rename(columns={'alpha-3': 'code'}, inplace=True)
#display(world_raw.head())
#display(world_raw.columns)


world_clean = sql("""
SELECT name, code, region FROM world_raw
""")

display(world_clean)

Unnamed: 0,name,code,region
0,Afghanistan,AFG,Asia
1,Åland Islands,ALA,Europe
2,Albania,ALB,Europe
3,Algeria,DZA,Africa
4,American Samoa,ASM,Oceania
...,...,...,...
244,Wallis and Futuna,WLF,Oceania
245,Western Sahara,ESH,Africa
246,Yemen,YEM,Asia
247,Zambia,ZMB,Africa


## Population

In [78]:
pop_raw = pd.read_csv("https://ourworldindata.org/grapher/population-with-un-projections.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#pop_metadata = requests.get("https://ourworldindata.org/grapher/population-with-un-projections.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(pop_raw.head())
#display(pop_metadata)
#display(pop_df.info())
#display(pop_df.describe())
#display(pop_df.columns)
#display(pop_df.shape)
#display(pop_df.dtypes)
#display(pop_df.isnull().sum())

pop_clean = sql("""
SELECT world_clean.code, pop_raw.population__sex_all__age_all__variant_medium as population FROM pop_raw
INNER JOIN world_clean ON pop_raw.Code = world_clean.code
WHERE year = 2024
ORDER by population desc
""")

display(pop_clean)

Unnamed: 0,code,population
0,IND,1.450936e+09
1,CHN,1.419321e+09
2,USA,3.454266e+08
3,IDN,2.834879e+08
4,PAK,2.512692e+08
...,...,...
231,MSR,4.414000e+03
232,FLK,3.492000e+03
233,TKL,2.528000e+03
234,NIU,1.841000e+03


## GDP

In [5]:
gdp_raw = pd.read_csv("https://ourworldindata.org/grapher/gdp-worldbank.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#gdp_metadata = requests.get("https://ourworldindata.org/grapher/gdp-worldbank.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

#display(gdp_raw.head())
#display(gdp_metadata)
#display(gdp_df.info())

gdp_clean = sql("""
SELECT gdp_raw.code, ny_gdp_mktp_pp_kd as gdp FROM gdp_raw
INNER JOIN pop_clean ON gdp_raw.Code = pop_clean.code
WHERE year = 2024
""")

display(gdp_clean)

Unnamed: 0,Code,gdp
0,ALB,51360027803
1,DZA,722912369991
2,AND,5402033635
3,AGO,278239182845
4,ATG,2772069805
...,...,...
178,UZB,379988885832
179,VUT,1038801497
180,VNM,1455759302312
181,ZMB,79207172680


## Energy consumption by source

In [6]:
energy_cons_raw = pd.read_csv("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/energy-consumption-by-source-and-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

columns_to_sum = [col for col in energy_cons_raw.columns if col not in ['Year', 'Code', 'Entity']]
energy_cons_raw['total_consumption'] = energy_cons_raw[columns_to_sum].sum(axis=1)
#display(energy_cons_raw)

energy_cons_clean = sql("""
SELECT energy_cons_raw.code, total_consumption FROM energy_cons_raw
INNER JOIN gdp_clean ON energy_cons_raw.Code = gdp_clean.code
WHERE year = 2024
""")

display(energy_cons_clean)


Unnamed: 0,Code,total_consumption
0,DZA,760.020251
1,AGO,0.000000
2,ARG,975.028883
3,AUS,1657.616945
4,AUT,402.558966
...,...,...
93,USA,26349.447510
94,UZB,693.631223
95,VNM,1457.180538
96,ZMB,0.000000


## Co2 emissions

In [25]:
emission_raw = pd.read_csv("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#emission_metadata = requests.get("https://ourworldindata.org/grapher/annual-co2-emissions-per-country.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()
#display(emission_raw.head())

emission_clean = sql("""
SELECT wc.region, emission_raw.code, emissions_total FROM emission_raw
INNER JOIN energy_cons_clean ON emission_raw.Code = energy_cons_clean.Code
INNER JOIN world_clean AS wc ON energy_cons_clean.Code = wc.Code
WHERE year = 2024
ORDER BY emissions_total DESC
""")

display(emission_clean)

Unnamed: 0,region,Code,emissions_total
0,Asia,CHN,1.228904e+10
1,Americas,USA,4.904120e+09
2,Asia,IND,3.193478e+09
3,Europe,RUS,1.780524e+09
4,Asia,JPN,9.618673e+08
...,...,...,...
93,Africa,GAB,5.398794e+06
94,Africa,MDG,4.528381e+06
95,Americas,GUY,4.510330e+06
96,Europe,ISL,3.803011e+06


## Share

In [8]:
share_raw = pd.read_csv("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
#metadata = requests.get("https://ourworldindata.org/grapher/share-of-primary-energy-consumption-by-source.metadata.json?v=1&csvType=full&useColumnShortNames=true").json()

fossil_columns = ['gas', 'oil', 'coal']
renew_cols = ['wind', 'hydro', 'other_renewables', 'nuclear', 'solar', 'biofuels']

new_columns = {}
for col in share_raw.columns:
    if '__pct_direct_primary_energy' in col:
        new_name = col.replace('__pct_direct_primary_energy', '')
        new_columns[col] = new_name
share_raw = share_raw.rename(columns=new_columns)

share_clean= sql(f"""
SELECT  wc.name, wc.region, share_raw.Code, {",".join(fossil_columns)}, {",".join(renew_cols)}, {"+".join(fossil_columns)} as fossil, {"+".join(renew_cols)} as renewable FROM share_raw
INNER JOIN pop_clean on share_raw.Code = pop_clean.Code
INNER JOIN world_clean AS wc ON share_raw.Code = wc.Code
WHERE share_raw.Code IS NOT NULL AND share_raw.Code IS NOT "OWID_WRL"
AND year = 2024

ORDER BY renewable DESC
LIMIT 20
""")

display(share_clean)

fossil_share_clean= sql(f"""
SELECT  wc.name, wc.region, share_raw.Code, {",".join(fossil_columns)}, {",".join(renew_cols)}, {"+".join(fossil_columns)} as fossil, {"+".join(renew_cols)} as renewable FROM share_raw
INNER JOIN pop_clean on share_raw.Code = pop_clean.Code
INNER JOIN world_clean AS wc ON share_raw.Code = wc.Code
WHERE share_raw.Code IS NOT NULL AND share_raw.Code IS NOT "OWID_WRL"
AND year = 2024

ORDER BY fossil DESC
LIMIT 20
""")

display(fossil_share_clean)

Unnamed: 0,name,region,Code,gas,oil,coal,wind,hydro,other_renewables,nuclear,solar,biofuels,fossil,renewable
0,Iceland,Europe,ISL,0.0,35.052288,3.867714,0.03811,41.944,18.426788,0.0,2.5e-05,0.671072,38.920002,61.079995
1,Norway,Europe,NOR,10.875145,34.748486,2.845182,4.689538,45.066917,0.04249,0.0,0.178739,1.553502,48.468813,51.531187
2,Sweden,Europe,SWE,2.805776,40.132954,5.538157,11.96261,19.085928,3.057774,14.942303,1.223991,1.250507,48.476887,51.523113
3,Finland,Europe,FIN,6.143715,42.05808,10.145653,10.021552,6.946723,4.978643,15.82995,0.621486,3.254199,58.347448,41.652553
4,Switzerland,Europe,CHE,15.623207,48.644077,0.386184,0.077479,20.358095,0.713321,10.413437,2.674608,1.109593,64.653468,35.346533
5,Brazil,Americas,BRA,11.097937,50.51955,5.230067,3.838979,14.621363,2.060633,0.558282,2.523628,9.549561,66.847554,33.152447
6,France,Europe,FRA,18.694588,45.359646,2.887011,2.756961,4.146557,0.562129,22.248041,1.35573,1.989337,66.941245,33.058756
7,Austria,Europe,AUT,22.63297,43.752766,9.277962,3.05543,14.861383,1.541471,0.0,3.126327,1.751691,75.663698,24.336303
8,Slovenia,Europe,SVN,14.18565,44.314625,17.229225,0.0113,9.089899,0.458699,9.969601,3.132628,1.608375,75.7295,24.270502
9,Denmark,Europe,DNK,11.302169,62.446857,3.672187,14.208114,0.013487,4.003976,0.0,2.692655,1.660555,77.421213,22.578787


Unnamed: 0,name,region,Code,gas,oil,coal,wind,hydro,other_renewables,nuclear,solar,biofuels,fossil,renewable
0,Turkmenistan,Asia,TKM,76.85086,23.146406,0.0,0.0,0.000851,0.0,0.0,0.00188,0.0,99.997266,0.002731
1,Trinidad and Tobago,Americas,TTO,88.05143,11.944149,0.0,1.7e-05,0.0,0.0,0.0,0.004407,0.0,99.995579,0.004424
2,Kuwait,Asia,KWT,46.96385,52.797966,0.207167,0.006696,0.0,0.0,0.0,0.024326,0.0,99.968983,0.031022
3,Algeria,Africa,DZA,66.60319,33.068714,0.228871,0.002114,0.006744,0.0,0.0,0.090373,0.0,99.900775,0.099231
4,Singapore,Asia,SGP,12.363035,87.101326,0.333909,0.0,0.0,0.080322,0.0,0.121406,0.0,99.79827,0.201729
5,Iraq,Asia,IRQ,26.937363,72.82226,0.0,0.0,0.188941,0.0,0.0,0.051442,0.0,99.759623,0.240384
6,Saudi Arabia,Asia,SAU,37.095886,62.55164,0.052546,0.048624,0.0,0.0,0.0,0.251306,0.0,99.700072,0.29993
7,Qatar,Asia,QAT,71.44886,28.199583,0.023374,0.0,0.0,0.020716,0.0,0.307466,0.0,99.671817,0.328183
8,Bangladesh,Asia,BGD,52.016747,30.54752,16.984356,0.018501,0.189188,0.002231,0.0,0.241458,0.0,99.548623,0.451378
9,Oman,Asia,OMN,71.3906,27.83983,0.309314,0.027421,0.0,0.0,0.0,0.432832,0.0,99.539744,0.460253


## Cons e Emission / gdp

In [42]:
emissions_gdp = sql("""
SELECT ec.Code, wc.name, wc.region,
(CAST(ec.emissions_total AS REAL) / gc.gdp) AS emissions_per_gdp,
(CAST(ecc.total_consumption AS REAL) / gc.gdp) AS consumption_per_gdp FROM emission_clean AS ec
INNER JOIN energy_cons_clean AS ecc ON ec.Code = ecc.Code
INNER JOIN gdp_clean AS gc ON ec.Code = gc.Code
INNER JOIN world_clean AS wc ON ec.Code = wc.Code
ORDER BY emissions_per_gdp DESC
""")

display(emissions_gdp)

Unnamed: 0,Code,name,region,emissions_per_gdp,consumption_per_gdp
0,TTO,Trinidad and Tobago,Americas,0.000797,3.685384e-09
1,MNG,Mongolia,Asia,0.000755,0.000000e+00
2,LBY,Libya,Africa,0.000720,0.000000e+00
3,TKM,Turkmenistan,Asia,0.000602,2.758446e-09
4,KWT,Kuwait,Asia,0.000573,2.323552e-09
...,...,...,...,...,...
93,SWE,Sweden,Europe,0.000057,8.835079e-10
94,IRL,Ireland,Europe,0.000054,2.896196e-10
95,TCD,Chad,Africa,0.000054,0.000000e+00
96,CHE,Switzerland,Europe,0.000043,4.460273e-10


# Data Analysis

## Vediamo i 10 paesi che hanno un'efficenza ambientale economica elevata

In [89]:
top_10_emissions_gdp = sql("""
SELECT wc.name, wc.region, ec.emissions_per_gdp FROM emissions_gdp AS ec
INNER JOIN world_clean AS wc ON ec.Code = wc.Code
ORDER BY emissions_per_gdp
LIMIT 10
""")

display(top_10_emissions_gdp)

Unnamed: 0,name,region,emissions_per_gdp
0,"Congo, Democratic Republic of the",Africa,3.6e-05
1,Switzerland,Europe,4.3e-05
2,Chad,Africa,5.4e-05
3,Ireland,Europe,5.4e-05
4,Sweden,Europe,5.7e-05
5,Denmark,Europe,6.4e-05
6,Hong Kong,Asia,6.7e-05
7,Singapore,Asia,6.7e-05
8,Sri Lanka,Asia,6.9e-05
9,France,Europe,7.1e-05


In [90]:
emissions_by_continent = sql("""
SELECT region, SUM(emissions_total) AS total_emissions FROM emission_clean
GROUP BY region
ORDER BY total_emissions ASC
""")

display(emissions_by_continent)

Unnamed: 0,region,total_emissions
0,Oceania,427557300.0
1,Africa,1308210000.0
2,Europe,4830581000.0
3,Americas,6907925000.0
4,Asia,22282360000.0


## 0. Andiamo a vedere tra i 100 paesi con piu popolazione i 20 paesi che emettono di piu e di meno rispetto al loro gdp e vediamo come è composto il loro mix energetico

In [69]:
top_20 = sql("""
SELECT sc.fossil, sc.renewable, wc.name, wc.region, ec.emissions_per_gdp FROM emissions_gdp AS ec
INNER JOIN world_clean AS wc ON ec.Code = wc.Code
INNER JOIN pop_clean AS pc ON ec.Code = pc.Code
LEFT JOIN share_clean AS sc ON ec.Code = sc.Code
ORDER BY emissions_per_gdp DESC
LIMIT 20
""")

flop_20 = sql("""
SELECT sc.fossil, sc.renewable, wc.name, wc.region, ec.emissions_per_gdp FROM emissions_gdp AS ec
INNER JOIN world_clean AS wc ON ec.Code = wc.Code
INNER JOIN pop_clean AS pc ON ec.Code = pc.Code
LEFT JOIN share_clean AS sc ON ec.Code = sc.Code
ORDER BY emissions_per_gdp ASC
LIMIT 20
""")
display(top_20, flop_20)

Unnamed: 0,fossil,renewable,name,region,emissions_per_gdp
0,,,"Iran, Islamic Republic of",Asia,0.000534
1,,,South Africa,Africa,0.000505
2,,,Iraq,Asia,0.000399
3,,,Kazakhstan,Asia,0.000388
4,,,Uzbekistan,Asia,0.000366
5,,,China,Asia,0.000366
6,,,Saudi Arabia,Asia,0.000313
7,,,United Arab Emirates,Asia,0.000298
8,,,Russian Federation,Europe,0.000292
9,,,Algeria,Africa,0.000274


Unnamed: 0,fossil,renewable,name,region,emissions_per_gdp
0,,,"Congo, Democratic Republic of the",Africa,3.6e-05
1,,,Chad,Africa,5.4e-05
2,48.476887,51.523113,Sweden,Europe,5.7e-05
3,,,Sri Lanka,Asia,6.9e-05
4,66.941245,33.058756,France,Europe,7.1e-05
5,,,Bangladesh,Asia,7.4e-05
6,78.13332,21.866678,Portugal,Europe,7.9e-05
7,,,Angola,Africa,8e-05
8,,,Madagascar,Africa,8.5e-05
9,,,United Kingdom of Great Britain and Northern I...,Europe,8.6e-05


## 1. Andiamo a confrontare le 20 economie piu grandi del mondo (per gdp) del 2024 e andiamo a vedere in che posizioni si trovano per quanto riguarda le emissioni di c02 prodotte

In [115]:
gdp_top20 = sql("""
SELECT  wc.name, wc.code, wc.region, gdp,
RANK () OVER (ORDER BY gdp DESC) AS gdp_rank FROM gdp_clean
INNER JOIN world_clean AS wc ON gdp_clean.Code = wc.Code
ORDER BY gdp DESC
LIMIT 20
""")

display(gdp_top20)



emission_gdp_top20 = sql("""
SELECT gdp_top20.name, gdp_top20.code, gdp_top20.region, emissions_per_gdp,
RANK () OVER (ORDER BY emissions_per_gdp DESC) AS emissions_per_gdp_rank FROM gdp_top20
INNER JOIN emissions_gdp ON emissions_gdp.Code = gdp_top20.code
ORDER BY emissions_per_gdp
LIMIT 20
""")

display(emission_gdp_top20)

Unnamed: 0,name,code,region,gdp,gdp_rank
0,China,CHN,Asia,33597882047418,1
1,United States of America,USA,Americas,25675525530499,2
2,India,IND,Asia,14243939416927,3
3,Russian Federation,RUS,Europe,6088997179872,4
4,Japan,JPN,Asia,5714945983582,5
5,Germany,DEU,Europe,5246975952526,6
6,Brazil,BRA,Americas,4165328804627,7
7,Indonesia,IDN,Asia,4102194303076,8
8,France,FRA,Europe,3731760755094,9
9,United Kingdom of Great Britain and Northern I...,GBR,Europe,3635610019744,10


Unnamed: 0,name,code,region,emissions_per_gdp,emissions_per_gdp_rank
0,France,FRA,Europe,7.1e-05,20
1,United Kingdom of Great Britain and Northern I...,GBR,Europe,8.6e-05,19
2,Spain,ESP,Europe,9.3e-05,18
3,Italy,ITA,Europe,9.6e-05,17
4,Germany,DEU,Europe,0.000109,16
5,Brazil,BRA,Americas,0.000116,15
6,Egypt,EGY,Africa,0.000132,14
7,Mexico,MEX,Americas,0.00016,13
8,Poland,POL,Europe,0.000165,12
9,Japan,JPN,Asia,0.000168,11


In [126]:
fig = px.bar(emission_gdp_top20, x='emissions_per_gdp', y='code', color='region', orientation='h', title='The Enviromental Efficiency of the Economics of top 20 Countries for GDP')
fig.update_layout(yaxis={'categoryorder': 'array', 'categoryarray': emission_gdp_top20['code'].tolist()[::-1]}, width=800, height=800) # Reverse the array to put lowest emissions_per_gdp at the top
fig.show()

In [133]:

share_top20 = sql("""
SELECT gdp_top20.name, gdp_top20.code, gdp_top20.region,share_clean.renewable,
RANK () OVER (ORDER BY emissions_per_gdp DESC) AS emissions_per_gdp_rank FROM gdp_top20
INNER JOIN emissions_gdp ON emissions_gdp.Code = gdp_top20.code
LEFT JOIN share_clean ON share_clean.Code = gdp_top20.code
ORDER BY share_clean.renewable DESC
LIMIT 20
""")

display(share_top20)

Unnamed: 0,name,code,region,renewable,emissions_per_gdp_rank
0,Brazil,BRA,Americas,33.152447,15
1,France,FRA,Europe,33.058756,20
2,Spain,ESP,Europe,18.231726,18
3,Canada,CAN,Americas,17.004494,5
4,China,CHN,Asia,,1
5,Saudi Arabia,SAU,Asia,,2
6,Russian Federation,RUS,Europe,,3
7,Australia,AUS,Oceania,,4
8,India,IND,Asia,,6
9,Indonesia,IDN,Asia,,7


## Top 10 countries for renewable energy

In [122]:
fig1 = px.bar(share_clean,
             x='renewable',
             y='name',
             color='region',
             orientation='h',
             title='Top 10 countries for renewable share',
             text='renewable')

fig1.update_traces(texttemplate='%{text:.2s}%', textposition='inside') # Position text inside the bar
fig1.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''}, # Remove y-axis title
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5) # Center the title
fig1.show()

## Top 10 countries for fossil energy

In [75]:
fig2 = px.bar(fossil_share_clean,
             x='renewable',
             y='name',
             color='region',
             orientation='h',
             title='Top 10 countries for renewable share',
             text='renewable')

fig2.update_traces(texttemplate='%{text:.2f}%', textposition='inside') # Position text inside the bar
fig2.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''}, # Remove y-axis title
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5) # Center the title
fig2.show()

## Emission and cons 2d map

In [38]:
fig = px.scatter(merged_data, x='emissions_per_gdp', y='consumption_per_gdp', color='region', hover_data=['Code'])
fig.update_traces(marker=dict(size=12)) # Increase point size
fig.update_layout(width=800, height=800) # Make the plot square
fig.show()

## Choreplot emission

In [24]:
fig = px.choropleth(emission_clean, locations='Code', color='emissions_total',
                    hover_name='Code', # show country name on hover
                    color_continuous_scale=px.colors.sequential.Greens, # color scale
                    title='CO2 Emissions per Country (2024)')
fig.show()

## Top Countries for Renewable Share (by Source) v1 e v2

In [32]:
renew_cols = ['hydro', 'nuclear', 'wind', 'solar', 'other_renewables', 'biofuels']

share_clean_melted = pd.melt(share_clean,
                             id_vars=['name', 'region', 'Code'],
                             value_vars=renew_cols,
                             var_name='renewable_source',
                             value_name='share_value')

share_clean_melted = share_clean_melted[share_clean_melted['share_value'] != 0]

display(share_clean_melted.head())

Unnamed: 0,name,region,Code,renewable_source,share_value
0,Iceland,Europe,ISL,hydro,41.944
1,Norway,Europe,NOR,hydro,45.066917
2,Sweden,Europe,SWE,hydro,19.085928
3,Finland,Europe,FIN,hydro,6.946723
4,Switzerland,Europe,CHE,hydro,20.358095


In [33]:
color_map = {
    'hydro': '#1F77B4',  # Vibrant Blue
    'nuclear': '#9467BD', # Vibrant Purple
    'wind': '#17BECF',    # Vibrant Cyan
    'solar': '#FFD700',   # Vibrant Gold/Yellow
    'other_renewables': '#2CA02C', # Vibrant Green
    'biofuels': '#8C564B' # Vibrant Brown
}

fig3 = px.bar(share_clean_melted,
             x='share_value',
             y='name',
             color='renewable_source',
             orientation='h',
             title='Top Countries for Renewable Share (by Source)',
             category_orders={'renewable_source': ['hydro', 'nuclear', 'wind', 'solar', 'other_renewables', 'biofuels']},
             color_discrete_map=color_map) # Apply custom color map

fig3.update_traces(textposition='none') # Remove text from inside the bar
fig3.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''},
                    xaxis={'title': ''},
                    width=1000, height=1000, font=dict(size=15),
                    title_x=0.5,
                    legend_title_text='Renewable Sources') # Add title for the primary legend
fig3.show()

In [76]:
region_pastel_color_map = {
    'Europe': '#CBD5E8',  # Light Blue
    'Oceania': '#B3E2CD',  # Light Green
    'Americas': '#FDCDAC', # Light Orange
    'Asia': '#F4CAE4',    # Light Pink
    'Africa': '#E6F5C9',  # Pale Yellow Green
    None: '#CCCCCC' # Grey for any missing region
}

shapes_to_add = []

# Get the list of countries in the order they appear on the y-axis
# y_categories_ordered and y_category_to_pos are already available in the kernel state

for i, country_name in enumerate(y_categories_ordered):
    # Find the region for the current country
    country_region = share_clean[share_clean['name'] == country_name]['region'].iloc[0]

    # Get the corresponding pastel color
    fill_color = region_pastel_color_map.get(country_region, '#CCCCCC') # Default to grey if region not found

    # Create a rectangle shape for the 'shadow'
    shapes_to_add.append(
        go.layout.Shape(
            type="rect",
            xref="x",
            yref="y",
            x0=0, # Start at the beginning of the x-axis
            y0=i - 0.5, # Slightly below the bar center
            y1=i + 0.5, # Slightly above the bar center
            x1=100, # End at the maximum of the x-axis (total percentage)
            fillcolor=fill_color,
            layer="below", # Place behind the bars
            line_width=0, # No border for the shadow
            opacity=0.6 # Adjust opacity if desired
        )
    )

# Update the layout of fig3 with the new shapes
fig3.update_layout(shapes=shapes_to_add)

# Display the modified figure
fig3.show()

NameError: name 'y_categories_ordered' is not defined

## Top 20 countries by emission per gdp

In [29]:
fig = px.bar(merged_data,
             x='emissions_per_gdp',
             y='Code',
             color='region',
             orientation='h',
             title='Top 20 Countries by Emissions per GDP',
             text='emissions_per_gdp')

fig.update_traces(texttemplate='%{text:.2e}', textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''},
                    xaxis={'title': 'Emissions per GDP'},
                    width=1000, height=800, font=dict(size=15),
                    title_x=0.5)
fig.show()

## TREEMAP

In [18]:
fig = px.treemap(merged_data,
                 path=[px.Constant("World"), 'region', 'Code'],
                 values='emissions_per_gdp',
                 title='CO2 Emissions per GDP by Region and Country',
                 color_continuous_scale='Greens')

fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), width=800, height=800)
fig.show()

In [13]:
fig = px.treemap(emission_clean,
                 path=[px.Constant("World"), 'region', 'Code'],
                 values='emissions_total',
                 title='CO2 Emissions by Region and Country (Top 20)',
                 color_continuous_scale='Greens')

fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), width=800, height=800)
fig.show()

## EMISSION GDP

In [31]:
global_emissions_gdp = sql("""
SELECT
    wc.name,
    wc.code,
    wc.region,
    er.emissions_total,
    gr.ny_gdp_mktp_pp_kd AS gdp,
    (CAST(er.emissions_total AS REAL) / gr.ny_gdp_mktp_pp_kd) AS emissions_per_gdp
FROM
    world_clean AS wc
INNER JOIN
    emission_raw AS er ON wc.code = er.Code
INNER JOIN
    gdp_raw AS gr ON wc.code = gr.Code
WHERE
    er.Year = 2024 AND gr.Year = 2024

ORDER BY emissions_per_gdp DESC
""")

display(global_emissions_gdp)

Unnamed: 0,name,code,region,emissions_total,gdp,emissions_per_gdp
0,Trinidad and Tobago,TTO,Americas,34576280.0,43362105898,0.000797
1,Mongolia,MNG,Asia,44693890.0,59221044568,0.000755
2,Libya,LBY,Africa,65260070.0,90608777729,0.000720
3,Turkmenistan,TKM,Asia,81005550.0,134555061341,0.000602
4,Kuwait,KWT,Asia,129518650.0,225947312053,0.000573
...,...,...,...,...,...,...
177,Uganda,UGA,Africa,6333810.0,144136790455,0.000044
178,Rwanda,RWA,Africa,2033550.0,46542623255,0.000044
179,Switzerland,CHE,Europe,32071708.0,741034943164,0.000043
180,"Congo, Democratic Republic of the",COD,Africa,5904451.0,164367209943,0.000036


In [21]:
min_emissions_per_gdp = global_emissions_gdp['emissions_per_gdp'].min()
max_emissions_per_gdp = global_emissions_gdp['emissions_per_gdp'].max()

print(f"Global Minimum Emissions per GDP: {min_emissions_per_gdp}")
print(f"Global Maximum Emissions per GDP: {max_emissions_per_gdp}")

Global Minimum Emissions per GDP: 1.362585383107925e-05
Global Maximum Emissions per GDP: 0.0007973847045467128


In [22]:
plot_data = global_emissions_gdp.copy()
plot_data['emissions_per_gdp_normalized'] = (plot_data['emissions_per_gdp'] - min_emissions_per_gdp) / (max_emissions_per_gdp - min_emissions_per_gdp)

# Select the top 20 countries based on original emissions_per_gdp
plot_data = plot_data.sort_values(by='emissions_per_gdp', ascending=False).head(20)

display(plot_data.head())

Unnamed: 0,name,code,region,emissions_total,gdp,emissions_per_gdp,emissions_per_gdp_normalized
166,Trinidad and Tobago,TTO,Americas,34576280.0,43362105898,0.000797,1.0
110,Mongolia,MNG,Asia,44693890.0,59221044568,0.000755,0.945533
94,Libya,LBY,Africa,65260070.0,90608777729,0.00072,0.901571
169,Turkmenistan,TKM,Asia,81005550.0,134555061341,0.000602,0.75074
88,Kuwait,KWT,Asia,129518650.0,225947312053,0.000573,0.713994


In [23]:
fig = px.bar(plot_data,
             x='emissions_per_gdp_normalized',
             y='name',
             color='region',
             orientation='h',
             title='Top 20 Countries by Normalized CO2 Emissions per GDP (2024)',
             text='emissions_per_gdp_normalized')

fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending', 'title': ''},
                    xaxis={'title': 'Normalized Emissions per GDP'},
                    width=1000, height=800, font=dict(size=15),
                    title_x=0.5)
fig.show()

## Bump plot trial

In [67]:
gdp_top20['gdp_rank'] = gdp_top20['gdp'].rank(ascending=False).astype(int)
display(gdp_top20.head())

Unnamed: 0,name,region,gdp,gdp_rank
0,China,Asia,33597882047418,1
1,United States of America,Americas,25675525530499,2
2,India,Asia,14243939416927,3
3,Russian Federation,Europe,6088997179872,4
4,Japan,Asia,5714945983582,5


In [80]:
merged_ranks = pd.merge(gdp_top20[['name', 'region', 'gdp_rank']],
                        emissions_rank_gdp_top20[['name', 'emissions_per_gdp_rank']],
                        on='name',
                        how='inner')
display(merged_ranks.head())

Unnamed: 0,name,region,gdp_rank,emissions_per_gdp_rank
0,China,Asia,1,20
1,United States of America,Americas,2,13
2,India,Asia,3,15
3,Russian Federation,Europe,4,18
4,Japan,Asia,5,10


In [91]:
melted_ranks = merged_ranks.melt(id_vars=['name', 'region'],
                                 value_vars=['gdp_rank', 'emissions_per_gdp_rank'],
                                 var_name='rank_type',
                                 value_name='rank_value')

display(melted_ranks.head())

melted_ranks['rank_value'] = pd.to_numeric(melted_ranks['rank_value'])
display(melted_ranks.info())

# Create a list of text positions based on rank_type
text_positions = ['middle left' if rt == 'gdp_rank' else 'middle right' for rt in melted_ranks['rank_type']]

fig = px.line(melted_ranks,
              x='rank_type',
              y='rank_value',
              color='region',
              line_group='name',
              text='name',  # Set text to country name here
              hover_name='name',
              title='Shifts in Country Rankings: GDP vs. Emissions per GDP',
              labels={'rank_type': 'Rank Type', 'rank_value': 'Rank'})

fig.update_layout(yaxis={'autorange': 'reversed'}, width=800, height=800) # Invert y-axis and make it square
fig.update_traces(mode='lines+markers+text', marker=dict(size=12), # Increase marker size
                  line=dict(width=7), # Make lines thicker
                  textposition=text_positions) # Use dynamic text positions
fig.show()

Unnamed: 0,name,region,rank_type,rank_value
0,China,Asia,gdp_rank,1
1,United States of America,Americas,gdp_rank,2
2,India,Asia,gdp_rank,3
3,Russian Federation,Europe,gdp_rank,4
4,Japan,Asia,gdp_rank,5


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        40 non-null     object
 1   region      40 non-null     object
 2   rank_type   40 non-null     object
 3   rank_value  40 non-null     int64 
dtypes: int64(1), object(3)
memory usage: 1.4+ KB


None