## 1. Import Libraries and Load Data

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
import warnings
warnings.filterwarnings(action='ignore')

# Notebook-wide pandas display settings: show every column and render
# floats with two decimal places.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(option_name, option_value)

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Load the dataset: a wide table with one row per country/region and one
# column per year.
DATA_PATH = '../../data/processed/cleaned-GDP-countries.csv'
df = pd.read_csv(DATA_PATH)

# Report the table size, then let the cell's last expression render the
# first rows with the rich DataFrame display.
print(f"📊 Dataset Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print("\n" + "="*60)
print("First 5 rows of the dataset:")
print("="*60)
df.head()

📊 Dataset Shape: 259 rows × 25 columns

First 5 rows of the dataset:


Unnamed: 0,Country Name,Country Code,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,1.87,1.9,1.96,2.04,2.25,2.36,2.47,2.68,2.84,2.55,2.45,2.64,2.62,2.73,2.79,2.96,2.98,3.09,3.28,3.4,2.48,2.93,3.28
1,Africa Eastern and Southern,AFE,287.2,260.99,267.81,355.72,442.7,516.66,580.24,665.6,713.5,715.49,849.41,945.44,953.0,962.44,978.74,898.31,828.96,973.03,1012.29,1009.75,933.41,1085.6,1191.64
2,Afghanistan,AFG,3.52,2.81,3.83,4.52,5.22,6.2,6.97,9.75,10.11,12.42,15.86,17.81,19.91,20.15,20.5,19.13,18.12,18.75,18.05,18.8,19.96,14.26,14.5
3,Africa Western and Central,AFW,142.14,150.06,179.39,207.75,258.57,317.1,402.72,471.54,575.06,515.47,606.28,691.19,748.13,844.2,903.93,778.02,700.03,694.05,777.84,833.29,797.3,858.11,893.64
4,Angola,AGO,9.13,8.94,15.29,17.81,23.55,36.97,52.38,65.27,88.54,70.31,83.8,111.79,128.05,132.34,135.97,90.5,52.76,73.69,79.45,70.9,48.5,66.51,104.4


## 2. Data Preprocessing

In [3]:
# Melt the dataframe to long format (one row per country-year) for easier plotting
id_vars = ['Country Name', 'Country Code']
# Only melt columns whose header is a year. A stray non-year column (for
# example a saved index such as 'Unnamed: 0') would otherwise be reshaped
# into GDP rows with a missing Year.
value_vars = [
    col for col in df.columns
    if col not in id_vars and str(col).isdigit()
]

df_long = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='Year', value_name='GDP')

# Convert Year to integer. Every remaining header is a digit string, so the
# conversion is exact; without errors='coerce' an unexpected value raises
# instead of silently becoming NaN (which would also turn the column float).
df_long['Year'] = pd.to_numeric(df_long['Year'])

# Display transformed data
print("Transformed Dataset Shape:", df_long.shape)
df_long.head()

Transformed Dataset Shape: (5957, 4)


Unnamed: 0,Country Name,Country Code,Year,GDP
0,Aruba,ABW,2000,1.87
1,Africa Eastern and Southern,AFE,2000,287.2
2,Afghanistan,AFG,2000,3.52
3,Africa Western and Central,AFW,2000,142.14
4,Angola,AGO,2000,9.13


## 3. Global Economic Trends

In [4]:
# Global GDP Trend (sum of individual economies only)
#
# The table mixes individual economies with aggregate rows such as
# 'Africa Eastern and Southern' (AFE) and 'Africa Western and Central' (AFW).
# Summing every row would count the same GDP several times, so aggregate
# rows are dropped before totalling.
# NOTE(review): the codes below are the World Bank aggregate codes (regions,
# income groups, lending groups, World) - confirm against the data source.
AGGREGATE_CODES = {
    'AFE', 'AFW', 'ARB', 'CEB', 'CSS', 'EAP', 'EAR', 'EAS', 'ECA', 'ECS',
    'EMU', 'EUU', 'FCS', 'HIC', 'HPC', 'IBD', 'IBT', 'IDA', 'IDB', 'IDX',
    'INX', 'LAC', 'LCN', 'LDC', 'LIC', 'LMC', 'LMY', 'LTE', 'MEA', 'MIC',
    'MNA', 'NAC', 'OED', 'OSS', 'PRE', 'PSS', 'PST', 'SAS', 'SSA', 'SSF',
    'SST', 'TEA', 'TEC', 'TLA', 'TMN', 'TSA', 'TSS', 'UMC', 'WLD',
}

country_rows = df_long[~df_long['Country Code'].isin(AGGREGATE_CODES)]
global_gdp = country_rows.groupby('Year')['GDP'].sum().reset_index()

# NOTE(review): the sample values (e.g. Aruba at 1.87 in 2000) suggest the
# GDP column is in billions of USD rather than USD - confirm and adjust the
# axis label if so.
fig = px.line(global_gdp, x='Year', y='GDP', title='Global GDP Trend Over Time',
              labels={'GDP': 'Total GDP (USD)'})
fig.update_layout(height=500)
fig.show()

## 4. Country-Specific Analysis

In [5]:
# Interactive Country GDP Viewer
# Alphabetically ordered list of every distinct name in the long table.
countries = df_long['Country Name'].unique().tolist()
countries.sort()

# Dropdown over all names; starts on 'United States' when it is available,
# otherwise on the first entry.
country_dropdown = widgets.Dropdown(
    description='Country:',
    options=countries,
    value=countries[0] if 'United States' not in countries else 'United States',
    style=dict(description_width='initial'),
)

# Output area that the plot callback draws into.
output = widgets.Output()

def update_gdp_plot(change):
    """Redraw the GDP line chart and growth summary for the selected country.

    Parameters
    ----------
    change : dict-like
        Widget change notification; only ``change['new']`` (the selected
        country name) is read.

    The chart and the summary line are written into the ``output`` widget.
    Growth is measured between the earliest and latest years that actually
    have a GDP value, so missing leading/trailing observations do not turn
    the result into NaN.
    """
    with output:
        output.clear_output(wait=True)
        country = change['new']
        # Sort by year so "first" and "last" really are the earliest and
        # latest observations, independent of the row order of df_long.
        country_data = df_long[df_long['Country Name'] == country].sort_values('Year')

        fig = px.line(country_data, x='Year', y='GDP', title=f'GDP Growth: {country}',
                      markers=True)
        fig.update_layout(height=500)
        fig.show()

        # Calculate growth rate over the years that have data
        valid = country_data.dropna(subset=['Year', 'GDP'])
        if valid.empty:
            print(f"No GDP data available for {country}.")
            return

        first_row, last_row = valid.iloc[0], valid.iloc[-1]
        start_gdp, end_gdp = first_row['GDP'], last_row['GDP']
        start_year, end_year = int(first_row['Year']), int(last_row['Year'])

        if start_gdp == 0:
            # A percentage change from zero is undefined; reporting 0% would
            # be misleading.
            print(f"Total Growth ({start_year} - {end_year}): undefined (starting GDP is 0)")
            return

        growth = ((end_gdp - start_gdp) / start_gdp) * 100
        print(f"Total Growth ({start_year} - {end_year}): {growth:.2f}%")

# Redraw whenever the dropdown's selected value changes.
country_dropdown.observe(update_gdp_plot, names='value')

# Heading, dropdown and plot area stacked vertically.
display(widgets.VBox([widgets.HTML('<h3>🌍 Explore Country GDP</h3>'),
                      country_dropdown, output]))

# observe() only fires on a change, so draw the initial selection explicitly.
update_gdp_plot({'new': country_dropdown.value})

VBox(children=(HTML(value='<h3>üåç Explore Country GDP</h3>'), Dropdown(description='Country:', index=246, optio‚Ä¶

## 5. Comparative Analysis

In [6]:
# Top Economies by Year
# Slider covering every year in the long table, starting on the latest one;
# the value is only reported once the handle is released.
year_slider = widgets.IntSlider(
    description='Year:',
    min=df_long['Year'].min(),
    max=df_long['Year'].max(),
    value=df_long['Year'].max(),
    step=1,
    continuous_update=False,
)

# Output area that the bar-chart callback draws into.
top_output = widgets.Output()

# The table mixes individual economies with aggregate rows such as
# 'Africa Eastern and Southern' (AFE) and 'Africa Western and Central' (AFW).
# Aggregates are far larger than any single economy and would crowd real
# countries out of a "top economies" ranking.
# NOTE(review): these are the World Bank aggregate codes (regions, income
# groups, lending groups, World) - confirm against the data source.
AGGREGATE_CODES = {
    'AFE', 'AFW', 'ARB', 'CEB', 'CSS', 'EAP', 'EAR', 'EAS', 'ECA', 'ECS',
    'EMU', 'EUU', 'FCS', 'HIC', 'HPC', 'IBD', 'IBT', 'IDA', 'IDB', 'IDX',
    'INX', 'LAC', 'LCN', 'LDC', 'LIC', 'LMC', 'LMY', 'LTE', 'MEA', 'MIC',
    'MNA', 'NAC', 'OED', 'OSS', 'PRE', 'PSS', 'PST', 'SAS', 'SSA', 'SSF',
    'SST', 'TEA', 'TEC', 'TLA', 'TMN', 'TSA', 'TSS', 'UMC', 'WLD',
}


def update_top_economies(change):
    """Redraw the top-20 GDP bar chart for the selected year.

    Parameters
    ----------
    change : dict-like
        Widget change notification; only ``change['new']`` (the selected
        year) is read.

    Aggregate rows (regions, income groups, World) are excluded so the chart
    ranks individual economies only. The figure is written into the
    ``top_output`` widget.
    """
    with top_output:
        top_output.clear_output(wait=True)
        year = change['new']

        is_selected_year = df_long['Year'] == year
        is_aggregate = df_long['Country Code'].isin(AGGREGATE_CODES)
        year_data = df_long[is_selected_year & ~is_aggregate]
        top_20 = year_data.nlargest(20, 'GDP')

        fig = px.bar(top_20, x='GDP', y='Country Name', orientation='h',
                     title=f'Top 20 Economies in {year}',
                     color='GDP', color_continuous_scale='Viridis')
        # 'total ascending' places the largest bar at the top of a horizontal chart.
        fig.update_layout(yaxis={'categoryorder':'total ascending'}, height=600)
        fig.show()

# Redraw whenever the slider's value changes.
year_slider.observe(update_top_economies, names='value')

# Heading, slider and chart area stacked vertically.
display(widgets.VBox([widgets.HTML('<h3>🏆 Top Economies</h3>'),
                      year_slider, top_output]))

# observe() only fires on a change, so draw the initial year explicitly.
update_top_economies({'new': year_slider.value})

VBox(children=(HTML(value='<h3>üèÜ Top Economies</h3>'), IntSlider(value=2022, continuous_update=False, descript‚Ä¶

## 6. Global Map Visualization

In [7]:
# Interactive World Map
# Output area for the choropleth; `update_map` clears it and redraws the
# figure inside it on every call.
map_output = widgets.Output()

def update_map(change):
    """Redraw the world choropleth for the year carried in ``change['new']``.

    The figure is rendered into the ``map_output`` widget, replacing whatever
    was shown there before.
    """
    with map_output:
        map_output.clear_output(wait=True)
        selected_year = change['new']
        snapshot = df_long[df_long['Year'] == selected_year]

        # Colour each location (keyed by its country code) by GDP.
        world_map = px.choropleth(
            snapshot,
            locations='Country Code',
            color='GDP',
            hover_name='Country Name',
            color_continuous_scale='Plasma',
            title=f'Global GDP Distribution - {selected_year}',
        )
        world_map.update_layout(height=600)
        world_map.show()

# Year selector for the map: spans every year in the long table, starts on
# the latest one, and only reports a value once the handle is released.
year_slider_map = widgets.IntSlider(
    description='Year:',
    min=df_long['Year'].min(),
    max=df_long['Year'].max(),
    value=df_long['Year'].max(),
    step=1,
    continuous_update=False,
)

# Redraw whenever the slider's value changes.
year_slider_map.observe(update_map, names='value')

# Heading, slider and map area stacked vertically.
display(widgets.VBox([widgets.HTML('<h3>🗺️ Global GDP Map</h3>'),
                      year_slider_map, map_output]))

# observe() only fires on a change, so draw the initial year explicitly.
update_map({'new': year_slider_map.value})

VBox(children=(HTML(value='<h3>üó∫Ô∏è Global GDP Map</h3>'), IntSlider(value=2022, continuous_update=False, descri‚Ä¶

---
## Summary of Findings

### Key Observations
1. **Global Growth**: [To be filled] - General upward trend in global GDP.
2. **Top Economies**: [To be filled] - US, China, etc. dominate the charts.
3. **Emerging Markets**: [To be filled] - Rapid growth observed in Asian economies.

### Implications
- **Economic Power**: Concentration of wealth in top economies.
- **Development**: Correlation with health outcomes (to be explored).

### Next Steps
- Correlate GDP with Obesity and Diabetes rates.
- Analyze GDP per capita for better standard of living comparison.