## 1. Import Libraries and Load Data

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Silence every warning category for the rest of the session
warnings.simplefilter('ignore')

# Show all columns when a DataFrame is displayed (None removes the column cap)
pd.options.display.max_columns = None

# Status line confirming the setup cell ran to completion
print("‚úÖ Libraries imported successfully!")

In [None]:
# Read the raw income-category table from disk (path is relative to this notebook)
df = pd.read_csv('../../data/raw/Income-category-wise-countries.csv')

# Report the table size, then print a ruled banner announcing the preview
print(f"üìä Dataset Shape: {len(df):,} rows √ó {len(df.columns)} columns")
print("\n" + "=" * 60, "First 5 rows of the dataset:", "=" * 60, sep="\n")

# Last expression of the cell, so the notebook renders it as a rich table
df.head()

## 2. Data Cleaning and Preprocessing

In [None]:
# Report how many entries are missing in each column
print("üîç Missing Values:")
print(df.isna().sum())

# Keep only the rows that carry an income group. Rows without one are
# usually aggregates such as 'World' or 'Arab World' rather than countries.
has_income_group = df['IncomeGroup'].notna()
df_clean = df[has_income_group]
print(f"\nüìä Cleaned Dataset Shape: {len(df_clean):,} rows")

## 3. Distribution Analysis

In [None]:
# Number of countries in each income group, as a two-column frame
# with display-ready column names ('Income Group', 'Count')
income_counts = (
    df_clean['IncomeGroup']
    .value_counts()
    .rename_axis('Income Group')
    .reset_index(name='Count')
)

# One bar per income group, coloured by group and labelled with its count
fig = px.bar(
    income_counts,
    x='Income Group',
    y='Count',
    text='Count',
    color='Income Group',
    color_discrete_sequence=px.colors.qualitative.Set3,
    title='Distribution of Countries by Income Group',
)
fig.update_layout(height=500)
fig.show()

In [None]:
# Regional Distribution of Income Groups
# One row per (Region, IncomeGroup) pair, with the number of rows in that pair.
region_income = df_clean.groupby(['Region', 'IncomeGroup']).size().reset_index(name='Count')

# `color` must name a column that exists in `region_income`. The grouped frame
# keeps the source column name 'IncomeGroup' (no space); passing 'Income Group'
# makes Plotly Express raise a ValueError. `labels` restores the spaced,
# human-readable name for the legend title and hover text.
fig = px.bar(region_income, x='Region', y='Count', color='IncomeGroup',
             labels={'IncomeGroup': 'Income Group'},
             title='Income Group Distribution by Region',
             barmode='stack', color_discrete_sequence=px.colors.qualitative.Set3)
# Order regions by their total country count, largest first.
fig.update_layout(height=600, xaxis={'categoryorder':'total descending'})
fig.show()

## 4. Global Map Visualization

In [None]:
# Fixed colour for each income group, so the map legend is stable across runs
income_palette = {
    'High income': 'forestgreen',
    'Upper middle income': 'yellowgreen',
    'Lower middle income': 'orange',
    'Low income': 'firebrick',
}

# World map shaded by income group; hovering a country shows its 'TableName'.
# NOTE(review): 'Country Code' presumably holds ISO-3 codes, which is what
# px.choropleth expects by default — confirm against the dataset.
fig = px.choropleth(
    df_clean,
    locations='Country Code',
    color='IncomeGroup',
    color_discrete_map=income_palette,
    hover_name='TableName',
    title='Global Distribution of Income Groups',
)
fig.update_layout(height=600)
fig.show()

---
## 📊 Summary of Findings

### Key Observations
1. **Distribution**: High-income countries are concentrated in Europe and North America; low-income countries are concentrated in Africa. *(Preliminary — TODO: confirm with the counts from Section 3.)*
2. **Regional Disparities**: Sub-Saharan Africa has the highest proportion of low-income countries. *(Preliminary — TODO: confirm with the regional breakdown in Section 3.)*

### Implications
- **Health Resources**: Income level strongly dictates healthcare infrastructure.
- **Nutrition Transition**: Income growth drives dietary changes (more processed food).

### Next Steps
- Use Income Group as a categorical variable to segment Diabetes and Obesity analysis.