##### Dataset: "https://www.kaggle.com/datasets/mathurinache/world-happiness-report?select=2022.csv"

## Q1: Preview & Shape – Display the first 10 rows and the total number of rows & columns.

In [232]:
import pandas as pd

In [233]:
df = pd.read_csv("2022.csv")
df.head(10)

Unnamed: 0,RANK,Country,Happiness score,Whisker-high,Whisker-low,Dystopia (1.83) + residual,Explained by: GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption
0,1,Finland,7821,7886,7756,2518,1892,1258,775,736,109,534
1,2,Denmark,7636,7710,7563,2226,1953,1243,777,719,188,532
2,3,Iceland,7557,7651,7464,2320,1936,1320,803,718,270,191
3,4,Switzerland,7512,7586,7437,2153,2026,1226,822,677,147,461
4,5,Netherlands,7415,7471,7359,2137,1945,1206,787,651,271,419
5,6,Luxembourg*,7404,7501,7307,2042,2209,1155,790,700,120,388
6,7,Sweden,7384,7454,7315,2003,1920,1204,803,724,218,512
7,8,Norway,7365,7440,7290,1925,1997,1239,786,728,217,474
8,9,Israel,7364,7426,7301,2634,1826,1221,818,568,155,143
9,10,New Zealand,7200,7279,7120,1954,1852,1235,752,680,245,483


In [234]:
df.shape

(147, 12)

## Q2: Column Info – Show all column names, their data types, and count missing values.

In [236]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147 entries, 0 to 146
Data columns (total 12 columns):
 #   Column                                      Non-Null Count  Dtype 
---  ------                                      --------------  ----- 
 0   RANK                                        147 non-null    int64 
 1   Country                                     147 non-null    object
 2   Happiness score                             146 non-null    object
 3   Whisker-high                                146 non-null    object
 4   Whisker-low                                 146 non-null    object
 5   Dystopia (1.83) + residual                  146 non-null    object
 6   Explained by: GDP per capita                146 non-null    object
 7   Explained by: Social support                146 non-null    object
 8   Explained by: Healthy life expectancy       146 non-null    object
 9   Explained by: Freedom to make life choices  146 non-null    object
 10  Explained by: Generosity  

In [237]:
df.isnull().sum() 

RANK                                          0
Country                                       0
Happiness score                               1
Whisker-high                                  1
Whisker-low                                   1
Dystopia (1.83) + residual                    1
Explained by: GDP per capita                  1
Explained by: Social support                  1
Explained by: Healthy life expectancy         1
Explained by: Freedom to make life choices    1
Explained by: Generosity                      1
Explained by: Perceptions of corruption       1
dtype: int64

In [238]:
df = df.dropna()

## Q3: Country Filter – List all details for the country "India".

In [240]:
india_details = df[df['Country'] == 'India']
india_details

Unnamed: 0,RANK,Country,Happiness score,Whisker-high,Whisker-low,Dystopia (1.83) + residual,Explained by: GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption
135,136,India,3777,3828,3726,795,1167,376,471,647,198,123


## Q4: Sorting – Sort countries by Happiness score in descending order and show the top 5.

In [242]:
sorted_df = df.sort_values(by = 'Happiness score', ascending = False)
top_countries = sorted_df[['Country', 'Happiness score']].head()
top_countries

Unnamed: 0,Country,Happiness score
0,Finland,7821
1,Denmark,7636
2,Iceland,7557
3,Switzerland,7512
4,Netherlands,7415


## Q5: Top by GDP – Find the top 5 countries by GDP per capita.

In [244]:
sorted_df_gdp = df.sort_values(by = 'Explained by: GDP per capita', ascending = False)
top_countries_by_gdp = sorted_df_gdp[['Country', 'Explained by: GDP per capita']].head()
top_countries_by_gdp

Unnamed: 0,Country,Explained by: GDP per capita
5,Luxembourg*,2209
26,Singapore,2149
12,Ireland,2129
3,Switzerland,2026
23,United Arab Emirates,1998


## Q6: Average Happiness – Calculate the average Happiness score for all countries.

In [246]:
non_str_cols = [c for c in df.columns if c != 'Country']

for c in non_str_cols:
    df[c] = pd.to_numeric(df[c].astype(str).str.replace(',', ''))

In [247]:
avg_happiness_score = df.groupby('Country')['Happiness score'].mean().reset_index(name = 'avg_score')
avg_happiness_score

Unnamed: 0,Country,avg_score
0,Afghanistan,2404.0
1,Albania,5199.0
2,Algeria,5122.0
3,Argentina,5967.0
4,Armenia,5399.0
...,...,...
141,Venezuela,4925.0
142,Vietnam,5485.0
143,Yemen*,4197.0
144,Zambia,3760.0


## Q7: Top GDP Countries - Group data by the "Explained by: GDP per capita" value ranges (e.g., High, Medium, Low) and compute the average "Happiness score" for each range.

In [249]:
df['GDP_tier'] = pd.qcut(df['Explained by: GDP per capita'], q = 3, labels = ["Low", "Medium", "High"])

In [250]:
summary = df.groupby('GDP_tier').agg({'Happiness score': 'mean', 'Country': 'count'})
summary

  summary = df.groupby('GDP_tier').agg({'Happiness score': 'mean', 'Country': 'count'})


Unnamed: 0_level_0,Happiness score,Country
GDP_tier,Unnamed: 1_level_1,Unnamed: 2_level_1
Low,4672.816327,49
Medium,5398.583333,48
High,6586.163265,49


## Q8: Correlation – Find the correlation between GDP per capita and Happiness score.

In [252]:
correlation = df['Explained by: GDP per capita'].corr(df['Happiness score'])
print(round(correlation,2))

0.76


## Q9: Top Country per Metric - For each metric column (e.g., "Explained by: Social support", "Explained by: Generosity"), find the country with the highest value for that metric.

In [254]:
metric_cols = [
    'Explained by: GDP per capita',
    'Explained by: Social support',
    'Explained by: Healthy life expectancy',
    'Explained by: Freedom to make life choices',
    'Explained by: Generosity',
    'Explained by: Perceptions of corruption'
]


In [255]:
top_countries_per_metric = {}

for col in metric_cols:
    max_idx = df[col].idxmax() 
    country = df.loc[max_idx, 'Country']
    value = df.loc[max_idx, col]
    top_countries_per_metric[col] = (country, value)

top_df = pd.DataFrame(top_countries_per_metric, index=['Country', 'Value']).T
print(top_df)

                                                              Country Value
Explained by: GDP per capita                              Luxembourg*  2209
Explained by: Social support                                  Iceland  1320
Explained by: Healthy life expectancy       Hong Kong S.A.R. of China   942
Explained by: Freedom to make life choices                   Cambodia   740
Explained by: Generosity                                    Indonesia   468
Explained by: Perceptions of corruption                     Singapore   587


## Q10: Happiness Rank Check - Verify if the "RANK" column is consistent with the ordering of "Happiness score" in descending order. If not, identify mismatches.

In [257]:
df['Computed_Rank'] = df['Happiness score'].rank(ascending=False, method='first').astype(int)

mismatches = len(df[df['RANK'] != df['Computed_Rank']])

mismatches

0