In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Read the two raw wide-format tables (mortality first, then GDP).
# NOTE(review): "motality" looks like a misspelling of "mortality", but the path
# has to match the file on disk — confirm before renaming either one.
mort, gdp = (
    pd.read_csv(raw_path)
    for raw_path in ('data/raw/child-motality.csv', 'data/raw/gdp-data.csv')
)

In [3]:
def pivot_data(df, value_col):
    """Reshape a wide table with one column per year into long (tidy) form.

    Columns whose label consists only of digits are treated as year columns
    and are unpivoted; "geo" and "name" are kept as identifier columns. Any
    other column is left out of the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing "geo" and "name" columns plus the year columns.
    value_col : str
        Name given to the column that holds the unpivoted values.

    Returns
    -------
    pandas.DataFrame
        Columns "geo", "name", "year" (nullable ``Int64``) and ``value_col``
        (numeric). Rows missing any of these four fields are dropped; the
        remaining rows keep their index labels from the melt, so the index
        can contain gaps.
    """
    id_cols = ["geo", "name"]
    year_labels = [label for label in df.columns if str(label).isdigit()]

    long_df = pd.melt(
        df,
        id_vars=id_cols,
        value_vars=year_labels,
        var_name="year",
        value_name=value_col,
    )

    # Entries that cannot be parsed as numbers are coerced to missing values,
    # which the dropna below then removes.
    long_df["year"] = pd.to_numeric(long_df["year"], errors="coerce").astype("Int64")
    long_df[value_col] = pd.to_numeric(long_df[value_col], errors="coerce")

    return long_df.dropna(subset=id_cols + ["year", value_col])

In [4]:
# Reshape both wide tables into long form, naming the value column per dataset.
mort_tidy = pivot_data(df=mort, value_col="mortality_rate")
gdp_tidy = pivot_data(df=gdp, value_col="gdpcapita")

In [5]:
# Inspect the tidy mortality table. Gaps in the index (e.g. label 3 is absent)
# are rows that pivot_data removed because a required field was missing.
mort_tidy

Unnamed: 0,geo,name,year,mortality_rate
0,afg,Afghanistan,1800,468.58
1,ago,Angola,1800,485.68
2,alb,Albania,1800,375.20
4,are,UAE,1800,434.46
5,arg,Argentina,1800,402.19
...,...,...,...,...
58389,wsm,Samoa,2100,2.90
58390,yem,Yemen,2100,8.36
58391,zaf,South Africa,2100,4.60
58392,zmb,Zambia,2100,19.13


In [6]:
# Inspect the tidy GDP-per-capita table produced by pivot_data.
gdp_tidy

Unnamed: 0,geo,name,year,gdpcapita
0,afg,Afghanistan,1800,560.88817
1,ago,Angola,1800,435.23259
2,alb,Albania,1800,547.53369
3,and,Andorra,1800,1598.53128
4,are,UAE,1800,1332.77712
...,...,...,...,...
58088,wsm,Samoa,2100,36317.37350
58089,yem,Yemen,2100,8056.50777
58090,zaf,South Africa,2100,49804.86608
58091,zmb,Zambia,2100,24859.65796


In [7]:
# Save cleaned files
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails on Restart & Run All.
preprocessed_dir = Path("data/preprocessed")
preprocessed_dir.mkdir(parents=True, exist_ok=True)

mort_tidy.to_csv(preprocessed_dir / "child_mortality_tidy.csv", index=False)
gdp_tidy.to_csv(preprocessed_dir / "gdp_per_capita_tidy.csv", index=False)

In [8]:
# Merge into final schema: one row per (geo, name, year) present in BOTH tables.
# NOTE(review): "name" is part of the join key, so a country whose name is spelled
# differently in the two sources is dropped by the inner join — confirm this is intended.
final_cols = ["geo", "name", "year", "mortality_rate", "gdpcapita"]
merged = mort_tidy.merge(gdp_tidy, on=["geo", "name", "year"], how="inner")[final_cols]

# pivot_data already dropped rows with missing values and the inner join keeps
# only keys found in both tables, so nothing can be missing here. The previous
# fillna(0) was therefore a no-op, and a 0 would have been a fabricated
# measurement rather than a missing-value marker; assert the invariant instead.
assert merged[["mortality_rate", "gdpcapita"]].notna().all().all(), (
    "unexpected missing values after merge"
)

In [9]:
# Inspect the merged table (mortality and GDP per capita side by side per country-year).
merged

Unnamed: 0,geo,name,year,mortality_rate,gdpcapita
0,afg,Afghanistan,1800,468.58,560.88817
1,ago,Angola,1800,485.68,435.23259
2,alb,Albania,1800,375.20,547.53369
3,are,UAE,1800,434.46,1332.77712
4,arg,Argentina,1800,402.19,1981.80486
...,...,...,...,...,...
56698,wsm,Samoa,2100,2.90,36317.37350
56699,yem,Yemen,2100,8.36,8056.50777
56700,zaf,South Africa,2100,4.60,49804.86608
56701,zmb,Zambia,2100,19.13,24859.65796


In [10]:
# Write the merged table. to_csv does not create missing parent directories,
# so create the output folder first to keep the cell runnable on a fresh checkout.
merged_path = Path("data/preprocessed/merged_tidy.csv")
merged_path.parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(merged_path, index=False)