In [1]:
import pandas as pd

In [34]:
# --- Load data ---
# Read the raw Eurostat extract; the path is relative to the notebook's
# working directory.
eurostat = pd.read_csv(filepath_or_buffer='original_data/eurostat.csv')

In [36]:
# --- Prepare Eurostat aging data ---
# Columns carried forward into the rest of the pipeline; everything else in
# the raw extract is dropped here.
selected_cols = [
    'age',
    'sex',
    'nace_r2',
    'geo',
    'TIME_PERIOD',
    'OBS_VALUE',
]
# .copy() detaches the subset from the frame it was sliced from.
eurostat = eurostat.loc[:, selected_cols].copy()

In [37]:
# Country names used to restrict the data set (27 entries, alphabetical).
# They are matched against the `geo` column, so the spelling must agree with
# the values in the source file.
eu_countries = [
    'Austria',
    'Belgium', 'Bulgaria',
    'Croatia', 'Cyprus', 'Czechia',
    'Denmark',
    'Estonia',
    'Finland', 'France',
    'Germany', 'Greece',
    'Hungary',
    'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg',
    'Malta',
    'Netherlands',
    'Poland', 'Portugal',
    'Romania',
    'Slovakia', 'Slovenia', 'Spain', 'Sweden',
]

In [38]:
# Filter relevant observations.
# A row is kept only when all four conditions hold:
#   * the period is one of the two comparison years,
#   * the sex breakdown is the "Total" aggregate,
#   * the age band is one of the two bands used for the aging ratio,
#   * the country is listed in `eu_countries`.
in_target_year = eurostat["TIME_PERIOD"].isin([2014, 2024])
is_total_sex = eurostat["sex"] == "Total"
in_age_band = eurostat["age"].isin(["15 years or over", "50 years or over"])
in_eu = eurostat["geo"].isin(eu_countries)

eurostat = eurostat[in_target_year & is_total_sex & in_age_band & in_eu]

In [39]:
# Rename the Eurostat column codes to descriptive names and keep only the
# columns needed downstream (this drops `sex`).
# An explicit old -> new mapping is used instead of assigning to `.columns`
# positionally, so a change in column order cannot silently mislabel data.
# `rename` ignores keys that are absent, which also makes this cell safe to
# re-run after the columns have already been renamed.
eurostat = eurostat.rename(columns={
    'nace_r2': 'economic_activity',
    'geo': 'country',
    'TIME_PERIOD': 'year',
    'OBS_VALUE': 'num_of_workers',
})
eurostat = eurostat[['age', 'economic_activity', 'country', 'year', 'num_of_workers']]

# Drop non-sector rows: the all-activities total and the unknown/no-response
# categories are not individual economic activities.
excluded = ['No response', 'Total - all NACE activities', 'Unknown NACE activity']
eurostat = eurostat[~eurostat["economic_activity"].isin(excluded)]

In [41]:
# Total workers per (country, year, age band), summed over all remaining
# economic activities.
workers_by_group = eurostat.groupby(["country", "year", "age"])["num_of_workers"].sum()

# Scale by 1000 (presumably the source reports values in thousands of
# persons -- TODO confirm against the Eurostat metadata), then turn the
# group keys back into ordinary columns.
eurostat_countries = (workers_by_group * 1000).reset_index()

In [43]:
# Pivot for aging ratios.
# Reshape from long to wide: one row per country, one column per
# (age band, year) pair, holding the worker count for that pair.
eurostat_countries = pd.pivot_table(
    eurostat_countries,
    values="num_of_workers",
    index=["country"],
    columns=["age", "year"],
    aggfunc="sum",
)

In [44]:
# Flatten MultiIndex and rename columns for merging.
# After reset_index() each column label is a tuple such as
# ("15 years or over", 2014); the former index is expected to appear as
# ("country", ""). Tuples are joined as "<first>_<second>" and any trailing
# underscore (left by an empty second element) is stripped, giving
# "15 years or over_2014" and "country".
eurostat_countries = eurostat_countries.reset_index()
eurostat_countries.columns = [
    (f"{label[0]}_{label[1]}" if isinstance(label, tuple) else label).rstrip("_")
    for label in eurostat_countries.columns
]

In [47]:
# Compute aging ratios.
# aging_score_<year>: workers aged 50+ as a percentage of workers aged 15+.
eurostat_countries["aging_score_2014"] = (
    eurostat_countries["50 years or over_2014"]
    .div(eurostat_countries["15 years or over_2014"])
    .mul(100)
)
eurostat_countries["aging_score_2024"] = (
    eurostat_countries["50 years or over_2024"]
    .div(eurostat_countries["15 years or over_2024"])
    .mul(100)
)
# Change in the share over the decade, in percentage points (2024 minus 2014).
eurostat_countries["aging_diff_2014_2024"] = eurostat_countries["aging_score_2024"].sub(
    eurostat_countries["aging_score_2014"]
)

In [49]:
# Persist the per-country aging table.
# NOTE(review): with the default index=True the integer row index is written
# as an unnamed first column -- confirm downstream readers expect that.
eurostat_countries.to_csv(path_or_buf="transformed_data/eurostat_countries.csv")