In [14]:
import pandas as pd

In [16]:
# --- Load data ---
# `eurostat` holds the raw extract; `nace` is the lookup table that is later
# joined on its "Economic Activity" column to attach a `sector` label.
eurostat = pd.read_csv(filepath_or_buffer='original_data/eurostat.csv')
nace = pd.read_csv(filepath_or_buffer='transformed_data/economic_activity_sector.csv')

In [17]:
# --- Prepare Eurostat aging data ---
# Narrow the raw frame to the columns the remaining cells read.
# `.copy()` produces an independent frame instead of a slice of the raw one,
# so later column assignments do not refer back to the original data.
selected_cols = ['age', 'sex', 'nace_r2', 'geo', 'TIME_PERIOD', 'OBS_VALUE']
eurostat = eurostat.loc[:, selected_cols].copy()

In [18]:
# Keep only EU members
# Country names (27 entries, alphabetical) matched against the `geo` column
# when filtering; spelling must agree with the labels used in the data.
eu_countries = [
    'Austria',
    'Belgium', 'Bulgaria',
    'Croatia', 'Cyprus', 'Czechia',
    'Denmark',
    'Estonia',
    'Finland', 'France',
    'Germany', 'Greece',
    'Hungary',
    'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg',
    'Malta',
    'Netherlands',
    'Poland', 'Portugal',
    'Romania',
    'Slovakia', 'Slovenia', 'Spain', 'Sweden',
]

In [19]:
# Filter relevant observations
# A row is kept only when all four conditions hold:
#   - the period is one of the two comparison years,
#   - the sex breakdown is the "Total" aggregate,
#   - the age band is either the overall (15+) or the older (50+) group,
#   - the country is in `eu_countries`.
eurostat = eurostat.loc[
    lambda frame: (
        frame["TIME_PERIOD"].isin([2014, 2024])
        & frame["sex"].eq("Total")
        & frame["age"].isin(["15 years or over", "50 years or over"])
        & frame["geo"].isin(eu_countries)
    )
]

In [20]:
# Merge with sectors
# Left join keeps every Eurostat row; rows whose `nace_r2` has no match in
# `nace["Economic Activity"]` end up with a missing `sector`.
# Columns are renamed by name (not by position) and the scaled worker count is
# produced with `assign`, so no column is ever written onto a slice of another
# frame (which triggers SettingWithCopyWarning on pandas < 3).
eurostat = (
    eurostat
    .merge(nace, how='left', left_on='nace_r2', right_on="Economic Activity")
    [['age', 'sector', 'geo', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={
        'geo': 'country',
        'TIME_PERIOD': 'year',
        'OBS_VALUE': 'num_of_workers',
    })
    # NOTE(review): the factor 1000 presumably converts "thousand persons"
    # into persons -- confirm against the unit of the source dataset.
    .assign(num_of_workers=lambda frame: frame["num_of_workers"] * 1000)
)

# Drop aggregate / unclassified categories so they are not counted as sectors.
excluded = ['No response', 'Total - all NACE activities', 'Unknown NACE activity']
eurostat = eurostat.loc[~eurostat["sector"].isin(excluded)]

In [22]:
# Pivot for aging ratios
# One row per (country, sector); one column per (age band, year) pair holding
# the summed worker count. The resulting columns form a two-level MultiIndex.
eurostat = pd.pivot_table(
    eurostat,
    values="num_of_workers",
    index=["country", "sector"],
    columns=["age", "year"],
    aggfunc="sum",
)

In [23]:
# Flatten MultiIndex and rename columns for merging
# After `reset_index()` every column label is a 2-tuple: the former index
# levels look like ("country", "") and the value columns like
# ("50 years or over", 2014). Joining with "_" and stripping the trailing
# underscore yields "country" and "50 years or over_2014" respectively.
eurostat = eurostat.reset_index()
eurostat.columns = [
    (f"{label[0]}_{label[1]}" if isinstance(label, tuple) else label).rstrip("_")
    for label in eurostat.columns
]

In [24]:
# Compute aging ratios
# aging_score_<year> = workers aged 50+ as a percentage of all workers aged 15+.
# A zero denominator is masked to NaN first: otherwise a non-zero numerator
# divided by 0 yields +/-inf, which `fillna` does not touch and which would be
# written to the output file and propagate into the difference column.
eurostat["aging_score_2014"] = (
    eurostat["50 years or over_2014"]
    / eurostat["15 years or over_2014"].replace(0, float("nan"))
    * 100
)
eurostat["aging_score_2024"] = (
    eurostat["50 years or over_2024"]
    / eurostat["15 years or over_2024"].replace(0, float("nan"))
    * 100
)
# Change in the share of 50+ workers between the two years, in percentage points.
eurostat["aging_diff_2014_2024"] = eurostat["aging_score_2024"] - eurostat["aging_score_2014"]
# Missing counts and undefined ratios/differences are all stored as 0.
# NOTE(review): a 0 here therefore means "no data", not "no change" -- confirm
# downstream consumers treat it that way.
eurostat = eurostat.fillna(0)

In [26]:
# Save
# Write the per-(country, sector) table to CSV; the row index is omitted
# because it carries no information after the earlier `reset_index()`.
eurostat.to_csv(path_or_buf="transformed_data/eurostat_full.csv", index=False)