In [36]:
import pandas as pd

In [37]:
# --- Load data ---
# Two CSV inputs, both read with pandas defaults:
#   eurostat - the raw Eurostat extract (original_data/)
#   nace     - the economic-activity lookup table (transformed_data/)
eurostat = pd.read_csv(filepath_or_buffer='original_data/eurostat.csv')
nace = pd.read_csv(filepath_or_buffer='transformed_data/economic_activity_sector.csv')

In [38]:
# --- Prepare Eurostat aging data ---
# Narrow the raw extract to the six columns the rest of the notebook uses.
# The explicit .copy() gives an independent frame rather than a view of the
# originally loaded data.
selected_cols = [
    'age',
    'sex',
    'nace_r2',
    'geo',
    'TIME_PERIOD',
    'OBS_VALUE',
]
eurostat = eurostat.loc[:, selected_cols].copy()

In [39]:
# Country names kept when filtering on the `geo` column (27 entries,
# in alphabetical order).
eu_countries = [
    'Austria', 'Belgium', 'Bulgaria',
    'Croatia', 'Cyprus', 'Czechia',
    'Denmark', 'Estonia', 'Finland',
    'France', 'Germany', 'Greece',
    'Hungary', 'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg',
    'Malta', 'Netherlands', 'Poland',
    'Portugal', 'Romania', 'Slovakia',
    'Slovenia', 'Spain', 'Sweden',
]

# `nace_r2` labels that are removed from the data: non-responses, the
# all-activities total and the unknown-activity bucket.
excluded = [
    'No response',
    'Total - all NACE activities',
    'Unknown NACE activity',
]

In [40]:
# Filter relevant observations.
# A row is kept only when ALL of the following hold:
#   - the period is 2014 or 2024
#   - sex is the "Total" aggregate
#   - the age band is either "15 years or over" or "50 years or over"
#   - the country is listed in `eu_countries`
#   - the NACE activity is not one of the `excluded` labels
eurostat = eurostat.loc[
    eurostat["TIME_PERIOD"].isin([2014, 2024])
    & eurostat["sex"].eq("Total")
    & eurostat["age"].isin(["15 years or over", "50 years or over"])
    & eurostat["geo"].isin(eu_countries)
    & ~eurostat["nace_r2"].isin(excluded)
]

In [41]:
# Attach a sector label to every observation by matching its NACE activity
# name (`nace_r2`) against the "Economic Activity" column of the lookup table.
#
# Only the join key and `sector` are taken from `nace`, and exact duplicate
# mapping rows are dropped first: a repeated key on the right-hand side would
# fan out the Eurostat rows and inflate any worker totals summed afterwards.
# validate="many_to_one" makes pandas raise a MergeError if one activity still
# maps to more than one sector, instead of silently duplicating rows.
#
# NOTE: with how='left', activities that are missing from the lookup keep
# their rows but end up with a NaN sector.
eurostat = eurostat.merge(
    nace[["Economic Activity", "sector"]].drop_duplicates(),
    how='left',
    left_on='nace_r2',
    right_on="Economic Activity",
    validate="many_to_one",
)

In [43]:
# Keep the five analysis columns and give the Eurostat field names friendlier
# labels; `age` and `sector` keep their names.
eurostat = eurostat[['age', 'sector', 'geo', 'TIME_PERIOD', 'OBS_VALUE']].rename(
    columns={
        'geo': 'country',
        'TIME_PERIOD': 'year',
        'OBS_VALUE': 'num_of_workers',
    }
)

In [44]:
# Total workers per (sector, year, age band), summed over all countries,
# then scaled by 1000.
# NOTE(review): the x1000 presumably converts a "thousands of persons" unit
# into head counts - confirm against the Eurostat dataset metadata.
eurostat_economic_activity = (
    eurostat
    .groupby(["sector", "year", "age"])["num_of_workers"]
    .sum()
    .mul(1000)
    .reset_index()
)

In [45]:
# Pivot for aging ratios: reshape from long to wide so that each sector is a
# single row and every (age band, year) pair becomes its own column.
eurostat_economic_activity = pd.pivot_table(
    eurostat_economic_activity,
    values="num_of_workers",
    index=["sector"],
    columns=["age", "year"],
    aggfunc="sum",
)

In [46]:
# Flatten MultiIndex and rename columns for merging.
# After reset_index() every column label is a tuple: ("sector", "") for the
# former index and (age band, year) for the value columns. Joining the two
# levels with "_" gives names such as "50 years or over_2014"; the trailing
# "_" left behind by an empty second level is stripped, so "sector_" becomes
# "sector".
eurostat_economic_activity = eurostat_economic_activity.reset_index()
eurostat_economic_activity.columns = [
    (f"{label[0]}_{label[1]}" if isinstance(label, tuple) else label).rstrip("_")
    for label in eurostat_economic_activity.columns
]

In [47]:
# Compute aging ratios.
# aging_score_<year>   : workers aged 50+ as a percentage of workers aged 15+
# aging_diff_2014_2024 : change in that percentage from 2014 to 2024
#                        (2024 score minus 2014 score, in percentage points)
eurostat_economic_activity["aging_score_2014"] = (
    eurostat_economic_activity["50 years or over_2014"]
    .div(eurostat_economic_activity["15 years or over_2014"])
    .mul(100)
)
eurostat_economic_activity["aging_score_2024"] = (
    eurostat_economic_activity["50 years or over_2024"]
    .div(eurostat_economic_activity["15 years or over_2024"])
    .mul(100)
)
eurostat_economic_activity["aging_diff_2014_2024"] = (
    eurostat_economic_activity["aging_score_2024"]
    .sub(eurostat_economic_activity["aging_score_2014"])
)

In [49]:
# Persist the per-sector aging table; index=False leaves the row index out of
# the CSV so the file contains only the data columns.
eurostat_economic_activity.to_csv(
    path_or_buf="transformed_data/eurostat_economic_activity.csv",
    index=False,
)