In [None]:
import os
from pathlib import Path

import kagglehub
import pandas as pd


# Project root: two directory levels above the current working directory.
root = Path.cwd().parent.parent

# Print the stem of each JSON file under data/ whose file name mentions
# none of the keywords below.
_skipped_keywords = ("summary", "cost_of_living", "happiness")
for f in (root / "data").rglob("*.json"):
    if any(keyword in f.name for keyword in _skipped_keywords):
        continue
    print(f.stem)

In [None]:
# Print every distinct value of the "item" column, lower-cased.
cost_of_living = pd.read_json(root / "data/cost_of_living.json")
distinct_items = cost_of_living["item"].drop_duplicates()
for i in distinct_items.values:
    print(i.lower())

In [None]:

class HappinessTask:
    """Extract-transform-load task for the "unsdsn/world-happiness" dataset.

    ``extract`` downloads the dataset and stacks its CSV files, ``transform``
    keeps a fixed set of columns for rows that have a happiness rank, and
    ``load`` writes the result as JSON records below the directory named by
    the ``SIGN_TO_MIGRATE_ROOT`` environment variable.
    """

    # File name of the JSON output, written inside ``<root>/data/``.
    output_name = "happiness.json"

    @staticmethod
    def _read_and_attatch_year(path: Path) -> pd.DataFrame:
        """Read one CSV file and tag every row with the file's stem.

        Args:
            path: Location of the CSV file.

        Returns:
            The parsed frame with an extra ``Year`` column holding
            ``path.stem`` (a string, not an integer).
        """
        df = pd.read_csv(path)
        df["Year"] = path.stem
        return df

    def extract(self) -> pd.DataFrame:
        """Download the dataset and concatenate all of its CSV files.

        Returns:
            One frame with the rows of every ``*.csv`` file found directly in
            the downloaded location, each tagged with a ``Year`` column.

        Raises:
            ValueError: Raised by ``pd.concat`` when no CSV file is found.
        """
        path: str = kagglehub.dataset_download("unsdsn/world-happiness")
        # Sort so the row order does not depend on filesystem listing order.
        csv_paths = sorted(Path(path).glob("*.csv"))
        return pd.concat([self._read_and_attatch_year(p) for p in csv_paths])

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep the selected columns for rows that have a happiness rank.

        Args:
            df: Frame to filter; it must contain every selected column.

        Returns:
            A new frame restricted to the selected columns and to the rows
            whose ``Happiness.Rank`` is not missing.

        Raises:
            KeyError: If any selected column is absent from ``df``.
        """
        columns = [
            "Country",
            "Year",
            "Happiness.Rank",
            "Happiness.Score",
            "Economy..GDP.per.Capita.",
            "Family",
            "Health..Life.Expectancy.",
            "Freedom",
            "Generosity",
            "Trust..Government.Corruption.",
            "Dystopia.Residual",
        ]
        # NOTE(review): pd.concat fills columns that a file lacks with NaN, so
        # rows from CSVs that name the rank column differently are dropped
        # here -- confirm that this is intended.
        has_rank = df["Happiness.Rank"].notna()
        # Single .loc call instead of chained df[cols][mask] indexing.
        return df.loc[has_rank, columns]

    def load(self, df: pd.DataFrame) -> None:
        """Write ``df`` as JSON records to ``<root>/data/<output_name>``.

        ``<root>`` is read from the ``SIGN_TO_MIGRATE_ROOT`` environment
        variable. The ``data`` directory is not created by this method.

        Args:
            df: Frame to serialise.

        Raises:
            RuntimeError: If ``SIGN_TO_MIGRATE_ROOT`` is not set.
        """
        root_dir = os.getenv("SIGN_TO_MIGRATE_ROOT")
        if root_dir is None:
            # Fail with a clear message instead of the TypeError Path(None) raises.
            raise RuntimeError("SIGN_TO_MIGRATE_ROOT environment variable is not set")
        # orient="records" never emits the index; an explicit index=False is
        # rejected by older pandas releases, so the argument is omitted.
        df.to_json(Path(root_dir) / "data" / self.output_name, orient="records")

In [None]:
# Show the lower-cased name of every column in the raw extracted frame.
raw_happiness = HappinessTask().extract()
for i in raw_happiness.columns:
    print(i.lower())