Ce notebook sert à composer les jeux de données par année en regroupant les indices de bonheur avec les autres indicateurs.

In [1]:
import pandas as pd

In [54]:
def fixer_nom_pays_ind(df: pd.DataFrame) -> pd.DataFrame:
    """Align indicator country names with the names used by the happiness report.

    Args:
        df: Indicator frame containing a ``Country`` column.

    Returns:
        A new frame in which every ``Country`` value that exactly matches a key
        of the mapping below is replaced. The input frame is not modified, so
        passing a filtered slice no longer raises ``SettingWithCopyWarning``.

    Raises:
        KeyError: If ``df`` has no ``Country`` column.
    """
    # Whole-value matches only: "Congo" never alters "Congo (Kinshasa)".
    # No replacement value is itself a key, so a single pass is equivalent to
    # applying the replacements one after another.
    correspondance = {
        "Korea, South": "South Korea",
        "Congo, Democratic Republic of the": "Congo (Kinshasa)",
        "Congo, Republic of the": "Congo (Brazzaville)",
        "Congo": "Congo (Brazzaville)",
        "Burma": "Myanmar",
        "Cote d'Ivoire": "Ivory Coast",
        "Czech Republic": "Czechia",
        "Macedonia": "North Macedonia",  # renamed in 2019
        "Gambia, The": "Gambia",
        "Swaziland": "Eswatini",  # renamed in 2018
        "Turkey (Turkiye)": "Turkiye",  # renamed in 2022
        "Turkey": "Turkiye",  # renamed in 2022
    }
    # assign() keeps the column position and returns a copy.
    return df.assign(Country=df["Country"].replace(correspondance))

In [55]:
def fixer_nom_pays_rapport(df: pd.DataFrame) -> pd.DataFrame:
    """Align happiness-report country names with the names used by the indicators.

    Trailing ``*`` characters are stripped from each country name first, then
    exact whole-value matches are renamed using the mapping below.

    Args:
        df: Happiness-report frame containing a ``Country`` column.

    Returns:
        A new frame with the corrected ``Country`` column. The input frame is
        not modified, and missing country values stay missing instead of
        raising ``TypeError``.

    Raises:
        KeyError: If ``df`` has no ``Country`` column.
    """
    # No replacement value is itself a key, so one pass equals sequential replaces.
    correspondance = {
        "Congo": "Congo (Brazzaville)",
        "Czech Republic": "Czechia",
        "Taiwan Province of China": "Taiwan",
        "Trinidad & Tobago": "Trinidad and Tobago",
        "Macedonia": "North Macedonia",  # renamed in 2019
        "Swaziland": "Eswatini",  # renamed in 2018
        "Eswatini, Kingdom of": "Eswatini",
        "Turkey": "Turkiye",  # renamed in 2022
    }
    # The vectorised .str accessor propagates missing values, unlike
    # apply(str.rstrip, ...) which fails on any non-string element.
    pays = df["Country"].str.rstrip("*").replace(correspondance)
    return df.assign(Country=pays)

In [57]:
def fixer_nom_colonnes_rapport(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the column names found in the happiness-report CSV files.

    Any of the known spellings of the country column is renamed to ``Country``
    and any known spelling of the score column to ``Happiness Score``. Columns
    with other names are left as they are.

    Args:
        df: Frame read from a happiness-report CSV.

    Returns:
        A renamed copy of ``df``.
    """
    alias_pays = ("Country name", "Country or region")
    alias_score = ("Score", "Happiness.Score", "Happiness score", "Ladder score")

    renommage = dict.fromkeys(alias_pays, "Country")
    renommage.update(dict.fromkeys(alias_score, "Happiness Score"))
    return df.rename(columns=renommage)

In [35]:
def creer_df_annuel(annee) -> pd.DataFrame:
    """Build the merged frame for one year: happiness report plus indicators.

    Reads ``data/bonheur/{annee}.csv``, normalises its column and country
    names, then left-joins each indicator read from
    ``data/worldfacts/{annee}/{indicateur}.csv`` on the country name. For every
    indicator, the countries left without a value are printed.

    Args:
        annee: Year, interpolated as-is into the input file paths.

    Returns:
        The happiness frame indexed by ``Country`` (the column is also kept),
        with one float column added per indicator.

    Raises:
        ValueError: If country names are duplicated in a file
            (``verify_integrity``), if the join is not one-to-one, or if a
            remaining indicator value cannot be converted to float.
    """
    # Read the happiness report
    df_bonheur = pd.read_csv(f"data/bonheur/{annee}.csv")
    df_bonheur = fixer_nom_pays_rapport(fixer_nom_colonnes_rapport(df_bonheur))
    df_bonheur = df_bonheur.set_index("Country", drop=False, verify_integrity=True)

    indicateurs = ["nativity",
                   "migration",
                   "health_expend",
                   "educ_expend",
                   "gdp",
                   "taxes",
                   "budget",
                   "milit"
                  ]

    # Placeholder strings that stand for "no value" in the indicator files:
    # "#VALUE!" marks a missing value, "A " / "NA " come from a converted NA
    # GDP, and "N" from an NA -> N conversion.
    valeurs_invalides = ("#VALUE!", "A ", "NA ", "N")

    for indicateur in indicateurs:
        # Read the indicator file
        df_ind = pd.read_csv(f"data/worldfacts/{annee}/{indicateur}.csv", sep=";", converters={'Country': str.strip})
        # Keep only the country and the plain textual value of the indicator:
        # the country must be at position 0 and the value at position 2
        df_ind = df_ind.iloc[:, [0, 2]]

        # Drop empty rows; copy so later assignments act on an independent frame
        df_ind = df_ind.loc[df_ind.Country != ""].copy()

        df_ind = fixer_nom_pays_ind(df_ind)
        df_ind = df_ind.set_index("Country", drop=True, verify_integrity=True)

        # Give the data column the indicator's name
        df_ind.columns = [indicateur]

        # Drop rows holding a placeholder instead of a value
        for valeur_invalide in valeurs_invalides:
            df_ind = df_ind.loc[df_ind[indicateur] != valeur_invalide]
        df_ind = df_ind.copy()

        # Values loaded as text (e.g. GDP) use "," as thousands separator.
        # Decide from the column dtype rather than from the first row: a
        # positional [0] on a label-indexed Series is deprecated and fails on
        # an empty frame. Non-string elements are passed through unchanged.
        if not pd.api.types.is_numeric_dtype(df_ind[indicateur]):
            df_ind[indicateur] = df_ind[indicateur].map(
                lambda valeur: valeur.replace(",", "") if isinstance(valeur, str) else valeur
            )

        # Join with df_bonheur
        df_bonheur = df_bonheur.join(df_ind, validate="1:1")
        # Convert to float
        df_bonheur[indicateur] = df_bonheur[indicateur].astype(float)

        # Report the countries missing this indicator
        print(f"Pays manquant l'indicateur: {indicateur}.")
        print(df_bonheur.loc[df_bonheur[indicateur].isnull()].filter(items=['Country', indicateur]))
        print("--------------------------------------------------")

    return df_bonheur

In [60]:
# Build the merged 2015 frame, then preview only the columns of interest.
df_b2015 = creer_df_annuel("2015")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2015.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                         Country  nativity
Country                                                   
North Cyprus                        North Cyprus       NaN
Kosovo                                    Kosovo       NaN
Somaliland region              Somaliland region       NaN
Palestinian Territories  Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                         Country  migration
Country                                                    
North Cyprus                        North Cyprus        NaN
Kosovo                                    Kosovo        NaN
Montenegro                            Montenegro        NaN
Somaliland region              Somaliland region        NaN
Palestinian Territories  Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
    

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Switzerland,Switzerland,7.587,10.50,4.74,11.5,5.3,58100.0,33.6,0.3,0.64
Iceland,Iceland,7.561,13.91,4.43,9.1,7.4,43600.0,45.3,0.1,0.13
Denmark,Denmark,7.527,10.27,2.20,10.6,8.7,44300.0,55.5,-1.4,1.37
Norway,Norway,7.522,12.14,7.25,9.6,6.6,66900.0,57.5,12.5,1.40
Canada,Canada,7.427,10.28,5.66,10.9,5.3,44800.0,37.6,-2.3,1.00
...,...,...,...,...,...,...,...,...,...,...
Rwanda,Rwanda,3.465,33.75,0.85,11.1,5.1,1700.0,24.4,-3.7,1.12
Benin,Benin,3.340,36.02,0.00,4.6,5.3,1900.0,20.4,-2.0,1.03
Syria,Syria,3.006,22.17,-19.79,3.3,4.9,5100.0,2.7,-5.8,
Burundi,Burundi,2.905,42.01,0.00,8.0,5.8,900.0,27.9,-2.8,2.39


In [61]:
# Build the merged 2016 frame, then preview only the columns of interest.
df_b2016 = creer_df_annuel("2016")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2016.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                         Country  nativity
Country                                                   
North Cyprus                        North Cyprus       NaN
Kosovo                                    Kosovo       NaN
Somaliland Region              Somaliland Region       NaN
Palestinian Territories  Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                         Country  migration
Country                                                    
North Cyprus                        North Cyprus        NaN
Kosovo                                    Kosovo        NaN
Montenegro                            Montenegro        NaN
Somaliland Region              Somaliland Region        NaN
Palestinian Territories  Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
    

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Denmark,Denmark,7.526,10.4,2.1,10.8,8.5,45700.0,54.0,-1.7,1.20
Switzerland,Switzerland,7.509,10.5,4.7,11.7,5.1,58600.0,34.7,0.2,0.64
Iceland,Iceland,7.501,13.8,4.2,8.9,7.0,46100.0,42.2,-0.5,0.13
Norway,Norway,7.498,12.2,6.6,9.7,7.4,68400.0,54.3,5.9,1.59
Finland,Finland,7.413,10.7,3.0,9.7,7.2,41100.0,55.6,-2.8,1.37
...,...,...,...,...,...,...,...,...,...,...
Benin,Benin,3.484,35.5,0.0,4.6,4.4,2100.0,17.4,-6.4,1.03
Afghanistan,Afghanistan,3.360,38.3,-1.2,8.2,,1900.0,8.2,-23.7,28.09
Togo,Togo,3.303,33.7,0.0,5.2,4.8,1500.0,25.5,-5.7,
Syria,Syria,3.069,21.7,-2.1,3.3,5.1,5100.0,15.9,-7.3,


In [62]:
# Build the merged 2017 frame, then preview only the columns of interest.
df_b2017 = creer_df_annuel("2017")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2017.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                         Country  nativity
Country                                                   
North Cyprus                        North Cyprus       NaN
Hong Kong S.A.R., China  Hong Kong S.A.R., China       NaN
Kosovo                                    Kosovo       NaN
Palestinian Territories  Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                         Country  migration
Country                                                    
North Cyprus                        North Cyprus        NaN
Hong Kong S.A.R., China  Hong Kong S.A.R., China        NaN
Kosovo                                    Kosovo        NaN
Montenegro                            Montenegro        NaN
Palestinian Territories  Palestinian Territories        NaN
Syria                                      Syria        NaN
--------------------------------------

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Norway,Norway,7.537,12.2,5.9,9.7,7.4,69400.0,54.2,3.0,1.62
Denmark,Denmark,7.522,10.5,2.1,10.8,8.6,48200.0,52.9,-0.6,1.15
Iceland,Iceland,7.504,13.7,4.0,8.9,7.8,49200.0,58.4,17.2,0.10
Switzerland,Switzerland,7.494,10.5,4.7,11.7,5.1,60400.0,34.9,0.7,0.71
Finland,Finland,7.469,10.7,2.9,9.7,7.2,42300.0,54.2,-1.9,1.33
...,...,...,...,...,...,...,...,...,...,...
Rwanda,Rwanda,3.471,30.7,0.2,7.5,5.0,2000.0,23.3,-3.7,1.21
Syria,Syria,3.462,21.2,,3.3,5.1,2900.0,2.0,-9.7,
Tanzania,Tanzania,3.349,35.6,-0.5,5.6,3.5,3100.0,14.0,-3.3,1.14
Burundi,Burundi,2.905,41.3,0.0,7.5,5.4,800.0,16.9,-4.5,2.21


In [63]:
# Build the merged 2018 frame, then preview only the columns of interest.
df_b2018 = creer_df_annuel("2018")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2018.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                         Country  nativity
Country                                                   
Northern Cyprus                  Northern Cyprus       NaN
Kosovo                                    Kosovo       NaN
Palestinian Territories  Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                         Country  migration
Country                                                    
Northern Cyprus                  Northern Cyprus        NaN
Kosovo                                    Kosovo        NaN
Montenegro                            Montenegro        NaN
Palestinian Territories  Palestinian Territories        NaN
Syria                                      Syria        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                         Country  health_expend

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7.632,10.7,2.9,9.7,7.2,44500.0,53.1,-0.6,1.23
Norway,Norway,7.594,12.2,5.9,9.7,7.7,72100.0,54.4,4.4,1.61
Denmark,Denmark,7.555,10.9,2.1,10.8,7.6,50100.0,53.0,1.1,1.21
Iceland,Iceland,7.495,13.6,4.0,8.9,7.8,52200.0,42.4,1.5,0.10
Switzerland,Switzerland,7.487,10.5,4.7,11.7,5.1,62100.0,35.7,1.1,0.71
...,...,...,...,...,...,...,...,...,...,...
Yemen,Yemen,3.355,27.6,0.4,5.6,4.6,2500.0,9.0,-5.2,3.97
Tanzania,Tanzania,3.303,35.3,-0.5,5.6,3.5,3200.0,15.2,-1.8,1.14
South Sudan,South Sudan,3.254,36.9,10.6,2.7,1.8,1600.0,8.5,-1.3,10.93
Central African Republic,Central African Republic,3.083,34.0,0.0,4.2,1.2,700.0,14.6,-0.9,


In [64]:
# Build the merged 2019 frame, then preview only the columns of interest.
df_b2019 = creer_df_annuel("2019")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2019.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                         Country  nativity
Country                                                   
Kosovo                                    Kosovo       NaN
Northern Cyprus                  Northern Cyprus       NaN
Palestinian Territories  Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                         Country  migration
Country                                                    
Northern Cyprus                  Northern Cyprus        NaN
Palestinian Territories  Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                         Country  health_expend
Country                                                        
Taiwan                                    Taiwan            NaN
Kosovo                                    Kosovo   

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7.769,10.7,2.8,9.5,6.9,44500.0,53.1,-0.6,1.29
Denmark,Denmark,7.600,10.9,4.3,10.4,7.6,50100.0,53.0,1.1,1.32
Norway,Norway,7.554,12.2,5.3,10.5,8.0,72100.0,54.4,4.4,1.80
Iceland,Iceland,7.494,13.6,3.7,8.3,7.5,52200.0,42.4,1.5,0.30
Netherlands,Netherlands,7.488,10.9,1.9,10.4,5.5,53900.0,43.4,1.1,1.36
...,...,...,...,...,...,...,...,...,...,...
Rwanda,Rwanda,3.334,29.8,-0.5,6.8,3.1,2100.0,21.3,-4.3,1.23
Tanzania,Tanzania,3.231,35.3,-0.5,4.1,3.4,3200.0,15.2,-1.8,1.21
Afghanistan,Afghanistan,3.203,37.5,-0.6,10.2,4.1,2000.0,11.2,-15.1,0.99
Central African Republic,Central African Republic,3.083,34.0,0.0,4.3,1.2,700.0,14.6,-0.9,1.41


In [65]:
# Build the merged 2020 frame, then preview only the columns of interest.
df_b2020 = creer_df_annuel("2020")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2020.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                             Country  nativity
Country                                                       
North Cyprus                            North Cyprus       NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China       NaN
Palestinian Territories      Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                             Country  migration
Country                                                        
North Cyprus                            North Cyprus        NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China        NaN
Palestinian Territories      Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                             Country  health_expend
Country                                                            
Taiwan 

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7.8087,10.6,2.6,9.2,6.4,49380.0,53.1,-0.6,1.50
Denmark,Denmark,7.6456,11.1,2.8,10.1,7.8,66351.0,53.0,1.1,1.32
Switzerland,Switzerland,7.5599,10.5,4.6,12.3,5.1,81475.0,35.7,1.1,0.70
Iceland,Iceland,7.5045,13.3,3.3,8.3,7.7,54696.0,42.4,1.5,0.30
Norway,Norway,7.4880,12.2,4.0,10.4,7.9,91781.0,54.4,4.4,1.80
...,...,...,...,...,...,...,...,...,...,...
Central African Republic,Central African Republic,3.4759,33.2,0.0,5.8,1.2,700.0,14.6,-0.9,1.50
Rwanda,Rwanda,3.3123,27.9,-3.3,6.6,3.1,2100.0,21.3,-4.3,1.20
Zimbabwe,Zimbabwe,3.2992,33.6,-5.0,6.6,5.9,1306.0,21.5,-9.6,1.00
South Sudan,South Sudan,2.8166,38.8,0.2,9.8,1.5,1600.0,8.5,-1.3,3.50


In [67]:
# Build the merged 2021 frame, then preview only the columns of interest.
df_b2021 = creer_df_annuel("2021")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2021.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                             Country  nativity
Country                                                       
North Cyprus                            North Cyprus       NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China       NaN
Palestinian Territories      Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                             Country  migration
Country                                                        
North Cyprus                            North Cyprus        NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China        NaN
North Macedonia                      North Macedonia        NaN
Palestinian Territories      Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                             Country  health_expend
Country    

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7.842,10.49,2.46,9.0,6.3,47300.0,53.1,-0.6,2.20
Denmark,Denmark,7.620,11.17,2.74,10.1,7.8,55900.0,53.0,1.1,1.40
Switzerland,Switzerland,7.571,10.41,4.55,11.9,4.9,68400.0,35.7,1.1,0.80
Iceland,Iceland,7.554,13.12,3.09,8.5,7.6,52300.0,42.4,1.5,
Netherlands,Netherlands,7.464,10.98,1.90,10.0,5.4,54200.0,43.4,1.1,1.49
...,...,...,...,...,...,...,...,...,...,...
Lesotho,Lesotho,3.512,23.30,-4.59,9.3,7.4,2300.0,39.7,-6.0,1.60
Botswana,Botswana,3.467,20.60,2.85,5.9,6.9,16000.0,30.5,-1.0,3.00
Rwanda,Rwanda,3.415,27.18,-3.27,7.5,3.4,2100.0,21.3,-4.3,1.30
Zimbabwe,Zimbabwe,3.145,33.34,-4.93,4.7,3.6,2700.0,21.5,-9.6,0.70


In [58]:
# Build the merged 2022 frame, then preview only the columns of interest.
df_b2022 = creer_df_annuel("2022")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2022.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                             Country  nativity
Country                                                       
North Cyprus                            North Cyprus       NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China       NaN
Palestinian Territories      Palestinian Territories       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                             Country  migration
Country                                                        
North Cyprus                            North Cyprus        NaN
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China        NaN
Palestinian Territories      Palestinian Territories        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                             Country  health_expend
Country                                                            
Taiwan 

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7821,10.42,2.35,9.2,5.9,47300.0,53.1,-0.6,2.0
Denmark,Denmark,7636,11.22,2.73,10.0,6.4,55900.0,53.0,1.1,1.4
Iceland,Iceland,7557,12.96,2.89,8.6,7.7,52300.0,42.4,1.5,
Switzerland,Switzerland,7512,10.36,4.52,11.3,5.2,68400.0,35.7,1.1,0.7
Netherlands,Netherlands,7415,10.99,1.90,10.1,5.3,54200.0,43.4,1.1,1.7
...,...,...,...,...,...,...,...,...,...,...
Botswana,Botswana,3471,20.28,2.81,6.1,8.7,16000.0,30.5,-1.0,3.0
Rwanda,Rwanda,3268,26.44,-3.21,6.4,3.8,2100.0,21.3,-4.3,1.4
Zimbabwe,Zimbabwe,2995,33.07,-4.83,7.7,3.9,2700.0,21.5,-9.6,2.6
Lebanon,Lebanon,2955,13.10,-0.94,8.7,1.7,11600.0,21.5,-6.9,3.2


In [68]:
# Build the merged 2023 frame, then preview only the columns of interest.
df_b2023 = creer_df_annuel("2023")
colonnes_apercu = [
    "Country", "Happiness Score",
    "nativity", "migration", "health_expend", "educ_expend",
    "gdp", "taxes", "budget", "milit",
]
df_b2023.filter(items=colonnes_apercu)

Pays manquant l'indicateur: nativity.
                                             Country  nativity
Country                                                       
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China       NaN
State of Palestine                State of Palestine       NaN
--------------------------------------------------
Pays manquant l'indicateur: migration.
                                             Country  migration
Country                                                        
Hong Kong S.A.R. of China  Hong Kong S.A.R. of China        NaN
State of Palestine                State of Palestine        NaN
--------------------------------------------------
Pays manquant l'indicateur: health_expend.
                                             Country  health_expend
Country                                                            
Taiwan                                        Taiwan            NaN
Kosovo                                        Kosovo            Na

Unnamed: 0_level_0,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Finland,Finland,7.804,10.33,2.25,9.6,5.9,48800.0,19.85,-0.6,1.40
Denmark,Denmark,7.586,11.25,2.71,10.5,6.4,58000.0,34.07,1.1,1.15
Iceland,Iceland,7.530,12.80,2.69,9.6,7.7,53600.0,22.33,1.5,
Israel,Israel,7.473,17.30,1.99,8.3,7.1,39400.0,21.90,-2.0,5.50
Netherlands,Netherlands,7.403,10.98,1.89,11.1,5.3,56600.0,24.38,1.1,1.20
...,...,...,...,...,...,...,...,...,...,...
Congo (Kinshasa),Congo (Kinshasa),3.207,39.64,-0.63,4.1,2.7,1100.0,11.20,-0.9,1.30
Zimbabwe,Zimbabwe,3.204,32.77,-4.74,3.4,3.9,2100.0,7.21,-9.6,1.80
Sierra Leone,Sierra Leone,3.138,31.49,1.72,8.8,9.1,1600.0,15.60,-7.9,0.40
Lebanon,Lebanon,2.392,12.86,-0.94,8.0,1.7,13000.0,8.90,-6.9,4.60


### Sauvegarde des fichiers annuels 

In [80]:
# Write each yearly merged frame to data/merged/<year>.csv
# (semicolon-separated, index not written).
annuels_a_sauver = {
    "2015": df_b2015,
    "2016": df_b2016,
    "2017": df_b2017,
    "2018": df_b2018,
    "2019": df_b2019,
    "2020": df_b2020,
    "2021": df_b2021,
    "2022": df_b2022,
    "2023": df_b2023,
}
for nom_annee, df_annuel in annuels_a_sauver.items():
    df_annuel.to_csv(f"data/merged/{nom_annee}.csv", sep=";", index=False)

### Sauvegarde du fichier global

In [112]:
# Columns kept in the global file; filter() silently skips any that a yearly
# frame does not have.
colonnes = ["annee", "Country", "Happiness Score", "nativity","migration","health_expend","educ_expend","gdp","taxes","budget","milit"]

# Yearly frames keyed by the year stamped into their "annee" column.
df_par_annee = {
    2015: df_b2015,
    2016: df_b2016,
    2017: df_b2017,
    2018: df_b2018,
    2019: df_b2019,
    2020: df_b2020,
    2021: df_b2021,
    2022: df_b2022,
    2023: df_b2023,
}

morceaux = []
for annee_courante, df_annuel in df_par_annee.items():
    # As before, the yearly frame itself gains the "annee" column.
    df_annuel["annee"] = annee_courante
    morceaux.append(df_annuel.filter(items=colonnes))

# ignore_index=True produces a fresh RangeIndex, so no integrity check applies.
df_global = pd.concat(morceaux, ignore_index=True)

# Replace "," by "." before casting to float.
# NOTE(review): presumably at least one yearly file stores the score with a
# decimal comma — confirm against the source CSVs.
df_global["Happiness Score"] = (
    df_global["Happiness Score"]
    .astype(str)
    .str.replace(",", ".", regex=False)
    .astype(float)
)
df_global.to_csv("data/merged/global.csv", sep=";", index=False)

In [99]:
# Display the concatenated multi-year frame.
df_global

Unnamed: 0,annee,Country,Happiness Score,nativity,migration,health_expend,educ_expend,gdp,taxes,budget,milit
0,2015,Switzerland,7.587,10.50,4.74,11.5,5.3,58100.0,33.60,0.3,0.64
1,2015,Iceland,7.561,13.91,4.43,9.1,7.4,43600.0,45.30,0.1,0.13
2,2015,Denmark,7.527,10.27,2.20,10.6,8.7,44300.0,55.50,-1.4,1.37
3,2015,Norway,7.522,12.14,7.25,9.6,6.6,66900.0,57.50,12.5,1.40
4,2015,Canada,7.427,10.28,5.66,10.9,5.3,44800.0,37.60,-2.3,1.00
...,...,...,...,...,...,...,...,...,...,...,...
1362,2023,Congo (Kinshasa),3.207,39.64,-0.63,4.1,2.7,1100.0,11.20,-0.9,1.30
1363,2023,Zimbabwe,3.204,32.77,-4.74,3.4,3.9,2100.0,7.21,-9.6,1.80
1364,2023,Sierra Leone,3.138,31.49,1.72,8.8,9.1,1600.0,15.60,-7.9,0.40
1365,2023,Lebanon,2.392,12.86,-0.94,8.0,1.7,13000.0,8.90,-6.9,4.60
