# Le PIB est-il un indicateur robuste de l'accès à l'alimentation ?

In [1]:
import requests
import pandas as pd
import numpy as np

## Partie 1 : Collecte des données

### Récupération des données de PIB à partir de la base de données de la banque mondiale

In [2]:
url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.KD?format=json&per_page=20000"


response = requests.get(url)

# Extraction du JSON de la réponse
PIB_hab = response.json()  # ici c’est correct

# Vérification des données
print(type(PIB_hab),len(PIB_hab))
print(PIB_hab[0]) # Métadonnées
print(PIB_hab[1][:5]) # Données de PIB par pays


<class 'list'> 2
{'page': 1, 'pages': 1, 'per_page': 20000, 'total': 17290, 'sourceid': '2', 'lastupdated': '2025-10-07'}
[{'indicator': {'id': 'NY.GDP.PCAP.KD', 'value': 'GDP per capita (constant 2015 US$)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2024', 'value': 1416.25036869408, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'NY.GDP.PCAP.KD', 'value': 'GDP per capita (constant 2015 US$)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2023', 'value': 1412.62538438423, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'NY.GDP.PCAP.KD', 'value': 'GDP per capita (constant 2015 US$)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2022', 'value': 1421.79716897878, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'NY.GDP.PCAP.KD', 'value': 'GDP per capita (constant 2015 

#Retraitement des données


Suppression des colonnes inutiles

In [3]:
donnees = PIB_hab[1]  # On ne garde pas les métadonnées

# On transforme le JSON en DataFrame et on ne garde que le PIB/hab pour chaque années avec le nom et le code du pays
liste_simplifiee = []
for item in donnees:
    nom_pays = item['country']['value']  # nom complet du pays
    annee = int(item['date'])
    pib = item['value']
    liste_simplifiee.append({'nom_pays': nom_pays, 'année': annee, 'PIB_par_habitant': pib})

PIB_hab_simple = pd.DataFrame(liste_simplifiee)

# Filtrer uniquement les années de 1960 à 2022. 
PIB_hab_simple = PIB_hab_simple[(PIB_hab_simple['année'] >= 1960) & (PIB_hab_simple['année'] <= 2022)]

# Afficher les 5 premières lignes
print(PIB_hab_simple.head(200))

                        nom_pays  année  PIB_par_habitant
2    Africa Eastern and Southern   2022       1421.797169
3    Africa Eastern and Southern   2021       1409.040699
4    Africa Eastern and Southern   2020       1383.724119
5    Africa Eastern and Southern   2019       1463.437891
6    Africa Eastern and Southern   2018       1471.438317
..                           ...    ...               ...
203       Caribbean small states   2016      13879.439408
204       Caribbean small states   2015      14402.472578
205       Caribbean small states   2014      14514.365526
206       Caribbean small states   2013      14247.783112
207       Caribbean small states   2012      14138.383887

[200 rows x 3 columns]


Filtrage pour enlever les pays contenant des valeurs manquantes

In [4]:
PIB_hab_simple_sans_NA = PIB_hab_simple.dropna()
print(len(PIB_hab_simple['nom_pays'])/(2022-1960+1),len(PIB_hab_simple_sans_NA['nom_pays'])//(2022-1960+1)) # On a enlevé 48 pays ou groupe de pays de la liste

266.0 218


Restriction aux pays suivants : France, Etats-unis, Angleterre, Russie, Chine, Japon, Brésil, Afrique du Sud

In [5]:
# Liste des pays à conserver
pays_a_conserver = [
    'France', 'United States', 'United Kingdom', 
    'Russia', 'China', 'Japan', 'Brazil', 'South Africa'
]

# Filtrer le DataFrame
PIB_hab_filtré = PIB_hab_simple_sans_NA[PIB_hab_simple_sans_NA['nom_pays'].isin(pays_a_conserver)]

# Vérifier le résultat
print(PIB_hab_filtré['nom_pays'].unique())
print(PIB_hab_filtré.head(126))

['Brazil' 'China' 'France' 'Japan' 'South Africa' 'United Kingdom'
 'United States']
     nom_pays  année  PIB_par_habitant
4877   Brazil   2022       9032.084816
4878   Brazil   2021       8799.229458
4879   Brazil   2020       8435.011433
4880   Brazil   2019       8771.440513
4881   Brazil   2018       8722.336303
...       ...    ...               ...
5910    China   1964        206.399719
5911    China   1963        178.749039
5912    China   1962        166.089290
5913    China   1961        177.353934
5914    China   1960        241.388656

[126 rows x 3 columns]


### Récupération des données de PIB à partir de la base de données FAOSTAT

In [6]:
pip install faostat

Collecting faostat
  Downloading faostat-1.1.2-py3-none-any.whl.metadata (15 kB)
Downloading faostat-1.1.2-py3-none-any.whl (9.8 kB)
Installing collected packages: faostat
Successfully installed faostat-1.1.2
Note: you may need to restart the kernel to use updated packages.


In [7]:
import faostat
df = faostat.list_datasets_df()
list = []
for i in range(67):
    list.append(df.iloc[i,0:2])
    print(list[i])


code                              QCL
label    Crops and livestock products
Name: 0, dtype: object
code                     QI
label    Production Indices
Name: 1, dtype: object
code                                   QV
label    Value of Agricultural Production
Name: 2, dtype: object
code                                    FS
label    Suite of Food Security Indicators
Name: 3, dtype: object
code                       FBS
label    Food Balances (2010-)
Name: 4, dtype: object
code                                     SCL
label    Supply Utilization Accounts (2010-)
Name: 5, dtype: object
code                                        CB
label    Commodity Balances (non-food) (2010-)
Name: 6, dtype: object
code                                                  FBSH
label    Food Balances (-2013, old methodology and popu...
Name: 7, dtype: object
code                                                   CBH
label    Commodity Balances (non-food) (-2013, old meth...
Name: 8, dtype: object
code     

In [8]:
print(faostat.list_pars_df('FS'))
print(np.sort(faostat.get_par_df('FS', 'countries')))
print(faostat.get_par_df('FS', 'year3'))
print(faostat.get_par_df('FS', 'element'))
print(faostat.get_par_df('FS', 'item'))
print(faostat.get_par_df('FS', 'area'))

  parameter code          coding_systems  \
0           area  [M49, FAO, ISO2, ISO3]   
1        element                      []   
2           item                      []   
3          year3                      []   

                       subdimensions {code: meaning}  
0  {'countries': 'Countries', 'regions': 'Regions...  
1                           {'elements': 'Elements'}  
2                                 {'items': 'Items'}  
3                                {'years3': 'Years'}  
[['0' '2' 'Afghanistan']
 ['0' '3' 'Albania']
 ['0' '4' 'Algeria']
 ['0' '5' 'American Samoa']
 ['0' '6' 'Andorra']
 ['0' '7' 'Angola']
 ['0' '8' 'Antigua and Barbuda']
 ['0' '9' 'Argentina']
 ['0' '1' 'Armenia']
 ['0' '10' 'Australia']
 ['0' '11' 'Austria']
 ['0' '52' 'Azerbaijan']
 ['0' '12' 'Bahamas']
 ['0' '13' 'Bahrain']
 ['0' '16' 'Bangladesh']
 ['0' '14' 'Barbados']
 ['0' '57' 'Belarus']
 ['0' '255' 'Belgium']
 ['0' '23' 'Belize']
 ['0' '53' 'Benin']
 ['0' '17' 'Bermuda']
 ['0' '18' 'Bhutan']

On souhaite récupérer les indicateurs suivants : {
    21010: "suffisance des apports énergétiques alimentaires moyens",
    22000: "disponibilité alimentaire par habitant",
    21013: "disponibilité protéiques moyenne",
    210104: "disponibilité protéines moyennes animales",
    22013: "PIB/ HAB",
    210041: "prévalence de la sous alimentation",
    210401: "prévalence de l'insécurité alimentaire grave",
    210091: "prévalence de l'insécurité alimentaire modérée ou grave",
    21031: "variabilité des disponibilités alimentaires"
}

In [9]:
liste_pays = [21,68,100,110,185,229,231,351] # dans l'ordre : Brazil, ...

mypars = {'area': liste_pays,
              'element': [6120],
              'item': ['21010','22000','21013','210104','22013','210401','21031'],
              'year': [1999, 2025]}
data_faostat = faostat.get_data_df('FS', pars=mypars, strval=False)
data_faostat.head(100)

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value
0,FS,Suite of Food Security Indicators,21,Brazil,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,122.0
1,FS,Suite of Food Security Indicators,21,Brazil,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,124.0
2,FS,Suite of Food Security Indicators,21,Brazil,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,127.0
3,FS,Suite of Food Security Indicators,21,Brazil,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,129.0
4,FS,Suite of Food Security Indicators,21,Brazil,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,130.0
...,...,...,...,...,...,...,...,...,...,...,...,...
95,FS,Suite of Food Security Indicators,21,Brazil,6128,Value,21031,Per capita food supply variability (kcal/cap/day),2004,2004,kcal/cap/d,45.0
96,FS,Suite of Food Security Indicators,21,Brazil,6128,Value,21031,Per capita food supply variability (kcal/cap/day),2005,2005,kcal/cap/d,57.0
97,FS,Suite of Food Security Indicators,21,Brazil,6128,Value,21031,Per capita food supply variability (kcal/cap/day),2006,2006,kcal/cap/d,61.0
98,FS,Suite of Food Security Indicators,21,Brazil,6128,Value,21031,Per capita food supply variability (kcal/cap/day),2007,2007,kcal/cap/d,50.0


On nettoie la base

In [10]:
data_faostat = data_faostat.drop(['Domain Code', 'Domain', 'Area Code', 'Element Code', 'Element', 'Year Code', 'Item Code'], axis=1)
data_faostat = data_faostat.rename(columns={'Item': 'Indicateur'})
data_faostat.head(100)

Unnamed: 0,Area,Indicateur,Year,Unit,Value
0,Brazil,Average dietary energy supply adequacy (percen...,2000-2002,%,122.0
1,Brazil,Average dietary energy supply adequacy (percen...,2001-2003,%,124.0
2,Brazil,Average dietary energy supply adequacy (percen...,2002-2004,%,127.0
3,Brazil,Average dietary energy supply adequacy (percen...,2003-2005,%,129.0
4,Brazil,Average dietary energy supply adequacy (percen...,2004-2006,%,130.0
...,...,...,...,...,...
95,Brazil,Per capita food supply variability (kcal/cap/day),2004,kcal/cap/d,45.0
96,Brazil,Per capita food supply variability (kcal/cap/day),2005,kcal/cap/d,57.0
97,Brazil,Per capita food supply variability (kcal/cap/day),2006,kcal/cap/d,61.0
98,Brazil,Per capita food supply variability (kcal/cap/day),2007,kcal/cap/d,50.0


## Partie 2 : DATA VISUALIZATION

## Partie 3 : MODELE

### ACP

In [None]:

import faostat
import pandas as pd
df = faostat.list_datasets_df()
list = []
for i in range(67):
    list.append(df.iloc[i,0:2])
    print(list[i])
liste_pays = [21,68,100,110,185,229,231,351] # dans l'ordre : Brazil, ...

mypars = {'area': liste_pays,
              'element': [6120],
              'item': ['21010','22000','21013','210104','22013','210401','21031'],
              'year': [1999, 2025]}
data_faostat = faostat.get_data_df('FS', pars=mypars, strval=False)
data_faostat.head(100)
data_faostat = data_faostat.drop(['Domain Code', 'Domain', 'Area Code', 'Element Code', 'Element', 'Year Code', 'Item Code'], axis=1)
data_faostat = data_faostat.rename(columns={'Item': 'Indicateur'})
# Récupérer les noms d’indicateurs correspondant aux codes que tu veux
items_acp = ['21010','22000','21013','210104','210401','21031']

# On repart depuis les données AVANT suppression de 'Item Code'
# Donc recharge les données brutes :
data_faostat_raw = faostat.get_data_df('FS', pars=mypars, strval=False)

# Sélection uniquement des items que tu veux
data_items = data_faostat_raw[data_faostat_raw['Item Code'].astype(str).isin(items_acp)]

# Liste propre des noms d’indicateurs
liste_indicateurs = sorted(data_items['Item'].unique().tolist())
print("Indicateurs sélectionnés :", liste_indicateurs)

# --- Liste des items (codes FAOSTAT) que tu veux dans l'ACP ---
items_acp = ['21010','22000','21013','210104','210401','21031']

data_acp = data_faostat[data_faostat['Indicateur'].isin(liste_indicateurs)]

# Pivot large
df_wide = data_acp.pivot_table(
    index=['Area', 'Year'],
    columns='Indicateur',
    values='Value'
).reset_index()


df_wide_clean = df_wide.fillna(df_wide.mean(numeric_only=True))


# Vérification
print("Colonnes ACP :", df_wide_clean.columns)

# ACP
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

X = df_wide_clean.iloc[:, 2:]  # variables quantitatives

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=1)
df_wide_clean['Score_ACP'] = pca.fit_transform(X_scaled)

df_wide_clean.head(20)

print("Variance expliquée par PC1 :", pca.explained_variance_ratio_[0])

loadings = pd.DataFrame(
    pca.components_.T,
    index=X.columns,
    columns=['PC1']
)
print("\nContributions des variables à PC1 :")
print(loadings)

print("\nScores ACP (PC1) :")
print(df_wide_clean[['Area', 'Year', 'Score_ACP']].head(20))



### Régression(s) linéaire PIB/hab

### Régression(s) linéaire IDH