In [60]:
import requests
import pandas as pd


In [56]:
# Eurostat SDMX 2.1 REST API.
# Building the key of a data query requires, in this order:
#   1. the dataflow (the variable), e.g. for GDP:
#      https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP
#   2. the data structure attached to that dataflow, which gives the dimensions
#      that can be selected and the order in which they must be declared:
#      https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP?references=children
#   3. the codelist attached to each dimension, e.g. for the frequency FREQ:
#      https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/FREQ


api_root = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/"
options = "&format=JSON&lang=en"
REQUEST_TIMEOUT_S = 60  # seconds; keeps a cell from hanging forever on a stalled connection


def fetch_eurostat_json(dataflow, dimensions, time_period):
    """Download one slice of a Eurostat dataset and return the decoded JSON.

    Args:
        dataflow: Eurostat dataflow code, e.g. "APRO_CPSH1".
        dimensions: dot-separated key, one position per dimension; "+" joins
            several codes of the same dimension.
        time_period: query-string fragment such as
            "startPeriod=2000&endPeriod=2021".

    Returns:
        The response body as parsed by ``Response.json()``.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.Timeout: no answer within REQUEST_TIMEOUT_S seconds.
    """
    url_api = f"{api_root}{dataflow}/{dimensions}?{time_period}{options}"
    req = requests.get(url_api, timeout=REQUEST_TIMEOUT_S)
    # Stop here with the HTTP status instead of failing later on a payload
    # that does not have the expected keys.
    req.raise_for_status()
    return req.json()


# annual variables
time_period_annual = "startPeriod=2000&endPeriod=2021" # 22 time periods per country

# Crop production in EU standard humidity
# dimensions (in order): freq,crops,strucpro,geo,obsflag
dimensions = "A.C1000+C2000+R1000+R2000+I1100+V0000+F0000+T0000+U1100.AR.FR+EU"
wb_APRO_CPSH1 = fetch_eurostat_json("APRO_CPSH1", dimensions, time_period_annual)

# Selling prices of crop products (absolute prices)
# freq,currency,prod_veg,geo,TIME_PERIOD
dimensions = "A.EUR.01110000+01120000+01600000+02120000+04121000+04122000+04192100+04192200+04194100+04194200+04195000+04196000+04199911+05120000.EU+FR"
wb_APRI_AP_CRPOUTA = fetch_eurostat_json("APRI_AP_CRPOUTA", dimensions, time_period_annual)


# quarterly variables
time_period_quarterly = "startPeriod=2000-Q1&endPeriod=2021-Q4"

# Price indices of the means of agricultural production, input (2010 = 100), until 2017 only
# freq,p_adj,unit,product,geo,TIME_PERIOD
dimensions = "Q.NI.I10.200000+210000.EU28+FR"
wb_APRI_PI10_INQ = fetch_eurostat_json("APRI_PI10_INQ", dimensions, time_period_quarterly)

# Price indices of the means of agricultural production, input (2015 = 100), from 2015 only
# freq,p_adj,unit,product,geo,TIME_PERIOD
dimensions = "Q.NI.I15.200000+210000.EU28+FR"
wb_APRI_PI15_INQ = fetch_eurostat_json("APRI_PI15_INQ", dimensions, time_period_quarterly)


# monthly variables
time_period_monthly = "startPeriod=2000-01&endPeriod=2021-12"

# Slaughtering in slaughterhouses
# freq,meat,meatitem,unit,geo,TIME_PERIOD
dimensions = "M.B1000.SL.THS_T.FR+EU28+DE+IE+ES"
wb_APRO_MT_PWGTM = fetch_eurostat_json("APRO_MT_PWGTM", dimensions, time_period_monthly)

# Poultry
# freq,animals,hatchitm,geo,TIME_PERIOD
dimensions = "M.A5130O+A5130P.CH.FR+DE+UK+PL"
wb_APRO_EC_POULM = fetch_eurostat_json("APRO_EC_POULM", dimensions, time_period_monthly)


In [3]:
# Quarterly series: another download of the 2015-based input price index.
# NOTE: time_period_quarterly is assigned here but is not part of url_api
# below, so this request carries no startPeriod/endPeriod filter.
time_period_quarterly = "startPeriod=2000-Q1&endPeriod=2021-Q4"

# Price indices of the means of agricultural production, input (2015 = 100)
# key order: freq,p_adj,unit,product,geo,TIME_PERIOD
dimensions = "Q.NI.I15.200000+210000.EU28+FR"
url_api = api_root + "APRI_PI15_INQ/" + dimensions + "?" + options
req = requests.get(url_api)
wb_APRI_PI15_INQ = req.json()

In [46]:
# Keys of interest in each JSON payload: value, geo, time.

# Dataflow codes of the datasets downloaded above, one per line.
variable_codes = [
    'APRO_CPSH1',
    'APRI_AP_CRPOUTA',
    'APRI_PI15_INQ',
    'APRI_PI10_INQ',
    'APRO_MT_PWGTM',
    'APRO_EC_POULM',
]

# recovering_data_from_JSON
def recovering_data_from_JSON(json_data, category_dims=("crops", "prod_veg", "product", "meat", "animals")):
    """Extract time labels, geo codes, category codes and values from a payload.

    The category dimension is found by name inside the payload itself, so the
    function no longer depends on notebook-level ``wb_*`` variables and also
    handles datasets (e.g. APRI_PI10_INQ) that the previous chain of equality
    tests did not cover.

    Args:
        json_data: decoded JSON payload with a ``'dimension'`` mapping
            (containing at least ``'time'`` and ``'geo'``) and a ``'value'``
            mapping.
        category_dims: candidate names of the category dimension, tried in
            order; the first one present in ``json_data['dimension']`` is used.

    Returns:
        A tuple ``(list_time, list_geo, list_cat, list_values)``:
        keys of the time label mapping without ``'2022'``, keys of the geo
        index mapping, keys of the category label mapping, and the values of
        ``json_data['value']`` in their stored order.

    Raises:
        KeyError: a required key is missing, or none of ``category_dims`` is a
            dimension of the payload.
    """
    dimension = json_data['dimension']

    # recovering time labels; the label '2022' is dropped, as before
    list_time = [label for label in dimension['time']['category']['label'] if label != '2022']

    # recovering country names
    list_geo = list(dimension['geo']['category']['index'].keys())

    # recovering category: first candidate dimension present in the payload
    for dim_name in category_dims:
        if dim_name in dimension:
            list_cat = list(dimension[dim_name]['category']['label'])
            break
    else:
        raise KeyError(
            f"none of the category dimensions {tuple(category_dims)} found; "
            f"available dimensions: {sorted(dimension)}"
        )

    # recovering values
    list_values = list(json_data['value'].values())

    return list_time, list_geo, list_cat, list_values

#dict = {'time': list_time, list_geo[0] : list_values[0:20], list_geo[1] : list_values[20:-1]}


In [57]:
# Parse the monthly poultry payload; the last item of the returned tuple is
# the list of values, whose length is shown as the cell output.
data = recovering_data_from_JSON(wb_APRO_EC_POULM)
len(data[-1])

2005

In [24]:
def create_df_annual_data(variable_codes, list_time, list_geo=None, list_values=None):
    """Build a DataFrame with a 'time' column and one column per (code, geo) pair.

    Args:
        variable_codes: dataflow codes; each is combined with every geo code
            into a column named ``<code>_<geo>``.
        list_time: values of the 'time' column.
        list_geo: geo codes. When omitted, the notebook-level variable
            ``list_geo`` is used, as the earlier version did implicitly.
        list_values: observation values. When omitted, the notebook-level
            variable ``list_values`` is used.

    Returns:
        A ``pandas.DataFrame`` built from the assembled columns.

    Raises:
        NameError: an omitted argument has no notebook-level fallback.
        ValueError: raised by pandas when the columns differ in length.
    """
    # Backward compatibility: earlier calls passed only two arguments and
    # relied on these two names existing at notebook level.
    scope = globals()
    if list_geo is None:
        if 'list_geo' not in scope:
            raise NameError("name 'list_geo' is not defined; pass list_geo=...")
        list_geo = scope['list_geo']
    if list_values is None:
        if 'list_values' not in scope:
            raise NameError("name 'list_values' is not defined; pass list_values=...")
        list_values = scope['list_values']

    columns = {'time': list_time}  # renamed: the old local shadowed the builtin `dict`
    for code in variable_codes:
        for geo in list_geo:
            # NOTE(review): every column receives the same hard-coded slice
            # list_values[20:-1]; this looks like a placeholder -- confirm the
            # intended per-country/per-variable slicing.
            columns[code + "_" + geo] = list_values[20:-1]
    return pd.DataFrame(columns)
# create_df_annual_data(variable_code, list_time)

In [None]:
#!pip install pandasdmx
# pandasdmx is imported under the alias `sdmx`; the cells below use it both
# for requests (through `estat`) and for `sdmx.to_pandas` conversions.
import pandasdmx as sdmx
# Request object created for the source id 'ESTAT'; reused by every later
# estat.datastructure / estat.dataflow / estat.data call.
estat = sdmx.Request('ESTAT')

In [2]:
# Fetch the structure message for 'DSD_APRO_CPSH1' and display it (the bare
# `metadata` expression is the cell's last statement).
metadata = estat.datastructure('DSD_APRO_CPSH1')
metadata
# Leftover notes from the raw REST request for slaughtering; not used here.
# Slaughtering in slaughterhouses
# freq,meat,meatitem,unit,geo,TIME_PERIOD
#dimensions = "M.B1000.SL.THS_T.FR+EU28+DE+IE+ES"
#url_api = f"{api_root}APRO_MT_PWGTM/{dimensions}?{time_period_monthly}{options}"
#startPeriod=2000-01&endPeriod=2021-12

<pandasdmx.StructureMessage>
  <Header>
    id: 'IDREF640763'
    prepared: '2022-12-19T11:18:03.293000+00:00'
    receiver: <Agency Unknown>
    sender: <Agency Unknown>
    source: 
    test: False
  response: <Response [200]>
  Codelist (6): CL_CROPS CL_FREQ CL_GEO CL_OBS_FLAG CL_OBS_STATUS CL_ST...
  ConceptScheme (1): CS_DSD_APRO_CPSH1
  DataStructureDefinition (1): DSD_APRO_CPSH1

In [4]:
# Print, as pandas objects, three of the codelists carried by `metadata`.
for cl in ('CL_FREQ', 'CL_CROPS', 'CL_STRUCPRO'):
    codelist_frame = sdmx.to_pandas(metadata.codelist[cl])
    print(codelist_frame)

                name   parent
CL_FREQ                      
D              Daily  CL_FREQ
W             Weekly  CL_FREQ
Q          Quarterly  CL_FREQ
A             Annual  CL_FREQ
S        Semi-annual  CL_FREQ
M            Monthly  CL_FREQ
H          Half-year  CL_FREQ
                                                       name    parent
CL_CROPS                                                             
UAA                              Utilised agricultural area  CL_CROPS
ARA                                             Arable land  CL_CROPS
C0000     Cereals for the production of grain (including...  CL_CROPS
C1000     Cereals (excluding rice) for the production of...  CL_CROPS
C1100                                       Wheat and spelt  CL_CROPS
...                                                     ...       ...
PECR9                                 Other permanent crops  CL_CROPS
K0000                                       Kitchen gardens  CL_CROPS
U1000                         

In [None]:
# Request APRO_CPSH1 for one crops code and one strucpro code, from 2000 on.
key = {'CROPS': 'C0000', 'STRUCPRO': 'PR_HU_EU'}
params = {'startPeriod': '2000'}
resp = estat.data('APRO_CPSH1', key=key, params=params)

In [7]:
# Ask the ESTAT source for its dataflow listing (no dataflow id is passed).
flow_msg = estat.dataflow()

In [10]:
# Convert the dataflow listing to pandas (the output below shows dataflow ids
# as index and titles as values) and preview the first rows.
dataflows = sdmx.to_pandas(flow_msg.dataflow)
dataflows.head()
#len(dataflows)

DS-018995                          EU trade since 1999 by SITC
DS-022469    Extra-EU trade since 2000 by mode of transport...
DS-032655                     EU trade since 1988 by BEC/rev.4
DS-043227                        EFTA trade since 1995 by SITC
DS-056697    EU enlargement countries trade since 2002, by ...
dtype: object

In [53]:
# Case-insensitive title filters; `outcome` holds the titles matched by both
# the 'agricultur' and the 'production' filter.
agri = dataflows.loc[dataflows.str.contains('agricultur', case=False)]
prod = dataflows.loc[dataflows.str.contains('production', case=False)]
econ = dataflows.loc[dataflows.str.contains('economic', case=False)]
outcome = set(agri).intersection(prod)
outcome

{'EC indices of purchase prices of the means of agricultural production (1995 = 100) - annual data',
 'EC indices of purchase prices of the means of agricultural production (1995 = 100) - monthly data',
 'Holdings, cultivated area under vines (CAV) and agricultural area in use broken down by type of production, size class and regions',
 'Main farm indicators by agricultural area, type and economic size of the farm, share of consumed production, legal status of the holding and NUTS2 region',
 'Organic crop area by agricultural production methods and crops',
 'Organic crop area by agricultural production methods and crops (until 2011)',
 'Price indices of the means of agricultural production, input (2000 = 100) - annual data',
 'Price indices of the means of agricultural production, input (2000 = 100) - monthly data',
 'Price indices of the means of agricultural production, input (2000 = 100) - quarterly data',
 'Price indices of the means of agricultural production, input (2005 = 100) -

In [64]:
# Keep only the dataflows whose title contains the price-index phrase.
title_matches = dataflows.str.contains('Price indices of the means of agricultural production', case=False)
take = dataflows.loc[title_matches]
take

apri_pi00_ina    Price indices of the means of agricultural pro...
apri_pi00_inm    Price indices of the means of agricultural pro...
apri_pi00_inq    Price indices of the means of agricultural pro...
apri_pi05_ina    Price indices of the means of agricultural pro...
apri_pi05_inq    Price indices of the means of agricultural pro...
apri_pi10_ina    Price indices of the means of agricultural pro...
apri_pi10_inq    Price indices of the means of agricultural pro...
apri_pi15_ina    Price indices of the means of agricultural pro...
apri_pi15_inq    Price indices of the means of agricultural pro...
dtype: object

In [200]:
# Structure of the apri_pi15_inq dataflow, then its code tables.
metadata = estat.datastructure('DSD_apri_pi15_inq')
metadata.codelist  # bare mid-cell expression: evaluated but not displayed
for cl in ('CL_PRODUCT', 'CL_P_ADJ', 'CL_UNIT', 'CL_FREQ', 'CL_GEO'):
    codelist_frame = sdmx.to_pandas(metadata.codelist[cl])
    print(codelist_frame)

                                                         name      parent
CL_PRODUCT                                                               
200000      Goods and services currently consumed in agric...  CL_PRODUCT
201000                               Seeds and planting stock  CL_PRODUCT
202000                                     Energy, lubricants  CL_PRODUCT
202100                                            Electricity  CL_PRODUCT
202200                                      Fuels for heating  CL_PRODUCT
202300                                            Motor fuels  CL_PRODUCT
202400                                             Lubricants  CL_PRODUCT
203000                         Fertilisers and soil improvers  CL_PRODUCT
203100                                   Straight fertilizers  CL_PRODUCT
203110                                Nitrogenous fertilizers  CL_PRODUCT
203120                                 Phosphatic fertilizers  CL_PRODUCT
203130                                

In [86]:
# Download apri_pi15_inq for three geo codes and two product codes, then
# convert it to pandas with TIME_PERIOD/FREQ used for the datetime handling.
key = {
    'GEO': 'EU28+FR+EU27_2020',
    'PRODUCT': '200000+210000',
    'UNIT': 'I15',
    'P_ADJ': 'RI',
}
params = {'startPeriod': '2000', 'endPeriod': '2021'}
resp = estat.data('apri_pi15_inq', key=key, params=params)
data = resp.to_pandas(datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

2022-12-19 14:58:25,655 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


P_ADJ,RI,RI,RI,RI,RI,RI
UNIT,I15,I15,I15,I15,I15,I15
PRODUCT,200000,200000,200000,210000,210000,210000
GEO,EU27_2020,EU28,FR,EU27_2020,EU28,FR
TIME_PERIOD,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4
2015Q1,100.97,101.14,100.83,100.37,100.42,100.25
2015Q2,100.16,100.32,100.77,99.32,99.4,99.79
2015Q3,99.92,99.84,99.61,99.91,99.89,100.04
2015Q4,98.96,98.71,98.78,100.41,100.31,99.92
2016Q1,98.3,98.0,97.41,101.36,101.22,100.38
2016Q2,97.01,96.91,96.42,100.42,100.34,99.55
2016Q3,96.54,96.5,95.44,100.78,100.69,100.06
2016Q4,96.4,96.55,95.91,101.13,100.99,100.43
2017Q1,98.28,98.44,97.13,101.49,101.34,100.58
2017Q2,96.84,97.03,95.9,101.07,100.81,100.91


In [168]:
data.columns.levels  # bare mid-cell expression: evaluated but not displayed
# Both selections share the ('RI', 'I15') prefix of the column MultiIndex and
# differ only by product code.
column_prefix = ('RI', 'I15')
price_Goods_services_consumed_agri_prod = data.loc[:, column_prefix + ('200000',)]
price_Goods_services_contributing_agri_prod = data.loc[:, column_prefix + ('210000',)]

In [194]:
def relabelling(df,dataflow_code):
    """Return ``df`` with ``_<dataflow_code>`` appended to every column label.

    Args:
        df: DataFrame whose column labels can be concatenated with a string.
        dataflow_code: text appended, after an underscore, to each label.

    Returns:
        The DataFrame produced by ``df.rename``; the argument is not modified.
    """
    suffix = "_" + dataflow_code
    renamed_columns = {label: label + suffix for label in df.columns}
    return df.rename(columns=renamed_columns)

In [100]:
# pathlib ships with Python's standard library, so no pip install is needed.
from pathlib import Path  

In [None]:
# Single place to change where the CSV exports are written (previously the
# same literal was repeated for each file, with a stray '//').
# NOTE(review): this is an absolute path on one user's machine; point it at a
# project-relative directory before running the notebook elsewhere.
OUTPUT_DIR = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds')

df1 = relabelling(price_Goods_services_contributing_agri_prod, 'price_gs_contributing_agri_prod')
filepath = OUTPUT_DIR / 'price_Goods_services_contributing_agri_prod.csv'
df1.to_csv(filepath, index=True)  # index=True keeps the row index in the file

df2 = relabelling(price_Goods_services_consumed_agri_prod, 'price_gs_consumed_agri_prod')
filepath = OUTPUT_DIR / 'price_Goods_services_consumed_agri_prod.csv'
df2.to_csv(filepath, index=True)

Now repeat the same steps (dataflow search, codelists, data request) for the remaining datasets.

In [201]:
# Case-insensitive title filters; only the 'poultry' matches are displayed.
agri = dataflows.loc[dataflows.str.contains('agricultur', case=False)]
prod = dataflows.loc[dataflows.str.contains('production', case=False)]
poultry = dataflows.loc[dataflows.str.contains('poultry', case=False)]
#outcome = set(agri) & set(poultry)
poultry


apro_ec_poula                                  Poultry - annual data
apro_ec_poulm                                 Poultry - monthly data
apro_ec_strpoul    Hatcheries - poultry other than hens - annual ...
ef_ls_gvbroiler    Poultry: number of farms and heads by size of ...
ef_ls_gvlayhen     Poultry: number of farms and heads by size of ...
ef_lsbroiaa        Poultry: number of farms and heads by agricult...
ef_lsbroiecs       Poultry: number of farms and heads by economic...
ef_lsk_poultry                             Poultry by NUTS 2 regions
ef_lslayhenaa      Laying hens: number of farms and heads of poul...
med_ag34                                             Poultry farming
tag00043                                 Production of meat: poultry
dtype: object

In [213]:
# Structure of the apro_ec_poulm dataflow, then its code tables.
metadata = estat.datastructure('DSD_apro_ec_poulm')
for cl in ('CL_GEO', 'CL_FREQ', 'CL_HATCHITM', 'CL_ANIMALS'):
    codelist_frame = sdmx.to_pandas(metadata.codelist[cl])
    print(codelist_frame)


                                                       name  parent
CL_GEO                                                             
EU27_2020         European Union - 27 countries (from 2020)  CL_GEO
EU28              European Union - 28 countries (2013-2020)  CL_GEO
EU27_2007         European Union - 27 countries (2007-2013)  CL_GEO
EU25              European Union - 25 countries (2004-2006)  CL_GEO
EU15              European Union - 15 countries (1995-2004)  CL_GEO
BE                                                  Belgium  CL_GEO
BG                                                 Bulgaria  CL_GEO
CZ                                                  Czechia  CL_GEO
DK                                                  Denmark  CL_GEO
DE         Germany (until 1990 former territory of the FRG)  CL_GEO
EE                                                  Estonia  CL_GEO
IE                                                  Ireland  CL_GEO
EL                                              

In [215]:
# Download apro_ec_poulm for four geo codes, one hatchitm code and one
# animals code, then convert it to pandas.
key = {
    'GEO': 'EU28+FR+EU27_2020+EU27_2007',
    'HATCHITM': 'CH',
    'ANIMALS': 'A5130P',
}
params = {'startPeriod': '2000-01', 'endPeriod': '2021-12'}
resp = estat.data('apro_ec_poulm', key=key, params=params)
data = resp.to_pandas(datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

2022-12-19 17:37:54,248 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


In [216]:
# Select the columns under the ('A5130P', 'CH') labels, i.e. the ANIMALS and
# HATCHITM codes requested above; the output below has one column per GEO.
# NOTE(review): presumably A5130P denotes chickens, per the variable name -- confirm in CL_ANIMALS.
prod_chicken = data.loc[:, ('A5130P', 'CH')]

GEO,EU27_2007,EU27_2020,EU28,FR
TIME_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01,,,,68679.00
2000-02,,,,70204.00
2000-03,,,,87880.00
2000-04,,,,72032.00
2000-05,,,,72065.00
...,...,...,...,...
2021-08,,,,69749.14
2021-09,,,,66146.46
2021-10,,,,63460.25
2021-11,,,,61889.48


In [None]:
# Fetch the structure message for 'DSD_APRO_MT_PWGTM' and display it.
metadata = estat.datastructure('DSD_APRO_MT_PWGTM')
metadata

In [None]:

# Print, as pandas objects, five of the codelists carried by `metadata`.
for cl in ('CL_GEO', 'CL_FREQ', 'CL_MEAT', 'CL_MEATITEM', 'CL_UNIT'):
    codelist_frame = sdmx.to_pandas(metadata.codelist[cl])
    print(codelist_frame)


In [224]:
# Download apro_mt_pwgtm for four geo codes and a single meat / meatitem /
# unit combination, then convert it to pandas.
key = {
    'GEO': 'EU28+FR+EU27_2020+EU27_2007',
    'MEAT': 'B1000',
    'MEATITEM': 'SL',
    'UNIT': 'THS_T',
}
params = {'startPeriod': '2000-01', 'endPeriod': '2021-12'}
resp = estat.data('apro_mt_pwgtm', key=key, params=params)
data = resp.to_pandas(datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

2022-12-19 17:44:23,545 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


In [227]:
# Select the columns under the ('B1000', 'SL', 'THS_T') labels, i.e. the MEAT,
# MEATITEM and UNIT codes requested above; the output below has one column per GEO.
# NOTE(review): presumably B1000 denotes bovine meat, per the variable name -- confirm in CL_MEAT.
prod_bovine = data.loc[:, ('B1000', 'SL', 'THS_T')]
prod_bovine

GEO,EU27_2007,EU27_2020,EU28,FR
TIME_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01,,,,134.82
2000-02,,,,127.47
2000-03,,,,144.23
2000-04,,,,124.41
2000-05,,,,144.02
...,...,...,...,...
2021-08,,567.50,,117.87
2021-09,,590.81,,120.45
2021-10,,576.96,,115.74
2021-11,,618.07,,123.13


In [251]:
# Structure of the apri_pi05_inq dataflow, then its code tables.
metadata = estat.datastructure('DSD_apri_pi05_inq')

for cl in ('CL_PRODUCT', 'CL_P_ADJ', 'CL_UNIT', 'CL_FREQ', 'CL_GEO'):
    codelist_frame = sdmx.to_pandas(metadata.codelist[cl])
    print(codelist_frame)

                                                         name      parent
CL_PRODUCT                                                               
200000      Goods and services currently consumed in agric...  CL_PRODUCT
201000                               Seeds and planting stock  CL_PRODUCT
202000                                     Energy, lubricants  CL_PRODUCT
202100                                            Electricity  CL_PRODUCT
202200                                      Fuels for heating  CL_PRODUCT
202300                                            Motor fuels  CL_PRODUCT
202400                                             Lubricants  CL_PRODUCT
203000                         Fertilisers and soil improvers  CL_PRODUCT
203100                                   Straight fertilizers  CL_PRODUCT
203110                                Nitrogenous fertilizers  CL_PRODUCT
203120                                 Phosphatic fertilizers  CL_PRODUCT
203130                                

In [265]:
def fetch_fr_input_price_index(base_year):
    """Download the quarterly input price index for GEO='FR' for one base year.

    Replaces three copy-pasted request stanzas that differed only by base year.

    Args:
        base_year: two-digit base year as a string ('00', '05' or '10' in this
            notebook). It selects the dataflow ``apri_pi<base_year>_inq`` and
            the unit code ``I<base_year>``.

    Returns:
        The response converted with ``to_pandas``, for product codes 200000
        and 210000 with P_ADJ='RI', over startPeriod 2000 to endPeriod 2021.
    """
    key = dict(GEO='FR', PRODUCT='200000+210000', UNIT=f'I{base_year}', P_ADJ='RI')
    params = dict(startPeriod='2000', endPeriod='2021')
    resp = estat.data(f'apri_pi{base_year}_inq', key=key, params=params)
    return resp.to_pandas(
        datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})


# Base 2000 = 100
data = fetch_fr_input_price_index('00')
price_Goods_services_consumed_agri_prod00 = data.loc[:, ('RI', 'I00', '200000')]
price_Goods_services_contributing_agri_prod00 = data.loc[:, ('RI', 'I00', '210000')]

# Base 2005 = 100
data = fetch_fr_input_price_index('05')
price_Goods_services_consumed_agri_prod05 = data.loc[:, ('RI', 'I05', '200000')]
price_Goods_services_contributing_agri_prod05 = data.loc[:, ('RI', 'I05', '210000')]

# Base 2010 = 100
data = fetch_fr_input_price_index('10')
price_Goods_services_consumed_agri_prod10 = data.loc[:, ('RI', 'I10', '200000')]
price_Goods_services_contributing_agri_prod10 = data.loc[:, ('RI', 'I10', '210000')]

2022-12-19 18:18:16,641 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-19 18:18:17,393 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-19 18:18:18,212 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


In [286]:
# First row of the 2015-based series (displayed as the cell's last expression).
price_Goods_services_contributing_agri_prod.iloc[0]
#price_Goods_services_contributing_agri_prod10.iloc[60]

# TODO(review): the cell previously ended with an unfinished statement,
#     for i in range(61):
#         price_Goods_services_contributing_agri_prod1[i] =
# which is a SyntaxError (nothing in the cell could run) and refers to a name,
# price_Goods_services_contributing_agri_prod1, that is never defined.
# Presumably the goal was to chain the 2010-based series onto the 2015-based
# one -- confirm the intended formula before implementing it.


GEO,EU27_2020,EU28,FR
TIME_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015Q1,100.37,100.42,100.25
2015Q2,99.32,99.4,99.79
2015Q3,99.91,99.89,100.04
2015Q4,100.41,100.31,99.92
2016Q1,101.36,101.22,100.38
2016Q2,100.42,100.34,99.55
2016Q3,100.78,100.69,100.06
2016Q4,101.13,100.99,100.43
2017Q1,101.49,101.34,100.58
2017Q2,101.07,100.81,100.91


In [284]:
# Scratch check: range(60) spans 0..59, as the echoed range(0, 60) shows.
range(60)

range(0, 60)