In [60]:
import requests
import pandas as pd


In [56]:
# Eurostat 
# in order to build the key
# need to know the dataflow (variable): 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP if we select GDP
# then the data structure (dimensions of the variable we can select) associated with the dataflow: 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP?references=children so we know in which order to declare the dimensions
# and finally the codelist associated with each structure: 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/FREQ if we select the frequency FREQ


api_root = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/"

# annual variables
time_period_annual = "startPeriod=2000&endPeriod=2021" # 22 time periods per country
options = "&format=JSON&lang=en"

# Crop production in EU standard humidity 
# dimensions (in order): freq,crops,strucpro,geo,obsflag
dimensions = "A.C1000+C2000+R1000+R2000+I1100+V0000+F0000+T0000+U1100.AR.FR+EU"
url_api = f"{api_root}APRO_CPSH1/{dimensions}?{time_period_annual}{options}"
req = requests.get(url_api)
wb_APRO_CPSH1 = req.json()

# Selling prices of crop products (absolute prices) 
# freq,currency,prod_veg,geo,TIME_PERIOD
dimensions = "A.EUR.01110000+01120000+01600000+02120000+04121000+04122000+04192100+04192200+04194100+04194200+04195000+04196000+04199911+05120000.EU+FR"
url_api = f"{api_root}APRI_AP_CRPOUTA/{dimensions}?{time_period_annual}{options}"
req = requests.get(url_api)
wb_APRI_AP_CRPOUTA = req.json()


# quarterly variables
time_period_quarterly = "startPeriod=2000-Q1&endPeriod=2021-Q4"

# Price indices of the means of agricultural production, input (2010 = 100), until 2017 only
# freq,p_adj,unit,product,geo,TIME_PERIOD 
dimensions = "Q.NI.I10.200000+210000.EU28+FR"
url_api = f"{api_root}APRI_PI10_INQ/{dimensions}?{time_period_quarterly}{options}"
req = requests.get(url_api)
wb_APRI_PI10_INQ = req.json()

# Price indices of the means of agricultural production, input (2015 = 100), from 2015 only
# freq,p_adj,unit,product,geo,TIME_PERIOD 
dimensions = "Q.NI.I15.200000+210000.EU28+FR"
url_api = f"{api_root}APRI_PI15_INQ/{dimensions}?{time_period_quarterly}{options}"
req = requests.get(url_api)
wb_APRI_PI15_INQ = req.json()


# monthly variables
time_period_monthly = "startPeriod=2000-01&endPeriod=2021-12"

# Slaughtering in slaughterhouses
# freq,meat,meatitem,unit,geo,TIME_PERIOD
dimensions = "M.B1000.SL.THS_T.FR+EU28+DE+IE+ES"
url_api = f"{api_root}APRO_MT_PWGTM/{dimensions}?{time_period_monthly}{options}"
req = requests.get(url_api)
wb_APRO_MT_PWGTM = req.json()

# Poultry   
# freq,animals,hatchitm,geo,TIME_PERIOD
dimensions = "M.A5130O+A5130P.CH.FR+DE+UK+PL"
url_api = f"{api_root}APRO_EC_POULM/{dimensions}?{time_period_monthly}{options}"
req = requests.get(url_api)
wb_APRO_EC_POULM = req.json()


In [3]:
# quarterly variables
time_period_quarterly = "startPeriod=2000-Q1&endPeriod=2021-Q4"

# Price indices of the means of agricultural production, input (2015 = 100)
# freq,p_adj,unit,product,geo,TIME_PERIOD 
dimensions = "Q.NI.I15.200000+210000.EU28+FR"
url_api = f"{api_root}APRI_PI15_INQ/{dimensions}?{options}"
req = requests.get(url_api)
wb_APRI_PI15_INQ = req.json()

In [46]:
# useful keys: value, geo, time

# variable codes
variable_codes = ['APRO_CPSH1', 'APRI_AP_CRPOUTA', 'APRI_PI15_INQ', 'APRI_PI10_INQ', 'APRO_MT_PWGTM', 'APRO_EC_POULM']

# recovering_data_from_JSON
def recovering_data_from_JSON(json_data):

    # recovering time labels
    time = json_data['dimension']['time']['category']['label']
    list_time = list(time)
    list_time = list(filter(lambda x: x != '2022', list_time))

    # recovering country names 
    geo = json_data['dimension']['geo']['category']['index'].keys()
    list_geo = list(geo)

    # recovering category 
    if json_data == wb_APRO_CPSH1:
        cat = json_data['dimension']['crops']['category']['label']
    if json_data == wb_APRI_AP_CRPOUTA:
        cat = json_data['dimension']['prod_veg']['category']['label']
    if json_data == wb_APRI_PI15_INQ:
        cat = json_data['dimension']['product']['category']['label']
    if json_data == wb_APRO_MT_PWGTM:
        cat = json_data['dimension']['meat']['category']['label']
    if json_data == wb_APRO_EC_POULM:
        cat = json_data['dimension']['animals']['category']['label']

    list_cat = list(cat)

    # recovering values
    values = json_data['value'].values()
    list_values = list(values)

    return list_time, list_geo, list_cat, list_values

#dict = {'time': list_time, list_geo[0] : list_values[0:20], list_geo[1] : list_values[20:-1]}


In [57]:
data = recovering_data_from_JSON(wb_APRO_EC_POULM)
len(data[3])

2005

In [24]:
def create_df_annual_data(variable_codes, list_time):
        variable = list()
        dict = {'time': list_time}
        for i in range(len(variable_codes)):
                for j in range(len(list_geo)):
                        variable_iter = variable_codes[i] + "_" + list_geo[j] # FR_APRO_CPSH1
                        dict[variable_iter] = list_values[20:-1]
        df = pd.DataFrame(dict)
        return df
# create_df_annual_data(variable_code, list_time)

In [None]:
#!pip install pandasdmx
import pandasdmx as sdmx
estat = sdmx.Request('ESTAT')

In [2]:
metadata = estat.datastructure('DSD_APRO_CPSH1')
metadata
# Slaughtering in slaughterhouses
# freq,meat,meatitem,unit,geo,TIME_PERIOD
#dimensions = "M.B1000.SL.THS_T.FR+EU28+DE+IE+ES"
#url_api = f"{api_root}APRO_MT_PWGTM/{dimensions}?{time_period_monthly}{options}"
#startPeriod=2000-01&endPeriod=2021-12

<pandasdmx.StructureMessage>
  <Header>
    id: 'IDREF640763'
    prepared: '2022-12-19T11:18:03.293000+00:00'
    receiver: <Agency Unknown>
    sender: <Agency Unknown>
    source: 
    test: False
  response: <Response [200]>
  Codelist (6): CL_CROPS CL_FREQ CL_GEO CL_OBS_FLAG CL_OBS_STATUS CL_ST...
  ConceptScheme (1): CS_DSD_APRO_CPSH1
  DataStructureDefinition (1): DSD_APRO_CPSH1

In [4]:
for cl in 'CL_FREQ', 'CL_CROPS', 'CL_STRUCPRO':
    print(sdmx.to_pandas(metadata.codelist[cl]))

                name   parent
CL_FREQ                      
D              Daily  CL_FREQ
W             Weekly  CL_FREQ
Q          Quarterly  CL_FREQ
A             Annual  CL_FREQ
S        Semi-annual  CL_FREQ
M            Monthly  CL_FREQ
H          Half-year  CL_FREQ
                                                       name    parent
CL_CROPS                                                             
UAA                              Utilised agricultural area  CL_CROPS
ARA                                             Arable land  CL_CROPS
C0000     Cereals for the production of grain (including...  CL_CROPS
C1000     Cereals (excluding rice) for the production of...  CL_CROPS
C1100                                       Wheat and spelt  CL_CROPS
...                                                     ...       ...
PECR9                                 Other permanent crops  CL_CROPS
K0000                                       Kitchen gardens  CL_CROPS
U1000                         

In [None]:
key = dict(CROPS='C0000', STRUCPRO='PR_HU_EU')
params = dict(startPeriod='2000')
resp = estat.data('APRO_CPSH1', key=key, params=params)

In [287]:
flow_msg = estat.dataflow()

In [10]:
dataflows = sdmx.to_pandas(flow_msg.dataflow)
dataflows.head()
#len(dataflows)

DS-018995                          EU trade since 1999 by SITC
DS-022469    Extra-EU trade since 2000 by mode of transport...
DS-032655                     EU trade since 1988 by BEC/rev.4
DS-043227                        EFTA trade since 1995 by SITC
DS-056697    EU enlargement countries trade since 2002, by ...
dtype: object

In [53]:
agri = dataflows[dataflows.str.contains('agricultur', case=False)]
prod = dataflows[dataflows.str.contains('production', case=False)]
outcome = set(agri) & set(prod)
outcome

{'EC indices of purchase prices of the means of agricultural production (1995 = 100) - annual data',
 'EC indices of purchase prices of the means of agricultural production (1995 = 100) - monthly data',
 'Holdings, cultivated area under vines (CAV) and agricultural area in use broken down by type of production, size class and regions',
 'Main farm indicators by agricultural area, type and economic size of the farm, share of consumed production, legal status of the holding and NUTS2 region',
 'Organic crop area by agricultural production methods and crops',
 'Organic crop area by agricultural production methods and crops (until 2011)',
 'Price indices of the means of agricultural production, input (2000 = 100) - annual data',
 'Price indices of the means of agricultural production, input (2000 = 100) - monthly data',
 'Price indices of the means of agricultural production, input (2000 = 100) - quarterly data',
 'Price indices of the means of agricultural production, input (2005 = 100) -

In [64]:
take = dataflows[dataflows.str.contains('Price indices of the means of agricultural production', case=False)]
take

apri_pi00_ina    Price indices of the means of agricultural pro...
apri_pi00_inm    Price indices of the means of agricultural pro...
apri_pi00_inq    Price indices of the means of agricultural pro...
apri_pi05_ina    Price indices of the means of agricultural pro...
apri_pi05_inq    Price indices of the means of agricultural pro...
apri_pi10_ina    Price indices of the means of agricultural pro...
apri_pi10_inq    Price indices of the means of agricultural pro...
apri_pi15_ina    Price indices of the means of agricultural pro...
apri_pi15_inq    Price indices of the means of agricultural pro...
dtype: object

In [200]:
metadata = estat.datastructure('DSD_apri_pi15_inq')
metadata.codelist
for cl in 'CL_PRODUCT', 'CL_P_ADJ', 'CL_UNIT', 'CL_FREQ', 'CL_GEO':
    print(sdmx.to_pandas(metadata.codelist[cl]))

                                                         name      parent
CL_PRODUCT                                                               
200000      Goods and services currently consumed in agric...  CL_PRODUCT
201000                               Seeds and planting stock  CL_PRODUCT
202000                                     Energy, lubricants  CL_PRODUCT
202100                                            Electricity  CL_PRODUCT
202200                                      Fuels for heating  CL_PRODUCT
202300                                            Motor fuels  CL_PRODUCT
202400                                             Lubricants  CL_PRODUCT
203000                         Fertilisers and soil improvers  CL_PRODUCT
203100                                   Straight fertilizers  CL_PRODUCT
203110                                Nitrogenous fertilizers  CL_PRODUCT
203120                                 Phosphatic fertilizers  CL_PRODUCT
203130                                

In [291]:
key = dict(GEO='EU28+FR+EU27_2020', PRODUCT='200000+210000', UNIT='I15', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi15_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

2022-12-19 18:39:14,589 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


In [292]:
# sorting by columns
# data.columns.levels
price_Goods_services_consumed_agri_prod15 = data.loc[:, ('RI', 'I15', '200000')]
price_Goods_services_contributing_agri_prod15 = data.loc[:, ('RI', 'I15', '210000')]

Now for the remaining datasets

In [None]:
key = dict(GEO='EU28+FR+EU27_2020+EU27_2007', HATCHITM='CH', ANIMALS='A5130P')
params = dict(startPeriod='2000-01', endPeriod='2021-12')
resp = estat.data('apro_ec_poulm', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

prod_chicken = data.loc[:, ('A5130P', 'CH')]

key = dict(GEO='EU28+FR+EU27_2020+EU27_2007', MEAT='B1000', MEATITEM='SL', UNIT='THS_T')
params = dict(startPeriod='2000-01', endPeriod='2021-12')
resp = estat.data('apro_mt_pwgtm', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

prod_bovine = data.loc[:, ('B1000', 'SL', 'THS_T')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I00', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi00_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

price_Goods_services_consumed_agri_prod00 = data.loc[:, ('RI', 'I00', '200000')]
price_Goods_services_contributing_agri_prod00 = data.loc[:, ('RI', 'I00', '210000')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I05', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi05_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

price_Goods_services_consumed_agri_prod05 = data.loc[:, ('RI', 'I05', '200000')]
price_Goods_services_contributing_agri_prod05 = data.loc[:, ('RI', 'I05', '210000')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I10', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi10_inq', key=key, params=params)
data = resp.to_pandas(datetime('TIME_PERIOD'))
    
price_Goods_services_consumed_agri_prod10 = data.loc[:, ('RI', 'I10', '200000')]
price_Goods_services_contributing_agri_prod10 = data.loc[:, ('RI', 'I10', '210000')]

In [194]:
def relabelling(df,dataflow_code):
    col = list(df.columns)
    mapping = {}
    #df.reset_index()
    for i in range(len(col)):
        col_name = col[i] + "_" + dataflow_code
        mapping[df.columns[i]] = col_name
        #df.rename(columns={df.columns[i]: col_name})
    df = df.rename(columns=mapping)
    return df

In [300]:
df1 = relabelling(prod_bovine, 'prod_bovine')
df2 = relabelling(prod_chicken, 'prod_chicken')
df3 = relabelling(price_Goods_services_consumed_agri_prod00, 'price_Goods_services_consumed_agri_prod00')
df4 = relabelling(price_Goods_services_contributing_agri_prod00, 'price_Goods_services_contributing_agri_prod00')
df5 = relabelling(price_Goods_services_consumed_agri_prod05, 'price_Goods_services_consumed_agri_prod05')
df6 = relabelling(price_Goods_services_contributing_agri_prod05, 'price_Goods_services_contributing_agri_prod05')
df7 = relabelling(price_Goods_services_consumed_agri_prod10, 'price_Goods_services_consumed_agri_prod10')
df8 = relabelling(price_Goods_services_contributing_agri_prod10, 'price_Goods_services_contributing_agri_prod10')
df9 = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod15')
df10 = relabelling(price_Goods_services_consumed_agri_prod15, 'price_Goods_services_consumed_agri_prod15')


In [421]:
#!pip install pathlib
from pathlib import Path  

In [631]:
df = relabelling(price_Goods_services_consumed_agri_prod15, 'price_Goods_services_consumed_agri_prod15')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_consumed_agri_prod15.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod15')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_contributing_agri_prod15.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_consumed_agri_prod10, 'price_Goods_services_consumed_agri_prod10')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_consumed_agri_prod10.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_contributing_agri_prod10, 'price_Goods_services_contributing_agri_prod10')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_contributing_agri_prod10.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_consumed_agri_prod05, 'price_Goods_services_consumed_agri_prod05')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_consumed_agri_prod05.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_contributing_agri_prod05, 'price_Goods_services_contributing_agri_prod05')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_contributing_agri_prod05.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_consumed_agri_prod00, 'price_Goods_services_consumed_agri_prod00')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_consumed_agri_prod00.csv')
df.to_csv(filepath, index=True)

df = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod00')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//price_Goods_services_contributing_agri_prod00.csv')
df.to_csv(filepath, index=True)

df = relabelling(prod_chicken, 'prod_chicken')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//prod_chicken.csv')
df.to_csv(filepath, index=True)

df = relabelling(prod_bovine, 'prod_bovine')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//prod_bovine.csv')
df.to_csv(filepath, index=True)


In [732]:
def convert_to_monthly(names,dict_of_dfs):
    d = dict_of_dfs
    for i in range(len(names)-2):
        i = names[i]
        dataset = i + '.csv'
        d[i] = pd.read_csv(dataset)
        d[i]['TIME_PERIOD'] = pd.to_datetime(d[i]['TIME_PERIOD']).dt.to_period('M')
        d[i] = d[i].set_index('TIME_PERIOD').resample('M').interpolate()
        d[i] = d[i].reset_index() ##
    d['prod_bovine'] = pd.read_csv('prod_bovine.csv')
    d['prod_chicken'] = pd.read_csv('prod_chicken.csv')

    return d


In [735]:
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_of_dfs = {}
dict_data = convert_to_monthly(names,dict_of_dfs)


0     2015-01
1     2015-02
2     2015-03
3     2015-04
4     2015-05
       ...   
77    2021-06
78    2021-07
79    2021-08
80    2021-09
81    2021-10
Name: TIME_PERIOD, Length: 82, dtype: period[M]

In [693]:
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_of_dfs = {}
dict_data = convert_to_monthly(names,dict_of_dfs)

test = dict_data[names[1]].reset_index()
test

Unnamed: 0,TIME_PERIOD,EU27_2020_price_Goods_services_contributing_agri_prod15,EU28_price_Goods_services_contributing_agri_prod15,FR_price_Goods_services_contributing_agri_prod15
0,2015-01,100.370000,100.420000,100.250000
1,2015-02,100.020000,100.080000,100.096667
2,2015-03,99.670000,99.740000,99.943333
3,2015-04,99.320000,99.400000,99.790000
4,2015-05,99.516667,99.563333,99.873333
...,...,...,...,...
77,2021-06,106.026667,102.710000,107.663333
78,2021-07,106.480000,102.710000,108.490000
79,2021-08,106.603333,102.710000,108.916667
80,2021-09,106.726667,102.710000,109.343333


In [642]:
#
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_consumed_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_data = {}
dict_data = convert_to_monthly(names)

dict_data[names[0]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[1]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[2]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[3]].set_index('TIME_PERIOD').resample('M').interpolate()


KeyError: 'price_Goods_services_consumed_agri_prod10'

In [643]:
#
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_data = convert_to_monthly(names)

dict_data[names[0]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[1]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[2]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[3]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[4]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[5]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[6]].set_index('TIME_PERIOD').resample('M').interpolate()
dict_data[names[7]].set_index('TIME_PERIOD').resample('M').interpolate()

dict_data[names[8]] = pd.read_csv('prod_bovine.csv')
dict_data[names[9]] = pd.read_csv('prod_chicken.csv')

di

In [647]:
#
dict_data[names[7]] = dict_data[names[7]].set_index('TIME_PERIOD').resample('M').interpolate()


KeyError: "None of ['TIME_PERIOD'] are in the columns"

In [648]:
dict_data[names[7]]

Unnamed: 0_level_0,EU27_2020_price_Goods_services_contributing_agri_prod00,EU28_price_Goods_services_contributing_agri_prod00,FR_price_Goods_services_contributing_agri_prod00
TIME_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01,100.370000,100.420000,100.250000
2015-02,100.020000,100.080000,100.096667
2015-03,99.670000,99.740000,99.943333
2015-04,99.320000,99.400000,99.790000
2015-05,99.516667,99.563333,99.873333
...,...,...,...
2021-06,106.026667,102.710000,107.663333
2021-07,106.480000,102.710000,108.490000
2021-08,106.603333,102.710000,108.916667
2021-09,106.726667,102.710000,109.343333


In [742]:
def merging_datasets(names, dict_data):

    df = pd.merge(dict_data[names[0]], dict_data[names[1]], how='outer', on='TIME_PERIOD')
    
    for i in range(len(names)-4):
        df1 = pd.merge(df, dict_data[names[i+2]], how='outer', on='TIME_PERIOD')
    
    df1['TIME_PERIOD'] = df1['TIME_PERIOD'].astype(str)

    df2 = pd.merge(dict_data[names[8]], dict_data[names[9]], how='outer', on='TIME_PERIOD')

    df2['TIME_PERIOD'] = pd.to_datetime(df2['TIME_PERIOD'])
    df1['TIME_PERIOD'] = pd.to_datetime(df1['TIME_PERIOD'])

    df = pd.merge(df1,df2, how='outer',on='TIME_PERIOD')

    return df

In [744]:
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_of_dfs = {}
dict_data = convert_to_monthly(names,dict_of_dfs)
df = merging_datasets(names, dict_data)
df

Unnamed: 0,TIME_PERIOD,EU27_2020_price_Goods_services_consumed_agri_prod15,EU28_price_Goods_services_consumed_agri_prod15,FR_price_Goods_services_consumed_agri_prod15,EU27_2020_price_Goods_services_contributing_agri_prod15,EU28_price_Goods_services_contributing_agri_prod15,FR_price_Goods_services_contributing_agri_prod15,EU27_2020_price_Goods_services_contributing_agri_prod00,EU28_price_Goods_services_contributing_agri_prod00,FR_price_Goods_services_contributing_agri_prod00,EU27_2007_prod_bovine,EU27_2020_prod_bovine,EU28_prod_bovine,FR_prod_bovine,EU27_2007_prod_chicken,EU27_2020_prod_chicken,EU28_prod_chicken,FR_prod_chicken
0,2015-01-01,100.97,101.140000,100.830000,100.370000,100.420000,100.250000,100.370000,100.420000,100.250000,629.59,548.70,632.72,119.82,,,,73905.69
1,2015-02-01,100.70,100.866667,100.810000,100.020000,100.080000,100.096667,100.020000,100.080000,100.096667,572.29,503.87,575.24,106.52,,,,69263.60
2,2015-03-01,100.43,100.593333,100.790000,99.670000,99.740000,99.943333,99.670000,99.740000,99.943333,652.72,587.41,656.18,135.03,,,,74002.17
3,2015-04-01,100.16,100.320000,100.770000,99.320000,99.400000,99.790000,99.320000,99.400000,99.790000,606.63,529.07,609.77,117.65,,,,74878.53
4,2015-05-01,100.08,100.160000,100.383333,99.516667,99.563333,99.873333,99.516667,99.563333,99.873333,580.77,520.83,584.22,117.08,,,,80816.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,2014-10-01,,,,,,,,,,688.47,601.01,692.19,123.38,,,,62528.59
260,2014-11-01,,,,,,,,,,618.05,547.71,621.48,119.74,,,,59775.35
261,2014-12-01,,,,,,,,,,630.05,570.57,633.92,121.86,,,,71545.00
262,2021-11-01,,,,,,,,,,,618.07,,123.13,,,,61889.48


In [731]:
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_of_dfs = {}
dict_data = convert_to_monthly(names,dict_of_dfs)

test0 = dict_data[names[0]].reset_index()
test1 = dict_data[names[1]].reset_index()
test8 = dict_data[names[8]]
test9=dict_data[names[9]]

In [725]:
dftest = pd.merge(test0, test1, how='outer', on='TIME_PERIOD')

type(test1["TIME_PERIOD"][0])
#dftest2 = pd.merge(test8, test9, how='outer', on='TIME_PERIOD')
#type(dftest2['TIME_PERIOD'][0]) == str

NameError: name 'pandas' is not defined

In [678]:
df = pd.merge(dict_data[names[0]], dict_data[names[1]], how='outer', on='TIME_PERIOD')
df1 = pd.merge(df, dict_data[names[0+2]], how='outer', on='TIME_PERIOD')
df1 = pd.merge(df1, dict_data[names[1+2]], how='outer', on='TIME_PERIOD')
df1 = pd.merge(df1, dict_data[names[2+2]], how='outer', on='TIME_PERIOD')

df1['TIME_PERIOD'] = df1['TIME_PERIOD'].astype(str)
df1['TIME_PERIOD'] = pd.to_datetime(df1['TIME_PERIOD'])

KeyError: 'TIME_PERIOD'

In [594]:
df1 = merging_datasets(names)
df1

Unnamed: 0,TIME_PERIOD,EU27_2020_price_Goods_services_consumed_agri_prod15,EU28_price_Goods_services_consumed_agri_prod15,FR_price_Goods_services_consumed_agri_prod15,EU27_2020_price_Goods_services_contributing_agri_prod15,EU28_price_Goods_services_contributing_agri_prod15,FR_price_Goods_services_contributing_agri_prod15,FR_price_Goods_services_consumed_agri_prod10,FR_price_Goods_services_contributing_agri_prod10,FR_price_Goods_services_consumed_agri_prod05,FR_price_Goods_services_contributing_agri_prod05,FR_price_Goods_services_consumed_agri_prod00,EU27_2020_price_Goods_services_contributing_agri_prod00,EU28_price_Goods_services_contributing_agri_prod00,FR_price_Goods_services_contributing_agri_prod00
0,2015-01,100.97,101.14,100.83,100.37,100.42,100.25,104.0,100.3,,,,100.37,100.42,100.25
1,2015-04,100.16,100.32,100.77,99.32,99.40,99.79,103.7,99.7,,,,99.32,99.40,99.79
2,2015-07,99.92,99.84,99.61,99.91,99.89,100.04,102.7,100.1,,,,99.91,99.89,100.04
3,2015-10,98.96,98.71,98.78,100.41,100.31,99.92,101.7,100.1,,,,100.41,100.31,99.92
4,2016-01,98.30,98.00,97.41,101.36,101.22,100.38,100.8,100.8,,,,101.36,101.22,100.38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,2013-10,,,,,,,107.1,100.6,,,,,,
84,2014-01,,,,,,,107.2,100.8,,,,,,
85,2014-04,,,,,,,106.9,100.4,,,,,,
86,2014-07,,,,,,,105.8,99.9,,,,,,


In [605]:
df2['TIME_PERIOD'] = pd.to_datetime(df2['TIME_PERIOD'])


In [607]:
df1['TIME_PERIOD'] = df1['TIME_PERIOD'].astype(str)
df1['TIME_PERIOD'] = pd.to_datetime(df1['TIME_PERIOD'])

In [608]:
df2 = pd.merge(dict_data[names[8]], dict_data[names[9]], how='outer', on='TIME_PERIOD')
df2['TIME_PERIOD'] = pd.to_datetime(df2['TIME_PERIOD'])
df1['TIME_PERIOD'] = df1['TIME_PERIOD'].astype(str)
df1['TIME_PERIOD'] = pd.to_datetime(df1['TIME_PERIOD'])
df = pd.merge(df1,df2, how='outer',on='TIME_PERIOD')


In [609]:
df

Unnamed: 0,TIME_PERIOD,FR_price_Goods_services_consumed_agri_prod00,EU27_2007_prod_bovine,EU27_2020_prod_bovine,EU28_prod_bovine,FR_prod_bovine,EU27_2007_prod_chicken,EU27_2020_prod_chicken,EU28_prod_chicken,FR_prod_chicken
0,2000-01-01,96.1,,,,134.82,,,,68679.00
1,2000-04-01,97.7,,,,124.41,,,,72032.00
2,2000-07-01,98.6,,,,123.43,,,,61311.00
3,2000-10-01,100.5,,,,129.13,,,,65491.00
4,2001-01-01,101.8,,,,114.21,,,,70510.00
...,...,...,...,...,...,...,...,...,...,...
259,2021-08-01,,,567.50,,117.87,,,,69749.14
260,2021-09-01,,,590.81,,120.45,,,,66146.46
261,2021-10-01,,,576.96,,115.74,,,,63460.25
262,2021-11-01,,,618.07,,123.13,,,,61889.48


In [487]:
nam=['price_Goods_services_consumed_agri_prod00','price_Goods_services_consumed_agri_prod05']
#dict_data = convert_to_monthly(nam)
dict_data = {}
dict_data['price_Goods_services_consumed_agri_prod00'] = pd.read_csv('price_Goods_services_consumed_agri_prod00.csv')
#type(dict_data['price_Goods_services_consumed_agri_prod00']['TIME_PERIOD'])
dict_data['price_Goods_services_consumed_agri_prod00']['TIME_PERIOD'] = pd.to_datetime(dict_data['price_Goods_services_consumed_agri_prod00']['TIME_PERIOD']).dt.to_period('M')


In [537]:
d = {}
nam=['price_Goods_services_consumed_agri_prod00','price_Goods_services_consumed_agri_prod05']
for i in nam:
        dataset = i + '.csv'
        d[i] = pd.read_csv(dataset)
        d[i]['TIME_PERIOD'] = pd.to_datetime(d[i]['TIME_PERIOD']).dt.to_period('M')
        #d[i].set_index('TIME_PERIOD').resample('M').interpolate()
d

Unnamed: 0,TIME_PERIOD,FR_price_Goods_services_consumed_agri_prod05
0,2000-01,
1,2000-04,
2,2000-07,
3,2000-10,
4,2001-01,
5,2001-04,
6,2001-07,
7,2001-10,
8,2002-01,
9,2002-04,


In [534]:
d.values()

dict_values([   TIME_PERIOD  FR_price_Goods_services_consumed_agri_prod00
0      2000-01                                          96.1
1      2000-04                                          97.7
2      2000-07                                          98.6
3      2000-10                                         100.5
4      2001-01                                         101.8
5      2001-04                                         101.4
6      2001-07                                         101.6
7      2001-10                                         101.6
8      2002-01                                         101.0
9      2002-04                                         101.9
10     2002-07                                         101.2
11     2002-10                                         100.9
12     2003-01                                         101.6
13     2003-04                                         101.6
14     2003-07                                         101.7
15     2003

In [505]:
for i in nam:
    print(i)
    print(type(i))

price_Goods_services_consumed_agri_prod00
<class 'str'>
price_Goods_services_consumed_agri_prod05
<class 'str'>


In [None]:
dict_data['price_Goods_services_consumed_agri_prod15'] = pd.read_csv('price_Goods_services_consumed_agri_prod15.csv')
type(dict_data['price_Goods_services_consumed_agri_prod15']['TIME_PERIOD'])

dict_data['price_Goods_services_consumed_agri_prod15']['TIME_PERIOD'] = pd.to_datetime(dict_data['price_Goods_services_consumed_agri_prod15']['TIME_PERIOD']).dt.to_period('M')
dict_data['price_Goods_services_consumed_agri_prod15'].set_index('TIME_PERIOD').resample('M').interpolate()

In [481]:
df1 = pd.read_csv('price_Goods_services_consumed_agri_prod00.csv')
type(df1['TIME_PERIOD'])

df1['TIME_PERIOD'] = pd.to_datetime(df1['TIME_PERIOD']).dt.to_period('M')
df1.set_index('TIME_PERIOD').resample('M').interpolate()

ValueError: to assemble mappings requires at least that [year, month, day] be specified: [day,month,year] is missing