In [1]:
import requests
import pandas as pd

In [5]:
# Eurostat 
# in order to build the key
# need to know the dataflow (variable): 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP if we select GDP
# then the data structure (dimensions of the variable we can select) associated with the dataflow: 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/dataflow/ESTAT/NAMA_10_GDP?references=children so we know in which order to declare the dimensions
# and finally the codelist associated with each structure: 
# https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/FREQ if we select the frequency FREQ

# quarterly variables

# Price indices of the means of agricultural production, input (2010 = 100), until 2017 only
# freq,p_adj,unit,product,geo,TIME_PERIOD 

# Price indices of the means of agricultural production, input (2015 = 100), from 2015 only
# freq,p_adj,unit,product,geo,TIME_PERIOD 


# monthly variables

# Slaughtering in slaughterhouses
# freq,meat,meatitem,unit,geo,TIME_PERIOD

# Poultry   
# freq,animals,hatchitm,geo,TIME_PERIOD


In [2]:
#!pip install pandasdmx
import pandasdmx as sdmx
estat = sdmx.Request('ESTAT')

  warn(


In [7]:
flow_msg = estat.dataflow()

In [8]:
dataflows = sdmx.to_pandas(flow_msg.dataflow)
dataflows.head()

DS-018995                          EU trade since 1999 by SITC
DS-022469    Extra-EU trade since 2000 by mode of transport...
DS-032655                     EU trade since 1988 by BEC/rev.4
DS-043227                        EFTA trade since 1995 by SITC
DS-056697    EU enlargement countries trade since 2002, by ...
dtype: object

In [9]:
agri = dataflows[dataflows.str.contains('agricultur', case=False)]
prod = dataflows[dataflows.str.contains('production', case=False)]
outcome = set(agri) & set(prod)
outcome

{'EC indices of purchase prices of the means of agricultural production (1995 = 100) - annual data',
 'EC indices of purchase prices of the means of agricultural production (1995 = 100) - monthly data',
 'Holdings, cultivated area under vines (CAV) and agricultural area in use broken down by type of production, size class and regions',
 'Main farm indicators by agricultural area, type and economic size of the farm, share of consumed production, legal status of the holding and NUTS2 region',
 'Organic crop area by agricultural production methods and crops',
 'Organic crop area by agricultural production methods and crops (until 2011)',
 'Price indices of the means of agricultural production, input (2000 = 100) - annual data',
 'Price indices of the means of agricultural production, input (2000 = 100) - monthly data',
 'Price indices of the means of agricultural production, input (2000 = 100) - quarterly data',
 'Price indices of the means of agricultural production, input (2005 = 100) -

In [10]:
take = dataflows[dataflows.str.contains('Price indices of the means of agricultural production', case=False)]
take

apri_pi00_ina    Price indices of the means of agricultural pro...
apri_pi00_inm    Price indices of the means of agricultural pro...
apri_pi00_inq    Price indices of the means of agricultural pro...
apri_pi05_ina    Price indices of the means of agricultural pro...
apri_pi05_inq    Price indices of the means of agricultural pro...
apri_pi10_ina    Price indices of the means of agricultural pro...
apri_pi10_inq    Price indices of the means of agricultural pro...
apri_pi15_ina    Price indices of the means of agricultural pro...
apri_pi15_inq    Price indices of the means of agricultural pro...
dtype: object

In [11]:
metadata = estat.datastructure('DSD_apri_pi15_inq')
metadata.codelist
for cl in 'CL_PRODUCT', 'CL_P_ADJ', 'CL_UNIT', 'CL_FREQ', 'CL_GEO':
    print(sdmx.to_pandas(metadata.codelist[cl]))

                                                         name      parent
CL_PRODUCT                                                               
200000      Goods and services currently consumed in agric...  CL_PRODUCT
201000                               Seeds and planting stock  CL_PRODUCT
202000                                     Energy, lubricants  CL_PRODUCT
202100                                            Electricity  CL_PRODUCT
202200                                      Fuels for heating  CL_PRODUCT
202300                                            Motor fuels  CL_PRODUCT
202400                                             Lubricants  CL_PRODUCT
203000                         Fertilisers and soil improvers  CL_PRODUCT
203100                                   Straight fertilizers  CL_PRODUCT
203110                                Nitrogenous fertilizers  CL_PRODUCT
203120                                 Phosphatic fertilizers  CL_PRODUCT
203130                                

In [12]:
key = dict(GEO='EU28+FR+EU27_2020', PRODUCT='200000+210000', UNIT='I15', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi15_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})
data

2022-12-20 01:17:28,708 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


P_ADJ,RI,RI,RI,RI,RI,RI
UNIT,I15,I15,I15,I15,I15,I15
PRODUCT,200000,200000,200000,210000,210000,210000
GEO,EU27_2020,EU28,FR,EU27_2020,EU28,FR
TIME_PERIOD,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4
2015Q1,100.97,101.14,100.83,100.37,100.42,100.25
2015Q2,100.16,100.32,100.77,99.32,99.4,99.79
2015Q3,99.92,99.84,99.61,99.91,99.89,100.04
2015Q4,98.96,98.71,98.78,100.41,100.31,99.92
2016Q1,98.3,98.0,97.41,101.36,101.22,100.38
2016Q2,97.01,96.91,96.42,100.42,100.34,99.55
2016Q3,96.54,96.5,95.44,100.78,100.69,100.06
2016Q4,96.4,96.55,95.91,101.13,100.99,100.43
2017Q1,98.28,98.44,97.13,101.49,101.34,100.58
2017Q2,96.84,97.03,95.9,101.07,100.81,100.91


In [13]:
# sorting by columns
price_Goods_services_consumed_agri_prod15 = data.loc[:, ('RI', 'I15', '200000')]
price_Goods_services_contributing_agri_prod15 = data.loc[:, ('RI', 'I15', '210000')]

Now for the remaining datasets

In [41]:
key = dict(GEO='EU28+FR+EU27_2020+EU27_2007', HATCHITM='CH', ANIMALS='A5130P')
params = dict(startPeriod='2000-01', endPeriod='2021-12')
resp = estat.data('apro_ec_poulm', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

prod_chicken = data.loc[:, ('A5130P', 'CH')]

key = dict(GEO='EU28+FR+EU27_2020+EU27_2007', MEAT='B1000', MEATITEM='SL', UNIT='THS_T')
params = dict(startPeriod='2000-01', endPeriod='2021-12')
resp = estat.data('apro_mt_pwgtm', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

prod_bovine = data.loc[:, ('B1000', 'SL', 'THS_T')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I00', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi00_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

price_Goods_services_consumed_agri_prod00 = data.loc[:, ('RI', 'I00', '200000')]
price_Goods_services_contributing_agri_prod00 = data.loc[:, ('RI', 'I00', '210000')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I05', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi05_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})

price_Goods_services_consumed_agri_prod05 = data.loc[:, ('RI', 'I05', '200000')]
price_Goods_services_contributing_agri_prod05 = data.loc[:, ('RI', 'I05', '210000')]

key = dict(GEO='FR', PRODUCT='200000+210000', UNIT='I10', P_ADJ='RI')
params = dict(startPeriod='2000', endPeriod='2021')
resp = estat.data('apri_pi10_inq', key=key, params=params)
data = resp.to_pandas(
    datetime={'dim': 'TIME_PERIOD', 'freq': 'FREQ'})
#data = resp.to_pandas(datetime('TIME_PERIOD'))
    
price_Goods_services_consumed_agri_prod10 = data.loc[:, ('RI', 'I10', '200000')]
price_Goods_services_contributing_agri_prod10 = data.loc[:, ('RI', 'I10', '210000')]

2022-12-20 01:33:44,093 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-20 01:33:47,318 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-20 01:33:48,892 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-20 01:33:50,185 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message
2022-12-20 01:33:51,802 pandasdmx.reader.sdmxml - INFO: Use supplied dsd=… argument for non–structure-specific message


In [14]:
def relabelling(df,dataflow_code):
    col = list(df.columns)
    mapping = {}
    #df.reset_index()
    for i in range(len(col)):
        col_name = col[i] + "_" + dataflow_code
        mapping[df.columns[i]] = col_name
        #df.rename(columns={df.columns[i]: col_name})
    df = df.rename(columns=mapping)
    return df

In [43]:
df1 = relabelling(prod_bovine, 'prod_bovine')
df2 = relabelling(prod_chicken, 'prod_chicken')
df3 = relabelling(price_Goods_services_consumed_agri_prod00, 'price_Goods_services_consumed_agri_prod00')
df4 = relabelling(price_Goods_services_contributing_agri_prod00, 'price_Goods_services_contributing_agri_prod00')
df5 = relabelling(price_Goods_services_consumed_agri_prod05, 'price_Goods_services_consumed_agri_prod05')
df6 = relabelling(price_Goods_services_contributing_agri_prod05, 'price_Goods_services_contributing_agri_prod05')
df7 = relabelling(price_Goods_services_consumed_agri_prod10, 'price_Goods_services_consumed_agri_prod10')
df8 = relabelling(price_Goods_services_contributing_agri_prod10, 'price_Goods_services_contributing_agri_prod10')
df9 = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod15')
df10 = relabelling(price_Goods_services_consumed_agri_prod15, 'price_Goods_services_consumed_agri_prod15')


In [19]:
#!pip install pathlib
from pathlib import Path  

In [34]:
df10 = relabelling(price_Goods_services_consumed_agri_prod15, 'price_Goods_services_consumed_agri_prod15')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_consumed_agri_prod15.csv')
df10.to_csv(filepath, index=True)

df9 = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod15')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_contributing_agri_prod15.csv')
df9.to_csv(filepath, index=True)

df7 = relabelling(price_Goods_services_consumed_agri_prod10, 'price_Goods_services_consumed_agri_prod10')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_consumed_agri_prod10.csv')
df7.to_csv(filepath, index=True)

df8 = relabelling(price_Goods_services_contributing_agri_prod10, 'price_Goods_services_contributing_agri_prod10')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_contributing_agri_prod10.csv')
df8.to_csv(filepath, index=True)

df5 = relabelling(price_Goods_services_consumed_agri_prod05, 'price_Goods_services_consumed_agri_prod05')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_consumed_agri_prod05.csv')
df5.to_csv(filepath, index=True)

df6 = relabelling(price_Goods_services_contributing_agri_prod05, 'price_Goods_services_contributing_agri_prod05')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_contributing_agri_prod05.csv')
df6.to_csv(filepath, index=True)

df3 = relabelling(price_Goods_services_consumed_agri_prod00, 'price_Goods_services_consumed_agri_prod00')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_consumed_agri_prod00.csv')
df3.to_csv(filepath, index=True)

df4 = relabelling(price_Goods_services_contributing_agri_prod15, 'price_Goods_services_contributing_agri_prod00')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/price_Goods_services_contributing_agri_prod00.csv')
df4.to_csv(filepath, index=True)

df2 = relabelling(prod_chicken, 'prod_chicken')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/prod_chicken.csv')
df2.to_csv(filepath, index=True)

df1 = relabelling(prod_bovine, 'prod_bovine')
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data/prod_bovine.csv')
df1.to_csv(filepath, index=True)


In [51]:
def convert_to_monthly2(names,dict_of_dfs):
    d = dict_of_dfs
    for i in range(len(names)-2):
        i = names[i]
        dataset = i + '.csv'
        d[i] = pd.read_csv(dataset)
        d[i]['TIME_PERIOD'] = pd.to_datetime(d[i]['TIME_PERIOD']).dt.to_period('M')
        d[i] = d[i].set_index('TIME_PERIOD').resample('M').interpolate()
        d[i] = d[i].reset_index() ##
        d[i]['TIME_PERIOD'] =  d[i]['TIME_PERIOD'].astype(str) ##
    d['prod_bovine'] = pd.read_csv('prod_bovine.csv')
    d['prod_chicken'] = pd.read_csv('prod_chicken.csv')
    return d

In [22]:
def merging_datasets2(names, dict_data):

    df = pd.merge(dict_data[names[0]], dict_data[names[1]], how='outer', on='TIME_PERIOD')
    
    for i in range(len(names)-2):
        df = pd.merge(df, dict_data[names[i+2]], how='outer', on='TIME_PERIOD')
    
    df['TIME_PERIOD'] = pd.to_datetime(df['TIME_PERIOD'])


    return df

In [97]:
names = ['price_Goods_services_consumed_agri_prod15', 'price_Goods_services_contributing_agri_prod15',  'price_Goods_services_consumed_agri_prod10', 'price_Goods_services_contributing_agri_prod10', 'price_Goods_services_consumed_agri_prod05','price_Goods_services_contributing_agri_prod05', 'price_Goods_services_consumed_agri_prod00', 'price_Goods_services_contributing_agri_prod00', 'prod_bovine', 'prod_chicken']
dict_of_dfs = {}
dict_data = convert_to_monthly2(names,dict_of_dfs)
df = merging_datasets2(names, dict_data)

In [99]:
def cleaning_for_FR(df):
    df = df.set_index('TIME_PERIOD')
    df = df.filter(regex='FR', axis=1)
    df = df.reset_index()
    df = df.sort_values('TIME_PERIOD')
    return df

In [100]:
df = cleaning_for_FR(df)

Unnamed: 0,TIME_PERIOD,FR_price_Goods_services_consumed_agri_prod15,FR_price_Goods_services_contributing_agri_prod15,FR_price_Goods_services_consumed_agri_prod10,FR_price_Goods_services_contributing_agri_prod10,FR_price_Goods_services_consumed_agri_prod05,FR_price_Goods_services_contributing_agri_prod05,FR_price_Goods_services_consumed_agri_prod00,FR_price_Goods_services_contributing_agri_prod00,FR_prod_bovine,FR_prod_chicken
82,2000-01-01,,,,,,,96.100000,,134.82,68679.00
83,2000-02-01,,,,,,,96.633333,,127.47,70204.00
84,2000-03-01,,,,,,,97.166667,,144.23,87880.00
85,2000-04-01,,,,,,,97.700000,,124.41,72032.00
86,2000-05-01,,,,,,,98.000000,,144.02,72065.00
...,...,...,...,...,...,...,...,...,...,...,...
79,2021-08-01,104.456667,108.916667,,,,,,108.916667,117.87,69749.14
80,2021-09-01,106.833333,109.343333,,,,,,109.343333,120.45,66146.46
81,2021-10-01,109.210000,109.770000,,,,,,109.770000,115.74,63460.25
262,2021-11-01,,,,,,,,,123.13,61889.48


In [103]:
filepath = Path('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/projet_python_ds//DF.csv')
df.to_csv(filepath, index=True)

Importing new extra dataset from website

In [109]:
df1 = pd.read_csv('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data//apri_pi15_outq.csv.gz')
df2 = pd.read_csv('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data//apri_pi10_outq.csv.gz')
df3 = pd.read_csv('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data//apri_pi05_outq.csv.gz')
df4 = pd.read_csv('C:/Users/t480/Desktop/University courses/ENSAE 2A/PYTHON DS/data//apri_pi00_outq.csv.gz')


In [89]:
#df2['TIME_PERIOD'] = pd.to_datetime(df2['TIME_PERIOD'])
#df2 = df2[df2['TIME_PERIOD']<'2017-01-01']
df2

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,p_adj,unit,product,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
0,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,I10,10000,EU28,2010-Q1,78.7,d
1,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,I10,10000,EU28,2010-Q2,80.9,d
2,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,I10,10000,EU28,2010-Q3,106.4,d
3,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,I10,10000,EU28,2010-Q4,119.5,d
4,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,I10,10000,EU28,2011-Q1,145.1,d
...,...,...,...,...,...,...,...,...,...,...
635,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,PCH_SM,61000,FR,2016-Q4,7.4,
636,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,PCH_SM,61000,FR,2017-Q1,6.4,
637,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,PCH_SM,61000,FR,2017-Q2,-6.7,
638,ESTAT:APRI_PI10_OUTQ(1.0),17/04/18 11:00:00,Q,RI,PCH_SM,61000,FR,2017-Q3,-9.4,


In [41]:
def cleaning_imported_df(name,dft):
    dft = dft[['geo','TIME_PERIOD','OBS_VALUE', 'product']]
    #dft = dft.set_index('TIME_PERIOD')
    #dft = dft.pivot(columns='TIME_PERIOD', values='OBS_VALUE')
    dft = dft[dft.geo == 'FR']
    dft = dft.drop(columns=['geo'])
    #dft = dft.pivot(columns='TIME_PERIOD', values='OBS_VALUE')
    #dft2 = dft = dft[dft.product == '610000']
    #dft = dft.set_index('TIME_PERIOD')
    #dft = relabelling(dft, 'price_output__agri_prod15_FR')
    #dft = dft.reset_index()
    unique_vals = list(dft['product'].unique())
    dict_df = {}
    for i in range(len(unique_vals)):
        i_name = str(unique_vals[i])
        dict_df[i_name] = dft[dft['product'] == unique_vals[i]]
        dict_df[i_name] = dict_df[i_name].drop(columns=['product'])
        dict_df[i_name] = dict_df[i_name].rename(columns = {'OBS_VALUE':i_name})
        dict_df[i_name] = dict_df[i_name].set_index('TIME_PERIOD')
        dict_df[i_name] = relabelling(dict_df[i_name],name)
        dict_df[i_name] = dict_df[i_name].reset_index()
    return dict_df, unique_vals

In [44]:
def convert_to_monthly3(names,dict_of_dfs):
    d = dict_of_dfs
    for i in range(len(names)):
        i = names[i]
        d[i]['TIME_PERIOD'] = pd.to_datetime(d[i]['TIME_PERIOD']).dt.to_period('M')
        d[i] = d[i].set_index('TIME_PERIOD').resample('M').interpolate()
        d[i] = d[i].reset_index() ##
        d[i]['TIME_PERIOD'] =  d[i]['TIME_PERIOD'].astype(str) ##
    return d

In [101]:
d = dict_df
names = ['10000', '21000', '41000', '61000']
i = names[1]
d[i]['TIME_PERIOD'] = pd.to_datetime(d[i]['TIME_PERIOD']).dt.to_period('M')
#d[i] = d[i].set_index('TIME_PERIOD').resample('M').interpolate()
#d[i] = d[i].reset_index() ##
#d[i]['TIME_PERIOD'] =  d[i]['TIME_PERIOD'].astype(str) ##

In [105]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(d[i])

   TIME_PERIOD  21000_price_output__agri_prod10_FR
0      2005-01                                62.1
1      2005-04                                61.6
2      2005-07                                64.5
3      2005-10                                65.6
4      2006-01                                62.4
5      2006-04                                66.2
6      2006-07                                71.5
7      2006-10                                75.9
8      2007-01                                74.7
9      2007-04                                74.6
10     2007-07                                95.3
11     2007-10                               114.2
12     2008-01                               135.2
13     2008-04                               130.6
14     2008-07                               108.5
15     2008-10                                81.1
16     2009-01                                75.5
17     2009-04                                81.9
18     2009-07                 

In [103]:
d[i]['TIME_PERIOD'].nunique()
un = list(d[i]['TIME_PERIOD'].unique())
un

[Period('2005-01', 'M'),
 Period('2005-04', 'M'),
 Period('2005-07', 'M'),
 Period('2005-10', 'M'),
 Period('2006-01', 'M'),
 Period('2006-04', 'M'),
 Period('2006-07', 'M'),
 Period('2006-10', 'M'),
 Period('2007-01', 'M'),
 Period('2007-04', 'M'),
 Period('2007-07', 'M'),
 Period('2007-10', 'M'),
 Period('2008-01', 'M'),
 Period('2008-04', 'M'),
 Period('2008-07', 'M'),
 Period('2008-10', 'M'),
 Period('2009-01', 'M'),
 Period('2009-04', 'M'),
 Period('2009-07', 'M'),
 Period('2009-10', 'M'),
 Period('2010-01', 'M'),
 Period('2010-04', 'M'),
 Period('2010-07', 'M'),
 Period('2010-10', 'M'),
 Period('2011-01', 'M'),
 Period('2011-04', 'M'),
 Period('2011-07', 'M'),
 Period('2011-10', 'M'),
 Period('2012-01', 'M'),
 Period('2012-04', 'M'),
 Period('2012-07', 'M'),
 Period('2012-10', 'M'),
 Period('2013-01', 'M'),
 Period('2013-04', 'M'),
 Period('2013-07', 'M'),
 Period('2013-10', 'M'),
 Period('2014-01', 'M'),
 Period('2014-04', 'M'),
 Period('2014-07', 'M'),
 Period('2014-10', 'M'),


In [100]:
dict_df, unique_vals = cleaning_imported_df('price_output__agri_prod10_FR', df2)
#dict_df = convert_to_monthly3(['10000', '21000', '41000', '61000'], dict_df)
#df1 = merging_datasets2(['10000', '21000', '41000', '61000'], dict_df)
#unique_vals
#df2
dict_df


{'10000':    TIME_PERIOD  10000_price_output__agri_prod10_FR
 0      2005-Q1                                69.7
 1      2005-Q2                                67.7
 2      2005-Q3                                74.2
 3      2005-Q4                                74.5
 4      2006-Q1                                72.9
 ..         ...                                 ...
 95     2016-Q4                                -3.3
 96     2017-Q1                                11.7
 97     2017-Q2                                 5.6
 98     2017-Q3                                -1.4
 99     2017-Q4                                -5.9
 
 [100 rows x 2 columns],
 '21000':    TIME_PERIOD  21000_price_output__agri_prod10_FR
 0      2005-Q1                                62.1
 1      2005-Q2                                61.6
 2      2005-Q3                                64.5
 3      2005-Q4                                65.6
 4      2006-Q1                                62.4
 ..         ...    

In [110]:
dict_df, unique_vals = cln('price_output__agri_prod15_FR', df1)
dict_df = convert_to_monthly3(['10000', '21000', '41000', '61000'], dict_df)
df1 = merging_datasets2(['10000', '21000', '41000', '61000'], dict_df)

dict_df, unique_vals = cln('price_output__agri_prod10_FR', df2)
dict_df = convert_to_monthly3(['10000', '21000', '41000', '61000'], dict_df)
df2 = merging_datasets2(['10000', '21000', '41000', '61000'], dict_df)

dict_df, unique_vals = cln('price_output__agri_prod05_FR', df3)
dict_df = convert_to_monthly3(['10000', '21000', '41000', '61000'], dict_df)
df3 = merging_datasets2(['10000', '21000', '41000', '61000'], dict_df)

dict_df, unique_vals = cln('price_output__agri_prod00_FR', df4)
dict_df = convert_to_monthly3(['10000', '21000', '41000', '61000'], dict_df)
df4 = merging_datasets2(['10000', '21000', '41000', '61000'], dict_df)

df = pd.merge(df1, df2, how='outer', on='TIME_PERIOD')
df = pd.merge(df, df3, how='outer', on='TIME_PERIOD')
df = pd.merge(df, df4, how='outer', on='TIME_PERIOD')
df = df.sort_values('TIME_PERIOD')


In [111]:
df

Unnamed: 0,TIME_PERIOD,10000_price_output__agri_prod15_FR,21000_price_output__agri_prod15_FR,41000_price_output__agri_prod15_FR,61000_price_output__agri_prod15_FR,10000_price_output__agri_prod10_FR,21000_price_output__agri_prod10_FR,41000_price_output__agri_prod10_FR,61000_price_output__agri_prod10_FR,10000_price_output__agri_prod05_FR,21000_price_output__agri_prod05_FR,41000_price_output__agri_prod05_FR,61000_price_output__agri_prod05_FR,10000_price_output__agri_prod00_FR,21000_price_output__agri_prod00_FR,41000_price_output__agri_prod00_FR,61000_price_output__agri_prod00_FR
208,2000-01-01,,,,,,,,,,,,,99.400000,91.800000,104.2,98.700000
209,2000-02-01,,,,,,,,,,,,,99.666667,94.033333,103.8,99.833333
210,2000-03-01,,,,,,,,,,,,,99.933333,96.266667,103.4,100.966667
211,2000-04-01,,,,,,,,,,,,,100.200000,98.500000,103.0,102.100000
212,2000-05-01,,,,,,,,,,,,,99.233333,98.233333,99.5,99.366667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,2021-12-01,154.446667,175.746667,106.470000,117.133333,,,,,,,,,,,,
84,2022-01-01,157.990000,182.800000,106.540000,117.910000,,,,,,,,,,,,
85,2022-02-01,168.943333,187.393333,105.676667,117.086667,,,,,,,,,,,,
86,2022-03-01,179.896667,191.986667,104.813333,116.263333,,,,,,,,,,,,
