In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def graficar(df: pd.DataFrame, feature, linea_cero=False):
    """Draw ``df[feature]`` as a line plot on a freshly created 10x5 figure.

    Args:
        df: Frame that holds the data to draw.
        feature: Column label used to select the data as ``df[feature]``.
        linea_cero: When truthy, a dashed grey horizontal line is added at y=0.

    Returns:
        None. The figure and axes are created here and are not returned.
    """
    _, eje = plt.subplots(figsize=(10, 5))
    eje.plot(df[feature])
    if not linea_cero:
        return
    # Optional zero reference line.
    eje.axhline(y=0, color='grey', linestyle='--')

# Carga de datos
Se importan los siguientes datasets:
* JSTdatasetR5 de [http://data.macrohistory.net/JST/JSTdatasetR5.dta](http://data.macrohistory.net/JST/JSTdatasetR5.dta)
* WBdataset extraído de [https://databank.bancomundial.org/](https://databank.bancomundial.org/)

## JSTdatasetR5

In [3]:
# Load the JST dataset from the local Stata file (path is relative to the working directory).
df_jst = pd.read_stata("../data/JSTdatasetR5.dta")

In [4]:
# Preview the first rows of the raw JST frame.
df_jst.head()

Unnamed: 0,year,country,iso,ifs,pop,rgdpmad,rgdppc,rconpc,gdp,iy,...,eq_capgain,eq_dp,eq_capgain_interp,eq_tr_interp,eq_dp_interp,bond_rate,eq_div_rtn,capital_tr,risky_tr,safe_tr
0,1870.0,Australia,AUS,193,1775.0,3273.239437,13.836157,21.449734,208.78,0.109266,...,-0.070045,0.071417,,,,0.049118,0.066415,,,
1,1871.0,Australia,AUS,193,1675.0,3298.507463,13.936864,19.930801,211.56,0.104579,...,0.041654,0.065466,,,,0.048446,0.068193,,,
2,1872.0,Australia,AUS,193,1722.0,3553.426249,15.044247,21.085006,227.4,0.130438,...,0.108945,0.062997,,,,0.047373,0.069861,,,
3,1873.0,Australia,AUS,193,1769.0,3823.629169,16.219443,23.25491,266.54,0.124986,...,0.083086,0.064484,,,,0.04672,0.069842,,,
4,1874.0,Australia,AUS,193,1822.0,3834.796926,16.268228,23.45805,287.58,0.14196,...,0.119389,0.063503,,,,0.046533,0.071085,,,


In [5]:
# Preview the last rows of the raw JST frame.
df_jst.tail()

Unnamed: 0,year,country,iso,ifs,pop,rgdpmad,rgdppc,rconpc,gdp,iy,...,eq_capgain,eq_dp,eq_capgain_interp,eq_tr_interp,eq_dp_interp,bond_rate,eq_div_rtn,capital_tr,risky_tr,safe_tr
2659,2013.0,USA,USA,111,315820.328999,31571.993947,103.425299,101.892671,16784.851,0.192086,...,0.271035,0.019355,,,,0.023508,0.024601,0.139843,0.212405,-0.065168
2660,2014.0,USA,USA,111,318106.646578,32113.618881,105.186253,104.113597,17527.258,0.196377,...,0.13635,0.019199,,,,0.025408,0.021817,0.130063,0.132729,0.122398
2661,2015.0,USA,USA,111,320413.930388,32800.923063,107.42159,107.192931,18224.78,0.198301,...,-9.2e-05,0.021124,,,,0.021358,0.021122,0.046193,0.065433,-0.008779
2662,2016.0,USA,USA,111,322705.239927,33078.508719,108.318698,109.333457,18715.04,0.195831,...,,,,,,,,,,
2663,2017.0,USA,USA,111,324802.861426,33593.446309,110.013284,111.38915,19519.424,0.204547,...,,,,,,,,,,


### Países

In [6]:
# Distinct values of the `country` column.
df_jst.country.unique()

array(['Australia', 'Belgium', 'Canada', 'Switzerland', 'Germany',
       'Denmark', 'Spain', 'Finland', 'France', 'UK', 'Ireland', 'Italy',
       'Japan', 'Netherlands', 'Norway', 'Portugal', 'Sweden', 'USA'],
      dtype=object)

In [7]:
# Distinct values of the `iso` column (three-letter country codes, per the output below).
df_jst.iso.unique()

array(['AUS', 'BEL', 'CAN', 'CHE', 'DEU', 'DNK', 'ESP', 'FIN', 'FRA',
       'GBR', 'IRL', 'ITA', 'JPN', 'NLD', 'NOR', 'PRT', 'SWE', 'USA'],
      dtype=object)

In [8]:
# Count missing values per column, listed from fewest to most.
df_jst.isna().sum().sort_values()

year                           0
country                        0
iso                            0
ifs                            0
peg_type                       0
peg_base                       0
crisisJST                     50
peg                           50
peg_strict                    50
pop                           51
cpi                           52
rgdpmad                       52
rgdppc                        52
xrusd                         53
gdp                           77
ltrate                        87
imports                      115
exports                      115
narrowm                      115
crisisJST_old                148
expenditure                  155
rconpc                       156
revenue                      174
stir                         198
money                        201
debtgdp                      229
ca                           229
tloans                       250
iy                           293
lev                          362
bond_rate 

### Preprocesado

In [9]:
# Columns selected from the JST frame by preprocesarJST.
features = np.array([
    'year', 'iso', 'pop', 'rgdpmad',
    'ca', 'imports', 'exports',
    'revenue', 'expenditure', 'debtgdp',
    'narrowm', 'stir', 'ltrate', 'cpi',
])

In [10]:
def preprocesarJST(df : pd.DataFrame, columnas=None):
    """Build a per-country, year-indexed panel from the raw JST frame.

    For each distinct value of ``df.iso`` (in order of first appearance) the
    rows of that country are restricted to ``columnas``, indexed by ``year``
    (cast to int16), extended with ``rgdp`` (``rgdpmad * pop``) and
    ``rgdp_growth`` (``pct_change`` of ``rgdp``), gap-filled with quadratic
    interpolation, and stripped of every row that still has a missing value.
    The per-country frames are then stacked into a single frame.

    Args:
        df: Raw frame. Must contain an ``iso`` column plus every column named
            in ``columnas``; ``year`` must be castable to int16.
        columnas: Column labels to keep. Must include ``year``, ``pop`` and
            ``rgdpmad``. Defaults to the module-level ``features`` array.

    Returns:
        A new DataFrame indexed by ``year`` (index values repeat across
        countries) holding the kept columns plus ``rgdp`` and ``rgdp_growth``.
        An empty DataFrame is returned when ``df`` contains no countries.
        ``df`` itself is not modified.
    """
    if columnas is None:
        columnas = features
    columnas = list(columnas)

    bloques = []
    for pais in df.iso.unique():
        df_n = df.loc[df.iso == pais, columnas].copy()
        df_n = df_n.astype({'year': np.int16})
        # Use the year as the index so interpolation runs along the time axis.
        df_n = df_n.set_index('year')
        # Real GDP from real GDP per capita and population.
        df_n['rgdp'] = df_n['rgdpmad'] * df_n['pop']
        # Year-over-year growth rate of real GDP.
        df_n['rgdp_growth'] = df_n['rgdp'].pct_change()
        # Fill gaps inside each series. Only numeric columns are interpolated:
        # string columns such as `iso` have nothing to interpolate and newer
        # pandas versions reject object-dtype input here.
        numericas = df_n.select_dtypes(include='number').columns
        df_n[numericas] = df_n[numericas].interpolate(method='quadratic')
        # Drop rows that still have missing values (the unfilled series edges).
        df_n = df_n.dropna()
        bloques.append(df_n)

    # Concatenate once: DataFrame.append was removed in pandas 2.0, and growing
    # a frame inside the loop copies it on every iteration.
    if not bloques:
        return pd.DataFrame()
    return pd.concat(bloques)

In [11]:
# NOTE(review): this rebinds df_jst from the raw frame to the processed one, so the cell is
# not idempotent — a second run would feed the processed frame (where `year` is already the
# index, not a column) back into preprocesarJST. Reload the raw data before re-running.
df_jst = preprocesarJST(df_jst)

In [12]:
# Summary of the processed frame: index, columns, dtypes and non-null counts.
df_jst.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2330 entries, 1902 to 2017
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   iso          2330 non-null   object 
 1   pop          2330 non-null   float64
 2   rgdpmad      2330 non-null   float64
 3   ca           2330 non-null   float64
 4   imports      2330 non-null   float64
 5   exports      2330 non-null   float64
 6   revenue      2330 non-null   float64
 7   expenditure  2330 non-null   float64
 8   debtgdp      2330 non-null   float64
 9   narrowm      2330 non-null   float64
 10  stir         2330 non-null   float64
 11  ltrate       2330 non-null   float64
 12  cpi          2330 non-null   float64
 13  rgdp         2330 non-null   float64
 14  rgdp_growth  2330 non-null   float64
dtypes: float64(14), object(1)
memory usage: 291.2+ KB


## WBdataset

In [18]:
# Load the WBdataset extract from the local CSV file (path is relative to the working directory).
df_wb = pd.read_csv('../data/WBdataset.csv')

In [19]:
# Preview the first rows of the WBdataset frame.
df_wb.head()

Unnamed: 0,país Code,tiempo Code,NY.GDP.MKTP.KD,NE.TRD.GNFS.ZS,NE.EXP.GNFS.ZS,NE.GDI.FTOT.ZS,NE.CON.GOVT.ZS,NE.IMP.GNFS.ZS,NV.IND.TOTL.ZS,NV.IND.MANF.ZS,FR.INR.RINR,FP.CPI.TOTL,FM.LBL.BMNY.GD.ZS,FM.LBL.BMNY.ZG,SL.UEM.TOTL.NE.ZS,SL.UEM.TOTL.ZS,SL.EMP.TOTL.SP.NE.ZS,SL.EMP.TOTL.SP.ZS
0,ARG,YR1960,150797800000.0,15.208096,7.604049,23.478768,9.880445,7.604047,,,,,21.139251,,,,,
1,ARG,YR1961,158982900000.0,11.989892,5.994947,27.765895,7.789632,5.994945,,,,,16.605999,-0.359712,,,,
2,ARG,YR1962,157628300000.0,14.075526,4.691843,21.730503,12.192375,9.383683,,,,,14.310117,10.108303,,,,
3,ARG,YR1963,149261100000.0,15.780906,7.890454,18.272516,10.252195,7.890452,,,,,16.017618,33.114754,,,,
4,ARG,YR1964,164381700000.0,11.12743,5.563716,17.1789,7.229026,5.563714,,,,,15.96786,41.37931,,,,


In [20]:
# Preview the last rows of the WBdataset frame.
df_wb.tail()

Unnamed: 0,país Code,tiempo Code,NY.GDP.MKTP.KD,NE.TRD.GNFS.ZS,NE.EXP.GNFS.ZS,NE.GDI.FTOT.ZS,NE.CON.GOVT.ZS,NE.IMP.GNFS.ZS,NV.IND.TOTL.ZS,NV.IND.MANF.ZS,FR.INR.RINR,FP.CPI.TOTL,FM.LBL.BMNY.GD.ZS,FM.LBL.BMNY.ZG,SL.UEM.TOTL.NE.ZS,SL.UEM.TOTL.ZS,SL.EMP.TOTL.SP.NE.ZS,SL.EMP.TOTL.SP.ZS
3839,GBR,YR2017,3088030000000.0,61.354757,30.045686,18.018943,18.529638,31.309071,17.829685,8.947602,,114.943592,144.271087,8.296332,4.33,4.33,60.18,60.186001
3840,GBR,YR2018,3139011000000.0,62.314223,30.506397,17.773526,18.352082,31.807826,17.906024,8.846982,,117.579064,146.520893,5.299826,4.0,4.0,60.560001,60.556999
3841,GBR,YR2019,3191493000000.0,62.927269,31.004978,17.718796,18.800567,31.922291,17.981247,8.879728,,119.622711,139.337724,-1.364158,3.74,3.74,60.849998,60.855999
3842,GBR,YR2020,2891616000000.0,56.108854,28.120834,17.106203,22.347195,27.98802,17.024908,8.648937,,120.806362,160.150856,9.70644,,4.472,,59.991001
3843,GBR,YR2021,,,,,,,,,,123.848715,,,,4.526,,59.653999
