In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
## Loading the macroeconomic dataset published with statsmodels
## Source: https://www.statsmodels.org/stable/datasets/generated/macrodata.html

data = pd.read_csv(filepath_or_buffer = 'macrodata-2.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203 entries, 0 to 202
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   year      203 non-null    float64
 1   quarter   203 non-null    float64
 2   realgdp   203 non-null    float64
 3   realcons  203 non-null    float64
 4   realinv   203 non-null    float64
 5   realgovt  203 non-null    float64
 6   realdpi   203 non-null    float64
 7   cpi       203 non-null    float64
 8   m1        203 non-null    float64
 9   tbilrate  203 non-null    float64
 10  unemp     203 non-null    float64
 11  pop       203 non-null    float64
 12  infl      203 non-null    float64
 13  realint   203 non-null    float64
dtypes: float64(14)
memory usage: 22.3 KB


In [3]:
## Previewing the first five rows of the raw table
data.head(n = 5)

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [4]:
## Creating a time based index of periods consisting of the year and quarter.
## The year=/quarter= keywords of the PeriodIndex constructor were deprecated in
## pandas 2.2 in favour of PeriodIndex.from_fields, so use from_fields when it
## exists and fall back to the constructor on older pandas versions.
## year and quarter are loaded as float64, so they are cast to integers first.

_build_periods = getattr(pd.PeriodIndex, 'from_fields', pd.PeriodIndex)
periods = _build_periods(year = data.year.astype(int), quarter = data.quarter.astype(int)).rename('date')

periods

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', name='date', length=203)

In [5]:
## Converting each quarterly period (e.g. '1959Q1') into the timestamp of its
## first day (e.g. '1959-01-01'); this cell only displays the result

periods.to_timestamp(freq = 'D', how = 'start')

DatetimeIndex(['1959-01-01', '1959-04-01', '1959-07-01', '1959-10-01',
               '1960-01-01', '1960-04-01', '1960-07-01', '1960-10-01',
               '1961-01-01', '1961-04-01',
               ...
               '2007-04-01', '2007-07-01', '2007-10-01', '2008-01-01',
               '2008-04-01', '2008-07-01', '2008-10-01', '2009-01-01',
               '2009-04-01', '2009-07-01'],
              dtype='datetime64[ns]', name='date', length=203, freq='QS-OCT')

In [6]:
## Inspecting the current row index: it is just a range of integers,
## which the next step replaces with the quarter start dates

data.index

RangeIndex(start=0, stop=203, step=1)

In [7]:
## Overwriting the integer index in place with the first day of every quarter

data.index = periods.to_timestamp(freq = 'D', how = 'start')

In [8]:
## Listing the indicators to keep; the resulting column axis is named 'item'

columns = pd.Index(
    data = ['realgdp', 'realcons', 'realinv', 'cpi', 'unemp', 'infl', 'realint'],
    name = 'item',
)

columns

Index(['realgdp', 'realcons', 'realinv', 'cpi', 'unemp', 'infl', 'realint'], dtype='object', name='item')

In [9]:
## Conforming to the column index specified.
## reindex() does not complain about unknown names: it silently adds them as
## all-NaN columns. Fail loudly instead if any requested column is absent
## (for example a typo, or `columns` having been rebound by a later cell).

_missing = columns.difference(data.columns)
if len(_missing) > 0:
    raise KeyError(f"columns not found in data: {list(_missing)}")

data = data.reindex(columns = columns)

data.head()

item,realgdp,realcons,realinv,cpi,unemp,infl,realint
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959-01-01,2710.349,1707.4,286.898,28.98,5.8,0.0,0.0
1959-04-01,2778.801,1733.7,310.859,29.15,5.1,2.34,0.74
1959-07-01,2775.488,1751.8,289.226,29.35,5.3,2.74,1.09
1959-10-01,2785.204,1753.7,299.356,29.37,5.6,0.27,4.06
1960-01-01,2847.699,1770.5,331.722,29.54,5.2,2.31,1.19


In [10]:
## Pivoting the columns into the rows gives one long Series keyed by
## (date, item); no reset_index is needed to obtain that layout.
## Only the first ten entries are shown.

data.stack().iloc[:10]

date        item    
1959-01-01  realgdp     2710.349
            realcons    1707.400
            realinv      286.898
            cpi           28.980
            unemp          5.800
            infl           0.000
            realint        0.000
1959-04-01  realgdp     2778.801
            realcons    1733.700
            realinv      310.859
dtype: float64

In [11]:
## Keeping only the quarters whose inflation rate is at most 2%

data[data['infl'] <= 2]

item,realgdp,realcons,realinv,cpi,unemp,infl,realint
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959-01-01,2710.349,1707.4,286.898,28.98,5.8,0.0,0.0
1959-10-01,2785.204,1753.7,299.356,29.37,5.6,0.27,4.06
1960-04-01,2834.39,1792.9,298.152,29.55,5.2,0.14,2.55
1960-10-01,2802.616,1788.2,259.764,29.84,6.3,1.21,1.08
1961-01-01,2819.264,1787.7,266.405,29.81,6.8,-0.4,2.77
1961-04-01,2872.005,1814.3,286.246,29.92,7.0,1.47,0.81
1961-07-01,2918.419,1823.1,310.227,29.98,6.8,0.8,1.52
1961-10-01,2977.83,1859.6,315.463,30.04,6.2,0.8,1.8
1962-04-01,3064.709,1902.5,331.039,30.22,5.5,0.13,2.65
1962-10-01,3100.563,1945.1,325.65,30.44,5.5,0.79,2.08


In [12]:
## Storing the low-inflation quarters (infl <= 2) and writing them to a new csv file

f_data = data[data['infl'] <= 2]
f_data.to_csv(path_or_buf = 'filtered.csv')

In [13]:
## Summary statistics for the filtered (inflation <= 2%) quarters only
f_data.describe()

item,realgdp,realcons,realinv,cpi,unemp,infl,realint
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0
mean,7361.221233,4953.713953,1064.691326,108.770558,5.706977,0.45093,2.89814
std,4034.207853,2884.194618,714.209922,73.488477,1.227346,1.917848,2.37519
min,2710.349,1707.4,259.764,28.98,3.7,-8.79,-0.71
25%,3190.7095,1981.0,351.9115,30.615,4.9,0.25,1.515
50%,7950.164,5284.4,967.442,135.1,5.5,0.89,2.57
75%,11318.623,7776.0,1779.2085,177.0,6.25,1.3,3.805
max,13324.6,9267.7,2232.193,216.889,10.7,2.0,10.95


In [14]:
## Summary statistics for the full sample, for comparison with the filtered subset
data.describe()

item,realgdp,realcons,realinv,cpi,unemp,infl,realint
count,203.0,203.0,203.0,203.0,203.0,203.0,203.0
mean,7221.171901,4825.293103,1012.863862,105.075788,5.884729,3.96133,1.336502
std,3214.956044,2313.346192,585.102267,61.278878,1.458574,3.253216,2.668799
min,2710.349,1707.4,259.764,28.98,3.4,-8.79,-6.79
25%,4440.1035,2874.1,519.1475,41.05,4.9,2.27,-0.085
50%,6559.594,4299.9,896.21,104.1,5.7,3.24,1.34
75%,9629.3465,6398.15,1436.6815,159.65,6.8,4.975,2.63
max,13415.266,9363.6,2264.721,218.61,10.7,14.62,10.95


In [15]:
## Ordering the low-inflation quarters from highest to lowest unemployment rate.
## The point being illustrated: inflation at or below 2% did not keep the
## U.S. economy out of recession.

f_data.sort_values("unemp", ascending = False)

item,realgdp,realcons,realinv,cpi,unemp,infl,realint
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1982-10-01,5871.001,3947.9,622.93,97.9,10.7,-0.82,8.77
2009-01-01,12925.41,9209.2,1558.494,212.671,8.1,0.94,-0.71
1993-04-01,8486.435,5671.1,1063.263,144.5,7.1,1.94,1.08
1961-04-01,2872.005,1814.3,286.246,29.92,7.0,1.47,0.81
1986-01-01,7022.757,4639.3,967.442,108.7,7.0,-4.39,10.95
2008-10-01,13141.92,9195.3,1857.661,212.174,6.9,-8.79,8.91
1961-01-01,2819.264,1787.7,266.405,29.81,6.8,-0.4,2.77
1961-07-01,2918.419,1823.1,310.227,29.98,6.8,0.8,1.52
1993-10-01,8643.769,5783.7,1118.583,146.3,6.6,1.92,1.13
1991-01-01,7950.164,5284.4,896.21,135.1,6.6,1.19,4.65


### Next step: analyze more recent U.S. real interest rate (%) data

In [16]:
## Loading the World Bank development indicators, which include the
## U.S. Real Interest Rate (%) series
## Source: https://www.kaggle.com/datasets/nicolasgonzalezmunoz/world-bank-world-development-indicators

data2 = pd.read_csv(filepath_or_buffer = 'world_bank_development_indicators.csv')

In [17]:
## Listing every column with its non-null count and dtype
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16780 entries, 0 to 16779
Data columns (total 50 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   country                                    16780 non-null  object 
 1   date                                       16780 non-null  object 
 2   agricultural_land%                         15298 non-null  float64
 3   forest_land%                               7914 non-null   float64
 4   land_area                                  15608 non-null  float64
 5   avg_precipitation                          10086 non-null  float64
 6   trade_in_services%                         9165 non-null   float64
 7   control_of_corruption_estimate             4564 non-null   float64
 8   control_of_corruption_std                  4564 non-null   float64
 9   access_to_electricity%                     7348 non-null   float64
 10  renewvable_energy_cons

In [18]:
## Previewing the first five rows of the World Bank table
data2.head(n = 5)

Unnamed: 0,country,date,agricultural_land%,forest_land%,land_area,avg_precipitation,trade_in_services%,control_of_corruption_estimate,control_of_corruption_std,access_to_electricity%,...,multidimensional_poverty_headcount_ratio%,gini_index,birth_rate,death_rate,life_expectancy_at_birth,population,rural_population,voice_and_accountability_estimate,voice_and_accountability_std,intentional_homicides
0,Afghanistan,1960-01-01,,,,,,,,,...,,,50.34,31.921,32.535,8622466.0,7898093.0,,,
1,Afghanistan,1961-01-01,57.801696,,652230.0,327.0,,,,,...,,,50.443,31.349,33.068,8790140.0,8026804.0,,,
2,Afghanistan,1962-01-01,57.893688,,652230.0,327.0,,,,,...,,,50.57,30.845,33.547,8969047.0,8163985.0,,,
3,Afghanistan,1963-01-01,57.970348,,652230.0,327.0,,,,,...,,,50.703,30.359,34.016,9157465.0,8308019.0,,,
4,Afghanistan,1964-01-01,58.06694,,652230.0,327.0,,,,,...,,,50.831,29.867,34.494,9355514.0,8458694.0,,,


In [19]:
## Listing the World Bank fields to keep; the column axis is again named 'item'.
## Note that this rebinds the `columns` name used earlier for the macrodata selection.
columns = pd.Index(
    data = [
        'country',
        'date',
        'inflation_annual%',
        'real_interest_rate',
        'risk_premium_on_lending',
        'doing_business',
        'GDP_current_US',
        'gini_index',
    ],
    name = 'item',
)

columns

Index(['country', 'date', 'inflation_annual%', 'real_interest_rate',
       'risk_premium_on_lending', 'doing_business', 'GDP_current_US',
       'gini_index'],
      dtype='object', name='item')

In [20]:
## Conforming to the column index specified.
## reindex() does not complain about unknown names: it silently adds them as
## all-NaN columns. Fail loudly instead if any requested column is absent,
## since the source column names are irregular (e.g. 'inflation_annual%').

_missing = columns.difference(data2.columns)
if len(_missing) > 0:
    raise KeyError(f"columns not found in data2: {list(_missing)}")

data2 = data2.reindex(columns = columns)

In [21]:
## Previewing the first five rows after the column selection
data2.head(n = 5)

item,country,date,inflation_annual%,real_interest_rate,risk_premium_on_lending,doing_business,GDP_current_US,gini_index
0,Afghanistan,1960-01-01,,,,,537777800.0,
1,Afghanistan,1961-01-01,,,,,548888900.0,
2,Afghanistan,1962-01-01,,,,,546666700.0,
3,Afghanistan,1963-01-01,,,,,751111200.0,
4,Afghanistan,1964-01-01,,,,,800000000.0,


In [22]:
## Restricting the rows to the countries listed below (United States only)

specific_country = ['United States']

data2.loc[data2['country'].isin(specific_country)]

item,country,date,inflation_annual%,real_interest_rate,risk_premium_on_lending,doing_business,GDP_current_US,gini_index
15961,United States,1960-01-01,1.457976,,1.874167,,5.433000e+11,
15962,United States,1961-01-01,1.070724,3.107885,2.124167,,5.633000e+11,
15963,United States,1962-01-01,1.198773,3.215346,1.721667,,6.051000e+11,
15964,United States,1963-01-01,1.239669,3.374882,1.343333,,6.386000e+11,
15965,United States,1964-01-01,1.278912,2.951669,0.947500,,6.858000e+11,
...,...,...,...,...,...,...,...,...
16019,United States,2018-01-01,2.442583,2.441415,2.965000,,2.053306e+13,41.4
16020,United States,2019-01-01,1.812210,3.427090,3.210833,6.0,2.138098e+13,41.5
16021,United States,2020-01-01,1.233584,2.210411,3.162500,,2.106047e+13,39.7
16022,United States,2021-01-01,4.697859,-1.189357,3.206111,,2.331508e+13,


In [23]:
## Storing the United States rows and writing them to 'filtered2.csv'

f_data2 = data2.loc[data2['country'].isin(specific_country)]
f_data2.to_csv(path_or_buf = 'filtered2.csv')

In [24]:
## Cleaning the filtered data by hiding dataframe index

