In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [26]:
## Source: https://www.statsmodels.org/stable/datasets/generated/macrodata.html
## Load the macrodata dataset from a local CSV copy, then print a summary
## of its columns, dtypes and non-null counts.

MACRODATA_CSV = 'macrodata-2.csv'
data = pd.read_csv(MACRODATA_CSV)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203 entries, 0 to 202
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   year      203 non-null    float64
 1   quarter   203 non-null    float64
 2   realgdp   203 non-null    float64
 3   realcons  203 non-null    float64
 4   realinv   203 non-null    float64
 5   realgovt  203 non-null    float64
 6   realdpi   203 non-null    float64
 7   cpi       203 non-null    float64
 8   m1        203 non-null    float64
 9   tbilrate  203 non-null    float64
 10  unemp     203 non-null    float64
 11  pop       203 non-null    float64
 12  infl      203 non-null    float64
 13  realint   203 non-null    float64
dtypes: float64(14)
memory usage: 22.3 KB


In [27]:
## Preview the first rows of the freshly loaded table.
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [28]:
## Build a quarterly PeriodIndex (e.g. '1959Q1') from the year and quarter columns.
## Both columns are loaded as float64, so cast them to integers first:
## period fields are integer-valued.

year_field = data['year'].astype(int)
quarter_field = data['quarter'].astype(int)

if hasattr(pd.PeriodIndex, 'from_fields'):
    ## pandas >= 2.2: passing year/quarter to the PeriodIndex constructor is
    ## deprecated in favour of PeriodIndex.from_fields. That method takes no
    ## `name` argument, so the name is attached afterwards.
    periods = pd.PeriodIndex.from_fields(year = year_field, quarter = quarter_field).rename('date')
else:
    ## Older pandas: build from fields through the constructor.
    periods = pd.PeriodIndex(year = year_field, quarter = quarter_field, name = 'date')

periods

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', name='date', length=203)

In [29]:
## Convert each quarterly period (e.g. '1959Q1') to the timestamp of its
## first day (e.g. '1959-01-01'). The result is only displayed here.

periods.to_timestamp(freq = 'D', how = 'start')

DatetimeIndex(['1959-01-01', '1959-04-01', '1959-07-01', '1959-10-01',
               '1960-01-01', '1960-04-01', '1960-07-01', '1960-10-01',
               '1961-01-01', '1961-04-01',
               ...
               '2007-04-01', '2007-07-01', '2007-10-01', '2008-01-01',
               '2008-04-01', '2008-07-01', '2008-10-01', '2009-01-01',
               '2009-04-01', '2009-07-01'],
              dtype='datetime64[ns]', name='date', length=203, freq='QS-OCT')

In [30]:
## The current index is a plain integer range; it is shown here before
## being replaced with a date-based index.

data.index

RangeIndex(start=0, stop=203, step=1)

In [31]:
## Replace the integer index with the quarter-start timestamps by
## assigning the new index directly on the DataFrame.

quarter_starts = periods.to_timestamp(freq = 'D', how = 'start')
data.index = quarter_starts

In [32]:
## Columns (items) the analysis focuses on, wrapped in an Index named 'item'
## so that the column axis carries that name after reindexing.

focus_items = ['realgdp', 'realint', 'infl', 'cpi', 'realinv', 'realcons', 'unemp']
columns = pd.Index(focus_items, name = 'item')

columns

Index(['realgdp', 'realint', 'infl', 'cpi', 'realinv', 'realcons', 'unemp'], dtype='object', name='item')

In [33]:
## Keep only the selected columns, in the order given by `columns`.
## Note: this overwrites `data`, so 'year' and 'quarter' are no longer
## available afterwards.

data = data.reindex(columns, axis = 'columns')

data.head()

item,realgdp,realint,infl,cpi,realinv,realcons,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959-01-01,2710.349,0.0,0.0,28.98,286.898,1707.4,5.8
1959-04-01,2778.801,0.74,2.34,29.15,310.859,1733.7,5.1
1959-07-01,2775.488,1.09,2.74,29.35,289.226,1751.8,5.3
1959-10-01,2785.204,4.06,0.27,29.37,299.356,1753.7,5.6
1960-01-01,2847.699,1.19,2.31,29.54,331.722,1770.5,5.2


In [34]:
## Stack the columns into a long-format Series indexed by (date, item).
## reset_index() is only needed if those index levels should become
## ordinary columns.

long_format = data.stack()
long_format.head()

date        item   
1959-01-01  realgdp    2710.349
            realint       0.000
            infl          0.000
            cpi          28.980
            realinv     286.898
dtype: float64

In [35]:
## Show the quarters in which inflation was at or below 2%.

low_inflation = data['infl'] <= 2
data.loc[low_inflation]

item,realgdp,realint,infl,cpi,realinv,realcons,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959-01-01,2710.349,0.0,0.0,28.98,286.898,1707.4,5.8
1959-10-01,2785.204,4.06,0.27,29.37,299.356,1753.7,5.6
1960-04-01,2834.39,2.55,0.14,29.55,298.152,1792.9,5.2
1960-10-01,2802.616,1.08,1.21,29.84,259.764,1788.2,6.3
1961-01-01,2819.264,2.77,-0.4,29.81,266.405,1787.7,6.8
1961-04-01,2872.005,0.81,1.47,29.92,286.246,1814.3,7.0
1961-07-01,2918.419,1.52,0.8,29.98,310.227,1823.1,6.8
1961-10-01,2977.83,1.8,0.8,30.04,315.463,1859.6,6.2
1962-04-01,3064.709,2.65,0.13,30.22,331.039,1902.5,5.5
1962-10-01,3100.563,2.08,0.79,30.44,325.65,1945.1,5.5


In [36]:
## Keep the quarters with inflation at or below 2% and write them
## to a new CSV file.

low_inflation = data['infl'] <= 2
f_data = data.loc[low_inflation]
f_data.to_csv(path_or_buf = 'filtered.csv')

In [37]:
## Summary statistics for the filtered subset (inflation <= 2%).
f_data.describe()

item,realgdp,realint,infl,cpi,realinv,realcons,unemp
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0
mean,7361.221233,2.89814,0.45093,108.770558,1064.691326,4953.713953,5.706977
std,4034.207853,2.37519,1.917848,73.488477,714.209922,2884.194618,1.227346
min,2710.349,-0.71,-8.79,28.98,259.764,1707.4,3.7
25%,3190.7095,1.515,0.25,30.615,351.9115,1981.0,4.9
50%,7950.164,2.57,0.89,135.1,967.442,5284.4,5.5
75%,11318.623,3.805,1.3,177.0,1779.2085,7776.0,6.25
max,13324.6,10.95,2.0,216.889,2232.193,9267.7,10.7


In [38]:
## Summary statistics for the full set of selected columns, for
## comparison with the filtered subset.
data.describe()

item,realgdp,realint,infl,cpi,realinv,realcons,unemp
count,203.0,203.0,203.0,203.0,203.0,203.0,203.0
mean,7221.171901,1.336502,3.96133,105.075788,1012.863862,4825.293103,5.884729
std,3214.956044,2.668799,3.253216,61.278878,585.102267,2313.346192,1.458574
min,2710.349,-6.79,-8.79,28.98,259.764,1707.4,3.4
25%,4440.1035,-0.085,2.27,41.05,519.1475,2874.1,4.9
50%,6559.594,1.34,3.24,104.1,896.21,4299.9,5.7
75%,9629.3465,2.63,4.975,159.65,1436.6815,6398.15,6.8
max,13415.266,10.95,14.62,218.61,2264.721,9363.6,10.7


In [39]:
## Rank the low-inflation quarters by unemployment rate, highest first.
## The aim is to show that the U.S. economy still went through recessions
## even while inflation was at or below 2%.

f_data.sort_values("unemp", ascending = False)

item,realgdp,realint,infl,cpi,realinv,realcons,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1982-10-01,5871.001,8.77,-0.82,97.9,622.93,3947.9,10.7
2009-01-01,12925.41,-0.71,0.94,212.671,1558.494,9209.2,8.1
1993-04-01,8486.435,1.08,1.94,144.5,1063.263,5671.1,7.1
1961-04-01,2872.005,0.81,1.47,29.92,286.246,1814.3,7.0
1986-01-01,7022.757,10.95,-4.39,108.7,967.442,4639.3,7.0
2008-10-01,13141.92,8.91,-8.79,212.174,1857.661,9195.3,6.9
1961-01-01,2819.264,2.77,-0.4,29.81,266.405,1787.7,6.8
1961-07-01,2918.419,1.52,0.8,29.98,310.227,1823.1,6.8
1993-10-01,8643.769,1.13,1.92,146.3,1118.583,5783.7,6.6
1991-01-01,7950.164,4.65,1.19,135.1,896.21,5284.4,6.6


In [2]:
### Data Visualization
