In [None]:
import pandas as pd

Create a function specific to FRED Economic Data for formatting/cleaning csv files

In [None]:
def fred_clean(name, file, start='2003-01', end='2022-12'):
    """
    Load a two-column FRED csv from ./data/ and reduce it to one row per month.

    name: string input. title of the value column in the returned dataframe.
    file: string input. file name of downloaded FRED data set. format as name.csv
    start: string input. first month to keep (inclusive), formatted 'YYYY-MM'.
    end: string input. last month to keep (inclusive), formatted 'YYYY-MM'.

    returns: dataframe indexed by 'YYYY-MM' strings (index name 'date') with two
             columns: 'date' (datetime, first day of the month) and `name` (float).
             For each month the first non-missing observation in file order is kept.
    """
    # FRED writes '.' for missing observations; parse those as NaN instead of
    # turning them into zeros, which would look like real data points.
    raw = pd.read_csv(r'./data/' + file, na_values=['.'])
    # rename header (the file is expected to hold exactly two columns)
    raw.columns = ['date', name]
    # make sure the value column is float
    raw[name] = raw[name].astype(float)
    # reduce the date to its year-month part: yyyy-mm '-dd'
    raw['date'] = pd.to_datetime(raw['date']).dt.strftime('%Y-%m')

    df = (
        raw
        # discard missing observations first so that a month whose first row is
        # missing is represented by its first valid observation instead
        .dropna(subset=[name])
        # keep one row per month (time step = monthly)
        .drop_duplicates('date')
        # index by the year-month string and sort; the column is kept as well
        .set_index('date', drop=False)
        .sort_index()
        # slice time series data from `start` to `end`, both inclusive
        .loc[start:end]
    )
    # turn the 'date' column back into datetimes (first day of each month);
    # the index keeps the 'YYYY-MM' strings
    df = df.assign(date=pd.to_datetime(df['date']))
    return df

CREATE: df_11

In [None]:
# Load each series for df_11; `name` becomes the value-column title of the frame.
m1_supply_df = fred_clean(name='m1_supply', file='M1SL.csv')
# NOTE(review): the extension is upper-case here, unlike the other files —
# confirm it matches the file on disk (matters on case-sensitive file systems).
gdp_df = fred_clean(name='gdp', file='GDP.CSV')
fed_funds_rate_df = fred_clean(name='fed_funds_rate', file='FEDFUNDS.csv')
fed_bal_df = fred_clean(name='fed_bal', file='RESPPANWW.csv')
gov_debt_df = fred_clean(name='gov_debt', file='GFDEBTN.csv')

In [None]:
# Drop the 'date' column from every frame: the index already carries the date,
# and the merge below joins on that index level.
m1_supply_df = m1_supply_df.drop(columns=['date'])
gdp_df = gdp_df.drop(columns=['date'])
fed_funds_rate_df = fed_funds_rate_df.drop(columns=['date'])
fed_bal_df = fed_bal_df.drop(columns=['date'])
gov_debt_df = gov_debt_df.drop(columns=['date'])

In [None]:
#merge all data frames to df_11
# on='date' refers to the index level: the 'date' column was dropped in the previous cell
df_11 = (
    pd.merge(m1_supply_df, gdp_df, how='outer', on='date')
    .merge(fed_funds_rate_df, how='outer', on='date')
    .merge(fed_bal_df, how='outer', on='date')
    .merge(gov_debt_df, how='outer', on='date')
)
#fill the months missing after the outer merge (e.g. series that are not monthly):
#linear interpolation limited to one consecutive value per direction, then forward fill.
#.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
df_11 = df_11.interpolate(limit=1, limit_direction='both').ffill()
#create velocity_money = gdp / m1_supply
df_11['velocity_money'] = df_11['gdp'] / df_11['m1_supply']
df_11

In [None]:
# Write df_11 to ./data_frames/df_11.csv
from pathlib import Path

filepath = Path('./data_frames/df_11.csv')
# create the target folder if needed; no error when it already exists
filepath.parent.mkdir(exist_ok=True, parents=True)
df_11.to_csv(path_or_buf=filepath)

CREATE: df_12

In [None]:
#### import data ####
# Each series is loaded with fred_clean; its source page is listed above the call.

#https://fred.stlouisfed.org/series/NASDAQ100
nasdaq100_df = fred_clean(name='nasdaq100', file='NASDAQ100.csv')

#https://fred.stlouisfed.org/series/CBBTCUSD
btc_price_df = fred_clean(name='btc_price', file='CBBTCUSD.csv')

#https://fred.stlouisfed.org/series/USSTHPI
housing_index_df = fred_clean(name='housing_index', file='USSTHPI.csv')

#https://fred.stlouisfed.org/series/PALLFNFINDEXM
commod_index_df = fred_clean(name='commod_index', file='PALLFNFINDEXM.csv')

#https://fred.stlouisfed.org/series/PNRGINDEXM
energy_index_df = fred_clean(name='energy_index', file='PNRGINDEXM.csv')

In [None]:
#keep only the first row of every calendar month
# NOTE(review): fred_clean already de-duplicates on the year-month string, so this
# pass is expected to leave the frames unchanged; kept as a safeguard.
def _first_row_per_month(frame):
    """Return `frame` restricted to the first row seen for each month of its 'date' column."""
    # temporary helper column holding the monthly period of each date
    with_month = frame.assign(M=frame["date"].dt.to_period("M"))
    first_rows = with_month.drop_duplicates("M")
    return first_rows.drop("M", axis=1)

nasdaq100_df = _first_row_per_month(nasdaq100_df)
btc_price_df = _first_row_per_month(btc_price_df)
housing_index_df = _first_row_per_month(housing_index_df)
commod_index_df = _first_row_per_month(commod_index_df)
energy_index_df = _first_row_per_month(energy_index_df)

In [None]:
# Drop the 'date' column from every frame: the index already carries the date,
# and the merge below joins on that index level.
nasdaq100_df = nasdaq100_df.drop(columns=['date'])
btc_price_df = btc_price_df.drop(columns=['date'])
housing_index_df = housing_index_df.drop(columns=['date'])
commod_index_df = commod_index_df.drop(columns=['date'])
energy_index_df = energy_index_df.drop(columns=['date'])

In [None]:
#merge all data frames to df_12
# on='date' refers to the index level: the 'date' column was dropped in the previous cell
df_12 = (
    pd.merge(nasdaq100_df, btc_price_df, how='outer', on='date')
    .merge(housing_index_df, how='outer', on='date')
    .merge(commod_index_df, how='outer', on='date')
    .merge(energy_index_df, how='outer', on='date')
)
#fill the months missing after the outer merge (e.g. series that are not monthly):
#linear interpolation limited to one consecutive value per direction, then forward fill.
#.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
df_12 = df_12.interpolate(limit=1, limit_direction='both').ffill()
df_12

In [None]:
# Write df_12 to ./data_frames/df_12.csv
from pathlib import Path

filepath = Path('./data_frames/df_12.csv')
# create the target folder if needed; no error when it already exists
filepath.parent.mkdir(exist_ok=True, parents=True)
df_12.to_csv(path_or_buf=filepath)

CREATE: df_13

In [None]:
#### import data ####
# Each series is loaded with fred_clean; its source page is listed above the call.

#https://fred.stlouisfed.org/series/CPIAUCSL
cpi_index_df = fred_clean(name='cpi_index', file='CPIAUCSL.csv')

#https://fred.stlouisfed.org/series/ULCNFB
unit_labor_df = fred_clean(name='unit_labor', file='ULCNFB.csv')

#https://fred.stlouisfed.org/series/PSAVE
personal_savings_df = fred_clean(name='personal_savings', file='PSAVE.csv')

#https://fred.stlouisfed.org/series/PCE
personal_expenses_df = fred_clean(name='personal_expenses', file='PCE.csv')

#https://fred.stlouisfed.org/series/CUUR0000SEHA
rent_prim_res_df = fred_clean(name='rent_prim_res', file='CUUR0000SEHA.csv')

In [None]:
# Drop the 'date' column from every frame: the index already carries the date,
# and the merge below joins on that index level.
cpi_index_df = cpi_index_df.drop(columns=['date'])
unit_labor_df = unit_labor_df.drop(columns=['date'])
personal_savings_df = personal_savings_df.drop(columns=['date'])
personal_expenses_df = personal_expenses_df.drop(columns=['date'])
rent_prim_res_df = rent_prim_res_df.drop(columns=['date'])

In [None]:
#merge all data frames to df_13
# on='date' refers to the index level: the 'date' column was dropped in the previous cell
df_13 = (
    pd.merge(cpi_index_df, unit_labor_df, how='outer', on='date')
    .merge(personal_savings_df, how='outer', on='date')
    .merge(personal_expenses_df, how='outer', on='date')
    .merge(rent_prim_res_df, how='outer', on='date')
)
#fill the months missing after the outer merge (e.g. series that are not monthly):
#linear interpolation limited to one consecutive value per direction, then forward fill.
#.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
df_13 = df_13.interpolate(limit=1, limit_direction='both').ffill()
df_13

In [None]:
# Write df_13 to ./data_frames/df_13.csv
from pathlib import Path

filepath = Path('./data_frames/df_13.csv')
# create the target folder if needed; no error when it already exists
filepath.parent.mkdir(exist_ok=True, parents=True)
df_13.to_csv(path_or_buf=filepath)

Merge df_11, df_12 and df_13 to df_1

In [None]:
# Outer-join the three group tables on 'date' so every month present in any of them is kept.
df_1 = (
    df_11
    .merge(df_12, how='outer', on='date')
    .merge(df_13, how='outer', on='date')
)
df_1

In [None]:
# Write df_1 to ./data_frames/df_1.csv
from pathlib import Path

filepath = Path('./data_frames/df_1.csv')
# create the target folder if needed; no error when it already exists
filepath.parent.mkdir(exist_ok=True, parents=True)
df_1.to_csv(path_or_buf=filepath)