In [1]:
##### setting up the environment
from datetime import date, datetime, timedelta
import os
from typing import Literal
import time
import re
import pandas as pd
import numpy as np
import json
import requests
from dotenv import load_dotenv
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pmdarima.arima import auto_arima 
import seaborn as sns
from tabulate import tabulate
import math
import warnings





# Set up the data

In [2]:
BUCKET = '/mnt/dsa/home/gpchow/inflationpers/dataframe/'

# headline inflation (monthly frequency)
headline_inflation_df = pd.read_parquet(BUCKET + "inflation-headline.parquet")

# headline inflation by country (monthly frequency)
headline_inflation_country = pd.read_parquet(BUCKET + "inflation-other-country.parquet")
headline_inflation_country = pd.merge(headline_inflation_country, headline_inflation_df, left_index = True, right_index = True)
headline_inflation_country.rename(columns = {'cpi_month': 'malaysia'}, inplace = True)

In [3]:
start_date = pd.to_datetime('1972-01-01')

# Comparison with other countries
## Analysis using full sample

In [4]:
def modeling(input, country, num_lags, output_choice):
    # create the lagged variable dataframe
    X = pd.DataFrame(index=input.index)

    for lag in range(1, num_lags + 1):
        X[f'{country}_lag{lag}'] = input[country].shift(lag)

    # Add constant term to X
    X = sm.add_constant(X)

    inflation_df_month_full = input[country].loc[start_date:,]
    X_full = X.loc[start_date:,]
    
    # Fit the OLS model
    model = sm.OLS(inflation_df_month_full, X_full)
    results = model.fit()

    coef = sum(results.params[1:])
    if output_choice == 1:
        print(country, ": ", coef)
    elif output_choice == 2:
        result_df = pd.DataFrame({'country': [country], 'coefficient': [coef]})
        return result_df
    else:
        print("Invalid output choice. Please enter 1 to print the result or 2 to store it in a DataFrame.")

country_df = pd.DataFrame()
country_df['country'] = np.nan
country_df['coefficient'] = np.nan

for country in headline_inflation_country.columns:
    result_df_1 = modeling(headline_inflation_country, country, 12, 2)
    country_df = pd.concat([country_df, result_df_1], ignore_index=True)

country_df['country'] = country_df['country'].apply(lambda x: x.title())
country_df = country_df.sort_values(by='country')
country_df['coefficient'] = country_df['coefficient'].round(3)
country_df

Unnamed: 0,country,coefficient
0,Argentina,0.903
1,Chile,0.98
2,France,0.995
3,Germany,0.973
4,Indonesia,0.964
5,Italy,0.995
13,Malaysia,0.951
6,Philippines,0.962
7,Singapore,0.943
8,Spain,0.991


In [5]:
advanced_economies = ['France', 'Germany', 'Italy', 'Singapore', 'Spain', 'United Kingdom', 'United States']
country_df['classification'] = country_df['country'].apply(lambda x: 'Advanced' if x in advanced_economies else 'Emerging')
mean_result_df = country_df.groupby('classification')['coefficient'].mean().reset_index()
mean_result_df

Unnamed: 0,classification,coefficient
0,Advanced,0.9794
1,Emerging,0.965111


## rolling mean of inflation

In [6]:
def modeling_rolling_country(country, num_lags, window_size, start_date, rho_full):
    # create the lagged variable dataframe
    X = pd.DataFrame(index=headline_inflation_country.index)

    for lag in range(1, num_lags + 1):
        X[f'{country}_lag{lag}'] = headline_inflation_country[country].shift(lag)

    # Add constant term to X
    X = sm.add_constant(X)

    inflation_df_month_full = headline_inflation_country[country].loc[start_date:,]
    X_full = X.loc[start_date:,]
    
    models_coefficients = pd.DataFrame()  

    for i in range(window_size, len(inflation_df_month_full)):
        train_subset = inflation_df_month_full[i-window_size:i]
        X_lagged = X_full[i-window_size:i]

        # Fit the OLS model
        model = sm.OLS(train_subset, X_lagged)
        results = model.fit()
        models_coefficients[f"Model {i+1}"] = results.params
    
    rho = models_coefficients[1:].sum(axis=0)
    rho.index = inflation_df_month_full.index[window_size:]

    rho_full[country] = rho
    return rho_full

rolling_rho_country = pd.DataFrame(columns=headline_inflation_country.columns)

for country in headline_inflation_country.columns:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        modeling_rolling_country(country, 12, 168, start_date, rolling_rho_country)


### graph

In [7]:
country_before_mean = rolling_rho_country[:'2008-12-01'].mean().round(3)
country_before_mean.name = 'mean_before'
country_before_mean = pd.DataFrame(country_before_mean)
country_before_mean.reset_index(inplace = True)
country_before_mean.rename(columns = {'index':'country'}, inplace = True)
country_before_mean['country'] = country_before_mean['country'].str.title()

country_after_mean = rolling_rho_country['2009-01-01':].mean().round(3)
country_after_mean.name = 'mean_after'
country_after_mean = pd.DataFrame(country_after_mean)
country_after_mean.reset_index(inplace = True)
country_after_mean.rename(columns = {'index':'country'}, inplace = True)
country_after_mean['country'] = country_after_mean['country'].str.title()

country_df = pd.merge(country_df, country_before_mean, on='country')
country_df = pd.merge(country_df, country_after_mean, on='country').drop(columns = 'classification')
country_df['country'] = country_df['country'].apply(lambda x: x.title())
country_df = country_df.sort_values(by='country')
country_df

Unnamed: 0,country,coefficient,mean_before,mean_after
0,Argentina,0.903,0.912,0.923
1,Chile,0.98,0.968,0.919
2,France,0.995,0.965,0.852
3,Germany,0.973,0.959,0.795
4,Indonesia,0.964,0.937,0.919
5,Italy,0.995,0.985,0.934
6,Malaysia,0.951,0.942,0.853
7,Philippines,0.962,0.943,0.927
8,Singapore,0.943,0.933,0.905
9,Spain,0.991,0.971,0.915


### correlation between rolling mean and persistence

In [8]:
rolling_mean = headline_inflation_country.rolling(window=168).mean()

In [9]:
rolling_mean

Unnamed: 0_level_0,argentina,chile,france,germany,indonesia,italy,philippines,singapore,spain,thailand,turkey,united_kingdom,united_states,malaysia
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1958-01-01,,,,,,,,,,,,,,
1958-02-01,,,,,,,,,,,,,,
1958-03-01,,,,,,,,,,,,,,
1958-04-01,,,,,,,,,,,,,,
1958-05-01,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-01,23.137985,4.071282,1.621196,2.028935,4.895976,1.998929,3.926037,2.528689,2.089752,2.078161,15.499345,2.699834,2.540624,2.042318
2023-03-01,23.701668,4.121153,1.648890,2.062746,4.888025,2.033581,3.954068,2.561791,2.095012,2.079788,15.739524,2.736132,2.555920,2.044053
2023-04-01,24.295276,4.164929,1.676819,2.093815,4.875008,2.071999,3.977819,2.594621,2.104727,2.083640,15.934881,2.764882,2.568727,2.054250
2023-05-01,24.922479,4.202068,1.699827,2.117892,4.861356,2.108298,3.998816,2.621996,2.109242,2.076392,16.106726,2.795439,2.577500,2.061988


In [10]:
import scipy.stats as stats 

pearson_corr_sci = pd.DataFrame()
pearson_pvalue_sci = pd.DataFrame()

for country in rolling_rho_country.columns:
    pearson = stats.pearsonr(rolling_rho_country[country], rolling_mean.loc[rolling_rho_country.index[0]:, country])

    # Create a new DataFrame for the correlation coefficient and p-value
    corr_df = pd.DataFrame({country: [pearson[0].round(3)]})
    pvalue_df = pd.DataFrame({country: [pearson[1].round(3)]})
    
    # Concatenate the new DataFrame to the existing pearson_corr_sci and pearson_pvalue_sci DataFrames
    pearson_corr_sci = pd.concat([pearson_corr_sci, corr_df], axis=1)
    pearson_pvalue_sci = pd.concat([pearson_pvalue_sci, pvalue_df], axis=1)

pearson_corr1 = pd.concat([pearson_corr_sci, pearson_pvalue_sci], ignore_index=True)
pearson_corr1 = pearson_corr1.T
pearson_corr1.reset_index(inplace = True)
pearson_corr1.rename(columns = {0:'correlation', 1:'p-value', 'index': 'country'}, inplace = True)
pearson_corr1['country'] = pearson_corr1['country'].apply(lambda x: x.title())
pearson_corr1 = pearson_corr1.sort_values('correlation')

pearson_corr1

Unnamed: 0,country,correlation,p-value
0,Argentina,-0.315,0.0
10,Turkey,0.013,0.788
1,Chile,0.263,0.0
7,Singapore,0.337,0.0
6,Philippines,0.478,0.0
5,Italy,0.541,0.0
8,Spain,0.586,0.0
13,Malaysia,0.599,0.0
11,United_Kingdom,0.652,0.0
4,Indonesia,0.662,0.0
