In [1]:
import os
import sys
# Prepend "../../" to the module search path. A later cell also reads
# sys.path[0] back and uses it as the base directory for the data files,
# so nothing else should be inserted at index 0 before that cell runs.
sys.path.insert(0, "../../")
import pandas as pd
import numpy as np

import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including pandas deprecation / chained-assignment warnings.
warnings.filterwarnings("ignore")

In [2]:
# Base directory holding one sub-folder per country. This relies on
# sys.path[0] still being the "../../" entry inserted by the imports cell.
folder = sys.path[0] + "outputs/text/"

# Directory entries that must not be treated as countries to load.
skip_entries = {".DS_Store", "tonga", "pacific"}

# List the directory once and sort it: os.listdir returns entries in
# arbitrary order, so sorting keeps the row order of the stacked frames
# reproducible, and one shared list keeps the four file lists aligned.
countries = sorted(
    con for con in os.listdir(folder) if con not in skip_entries
)

epu_files = [f"{folder}{con}/epu/{con}_epu.csv" for con in countries]
sentiment_files = [
    f"{folder}{con}/sentiment/{con}_sentiment.csv" for con in countries
]
epu_inflation_files = [
    f"{folder}{con}/epu/{con}_epu_inflation.csv" for con in countries
]
google_volume_files = [
    f"{folder}{con}/{con}_google_volume.csv" for con in countries
]


In [3]:
# Stack every country's inflation-EPU series into one long frame with the
# columns: date, inflation_epu_weighted, inflation_epu_unweighted, country.
#
# The per-country frames are collected in a list and concatenated once;
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration. The per-file frame is named `raw` so the loop does not clobber
# the notebook-level `df`.
inflation_frames = []
for file in epu_inflation_files:
    # The country is encoded in the file name: "<country>_epu_inflation.csv".
    country_name = file.split("/")[-1].replace("_epu_inflation.csv", "")
    # Only the columns selected below are kept, so the saved index column
    # ("Unnamed: 0") needs no explicit drop and may be absent.
    raw = pd.read_csv(file)
    inflation_frames.append(
        raw[["date", "epu_weighted", "epu_unweighted"]]
        .assign(date=pd.to_datetime(raw["date"]))
        .rename(columns={
            "epu_weighted": "inflation_epu_weighted",
            "epu_unweighted": "inflation_epu_unweighted",
        })
        .assign(country=country_name)
    )

# pd.concat raises on an empty list; fall back to an empty frame instead.
inflation_df = (
    pd.concat(inflation_frames, axis=0, ignore_index=True)
    if inflation_frames else pd.DataFrame()
)

In [4]:
# Stack every country's Google search-volume series into one long frame
# with the columns: date, trends, country.
#
# Frames are collected in a list and concatenated once (pd.concat inside the
# loop re-copies all accumulated rows each iteration). The per-file frame is
# named `raw` so the loop does not clobber the notebook-level `df`.
volume_frames = []
for file in google_volume_files:
    # The country is encoded in the file name: "<country>_google_volume.csv".
    country_name = file.split("/")[-1].replace("_google_volume.csv", "")
    # Only the columns selected below are kept, so the saved index column
    # ("Unnamed: 0") needs no explicit drop and may be absent.
    raw = pd.read_csv(file)
    volume_frames.append(
        raw[["date", "trends"]].assign(
            date=pd.to_datetime(raw["date"]),
            country=country_name,
        )
    )

# pd.concat raises on an empty list; fall back to an empty frame instead.
volume_df = (
    pd.concat(volume_frames, axis=0, ignore_index=True)
    if volume_frames else pd.DataFrame()
)

In [5]:
# Stack every country's sentiment series into one long frame with the
# columns: date, sentiment, z_score, country.
#
# Frames are collected in a list and concatenated once (pd.concat inside the
# loop re-copies all accumulated rows each iteration). The per-file frame is
# named `raw` so the loop does not clobber the notebook-level `df`.
sentiment_frames = []
for file in sentiment_files:
    # The country is encoded in the file name: "<country>_sentiment.csv".
    country_name = file.split("/")[-1].replace("_sentiment.csv", "")
    # Only the columns selected below are kept, so the saved index column
    # ("Unnamed: 0") needs no explicit drop and may be absent.
    raw = pd.read_csv(file)
    sentiment_frames.append(
        raw[["date", "score", "z_score"]]
        .assign(date=pd.to_datetime(raw["date"]))
        .rename(columns={"score": "sentiment"})
        .assign(country=country_name)
    )

# pd.concat raises on an empty list; fall back to an empty frame instead.
sentiment_df = (
    pd.concat(sentiment_frames, axis=0, ignore_index=True)
    if sentiment_frames else pd.DataFrame()
)

In [6]:
# Load the macro panel, parse its dates and derive a "<year>q<quarter>" label.
df = pd.read_csv("epu_macro.csv")
df = df.assign(date=pd.to_datetime(df["date"]))
df = df.assign(
    quarter=df["date"].apply(lambda stamp: f"{stamp.year}q{stamp.quarter}")
)

# Make sure the inflation frame's dates are datetimes before it is merged
# in a later cell.
inflation_df["date"] = pd.to_datetime(inflation_df["date"])

# Outer join keeps (country, date) pairs that appear on either side. Both
# frames carry a "sentiment" column, so pandas suffixes them; the macro
# file's copy ("sentiment_x") is discarded here and "sentiment_y" is renamed
# in the next cell.
df = pd.merge(
    df,
    sentiment_df,
    on=["country", "date"],
    how="outer",
).drop(columns="sentiment_x")

In [7]:
# Give the sentiment columns their final names, then attach search volume.
df = df.rename(
    columns={"sentiment_y": "sentiment", "z_score": "sentiment_z_score"}
)
# Left join: only (country, date) pairs already present in df are kept.
df = pd.merge(df, volume_df, on=["country", "date"], how="left")

In [8]:
# Attach the inflation-EPU series; the outer join keeps (country, date)
# pairs that exist on only one side.
df = pd.merge(df, inflation_df, on=["country", "date"], how="outer")

# Snapshot the merged panel. The row index is written as the first column.
df.to_csv("epu_macro_2.csv", index=True, encoding="utf-8")

In [9]:
# Treat a missing inflation-EPU value as 0.
# NOTE(review): this runs after the CSV snapshot was written, so the CSV
# still contains the NaNs; the zeros do feed into the regression below.
df = df.assign(
    inflation_epu_weighted=df["inflation_epu_weighted"].fillna(value=0)
)

In [10]:
# Coerce CPI to a numeric dtype; unparseable values raise (pandas default).
df = df.assign(cpi=pd.to_numeric(df["cpi"], errors="raise"))

In [11]:
# Export the final panel (including its row index) as a Stata dataset.
df.to_stata("pic_epu_cpi.dta", write_index=True)

In [27]:
import statsmodels.formula.api as smf

# Pooled OLS of CPI on country dummies, the sentiment z-score, the weighted
# inflation-EPU index and their interaction. In the formula language `a * b`
# already expands to `a + b + a:b`, so the main effects are listed twice but
# enter the model only once.
#
# `res` holds the *unfitted* model; it is fitted when the summary is shown.
# NOTE(review): the displayed summary reports Durbin-Watson 0.464 with a
# nonrobust covariance, so consider autocorrelation- or cluster-robust
# standard errors before interpreting the p-values.
res = smf.ols(
    formula="cpi~C(country) + sentiment_z_score + inflation_epu_weighted + sentiment_z_score * inflation_epu_weighted",
    data=df,
)
res.fit().summary()

0,1,2,3
Dep. Variable:,cpi,R-squared:,0.757
Model:,OLS,Adj. R-squared:,0.753
Method:,Least Squares,F-statistic:,189.5
Date:,"Thu, 04 Apr 2024",Prob (F-statistic):,1.45e-126
Time:,11:17:17,Log-Likelihood:,-1575.7
No. Observations:,434,AIC:,3167.0
Df Residuals:,426,BIC:,3200.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,110.7337,0.982,112.814,0.000,108.804,112.663
C(country)[T.papua_new_guinea],35.8907,1.277,28.106,0.000,33.381,38.401
C(country)[T.samoa],-0.5116,1.296,-0.395,0.693,-3.059,2.036
C(country)[T.solomon_islands],-2.4389,1.345,-1.814,0.070,-5.082,0.204
C(country)[T.vanuatu],24.2508,1.745,13.895,0.000,20.820,27.681
sentiment_z_score,14.5196,1.324,10.965,0.000,11.917,17.122
inflation_epu_weighted,0.0088,0.003,3.446,0.001,0.004,0.014
sentiment_z_score:inflation_epu_weighted,-0.0097,0.006,-1.689,0.092,-0.021,0.002

0,1,2,3
Omnibus:,9.386,Durbin-Watson:,0.464
Prob(Omnibus):,0.009,Jarque-Bera (JB):,10.448
Skew:,0.267,Prob(JB):,0.00538
Kurtosis:,3.54,Cond. No.,1130.0


In [3]:
# Stack every country's EPU series into one long frame with the columns:
# date, epu_weighted, epu_unweighted, country.
#
# The per-file frame is named `raw` rather than `df`: when the notebook is
# run top to bottom this cell comes after the merged panel `df` is built,
# and reusing that name would silently replace the panel with the last
# country's raw file. Frames are collected in a list and concatenated once,
# because pd.concat inside the loop re-copies all accumulated rows each
# iteration.
epu_frames = []
for file in epu_files:
    # The country is encoded in the file name: "<country>_epu.csv".
    country_name = file.split("/")[-1].replace("_epu.csv", "")
    # Only the columns selected below are kept, so the saved index column
    # ("Unnamed: 0") needs no explicit drop and may be absent.
    raw = pd.read_csv(file)
    epu_frames.append(
        raw[["date", "epu_weighted", "epu_unweighted"]].assign(
            date=pd.to_datetime(raw["date"]),
            country=country_name,
        )
    )

# pd.concat raises on an empty list; fall back to an empty frame instead.
epu_df = (
    pd.concat(epu_frames, axis=0, ignore_index=True)
    if epu_frames else pd.DataFrame()
)