In [3]:
# Load the education dataset and build a gap-filled Bangladesh time series.
import pandas as pd

file_path = "/content/interpolated_education_dataset.csv"
df = pd.read_csv(file_path)

# Work on an explicit copy so the full dataset in `df` is never modified.
df_bangladesh = df[df["Country"] == "Bangladesh"].copy()

# Order chronologically first: method='linear' ignores the index and treats
# consecutive rows as equally spaced, so row order determines the result.
df_bangladesh_sorted = df_bangladesh.sort_values("Year")

# Interpolate only the numeric columns. Calling DataFrame.interpolate on a
# frame that still contains the non-numeric "Country" column is deprecated
# (pandas emits a FutureWarning) and is slated to raise in a later release.
numeric_columns = df_bangladesh_sorted.select_dtypes(include="number").columns
df_bangladesh_interpolated = df_bangladesh_sorted.copy()
df_bangladesh_interpolated[numeric_columns] = (
    df_bangladesh_sorted[numeric_columns].interpolate(method="linear")
)

# review cleaned data
print(df_bangladesh_interpolated.head())

      Country  Year  Fertility_Rate  GDP_Per_Capita  Homicide_Rate  \
0  Bangladesh  1980           6.326      206.075714       1.672038   
1  Bangladesh  1981           6.237      224.241392       1.829637   
2  Bangladesh  1982           6.118      199.595945       1.987816   
3  Bangladesh  1983           5.906      184.706773       2.170414   
4  Bangladesh  1984           5.732      193.435021       2.442022   

   Avg_Yrs_Education  Infant_Mortality  Life_Expectancy  
0              1.310         13.789642          52.2778  
1              1.436         13.472392          52.6383  
2              1.562         13.139923          53.0119  
3              1.688         12.788301          53.4409  
4              1.814         12.423361          53.8259  


  df_bangladesh_interpolated = df_bangladesh.sort_values("Year").interpolate(method='linear')


In [6]:
# Augmented Dickey-Fuller (ADF) unit-root test on the fertility series
from statsmodels.tsa.stattools import adfuller
import pandas as pd
import numpy as np

fertility_series = df_bangladesh_interpolated["Fertility_Rate"]
result = adfuller(fertility_series)

# Pull the pieces that get reported out of the result tuple:
# position 0 is the test statistic, 1 the p-value, 4 the critical values.
adf_statistic, p_value = result[0], result[1]
critical_values = result[4]

print(f'ADF Statistic: {adf_statistic:f}')
print(f'p-value: {p_value:f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'\t{level}: {threshold:.3f}')

ADF Statistic: -4.625900
p-value: 0.000116
Critical Values:
	1%: -3.646
	5%: -2.954
	10%: -2.616


In [10]:
# The series is treated as stationary (per the ADF test), so it is good to
# run ARIMAX with a (1,0,1) fit, i.e. without differencing.
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Exogenous regressors supplied to the model alongside the target series.
exog_columns = [
    'GDP_Per_Capita',
    'Avg_Yrs_Education',
    'Life_Expectancy',
    'Infant_Mortality',
    'Homicide_Rate',
]

# target (endogenous) series and regressor matrix
y = df_bangladesh_interpolated['Fertility_Rate']
X = df_bangladesh_interpolated[exog_columns]

# ARIMAX(1,0,1): one AR term, no differencing, one MA term.
# NOTE(review): no `trend` argument is passed here; confirm whether the
# regression is meant to be fitted without an intercept.
arimax_order = (1, 0, 1)
model = SARIMAX(
    y,
    exog=X,
    order=arimax_order,
    enforce_stationarity=True,
    enforce_invertibility=True,
)

# disp=False silences the optimizer's convergence printout
results = model.fit(disp=False)

# show results
print(results.summary())

                               SARIMAX Results                                
Dep. Variable:         Fertility_Rate   No. Observations:                   44
Model:               SARIMAX(1, 0, 1)   Log Likelihood                  78.657
Date:                Sun, 23 Mar 2025   AIC                           -141.314
Time:                        20:11:32   BIC                           -127.040
Sample:                             0   HQIC                          -136.020
                                 - 44                                         
Covariance Type:                  opg                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
GDP_Per_Capita    -4.148e-05      0.000     -0.384      0.701      -0.000       0.000
Avg_Yrs_Education     0.0249      0.053      0.467      0.641      -0.080       0.129
Life_Expectancy       0.

