In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from scipy import stats
%matplotlib inline

In [2]:
df = pd.read_csv('./economic_index.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,year,month,interest_rate,unemployment_rate,index_price
0,0,2017,12,2.75,5.3,1464
1,1,2017,11,2.5,5.3,1394
2,2,2017,10,2.5,5.3,1357
3,3,2017,9,2.5,5.3,1293
4,4,2017,8,2.5,5.4,1256


In [3]:
# Drop unneccessary columns

df.drop(columns = ['Unnamed: 0','year','month'],axis=1,inplace=True)

In [4]:
df.head()

Unnamed: 0,interest_rate,unemployment_rate,index_price
0,2.75,5.3,1464
1,2.5,5.3,1394
2,2.5,5.3,1357
3,2.5,5.3,1293
4,2.5,5.4,1256


In [5]:
## check null values
df.isnull().sum()

interest_rate        0
unemployment_rate    0
index_price          0
dtype: int64

In [6]:
# Lets do some visualization
sns.pairplot(df)

<seaborn.axisgrid.PairGrid at 0x1f5dbf9bfb0>

In [7]:
df.corr()

Unnamed: 0,interest_rate,unemployment_rate,index_price
interest_rate,1.0,-0.925814,0.935793
unemployment_rate,-0.925814,1.0,-0.922338
index_price,0.935793,-0.922338,1.0


In [8]:
# Visualization the datapoints more closely
sns.scatterplot(df,x='interest_rate',y='unemployment_rate')



<Axes: xlabel='index_price', ylabel='Count'>

In [9]:
# Independent and dependent features

x = df.iloc[:,:-1]
y = df['index_price']

In [10]:
x.head()

Unnamed: 0,interest_rate,unemployment_rate
0,2.75,5.3
1,2.5,5.3
2,2.5,5.3
3,2.5,5.3
4,2.5,5.4


In [11]:
y.head()

0    1464
1    1394
2    1357
3    1293
4    1256
Name: index_price, dtype: int64

In [12]:
# train test split
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=42)

In [13]:
sns.regplot(df,x='interest_rate',y='unemployment_rate')

<Axes: xlabel='interest_rate', ylabel='unemployment_rate'>

In [14]:
from sklearn.preprocessing import StandardScaler

scalar = StandardScaler()
X_train_std = scalar.fit_transform(x_train)
x_test_std = scalar.transform(x_test)

In [15]:
from sklearn.linear_model import LinearRegression
regression = LinearRegression()
regression.fit(X_train_std,y_train)

In [16]:
## Cross Validation

from sklearn.model_selection import cross_val_score
validation_score = cross_val_score(regression,X_train_std,y_train,scoring='neg_mean_squared_error',cv=3)

In [17]:

validation_score

array([-4921.61331265, -7686.87497294, -5135.9962549 ])

In [18]:
## Prediction

y_pred = regression.predict(x_test_std)
y_pred

array([1204.22770398,  821.65051903, 1406.51300368,  857.70889608,
        994.90992298, 1168.16932693])

In [19]:
# Performance metrics
from sklearn.metrics import mean_absolute_error,mean_squared_error

mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test,y_pred)
rmse = np.sqrt(mse)
print(mse)
print(mae)
print(rmse)

5793.762887712583
59.93578152323558
76.11677139574815


In [20]:
# R_Square

from sklearn.metrics import r2_score
score = r2_score(y_test,y_pred)
print(score)

0.8278978091457141


# Assumptions

In [21]:
plt.scatter(y_test,y_pred)

<matplotlib.collections.PathCollection at 0x1f5dce91010>

In [22]:
residuals = y_test-y_pred
print(residuals)

8     -45.227704
16    149.349481
0      57.486996
18     26.291104
11     80.090077
9      -1.169327
Name: index_price, dtype: float64


In [23]:
## Plot residuals
sns.displot(residuals,kind='kde')

<seaborn.axisgrid.FacetGrid at 0x1f5dcda97c0>

In [24]:
plt.scatter(y_pred,residuals)

<matplotlib.collections.PathCollection at 0x1f5dd73fd40>

In [None]:
## OLS Linear Regression
import statsmodels.api as sm
model = sm.OLS(y_train,X_train_std).fit()

In [27]:
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:            index_price   R-squared (uncentered):                   0.035
Model:                            OLS   Adj. R-squared (uncentered):             -0.086
Method:                 Least Squares   F-statistic:                             0.2880
Date:                Mon, 22 Sep 2025   Prob (F-statistic):                       0.754
Time:                        12:16:39   Log-Likelihood:                         -150.85
No. Observations:                  18   AIC:                                      305.7
Df Residuals:                      16   BIC:                                      307.5
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

  res = hypotest_fun_out(*samples, **kwds)
