# Linear Regression

In [18]:
import numpy as np
import pandas as pd

In [19]:
# Small illustrative dataset, written one observation per row:
# (Income, Population, Rice_Consumption).
# Population is not an exact linear function of Income, so the regressors
# are not perfectly collinear and X^T X can be inverted.
column_names = ('Income', 'Population', 'Rice_Consumption')
observations = [
    (30, 110, 85),
    (40, 140, 95),
    (50, 210, 130),
    (60, 260, 155),
    (70, 320, 180),
]

# Transpose the rows into a column-name -> list-of-values mapping
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*observations))
}
df = pd.DataFrame(data)
df

Unnamed: 0,Income,Population,Rice_Consumption
0,30,110,85
1,40,140,95
2,50,210,130
3,60,260,155
4,70,320,180


In [20]:
# Design matrix X: a leading column of ones (the intercept term),
# followed by the Income and Population columns.
X = np.column_stack((
    np.ones(len(df)),
    df[['Income', 'Population']].to_numpy(),
))

# Response as an (n, 1) column vector
y = df['Rice_Consumption'].to_numpy().reshape(-1, 1)

In [26]:
# Ordinary least squares via the normal equations: (X^T X) beta = X^T y
XtX = X.T @ X
XtY = X.T @ y

# The explicit inverse is computed only because it is displayed below;
# it is no longer used to obtain beta.
XtX_inv = np.linalg.inv(XtX)

# Solve the linear system directly rather than multiplying by the inverse.
# This is generally cheaper and less sensitive to round-off than forming
# (X^T X)^-1 first; np.linalg.solve raises LinAlgError if XtX is singular
# (e.g. perfectly collinear regressors).
beta = np.linalg.solve(XtX, XtY)

XtX,XtX_inv,XtY,beta

(array([[5.000e+00, 2.500e+02, 1.040e+03],
        [2.500e+02, 1.350e+04, 5.740e+04],
        [1.040e+03, 5.740e+04, 2.458e+05]]),
 array([[ 1.47125e+01, -1.09625e+00,  1.93750e-01],
        [-1.09625e+00,  9.21250e-02, -1.68750e-02],
        [ 1.93750e-01, -1.68750e-02,  3.12500e-03]]),
 array([[   645.],
        [ 34750.],
        [147850.]]),
 array([[40.8125 ],
        [-0.70625],
        [ 0.59375]]))

In [None]:
# Fitted values implied by the estimated coefficients
y_pred = np.matmul(X, beta)

# Residuals: observed response minus fitted value
residuals = y - y_pred

# Sum of squared residuals
SSR = (residuals ** 2).sum()

# Display coefficients, fitted values and residuals as flat arrays, then the SSR
tuple(column.flatten() for column in (beta, y_pred, residuals)) + (SSR,)

(array([40.8125 , -0.70625,  0.59375]),
 array([ 84.9375,  95.6875, 130.1875, 152.8125, 181.375 ]),
 array([ 0.0625, -0.6875, -0.1875,  2.1875, -1.375 ]),
 np.float64(7.187499999999984))

# Logistic Regression