# Coefficient Analysis of Linear Regression

In [1]:
# imports
import os
import pickle

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

## Getting Data

In [2]:
# Read the prepared dataset and separate the target column from the predictors.
data = pd.read_csv('new_Exam_Score_Prediction.csv')

X = data.drop(columns='exam_score')
y = data['exam_score']

# 70/30 train/test split; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

x_train

Unnamed: 0,age,gender,study_hours,class_attendance,internet_access,sleep_hours,sleep_quality,facility_rating,exam_difficulty,diploma,group_study,self_study,online_videos,coaching,mixed
1156,20,1,5.57,43.4,0,5.4,1,2,0,0,0,1,0,0,0
1536,23,0,3.52,61.1,1,4.6,0,2,1,1,0,0,0,1,0
5035,20,0,1.83,58.6,1,9.3,1,1,1,0,1,0,0,0,0
9040,19,1,7.17,44.1,1,4.5,0,1,2,0,0,0,0,1,0
7272,23,1,5.06,86.1,1,8.7,0,2,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11964,20,1,0.39,58.0,1,6.0,2,0,1,0,0,0,1,0,0
5191,23,0,2.44,74.7,0,5.1,2,2,1,0,0,1,0,0,0
5390,20,0,0.90,92.3,0,9.3,2,2,2,0,1,0,0,0,0
860,21,1,7.51,72.8,0,4.7,0,2,0,0,0,0,0,1,0


# Getting Model

In [3]:
# Load the previously fitted pipeline from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load pipeline files that come from a trusted source.
pipeline_path = os.path.join('pipes', 'Linear.pkl')
with open(pipeline_path, 'rb') as pipeline_file:
    Pipe = pickle.load(pipeline_file)

Pipe

# T-test for Regression Coefficients

In [4]:
# Refit the columns kept by the pipeline's feature selector with statsmodels
# OLS on the training split; the printed summary reports a t-statistic and
# p-value for each coefficient.
names = x_train.columns
selector = Pipe.named_steps['features_selector']
mask = selector.get_support()
# NOTE(review): the mask is applied positionally to the raw training columns,
# which presumes the selector was fitted on exactly these columns in this
# order - confirm against the pipeline definition.
selected_names = np.array(names)[mask]

# These are training rows, so they are named train_* rather than test_*.
train_x = x_train[selected_names]
train_y = y_train

# add_constant supplies the intercept column (the `const` row of the summary).
X_constant = sm.add_constant(train_x)
sm_model = sm.OLS(train_y, X_constant).fit()
print(sm_model.summary())

                            OLS Regression Results                            
Dep. Variable:             exam_score   R-squared:                       0.734
Model:                            OLS   Adj. R-squared:                  0.733
Method:                 Least Squares   F-statistic:                     2842.
Date:                Fri, 19 Dec 2025   Prob (F-statistic):               0.00
Time:                        02:17:33   Log-Likelihood:                -34366.
No. Observations:                9291   AIC:                         6.875e+04
Df Residuals:                    9281   BIC:                         6.882e+04
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -3.2384      0.531  

# Drop the Mixed Study Method

In [5]:
# Training-split OLS fit on the selector's columns with the 'mixed' column
# removed from the regressors.
names = x_train.columns
selector = Pipe.named_steps['features_selector']
mask = selector.get_support()
# NOTE(review): the mask is applied positionally to the raw training columns,
# which presumes the selector was fitted on exactly these columns in this
# order - confirm against the pipeline definition.
selected_names = np.array(names)[mask]
selected_names = selected_names[selected_names != 'mixed']

# These are training rows, so they are named train_* rather than test_*.
train_x = x_train[selected_names]
train_y = y_train

# add_constant supplies the intercept column (the `const` row of the summary).
X_constant = sm.add_constant(train_x)
sm_model2 = sm.OLS(train_y, X_constant).fit()
print(sm_model2.summary())

                            OLS Regression Results                            
Dep. Variable:             exam_score   R-squared:                       0.734
Model:                            OLS   Adj. R-squared:                  0.733
Method:                 Least Squares   F-statistic:                     2842.
Date:                Fri, 19 Dec 2025   Prob (F-statistic):               0.00
Time:                        02:17:33   Log-Likelihood:                -34366.
No. Observations:                9291   AIC:                         6.875e+04
Df Residuals:                    9281   BIC:                         6.882e+04
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -2.7405      0.672  

# Test data

## With mixed

In [6]:
# Fit a separate OLS on the held-out split, using the same columns the
# pipeline's feature selector keeps.
# NOTE(review): this rebinds `sm_model`, so after this cell the name refers to
# the test-split fit rather than the training-split fit.
selector = Pipe.named_steps['features_selector']
mask = selector.get_support()
names = x_train.columns
selected_names = np.asarray(names)[mask]

test_x, test_y = x_test[selected_names], y_test

# Intercept column is added explicitly before fitting.
X_constant = sm.add_constant(test_x)
sm_model = sm.OLS(test_y, X_constant).fit()
print(sm_model.summary())

                            OLS Regression Results                            
Dep. Variable:             exam_score   R-squared:                       0.725
Model:                            OLS   Adj. R-squared:                  0.724
Method:                 Least Squares   F-statistic:                     1163.
Date:                Fri, 19 Dec 2025   Prob (F-statistic):               0.00
Time:                        02:17:33   Log-Likelihood:                -14744.
No. Observations:                3983   AIC:                         2.951e+04
Df Residuals:                    3973   BIC:                         2.957e+04
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -2.1017      0.821  

## Without Mixed

In [7]:
# Fit a separate OLS on the held-out split with the 'mixed' column excluded
# from the selector's columns.
# NOTE(review): this rebinds `sm_model2`, so after this cell the name refers
# to the test-split fit rather than the training-split fit.
selector = Pipe.named_steps['features_selector']
mask = selector.get_support()
names = x_train.columns
kept_names = np.asarray(names)[mask]
selected_names = kept_names[kept_names != 'mixed']

test_x, test_y = x_test[selected_names], y_test

# Intercept column is added explicitly before fitting.
X_constant = sm.add_constant(test_x)
sm_model2 = sm.OLS(test_y, X_constant).fit()
print(sm_model2.summary())

                            OLS Regression Results                            
Dep. Variable:             exam_score   R-squared:                       0.725
Model:                            OLS   Adj. R-squared:                  0.724
Method:                 Least Squares   F-statistic:                     1163.
Date:                Fri, 19 Dec 2025   Prob (F-statistic):               0.00
Time:                        02:17:33   Log-Likelihood:                -14744.
No. Observations:                3983   AIC:                         2.951e+04
Df Residuals:                    3973   BIC:                         2.957e+04
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -1.2910      1.031  