# Student Marks Prediction Based on Study and Sleep Hours

## Data Processing

### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Loading the dataset

In [2]:
# Read the student records from the CSV file into a DataFrame.
dataset = pd.read_csv('data/02Students.csv')
# Work on a copy so the frame returned by read_csv stays unmodified.
df = dataset.copy()
# Last expression: render the working frame as the cell output.
df

Unnamed: 0,Hours,sHours,Marks
0,0,6,34
1,1,7,36
2,1,6,33
3,1,8,39
4,1,8,42
5,2,8,45
6,2,6,38
7,3,6,45
8,3,7,53
9,3,7,46


### Splitting the data vertically into independent and dependent variables

In [3]:
# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [4]:
# Show the feature frame and the target series together (rendered as a tuple).
X, y

(    Hours  sHours
 0       0       6
 1       1       7
 2       1       6
 3       1       8
 4       1       8
 5       2       8
 6       2       6
 7       3       6
 8       3       7
 9       3       7
 10      3       8
 11      4       8
 12      4       7
 13      4       7
 14      5       8
 15      5       6
 16      6       6
 17      6       7
 18      7       8
 19      7       8
 20      7       6
 21      7       6
 22      8       7
 23      8       8
 24      9       8
 25      9       5
 26      9       8
 27     10       8
 28     10       8
 29     11       7,
 0     34
 1     36
 2     33
 3     39
 4     42
 5     45
 6     38
 7     45
 8     53
 9     46
 10    56
 11    59
 12    55
 13    56
 14    72
 15    59
 16    62
 17    71
 18    78
 19    88
 20    61
 21    74
 22    71
 23    89
 24    82
 25    67
 26    89
 27    81
 28    82
 29    79
 Name: Marks, dtype: int64)

### Splitting the dataset into training and test sets

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1234,
    test_size=0.3,
)
# Display the two feature partitions.
(X_train, X_test)

(    Hours  sHours
 13      4       7
 22      8       7
 24      9       8
 0       0       6
 2       1       6
 27     10       8
 26      9       8
 18      7       8
 5       2       8
 16      6       6
 25      9       5
 11      4       8
 9       3       7
 17      6       7
 29     11       7
 20      7       6
 12      4       7
 21      7       6
 6       2       6
 19      7       8
 15      5       6,
     Hours  sHours
 7       3       6
 10      3       8
 4       1       8
 1       1       7
 28     10       8
 8       3       7
 3       1       8
 23      8       8
 14      5       8)

In [6]:
# Show the target values belonging to the training and test partitions.
y_train, y_test

(13    56
 22    71
 24    82
 0     34
 2     33
 27    81
 26    89
 18    78
 5     45
 16    62
 25    67
 11    59
 9     46
 17    71
 29    79
 20    61
 12    55
 21    74
 6     38
 19    88
 15    59
 Name: Marks, dtype: int64,
 7     45
 10    56
 4     42
 1     36
 28    82
 8     53
 3     39
 23    89
 14    72
 Name: Marks, dtype: int64)

### Train the multiple regression model

In [7]:
from sklearn.linear_model import LinearRegression

# Linear model using both feature columns as predictors.
regressor = LinearRegression()
# fit() stays the last expression so the fitted estimator remains the cell output.
regressor.fit(X=X_train, y=y_train)

In [8]:
# Predict marks for the test-split features with the fitted model.
y_pred = regressor.predict(X_test)
# Display the predictions.
y_pred

array([45.76085662, 55.91055579, 46.57952582, 41.50467624, 88.56916066,
       50.8357062 , 46.57952582, 79.2381307 , 65.24158575])

In [9]:
# Two-column array: actual test marks (left) next to the model's predictions (right).
np.column_stack((y_test.to_numpy(), y_pred))

array([[45.        , 45.76085662],
       [56.        , 55.91055579],
       [42.        , 46.57952582],
       [36.        , 41.50467624],
       [82.        , 88.56916066],
       [53.        , 50.8357062 ],
       [39.        , 46.57952582],
       [89.        , 79.2381307 ],
       [72.        , 65.24158575]])

### Evaluate the model performance

In [12]:
# Score the fitted regressor on the test split; for LinearRegression, score()
# reports the coefficient of determination (R^2).
score = regressor.score(X_test, y_test)
score

0.901444999187003

In [13]:
from sklearn.metrics import r2_score

# Cross-check: R^2 computed directly from the true and predicted test marks.
r2_score(y_true=y_test, y_pred=y_pred)

0.901444999187003

In [14]:
# Fitted slope of each feature; the output holds two values, one per column of X.
coefficents = regressor.coef_
coefficents

array([4.66551498, 5.07484959])

In [15]:
# Fitted intercept (constant term) of the regression.
intercept = regressor.intercept_
intercept

np.float64(1.3152141538094924)

### Multiple linear regression equation

In [18]:
# y = 1.32 + 4.67*study_hours + 5.07*slept_hours   (intercept and coefficients rounded to 2 decimal places)

### How much error the model makes (RMSE)

In [17]:
import math
from sklearn.metrics import mean_squared_error

# RMSE: square root of the mean squared error between the actual and
# predicted test marks.
regressor_rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
regressor_rmse

5.755363941612014