# **Importing Required Libraries**

In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# **Loading Data**

In [48]:
# Read the student-performance CSV (path is relative to the notebook's working
# directory) into a DataFrame and show it as the cell output.
df = pd.read_csv(filepath_or_buffer="Student_Performance.csv")
df

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,Yes,4,2,23.0
9996,7,64,Yes,8,5,58.0
9997,6,83,Yes,8,5,74.0
9998,9,97,Yes,7,0,95.0


# **Preparing the data**

## Converting non-numeric columns to numeric

In [49]:
# Encode the Yes/No 'Extracurricular Activities' column as 1/0.
#
# The dtype guard makes this cell idempotent: `.map` turns every value that is
# not a key of the mapping into NaN, so re-running the cell on the already
# encoded 1/0 column would otherwise wipe it out.
activity_col = 'Extracurricular Activities'
if not pd.api.types.is_numeric_dtype(df[activity_col]):
    df[activity_col] = df[activity_col].map({'Yes': 1, 'No': 0})

# Show the column dtypes to confirm every column is now numeric.
df.dtypes

Hours Studied                         int64
Previous Scores                       int64
Extracurricular Activities            int64
Sleep Hours                           int64
Sample Question Papers Practiced      int64
Performance Index                   float64
dtype: object

## Separating x and y

In [50]:
# Target series y: the 'Performance Index' column of df.
y = df.loc[:, 'Performance Index']

In [51]:
# Feature frame x: every column of df except the 'Performance Index' target.
x = df.drop(columns=['Performance Index'])

In [52]:
# Display the feature frame to verify the target column was removed.
x

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced
0,7,99,1,9,1
1,4,82,0,4,2
2,8,51,1,7,2
3,5,52,1,5,2
4,7,75,0,8,5
...,...,...,...,...,...
9995,1,49,1,4,2
9996,7,64,1,8,5
9997,6,83,1,8,5
9998,9,97,1,7,0


## Data Normalization

In [59]:
# Rescale every feature column with min-max normalisation (MinMaxScaler's
# default output range is [0, 1]).
#
# NOTE(review): the scaler is fitted on the full feature frame, i.e. before the
# train/test split, so the minima/maxima of rows that later land in the test
# set influence the scaling. Prefer splitting first and fitting the scaler on
# the training rows only, then transforming both partitions with it.
scaler = MinMaxScaler()
new_x = scaler.fit_transform(x)

# Rebuild the DataFrame from x's own column labels and index instead of a
# hard-coded column list, so the scaled frame keeps the original row labels
# and does not break if the feature set changes.
x = pd.DataFrame(new_x, columns=x.columns, index=x.index)
x

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced
0,0.750,1.000000,1.0,1.0,0.111111
1,0.375,0.711864,0.0,0.0,0.222222
2,0.875,0.186441,1.0,0.6,0.222222
3,0.500,0.203390,1.0,0.2,0.222222
4,0.750,0.593220,0.0,0.8,0.555556
...,...,...,...,...,...
9995,0.000,0.152542,1.0,0.0,0.222222
9996,0.750,0.406780,1.0,0.8,0.555556
9997,0.625,0.728814,1.0,0.8,0.555556
9998,1.000,0.966102,1.0,0.6,0.000000


## Data Splitting

In [60]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
(x_train, x_test,
 y_train, y_test) = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=100,
)

In [62]:
# Display the held-out feature rows produced by the split.
x_test

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced
8018,0.125,0.813559,1.0,0.4,0.666667
9225,0.500,0.406780,1.0,0.0,0.333333
3854,0.500,0.152542,0.0,1.0,1.000000
2029,0.875,0.983051,1.0,0.2,1.000000
3539,1.000,0.796610,0.0,0.6,0.777778
...,...,...,...,...,...
6923,0.625,0.813559,1.0,0.6,0.666667
1207,0.000,0.169492,1.0,0.6,0.666667
7960,0.000,0.694915,0.0,0.4,0.666667
2339,0.500,0.118644,0.0,0.8,1.000000


# **Model Building**