In [35]:
#Regression example: predict students' first-year college GPA from their high school SAT and GPA scores using a small PyTorch neural network

#Prodigy University is seeking to enhance its enrollment process. They plan to do so by implementing a predictive analytics model aimed at identifying prospective students who demonstrate a high potential for academic success.

#The goal is to develop a predictive model that can accurately forecast the first-year college GPA of applicants based on their SAT scores and high school scores. This model is intended to serve as a strategic tool for the admissions office, enabling them to efficiently shortlist candidates who not only meet the academic standards of the university but are also likely to thrive in their chosen fields of study. By doing so, the university aspires to optimize its student selection process, improve academic outcomes, and foster an environment of excellence and high achievement.

In [36]:
import pandas as pd

In [37]:
# Load the student records; the relative path is resolved against the current working directory.
data=pd.read_csv('student_gpa_data_1000.csv')
# Preview the first rows of the frame that was just loaded (`data` is the name the later cells read from).
data.head(5)

Unnamed: 0,sat_sum,hs_gpa,fy_gpa
0,482,3.99,3.37
1,728,3.43,3.22
2,650,3.42,3.67
3,486,2.51,2.35
4,451,2.53,2.78


In [38]:
from collections.abc import ValuesView  # NOTE(review): not referenced in this cell — looks like a stray auto-import; confirm before removing
# Feature matrix as a NumPy array: one row per student, columns sat_sum and hs_gpa
X = data.loc[:, ['sat_sum','hs_gpa']].values
# Target as an explicit 2-D column vector: [data size] rows, 1 column
y = data['fy_gpa'].values.reshape(len(data), 1)
# Show both shapes, one per line
print(X.shape, y.shape, sep='\n')

(1000, 2)
(1000, 1)


In [39]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [40]:
from sklearn.preprocessing import StandardScaler
# Standardise the features so that the network is easier to train.
# The scaler's statistics are learned from the training split only and then
# reused unchanged for the test split, so no test information leaks into training.
scaler=StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)

In [41]:
# Sanity check: (rows, features) of the scaled training split
X_train.shape

(800, 2)

In [42]:
import torch
# Convert the four NumPy splits to float32 PyTorch tensors in one pass.
# The order of the tuple on the right must match the names on the left.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split_array, dtype=torch.float32)
    for split_array in (X_train, y_train, X_test, y_test)
)


In [43]:
# BUILDING MODEL

import torch.nn as nn


In [44]:
# BUILDING MODEL WITH 2 NEURONS
# Two-layer network: 2 inputs -> 2 sigmoid hidden units -> 1 output value.
model=nn.Sequential(
    nn.Linear(in_features=2,out_features=2),   # hidden layer: 2 i/p 2 o/p
    nn.Sigmoid(),                              # activation fxn
    nn.Linear(in_features=2,out_features=1),   # output layer: 2 i/p 1 o/p
)

In [45]:
# forward propagation: push the whole training set through the freshly created (still untrained) network
preds=model(X_train_tensor)

In [46]:
# First five raw predictions of the untrained network
preds[:5]

tensor([[-0.5089],
        [-0.5073],
        [-0.5091],
        [-0.5057],
        [-0.5110]], grad_fn=<SliceBackward0>)

In [47]:
from torch.nn import MSELoss

In [48]:
# Mean-squared error between the untrained network's predictions and the training targets
criterion = MSELoss()
loss = criterion(input=preds, target=y_train_tensor)
print(loss)


tensor(15.1825, grad_fn=<MseLossBackward0>)


In [49]:
# Predictions again, to compare side by side with the true targets shown in the next cell
preds[:5]

tensor([[-0.5089],
        [-0.5073],
        [-0.5091],
        [-0.5057],
        [-0.5110]], grad_fn=<SliceBackward0>)

In [50]:
# True first-year GPA targets for the same five training rows
y_train_tensor[:5]

tensor([[3.4500],
        [2.3300],
        [3.2100],
        [3.8600],
        [3.6400]])

In [51]:
# Weights of the first Linear layer (index 0 of the Sequential) before any optimizer step
model[0].weight

Parameter containing:
tensor([[ 0.0208, -0.3149],
        [-0.3581,  0.2503]], requires_grad=True)

In [52]:
# Weights of the second Linear layer (index 2 of the Sequential) before any optimizer step
model[2].weight

Parameter containing:
tensor([[0.0269, 0.0254]], requires_grad=True)

In [53]:

import torch.optim as optim
# Plain stochastic-gradient-descent optimizer over every parameter of the model, learning rate 0.001
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [54]:
# Back-propagate the loss computed earlier so each model parameter receives a gradient
loss.backward()

In [55]:
# Apply a single SGD update to the parameters using the gradients from backward()
optimizer.step()

In [56]:
# First-layer weights after the single update; compare with the values printed before the step.
# NOTE(review): the captured output matches the earlier one to 4 decimals — presumably the update is too small to show at this precision.
model[0].weight

Parameter containing:
tensor([[ 0.0208, -0.3149],
        [-0.3581,  0.2503]], requires_grad=True)

In [57]:
# Second-layer weights after the single update; compare with the values printed before the step
model[2].weight

Parameter containing:
tensor([[0.0303, 0.0287]], requires_grad=True)

In [58]:
from torch.utils.data import TensorDataset,DataLoader

In [59]:
# Pair each training feature row with its target so a DataLoader can serve (x, y) batches
train_data=TensorDataset(X_train_tensor,y_train_tensor)

In [60]:
# NOTE(review): this statement is identical to the one in the preceding cell; it rebuilds the same dataset and looks redundant.
train_data=TensorDataset(X_train_tensor,y_train_tensor)

In [61]:
# Fresh, untrained network for the stochastic-gradient-descent run.
# Seed PyTorch's global RNG first so the random weight initialisation is the
# same on every run and matches the other re-initialisations seeded the same way.
torch.manual_seed(42)
model=nn.Sequential(
    nn.Linear(2,2),   # 2 input features -> 2 hidden units
    nn.Sigmoid(),
    nn.Linear(2,1)    # 2 hidden units -> 1 predicted value
)
# A new optimizer is required because it must hold the new model's parameters
optimizer=optim.SGD(model.parameters(),lr=0.001)

In [62]:
# performance on train and test sets before training
# Nothing is back-propagated here, so evaluate under no_grad to skip building an autograd graph.
with torch.no_grad():
  train_loss = criterion(model(X_train_tensor),y_train_tensor).item()
  test_loss = criterion(model(X_test_tensor),y_test_tensor).item()
print(f' without Training:\n Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


 without Training:
 Train Loss: 8.1139, Test Loss: 8.2756


In [63]:
# First five predictions of the re-initialised model on the training set
model(X_train_tensor)[:5]

tensor([[0.4499],
        [0.5206],
        [0.6219],
        [0.7985],
        [0.3490]], grad_fn=<SliceBackward0>)

In [69]:
# STOCHASTIC GRADIENT DESCENT
# batch_size=1: every training sample triggers its own parameter update.
# NOTE(review): the loop continues from whatever state `model` is in; re-run the
# re-initialisation cell first if a from-scratch loss curve is wanted.
train_loader=DataLoader(train_data,batch_size=1,shuffle=True)
for epoch in range(10):
  for x_batch,y_batch in train_loader:
    # forward pass
    preds=model(x_batch)
    loss=criterion(preds,y_batch)
    # backward pass and optimization
    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()
    optimizer.step()

  # End-of-epoch evaluation on the full splits; no gradients are needed here,
  # so no_grad avoids building an autograd graph for these forward passes.
  with torch.no_grad():
    train_loss=criterion(model(X_train_tensor),y_train_tensor).item()
    test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
  print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 1: Train Loss: 0.0747, Test Loss: 0.0783
Epoch 2: Train Loss: 0.0747, Test Loss: 0.0777
Epoch 3: Train Loss: 0.0747, Test Loss: 0.0783
Epoch 4: Train Loss: 0.0747, Test Loss: 0.0784
Epoch 5: Train Loss: 0.0747, Test Loss: 0.0786
Epoch 6: Train Loss: 0.0747, Test Loss: 0.0786
Epoch 7: Train Loss: 0.0746, Test Loss: 0.0779
Epoch 8: Train Loss: 0.0747, Test Loss: 0.0776
Epoch 9: Train Loss: 0.0746, Test Loss: 0.0780
Epoch 10: Train Loss: 0.0746, Test Loss: 0.0783


In [70]:
# First five training-set predictions after the stochastic-gradient-descent loop
model(X_train_tensor)[:5]

tensor([[3.2970],
        [2.9931],
        [3.5485],
        [3.0365],
        [3.6960]], grad_fn=<SliceBackward0>)

In [72]:
# reinitialize model weights for the full-batch gradient-descent run
# Seed PyTorch's global RNG first so the random weight initialisation is the
# same on every run and matches the other re-initialisations seeded the same way.
torch.manual_seed(42)
model=nn.Sequential(
    nn.Linear(2,2),   # 2 input features -> 2 hidden units
    nn.Sigmoid(),
    nn.Linear(2,1)    # 2 hidden units -> 1 predicted value
)
# A new optimizer is required because it must hold the new model's parameters
optimizer=optim.SGD(model.parameters(),lr=0.001)

In [77]:
# BATCH GRADIENT DESCENT
# A single batch holds the entire training set, so each epoch performs exactly one
# parameter update. The batch size is taken from the dataset rather than hard-coded.
train_loader=DataLoader(train_data,batch_size=len(train_data),shuffle=True)
for epoch in range(1000):
  for x_batch,y_batch in train_loader:
    # forward pass
    preds=model(x_batch)
    loss=criterion(preds,y_batch)
    # backward pass and optimization
    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()
    optimizer.step()

  # Report every 100th epoch only, to keep the output short
  if (epoch+1)%100==0:
    # Evaluation needs no gradients, so no_grad avoids building an autograd graph
    with torch.no_grad():
      train_loss=criterion(model(X_train_tensor),y_train_tensor).item()
      test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 100: Train Loss: 0.1140, Test Loss: 0.1294
Epoch 200: Train Loss: 0.1108, Test Loss: 0.1250
Epoch 300: Train Loss: 0.1084, Test Loss: 0.1217
Epoch 400: Train Loss: 0.1063, Test Loss: 0.1190
Epoch 500: Train Loss: 0.1045, Test Loss: 0.1166
Epoch 600: Train Loss: 0.1029, Test Loss: 0.1145
Epoch 700: Train Loss: 0.1013, Test Loss: 0.1126
Epoch 800: Train Loss: 0.0999, Test Loss: 0.1108
Epoch 900: Train Loss: 0.0985, Test Loss: 0.1092
Epoch 1000: Train Loss: 0.0972, Test Loss: 0.1076


In [78]:
# mini batch gradient descent: start again from a fresh, untrained network
# Seed PyTorch's global RNG first so the random weight initialisation is the
# same on every run and matches the other re-initialisations seeded the same way.
torch.manual_seed(42)
model=nn.Sequential(
    nn.Linear(2,2),   # 2 input features -> 2 hidden units
    nn.Sigmoid(),
    nn.Linear(2,1)    # 2 hidden units -> 1 predicted value
)
# A new optimizer is required because it must hold the new model's parameters
optimizer=optim.SGD(model.parameters(),lr=0.001)

In [79]:
# MINI-BATCH GRADIENT DESCENT
# Each parameter update is computed from a shuffled batch of up to 64 samples.
train_loader=DataLoader(train_data,batch_size=64,shuffle=True)
for epoch in range(500):
  for x_batch,y_batch in train_loader:
    # forward pass
    preds=model(x_batch)
    loss=criterion(preds,y_batch)
    # backward pass and optimization
    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()
    optimizer.step()

  # Report every 50th epoch only, to keep the output short
  if (epoch+1)%50==0:
    # Evaluation needs no gradients, so no_grad avoids building an autograd graph
    with torch.no_grad():
      train_loss=criterion(model(X_train_tensor),y_train_tensor).item()
      test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 0.1806, Test Loss: 0.2111
Epoch 100: Train Loss: 0.0976, Test Loss: 0.1133
Epoch 150: Train Loss: 0.0906, Test Loss: 0.1030
Epoch 200: Train Loss: 0.0859, Test Loss: 0.0965
Epoch 250: Train Loss: 0.0826, Test Loss: 0.0916
Epoch 300: Train Loss: 0.0801, Test Loss: 0.0881
Epoch 350: Train Loss: 0.0784, Test Loss: 0.0854
Epoch 400: Train Loss: 0.0771, Test Loss: 0.0834
Epoch 450: Train Loss: 0.0762, Test Loss: 0.0819
Epoch 500: Train Loss: 0.0756, Test Loss: 0.0808
