In [1]:
from torch.utils.data import DataLoader
import torch

In [2]:
t = torch.arange(7, dtype=torch.float32)
data_loader = DataLoader(t)

In [3]:
for item in data_loader:
    print(item)

tensor([0.])
tensor([1.])
tensor([2.])
tensor([3.])
tensor([4.])
tensor([5.])
tensor([6.])


In [3]:
t

tensor([0., 1., 2., 3., 4., 5., 6.])

In [6]:
data_loader = DataLoader(t, batch_size=3, drop_last=False, shuffle=True)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([1., 4., 0.])
batch 2: tensor([5., 3., 6.])


In [5]:
torch.manual_seed(23)
data_loader = DataLoader(t, batch_size=3, drop_last=True, shuffle=True)
for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0., 2., 6.])
batch 2: tensor([4., 5., 3.])


In [7]:
from torch.utils.data import Dataset

class JointDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [12]:
torch.manual_seed(23)
t_x = torch.rand([4, 3], dtype=torch.float32)
t_y = torch.arange(4)

In [13]:
t_x

tensor([[0.4283, 0.2889, 0.4224],
        [0.3571, 0.9577, 0.1100],
        [0.2933, 0.9205, 0.5876],
        [0.1299, 0.6729, 0.1028]])

In [14]:
t_y

tensor([0, 1, 2, 3])

In [16]:
joint_dataset = JointDataset(t_x, t_y)
joint_dataset.x

tensor([[0.4283, 0.2889, 0.4224],
        [0.3571, 0.9577, 0.1100],
        [0.2933, 0.9205, 0.5876],
        [0.1299, 0.6729, 0.1028]])

In [17]:
len(joint_dataset)

4

In [20]:
joint_dataset[3]

(tensor([0.1299, 0.6729, 0.1028]), tensor(3))

In [21]:
for example in joint_dataset:
    print('  x: ', example[0],
          '  y: ', example[1])

  x:  tensor([0.4283, 0.2889, 0.4224])   y:  tensor(0)
  x:  tensor([0.3571, 0.9577, 0.1100])   y:  tensor(1)
  x:  tensor([0.2933, 0.9205, 0.5876])   y:  tensor(2)
  x:  tensor([0.1299, 0.6729, 0.1028])   y:  tensor(3)


 Or use Class `TensorDataset` directly

In [22]:
torch.manual_seed(23)
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(t_x, t_y)

for example in joint_dataset:
    print('  x: ', example[0],
          '  y: ', example[1])

  x:  tensor([0.4283, 0.2889, 0.4224])   y:  tensor(0)
  x:  tensor([0.3571, 0.9577, 0.1100])   y:  tensor(1)
  x:  tensor([0.2933, 0.9205, 0.5876])   y:  tensor(2)
  x:  tensor([0.1299, 0.6729, 0.1028])   y:  tensor(3)


In [23]:
torch.manual_seed(1)
data_loader = DataLoader(dataset=joint_dataset, batch_size=2, shuffle=True)

for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0],
              '\n         y:', batch[1])

for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0],
              '\n         y:', batch[1])

batch 1: x: tensor([[0.2933, 0.9205, 0.5876],
        [0.4283, 0.2889, 0.4224]]) 
         y: tensor([2, 0])
batch 2: x: tensor([[0.3571, 0.9577, 0.1100],
        [0.1299, 0.6729, 0.1028]]) 
         y: tensor([1, 3])
epoch 1
batch 1: x: tensor([[0.4283, 0.2889, 0.4224],
        [0.2933, 0.9205, 0.5876]]) 
         y: tensor([0, 2])
batch 2: x: tensor([[0.3571, 0.9577, 0.1100],
        [0.1299, 0.6729, 0.1028]]) 
         y: tensor([1, 3])
epoch 2
batch 1: x: tensor([[0.1299, 0.6729, 0.1028],
        [0.2933, 0.9205, 0.5876]]) 
         y: tensor([3, 2])
batch 2: x: tensor([[0.4283, 0.2889, 0.4224],
        [0.3571, 0.9577, 0.1100]]) 
         y: tensor([0, 1])


In [24]:
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [25]:
df = pd.read_csv("C:/Training/Academy/Statistics (Python)/Cases/human-resources-analytics/HR_comma_sep.csv")
dum_df = pd.get_dummies(df,drop_first=True)
dum_df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department_RandD,Department_accounting,Department_hr,Department_management,Department_marketing,Department_product_mng,Department_sales,Department_support,Department_technical,salary_low,salary_medium
0,0.38,0.53,2,157,3,0,1,0,False,False,False,False,False,False,True,False,False,True,False
1,0.8,0.86,5,262,6,0,1,0,False,False,False,False,False,False,True,False,False,False,True
2,0.1,0.77,6,247,4,0,1,0,False,False,False,False,False,False,True,False,False,True,False
3,0.92,0.85,5,259,5,0,1,0,False,False,False,False,False,False,True,False,False,True,False
4,0.89,1.0,5,224,5,0,1,0,False,False,False,False,False,False,True,False,False,True,False


In [26]:
X = dum_df.drop('left', axis=1)
scaler = MinMaxScaler()

y = dum_df['left'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 
                                                    random_state=23,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 

In [27]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)
print(X_torch.size())
print(y_torch.size())

torch.Size([10496, 18])
torch.Size([10496])


In [28]:
data_loader = DataLoader(y_torch, batch_size=30, drop_last=False)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 1, 0])
batch 2: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0])
batch 3: tensor([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        1, 0, 1, 0, 0, 0])
batch 4: tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 1])
batch 5: tensor([0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
        1, 0, 1, 0, 1, 0])
batch 6: tensor([0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 0, 0, 0, 0])
batch 7: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
        0, 0, 0, 1, 0, 0])
batch 8: tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0])
batch 9: tensor([0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0

In [29]:
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(X_torch.float(), y_torch.float())

In [30]:
type(joint_dataset)

torch.utils.data.dataset.TensorDataset

In [43]:
torch.manual_seed(23)
data_loader = DataLoader(dataset=joint_dataset, batch_size=16, shuffle=True)

#for i, batch in enumerate(data_loader, 1):
#        print(f'batch {i}:', 'x:', batch[0].shape,
#              '\n         y:', batch[1].shape)



In [32]:
# Create a model
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=5),
                      nn.ReLU(),
                      nn.Linear(5, 3),
                      nn.ReLU(),
                      nn.Linear(3,1),
                      nn.Sigmoid())

In [33]:
criterion = torch.nn.BCELoss()
# Construct the optimizer (Adam in this case)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

Prediction with Default Weights

In [34]:
y_pred = model(X_torch.float())
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([10496, 1])
torch.Size([10496, 1])


In [44]:
#for epoch in range(2):
#    print(f'epoch {epoch+1}')
#    for i, batch in enumerate(data_loader, 1):
        #print(f'batch {i}:', 'x:', batch[0],
         #     '\n         y:', batch[1])
#        print((batch[0].shape, batch[1].shape))

In [54]:
# Gradient Descent

for epoch in np.arange(0,100):
    for i, batch in enumerate(data_loader, 1):
      # Forward pass: Compute predicted y by passing x to the model
      y_pred_prob = model(batch[0].float())

      # Compute and print loss
      loss = criterion(y_pred_prob, batch[1].float().unsqueeze(1))

      # Zero gradients, perform a backward pass, and update the weights.
      optimizer.zero_grad()

      # perform a backward pass (backpropagation)
      loss.backward()

      # Update the parameters
      optimizer.step()
    
    if epoch%10 == 0:
          print('epoch: ', epoch+1,' train loss: ', loss.item())

epoch:  1  train loss:  0.3603532910346985
epoch:  11  train loss:  0.25500327348709106
epoch:  21  train loss:  0.4412398338317871
epoch:  31  train loss:  0.6336061954498291
epoch:  41  train loss:  0.21128126978874207
epoch:  51  train loss:  0.44721323251724243
epoch:  61  train loss:  0.27233803272247314
epoch:  71  train loss:  0.3763129413127899
epoch:  81  train loss:  0.25553634762763977
epoch:  91  train loss:  0.2304210513830185


In [55]:
X_torch_tst = torch.from_numpy(X_scl_tst)
y_torch_tst = torch.from_numpy(y_test)
y_torch_tst = y_torch_tst.unsqueeze(1)
print(y_torch_tst.shape)

torch.Size([4499, 1])


Prediction with Final Weights

In [56]:
y_pred = model(X_torch_tst.float())
y_pred[:5]

tensor([[0.0285],
        [0.4318],
        [0.0985],
        [0.0198],
        [0.1643]], grad_fn=<SliceBackward0>)

In [57]:
y_pred.shape, y_test.shape

(torch.Size([4499, 1]), (4499,))

In [58]:
type(y_pred.detach().numpy())

numpy.ndarray

In [59]:
y_pred = y_pred.detach().numpy()
y_pred.shape

(4499, 1)

In [60]:
from sklearn.metrics import log_loss
log_loss(y_test, y_pred)

0.31486943317341504