In [70]:
# MLP by Rozita 9/16/2021

import pandas as pd  # we need this package to use excel files, ("pip nstall pandas" if you need to install)
import numpy as np
from sklearn.model_selection import train_test_split 
import torch
import torch.nn as nn

# step 1-read Excel file 
#download data
#two types of dataset:
#1-Labels of data are one-hat (2 classes, so data ahs Label 1 and Label 2 and if each sample belongs to label 1, label 1 will be 1 and Label 2 will be 0.
#2- Labele encoded  #this one is used in our example so we have one Label, if it is o(1), sample is man (woman)
# you can open data in pycharm, use Table Editor or Text (install .csv plug in, in pycharm )

# Data has 3168 rows (num of samples) x 21 columns (20 num of features + 1 column for labels)

csv_file = pd.read_csv('./gender_voice.csv') #./data/gender_voice.csv  if file is in data folder
#print(csv_file)

#reading just 20 columns         
raw_data = csv_file.iloc[:,:20]     # gives locations of data, raw_data.shape  3168*2     raw_data.size=63360 
data = raw_data.values              #data[:][:].shape 3168*20 #values have no Header
#print(data)

#reading column 21 as label
raw_labels = csv_file.iloc[:,20]
labels = raw_labels.values
#print(labels) 

#[1,1,1, ....0,0,0] 
#Data needs shuffle cuz first samples with labe 1 comes and then samples with labels 0 come 
# you can shuffle using next lines or shuffle in tran_test_split
# idx = np.arange(data.shape[0]) #dat.shape is 3168*20. data.shape[0] is number of samples or subjects
# np.random.shuffle(idx) #randomize idx 
# data = data[idx, :]
# labels = labels[idx]

test_ratio = 0.2
valid_ratio = 0.1

#random_state=1 to get same data in each time you run code
#First split data to train and test and then split train set to train and validation
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=test_ratio, random_state=1, shuffle=True) 
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=valid_ratio, random_state=1, shuffle=True)
#print(x_train.shape)

#do not forget to convert everything to tensor
x_train = torch.tensor(x_train).float()
x_test = torch.tensor(x_test).float()
x_valid = torch.tensor(x_valid).float()

y_train = torch.tensor(y_train).float()  #cuz we have BCE (Binary Cross Entropy), we did not use Long (was for crossEntropy)
y_test = torch.tensor(y_test).float()     
y_valid = torch.tensor(y_valid).float()


In [71]:
# step 2) Define Model, Loss, Optimizer ------------------------------------------------------------------------
# 2-1)creat a Model
# 2-1-1) In case of single layer:
#model=torch.nn.Linear(num_features,num_classes)  #input No. is 3 and output No. is 2
# 2-1-2) In case of multi-layer:

num_hidden1=10;
num_features = x_train.shape[1] #num of columns 
num_classes = 1                 # 2 classes, but we have one neuron at output
model = nn.Sequential(
                          nn.Linear(num_features,num_hidden1),
                          nn.ReLU(),                           #see other activation functions in help of pytorch
                          nn.Linear(num_hidden1, num_classes),
                          nn.Sigmoid(),
                         )

# 2-2) Loss Function
# MSE loss not good idea for classification, use pytorch.org in doc, see torch.nn and in help look for loss
loss = nn.BCELoss()  #since Binary cross-entropy 

# 2-3) optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #lr is Learning rate

print('Model:',model,'\n Loss:', loss, '\n optimizer:', optimizer)



Model: Sequential(
  (0): Linear(in_features=20, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=1, bias=True)
  (3): Sigmoid()
) 
 Loss: BCELoss() 
 optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)


In [72]:
#Train
num_epochs=200

num_samples_train = x_train.shape[0] #rows numbers
num_samples_valid = x_valid.shape[0] 
num_samples_test = x_test.shape[0]

for epoch in range(num_epochs):
    # this is necessary to reset gradients (write model.bias.grad in command window when running breakpoint)
    optimizer.zero_grad()
    yp = model(x_train) #yp between 0 and 1 cuz of sigmoid
    #print(x_train.shape)
    #print('yp:',yp, 'yp.shape',yp.shape)
    #print('X_train:',x_train, 'Xtrain.shape', x_train.shape)
    loss_value = loss(yp[:,0], y_train)
    
    yp = torch.round(yp[:, 0]) #we just have one neourn. round values to closet integer value
    num_corrects = torch.sum(yp == y_train) #use breakpoint
    acc_train = num_corrects.float() / float(num_samples_train)  #use breakpoint
    # or acc=num_corrects.float()/float(num_samples)
    
    loss_value.backward()
    optimizer.step()
    #model.weights.grad
    #model.bias.grad

    yp=model(x_valid) # for validation you do not need loss_value.backward
    num_corrects = torch.sum(yp[:,0].round()== y_valid)  # use breakpoint
    acc_valid = num_corrects.float() / float(num_samples_valid)  # use breakpoint

    print('Epoch: ', epoch, ', Train Loss: ', loss_value.item(),
          ', Train Accuracy: ', acc_train.item(),
          ', Validation Accuracy: ', acc_valid.item())
        
# This part is out of train loop and it is for Test data
 # for test you do not need loss_value.backward
yp=model(x_test)
num_corrects = torch.sum(yp[:,0].round()== y_test)  # use breakpoint
acc_test= num_corrects.float() / float(num_samples_test)  # use breakpoint
print('\n Test accuracy:', acc_test.item())



Epoch:  0 , Train Loss:  0.9511905908584595 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  1 , Train Loss:  0.9424186944961548 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  2 , Train Loss:  0.933948278427124 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  3 , Train Loss:  0.8885992765426636 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  4 , Train Loss:  0.8434571623802185 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  5 , Train Loss:  0.835080087184906 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  6 , Train Loss:  0.8265467882156372 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Epoch:  7 , Train Loss:  0.8182052373886108 , Train Accuracy:  0.4942982494831085 , Validation Accuracy:  0.5275590419769287
Ep

Epoch:  130 , Train Loss:  0.6530975699424744 , Train Accuracy:  0.5438596606254578 , Validation Accuracy:  0.4566929042339325
Epoch:  131 , Train Loss:  0.6529049277305603 , Train Accuracy:  0.5442982316017151 , Validation Accuracy:  0.4606299102306366
Epoch:  132 , Train Loss:  0.6527116894721985 , Train Accuracy:  0.5434210300445557 , Validation Accuracy:  0.4645669162273407
Epoch:  133 , Train Loss:  0.6525179147720337 , Train Accuracy:  0.5451754331588745 , Validation Accuracy:  0.4645669162273407
Epoch:  134 , Train Loss:  0.6523233652114868 , Train Accuracy:  0.5460526347160339 , Validation Accuracy:  0.4685039222240448
Epoch:  135 , Train Loss:  0.6521282196044922 , Train Accuracy:  0.5478070378303528 , Validation Accuracy:  0.4685039222240448
Epoch:  136 , Train Loss:  0.6519324779510498 , Train Accuracy:  0.5491228103637695 , Validation Accuracy:  0.4685039222240448
Epoch:  137 , Train Loss:  0.6517361998558044 , Train Accuracy:  0.5495613813400269 , Validation Accuracy:  0.4