In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

from utils.helper import fn_plot_torch_hist

In [2]:
###------------------------------
### Global variables
###------------------------------
RANDOM_STATE = 24  # single seed reused for NumPy, PyTorch and the scikit-learn calls

# Seed the random number generators up front so that a Restart & Run All
# reproduces the same weight initialisation and the same training curves.
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

N_SAMPLE = 1000   # number of points generated for the data set
NOISE = .2        # noise level passed to make_moons
ALPHA = .001      # learning rate passed to the optimizer
TEST_SIZE = .2    # fraction of the data held out for testing
EPOCHS = 2000     # number of iterations used to optimise the weights

# Empty dictionary reserved for loss/epoch values (for drawing a loss curve later)
loss_hist = {}

CMAP = plt.cm.coolwarm

# Apply the style sheet first and the custom rcParams afterwards: a style sheet
# sets its own font sizes, so updating rcParams before it would be overridden.
plt.style.use('ggplot')

params = {'legend.fontsize' : 'medium',
          'figure.figsize' : (15, 4),
          'axes.labelsize' : 'medium',
          'axes.titlesize' : 'large',
          'xtick.labelsize' : 'medium',
          'ytick.labelsize' : 'medium',
         }
plt.rcParams.update(params)

NameError: name 'tf' is not defined

In [None]:
## Machine specific code
# NOTE(review): everything below is commented-out TensorFlow GPU memory-growth
# setup; `tf` is not imported in the import cell of this notebook, so these
# lines cannot be re-enabled as they stand.

# physical_device = tf.config.list_physical_devices('GPU')

# if len(physical_device) >0:
    # tf.config.experimental.set_memory_growth(physical_device[0],True)


In [None]:
# Synthetic two-class "moons" data set. Without noise the two classes form
# perfect half circles; the added randomness makes the points look more like
# real-life data.
moon_options = {
    'n_samples': N_SAMPLE,          # how many points to generate
    'noise': NOISE,
    'shuffle': True,
    'random_state': RANDOM_STATE,
}
X, y = datasets.make_moons(**moon_options)
X.shape, y.shape

In [None]:
# Hold-out split, stratified on y and seeded with RANDOM_STATE.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    stratify=y,
    random_state=RANDOM_STATE,
)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
# Check whether PyTorch can use a CUDA GPU on this machine.
torch.cuda.is_available()

In [None]:
# In PyTorch the device is chosen manually: use the GPU when one is available,
# otherwise fall back to the CPU.
has_gpu = torch.cuda.is_available()
device = 'cuda' if has_gpu else 'cpu'
device

## Making the model using PyTorch

In [None]:
input_dim = X_train.shape[1]  # number of input features

# Layer widths from input to output; the layers are passed to nn.Sequential
# as separate arguments, not as a list (nn = torch.nn, "neural network").
layer_sizes = [input_dim, 5, 5, 4, 3, 2]

# First Linear layer, then a (ReLU, Linear) pair for every following width,
# so there is a ReLU activation between layers but none after the last one.
layers = [nn.Linear(layer_sizes[0], layer_sizes[1])]
for n_in, n_out in zip(layer_sizes[1:-1], layer_sizes[2:]):
    layers += [nn.ReLU(), nn.Linear(n_in, n_out)]

model = nn.Sequential(*layers).to(device = device)

In [None]:
# Display the layer-by-layer structure of the network.
model

In [None]:
# PyTorch works on tensors, so the NumPy splits are converted and placed on
# the selected device here. Features are stored as float32 and labels as
# int64 class indices.
tensor_opts = {'device': device}  # put every tensor on the available device

train_X = torch.tensor(X_train, dtype=torch.float32, **tensor_opts)
train_y = torch.tensor(y_train, dtype=torch.int64, **tensor_opts)

test_X = torch.tensor(X_test, dtype=torch.float32, **tensor_opts)
test_y = torch.tensor(y_test, dtype=torch.int64, **tensor_opts)

In [None]:
# Inspect the training features after conversion to a tensor.
train_X

`optimizer.zero_grad()` clears the gradients of all the parameters that the optimizer is managing. Gradients are accumulated in PyTorch by default during the backward pass (when you call loss.backward()), so this step ensures that the gradients don’t mix between different training iterations.

`optimizer.step()` uses the gradients that were calculated during the backward pass (`loss.backward()`) to adjust the parameters of the model.
The adjustments are based on the optimization algorithm (e.g., SGD, Adam, etc.) and its hyperparameters (e.g., learning rate, momentum).

In [None]:
### Loss function and optimizer (the PyTorch counterpart of Keras' compile step)
# CrossEntropyLoss takes raw logits and integer class labels, which is why the
# model ends in a plain Linear layer without softmax.
loss_fn = nn.CrossEntropyLoss()

# Adam keeps two running statistics per weight
optimizer = torch.optim.Adam(model.parameters(), lr = ALPHA)  # lr is the learning rate

# train() only switches layers such as Dropout/BatchNorm to their training
# behaviour; it does not decide whether weights get updated. Nothing in this
# loop leaves training mode, so it is set once instead of on every epoch.
model.train()

for epoch in range(EPOCHS):
    # Forward pass over the full training set. Despite the name, these are raw
    # logits, not probabilities.
    predict_proba = model(train_X)
    curr_loss = loss_fn(predict_proba, train_y)

    # Backpropagation (the PyTorch counterpart of Keras' fit step)
    optimizer.zero_grad()  # gradients accumulate across backward() calls, so clear the old ones first
    curr_loss.backward()   # compute the gradient of the loss for every parameter
    optimizer.step()       # move the parameters one step according to the optimizer

In [None]:
# Raw model outputs (logits) for the training set. `output` is computed in this
# cell so that it does not depend on a cell that is executed later.
with torch.no_grad():  # inspection only, no gradients needed
    output = model(train_X)
output

In [None]:
# Inference only: no_grad() avoids building the autograd graph for this pass.
with torch.no_grad():
    output = model(train_X)

# dim is like axis in a DataFrame: argmax over dim 1 picks the class with the
# highest logit for each sample. .cpu() moves the result to the CPU so that it
# can be converted to a NumPy array.
y_pred = torch.argmax(output, dim = 1).cpu().numpy()
y_pred

In [None]:
# Per-class precision, recall and F1 on the training split.
print(classification_report(train_y.cpu().numpy(), y_pred))

In [None]:
del model  # discard the trained network; a freshly initialised one is built below

# Same architecture as before: input_dim -> 5 -> 5 -> 4 -> 3 -> 2,
# with a ReLU activation after every layer except the last.
hidden_stack = [
    nn.Linear(input_dim, 5), nn.ReLU(),   # 5 neurons
    nn.Linear(5, 5), nn.ReLU(),           # 5 inputs, 5 neurons
    nn.Linear(5, 4), nn.ReLU(),           # 5 inputs, 4 neurons
    nn.Linear(4, 3), nn.ReLU(),
    nn.Linear(3, 2),
]
model = nn.Sequential(*hidden_stack)
model = model.to(device = device)

In [None]:
### Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()  # takes raw logits and integer class labels

# Adam keeps two running statistics per weight
optimizer = torch.optim.Adam(model.parameters(), lr = ALPHA)  # lr is the learning rate

# Lists collecting the per-epoch progress
loss = []     # training loss
tloss = []    # test loss
n_epoch = []  # epoch index
acc = []      # training accuracy
tacc = []     # test accuracy

# The labels never change, so convert them to NumPy once instead of every epoch.
train_labels = train_y.cpu().numpy()
test_labels = test_y.cpu().numpy()

for epoch in range(EPOCHS):
    # ---- training step on the full training set ----
    model.train()  # eval() is called further down, so switch back on every epoch
    predict_proba = model(train_X)  # raw logits, despite the name
    curr_loss = loss_fn(predict_proba, train_y)

    # Backpropagation
    optimizer.zero_grad()  # gradients accumulate across backward() calls, so clear the old ones first
    curr_loss.backward()
    optimizer.step()       # move the parameters one step according to the optimizer

    loss.append(curr_loss.item())

    # Training accuracy from the logits of this forward pass (taken before the update)
    y_pred = torch.argmax(predict_proba, 1).cpu().numpy()
    curr_acc = accuracy_score(train_labels, y_pred)
    acc.append(curr_acc)

    # ---- evaluation on the held-out split ----
    model.eval()
    with torch.no_grad():  # evaluation needs no gradients
        test_proba = model(test_X)
        test_loss = loss_fn(test_proba, test_y)
    tloss.append(test_loss.item())

    # Convert the predictions to a CPU NumPy array before handing them to
    # scikit-learn, exactly as for the training predictions; a tensor on the
    # GPU cannot be converted implicitly.
    y_pred = torch.argmax(test_proba, 1).cpu().numpy()
    test_acc = accuracy_score(test_labels, y_pred)
    tacc.append(test_acc)

    n_epoch.append(epoch)

    if epoch % 500 == 0:
        print(f'Epoch: {epoch:>5d} |Loss: {curr_loss:.5f}/{test_loss:.5f} | ACC: {curr_acc: .5f}/{test_acc: .5f}')

In [None]:
# Number of epochs recorded in the history lists.
len(n_epoch)

In [None]:
# One row per epoch, with training and test loss / accuracy side by side.
history_columns = {
    'epoch': n_epoch,
    'loss': loss,
    'test_loss': tloss,
    'acc': acc,
    'test_Acc': tacc,
}
hist_df = pd.DataFrame(history_columns)

In [None]:
# Display the per-epoch training history.
hist_df

In [None]:
display(hist_df.head())
fig, ax = plt.subplots(1,2)

# Left panel: training vs. test loss per epoch (the x axis is the row index,
# which equals the epoch number)
hist_df.plot(y= ['loss','test_loss'], ax= ax[0])
ax[0].set(title='Loss', xlabel='Epoch', ylabel='Loss')

# Right panel: training vs. test accuracy per epoch
hist_df.plot(y= ['acc','test_Acc'], ax= ax[1])
ax[1].set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')

fig.tight_layout()

In [None]:
# Define the loss function: the criterion used to compare y and the prediction.
# nn.CrossEntropyLoss is used here because this notebook is built on PyTorch
# (TensorFlow is not imported). Like the sparse, from-logits cross-entropy in
# Keras, it takes integer class labels and raw model outputs, so no sigmoid or
# softmax is needed on the last layer.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Loss for the first training sample. PyTorch losses are called as
# (model output, target); .item() returns the value as a plain Python float.
with torch.no_grad():
    prediction = model(train_X[:1])
loss_fn(prediction, train_y[:1]).item()

In [None]:
# For classification the metric is accuracy.
# NOTE(review): compile(optimizer=..., loss=..., metrics=...) is the Keras API,
# but `model` in this notebook is a torch nn.Sequential. This cell looks like a
# leftover from a TensorFlow version of the notebook — confirm and remove.
model.compile(optimizer = 'adam', loss= loss_fn, metrics= ['accuracy'])

In [None]:
# Model fitting:
# supply x, y and validation data; at each epoch check how the model is doing on the validation set.
# NOTE(review): fit() is the Keras API, but `model` in this notebook is a torch
# nn.Sequential that is trained with an explicit loop. Likely a TensorFlow
# leftover — confirm and remove.

history =  model.fit(x= X_train, y=y_train, validation_data= [X_test, y_test], epochs= EPOCHS)

In [None]:
# Keep an eye on the loss: it should go down epoch after epoch; a rise at one
# or two places does no harm.
# If the training loss goes up, there is a problem.
# If spikes appear, check the model.
# If the validation loss starts coming up, we suspect overfitting,
# e.g. when the number of epochs is kept at 2000 here.

In [None]:
# NOTE(review): `history` is only assigned by the Keras-style model.fit(...) call
# in this notebook; it is not produced by the PyTorch training loop.
history.history.keys() # history is an object; its .history attribute is a dictionary
# .history is an attribute of history

In [None]:
# List the attributes of the `history` object.
# NOTE(review): `history` is only assigned by the Keras-style model.fit(...) call.
dir(history)

In [None]:
# NOTE(review): `history` is only assigned by the Keras-style model.fit(...) call.
history.history  
# dictionary

In [None]:
# NOTE(review): this cell overwrites the PyTorch `hist_df` with the Keras-style
# history and expects the columns 'loss', 'val_loss', 'accuracy' and
# 'val_accuracy'. `history` is only assigned by the model.fit(...) call.
hist_df= pd.DataFrame(history.history)
display(hist_df.head())
fig, ax = plt.subplots(1,2)

# the required plot
hist_df.plot(y= ['loss','val_loss'], ax= ax[0])
hist_df.plot(y= ['accuracy','val_accuracy'], ax= ax[1])

In [None]:
# NOTE(review): fn_plot_tf_hist is not imported in this notebook; the import cell
# brings in fn_plot_torch_hist from utils.helper instead. Confirm which helper is
# intended and which columns it expects.
fn_plot_tf_hist(hist_df=hist_df) #Using function

In [None]:
# nn.Sequential has no Keras-style predict(), so call the model directly on the
# training tensor. y_pred holds the raw outputs as a NumPy array with one row
# per sample and one column per class; argmax over axis 1 gives the class.
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    y_pred = model(train_X).cpu().numpy()
accuracy_score(y_train, y_pred.argmax(axis=1))

In [None]:
# Confusion matrix of the training labels against the predicted classes,
# drawn on a small square figure without grid lines or colour bar.
fig, ax = plt.subplots(figsize = (4,4))

predicted_classes = y_pred.argmax(axis=1)
cm = confusion_matrix(y_train, predicted_classes)

plost = ConfusionMatrixDisplay(cm, display_labels = [0,1])
plost.plot(ax = ax, cmap = 'Blues', colorbar = False)
ax.grid(False)

In [None]:
# nn.Sequential has no Keras-style predict(), so call the model directly on the
# test tensor. y_pred holds the raw outputs as a NumPy array with one row per
# sample and one column per class; argmax over axis 1 gives the class.
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    y_pred = model(test_X).cpu().numpy()
accuracy_score(y_test, y_pred.argmax(axis = 1))

In [None]:
cm = confusion_matrix(y_test, y_pred.argmax(axis = 1))
plat = ConfusionMatrixDisplay(cm, display_labels = [0, 1])
fig, ax =plt.subplots(figsize = (4,4))

plat.plot(ax = ax, cmap = 