**13. TensorFlow implementation of basic training pipeline**

In [36]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [37]:
# dataset: Breast Cancer Detection
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [38]:
df.drop(columns=['id','Unnamed: 32'],inplace=True)
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [39]:
# Perform train-test-split
X_train,X_test,y_train,y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)

In [40]:
# Pre-processing (normalizing)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [41]:
X_train[:3]

array([[-0.64163347, -0.42308543, -0.67212272, -0.63352921, -0.88759389,
        -0.90990536, -0.78285263, -0.67736344,  0.27513854, -0.77667194,
        -0.75636612,  0.0306427 , -0.86269843, -0.61778058,  0.21662329,
        -0.57069381, -0.55591999, -0.51117214, -0.28605623, -0.72400201,
        -0.65930262, -0.01714601, -0.67796259, -0.6324859 ,  0.20857787,
        -0.4976616 , -0.67109761, -0.35777537,  0.3380444 , -0.76562877],
       [-0.78365387, -0.39831711, -0.80064846, -0.73030307, -0.52986508,
        -0.97418005, -0.77031497, -0.72598393, -0.73912922, -0.4975452 ,
        -0.81816785,  0.61223471, -0.79323853, -0.62321289, -0.52727584,
        -0.85134004, -0.63519986, -0.94263168, -0.83539627, -0.76903114,
        -0.66138995,  1.65346141, -0.65626172, -0.62628856,  0.57481159,
        -0.58408906, -0.4303726 , -0.42678222,  0.12847587, -0.35873186],
       [-0.27643815,  0.72076052, -0.18836025, -0.34909055,  0.32544538,
         0.88509716,  0.01124286,  0.08489785,  0

In [42]:
y_train.head(3)

293    B
410    B
208    B
Name: diagnosis, dtype: object

In [43]:
# label encoding
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)
y_train[:5],y_test[:5]

(array([0, 0, 0, 1, 0]), array([1, 1, 0, 1, 1]))

**Main modelling**

In [44]:
# convert to PyTorch tensors
X_train_tensor = tf.cast(tf.constant(X_train),tf.float64)
X_test_tensor = tf.cast(tf.constant(X_test),tf.float64)

y_train_tensor = tf.cast(tf.constant(y_train),dtype = tf.float64)
y_test_tensor = tf.cast(tf.constant(y_test),dtype = tf.float64)

In [45]:
X_train_tensor.shape,y_train_tensor.shape

(TensorShape([455, 30]), TensorShape([455]))

In [46]:
# defining random seed (ensuring same as torch)
import torch as tr
random_seed = 14
tr.manual_seed(random_seed)

# defining the model

class myNN():
    def __init__(self,input_size):
        self.weights = tf.Variable(tr.rand(input_size,1,dtype = tr.float64),trainable = True,name = 'weights') # (input-features,1)
        self.bias = tf.Variable(tf.zeros([1],dtype = tf.float64),trainable = True, name = 'bias')

    def forward(self,x):
        z = tf.matmul(x,self.weights)+self.bias
        y_pred = tf.cast(tf.sigmoid(z),tf.float64)
        return y_pred

    def loss_function(self, y_pred, y_true):
        epsilon = 1e-7
        y_pred = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)
        log2 = tf.math.log(tf.constant(2.0, dtype=tf.float64))
        loss = -(y_true * tf.math.log(y_pred) / log2 + 
                (1 - y_true) * tf.math.log(1 - y_pred) / log2)
        return tf.reduce_mean(loss)


In [47]:
# important parameter of Neural Network
learning_rate = 0.1
epochs = 100

In [48]:
# training pipeline

### creating model
model = myNN(X_train_tensor.shape[1])
# model.weights,model.bias
print(model.weights)

<tf.Variable 'weights:0' shape=(30, 1) dtype=float64, numpy=
array([[0.55595966],
       [0.44202264],
       [0.63589829],
       [0.48015125],
       [0.33918581],
       [0.17246257],
       [0.86302582],
       [0.84650915],
       [0.28147485],
       [0.13011726],
       [0.67382409],
       [0.9374758 ],
       [0.64435337],
       [0.02952106],
       [0.61627446],
       [0.12969603],
       [0.8779494 ],
       [0.45323562],
       [0.95395624],
       [0.90866755],
       [0.78289562],
       [0.50769862],
       [0.58837452],
       [0.60700985],
       [0.17440583],
       [0.54931121],
       [0.9579063 ],
       [0.53612626],
       [0.75189053],
       [0.58501061]])>


In [49]:
### model training

# Loop no of epochs:
for epochi in range(epochs):

    with tf.GradientTape() as tape:
        # 1. Forward pass
        y_pred = model.forward(X_train_tensor)

        # 2. Loss Calculation
        loss = model.loss_function(y_pred,y_train_tensor)

    # 3. Backward pass
    grads = tape.gradient(loss,[model.weights,model.bias])

    # 4. Update parameters (don't track gradients while parameters update)
    model.weights.assign_sub(learning_rate*grads[0])
    model.bias.assign_sub(learning_rate*grads[1])

    # 6. printing loss in each epoch
    print(f'Epoch: {epochi+1}, Loss:{loss.numpy()}')

Epoch: 1, Loss:5.720763078231457
Epoch: 2, Loss:5.489305919764114
Epoch: 3, Loss:5.246917891175564
Epoch: 4, Loss:4.985842484001012
Epoch: 5, Loss:4.706246534709636
Epoch: 6, Loss:4.416627637383662
Epoch: 7, Loss:4.1123931652364485
Epoch: 8, Loss:3.795022608792063
Epoch: 9, Loss:3.4679256861193943
Epoch: 10, Loss:3.139178647512207
Epoch: 11, Loss:2.8108251063985685
Epoch: 12, Loss:2.489080055827911
Epoch: 13, Loss:2.1997520652642963
Epoch: 14, Loss:1.9452878185500513
Epoch: 15, Loss:1.7308510586949353
Epoch: 16, Loss:1.561255449071613
Epoch: 17, Loss:1.440169610416062
Epoch: 18, Loss:1.3585775621742535
Epoch: 19, Loss:1.3026566217010844
Epoch: 20, Loss:1.26162677065164
Epoch: 21, Loss:1.2294685139329111
Epoch: 22, Loss:1.2030184637309385
Epoch: 23, Loss:1.1805704118893798
Epoch: 24, Loss:1.161147058060867
Epoch: 25, Loss:1.1441408584682817
Epoch: 26, Loss:1.129139038619663
Epoch: 27, Loss:1.1158379491739843
Epoch: 28, Loss:1.104000240839894
Epoch: 29, Loss:1.0934325422247329
Epoch: 30,

In [50]:
model.bias

<tf.Variable 'bias:0' shape=(1,) dtype=float64, numpy=array([-0.52277026])>

In [51]:
# model evaluation
y_pred = model.forward(X_test_tensor)
thresh = 0.5
# y_pred_thresh = [int(y.item()>thresh) for y in y_pred]
y_pred_thresh = [int(y>thresh) for y in y_pred]
y_pred_thresh[:5]

[0, 0, 0, 0, 0]

In [52]:
accuracy = (sum(y_pred_thresh == y_test)/len(y_test))*100
print(f'Accuaracy: {accuracy:.2f} %')

Accuaracy: 64.91 %
