In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
from math import floor, log
from sklearn.model_selection import train_test_split

2023-02-20 20:47:30.939560: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-20 20:47:31.976532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-20 20:47:31.976555: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-02-20 20:47:34.491092: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directo

In [4]:
random_seed = 42
epoch = 50
batch_size = 32

tb_log = "tb_logs"
logdir = tb_log + "/batch_loss"
file_writer = tf.summary.create_file_writer(logdir)

2023-02-20 20:47:37.202359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-02-20 20:47:37.203945: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-02-20 20:47:37.203999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (default): /proc/driver/nvidia/version does not exist
2023-02-20 20:47:37.206243: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
class FCNBlock(tf.keras.Model):
    def __init__(self, out_features: int):
        super(FCNBlock, self).__init__()
        self.linear = tf.keras.layers.Dense(units=out_features)
        self.relu = tf.keras.layers.ReLU()
        self.layer_norm = tf.keras.layers.LayerNormalization()
    
    def call(self, x: tf.Tensor) -> tf.Tensor:
        return self.relu(self.layer_norm(self.linear(x)))        


In [6]:
class Encoder(tf.keras.Model):
    def __init__(self, output_feature: int, num_fcn_blocks: int):
        super(Encoder, self).__init__()
        self.fcn_blocks =  [
            FCNBlock(output_feature * (2 ** i))
            for i in range(1, num_fcn_blocks)
        ]
    def call(self, x: tf.Tensor) -> tf.Tensor:
        for fcn_block in self.fcn_blocks:
            x = fcn_block(x)
        x_normalized = tf.math.l2_normalize(x, axis=-1)
        return x_normalized

In [7]:
class Decoder(tf.keras.Model):
    def __init__(self, output_feature: int, num_fcn_blocks: int):
        super(Decoder, self).__init__()
        assert num_fcn_blocks <= floor(log(output_feature)/log(2)) - 1
        
        self.fcn_blocks = [
            FCNBlock(output_feature // (2 ** i))
            for i in range(1, num_fcn_blocks)
        ]
        # append last block
        self.fcn_blocks.append(
            tf.keras.layers.Dense(1, activation="sigmoid")
        )
        
    def call(self, x: tf.Tensor) -> tf.Tensor:
        for fcn_block in self.fcn_blocks:
            x  = fcn_block(x)
        return x
    

In [8]:
class BreastCancerClassifier(tf.keras.Model):
    def __init__(self, encoder_depth: int, decoder_depth: int, output_feature: int):
        super(BreastCancerClassifier, self).__init__()
        assert decoder_depth <= encoder_depth
        
        self.encoder = Encoder(
            output_feature=output_feature,
            num_fcn_blocks=encoder_depth,
        )
        self.decoder = Decoder(
            output_feature=output_feature * (2 ** encoder_depth),
            num_fcn_blocks=decoder_depth,
        )
        
    def call(self, x: tf.Tensor) -> tf.Tensor:
        x_encoded = self.encoder(x)
        x_logits = self.decoder(x)
        return x_logits

In [9]:
class BatchLossCallback(tf.keras.callbacks.Callback):
    def __init__(self, tb_callback, writer):
        self.tb_callback = tb_callback
        self.writer = writer
        self.step = 0
        
    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_step = 0
    
    def on_epoch_end(self, epoch, logs=None):
        self.epoch_step += 1
    
    def on_train_end(self, epoch, logs=None):
        self.step = 0
        
    def on_test_batch_end(self, batch, logs=None):
        with self.writer.as_default():
            report = f"During evaluation, loss is {logs['loss']}"
            tf.summary.text("batch_losses", report, step=self.step)
            self.step += 1

    def on_train_batch_end(self, batch, logs=None):
        with self.writer.as_default():
            report = f"During training, on epoch: {self.epoch_step} on batch: {batch} loss is {logs['loss']}"
            tf.summary.text("batch_losses", report, step=self.step)
            self.step += 1
    

In [10]:
tb_callback = tf.keras.callbacks.TensorBoard(tb_log)
callbacks = [tb_callback, BatchLossCallback(tb_callback, file_writer)]

In [11]:
df = pd.read_csv("dataset/data.csv")
df = df.iloc[:, :-1]
train, test = train_test_split(df, test_size=0.2, random_state=random_seed)
def split_dataset_to_gt_target(dataframe):
    return dataframe.iloc[:, 2:], dataframe["diagnosis"].str.lower()

def convert_pandas_to_numpy_array(*args) -> tuple:
    return tuple(
        np.array(df) for df in args 
    )
train_x, train_y = split_dataset_to_gt_target(train)
test_x, test_y = split_dataset_to_gt_target(test)

In [12]:
label_mapping = {"m": 1, "b":0}

In [13]:
train_y = train_y.apply(lambda x: label_mapping[x])
test_y = test_y.apply(lambda x: label_mapping[x])

In [14]:
train_x, train_y, test_x, test_y = convert_pandas_to_numpy_array(train_x, train_y, test_x, test_y)

In [15]:
breast_cancer_classifier = BreastCancerClassifier(
    encoder_depth=5,
    decoder_depth=3,
    output_feature=64,
)
optim = tf.keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
)
loss = tf.keras.losses.BinaryCrossentropy()

In [16]:
breast_cancer_classifier.compile(
    optimizer=optim,
    loss=loss,
    metrics=['accuracy', 'AUC'],
)


In [17]:
breast_cancer_classifier.fit(train_x, train_y, epochs=epoch, batch_size=batch_size, callbacks=callbacks)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fe470318130>

In [18]:
breast_cancer_classifier.evaluate(test_x, test_y,  callbacks=callbacks)



[0.12496017664670944, 0.9473684430122375, 0.994595468044281]

In [36]:
from sklearn.metrics import classification_report
import json

In [30]:
preds = np.round(breast_cancer_classifier.predict(test_x)).reshape(test_y.shape[0])



In [42]:
eval_results = classification_report(
    np.where(test_y == 1, "m", "b"),
    np.where(preds == 1, "m", "b"),
    output_dict=True,
)

In [43]:
with open("results/tf_results.json", "w") as f:
    json.dump(eval_results, f)