Setting Directory

In [1]:
import os
from pathlib import Path

In [2]:
import logging

In [3]:
%pwd

'/Volumes/T7/DL_Skin_Cancer_Project/skin_cancer_diagnosis/research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'/Volumes/T7/DL_Skin_Cancer_Project/skin_cancer_diagnosis'

Training Component

Import Configurations

In [6]:
from skinCancerDiagnosis.entity.config_entity import TrailTrainingConfig
from skinCancerDiagnosis.config.configuration import ConfugarationManager
from skinCancerDiagnosis.components.data_prep import DataGenerator

Import Nesessary Libraries

In [7]:
import tensorflow as tf

In [8]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

Training Component

In [9]:
class vgg19Training:
    def __init__(self, config:TrailTrainingConfig):
        self.config = config
        self.early_stopping = EarlyStopping(monitor='val_loss',patience=config.params_patience,restore_best_weights=True)

    def define_base_model(self):
        base_model = VGG19(weights=self.config.params_weights, include_top=self.config.params_include_top)

        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        predictions = Dense(8, activation='softmax')(x)

        model = Model(inputs=base_model.input, outputs=predictions)

        for layer in base_model.layers:
            layer.trainable = False

        logging.info(model.summary())
        
        return model
    
    @staticmethod
    def save_model(path:Path, model:tf.keras.Model):
        model_path = os.path.join(path,'vgg19.h5')
        model.save(model_path)
    
    def get_model_and_compile(self):
        with tf.device('/GPU:0'):
            self.vgg19_model = self.define_base_model()
            self.vgg19_model.compile(
                optimizer = Adam(learning_rate=self.config.params_learning_rate),
                loss = "categorical_crossentropy",
                metrics = ['accuracy']
            )
    
    def train_model(self,train_generator,val_generator):
        with tf.device('/GPU:0'):
            self.history = self.vgg19_model.fit(
                train_generator,
                steps_per_epoch = len(train_generator),
                epochs = self.config.params_epochs,
                validation_data = val_generator,
                validation_steps = len(val_generator),
                callbacks = [self.early_stopping]
            )

        self.save_model(
            path=self.config.root_dir,
            model=self.vgg19_model
        )



Training Pipeline

In [10]:
try:
    config = ConfugarationManager()

    data_prep_config = config.get_data_prep_config()
    generator = DataGenerator(config=data_prep_config)
    train_generator = generator.get_train_generator()
    val_generator = generator.get_val_generator()
    
    trail_train_config = config.get_trail_training_config()
    vgg_trainer = vgg19Training(config=trail_train_config)
    vgg_trainer.get_model_and_compile()
    vgg_trainer.train_model(train_generator=train_generator,val_generator=val_generator)

except Exception as e:
    raise e

[2024-04-20 11:42:18,194: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-04-20 11:42:18,196: INFO: common: yaml file: params.yaml loaded successfully]
[2024-04-20 11:42:18,197: INFO: common: created directory at: artifacts]
Found 21491 images belonging to 8 classes.
Found 1910 images belonging to 8 classes.
[2024-04-20 11:42:20,134: INFO: common: created directory at: artifacts/trail_training]


2024-04-20 11:42:20.144951: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-20 11:42:20.145003: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-04-20 11:42:20.145019: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-04-20 11:42:20.145611: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-20 11:42:20.145659: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


[2024-04-20 11:42:21,099: INFO: 2374917882: None]
Epoch 1/100


2024-04-20 11:42:21.923754: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
  self._warn_if_super_not_called()


[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m580s[0m 856ms/step - accuracy: 0.5279 - loss: 1.4887 - val_accuracy: 0.5550 - val_loss: 1.3081
Epoch 2/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 3/100


2024-04-20 11:52:01.866716: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_30]]
2024-04-20 11:52:01.866754: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 11:52:01.866761: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 11:52:01.866764: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 11:52:01.866794: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 12950760342411188371
2024-04-20 11:52:01.866801: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3308000584102463897
2024-04-2

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m917s[0m 1s/step - accuracy: 0.5775 - loss: 1.2001 - val_accuracy: 0.5775 - val_loss: 1.2469
Epoch 4/100


2024-04-20 12:07:18.855654: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 12:07:18.855678: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 12:07:18.855682: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15165130125865868466
2024-04-20 12:07:18.855690: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 5565017664726728354
2024-04-20 12:07:18.855697: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 12950760342411188371
2024-04-20 12:07:18.855701: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/Shape

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 600us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 5/100


2024-04-20 12:07:19.226163: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 12:07:19.226186: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_10]]
2024-04-20 12:07:19.226194: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9423312555939678451
2024-04-20 12:07:19.226199: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 2844383230074721271
2024-04-20 12:07:19.226206: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16803737733907186056
2024-04-20 12:07:19.226210: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 17842189774097085152
2024-04-

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1010s[0m 2s/step - accuracy: 0.5942 - loss: 1.1473 - val_accuracy: 0.5822 - val_loss: 1.1786
Epoch 6/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 7/100


2024-04-20 12:24:09.578795: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 12:24:09.579246: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-04-20 12:24:09.579258: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 18095855055721677251
2024-04-20 12:24:09.579263: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 663200369810785647
2024-04-20 12:24:09.579266: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3308000584102463897
2024-04-20 12:24:09.579269: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 1618462945501448721
2024-04-20 

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m995s[0m 1s/step - accuracy: 0.6010 - loss: 1.1274 - val_accuracy: 0.5927 - val_loss: 1.1558
Epoch 8/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 9/100


2024-04-20 12:40:44.431696: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/Shape/_14]]
2024-04-20 12:40:44.431973: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 12:40:44.431984: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 12:40:44.431992: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15165130125865868466
2024-04-20 12:40:44.431998: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 12950760342411188371
2024-04-20 12:40:44.432002: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 5565017664726

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1085s[0m 2s/step - accuracy: 0.6018 - loss: 1.1307 - val_accuracy: 0.5859 - val_loss: 1.1673
Epoch 10/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 11/100


2024-04-20 12:58:49.392238: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 12:58:49.392262: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11749444730991379000
2024-04-20 12:58:49.392268: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 12:58:49.392274: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6671623724907996558
2024-04-20 12:58:49.392280: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 12:58:49.392283: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14601439287949195720
2024-04-20 12:58:49.392286: I tensorflow/core/framework/local_rendezv

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1204s[0m 2s/step - accuracy: 0.6049 - loss: 1.1078 - val_accuracy: 0.5822 - val_loss: 1.1921
Epoch 12/100


2024-04-20 13:18:53.902049: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 13:18:53.902079: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11749444730991379000
2024-04-20 13:18:53.902083: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 13:18:53.902092: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6671623724907996558
2024-04-20 13:18:53.902095: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 13:18:53.902106: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14601439287949195720
2024-04-20 13:18:53.902109: I tensorflow/core/framework/local_rendezv

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 13/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1265s[0m 2s/step - accuracy: 0.6182 - loss: 1.0774 - val_accuracy: 0.5927 - val_loss: 1.1387
Epoch 14/100


2024-04-20 13:39:59.868903: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 13:39:59.868945: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/ArgMax/_16]]
2024-04-20 13:39:59.868973: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 12950760342411188371
2024-04-20 13:39:59.868977: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 13:39:59.869005: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16517005651984862645
2024-04-20 13:39:59.869019: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 140256692426

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 15/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1122s[0m 2s/step - accuracy: 0.6107 - loss: 1.0771 - val_accuracy: 0.5942 - val_loss: 1.1678
Epoch 16/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 17/100


2024-04-20 13:58:41.919059: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 13:58:41.919093: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11749444730991379000
2024-04-20 13:58:41.919096: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 13:58:41.919100: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6671623724907996558
2024-04-20 13:58:41.919103: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 13:58:41.919113: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14601439287949195720
2024-04-20 13:58:41.919117: I tensorflow/core/framework/local_rendezv

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1084s[0m 2s/step - accuracy: 0.6123 - loss: 1.0726 - val_accuracy: 0.5921 - val_loss: 1.1631
Epoch 18/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 19/100


2024-04-20 14:16:45.597167: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 14:16:45.597197: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11749444730991379000
2024-04-20 14:16:45.597201: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 14:16:45.597205: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6671623724907996558
2024-04-20 14:16:45.597211: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 14:16:45.597223: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14601439287949195720
2024-04-20 14:16:45.597231: W tensorflow/core/framework/local_rendezv

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m875s[0m 1s/step - accuracy: 0.6177 - loss: 1.0691 - val_accuracy: 0.5827 - val_loss: 1.1485
Epoch 20/100
[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 21/100


2024-04-20 14:31:20.357339: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-20 14:31:20.357366: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11749444730991379000
2024-04-20 14:31:20.357370: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 518402207504589776
2024-04-20 14:31:20.357373: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6671623724907996558
2024-04-20 14:31:20.357376: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13641577357546919972
2024-04-20 14:31:20.357383: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14601439287949195720
2024-04-20 14:31:20.357419: I tensorflow/core/framework/local_rendezv

[1m672/672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.6187 - loss: 1.0604

KeyboardInterrupt: 