In [17]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
import glob

In [36]:
# Positions (within the physical GPU list) of the devices this notebook may use.
gpu_list = [0]
physical_devices = tf.config.list_physical_devices("GPU")
# Keep only the physical GPUs whose position appears in gpu_list.
final_gpu_list = [
    device for index, device in enumerate(physical_devices) if index in gpu_list
]
tf.config.set_visible_devices(final_gpu_list, "GPU")
logical_gpus = tf.config.list_logical_devices("GPU")
strategy = tf.distribute.MirroredStrategy()
# Number of replicas the strategy keeps in sync; get_data multiplies the batch size by it.
REPLICAS = strategy.num_replicas_in_sync
# Sentinel that lets tf.data pick parallelism / prefetch buffer sizes itself.
AUTO = tf.data.experimental.AUTOTUNE

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [22]:
def create_vgg_16(image_size, filters):
    """Build an uncompiled VGG-16-style network with a single sigmoid output.

    The network has five convolutional stages. Each stage is a run of 3x3
    'same'-padded ReLU convolutions followed by a 2x2 max-pool with stride 2.
    The stages use 2, 2, 3, 3 and 3 convolutions with widths of 1x, 2x, 4x,
    8x and 8x ``filters`` respectively. The result is flattened and passed
    through two 4096-unit ReLU dense layers and a 1-unit sigmoid layer.

    Args:
        image_size: Height and width of the square 3-channel input.
        filters: Number of filters in the first convolutional stage.

    Returns:
        A ``Model`` mapping the image input to the sigmoid output.
    """
    # (width multiplier relative to `filters`, number of conv layers) per stage.
    stage_plan = ((1, 2), (2, 2), (4, 3), (8, 3), (8, 3))

    image_in = layers.Input((image_size, image_size, 3))

    features = image_in
    for multiplier, conv_count in stage_plan:
        stage_width = filters * multiplier
        for _ in range(conv_count):
            features = layers.Conv2D(
                filters=stage_width,
                kernel_size=3,
                padding='same',
                activation='relu',
            )(features)
        # Halve the spatial resolution at the end of every stage.
        features = layers.MaxPool2D(2, 2)(features)

    hidden = layers.Flatten()(features)
    for _ in range(2):
        hidden = layers.Dense(4096, activation='relu')(hidden)
    probability = layers.Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=[image_in], outputs=[probability])

def compile_model(model):
    """Compile ``model`` for binary classification and return it.

    Uses a default-configured SGD optimizer, binary cross-entropy loss, and
    precision and recall as tracked metrics.

    Args:
        model: Keras model to compile (compiled in place).

    Returns:
        The same ``model`` object, now compiled.
    """
    model.compile(
        optimizer=keras.optimizers.SGD(),
        loss=keras.losses.BinaryCrossentropy(),
        metrics=[keras.metrics.Precision(), keras.metrics.Recall()],
    )
    return model

In [23]:
# Build a VGG-16-style network for 64x64 inputs with 64 base filters and compile it.
model = compile_model(create_vgg_16(64, 64))

In [18]:
# Pattern matching every .jpg in the training directory (relative to the working directory).
train_pattern = '../../files/train/*.jpg'
files = glob.glob(train_pattern)

In [26]:
# Label is 1 when the filename's first dot-separated token is "dog", else 0.
# os.path.basename isolates the filename using the platform's own separator;
# splitting on a literal '/' would leave the directory attached on platforms
# where glob joins the final component with '\\', labelling every file 0.
labels = [
    1 if os.path.basename(path).split('.')[0] == 'dog' else 0
    for path in files
]

In [55]:
def read_imgs(img, label, shape):
    """Read a JPEG file and resize it to a square image.

    Args:
        img: Path of the JPEG file to read.
        label: Value returned unchanged alongside the image.
        shape: Side length used for both the height and width of the result.

    Returns:
        Tuple ``(image, label)``; ``image`` is the file decoded with 3
        channels and resized to ``(shape, shape)``.
    """
    raw_bytes = tf.io.read_file(img)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, (shape, shape)), label

def get_data(data, shape=64, repeat = True, batch = True, batch_size = 32):
    """Build a ``tf.data`` input pipeline from ``(path, label)`` pairs.

    Args:
        data: Sequence of ``(image_path, label)`` pairs.
        shape: Side length every image is resized to by ``read_imgs``.
        repeat: When true, the dataset repeats indefinitely.
        batch: When true, elements are grouped into batches.
        batch_size: Per-replica batch size; the batch actually emitted holds
            ``batch_size * REPLICAS`` elements.

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``(image, label)`` elements
        (batched when ``batch`` is true).
    """
    paths = [pair[0] for pair in data]
    targets = [pair[1] for pair in data]
    # One copy of the target size per sample so it travels through the dataset with each row.
    sizes = [shape] * len(data)

    dataset = tf.data.Dataset.from_tensor_slices((paths, targets, sizes))
    # cache() precedes map(), so it holds the (path, label, size) rows, not decoded images.
    dataset = dataset.cache().map(read_imgs, num_parallel_calls=AUTO)
    if repeat:
        dataset = dataset.repeat()
    if batch:
        dataset = dataset.batch(batch_size * REPLICAS)
    return dataset.prefetch(AUTO)

In [56]:
# Pair every image path with the label stored at the same position.
data = [(path, labels[position]) for position, path in enumerate(files)]

In [67]:
# Define the per-replica batch size first and pass it to get_data, so the
# dataset batching and steps_per_epoch below are derived from the same value
# (previously get_data silently used its own default).
batch_size = 32
train_data = get_data(data, 256, batch_size=batch_size)
# Build and compile inside the distribution strategy's scope: the dataset's
# batch is already scaled by REPLICAS, so the model must be created under the
# same strategy for that scaling to be meaningful.
with strategy.scope():
    model = create_vgg_16(256, 64)
    model = compile_model(model)
model_hist = model.fit(
    train_data,
    epochs = 1,
    verbose = 1,
    # The dataset repeats forever, so one epoch is defined as one pass over the files.
    steps_per_epoch = len(files) // (REPLICAS * batch_size)
)



In [95]:
def create_pre_trained(image_size, weights=None):
    """Build and compile a binary classifier on a Keras VGG16 backbone.

    The backbone is created without its classification head and with global
    max pooling, then followed by a single sigmoid unit. The model is built
    and compiled inside ``strategy.scope()``, using the same optimizer, loss
    and metrics as ``compile_model``.

    Note that, despite the function's name, the backbone is randomly
    initialised by default (``weights=None``); pass ``weights='imagenet'``
    (or a path to a weights file) to actually start from pretrained weights.

    Args:
        image_size: Height and width of the square 3-channel input.
        weights: Forwarded to ``keras.applications.VGG16``. Defaults to
            ``None`` to preserve the original random-initialisation behaviour.

    Returns:
        The compiled Keras ``Model``.
    """
    with strategy.scope():
        image_in = layers.Input((image_size, image_size, 3))
        backbone = keras.applications.VGG16(
            include_top=False,
            weights=weights,
            input_shape=(image_size, image_size, 3),
            pooling='max',
        )
        features = backbone(image_in)
        output_layer = layers.Dense(1, activation='sigmoid')(features)
        # Reuse the shared compile helper instead of duplicating its settings here.
        model = compile_model(Model(image_in, output_layer))
    return model

In [97]:
# Define the per-replica batch size first and pass it to get_data, so the
# dataset batching and steps_per_epoch below are derived from the same value
# (previously get_data silently used its own default).
batch_size = 32
train_data = get_data(data, 256, batch_size=batch_size)
pre_model = create_pre_trained(256)
model_hist = pre_model.fit(
    train_data,
    epochs = 1,
    verbose = 1,
    # The dataset repeats forever, so one epoch is defined as one pass over the files.
    steps_per_epoch = len(files) // (REPLICAS * batch_size)
)

2023-05-24 18:51:55.861784: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_3"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
input: "Placeholder/_2"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
      type: DT_INT32
      type: DT_INT32
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 25000
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\025TensorSliceDataset:24"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
      shape {
      }
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_



<keras.engine.functional.Functional at 0x7fa8e84e3610>