# **#1 Load dataset from GCP Cloud Storage**

In [2]:
from google.colab import auth
auth.authenticate_user()

In [3]:
from google.cloud import storage

In [4]:
project_id = "onyx-sequencer-436913-i3"
client = storage.Client(project=project_id)

In [5]:
bucket_name = "cifake"
file_path = "cifake-real-and-ai-generated-synthetic-images.zip"
destination = "/content/cifake-real-and-ai-generated-synthetic-images.zip"

In [6]:
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename(destination)

In [7]:
import zipfile

with zipfile.ZipFile(destination, 'r') as zip_ref:
    zip_ref.extractall("/content/cifake-real-and-ai-generated-synthetic-images")

# **#2 Prepare Training and Test Sets**

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, metrics, callbacks, optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.metrics import Precision, Recall, AUC

In [9]:
train_data = '/content/cifake-real-and-ai-generated-synthetic-images/train'
train_ds = tf.keras.utils.image_dataset_from_directory(
  train_data,
  labels='inferred',
  validation_split=0.7,
  subset="training",
  seed=123,
  image_size=(180, 180),
  batch_size=64)

Found 100000 files belonging to 2 classes.
Using 30000 files for training.


In [10]:
train_ds.class_names

['FAKE', 'REAL']

In [11]:
test_data = '/content/cifake-real-and-ai-generated-synthetic-images/test'
test_ds = tf.keras.utils.image_dataset_from_directory(
  test_data,
  labels='inferred',
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(180, 180),
  batch_size=64)

Found 20000 files belonging to 2 classes.
Using 4000 files for validation.


In [12]:
test_ds.class_names

['FAKE', 'REAL']

In [13]:
X_train = []
y_train = []

for images, labels in train_ds:
    X_train.append(images.numpy())
    y_train.append(labels.numpy())

X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train)

In [14]:
X_test = []
y_test = []

for images, labels in test_ds:
    X_test.append(images.numpy())
    y_test.append(labels.numpy())

X_test = np.concatenate(X_test, axis=0)
y_test = np.concatenate(y_test)

In [15]:
X_train.shape

(30000, 180, 180, 3)

In [16]:
y_train.shape

(30000,)

In [17]:
X_test.shape

(4000, 180, 180, 3)

In [18]:
y_test.shape

(4000,)

In [19]:
np.unique(y_train)
np.unique(y_test)

array([0, 1], dtype=int32)

# **#3 Normalise X_train and X_test**

In [20]:
X_train = X_train/255
X_test = X_test/255

In [21]:
X_train

array([[[[0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         ...,
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ]],

        [[0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         ...,
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ]],

        [[0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         [0.42745098, 0.59607846, 0.72156864],
         ...,
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ],
         [0.48235294, 0.6156863 , 0.7254902 ]],

        ...,

        [[0.54901963, 0.5411765 , 0.48235294],
         [0.54901963, 0.5411765 , 0.48235294]

# **#4 CNN Baseline Model**

In [30]:
model = Sequential()

model.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu', padding='valid', input_shape=(180, 180, 3)))
model.add(layers.MaxPool2D(pool_size=(2,2)))

model.add(layers.Conv2D(32, kernel_size=(3,3), padding='valid', activation='relu'))
model.add(layers.MaxPool2D(pool_size=(2,2)))

model.add(layers.Flatten())

model.add(layers.Dense(2, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

In [31]:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', Precision()])

In [32]:
model.fit(X_train, y_train, epochs=10, batch_size=16)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.4998 - loss: 0.6990 - precision_2: 0.4910
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5068 - loss: 0.6931 - precision_2: 0.1076
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5026 - loss: 0.6931 - precision_2: 0.3194
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5097 - loss: 0.6930 - precision_2: 0.0037
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5055 - loss: 0.6931 - precision_2: 0.4993
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.4989 - loss: 0.6932 - precision_2: 0.3352
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5014 - loss: 0.6932 - precision_2

<keras.src.callbacks.history.History at 0x7fadd4245d50>

In [33]:
model.evaluate(X_test, y_test,verbose=0)

[0.6935043931007385, 0.49149999022483826, 0.0]

In [34]:
model.save('cnn_model.h5')

