# **#1 Load dataset from GCP Cloud Storage**

In [3]:
from google.colab import auth
# Authenticate the Colab user up front; the Cloud Storage client created later
# in the notebook presumably relies on these credentials.
auth.authenticate_user()

In [4]:
from google.cloud import storage

In [5]:
# GCP project the storage client is bound to.
project_id = "onyx-sequencer-436913-i3"
# Cloud Storage client used below to look up the bucket and fetch the archive.
client = storage.Client(project=project_id)

In [6]:
# Bucket and object name of the dataset archive in Cloud Storage.
bucket_name = "cifake"
file_path = "cifake-real-and-ai-generated-synthetic-images.zip"
# Local path the archive is downloaded to (same file name, under /content).
destination = "/content/cifake-real-and-ai-generated-synthetic-images.zip"

In [7]:
# Look up the bucket, reference the archive object and download it to `destination`.
# NOTE(review): the archive is fetched again every time this cell runs; consider
# skipping the download when `destination` already exists.
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename(destination)

In [8]:
import zipfile

# Unpack the downloaded archive into its own folder under /content; the
# train/ and test/ directories read further down live inside it.
extract_dir = "/content/cifake-real-and-ai-generated-synthetic-images"
with zipfile.ZipFile(destination, mode='r') as archive:
    archive.extractall(path=extract_dir)

# **#2 Prepare Training and Test Sets**

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, metrics, callbacks, optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.metrics import Precision, Recall, AUC

In [10]:
train_data = '/content/cifake-real-and-ai-generated-synthetic-images/train'

# Loader settings for the training split: labels come from the sub-directory
# names, images are resized to 120x120 and served in batches of 1024.
# With validation_split=0.1 and subset="training", 90% of the files in the
# folder are loaded; the remaining 10% are held out and not read by this cell.
train_loader_options = dict(
    labels='inferred',
    validation_split=0.1,
    subset="training",
    seed=123,
    image_size=(120, 120),
    batch_size=1024,
)
train_ds = tf.keras.utils.image_dataset_from_directory(train_data, **train_loader_options)

Found 100000 files belonging to 2 classes.
Using 90000 files for training.


In [11]:
# Class names inferred from the training sub-directories.
# Presumably the position in this list is the integer label - TODO confirm.
train_ds.class_names

['FAKE', 'REAL']

In [12]:
test_data = '/content/cifake-real-and-ai-generated-synthetic-images/test'

# Same loader settings as the training split, except that only the
# "validation" slice of the test folder is requested.
# NOTE(review): with validation_split=0.1 this loads just 10% of the test
# folder (2000 of 20000 files), so evaluation does not cover the full test
# set - confirm this is intentional (e.g. to limit memory use).
test_loader_options = dict(
    labels='inferred',
    validation_split=0.1,
    subset="validation",
    seed=123,
    image_size=(120, 120),
    batch_size=1024,
)
test_ds = tf.keras.utils.image_dataset_from_directory(test_data, **test_loader_options)

Found 20000 files belonging to 2 classes.
Using 2000 files for validation.


In [13]:
# Class names inferred from the test sub-directories; should match the training split.
test_ds.class_names

['FAKE', 'REAL']

In [14]:
# Pull every batch out of the tf.data pipeline as NumPy arrays, separating the
# image batches from the label batches in a single pass over the dataset.
X_train, y_train = zip(*[(images.numpy(), labels.numpy()) for images, labels in train_ds])

# Join the per-batch arrays along the sample axis into one array each.
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train)

In [15]:
# Pull every batch out of the tf.data pipeline as NumPy arrays, separating the
# image batches from the label batches in a single pass over the dataset.
X_test, y_test = zip(*[(images.numpy(), labels.numpy()) for images, labels in test_ds])

# Join the per-batch arrays along the sample axis into one array each.
X_test = np.concatenate(X_test, axis=0)
y_test = np.concatenate(y_test)

In [16]:
# Sanity check: (number of training images, height, width, channels).
X_train.shape

(90000, 120, 120, 3)

In [17]:
# Sanity check: one label per training image.
y_train.shape

(90000,)

In [18]:
# Sanity check: (number of test images, height, width, channels).
X_test.shape

(2000, 120, 120, 3)

In [19]:
# Sanity check: one label per test image.
y_test.shape

(2000,)

In [20]:
# Show the distinct label values of BOTH splits. A notebook cell only displays
# its last expression, so two separate lines would silently drop the y_train
# result; returning a tuple displays both.
np.unique(y_train), np.unique(y_test)

array([0, 1], dtype=int32)

# **#3 Normalise X_train and X_test**

In [21]:
# Scale pixel values by 1/255 so they fall in [0, 1].
# The division is done in place: `X = X / 255` would allocate a second
# full-size array first (X_train is 90000 x 120 x 120 x 3 floats), doubling
# peak memory for no benefit.
# NOTE: this cell is not idempotent - re-running it without rebuilding
# X_train / X_test divides by 255 a second time.
X_train /= 255
X_test /= 255

In [22]:
# Inspect the scaled training array.
# NOTE(review): a summary such as `X_train.min(), X_train.max()` would confirm
# the [0, 1] range more directly than the raw array dump.
X_train

array([[[[1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.63398683e-02, 0.00000000e+00, 6.53594849e-04],
         ...,
         [3.52941789e-02, 2.74510402e-02, 3.13726105e-02],
         [3.92156877e-02, 3.13725509e-02, 3.52941193e-02],
         [3.92156877e-02, 3.13725509e-02, 3.52941193e-02]],

        [[1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.63398683e-02, 0.00000000e+00, 6.53594849e-04],
         ...,
         [3.52941789e-02, 2.74510402e-02, 3.13726105e-02],
         [3.92156877e-02, 3.13725509e-02, 3.52941193e-02],
         [3.92156877e-02, 3.13725509e-02, 3.52941193e-02]],

        [[1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.56862754e-02, 0.00000000e+00, 0.00000000e+00],
         [1.63398683e-02, 0.00000000e+00, 6.53594849e-04],
         ...,
         [4.05229256e-02, 3.13725993e-02, 3.52941677e-02],
         [

# **#4 CNN Baseline Model**

In [23]:
# Seed Python, NumPy and TensorFlow so the weight initialisation below is
# repeatable on "Restart & Run All" (GPU kernels may still add some noise).
tf.keras.utils.set_random_seed(123)

# Baseline CNN for binary classification of 120x120 RGB images:
# two small conv + max-pool stages, then a tiny dense head ending in a single
# sigmoid unit.
model = Sequential([
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape` to the first Conv2D; newer Keras versions warn about the
    # latter for Sequential models.
    layers.Input(shape=(120, 120, 3)),

    layers.Conv2D(8, kernel_size=(5, 5), activation='relu', padding='valid'),
    layers.MaxPool2D(pool_size=(2, 2)),

    layers.Conv2D(4, kernel_size=(3, 3), padding='valid', activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),

    layers.Flatten(),

    layers.Dense(2, activation='relu'),
    # Single sigmoid output: a score in (0, 1) for the class labelled 1.
    layers.Dense(1, activation='sigmoid'),
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# Configure training: Adam optimiser, binary cross-entropy to match the single
# sigmoid output, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [25]:
# Train on the in-memory arrays for 50 epochs in batches of 1024.
# Keep the returned History object so the per-epoch loss/accuracy can be
# inspected or plotted later; previously it was discarded and the cell merely
# echoed its repr.
# NOTE(review): no validation data is passed, so only training metrics are logged.
history = model.fit(X_train, y_train, epochs=50, batch_size=1024)

Epoch 1/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 105ms/step - accuracy: 0.6227 - loss: 0.6293
Epoch 2/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.7509 - loss: 0.5005
Epoch 3/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.7621 - loss: 0.4849
Epoch 4/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 59ms/step - accuracy: 0.7697 - loss: 0.4666
Epoch 5/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.7852 - loss: 0.4449
Epoch 6/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.7947 - loss: 0.4273
Epoch 7/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.8169 - loss: 0.3972
Epoch 8/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 60ms/step - accuracy: 0.8391 - loss: 0.3658
Epoch 9/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7eb6e24e36a0>

In [26]:
# Score the trained model on the held-out arrays. With the compile settings
# used in this notebook, evaluate returns the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
[test_loss, test_accuracy]

[0.2965637147426605, 0.8809999823570251]

In [27]:
# Persist the trained model to an HDF5 file in the current working directory.
# NOTE(review): recent Keras releases treat .h5 as a legacy format and recommend
# the native .keras format - confirm before changing, since other code may load
# this exact file name.
model.save('cnn_model.h5')



In [28]:
# Peek at one scaled test image (index 3) before feeding it to the model.
X_test[3]

array([[[0.40392157, 0.4392157 , 0.35686275],
        [0.40392157, 0.4392157 , 0.35686275],
        [0.41633984, 0.451634  , 0.36928102],
        ...,
        [0.53856224, 0.52287596, 0.51111126],
        [0.54901963, 0.53333336, 0.52156866],
        [0.54901963, 0.53333336, 0.52156866]],

       [[0.40392157, 0.4392157 , 0.35686275],
        [0.40392157, 0.4392157 , 0.35686275],
        [0.41633984, 0.451634  , 0.36928102],
        ...,
        [0.53856224, 0.52287596, 0.51111126],
        [0.54901963, 0.53333336, 0.52156866],
        [0.54901963, 0.53333336, 0.52156866]],

       [[0.4       , 0.43529412, 0.3529412 ],
        [0.4       , 0.43529412, 0.3529412 ],
        [0.41252723, 0.44782135, 0.3654684 ],
        ...,
        [0.5300655 , 0.5143792 , 0.5026145 ],
        [0.5405229 , 0.5248366 , 0.5130719 ],
        [0.5405229 , 0.5248366 , 0.5130719 ]],

       ...,

       [[0.41437903, 0.3581699 , 0.3555555 ],
        [0.41437903, 0.3581699 , 0.3555555 ],
        [0.4218954 , 0

In [39]:
# Predict a single image. Slicing (3:4) instead of indexing keeps the leading
# batch axis, so the input already has shape (1, 120, 120, 3).
prediction = model.predict(X_test[3:4])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


In [40]:
# Raw model output for the single image: one sigmoid score in a (1, 1) array.
prediction

array([[0.87068963]], dtype=float32)

In [41]:
# Extract the lone score from the (1, 1) output array.
prediction[0, 0]

0.87068963

In [42]:
# Ground-truth label of test image 3, to compare against its predicted score.
y_test[3]

1

In [43]:
# Ground-truth label of test image 100, which is predicted in the next cell.
y_test[100]

0

In [44]:
# Predict test image 100 on its own; the 100:101 slice keeps the batch axis.
prediction = model.predict(X_test[100:101])
# Show the single sigmoid score.
prediction[0, 0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


0.0014086282

In [45]:
# Ground-truth label of test image 999, which is predicted in the next cell.
y_test[999]

1

In [46]:
# Predict test image 999 on its own; the 999:1000 slice keeps the batch axis.
prediction = model.predict(X_test[999:1000])
# Show the single sigmoid score.
prediction[0, 0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


0.9123783

In [48]:
# Reminder of the test array's shape before predicting on all of it.
X_test.shape

(2000, 120, 120, 3)

In [49]:
# Score the whole test array in one call. This rebinds `prediction`, which
# previously held single-image results.
prediction = model.predict(X_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


In [52]:
# One sigmoid score per test image, as a column vector.
prediction.shape

(2000, 1)

In [55]:
# Labels are 1-D, so the predictions must be flattened before lining them up.
y_test.shape

(2000,)

In [57]:
import pandas as pd

# Collapse the (n, 1) prediction column into a 1-D vector so it lines up with y_test.
predictions_flat = prediction.flatten()

# Side-by-side table of the true label and the model's raw score for each
# test image. 'Predicted Labels' holds probabilities at this point, not classes.
column_data = {'True Labels': y_test, 'Predicted Labels': predictions_flat}
comparison_df = pd.DataFrame(column_data)

In [59]:
# 'Predicted Labels' may still hold raw sigmoid probabilities at this point.
# Comparing those floats directly with the 0/1 labels is almost never True, so
# threshold at 0.5 first. This also works unchanged if the column has already
# been converted to 0/1 labels.
predicted_classes = (comparison_df['Predicted Labels'] >= 0.5).astype(int)
comparison_df['Correct Prediction'] = comparison_df['True Labels'] == predicted_classes

In [62]:
def one_hot_encode_predictions(x, threshold=0.5):
    """Convert a predicted probability into a hard 0/1 class label.

    Despite the name, this does not one-hot encode anything: it thresholds a
    single score.

    Args:
        x: Predicted score for the class labelled 1.
        threshold: Scores strictly below this value map to 0, everything else
            maps to 1. Defaults to 0.5, the cut-off that used to be hard-coded.

    Returns:
        int: 0 if ``x < threshold``, otherwise 1.
    """
    return 0 if x < threshold else 1

# Replace every score in the column with its thresholded 0/1 label.
# Note that this overwrites the probabilities stored in 'Predicted Labels'.
hard_labels = [one_hot_encode_predictions(score) for score in comparison_df['Predicted Labels']]
comparison_df['Predicted Labels'] = hard_labels

In [64]:
# Flag each row where the thresholded prediction agrees with the true label.
true_labels = comparison_df['True Labels']
predicted_labels = comparison_df['Predicted Labels']
comparison_df['Correct Prediction'] = predicted_labels == true_labels

In [65]:
# Preview the first five rows of the comparison table.
comparison_df.head(5)

Unnamed: 0,True Labels,Predicted Labels,Correct Prediction
0,1,1,True
1,0,0,True
2,0,0,True
3,1,1,True
4,0,0,True


In [68]:
# Share of correct vs. incorrect predictions. normalize=True divides by the
# actual number of rows instead of a hard-coded 2000, so the fractions stay
# right if the size of the evaluation set ever changes.
comparison_df['Correct Prediction'].value_counts(normalize=True)

Unnamed: 0_level_0,count
Correct Prediction,Unnamed: 1_level_1
True,0.881
False,0.119
