In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt  # This library is imported but not used in the code.
import pandas as pd

# Settings shared by the tf.data input pipelines.
AUTOTUNE = tf.data.experimental.AUTOTUNE  # sentinel passed as num_parallel_calls / buffer_size below
IMAGE_SIZE = [224, 224]  # NOTE: not referenced by any of the visible cells

# Collect the TFRecord shard paths of each split.
train_filepaths = tf.io.gfile.glob(
    "/kaggle/input/tpu-getting-started/tfrecords-jpeg-224x224/train/*.tfrec"
)
val_filepaths = tf.io.gfile.glob(
    "/kaggle/input/tpu-getting-started/tfrecords-jpeg-224x224/val/*.tfrec"
)
test_filepaths = tf.io.gfile.glob(
    "/kaggle/input/tpu-getting-started/tfrecords-jpeg-224x224/test/*.tfrec"
)

# Function to decode the image from the TFRecord.
def decode_image(image, label=None, image_shape=(224, 224, 3)):
    """Decode JPEG bytes into a 3-channel image tensor of a fixed shape.

    Args:
        image: Encoded JPEG data, handed to ``tf.image.decode_jpeg``.
        label: Optional value that is passed through unchanged next to the image.
        image_shape: Target shape given to ``tf.reshape``.

    Returns:
        The reshaped image when ``label`` is ``None``; otherwise the tuple
        ``(image, label)``.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)  # force three colour channels
    pixels = tf.reshape(decoded, image_shape)  # pin the shape to image_shape
    return pixels if label is None else (pixels, label)

# Function to read a single TFRecord file.
def read_tfrecord(example, labeled=True):
    """Parse one serialized TFRecord example into an image (and its label).

    Args:
        example: A serialized example, as accepted by
            ``tf.io.parse_single_example``.
        labeled: When true, the record is also expected to hold an int64
            ``"class"`` feature.

    Returns:
        ``(image, label)`` with the label cast to ``tf.int32`` when
        ``labeled`` is true; otherwise just the decoded image.
    """
    # Every record holds the encoded image; labeled records add the class.
    tfrecord_format = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        tfrecord_format["class"] = tf.io.FixedLenFeature([], tf.int64)

    parsed = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_image(parsed['image'])
    if not labeled:
        return image
    return image, tf.cast(parsed['class'], tf.int32)

# Function to load a dataset from a list of TFRecord files.
def load_dataset(filenames, labeled=True):
    """Build a dataset of parsed examples from a list of TFRecord files.

    Args:
        filenames: TFRecord paths given to ``tf.data.TFRecordDataset``.
        labeled: Forwarded to ``read_tfrecord`` to choose between
            ``(image, label)`` pairs and bare images.

    Returns:
        A dataset of parsed, unbatched elements. Deterministic ordering is
        switched off, so the element order is not guaranteed.
    """
    def _parse(record):
        return read_tfrecord(record, labeled=labeled)

    # Trade a reproducible element order for throughput.
    unordered = tf.data.Options()
    unordered.experimental_deterministic = False

    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(unordered)
        .map(_parse, num_parallel_calls=AUTOTUNE)
    )

# Function to configure the dataset.
def get_dataset(filenames, labeled=True, batch_size=32, shuffle_buffer=2048, shuffle=None):
    """Build a batched, prefetched input pipeline from TFRecord files.

    Args:
        filenames: TFRecord paths, forwarded to ``load_dataset``.
        labeled: Forwarded to ``load_dataset``; selects ``(image, label)``
            pairs versus bare images.
        batch_size: Number of elements per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.
        shuffle: Whether to shuffle. ``None`` (the default) shuffles labeled
            data only: shuffling unlabeled inference data gains nothing and
            scrambles the order of the predictions.

    Returns:
        The configured dataset.
    """
    if shuffle is None:
        shuffle = labeled

    dataset = load_dataset(filenames, labeled=labeled)
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    # Prefetch goes last so whole batches, not single elements, are prepared
    # in the background while the model is busy with the previous batch.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    return dataset

# Assemble the convolutional classifier one layer at a time.
model = tf.keras.models.Sequential()

# Two convolution + max-pooling stages on the 224x224 RGB input.
model.add(
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(224, 224, 3),
    )
)
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Classification head: flatten, one hidden layer, then a 104-way softmax.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=128, activation='relu'))
model.add(tf.keras.layers.Dense(units=104, activation='softmax'))

# Sparse cross-entropy takes the integer class labels directly.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Build the input pipelines for the two labeled splits.
train_dataset = get_dataset(train_filepaths)
val_dataset = get_dataset(val_filepaths)

# Fit for ten epochs; the per-epoch metrics are kept in `history`.
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=10,
)

# Score the trained model once more on the validation split.
val_loss, val_acc = model.evaluate(x=val_dataset)
print("Validation accuracy: ", val_acc)

# Prepare the test dataset and make predictions.
# Each prediction has to be written next to the id of the record it was
# computed from. `read_tfrecord` parses only the image of unlabeled records,
# so the id is parsed here, in an unshuffled pipeline.
# NOTE(review): assumes every test record carries a string "id" feature, as
# described on the competition's data page - confirm. Parsing fails loudly
# if the feature is missing.
def _read_test_record(example):
    """Parse one serialized test record into an (image, id) pair."""
    features = tf.io.parse_single_example(
        example,
        {
            "image": tf.io.FixedLenFeature([], tf.string),
            "id": tf.io.FixedLenFeature([], tf.string),
        },
    )
    return decode_image(features["image"]), features["id"]


test_dataset = (
    tf.data.TFRecordDataset(test_filepaths)
    .map(_read_test_record, num_parallel_calls=AUTOTUNE)
    .batch(32)
    .prefetch(buffer_size=AUTOTUNE)
)

# A single pass collects ids and class probabilities together, so the pairing
# does not depend on the dataset yielding the same order twice.
test_ids = []
prediction_batches = []
for image_batch, id_batch in test_dataset:
    prediction_batches.append(model.predict_on_batch(image_batch))
    test_ids.extend(raw_id.decode("utf-8") for raw_id in id_batch.numpy())

predictions = tf.concat(prediction_batches, axis=0).numpy()
predicted_labels = tf.argmax(predictions, axis=1).numpy()

# Create a DataFrame for submission.
submission_df = pd.DataFrame({'id': test_ids, 'label': predicted_labels})
submission_df.to_csv('/kaggle/working/submission.csv', index=False)


In [None]:
import matplotlib.pyplot as plt  # Importing the matplotlib library for plotting.

# Training curves: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy per epoch for both splits.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

# Right panel: loss per epoch for both splits.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.show()


In [None]:
# Take one batch of images and labels from the validation dataset for visualization.
img_batch, lbl_batch = next(iter(val_dataset.take(1)))

# Predict on that batch; the arg-max over the class axis is the predicted label.
predictions_batch = model.predict(img_batch)
predicted_labels_batch = tf.argmax(predictions_batch, axis=1).numpy()

# Convert the labels to NumPy once so the titles show plain integers; on some
# TensorFlow versions an eager tensor is formatted as the verbose
# "tf.Tensor(..., shape=(), dtype=int32)" text instead.
true_labels_batch = lbl_batch.numpy()

# Show up to 16 images in a 4x4 grid, never indexing past the end of the batch.
num_shown = min(16, len(predicted_labels_batch))

plt.figure(figsize=(10, 10))
for i in range(num_shown):
    plt.subplot(4, 4, i + 1)
    plt.imshow(img_batch[i].numpy().astype("uint8"))
    plt.title(f"True: {true_labels_batch[i]}, Predict: {predicted_labels_batch[i]}")
    plt.axis("off")

plt.show()
