<a href="https://colab.research.google.com/github/walidbrini/TensorFlow-Learning/blob/main/DigitRecognizer_LeNet5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming a download to disk.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# each archive is unpacked into KAGGLE_INPUT_PATH/<directory>.
# NOTE(review): the URL below is a signed link carrying X-Goog-Date=20240413T134802Z
# and X-Goog-Expires=259200 (presumably seconds), so it has most likely expired
# and needs to be regenerated before this cell can download anything.
DATA_SOURCE_MAPPING = 'digit-recognizer:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F3004%2F861823%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240413%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240413T134802Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D4f31916b9852ea4d84302d37b01790e95c8f8b27b297e1b376c2a58f9e3ce511844bd7865df03fa048180cd568a967268ec22842cab7d502b71598443f4111d208b13a644148eb25c344e3e46c9be0441e339bf1e7f73d85dccc41efb891b337950222cfa6f9d73865800ca8b0028320caf5b610d49df6a874d285b1ed98bac88083b68166214fc39fa4403a26313d9861ef3836bf47bb0564a3f81f21c109b2f8f637dd764f8b03cf2ee38b885d6495fcef92c348ca7b505f80f248e9074bf18260017f9e0d3da0cadc3beafb9f8967fb6c6b745466bd1a75297033e6544056fb229fe2a4c423d07980a2311685fb46b4d8012ab1c78e02ae2aa2404429403d'

# Directories that mimic Kaggle's filesystem layout in this environment.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible code — possibly a leftover.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download each "<directory>:<percent-encoded URL>" entry of DATA_SOURCE_MAPPING
# and unpack the archive into KAGGLE_INPUT_PATH/<directory>. A failed download
# is reported and skipped so the remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so the parse stays correct even if the URL
    # part of a mapping ever contains an unencoded colon.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length may be missing or zero; then only the byte counter
            # advances and the bar stays empty instead of raising.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # Stream the response in CHUNK_SIZE pieces until read() returns b''.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                # Clamp to the bar width in case more bytes arrive than announced.
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise risk reading a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # shadow the imported module for the rest of the notebook.
                # NOTE: extractall() trusts the member paths stored in the
                # archive — only use with archives from a trusted source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Must precede OSError: HTTPError is a subclass of it.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


# Import Libraries


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import and explore data

In [None]:
# Read the training CSV and separate it into the target column ('label')
# and the feature columns (everything else).
df_train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

y = df_train['label']
X = df_train.drop(columns='label')

In [None]:
# Plotting libraries used by this cell
import matplotlib.pyplot as plt
import seaborn as sns

# Which label values occur in the training data
unique_labels = y.unique()
print("Unique Labels:", unique_labels)

# How many samples carry each label
label_counts = y.value_counts()
print("\nLabel Counts:\n", label_counts)

# Bar chart of the number of samples per label
plt.figure(figsize=(8, 6))
sns.countplot(x=y)
plt.title("Label Distribution")
plt.show()


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation. The split is stratified on the
# labels and uses a fixed seed so it is reproducible.
X_train, X_validation, y_train, y_validation = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

# Data Normalization Preprocessing

In [None]:
def apply_preprocessing(X, y):
    """Turn flat pixel rows and labels into model-ready inputs.

    Pixel values are divided by 255 and every row is reshaped into a
    28x28 single-channel image; labels are one-hot encoded. The maximum
    raw value and the array shapes are printed along the way.

    Args:
        X: flattened images, one row of 784 values per sample.
        y: labels aligned with the rows of ``X``.

    Returns:
        A ``(X_tensor, y_onehot)`` pair: an array of shape
        ``(n_samples, 28, 28, 1)`` and the ``pd.get_dummies`` encoding of ``y``.
    """
    # Largest raw value before scaling (diagnostic output)
    print(np.max(X))

    pixels = np.array(X / 255)
    print('Shape:', pixels.shape)

    # One (28, 28, 1) image per input row
    n_samples = len(pixels)
    images = pixels.reshape(n_samples, 28, 28, 1)
    print('Shape:', images.shape)

    return images, pd.get_dummies(y)


# Run the training and validation splits through the same preprocessing so
# both end up as (n, 28, 28, 1) image arrays with one-hot encoded labels.
X_train_tensor, y_train_onehot = apply_preprocessing(X_train, y_train)
X_validation_tensor, y_validation_onehot = apply_preprocessing(X_validation, y_validation)

# Check Data

In [None]:
# Preview the first nine training images in a 3x3 grid
for position in range(1, 10):
    plt.subplot(3, 3, position)
    plt.imshow(X_train_tensor[position - 1])


# Create Model

In [None]:
from keras.layers import Dense, Conv2D,Flatten, AveragePooling2D, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential, load_model

In [None]:
import tensorflow as tf

def build_lenet(input_shape):
    """Build and compile a LeNet-5 style convolutional network.

    The network stacks two tanh convolution + average-pooling stages,
    two tanh dense layers (120 and 84 units) and a 10-way softmax output.
    It is compiled with categorical cross-entropy, plain SGD and an
    accuracy metric.

    Args:
        input_shape: shape of a single input sample, e.g. ``(28, 28, 1)``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # Define Sequential Model
    model = tf.keras.Sequential()

    # C1 Convolution Layer
    model.add(tf.keras.layers.Conv2D(filters=6, strides=(1, 1), kernel_size=(5, 5), activation='tanh', input_shape=input_shape))

    # S2 SubSampling Layer
    model.add(tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))

    # C3 Convolution Layer
    model.add(tf.keras.layers.Conv2D(filters=16, strides=(1, 1), kernel_size=(5, 5), activation='tanh'))

    # S4 SubSampling Layer
    model.add(tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))

    # C5 Fully Connected Layer
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=120, activation='tanh'))

    # FC6 Fully Connected Layer
    model.add(tf.keras.layers.Dense(units=84, activation='tanh'))

    # Output Layer: one softmax probability per digit class
    model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

    # Compile the model using the legacy SGD optimizer. `learning_rate` is the
    # supported argument name; the old `lr` alias is deprecated and only
    # triggers a warning.
    optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.1, momentum=0.0, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

# Build the network for 28x28 single-channel images, i.e. the per-sample shape
# produced by the reshape in apply_preprocessing.
input_shape = (28, 28, 1)
lenet_model = build_lenet(input_shape)

# Display model summary
lenet_model.summary()


# Train the model

In [None]:
# Fit for 10 epochs with mini-batches of 32, passing the validation split so
# validation metrics are recorded in the history alongside the training ones.
history = lenet_model.fit(
    X_train_tensor,
    y_train_onehot,
    validation_data=(X_validation_tensor, y_validation_onehot),
    epochs=10,
    batch_size=32,
)

# One panel per metric, side by side. Each row lists: subplot position,
# training key, validation key, the two legend labels, title and y-axis label.
plt.figure(figsize=(12, 4))
panels = (
    (1, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Model accuracy', 'Accuracy'),
    (2, 'loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model loss', 'Loss'),
)
for position, train_key, val_key, train_label, val_label, panel_title, y_label in panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()

# Make predictions on the validation data


In [None]:
# Softmax outputs of the model for every validation image
predictions = lenet_model.predict(X_validation_tensor)

# The index of the largest output is the predicted digit
predicted_labels = np.argmax(predictions, axis=1)

# Share of validation samples whose prediction equals the true label
is_correct = predicted_labels == y_validation
accuracy = np.sum(is_correct) / len(y_validation)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

# Submit

In [None]:
# Build the submission from the competition's test set. The validation
# predictions cannot be submitted: they cover a shuffled 10% slice of train.csv,
# so neither the rows nor the row count match the images Kaggle scores.
df_test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

# Same preprocessing as for the training images: divide by 255 and reshape
# every row into a 28x28 single-channel image.
X_test_tensor = np.array(df_test / 255).reshape(len(df_test), 28, 28, 1)

# Predicted digit = index of the largest softmax output for each test image
test_labels = np.argmax(lenet_model.predict(X_test_tensor), axis=1)

# Create a DataFrame for submission; ImageId is the 1-based row number in test.csv
submission_df = pd.DataFrame({'ImageId': np.arange(1, len(test_labels) + 1), 'Label': test_labels})

# Save submission DataFrame to CSV
submission_df.to_csv('submission.csv', index=False)

# Display the submission DataFrame (optional)
print(submission_df)

