# Get Data

In [None]:
# Install Kaggle Library
!pip install kaggle

# Before next step, user needs to download the free API KEY from Kaggle settings
# Upload the kaggle.json file to Google Colab Files

# Make directory for Kaggle & Refer to API KEY
! mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json


mkdir: cannot create directory ‘/root/.kaggle’: File exists
cp: cannot stat 'kaggle.json': No such file or directory


In [None]:
# Download the signature-verification dataset archive from Kaggle into the
# current working directory (uses the API key copied to ~/.kaggle/kaggle.json)
! kaggle datasets download -d robinreni/signature-verification-dataset

Downloading signature-verification-dataset.zip to /content
 98% 588M/601M [00:06<00:00, 47.3MB/s]
100% 601M/601M [00:06<00:00, 93.3MB/s]


In [None]:
# ! mkdir sfddata
! unzip signature-verification-dataset.zip -d sfddata

## Delete Folder

In [None]:
import shutil

# Name of the folder to remove; change 'reduced' to target a different folder
folder_to_delete = 'reduced'

# Remove the folder together with everything inside it. With ignore_errors=True
# removal failures (such as the folder not existing) are silently ignored.
shutil.rmtree(path=folder_to_delete, ignore_errors=True)

# Train

## Convert Image to Grayscale


In [None]:
"""Code is used for processing images"""

from PIL import Image, ImageOps
from tqdm import tqdm
import shutil
import os
import cv2
import numpy

THRESHOLD = 128

def image_to_grayscale(image_dir: str) -> Image.Image:
    """Open a single image file and return a grayscale copy of it.

    Used for testing purposes to convert one image to grayscale.

    Args:
        image_dir: Path of the image file to open (a file path, despite
            the parameter name).

    Returns:
        The grayscale image produced by ``ImageOps.grayscale``.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager releases it once the converted copy has been created.
    with Image.open(image_dir) as image:
        return ImageOps.grayscale(image)

def convert_grayscale(directory: str) -> list:
    """Convert every PNG image in the given directory to grayscale.

    Args:
        directory: Folder whose ``.png`` / ``.PNG`` files are converted.
            Other entries are ignored.

    Returns:
        A list of grayscale PIL images, ordered by file name.
    """
    converted_images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the list
    # (and any numbering later derived from it) reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith((".png", ".PNG")):
            # Close each source file as soon as its grayscale copy exists
            with Image.open(os.path.join(directory, filename)) as image:
                converted_images.append(ImageOps.grayscale(image))
    return converted_images

def reduce_noise(directory: str, image_path: str) -> None:
    """Run fastNlMeansDenoising on each PNG in ``directory``.

    Args:
        directory: Folder containing the ``.png`` / ``.PNG`` images to denoise.
        image_path: Folder the denoised images are written to, under their
            original file names. Created if it does not exist.

    Returns:
        None. The results are written to disk.
    """
    os.makedirs(image_path, exist_ok=True)

    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".png", ".PNG")):
            continue
        noise_pic = cv2.imread(os.path.join(directory, filename))
        if noise_pic is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; skip it rather than crash inside the denoiser.
            print(f"Skipping unreadable image: {filename}")
            continue
        # Positional arguments: dst=None, h=15 (filter strength),
        # templateWindowSize=7, searchWindowSize=21
        less_noise_pic = cv2.fastNlMeansDenoising(noise_pic, None, 15, 7, 21)
        cv2.imwrite(os.path.join(image_path, filename), less_noise_pic)

def resize(images: list, dimensions: tuple) -> None:
    """Replace each image in ``images`` with a copy resized to ``dimensions``.

    The list is modified in place; nothing is returned.
    """
    for position, picture in enumerate(images):
        images[position] = picture.resize(dimensions)

# actual code to run
dimensions = (250, 250)  # size every image is resized to, can be changed
directory_path = 'sfddata/sign_data/train/'
subdirectories = ["049", "049_forg"]
save_images = True

# Loop over the subdirectories
for subdirectory in subdirectories:
    curr_directory = directory_path + subdirectory  # where the image files are located
    image_path = 'converted_images/train/' + subdirectory  # where the grayscale images will be stored

    # The "_forg" folder holds the forged signatures and the plain ID folder the
    # genuine ones. The output folder name is what the dataset-loading step later
    # uses as the class label, so the mapping must not be inverted.
    if subdirectory.endswith("_forg"):
        reduced_path = 'reduced/train/forgery'
    else:
        reduced_path = 'reduced/train/genuine'

    gray_images = convert_grayscale(curr_directory)
    resize(gray_images, dimensions)

    os.makedirs(image_path, exist_ok=True)

    if save_images:
        for i in tqdm(range(0, len(gray_images))):
            # Saves the resized grayscale images
            image_filename = os.path.join(image_path, f"image{i}.png")
            gray_images[i].save(image_filename)

    # Denoising runs after the resize: it reads the files just saved in
    # image_path and writes the results to reduced_path.
    reduce_noise(image_path, reduced_path)

100%|██████████| 12/12 [00:00<00:00, 182.37it/s]
100%|██████████| 12/12 [00:00<00:00, 90.24it/s]


## Split Train Images into TensorFlow Datasets

In [None]:
import tensorflow as tf
from keras.utils import image_dataset_from_directory
import tensorflow_datasets as tfds
import pathlib
import numpy as np

BATCH_SIZE = 2232
IMG_HEIGHT = 250
IMG_WIDTH = 250

# Set directory to pull images from
DATA_DIR = pathlib.Path('/content/reduced/train')


# Build the training and validation datasets. With subset="both" the call
# returns a (training, validation) pair; labels are inferred from the
# sub-folder names inside DATA_DIR.
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode='binary',
    validation_split=0.18,
    subset="both",
    shuffle = True,
    seed=1107,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)

dataset_array = tfds.as_numpy(train_ds)

train_dataset = dataset_array[0]
val_dataset = dataset_array[1]

# Gather every batch before saving. Calling np.save inside the batch loop would
# overwrite the file on each iteration and keep only the last batch whenever the
# split holds more than BATCH_SIZE images. Each split is iterated exactly once
# so that images and labels stay paired.
train_batches = list(train_dataset)
images = np.concatenate([batch_images for batch_images, _ in train_batches])
labels = np.concatenate([batch_labels for _, batch_labels in train_batches])
np.save("/content/train_labels.npy", labels)
np.save("/content/train_imgs.npy", images)

val_batches = list(val_dataset)
images = np.concatenate([batch_images for batch_images, _ in val_batches])
labels = np.concatenate([batch_labels for _, batch_labels in val_batches])
np.save("/content/val_labels.npy", labels)
np.save("/content/val_imgs.npy", images)

# Sanity check: show one label from the validation split
print(labels[1])

Found 24 files belonging to 2 classes.
Using 20 files for training.
Using 4 files for validation.
[0.]


# Test

## Convert Image to Grayscale


In [None]:
"""Code is used for processing images"""

from PIL import Image, ImageOps
from tqdm import tqdm
import shutil
import os
import cv2
import numpy

THRESHOLD = 128

def image_to_grayscale(image_dir: str) -> Image.Image:
    """Open a single image file and return a grayscale copy of it.

    Used for testing purposes to convert one image to grayscale.

    Args:
        image_dir: Path of the image file to open (a file path, despite
            the parameter name).

    Returns:
        The grayscale image produced by ``ImageOps.grayscale``.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager releases it once the converted copy has been created.
    with Image.open(image_dir) as image:
        return ImageOps.grayscale(image)

def convert_grayscale(directory: str) -> list:
    """Convert every PNG image in the given directory to grayscale.

    Args:
        directory: Folder whose ``.png`` / ``.PNG`` files are converted.
            Other entries are ignored.

    Returns:
        A list of grayscale PIL images, ordered by file name.
    """
    converted_images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the list
    # (and any numbering later derived from it) reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith((".png", ".PNG")):
            # Close each source file as soon as its grayscale copy exists
            with Image.open(os.path.join(directory, filename)) as image:
                converted_images.append(ImageOps.grayscale(image))
    return converted_images

def reduce_noise(directory: str, image_path: str) -> None:
    """Run fastNlMeansDenoising on each PNG in ``directory``.

    Args:
        directory: Folder containing the ``.png`` / ``.PNG`` images to denoise.
        image_path: Folder the denoised images are written to, under their
            original file names. Created if it does not exist.

    Returns:
        None. The results are written to disk.
    """
    os.makedirs(image_path, exist_ok=True)

    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".png", ".PNG")):
            continue
        noise_pic = cv2.imread(os.path.join(directory, filename))
        if noise_pic is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; skip it rather than crash inside the denoiser.
            print(f"Skipping unreadable image: {filename}")
            continue
        # Positional arguments: dst=None, h=15 (filter strength),
        # templateWindowSize=7, searchWindowSize=21
        less_noise_pic = cv2.fastNlMeansDenoising(noise_pic, None, 15, 7, 21)
        cv2.imwrite(os.path.join(image_path, filename), less_noise_pic)

def resize(images: list, dimensions: tuple) -> None:
    """Replace each image in ``images`` with a copy resized to ``dimensions``.

    The list is modified in place; nothing is returned.
    """
    for position, picture in enumerate(images):
        images[position] = picture.resize(dimensions)

# actual code to run
dimensions = (250, 250)  # size every image is resized to, can be changed
directory_path = 'sfddata/sign_data/test/'
subdirectories = ["049", "049_forg"]
save_images = True

# Loop over the subdirectories
for subdirectory in subdirectories:
    curr_directory = directory_path + subdirectory  # where the image files are located
    image_path = 'converted_images/test/' + subdirectory  # where the grayscale images will be stored

    # The "_forg" folder holds the forged signatures and the plain ID folder the
    # genuine ones. The output folder name is what the dataset-loading step later
    # uses as the class label, so the mapping must not be inverted.
    if subdirectory.endswith("_forg"):
        reduced_path = 'reduced/test/forgery'
    else:
        reduced_path = 'reduced/test/genuine'

    gray_images = convert_grayscale(curr_directory)
    resize(gray_images, dimensions)

    os.makedirs(image_path, exist_ok=True)

    if save_images:
        for i in tqdm(range(0, len(gray_images))):
            # Saves the resized grayscale images
            image_filename = os.path.join(image_path, f"image{i}.png")
            gray_images[i].save(image_filename)

    # Denoising runs after the resize: it reads the files just saved in
    # image_path and writes the results to reduced_path.
    reduce_noise(image_path, reduced_path)

100%|██████████| 12/12 [00:00<00:00, 186.18it/s]
100%|██████████| 12/12 [00:00<00:00, 94.83it/s] 


## Use TensorFlow Datasets to Make Test .npy Files

In [None]:
import tensorflow as tf
from keras.utils import image_dataset_from_directory
import tensorflow_datasets as tfds
import pathlib
import numpy as np

BATCH_SIZE = 2232
IMG_HEIGHT = 250
IMG_WIDTH = 250

# Set directory to pull images from
DATA_DIR = pathlib.Path('/content/reduced/test')


# Build a single dataset holding the whole test set (validation_split=0, so
# nothing is held out); labels are inferred from the sub-folder names.
# NOTE(review): the name `train_ds` is kept for compatibility even though it
# holds test data here and replaces any earlier `train_ds` in the kernel.
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode='binary',
    validation_split=0,
    shuffle = True,
    seed=1107,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)

dataset_array = tfds.as_numpy(train_ds)

# Gather every batch before saving. Calling np.save inside the batch loop would
# overwrite the file on each iteration and keep only the last batch whenever the
# test set holds more than BATCH_SIZE images. The dataset is iterated exactly
# once so that images and labels stay paired.
test_batches = list(dataset_array)
images = np.concatenate([batch_images for batch_images, _ in test_batches])
labels = np.concatenate([batch_labels for _, batch_labels in test_batches])
np.save("/content/test_labels.npy", labels)
np.save("/content/test_imgs.npy", images)

# Sanity check: show one label from the test set
print(labels[1])

Found 24 files belonging to 2 classes.
[1.]


# Export Data

In [None]:
from google.colab import files

# Reload the saved test arrays and print their shapes as a quick sanity check
test_labels = numpy.load("/content/test_labels.npy")
print(test_labels.shape)
test_imgs = numpy.load("/content/test_imgs.npy")
print(test_imgs.shape)

# Download the test label array first, then the test image array.
# The train/val arrays (/content/train_labels.npy, /content/val_labels.npy,
# /content/train_imgs.npy, /content/val_imgs.npy) are not downloaded here;
# add their paths to the tuple below to fetch them as well.
for npy_path in ("/content/test_labels.npy", "/content/test_imgs.npy"):
    files.download(npy_path)

(24, 1)
(24, 250, 250, 3)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>