# Get Data

In [2]:
# Install Kaggle Library
!pip install kaggle

# Before next step, user needs to download the free API KEY from Kaggle settings
# Upload the kaggle.json file to Google Colab Files

# Make directory for Kaggle & Refer to API KEY
! mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json


mkdir: cannot create directory ‘/root/.kaggle’: File exists
cp: cannot stat 'kaggle.json': No such file or directory


In [3]:
# Download Dataset
# Uses the Kaggle CLI to fetch the robinreni/signature-verification-dataset
# archive; relies on the kaggle.json API key having been placed in ~/.kaggle.
! kaggle datasets download -d robinreni/signature-verification-dataset

Dataset URL: https://www.kaggle.com/datasets/robinreni/signature-verification-dataset
License(s): CC0-1.0
Downloading signature-verification-dataset.zip to /content
 98% 591M/601M [00:04<00:00, 209MB/s]
100% 601M/601M [00:04<00:00, 138MB/s]


In [4]:
# ! mkdir sfddata
! unzip signature-verification-dataset.zip -d sfddata

Archive:  signature-verification-dataset.zip
  inflating: sfddata/sign_data/sign_data/test/049/01_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/02_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/03_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/04_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/05_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/06_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/07_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/08_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/09_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/10_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/11_049.png  
  inflating: sfddata/sign_data/sign_data/test/049/12_049.png  
  inflating: sfddata/sign_data/sign_data/test/049_forg/01_0114049.PNG  
  inflating: sfddata/sign_data/sign_data/test/049_forg/01_0206049.PNG  
  inflating: sfddata/sign_data/sign_data/test/049_forg/

# Train: Convert Image to Grayscale


In [8]:
"""Code is used for processing images"""

from PIL import Image, ImageOps
from tqdm import tqdm
import shutil
import os
import cv2
import numpy

# NOTE(review): THRESHOLD is not referenced by any code visible in this notebook — confirm whether it is still needed.
THRESHOLD = 128

def image_to_grayscale(image_dir: str) -> Image.Image:
    """Convert a single image file to grayscale (used for testing purposes).

    Args:
        image_dir: Path handed straight to ``Image.open``, i.e. the image
            file to load.

    Returns:
        The grayscale version of the image as a new PIL image.
    """
    # The context manager closes the source file as soon as the grayscale
    # image has been produced, instead of leaving the handle open.
    with Image.open(image_dir) as image:
        return ImageOps.grayscale(image)

def convert_grayscale(directory: str) -> list:
    """Load every PNG file in a directory and convert it to grayscale.

    Args:
        directory: Directory to scan (non-recursively) for files whose name
            ends in ".png", compared case-insensitively.

    Returns:
        A list of grayscale PIL images, ordered by filename.
    """
    converted_images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and any index-based filenames callers derive from it) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".png"):
            continue
        # Close each source file right after conversion so that processing a
        # large directory does not accumulate open file handles.
        with Image.open(os.path.join(directory, filename)) as image:
            converted_images.append(ImageOps.grayscale(image))
    return converted_images

def reduce_noise(directory: str, image_path: str) -> None:
    """Run cv2.fastNlMeansDenoising on every PNG in a directory.

    Each image is read with OpenCV, denoised, and written to ``image_path``
    under its original filename.

    Args:
        directory: Directory containing the input ".png" / ".PNG" files.
        image_path: Output directory; created if it does not exist yet.

    Raises:
        OSError: If OpenCV cannot read one of the matching files.
    """
    os.makedirs(image_path, exist_ok=True)

    for filename in os.listdir(directory):
        if filename.endswith(".png") or filename.endswith(".PNG"):
            source_path = os.path.join(directory, filename)
            noise_pic = cv2.imread(source_path)
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside OpenCV.
            if noise_pic is None:
                raise OSError(f"Could not read image: {source_path}")
            # Positional arguments: dst=None, h=15, templateWindowSize=7,
            # searchWindowSize=21
            less_noise_pic = cv2.fastNlMeansDenoising(noise_pic, None, 15, 7, 21)
            cv2.imwrite(os.path.join(image_path, filename), less_noise_pic)

def resize(images: list, dimensions: tuple) -> None:
    """Replace every image in the list with the result of resizing it.

    The list is updated in place, slot by slot, with whatever each element's
    ``resize(dimensions)`` call returns; the function itself returns nothing.
    """
    for position, original in enumerate(images):
        images[position] = original.resize(dimensions)

# actual code to run
dimensions = (250, 250) # dimensions for the images, can be changed
directory_path = 'sfddata/sign_data/train/'
save_images = True  # set to False to skip writing the grayscale copies to disk
subdirectories = [d for d in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, d))]

# Loop over the subdirectories
for subdirectory in sorted(subdirectories):
    curr_directory = os.path.join(directory_path, subdirectory)  # where the image files are located
    image_path = os.path.join('converted_images/train', subdirectory)  # where the grayscale images will be stored
    reduced_path = os.path.join('reduced/train', subdirectory)  # where the denoised images will be stored

    gray_images = convert_grayscale(curr_directory)
    resize(gray_images, dimensions)

    os.makedirs(image_path, exist_ok=True)

    if save_images:
        # Save the resized grayscale images as image0.png, image1.png, ...
        for i, gray_image in enumerate(tqdm(gray_images)):
            gray_image.save(os.path.join(image_path, f"image{i}.png"))

    # Denoising runs after the resize: reduce_noise reads the files just
    # written to image_path, so with save_images disabled it only sees
    # whatever is already in that directory.
    reduce_noise(image_path, reduced_path)

100%|██████████| 24/24 [00:00<00:00, 181.80it/s]
100%|██████████| 8/8 [00:00<00:00, 191.88it/s]
100%|██████████| 24/24 [00:00<00:00, 192.00it/s]
100%|██████████| 12/12 [00:00<00:00, 172.65it/s]
100%|██████████| 24/24 [00:00<00:00, 190.19it/s]
100%|██████████| 12/12 [00:00<00:00, 186.87it/s]
100%|██████████| 24/24 [00:00<00:00, 196.71it/s]
100%|██████████| 11/11 [00:00<00:00, 161.99it/s]
100%|██████████| 24/24 [00:00<00:00, 172.13it/s]
100%|██████████| 12/12 [00:00<00:00, 162.99it/s]
100%|██████████| 24/24 [00:00<00:00, 184.00it/s]
100%|██████████| 12/12 [00:00<00:00, 162.10it/s]
100%|██████████| 24/24 [00:00<00:00, 224.56it/s]
100%|██████████| 12/12 [00:00<00:00, 169.03it/s]
100%|██████████| 12/12 [00:00<00:00, 261.18it/s]
100%|██████████| 12/12 [00:00<00:00, 155.31it/s]
100%|██████████| 24/24 [00:00<00:00, 148.23it/s]
100%|██████████| 16/16 [00:00<00:00, 172.17it/s]
100%|██████████| 24/24 [00:00<00:00, 138.06it/s]
100%|██████████| 12/12 [00:00<00:00, 157.81it/s]
100%|██████████| 23/23

# Train: Combine All Data into Forgery or Genuine

In [9]:
import random
import numpy as np

data_path_reduced = "/content/reduced/train/"
! mkdir split
data_path_test = "/content/split"

# Define a list of image extensions
image_extensions = ['.png', '.PNG']

# Create a list of image filenames in 'data_path'
subdirectories = [d for d in os.listdir(data_path_reduced) if os.path.isdir(os.path.join(directory_path, d))]

os.makedirs(data_path_test + "/forgery", exist_ok=True)
os.makedirs(data_path_test + "/genuine", exist_ok=True)


# Loop over the subdirectories
for subdirectory in subdirectories:
  curr_data_path = data_path_reduced + subdirectory
  subdir_imgs = [filename for filename in os.listdir(curr_data_path) if os.path.splitext(filename)[-1] in image_extensions]
  if "forg" in subdirectory:
    for img in subdir_imgs:
      shutil.copy(curr_data_path + "/" + img, os.path.join(data_path_test + "/forgery/", subdirectory + "_" + img))
  else:
    for img in subdir_imgs:
      shutil.copy(curr_data_path + "/" + img, os.path.join(data_path_test + "/genuine/", subdirectory + "_" + img))

# Split Train Images into TensorFlow Datasets

In [10]:
import tensorflow as tf
from keras.utils import image_dataset_from_directory
import tensorflow_datasets as tfds
import pathlib

# NOTE(review): presumably chosen larger than the number of images so that each
# split arrives as a single batch — confirm.
BATCH_SIZE = 2232
IMG_HEIGHT = 250
IMG_WIDTH = 250

# Set directory to pull images from
DATA_DIR = pathlib.Path('/content/split')


# Build the training and validation datasets. With subset="both" the call
# returns the pair (training dataset, validation dataset).
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode='binary',
    validation_split=0.18,
    subset="both",
    shuffle = True,
    seed=1107,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)

dataset_array = tfds.as_numpy(train_ds)

train_dataset = dataset_array[0]
val_dataset = dataset_array[1]


def _collect_batches(numpy_batches):
  """Gather every (images, labels) batch of a dataset into one pair of arrays."""
  image_batches, label_batches = [], []
  for batch_images, batch_labels in numpy_batches:
    image_batches.append(batch_images)
    label_batches.append(batch_labels)
  if len(image_batches) == 1:
    # Dataset fits in one batch: return it directly and skip the extra copy
    return image_batches[0], label_batches[0]
  return np.concatenate(image_batches), np.concatenate(label_batches)


# Calling np.save inside the batch loop overwrites the file on every batch,
# silently keeping only the last one whenever a split holds more than
# BATCH_SIZE images. Collect all batches first, then save once.
images, labels = _collect_batches(train_dataset)
np.save("/content/train_labels.npy", labels)
np.save("/content/train_imgs.npy", images)

images, labels = _collect_batches(val_dataset)
np.save("/content/val_labels.npy", labels)
np.save("/content/val_imgs.npy", images)

# Sanity check: show one validation label
print(labels[1])

Found 1649 files belonging to 2 classes.
Using 1353 files for training.
Using 296 files for validation.
[1.]


# Test: Convert Image to Grayscale


In [11]:
"""Code is used for processing images"""

from PIL import Image, ImageOps
from tqdm import tqdm
import shutil
import os
import cv2
import numpy

# NOTE(review): THRESHOLD is not referenced by any code visible in this notebook — confirm whether it is still needed.
THRESHOLD = 128

def image_to_grayscale(image_dir: str) -> Image.Image:
    """Convert a single image file to grayscale (used for testing purposes).

    Args:
        image_dir: Path handed straight to ``Image.open``, i.e. the image
            file to load.

    Returns:
        The grayscale version of the image as a new PIL image.
    """
    # The context manager closes the source file as soon as the grayscale
    # image has been produced, instead of leaving the handle open.
    with Image.open(image_dir) as image:
        return ImageOps.grayscale(image)

def convert_grayscale(directory: str) -> list:
    """Load every PNG file in a directory and convert it to grayscale.

    Args:
        directory: Directory to scan (non-recursively) for files whose name
            ends in ".png", compared case-insensitively.

    Returns:
        A list of grayscale PIL images, ordered by filename.
    """
    converted_images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and any index-based filenames callers derive from it) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".png"):
            continue
        # Close each source file right after conversion so that processing a
        # large directory does not accumulate open file handles.
        with Image.open(os.path.join(directory, filename)) as image:
            converted_images.append(ImageOps.grayscale(image))
    return converted_images

def reduce_noise(directory: str, image_path: str) -> None:
    """Run cv2.fastNlMeansDenoising on every PNG in a directory.

    Each image is read with OpenCV, denoised, and written to ``image_path``
    under its original filename.

    Args:
        directory: Directory containing the input ".png" / ".PNG" files.
        image_path: Output directory; created if it does not exist yet.

    Raises:
        OSError: If OpenCV cannot read one of the matching files.
    """
    os.makedirs(image_path, exist_ok=True)

    for filename in os.listdir(directory):
        if filename.endswith(".png") or filename.endswith(".PNG"):
            source_path = os.path.join(directory, filename)
            noise_pic = cv2.imread(source_path)
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside OpenCV.
            if noise_pic is None:
                raise OSError(f"Could not read image: {source_path}")
            # Positional arguments: dst=None, h=15, templateWindowSize=7,
            # searchWindowSize=21
            less_noise_pic = cv2.fastNlMeansDenoising(noise_pic, None, 15, 7, 21)
            cv2.imwrite(os.path.join(image_path, filename), less_noise_pic)

def resize(images: list, dimensions: tuple) -> None:
    """Replace every image in the list with the result of resizing it.

    The list is updated in place, slot by slot, with whatever each element's
    ``resize(dimensions)`` call returns; the function itself returns nothing.
    """
    for position, original in enumerate(images):
        images[position] = original.resize(dimensions)

# actual code to run
dimensions = (250, 250) # dimensions for the images, can be changed
directory_path = 'sfddata/sign_data/test/'
save_images = True  # set to False to skip writing the grayscale copies to disk
subdirectories = [d for d in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, d))]

# Loop over the subdirectories (sorted, matching the train preprocessing cell,
# so the processing order does not depend on os.listdir's arbitrary order)
for subdirectory in sorted(subdirectories):
    curr_directory = os.path.join(directory_path, subdirectory)  # where the image files are located
    image_path = os.path.join('converted_images/test', subdirectory)  # where the grayscale images will be stored
    reduced_path = os.path.join('reduced/test', subdirectory)  # where the denoised images will be stored

    gray_images = convert_grayscale(curr_directory)
    resize(gray_images, dimensions)

    os.makedirs(image_path, exist_ok=True)

    if save_images:
        # Save the resized grayscale images as image0.png, image1.png, ...
        for i, gray_image in enumerate(tqdm(gray_images)):
            gray_image.save(os.path.join(image_path, f"image{i}.png"))

    # Denoising runs after the resize: reduce_noise reads the files just
    # written to image_path, so with save_images disabled it only sees
    # whatever is already in that directory.
    reduce_noise(image_path, reduced_path)

100%|██████████| 12/12 [00:00<00:00, 245.46it/s]
100%|██████████| 12/12 [00:00<00:00, 191.25it/s]
100%|██████████| 12/12 [00:00<00:00, 205.03it/s]
100%|██████████| 12/12 [00:00<00:00, 165.09it/s]
100%|██████████| 20/20 [00:00<00:00, 167.72it/s]
100%|██████████| 12/12 [00:00<00:00, 268.21it/s]
100%|██████████| 16/16 [00:00<00:00, 175.55it/s]
100%|██████████| 12/12 [00:00<00:00, 222.77it/s]
100%|██████████| 8/8 [00:00<00:00, 158.73it/s]
100%|██████████| 12/12 [00:00<00:00, 257.92it/s]
100%|██████████| 12/12 [00:00<00:00, 294.77it/s]
100%|██████████| 12/12 [00:00<00:00, 250.10it/s]
100%|██████████| 12/12 [00:00<00:00, 214.25it/s]
100%|██████████| 12/12 [00:00<00:00, 213.80it/s]
100%|██████████| 12/12 [00:00<00:00, 212.00it/s]
100%|██████████| 12/12 [00:00<00:00, 164.26it/s]
100%|██████████| 8/8 [00:00<00:00, 162.57it/s]
100%|██████████| 12/12 [00:00<00:00, 183.20it/s]
100%|██████████| 12/12 [00:00<00:00, 181.33it/s]
100%|██████████| 12/12 [00:00<00:00, 270.39it/s]
100%|██████████| 12/12 [

# Test: Combine all forgeries and genuine

In [12]:
import random
import numpy as np

data_path_reduced = "/content/reduced/test/"
! mkdir test-split
data_path_test = "/content/test-split"

# Define a list of image extensions
image_extensions = ['.png', '.PNG']

# Create a list of image filenames in 'data_path'
subdirectories = [d for d in os.listdir(data_path_reduced) if os.path.isdir(os.path.join(directory_path, d))]

os.makedirs(data_path_test + "/forgery", exist_ok=True)
os.makedirs(data_path_test + "/genuine", exist_ok=True)


# Loop over the subdirectories
for subdirectory in subdirectories:
  curr_data_path = data_path_reduced + subdirectory
  subdir_imgs = [filename for filename in os.listdir(curr_data_path) if os.path.splitext(filename)[-1] in image_extensions]
  if "forg" in subdirectory:
    for img in subdir_imgs:
      shutil.copy(curr_data_path + "/" + img, os.path.join(data_path_test + "/forgery/", subdirectory + "_" + img))
  else:
    for img in subdir_imgs:
      shutil.copy(curr_data_path + "/" + img, os.path.join(data_path_test + "/genuine/", subdirectory + "_" + img))

# Split Test Images into TensorFlow Datasets

In [13]:
import tensorflow as tf
from keras.utils import image_dataset_from_directory
import tensorflow_datasets as tfds
import pathlib

# NOTE(review): presumably chosen larger than the number of images so that the
# whole test set arrives as a single batch — confirm.
BATCH_SIZE = 2232
IMG_HEIGHT = 250
IMG_WIDTH = 250

# Set directory to pull images from
DATA_DIR = pathlib.Path('/content/test-split')


# Load the test images as one dataset (validation_split=0, so nothing is held
# out). The variable keeps the name train_ds even though it holds test data.
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode='binary',
    validation_split=0,
    shuffle = True,
    seed=1107,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)

dataset_array = tfds.as_numpy(train_ds)
print(dataset_array)


def _collect_batches(numpy_batches):
  """Gather every (images, labels) batch of a dataset into one pair of arrays."""
  image_batches, label_batches = [], []
  for batch_images, batch_labels in numpy_batches:
    image_batches.append(batch_images)
    label_batches.append(batch_labels)
  if len(image_batches) == 1:
    # Dataset fits in one batch: return it directly and skip the extra copy
    return image_batches[0], label_batches[0]
  return np.concatenate(image_batches), np.concatenate(label_batches)


# Calling np.save inside the batch loop overwrites the file on every batch,
# silently keeping only the last one whenever the test set holds more than
# BATCH_SIZE images. Collect all batches first, then save once.
images, labels = _collect_batches(dataset_array)
np.save("/content/test_labels.npy", labels)
np.save("/content/test_imgs.npy", images)


Found 500 files belonging to 2 classes.
<tensorflow_datasets.core.dataset_utils._IterableDataset object at 0x7d73e831f670>


# Example to Load Data

In [None]:
import numpy

# train_labels = numpy.load("/content/train_labels.npy")
# print(train_labels.shape)
# train_imgs = numpy.load("/content/train_imgs.npy")
# print(train_imgs.shape)

# val_labels = numpy.load("/content/val_labels.npy")
# print(val_labels.shape)
# val_imgs  = numpy.load("/content/val_imgs.npy")
# print(val_imgs.shape)

# test_labels = numpy.load("/content/test_labels.npy")
# print(test_labels.shape)
# test_imgs = numpy.load("/content/test_imgs.npy")
# print(test_imgs.shape)

# Export Data

In [None]:
from google.colab import files

# download label arrays
# files.download("/content/train_labels.npy")
# files.download("/content/test_labels.npy")
# files.download("/content/val_labels.npy")

# download img arrays
# files.download("/content/test_imgs.npy")
# files.download("/content/train_imgs.npy")
# files.download("/content/val_imgs.npy")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>