In [44]:
import re
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from functools import partial
from sklearn.model_selection import train_test_split
import tempfile

In [45]:
# Spatial size [height, width] expected of every image tensor.
IMAGE_SIZE = [1024, 1024]
# Passed as num_parallel_calls so tf.data picks the parallelism level itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [63]:
# Hold out 20% of the examples for validation; random_state pins the shuffle seed.
# NOTE(review): file paths and label rows are paired purely by position, so this
# assumes labels_retained.npy was written in the same order glob() returns the
# paths -- confirm against whatever produced the label file.
(train_fnames, valid_fnames,
 train_labels, valid_labels) = train_test_split(
    tf.io.gfile.glob('train_jpg/*.jpg'),
    np.load('labels_retained.npy'),
    random_state=0,
    test_size=0.2,
)

In [61]:
# Define functions for loading data.
# Turn a loaded JPEG image into a tensor.
def preprocess_image(image):
    """Decode raw JPEG bytes into a float32 RGB tensor of size IMAGE_SIZE.

    Args:
        image: Scalar string tensor holding the encoded JPEG file contents.

    Returns:
        A float32 tensor of shape [*IMAGE_SIZE, 3]; pixel values are scaled
        from the decoded 0-255 range down to 0-1 before resizing.
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    # resize, not reshape: reshape only reinterprets the existing pixel buffer,
    # so it errors when the element count differs from IMAGE_SIZE and silently
    # scrambles rows when the count matches but height/width do not. resize
    # interpolates to the target [height, width] (leaving images that already
    # have that size unchanged) and also pins the static shape.
    image = tf.image.resize(image, IMAGE_SIZE)
    return image

# Load JPEG files.
def load_preprocess_image(path):
    """Read the file at `path` and return it as a preprocessed image tensor.

    Args:
        path: Location of the JPEG file to read.

    Returns:
        Whatever `preprocess_image` produces for the file's raw contents.
    """
    raw_bytes = tf.io.read_file(path)
    image_tensor = preprocess_image(raw_bytes)
    return image_tensor

# Make a dataset.
def load_dataset(filenames, labels):
    """Build a tf.data.Dataset that yields (image, label) pairs.

    Args:
        filenames: Image file paths, one per example.
        labels: Labels aligned by position with `filenames`.

    Returns:
        A dataset whose elements are (preprocessed image, label) tuples, in
        the same order as the inputs.
    """
    # Slice paths and labels together: from_tensor_slices requires matching
    # first dimensions, so a filename/label count mismatch is rejected here.
    # Zipping two separately built datasets would instead stop silently at
    # the shorter one.
    path_label_ds = tf.data.Dataset.from_tensor_slices((filenames, labels))

    def _load_pair(path, label):
        # Only the path is transformed; the label passes through untouched.
        return load_preprocess_image(path), label

    return path_label_ds.map(_load_pair, num_parallel_calls=AUTOTUNE)

In [64]:
# Build one input pipeline per split from the file lists and labels above.
train_ds = load_dataset(filenames=train_fnames, labels=train_labels)
valid_ds = load_dataset(filenames=valid_fnames, labels=valid_labels)