In [1]:
import os
import numpy as np

# for shuffling data batches
from sklearn.utils import shuffle

In [2]:
ROOT_DIR = "./raw/numpy"

# Read the complete image and mask arrays into memory in one go.
# NOTE: on a machine with limited RAM this may not be practical; in that case
# build a smaller dataset first (the problem would most likely have shown up
# already while the dataset was being created).
X_dev, y_dev = (
    np.load(ROOT_DIR + suffix) for suffix in ('/images.npy', '/masks.npy')
)

# Shuffle images and masks together before any splitting, in case the samples
# are stored in some order we don't know about. It matters little for this
# particular problem (segmentation), but it is a cheap precaution. The fixed
# random_state keeps the resulting order reproducible between runs.
X_dev, y_dev = shuffle(X_dev, y_dev, random_state=42)
print("X:{} , y:{}".format(X_dev.shape, y_dev.shape))

X:(75, 280, 300, 3) , y:(75, 280, 300)


In [3]:
# Hold out the last 20% of the shuffled samples as the test set; the first 80%
# is kept as X_tr / y_tr.
split_thresh = int(0.8 * len(X_dev))

# np.split at a single index returns [head, tail] as views, i.e. the same
# data as arr[:split_thresh] and arr[split_thresh:].
X_tr, X_test = np.split(X_dev, [split_thresh])
y_tr, y_test = np.split(y_dev, [split_thresh])

print("Dev   :  X:{} , y:{}".format(X_tr.shape, y_tr.shape))
print("Test  :  X:{}  , y:{}".format(X_test.shape, y_test.shape))

Dev   :  X:(60, 280, 300, 3) , y:(60, 280, 300)
Test  :  X:(15, 280, 300, 3)  , y:(15, 280, 300)


In [4]:
# Carve the validation set out of X_tr / y_tr: the last ~20% becomes
# validation, the first 80% is the actual training set.
split_thresh = int(0.8 * len(X_tr))

# np.split at a single index returns [head, tail] as views, i.e. the same
# data as arr[:split_thresh] and arr[split_thresh:].
X_train, X_val = np.split(X_tr, [split_thresh])
y_train, y_val = np.split(y_tr, [split_thresh])

print("Training   :  X:{} , y:{}".format(X_train.shape, y_train.shape))
print("Validation :  X:{}  , y:{}".format(X_val.shape, y_val.shape))

Training   :  X:(48, 280, 300, 3) , y:(48, 280, 300)
Validation :  X:(12, 280, 300, 3)  , y:(12, 280, 300)


In [5]:
# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Create ``dir_path`` (and any missing parents) unless it already exists.

    Prints a short message saying whether the directory was created or was
    already there.

    Args:
        dir_path: Path of the directory to ensure.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory (for
            example a regular file with that name).
    """
    # isdir rather than exists: a regular file at dir_path must not be
    # reported as an existing directory.
    if os.path.isdir(dir_path):
        print("{} already exists".format(dir_path))
        return

    # exist_ok=True tolerates the directory appearing between the check above
    # and this call; a non-directory at dir_path still raises FileExistsError.
    os.makedirs(dir_path, exist_ok=True)
    print("{} created".format(dir_path))

In [6]:
# Make sure the output directory that the final train / validation / test
# arrays are saved into exists.
maybe_create_dir("./raw/numpy_final")

./raw/numpy_final created


In [7]:
# Write every split to ./raw/numpy_final/: one .npy file with the images and
# one with the masks for each of training, validation and test.
for file_name, array in (
    ('train.npy', X_train),
    ('train_masks.npy', y_train),
    ('validation.npy', X_val),
    ('validation_masks.npy', y_val),
    ('test.npy', X_test),
    ('test_masks.npy', y_test),
):
    np.save('./raw/numpy_final/' + file_name, array)