In [None]:
# Use TFRecords as the data source.
# Path.glob yields entries in filesystem order, which differs between machines;
# sorting keeps index-based splits over these arrays reproducible.
TRAIN_TFRECORDS = np.array(
    sorted(str(path) for path in (DATA_ROOT / "train_tfrecords").glob("*.tfrec"))
)
TEST_TFRECORDS = np.array(
    sorted(str(path) for path in (DATA_ROOT / "test_tfrecords").glob("*.tfrec"))
)

# Schema handed to tf.io.parse_single_example: every feature is a scalar.
FEATURE_DESCRIPTION = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'image_name': tf.io.FixedLenFeature([], tf.string),
    'target': tf.io.FixedLenFeature([], tf.int64)
}


def decode_image(image):
    """Decode JPEG bytes into a 3-channel float32 image divided by 255.

    Args:
        image: encoded JPEG data accepted by ``tf.image.decode_jpeg``.

    Returns:
        The decoded image cast to ``tf.float32`` and divided by 255.0.
    """
    pixels = tf.image.decode_jpeg(image, channels=3)
    return tf.cast(pixels, tf.float32) / 255.0


def read_tfrecord(example, is_train, n_classes=N_CLASSES):
    """Parse one serialized TFRecord example into an (image, second item) pair.

    Args:
        example: a serialized example as accepted by
            ``tf.io.parse_single_example``.
        is_train: if true, the second item is the label; otherwise it is the
            image name.
        n_classes: unused by this function; kept so existing callers that pass
            it keep working.

    Returns:
        ``(image, label)`` with ``label`` cast to ``tf.int32`` when
        ``is_train`` is true, else ``(image, image_name)`` where
        ``image_name`` is the raw string feature.
    """
    if is_train:
        features = FEATURE_DESCRIPTION
    else:
        # Only request what this branch returns: a FixedLenFeature without a
        # default makes parsing fail when the record lacks that key, and
        # unlabeled records may not carry "target".
        features = {
            key: spec for key, spec in FEATURE_DESCRIPTION.items() if key != "target"
        }
    example = tf.io.parse_single_example(example, features)
    image = decode_image(example["image"])
    if is_train:
        label = tf.cast(example["target"], tf.int32)
        return image, label
    # The image name is a plain string feature, not JPEG bytes, so it must be
    # returned as-is rather than passed through decode_image.
    image_name = example["image_name"]
    return image, image_name


def load_dataset(filenames, is_train=True, ordered=True):
    """Build a parsed dataset from TFRecord files.

    Args:
        filenames: TFRecord paths passed to ``tf.data.TFRecordDataset``.
        is_train: forwarded to ``read_tfrecord``; when ``ordered`` is false it
            also decides the determinism flag (set to ``not is_train``).
        ordered: when true, the dataset options are left at their defaults.

    Returns:
        The record dataset with the options applied and every record mapped
        through ``read_tfrecord``.
    """
    read_options = tf.data.Options()
    if not ordered:
        # Relax ordering only for training reads.
        read_options.experimental_deterministic = not is_train
    parse_example = partial(read_tfrecord, is_train=is_train)
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    return records.with_options(read_options).map(
        parse_example, num_parallel_calls=AUTOTUNE
    )


def get_train_dataset(train_tfrecords, batch_size):
    """Build the augmented, fully shuffled, batched training dataset.

    Args:
        train_tfrecords: TFRecord paths to read training examples from.
        batch_size: number of examples per batch.

    Returns:
        A dataset of augmented examples, shuffled with a buffer as large as
        the dataset, then batched and prefetched.
    """
    # Count the raw records instead of the parsed dataset: parsing maps records
    # one-to-one, so the size is identical, but this avoids JPEG-decoding every
    # image just to size the shuffle buffer.
    ds_size = sum(
        1 for _ in tf.data.TFRecordDataset(train_tfrecords, num_parallel_reads=AUTOTUNE)
    )
    train_ds = load_dataset(train_tfrecords, is_train=True, ordered=False)
    train_ds = train_ds.map(data_augment, num_parallel_calls=AUTOTUNE)
    train_ds = train_ds.shuffle(ds_size)
    train_ds = train_ds.batch(batch_size).prefetch(AUTOTUNE)
    return train_ds
    

def get_val_dataset(val_tfrecords, batch_size):
    """Build the ordered, batched, cached validation dataset.

    Args:
        val_tfrecords: TFRecord paths to read validation examples from.
        batch_size: number of examples per batch.

    Returns:
        The batched, cached and prefetched validation dataset.
    """
    # is_train=True here selects the labelled (image, label) record format.
    val_ds = load_dataset(val_tfrecords, is_train=True, ordered=True)
    val_ds = val_ds.batch(batch_size).cache()
    val_ds = val_ds.prefetch(AUTOTUNE)
    # Without this return the function handed callers None.
    return val_ds
    

def get_test_dataset(test_tfrecords, batch_size):
    """Build the ordered, batched test dataset.

    Args:
        test_tfrecords: TFRecord paths to read test examples from.
        batch_size: number of examples per batch.

    Returns:
        The test dataset, batched and prefetched.
    """
    records = load_dataset(test_tfrecords, is_train=False, ordered=True)
    batched = records.batch(batch_size)
    return batched.prefetch(AUTOTUNE)


def get_kfold_datasets(train_batch_size, val_batch_size, n_split=5):
    """Yield one (train, validation) dataset pair per K-fold split.

    The split is taken over the entries of ``TRAIN_TFRECORDS``.

    Args:
        train_batch_size: batch size for each training dataset.
        val_batch_size: batch size for each validation dataset.
        n_split: number of folds.

    Yields:
        ``(train_ds, val_ds)`` tuples, one per fold.
    """
    splitter = KFold(n_splits=n_split)
    for fold_train, fold_val in splitter.split(TRAIN_TFRECORDS):
        yield (
            get_train_dataset(TRAIN_TFRECORDS[fold_train], train_batch_size),
            get_val_dataset(TRAIN_TFRECORDS[fold_val], val_batch_size),
        )

In [None]:
# Smoke test: read the first raw training record and parse it as a labelled
# example, leaving the result in `image` and `label` for inspection.
dataset = tf.data.TFRecordDataset(TRAIN_TFRECORDS, num_parallel_reads=AUTOTUNE)
for example in dataset.take(1):
    image, label = read_tfrecord(example, is_train=True)

In [None]:
!pip install -e ../

In [None]:
!pip install -r ../requirements.txt

In [None]:
from pathlib import Path
import logging

import src

# Configure root logging at INFO level; each record is printed as a
# month/day/year 12-hour timestamp followed by the message.
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

In [None]:
# Load the experiment configuration, then launch training with it.
# NOTE(review): src.utility.run_debug is defined outside this notebook; it
# presumably invokes the callable it receives and returns its result — confirm.
config_dir = src.constant.CONFIG_ROOT
config_name = "config_v2.yaml"

cfg = src.utility.run_debug(
    lambda: src.utility.load_config(config_name, config_dir)
)
# Optional override of the validation batch size (currently disabled).
#cfg.train.val_batch_size = 16
src.utility.run_debug(
    lambda: src.train.train.train(cfg)
)

In [None]:
import glob

# Report how many JPEG files sit in each of the directories ../data/0 .. ../data/4.
for i in range(5):
    # `.fomat` was a typo that raised AttributeError; the label also printed the
    # literal "i" instead of the directory index.
    n_images = len(glob.glob("../data/{}/*.jpg".format(i)))
    print("{} : {}".format(i, n_images))