# Create and Load TFRecords

A simple TensorFlow 2.0 example to parse a dataset into TFRecord format, and then read that dataset.

In this example, the Titanic Dataset (in CSV format) will be used as a toy dataset, for parsing all the dataset features into TFRecord format, and then building an input pipeline that can be used for training models.

In [1]:
import csv
import requests
import tensorflow as tf

In [2]:

# Download Titanic dataset (in csv format).
d = requests.get("https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/titanic_dataset.csv")
with open("titanic_dataset.csv", "wb") as f:
    f.write(d.content)

In [5]:
# Generate Integer Features.
def build_int64_feature(data):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[data]))

# Generate Float Features.
def build_float_feature(data):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[data]))

# Generate String Features.
def build_string_feature(data):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[data]))

# Generate a TF `Example`, parsing all features of the dataset.
def convert_to_tfexample(survived, pclass, name, sex, age, sibsp, parch, ticket, fare):
    return tf.train.Example(
        features=tf.train.Features(
            feature={
                'survived': build_int64_feature(survived),
                'pclass': build_int64_feature(pclass),
                'name': build_string_feature(name),
                'sex': build_string_feature(sex),
                'age': build_float_feature(age),
                'sibsp': build_int64_feature(sibsp),
                'parch': build_int64_feature(parch),
                'ticket': build_string_feature(ticket),
                'fare': build_float_feature(fare),
            })
    )

In [6]:
# Open dataset file.
with open("titanic_dataset.csv") as f:
    # Output TFRecord file.
    with tf.io.TFRecordWriter("titanic_dataset.tfrecord") as w:
        # Generate a TF Example for all row in our dataset.
        # CSV reader will read and parse all rows.
        reader = csv.reader(f, skipinitialspace=True)
        for i, record in enumerate(reader):
            # Skip header.
            if i == 0:
                continue
            survived, pclass, name, sex, age, sibsp, parch, ticket, fare = record
            # Parse each csv row to TF Example using the above functions.
            example = convert_to_tfexample(int(survived), int(pclass), name, sex, float(age), int(sibsp), int(parch), ticket, float(fare))
            # Serialize each TF Example to string, and write to TFRecord file.
            w.write(example.SerializeToString())

TypeError: 'Allen, Miss. Elisabeth Walton' has type str, but expected one of: bytes