In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the iris measurements from the inputs folder and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, so the CSV appears to
# carry a saved index column -- consider index_col=0 if it is not needed.
iris_csv_path = "./data/01_inputs/iris.csv"
origin = pd.read_csv(iris_csv_path)
origin.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [3]:
from keras.utils import np_utils

# Class names; a name's position in this list is its integer class id.
target_category = ['Setosa', 'Versicolor', 'Virginica']

# Integer class id for every row. A variety missing from the list raises
# ValueError (from list.index).
origin['target'] = origin.variety.map(target_category.index)

# Features: the two petal measurements. Labels: one row per sample produced
# by to_categorical from the integer class ids.
x = origin[["petal.length", "petal.width"]]
class_ids = origin['target'].to_numpy()
# NOTE(review): newer Keras releases may no longer provide `np_utils`;
# `keras.utils.to_categorical` looks like the direct replacement -- confirm
# against the installed version.
y = np_utils.to_categorical(class_ids)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, stratified on the labels, with a
# fixed seed so the split is the same on every run.
split_options = dict(test_size=0.2, random_state=1, stratify=y)
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

In [5]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits.
# NOTE(review): the dataset-building cell visible further down uses
# x_train / x_test rather than these scaled arrays -- confirm that is intended.
scaler = StandardScaler().fit(x_train)
x_train_std, x_test_std = (scaler.transform(part) for part in (x_train, x_test))

In [6]:
# Sanity-check the split sizes: train features, train labels, test features,
# test labels (in that order).
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

(120, 2)
(120, 3)
(30, 2)
(30, 3)


In [7]:
import tensorflow as tf

# Build one tf.data dataset per split; each element is a (features, target)
# pair sliced along the first axis of the inputs.
train_pairs = (x_train, y_train)
test_pairs = (x_test, y_test)
dataset_train = tf.data.Dataset.from_tensor_slices(train_pairs)
dataset_test = tf.data.Dataset.from_tensor_slices(test_pairs)

In [8]:
def _float_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` holding a one-element ``FloatList``."""
    single_float = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=single_float)

def _float_list_feature(value):
    """Wrap a sequence of values in a ``tf.train.Feature`` holding a ``FloatList``."""
    floats = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=floats)

def serialize(features, target):
    """Serialize one (features, target) pair into a scalar ``tf.string`` tensor.

    The pair is packed into a ``tf.train.Example`` with three float features:
    ``petal_length`` (``features[0]``), ``petal_width`` (``features[1]``) and
    ``target`` (every value of ``target``). The proto is built by a Python
    helper that is invoked through ``tf.py_function``.

    Args:
        features: indexable holding petal length at position 0 and petal
            width at position 1.
        target: sequence of floats stored under the ``target`` key.

    Returns:
        The serialized example, reshaped to a scalar ``tf.string`` tensor.
    """
    def _to_example_bytes(feature_values, target_values):
        # Build the Example proto and return its serialized bytes.
        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    'petal_length': _float_feature(feature_values[0]),
                    'petal_width': _float_feature(feature_values[1]),
                    'target': _float_list_feature(target_values),
                }
            )
        )
        return example.SerializeToString()

    serialized = tf.py_function(
        func=_to_example_bytes,
        inp=(features, target),  # passed to the helper above as its arguments
        Tout=tf.string,          # the helper's return type
    )
    # The result is a scalar.
    return tf.reshape(serialized, ())


# Serialize every (features, target) pair of both splits.
serialized_features_dataset_train = dataset_train.map(serialize)
serialized_features_dataset_test = dataset_test.map(serialize)

# Create the output directory up front so this cell also works on a fresh
# checkout (Restart & Run All); makedirs succeeds if it already exists.
features_dir = './data/02_features/iris'
tf.io.gfile.makedirs(features_dir)

# Write each split to '<features_dir>/<split>.tfrecord'. Train is written
# first and test last, so `writer` ends up bound to the test writer as before.
# NOTE(review): tf.data.experimental.TFRecordWriter appears to be marked
# deprecated in recent TensorFlow docs in favour of tf.io.TFRecordWriter --
# confirm against the installed version before migrating.
for split_name, serialized_dataset in (
    ('train', serialized_features_dataset_train),
    ('test', serialized_features_dataset_test),
):
    writer = tf.data.experimental.TFRecordWriter(f'{features_dir}/{split_name}.tfrecord')
    writer.write(serialized_dataset)

In [9]:
# Read back the serialized training examples written above.
train_record_path = './data/02_features/iris/train.tfrecord'
raw_dataset = tf.data.TFRecordDataset(train_record_path)

# Feature description used to parse each serialized example: two scalar
# floats and a length-3 float vector, each with a zero default.
feature_description = dict(
    petal_length=tf.io.FixedLenFeature([], tf.float32, default_value=0),
    petal_width=tf.io.FixedLenFeature([], tf.float32, default_value=0),
    target=tf.io.FixedLenFeature([3], tf.float32, default_value=[0, 0, 0]),
)

def _parse_function(example_proto):
    """Parse one serialized ``tf.Example`` using ``feature_description``.

    Args:
        example_proto: a serialized example, as yielded by ``raw_dataset``.

    Returns:
        Whatever ``tf.io.parse_single_example`` produces for the keys in
        ``feature_description``.
    """
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    return parsed

# Parse every serialized example, then print the first ten parsed records
# as a quick round-trip check.
parsed_dataset = raw_dataset.map(_parse_function)

preview = parsed_dataset.take(10)
for parsed_record in preview:
    print(parsed_record)


{'petal_length': <tf.Tensor: shape=(), dtype=float32, numpy=4.9>, 'petal_width': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>, 'target': <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 0., 1.], dtype=float32)>}
{'petal_length': <tf.Tensor: shape=(), dtype=float32, numpy=5.2>, 'petal_width': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>, 'target': <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 0., 1.], dtype=float32)>}
{'petal_length': <tf.Tensor: shape=(), dtype=float32, numpy=1.3>, 'petal_width': <tf.Tensor: shape=(), dtype=float32, numpy=0.3>, 'target': <tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 0., 0.], dtype=float32)>}
{'petal_length': <tf.Tensor: shape=(), dtype=float32, numpy=4.4>, 'petal_width': <tf.Tensor: shape=(), dtype=float32, numpy=1.2>, 'target': <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 0.], dtype=float32)>}
{'petal_length': <tf.Tensor: shape=(), dtype=float32, numpy=3.6>, 'petal_width': <tf.Tensor: shape=(), dtype=flo