Date: Jan 2, 2022

In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

In [3]:
train_path = tf.keras.utils.get_file(
    "iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
test_path = tf.keras.utils.get_file(
    "iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)

In [4]:
train.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [5]:
train.shape

(120, 5)

In [6]:
test.describe()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
count,30.0,30.0,30.0,30.0,30.0
mean,5.836667,3.01,3.836667,1.206667,1.0
std,0.653628,0.463383,1.537459,0.694775,0.742781
min,4.3,2.2,1.1,0.1,0.0
25%,5.5,2.725,2.3,0.625,0.25
50%,5.75,3.0,4.25,1.3,1.0
75%,6.3,3.3,4.9,1.575,1.75
max,7.1,4.2,5.9,2.5,2.0


In [7]:
test.info

<bound method DataFrame.info of     SepalLength  SepalWidth  PetalLength  PetalWidth  Species
0           5.9         3.0          4.2         1.5        1
1           6.9         3.1          5.4         2.1        2
2           5.1         3.3          1.7         0.5        0
3           6.0         3.4          4.5         1.6        1
4           5.5         2.5          4.0         1.3        1
5           6.2         2.9          4.3         1.3        1
6           5.5         4.2          1.4         0.2        0
7           6.3         2.8          5.1         1.5        2
8           5.6         3.0          4.1         1.3        1
9           6.7         2.5          5.8         1.8        2
10          7.1         3.0          5.9         2.1        2
11          4.3         3.0          1.1         0.1        0
12          5.6         2.8          4.9         2.0        2
13          5.5         2.3          4.0         1.3        1
14          6.0         2.2          4

In [8]:
y_train = train.pop('Species')
y_test = test.pop('Species')

In [9]:
y_train.head()

0    2
1    1
2    2
3    0
4    0
Name: Species, dtype: int64

In [10]:
test.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth
0,5.9,3.0,4.2,1.5
1,6.9,3.1,5.4,2.1
2,5.1,3.3,1.7,0.5
3,6.0,3.4,4.5,1.6
4,5.5,2.5,4.0,1.3


In [11]:
print(train.shape)
print(test.shape)

(120, 4)
(30, 4)


In [12]:
train.dtypes

SepalLength    float64
SepalWidth     float64
PetalLength    float64
PetalWidth     float64
dtype: object

In [13]:
test.dtypes

SepalLength    float64
SepalWidth     float64
PetalLength    float64
PetalWidth     float64
dtype: object

### Classification

DNNClassifier in Tensorflow can be used to classify different classes.

- Step 1: generate feature column
- Step 2: intiate the model instance with the feature column
- Step 3: define an input function (either return data object or wrapping with function return
- Step 4: train the model using input function
- Step 5: evaluate the model using input function
- Step 6: predict using the model with input function (might be a new one)

In [14]:
# Generate feature columns from numeric inputs
features = train.keys()

feature_columns = []
for feature in features:
    feature_columns.append(tf.feature_column.numeric_column(feature))
print(feature_columns)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [15]:
# Initiate a DNN classifier with 2 hidden layers with 30 & 10 hidden nodes each 
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[30, 10],
    n_classes=3 # The goal is to classify among 3 classes
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/g2/xxm5yp6d5hq6_57sg5zpncgr0000gn/T/tmp437nhc_3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


2021-12-24 22:13:06.005424: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
def input_fn(features, labels, training=True, batch_size=120):
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if training:
        ds = ds.shuffle(1000).repeat()
    return ds.batch(batch_size)      

***input_fn*** returns a ***tf.data.Dataset*** object which defines a data pipeline to feed data into model for training. The data pipeline allows transformation & preprocessing of data: slicing, shuffling, batching etc.

***classifier.train*** takes an *input_fn* argument which must be a function thus the ***input_fn*** object must be wrapped and returned as a function using either a wrapper outside function return or a direct lambda function.

In [17]:
classifier.train(
    input_fn=lambda: input_fn(train, y_train, training=True),
    steps=5000)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/g2/xxm5yp6d5hq6_57sg5zpncgr0000gn/T/tmp437nhc_3/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 1.308684, step = 0
INFO:tensorflow:global_step/sec: 266.455
INFO:tensorflow:loss = 1.0732169, step = 100 (0.376 sec)
INFO:tensorflow:global_step/sec: 437.661
INFO:tensorflow:loss = 1.0119334, step = 200 

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x7f9319c1c430>

In [18]:
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test, y_test, training=False))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-12-24T22:13:29
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/g2/xxm5yp6d5hq6_57sg5zpncgr0000gn/T/tmp437nhc_3/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.05152s
INFO:tensorflow:Finished evaluation at 2021-12-24-22:13:30
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.8333333, average_loss = 0.58103937, global_step = 5000, loss = 0.58103937
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /var/folders/g2/xxm5yp6d5hq6_57sg5zpncgr0000gn/T/tmp437nhc_3/model.ckpt-5000

Test set accuracy: 0.833



In [58]:
# Script below that allows you to type the features of a flower and see a prediction for its class.
def input_fn(features, batch_size=256):
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

features = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
predict = {}

print("Please type numeric values as prompted.")
for feature in features:
  valid = True
  while valid: 
    val = input(feature + ": ")
    if val.isdigit(): valid = False

  predict[feature] = [float(val)] #needs to be put in a list

predictions = classifier.predict(input_fn=lambda: input_fn(predict))

for pred_dict in predictions:
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%)'.format(
        SPECIES[class_id], 100 * probability))

Please type numeric values as prompted.
SepalLength: 3
SepalWidth: 2
PetalLength: 1
PetalWidth: 2
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/g2/xxm5yp6d5hq6_57sg5zpncgr0000gn/T/tmp437nhc_3/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "Setosa" (49.5%)
