In [1]:
### Classification ###

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import pandas as pd

In [2]:
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica'] # 0, 1, 2 categorical
# Lets define some constants to help us later on

In [3]:
train_path = tf.keras.utils.get_file(
    "iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
test_path = tf.keras.utils.get_file(
    "iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
# Here we use keras (a module inside of TensorFlow) to grab our datasets and read them into a pandas dataframe

In [4]:
train.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [5]:
train_y = train.pop('Species')
test_y = test.pop('Species')
# pop the response variable
train.head() # compare

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth
0,6.4,2.8,5.6,2.2
1,5.0,2.3,3.3,1.0
2,4.9,2.5,4.5,1.7
3,4.9,3.1,1.5,0.1
4,5.7,3.8,1.7,0.3


In [6]:
train.shape

(120, 4)

In [7]:
def input_fn(features, labels, training=True, batch_size=256):
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle and repeat if you are in training mode.
    if training:
        dataset = dataset.shuffle(1000).repeat()
    
    return dataset.batch(batch_size)

In [8]:
# feature columns describe how to use the input
my_feature_columns = []
for key in train.keys():
    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
print(my_feature_columns)
# could have looped through CSV_COLUMN_NAMES minus 'Species'
# we did not have to do two loops for numeric and categorical variables...
# ..separately, like in linear regression

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [9]:
# Choose a pre-built model type in Tensorflow
# DNNClassifier (Deep Neural Network)
# LinearClassifier

# *(!)* DNN may be best because we may not find linear correspondence...
# ...within our data (training).
# Note - a lot of the work comes in pre-processing data, rather than picking...
# ...the right model to use. So it will be easy to change models, since the data...
# ...is already prep'd.

# Build a DNN with 2 hidden layers with 30 and 10 hidden nodes each.

classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # Two hidden layers of 30 and 10 nodes respectively.
    hidden_units=[30, 10],
    # The model must choose between 3 classes.
    n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\BRADWA~1\\AppData\\Local\\Temp\\tmprs_ftgkc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [10]:
classifier.train(
    input_fn=lambda: input_fn(train, train_y, training=True),
    steps=5000)
# We include a lambda to avoid creating an inner function previously

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\BRADWA~1\AppData\Local\Temp\tmprs_ftgkc\model.ckpt.
INFO:

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x1460368b848>

In [11]:
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test, test_y, training=False))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-01-01T23:07:40Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\BRADWA~1\AppData\Local\Temp\tmprs_ftgkc\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.54826s
INFO:tensorflow:Finished evaluation at 2021-01-01-23:07:40
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.96666664, average_loss = 0.33454758, global_step = 5000, loss = 0.33454758
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: C:\Users\BRADWA~1\AppData\Loc

In [12]:
def input_fn(features, batch_size=256):
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

features = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
#features = list(train.keys())
predict = {} # populate with keys:values and pass to input_fn(X) call

print("Please type numeric values as prompted.")
for feature in features:
  valid = True
  while valid: 
    val = input(feature + ": ")
    if not val.isdigit(): valid = False

  predict[feature] = [float(val)]

predictions = classifier.predict(input_fn=lambda: input_fn(predict))
#predictions returns a object that needs to be unpacked
for pred_dict in predictions:
    print(pred_dict)
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" at {:.1f}%'.format(
        SPECIES[class_id], 100 * probability))

Please type numeric values as prompted.
SepalLength: 1.5
SepalWidth: 1.5
PetalLength: 1.2
PetalWidth: 2.1
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\BRADWA~1\AppData\Local\Temp\tmprs_ftgkc\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([-0.59735596, -0.32982424,  0.81987756], dtype=float32), 'probabilities': array([0.15546235, 0.2031481 , 0.6413896 ], dtype=float32), 'class_ids': array([2], dtype=int64), 'classes': array([b'2'], dtype=object), 'all_class_ids': array([0, 1, 2]), 'all_classes': array([b'0', b'1', b'2'], dtype=object)}
Prediction is "Virginica" at 64.1%


In [13]:
# Here is some example input and expected classes you can try above
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

In [14]:
### Clustering ###
#Now that we've covered regression and classification it's time to talk about clustering data!

#Clustering is a Machine Learning technique that involves the grouping of data points. 
#In theory, data points that are in the same group should have similar properties and/or features, 
#while data points in different groups should have highly dissimilar properties and/or features. 
#(https://towardsdatascience.com/the-5-clustering-algorithms-data-scientists-need-to-know-a36d136ef68)

#Unfortunalty there are issues with the current version of TensorFlow and the implementation for KMeans. 
#This means we cannot use KMeans without writing the algorithm from scratch. 
#We aren't quite at that level yet, so we'll just explain the basics of clustering for now.

#Basic Algorithm for K-Means.

#    Step 1: Randomly pick K points to place K centroids
#    Step 2: Assign all the data points to the centroids by distance. The closest centroid to a point is the one it is assigned to.
#    Step 3: Average all the points belonging to each centroid to find the middle of those clusters (center of mass). Place the corresponding centroids into that position.
#    Step 4: Reassign every point once again to the closest centroid.
#    Step 5: Repeat steps 3-4 until no point changes which centroid it belongs to.

#Please refer to the video for an explanation of KMeans clustering."


In [15]:
### Hidden Markov Models ###
# "The Hidden Markov Model is a finite set of states, each of which is associated with a (generally multidimensional) probability distribution []. 
#Transitions among the states are governed by a set of probabilities called transition probabilities." 
#http://jedlik.phy.bme.hu/~gerjanos/HMM/node4.html)

# A hidden markov model works with probabilities to predict future events or states. 
#In this section we will learn how to create a hidden markov model that can predict the weather.

# This section is based on the following TensorFlow tutorial. 
#https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/HiddenMarkovModel

# np.linalg.solve(A,b)? 

#We can find these probabilities from large datasets or may already have these values. We'll run through an example in a second that should clear some things up, but let's discuss the components of a markov model.

#States: In each markov model we have a finite set of states. These states could be something like "warm" and "cold" or "high" and "low" or even "red", "green" and "blue". These states are "hidden" within the model, which means we do not direcly observe them.

#Observations: Each state has a particular outcome or observation associated with it based on a probability distribution. An example of this is the following: On a hot day Tim has a 80% chance of being happy and a 20% chance of being sad.

#Transitions: Each state will have a probability defining the likelyhood of transitioning to a different state. An example is the following: a cold day has a 30% chance of being followed by a hot day and a 70% chance of being follwed by another cold day.

#To create a hidden markov model we need.

    #1. States
    #2. Observation Distribution
    #3. Transition Distribution


In [17]:
from numpy import array
from scipy.linalg import lu

a = array([[0.7,0.3],[0.2,0.8]])

pl, u = lu(a, permute_l=True)

In [18]:
print(u)

[[0.7        0.3       ]
 [0.         0.71428571]]


In [3]:
## Tensorflow Example - Markov ##

import tensorflow_probability as tfp  # We are using a different module from tensorflow this time
import tensorflow as tf

In [29]:
# 1. Cold days are encoded as 0, and hot days are encoded as 1
# 2. The first day in our sequence has an 80% chance of being cold
# ... i.e. [0.8, 0.2]
# 3. A cold day has a 30% chance of being followed by a hot day, and therefore a 70% chance of remaining cold 
# ... i.e. 0 -> [0.7, 0.3]
# 4. A hot day has a 20% chance of being followed by a cold day, and therefore a 80% chance of remaining hot 
# ... i.e. 1 -> [0.2, 0.8]
# 5. On each day the temperature is normally distributed with mean and standard deviation 0 and 5 on a cold day 
# ... and mean and standard deviation 15 and 10 on a hot day.

tfd = tfp.distributions # making a shortcut for later on (i.e. below)
initial_dist = tfd.Categorical(probs=[0.2,0.8]) # refer point 2
transition_dist = tfd.Categorical(probs=[[0.5, 0.5],
                                        [0.2, 0.8]]) # refers to point 3 and 4
observation_dist = tfd.Normal(loc=[0., 15.], scale=[5., 10.]) # refer to point 5 
# in this case, loc = mean, scale = s.dev

In [30]:
model = tfd.HiddenMarkovModel(
    initial_distribution = initial_dist,
    transition_distribution = transition_dist,
    observation_distribution=observation_dist,
        num_steps=21)

In [31]:
mean = model.mean()

In [32]:
with tf.compat.v1.Session() as sess:  
    print(mean.numpy())
print(mean)

# Notice the convergence to a constant temperature. This is because the Markov model converges to a steady-state.

[12.       11.1      10.83     10.748999 10.724698 10.71741  10.715222
 10.714567 10.71437  10.71431  10.714293 10.714288 10.714285 10.714285
 10.714285 10.714285 10.714285 10.714285 10.714285 10.714285 10.714285]
tf.Tensor(
[12.       11.1      10.83     10.748999 10.724698 10.71741  10.715222
 10.714567 10.71437  10.71431  10.714293 10.714288 10.714285 10.714285
 10.714285 10.714285 10.714285 10.714285 10.714285 10.714285 10.714285], shape=(21,), dtype=float32)
