In [68]:
import pandas as pd

In [69]:
from sklearn.datasets import load_iris

In [70]:
iris = load_iris()

In [71]:
type(iris)

sklearn.utils.Bunch

In [72]:
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [73]:
print(iris['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [74]:
type(iris['feature_names'])

list

In [75]:
len(iris['feature_names'])

4

In [76]:
type(iris['data'])

numpy.ndarray

In [77]:
len(iris['data'])

150

In [78]:
df = pd.DataFrame(iris['data'], columns = iris['feature_names'])

In [79]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [80]:
len(iris['target'])

150

In [81]:
iris['data'].shape

(150, 4)

In [82]:
len(iris['feature_names'])

4

In [83]:
# for us to create a dataframe out of raw data or some unorganized data you first need to match the dimension for our dataframe
# just like we did above, you can see the dimension for iris['data'] and iris['feature_names'] are just perfect for creating
# a pandas data frame

In [84]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [85]:
# since tensorflow does not accept any spaces between columns names so we need to set the column names for our dataframes

df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')

In [86]:
df.columns = ['sepal_length', 'sepel_width', 'petal_length', 'petal_width',]

In [87]:
df.head()

Unnamed: 0,sepal_length,sepel_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [88]:
target = pd.Series(iris['target'])

In [89]:
df.head()

Unnamed: 0,sepal_length,sepel_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [90]:
target.shape

(150,)

In [91]:
type(target)

pandas.core.series.Series

In [92]:
target = target.apply(int)

In [93]:
type(target)

pandas.core.series.Series

In [95]:
from sklearn.model_selection import train_test_split

In [96]:
X_train, X_test, y_train, y_test = train_test_split(df,target, test_size = 0.3, random_state = 101)

In [97]:
import tensorflow as tf

In [132]:
# now we need to featurize our columns with tensorflow 'feature_columns' method in order to use them in our algrithm
# for that

# steps
# - set the feature_columns by applying tf.feature_columns.numeric_columns method
# - create 2 input functions for training and testing our model using tf.estimator.inputs.pandas_fn method
# - create the classifier by providing the hidden_units, n_classes, and feature_columns
# - train the classifier by calling 'train' method and passing in input_fn and steps off of classifier instance
# - now we are gonna call predictions off of our tf.estimator method to predict he algorithm outcome and compare with actual data

feat_cols = []

for column in df:
    feat_cols.append(tf.feature_column.numeric_column(column))

In [100]:
feat_cols

[NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='sepel_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [106]:
input_fn = tf.estimator.inputs.pandas_input_fn(x = X_train, y = y_train, batch_size = 10, num_epochs = 5, shuffle = True)

In [107]:
classifier = tf.estimator.DNNClassifier(hidden_units = [10,20,10], n_classes = 3, feature_columns = feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\umair\\AppData\\Local\\Temp\\tmpmnte0icr', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000208A94AEF28>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [108]:
classifier.train(input_fn = input_fn, steps = 50)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\umair\AppData\Local\Temp\tmpmnte0icr\model.ckpt.
INFO:tensorflow:loss = 19.66383, step = 1
INFO:tensorflow:Saving checkpoints for 50 into C:\Users\umair\AppData\Local\Temp\tmpmnte0icr\model.ckpt.
INFO:tensorflow:Loss for final step: 4.349932.


<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x208a94aecf8>

In [111]:
predict_fn = tf.estimator.inputs.pandas_input_fn(x=X_test, batch_size = len(X_test), shuffle = False)

In [112]:
predictions = list(classifier.predict(input_fn = predict_fn))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from C:\Users\umair\AppData\Local\Temp\tmpmnte0icr\model.ckpt-50
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [113]:
predictions

[{'logits': array([ 6.269969 ,  1.8229762, -6.0464187], dtype=float32),
  'probabilities': array([9.8841745e-01, 1.1578064e-02, 4.4258982e-06], dtype=float32),
  'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([ 5.7695417,  1.710745 , -5.788058 ], dtype=float32),
  'probabilities': array([9.8301423e-01, 1.6976444e-02, 9.4006500e-06], dtype=float32),
  'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([ 3.7384126,  1.2269439, -4.4686112], dtype=float32),
  'probabilities': array([9.2470860e-01, 7.5039133e-02, 2.5219694e-04], dtype=float32),
  'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([-5.9164386 , -1.0120702 ,  0.67374766], dtype=float32),
  'probabilities': array([0.00115768, 0.15614566, 0.84269667], dtype=float32),
  'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object)},
 {'logits': array([-4.074848  , -0

In [114]:
type(predictions)

list

In [116]:
a = predictions[0]

In [117]:
type(a)

dict

In [118]:
a['class_ids']

array([0], dtype=int64)

In [126]:
final_predictions = []

for prediction in predictions:
    final_predictions.append(prediction['class_ids'][0])

In [128]:
#final_predictions

In [129]:
from sklearn.metrics import classification_report, confusion_matrix

In [130]:
print(confusion_matrix(y_test,final_predictions))
print('\n')
print(classification_report(y_test,final_predictions))

[[13  0  0]
 [ 0  8 12]
 [ 0  0 12]]


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      0.40      0.57        20
           2       0.50      1.00      0.67        12

   micro avg       0.73      0.73      0.73        45
   macro avg       0.83      0.80      0.75        45
weighted avg       0.87      0.73      0.72        45

