# Section 26: Python for Deep Learning

# 03 - TensorFlow Estimators 

In [1]:
### Tensorflow has an estimator object you can use to quickly create models without needing 
### to manually define the graph as we did before. -> Making a graph manually can be compliocated
### and thus we use estimators to make the model building process faster. 

### Estimator steps: -> Similar to sci-kit learn workflow
### - Read in the Data (Normalize if necessary)
### - Train/Test split the data 
### - Create Estimator Feature Columns -> A list of specialized feature columns 
### - Create Input Estimator Function  -> A way of organizing your training data 
### - Train estimator model 
### - Predict with new test Input Function

In [3]:
### ---------------------------- 1. READ-IN THE DATA -------------------------------
### Imports and getting the data 
import pandas as pd 
df = pd.read_csv('iris.csv')
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [4]:
### To use TF estimator objects we need to: 
                                    # -Clean the column names -> No special characters or spaces on them 
                                    # -Target for classification must be an integer -> Target is float now
df.columns 

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

In [5]:
### Lets deal with the column names first, we rename as follows:
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'target']

In [6]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [7]:
### Now, lets change the data type of the target column 
df['target'] = df['target'].apply(int)

In [8]:
df.head() ## All set, features can be floats!

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [10]:
y = df['target']
X = df.drop('target', axis = 1)

In [12]:
y[0:10] ### To train and test we must shuffle, otherwise we will be feeding certain classes more frequently 

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
Name: target, dtype: int64

In [14]:
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [18]:
### ---------------------------- 2. TRAIN-TEST SPLIT -------------------------------
### Lets split our data into train/test sets: 
##Imports 
from sklearn.model_selection import train_test_split

In [16]:
train_test_split = X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [17]:
### Using Tensorflow
import tensorflow as tf 

In [22]:
### ---------------------------- 3. CREATING FEATURE COLUMNS ------------------------------
### Creating the feature columns: 

### We are working with numeric columns, thus we call the numeric column method 
feats_cols = []
for col in X.columns:
    feats_cols.append(tf.feature_column.numeric_column(col))

In [25]:
### We obtain a list of specialized numeric column objects where heir key syncs with 
### the declared pandas DataFrames
feats_cols

[NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='sepal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [40]:
### ---------------------------- 3. CREATING INPUT ESTIMATOR FUNCTIONS -----------------------------

##Returns an input function that will feed a pd DataFrame into the model

##tf common error -> Empty predictions
##Need to tune batch size
##num_epochs -> In each epoch you get to see the whole training set -> When we reach the num_epochs limit we will
##have trained the estimator object completely 

input_func = tf.estimator.inputs.pandas_input_fn(x =X_train, y = y_train, 
                                                 batch_size = 10, num_epochs = 5, shuffle = True)
## Classifier estimator 
## hidden units -> Pass in a list with number of neurons per layer
## n_classes = 3 -> We have three target values 
## Feature columns -> List of numeric columns created before

classifier = tf.estimator.DNNClassifier(hidden_units=[10,20,10], n_classes = 3, feature_columns = feats_cols)       

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpcesb_oyi', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd8f2f308d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [41]:
### ---------------------------- 4. TRAINING THE CLASSIFIER -----------------------------
classifier.train(input_fn = input_func, steps = 50)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpcesb_oyi/model.ckpt.
INFO:tensorflow:loss = 9.73947, step = 1
INFO:tensorflow:Saving checkpoints for 50 into /tmp/tmpcesb_oyi/model.ckpt.
INFO:tensorflow:Loss for final step: 0.9795073.


<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x7fd8f2dbcb10>

In [42]:
### ---------------------------- 5. TESTING THE CLASSIFIER ------------------------
pred_func = tf.estimator.inputs.pandas_input_fn(x =X_test, batch_size = len(X_test), shuffle = False)

##Outputs a generator -> Cast a list if you are not going
##                       to iterate through the list. 

predictions = list(classifier.predict(input_fn = pred_func)) 

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpcesb_oyi/model.ckpt-50
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [43]:
predictions ## Lost of info here

[{'logits': array([-0.4300766,  3.425398 ,  0.4841894], dtype=float32),
  'probabilities': array([0.01970599, 0.9311287 , 0.04916533], dtype=float32),
  'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'all_class_ids': array([0, 1, 2], dtype=int32),
  'all_classes': array([b'0', b'1', b'2'], dtype=object)},
 {'logits': array([-1.4666828,  3.1826391,  4.197844 ], dtype=float32),
  'probabilities': array([0.00253829, 0.2652874 , 0.73217434], dtype=float32),
  'class_ids': array([2]),
  'classes': array([b'2'], dtype=object),
  'all_class_ids': array([0, 1, 2], dtype=int32),
  'all_classes': array([b'0', b'1', b'2'], dtype=object)},
 {'logits': array([ 4.736955 , -1.6508582, -3.201774 ], dtype=float32),
  'probabilities': array([9.9796557e-01, 1.6785078e-03, 3.5593382e-04], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'all_class_ids': array([0, 1, 2], dtype=int32),
  'all_classes': array([b'0', b'1', b'2'], dtype=object)},
 {'l

In [44]:
## Obtaining the final predictions 
final_preds = []

for pred in predictions:
    final_preds.append(pred['class_ids'][0])

In [45]:
from sklearn.metrics import classification_report, confusion_matrix

In [46]:
print(confusion_matrix(y_test, final_preds))
print(classification_report(y_test, final_preds))


[[14  0  0]
 [ 0 13  0]
 [ 0  3 15]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.81      1.00      0.90        13
           2       1.00      0.83      0.91        18

    accuracy                           0.93        45
   macro avg       0.94      0.94      0.94        45
weighted avg       0.95      0.93      0.93        45

