# Estimators in TensorFlow 2.0
Inspired by https://www.tensorflow.org/alpha/tutorials/estimators/linear

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import shutil
import os
%matplotlib inline

In [None]:
# Read the Titanic training CSV from a path relative to the working directory.
df = pd.read_csv('../data/titanic-train.csv')
# Show the first rows as a quick sanity check of the loaded columns.
df.head()

In [None]:
# Print column dtypes and non-null counts to see which columns have gaps.
df.info()

In [None]:
# 'Cabin' is removed entirely instead of being imputed.
del df['Cabin']

# Replace missing ages with the mean age of the whole table.
mean_age = df['Age'].mean()
df.loc[:, 'Age'] = df['Age'].fillna(mean_age)

# Any row that still has a missing value is discarded; then re-inspect.
df = df.dropna()
df.info()

In [None]:
# Remove the 'Survived' column from the features and keep it as the label.
y = df.pop('Survived')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=0)

In [None]:
cat_cols = ['Sex', 'SibSp', 'Parch', 'Pclass', 'Embarked']
num_cols = ['Age', 'Fare']

# Each categorical column becomes an indicator column whose vocabulary is
# the set of distinct values seen in the training split.
categorical_features = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            name, X_train[name].unique()
        )
    )
    for name in cat_cols
]

# Numeric columns are fed through as float32 values.
numeric_features = [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in num_cols
]

# Categorical columns first, numeric columns after.
feature_columns = categorical_features + numeric_features

In [None]:
# Display the assembled feature column definitions.
feature_columns

In [None]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Build a zero-argument input function for an estimator.

    Args:
        data_df: Feature table; converted with ``dict(data_df)`` so that
            each column becomes one named feature.
        label_df: Labels sliced in step with ``data_df``.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: Whether to shuffle (buffer of 1000) before batching.
        batch_size: Number of examples per batch.

    Returns:
        A callable taking no arguments that constructs and returns the
        ``tf.data.Dataset`` each time it is invoked.
    """
    def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        # Shuffling happens on individual examples, before batching.
        dataset = dataset.shuffle(1000) if shuffle else dataset
        batched = dataset.batch(batch_size)
        return batched.repeat(num_epochs)

    return input_function

# Training input: defaults apply (10 epochs, shuffled, batches of 32).
train_input_fn = make_input_fn(X_train, y_train)
# Evaluation input: a single, unshuffled pass over the held-out split.
eval_input_fn = make_input_fn(X_test, y_test, num_epochs=1, shuffle=False)

In [None]:
# Build a small-batch dataset and look at its first batch only.
preview_fn = make_input_fn(X_train, y_train, batch_size=10)
ds = preview_fn()
for feature_batch, label_batch in ds.take(1):
    # end='\n\n' leaves one blank line between the sections.
    print('Some feature keys:', list(feature_batch), end='\n\n')
    print('A batch of Pclass:', feature_batch['Pclass'].numpy(), end='\n\n')
    print('A batch of Labels:', label_batch.numpy())

In [None]:
# Directory given to the estimator as its model_dir.
model_path = '/tmp/tensorboard/linear_estimator/'

In [None]:
# Start from an empty model directory (presumably so files left by an
# earlier run are not picked up again). The delete is attempted directly
# rather than checking for existence first, which avoids the gap between
# the check and the delete; only "not there" is treated as success.
try:
    shutil.rmtree(model_path)
except FileNotFoundError:
    pass

In [None]:
# Linear classifier over the feature columns defined earlier; model_dir is
# set to model_path.
model = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    model_dir=model_path)

In [None]:
# Fit the model; the input function decides batching and epoch count.
model.train(input_fn=train_input_fn)

In [None]:
# Score the trained model on the held-out split and keep the metrics.
result = model.evaluate(input_fn=eval_input_fn)

In [None]:
# Display the value returned by model.evaluate.
result

## Exercise

TensorFlow 2.0 implements many other estimators besides the LinearClassifier:

- BaselineClassifier
- BaselineEstimator
- BaselineRegressor
- BoostedTreesClassifier
- BoostedTreesRegressor
- DNNClassifier
- DNNEstimator
- DNNRegressor
- DNNLinearCombinedClassifier
- DNNLinearCombinedEstimator
- DNNLinearCombinedRegressor
- LinearClassifier
- LinearEstimator
- LinearRegressor

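Pick one or more of the above estimators and train them on the same data. Make sure to change the save directory (`model_dir`) for each one, so that checkpoints from different estimators do not conflict with each other.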