In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [11]:
# Sample 10,000 post-2000 birth records from the BigQuery public natality
# table, keeping only the columns this notebook inspects or models on.
query = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks,
  year
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
# Run the query with default client credentials and materialise it in pandas.
bq_client = bigquery.Client()
query_job = bq_client.query(query)
df = query_job.to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,year
0,6.68662,True,18,1,43.0,2001
1,9.360828,True,32,1,41.0,2001
2,8.437091,False,30,1,39.0,2001
3,6.124442,False,24,1,40.0,2002
4,7.12534,False,26,1,41.0,2002


In [12]:
# Summary statistics for the numeric columns of the raw query result; a
# `count` below the row total flags columns that contain missing values.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks,year
count,9989.0,10000.0,10000.0,9890.0,10000.0
mean,7.297602,27.2989,1.0344,38.699798,2001.5141
std,1.291685,6.165838,0.192926,2.539957,0.699178
min,0.612885,12.0,1.0,17.0,2001.0
25%,6.624891,22.0,1.0,38.0,2001.0
50%,7.374463,27.0,1.0,39.0,2001.0
75%,8.124034,32.0,1.0,40.0,2002.0
max,12.257702,50.0,3.0,47.0,2005.0


In [13]:
# Fraction of rows for each is_male value (quick class-balance check).
df['is_male'].value_counts().div(len(df))

True     0.515
False    0.485
Name: is_male, dtype: float64

In [14]:
# Number of missing values in each column.
df.isna().sum()

weight_pounds       11
is_male              0
mother_age           0
plurality            0
gestation_weeks    110
year                 0
dtype: int64

In [15]:
# Discard rows with any missing value, then shuffle with a fixed seed so the
# row order is the same on every run.
df = shuffle(df.dropna(), random_state=2)
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,year
39,8.375361,True,32,1,41.0,2001
6132,4.437905,False,28,1,30.0,2002
5986,7.936641,False,44,1,38.0,2001
7682,6.926924,False,34,1,38.0,2001
4910,7.874912,True,31,1,40.0,2001


In [16]:
# Regression target: birth weight in pounds.
labels = df['weight_pounds']
# Features: everything except the target and `year`, with the boolean
# is_male column encoded as 0/1 integers.
data = (
    df
    .drop(columns=['weight_pounds', 'year'])
    .assign(is_male=lambda frame: frame['is_male'].astype(int))
)

In [17]:
# Preview the feature matrix: weight_pounds and year are gone and is_male is
# now encoded as 0/1.
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
39,1,32,1,41.0
6132,0,28,1,30.0
5986,0,44,1,38.0
7682,0,34,1,38.0
4910,1,31,1,40.0


In [18]:
# Re-check the summary statistics on df after the dropna/shuffle cell; every
# column should now report the same count.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks,year
count,9883.0,9883.0,9883.0,9883.0,9883.0
mean,7.299206,27.307295,1.033897,38.707579,2001.51199
std,1.28694,6.165118,0.19183,2.518117,0.697331
min,0.612885,12.0,1.0,17.0,2001.0
25%,6.624891,22.0,1.0,38.0,2001.0
50%,7.374463,27.0,1.0,39.0,2001.0
75%,8.124034,32.0,1.0,40.0,2002.0
max,12.257702,50.0,3.0,47.0,2005.0


In [19]:
# Split features/labels into train and test sets. test_size is left at
# scikit-learn's default. random_state is pinned (same seed as the shuffle
# cell) so a Restart & Run All reproduces the same split; without it every
# run trained and evaluated on different rows.
x, y = data, labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

In [20]:
# Small fully connected regressor: two ReLU hidden layers (64 and 32 units)
# feeding a single linear output. The input width is taken from the number of
# feature columns in the training frame.
model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1),
])

In [21]:
# Optimise mean squared error with RMSprop (default settings) and report both
# MAE and MSE during training.
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=['mae', 'mse'],
)

In [22]:
# Train for 10 epochs; validation_split=0.1 reserves 10% of the training rows
# for per-epoch validation (the recorded run trained on 6670 samples and
# validated on 742).
model.fit(x_train, y_train, epochs=10, validation_split=0.1)

Train on 6670 samples, validate on 742 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f0e9bc83910>

In [23]:
# Score the first few held-out rows as a quick sanity check of the model.
num_examples = 10
predictions = model.predict(x_test.iloc[:num_examples])

In [24]:
# Show each prediction next to the true weight for the same test row.
for row in range(num_examples):
    predicted = predictions[row, 0]  # model output has one value per row
    actual = y_test.iloc[row]
    print('Predicted val: ', predicted)
    print('Actual val: ', actual)
    print()

Predicted val:  7.835817
Actual val:  7.31273323054

Predicted val:  7.202656
Actual val:  8.375361333379999

Predicted val:  8.027276
Actual val:  10.18756112702

Predicted val:  6.969439
Actual val:  6.5367060683

Predicted val:  8.096409
Actual val:  7.87491199864

Predicted val:  7.7644897
Actual val:  7.1870697412

Predicted val:  7.948204
Actual val:  7.81318256528

Predicted val:  7.4933505
Actual val:  7.6390173783

Predicted val:  7.2523656
Actual val:  6.3272669193999995

Predicted val:  7.5746527
Actual val:  5.6879263596



In [25]:
# Re-attach the true labels to the test features (aligned on the shared row
# index) so the What-If Tool receives features and target in one table.
wit_data = x_test.join(y_test)

In [26]:
def custom_predict(examples_to_infer):
    """Prediction hook registered with the What-If Tool.

    Forwards the examples unchanged to the notebook-level ``model`` and
    returns whatever ``model.predict`` produces.

    Args:
        examples_to_infer: Batch of examples to score. Presumably rows of
            feature values in the order the model was trained on; verify
            against what the widget actually passes.

    Returns:
        The result of ``model.predict(examples_to_infer)``.
    """
    return model.predict(examples_to_infer)

In [27]:
# Feed the first 500 labelled test rows to the What-If Tool as plain lists,
# with column names given as the feature columns followed by the target.
wit_examples = wit_data.iloc[:500].values.tolist()
wit_feature_names = [*data.columns, 'weight_pounds']

# Configure the tool for a regression model scored through custom_predict,
# with weight_pounds marked as the ground-truth column.
config_builder = (
    WitConfigBuilder(wit_examples, wit_feature_names)
    .set_custom_predict_fn(custom_predict)
    .set_target_feature('weight_pounds')
    .set_model_type('regression')
)
WitWidget(config_builder, height=800)

WitWidget(config={'model_type': 'regression', 'label_vocab': [], 'feature_names': ['is_male', 'mother_age', 'p…