In [1]:
import tempfile

import pandas as pd
import numpy as np
import tensorflow as tf


In [2]:
#Load the raw tables; the listed identifier-like columns are read as strings (song_length as int)

train = pd.read_csv('dataset/train.csv', dtype={'source_system_tab': str})
test = pd.read_csv('dataset/test.csv', dtype={'source_system_tab': str})
members = pd.read_csv('dataset/members.csv', dtype={'msno': str, 'city': str, 'registered_via': str})
songs = pd.read_csv('dataset/songs.csv', dtype={'genre_ids': str, 'language': str, 'song_length': int})

#Infer a missing value based on other features
#NOTE(review): row label 605127 is hard-coded -- confirm it still points at the intended song if the dataset changes

songs.loc[605127, 'language'] = '31.0'

#Impute missing values with the placeholder category 'unknown'
#The fill value is a scalar, so no axis is given: axis has no meaning for a scalar fill, and
#some pandas releases implement axis=1 through a transpose round trip (slow, and it may turn
#numeric columns into object dtype) -- NOTE(review): confirm against the pandas version in use
#Reassigning instead of inplace=True keeps each statement's result explicit

train = train.fillna('unknown')
test = test.fillna('unknown')
members = members.fillna('unknown')
songs = songs.fillna('unknown')

#Convert feature columns related to member registration to datetime format (source values are YYYYMMDD)

members['expiration_date'] = pd.to_datetime(members['expiration_date'], format='%Y%m%d')
members['registration_init_time'] = pd.to_datetime(members['registration_init_time'], format='%Y%m%d')

#Create a feature indicating the number of days between registration and expiration

membership_span = members['expiration_date'] - members['registration_init_time']
members['reg_duration'] = membership_span.dt.days


In [4]:
#Merge the training and test data with song and member data
#Training rows use pandas' default inner join; test rows use left joins so no test row is dropped

train_set = train.merge(songs, on='song_id').merge(members, on='msno')
test_set = test.merge(songs, on='song_id', how='left').merge(members, on='msno', how='left')

#Separate the submission ids from the test set (pop returns the column and removes it in one step)

ids = test_set.pop('id')

#Impute missing values in merged training and test sets
#The fill value is a scalar, so no axis is given: some pandas releases implement axis=1 through a
#transpose round trip, which is slow on frames this size and may turn numeric columns into object
#dtype -- NOTE(review): confirm against the pandas version in use

train_set = train_set.fillna('unknown')
test_set = test_set.fillna('unknown')


In [5]:
#Shuffle the data and split off 20% of the training set for use as a validation set

split_ratio = 0.8

train_set = train_set.sample(frac=1, random_state=6)

#Row position where the training part ends and the validation part begins
n_train_rows = int(split_ratio * train_set.shape[0])

#Separate the labels from the training and validation sets

y_train = train_set['target'].iloc[:n_train_rows]
y_val = train_set['target'].iloc[n_train_rows:]

#drop() without inplace returns new frames, so no slice of the shuffled frame is mutated
#(the original in-place drop on a slice goes through pandas' SettingWithCopy path)
#val_set is built first because train_set is rebound on the following line

val_set = train_set.iloc[n_train_rows:].drop('target', axis=1)
train_set = train_set.iloc[:n_train_rows].drop('target', axis=1)


In [6]:
#Names of the input columns handed to the estimator, and the name of the label column

FEATURES = ['msno', 'gender', 'city', 'bd',
            'song_id', 'language', 'genre_ids', 'composer', 'lyricist',
            'source_system_tab', 'source_screen_name', 'source_type', 'reg_duration']

LABEL = 'target'


def _vocab_column(key, vocabulary):
    """Return a string categorical column over an explicit vocabulary (default_value=-99)."""
    return tf.feature_column.categorical_column_with_vocabulary_list(key=key,
                                                                     vocabulary_list=vocabulary,
                                                                     dtype=tf.string,
                                                                     default_value=-99)


def _hashed_column(key, bucket_count):
    """Return a string categorical column whose values are hashed into bucket_count buckets."""
    return tf.feature_column.categorical_column_with_hash_bucket(key=key,
                                                                 hash_bucket_size=bucket_count,
                                                                 dtype=tf.string)


#Use the feature_column module to describe each input column to the model

target = tf.feature_column.categorical_column_with_identity(key='target', num_buckets=2)

duration = tf.feature_column.numeric_column(key='reg_duration', default_value=-99, dtype=tf.int32)

#Categorical features with an explicit vocabulary; vocabularies are taken from the loaded tables

gender = _vocab_column('gender', ('female', 'male', 'unknown'))
city = _vocab_column('city', members['city'].unique())
language = _vocab_column('language', songs['language'].unique())
artist = _vocab_column('artist_name', songs['artist_name'].unique())
composer = _vocab_column('composer', songs['composer'].unique())
lyricist = _vocab_column('lyricist', songs['lyricist'].unique())
tab = _vocab_column('source_system_tab', train['source_system_tab'].unique())
screen = _vocab_column('source_screen_name', train['source_screen_name'].unique())
source = _vocab_column('source_type', train['source_type'].unique())

#Bucket categorical features with many unique categories using a hash table with a size of approximately (n/0.8)*2

msno = _hashed_column('msno', 90000)
song_id = _hashed_column('song_id', 6000000)
hashed_genre = _hashed_column('genre_ids', 3000)

#Perform one hot encoding on categorical features with few unique values

indicator_gender, indicator_city, indicator_language = (
    tf.feature_column.indicator_column(column) for column in (gender, city, language))
indicator_tab, indicator_screen, indicator_source = (
    tf.feature_column.indicator_column(column) for column in (tab, screen, source))

#Embed the high-cardinality categorical features into dense vectors with approximately log2(n) dimensions

embedded_genre = tf.feature_column.embedding_column(hashed_genre, dimension=10)
embedded_song = tf.feature_column.embedding_column(song_id, dimension=22)
embedded_msno = tf.feature_column.embedding_column(msno, dimension=15)
embedded_composer = tf.feature_column.embedding_column(composer, dimension=18)
embedded_lyricist = tf.feature_column.embedding_column(lyricist, dimension=17)

#Bucket member age ('bd') into ranges split at 0, 14, 20, 30, 40, 50 and 80;
#nonsensical ages end up in the outermost buckets

age = tf.feature_column.numeric_column(key='bd', default_value=0, dtype=tf.int32)

age_bucket = tf.feature_column.bucketized_column(age, boundaries=[0, 14, 20, 30, 40, 50, 80])

#Assign features to be used in either the wide or the deep model (or both)
#The wide and crossed lists are currently empty; only the deep list is populated

wide_columns = []
cross_columns = []
deep_columns = [
    indicator_gender, indicator_city, indicator_language,
    indicator_tab, indicator_screen, indicator_source,
    embedded_genre, embedded_msno, embedded_song,
    embedded_composer, embedded_lyricist,
    duration, age_bucket,
]


In [7]:
def build_estimator(model_dir, model_type):
    """Create the TensorFlow estimator for the requested architecture.

    Args:
        model_dir: Directory handed to the estimator as its `model_dir`.
        model_type: One of 'wide' (LinearClassifier), 'deep' (DNNClassifier)
            or 'combined' (DNNLinearCombinedClassifier).

    Returns:
        The constructed (untrained) estimator.

    Raises:
        ValueError: If `model_type` is not one of the supported names.
    """
    supported_types = ('wide', 'deep', 'combined')

    # Reject unknown names up front; previously they fell through every branch
    # and failed at `return model` with an UnboundLocalError.
    if model_type not in supported_types:
        raise ValueError('Unknown model_type %r; expected one of %s'
                         % (model_type, ', '.join(supported_types)))

    if model_type == 'wide':
        return tf.estimator.LinearClassifier(model_dir=model_dir,
                                             feature_columns=wide_columns + cross_columns)

    if model_type == 'deep':
        return tf.estimator.DNNClassifier(model_dir=model_dir,
                                          feature_columns=deep_columns,
                                          hidden_units=[1024, 512, 256],
                                          optimizer=tf.train.AdamOptimizer(learning_rate=0.001,
                                                                           name='Adam'))

    # Only 'combined' remains: linear part on the crossed columns, DNN part on the deep columns.
    return tf.estimator.DNNLinearCombinedClassifier(model_dir=model_dir,
                                                    linear_feature_columns=cross_columns,
                                                    dnn_feature_columns=deep_columns,
                                                    dnn_hidden_units=[100, 50])


In [8]:
def input_fn(X, y, mode, batch_size):
    """Build a pandas-backed estimator input function for the given mode.

    Args:
        X: DataFrame containing at least the columns listed in FEATURES. It is not modified.
        y: Labels aligned row-for-row with X (anything exposing `.values`);
            ignored when mode is 'predict'.
        mode: 'train' (shuffled, repeats indefinitely, 8 reader threads),
            'eval' (single ordered pass, with labels) or
            'predict' (single ordered pass, no labels).
        batch_size: Number of rows per batch.

    Returns:
        The callable produced by `tf.estimator.inputs.pandas_input_fn`.

    Raises:
        ValueError: If `mode` is not 'train', 'eval' or 'predict'.
    """
    if mode not in ('train', 'eval', 'predict'):
        # Previously an unknown mode silently returned None and only failed later inside the estimator.
        raise ValueError("Unknown mode %r; expected 'train', 'eval' or 'predict'" % (mode,))

    # Select the model's columns and fill gaps on the resulting copy, so the caller's frame is
    # left untouched. The index is reset to a fresh 0..n-1 range, matching the index of the
    # label Series built from y.values below.
    features = X[FEATURES].fillna('unknown').reset_index(drop=True)

    if mode == 'predict':
        return tf.estimator.inputs.pandas_input_fn(x=features,
                                                   batch_size=batch_size,
                                                   num_epochs=1,
                                                   shuffle=False,
                                                   num_threads=1)

    training = mode == 'train'
    return tf.estimator.inputs.pandas_input_fn(x=features,
                                               y=pd.Series(y.values),
                                               batch_size=batch_size,
                                               # Training repeats indefinitely; evaluation makes one pass.
                                               num_epochs=None if training else 1,
                                               shuffle=training,
                                               num_threads=8 if training else 1,
                                               target_column='target')
    

In [9]:
def train_model(model_dir, model_type, train_steps, X_train, y_train, X_test, y_test, batch_size):
    """Build an estimator, train it inside an Experiment, then evaluate it on held-out data.

    Args:
        model_dir: Directory for the model; a temporary directory is created when this is falsy.
        model_type: Architecture name forwarded to `build_estimator`.
        train_steps: Number of training steps given to the Experiment.
        X_train, y_train: Training features and labels.
        X_test, y_test: Held-out features and labels used for the final evaluation.
        batch_size: Batch size used by every input function.

    Returns:
        The trained estimator.
    """
    # Create a temporary directory to store the model if no model directory argument is given
    if not model_dir:
        model_dir = tempfile.mkdtemp()

    print('build_estimator')

    estimator = build_estimator(model_dir, model_type)

    print('train start')

    training_fn = input_fn(X_train, y_train, mode='train', batch_size=batch_size)

    # The Experiment's evaluation input also reads the training data, so the metrics it
    # reports during training are training-set metrics
    training_eval_fn = input_fn(X_train, y_train, mode='eval', batch_size=batch_size)

    runner = tf.contrib.learn.Experiment(estimator=estimator,
                                         train_input_fn=training_fn,
                                         eval_input_fn=training_eval_fn,
                                         train_steps=train_steps,
                                         min_eval_frequency=1000)
    runner.train_and_evaluate()

    # Evaluate the trained model on the separate validation set
    validation_fn = input_fn(X_test, y_test, mode='eval', batch_size=batch_size)
    estimator.evaluate(input_fn=validation_fn)

    print('end!')

    return estimator
    

In [10]:
#Train the deep model on the training split and evaluate it on the validation split
deep_model = train_model(
    model_dir='model',
    model_type='deep',
    train_steps=150000,
    batch_size=100,
    X_train=train_set,
    y_train=y_train,
    X_test=val_set,
    y_test=y_val,
)


build_estimator
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000029E00EF5DD8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
train start
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into model\model.ckpt.
INFO:tensorflow:loss = 2819.1, step = 1
INFO:tensorflow:global_step/sec: 23.1738
INFO:tensorflow:loss = 240.969, step = 101 (4.316 sec)
INFO:tensorflow:global_step/sec: 24.976
INFO:tensorflow:loss = 114.8

INFO:tensorflow:loss = 59.2424, step = 2901 (4.088 sec)
INFO:tensorflow:global_step/sec: 24.7161
INFO:tensorflow:loss = 61.6555, step = 3001 (4.047 sec)
INFO:tensorflow:global_step/sec: 24.7958
INFO:tensorflow:loss = 57.3466, step = 3101 (4.031 sec)
INFO:tensorflow:global_step/sec: 24.7897
INFO:tensorflow:loss = 103.87, step = 3201 (4.034 sec)
INFO:tensorflow:global_step/sec: 24.7866
INFO:tensorflow:loss = 69.3025, step = 3301 (4.034 sec)
INFO:tensorflow:global_step/sec: 24.6277
INFO:tensorflow:loss = 68.4387, step = 3401 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.649
INFO:tensorflow:loss = 66.4185, step = 3501 (4.057 sec)
INFO:tensorflow:global_step/sec: 24.8081
INFO:tensorflow:loss = 67.8149, step = 3601 (4.031 sec)
INFO:tensorflow:global_step/sec: 24.6886
INFO:tensorflow:loss = 69.2878, step = 3701 (4.050 sec)
INFO:tensorflow:global_step/sec: 24.9288
INFO:tensorflow:loss = 69.0717, step = 3801 (4.011 sec)
INFO:tensorflow:global_step/sec: 24.7099
INFO:tensorflow:loss = 68.9856, 

INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Evaluation [23/100]
INFO:tensorflow:Evaluation [24/100]
INFO:tensorflow:Evaluation [25/100]
INFO:tensorflow:Evaluation [26/100]
INFO:tensorflow:Evaluation [27/100]
INFO:tensorflow:Evaluation [28/100]
INFO:tensorflow:Evaluation [29/100]
INFO:tensorflow:Evaluation [30/100]

INFO:tensorflow:loss = 59.7152, step = 15201 (4.004 sec)
INFO:tensorflow:global_step/sec: 24.9912
INFO:tensorflow:loss = 52.5418, step = 15301 (4.002 sec)
INFO:tensorflow:global_step/sec: 24.9569
INFO:tensorflow:loss = 53.3302, step = 15401 (4.006 sec)
INFO:tensorflow:global_step/sec: 25.0633
INFO:tensorflow:loss = 64.0523, step = 15501 (3.992 sec)
INFO:tensorflow:global_step/sec: 24.9943
INFO:tensorflow:loss = 62.986, step = 15601 (3.999 sec)
INFO:tensorflow:global_step/sec: 25.1137
INFO:tensorflow:loss = 57.7483, step = 15701 (3.981 sec)
INFO:tensorflow:global_step/sec: 25.0068
INFO:tensorflow:loss = 58.0163, step = 15801 (3.999 sec)
INFO:tensorflow:global_step/sec: 25.0006
INFO:tensorflow:loss = 67.3061, step = 15901 (4.000 sec)
INFO:tensorflow:global_step/sec: 25.0507
INFO:tensorflow:loss = 58.9741, step = 16001 (3.991 sec)
INFO:tensorflow:global_step/sec: 25.0727
INFO:tensorflow:loss = 62.1575, step = 16101 (3.989 sec)
INFO:tensorflow:global_step/sec: 25.0006
INFO:tensorflow:loss 

INFO:tensorflow:loss = 62.8834, step = 23601 (4.051 sec)
INFO:tensorflow:global_step/sec: 24.7589
INFO:tensorflow:loss = 58.161, step = 23701 (4.039 sec)
INFO:tensorflow:global_step/sec: 24.8978
INFO:tensorflow:loss = 62.2117, step = 23801 (4.016 sec)
INFO:tensorflow:global_step/sec: 24.7008
INFO:tensorflow:loss = 62.8538, step = 23901 (4.048 sec)
INFO:tensorflow:global_step/sec: 24.6642
INFO:tensorflow:loss = 58.1893, step = 24001 (4.054 sec)
INFO:tensorflow:global_step/sec: 24.6125
INFO:tensorflow:loss = 49.5599, step = 24101 (4.063 sec)
INFO:tensorflow:global_step/sec: 24.5249
INFO:tensorflow:loss = 55.0062, step = 24201 (4.077 sec)
INFO:tensorflow:global_step/sec: 24.7191
INFO:tensorflow:loss = 57.0074, step = 24301 (4.045 sec)
INFO:tensorflow:global_step/sec: 23.7721
INFO:tensorflow:loss = 59.2286, step = 24401 (4.207 sec)
INFO:tensorflow:Saving checkpoints for 24441 into model\model.ckpt.
INFO:tensorflow:global_step/sec: 2.2249
INFO:tensorflow:loss = 55.0136, step = 24501 (44.945

INFO:tensorflow:global_step/sec: 24.5189
INFO:tensorflow:loss = 59.1505, step = 27501 (4.078 sec)
INFO:tensorflow:global_step/sec: 24.7436
INFO:tensorflow:loss = 55.9342, step = 27601 (4.041 sec)
INFO:tensorflow:global_step/sec: 24.6672
INFO:tensorflow:loss = 56.2485, step = 27701 (4.054 sec)
INFO:tensorflow:global_step/sec: 24.652
INFO:tensorflow:loss = 61.0133, step = 27801 (4.057 sec)
INFO:tensorflow:global_step/sec: 24.8513
INFO:tensorflow:loss = 58.4104, step = 27901 (4.023 sec)
INFO:tensorflow:global_step/sec: 24.6035
INFO:tensorflow:loss = 57.3694, step = 28001 (4.065 sec)
INFO:tensorflow:global_step/sec: 24.7559
INFO:tensorflow:loss = 63.7575, step = 28101 (4.039 sec)
INFO:tensorflow:global_step/sec: 24.6095
INFO:tensorflow:loss = 57.7631, step = 28201 (4.063 sec)
INFO:tensorflow:global_step/sec: 24.8112
INFO:tensorflow:loss = 63.2922, step = 28301 (4.030 sec)
INFO:tensorflow:global_step/sec: 24.6398
INFO:tensorflow:loss = 60.438, step = 28401 (4.059 sec)
INFO:tensorflow:global

INFO:tensorflow:global_step/sec: 25.0162
INFO:tensorflow:loss = 55.7447, step = 35901 (3.997 sec)
INFO:tensorflow:global_step/sec: 25.0162
INFO:tensorflow:loss = 52.834, step = 36001 (3.998 sec)
INFO:tensorflow:global_step/sec: 25.0256
INFO:tensorflow:loss = 56.3309, step = 36101 (3.995 sec)
INFO:tensorflow:global_step/sec: 24.8421
INFO:tensorflow:loss = 57.7372, step = 36201 (4.025 sec)
INFO:tensorflow:global_step/sec: 25.0162
INFO:tensorflow:loss = 56.3903, step = 36301 (4.041 sec)
INFO:tensorflow:global_step/sec: 24.6886
INFO:tensorflow:loss = 56.278, step = 36401 (4.006 sec)
INFO:tensorflow:global_step/sec: 24.7559
INFO:tensorflow:loss = 57.6411, step = 36501 (4.039 sec)
INFO:tensorflow:global_step/sec: 24.7314
INFO:tensorflow:loss = 55.876, step = 36601 (4.043 sec)
INFO:tensorflow:global_step/sec: 24.839
INFO:tensorflow:loss = 57.1064, step = 36701 (4.026 sec)
INFO:tensorflow:global_step/sec: 24.7866
INFO:tensorflow:loss = 59.2289, step = 36801 (4.035 sec)
INFO:tensorflow:global_s

INFO:tensorflow:loss = 50.5614, step = 39701 (4.009 sec)
INFO:tensorflow:global_step/sec: 25.1386
INFO:tensorflow:loss = 63.4351, step = 39801 (3.978 sec)
INFO:tensorflow:global_step/sec: 25.196
INFO:tensorflow:loss = 64.0551, step = 39901 (3.969 sec)
INFO:tensorflow:global_step/sec: 25.1086
INFO:tensorflow:loss = 66.6836, step = 40001 (3.982 sec)
INFO:tensorflow:global_step/sec: 25.139
INFO:tensorflow:loss = 55.7223, step = 40101 (3.978 sec)
INFO:tensorflow:global_step/sec: 25.3238
INFO:tensorflow:loss = 58.6215, step = 40201 (3.950 sec)
INFO:tensorflow:global_step/sec: 25.0948
INFO:tensorflow:loss = 59.5256, step = 40301 (3.984 sec)
INFO:tensorflow:global_step/sec: 25.0256
INFO:tensorflow:loss = 59.4409, step = 40401 (3.996 sec)
INFO:tensorflow:global_step/sec: 25.1516
INFO:tensorflow:loss = 54.5701, step = 40501 (3.976 sec)
INFO:tensorflow:global_step/sec: 25.2108
INFO:tensorflow:loss = 60.807, step = 40601 (3.968 sec)
INFO:tensorflow:global_step/sec: 25.0319
INFO:tensorflow:loss = 

INFO:tensorflow:loss = 54.7382, step = 48101 (4.048 sec)
INFO:tensorflow:global_step/sec: 24.6218
INFO:tensorflow:loss = 54.1818, step = 48201 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.6887
INFO:tensorflow:loss = 60.2667, step = 48301 (4.050 sec)
INFO:tensorflow:global_step/sec: 24.6036
INFO:tensorflow:loss = 59.6266, step = 48401 (4.064 sec)
INFO:tensorflow:global_step/sec: 24.6704
INFO:tensorflow:loss = 60.2661, step = 48501 (4.053 sec)
INFO:tensorflow:global_step/sec: 24.8299
INFO:tensorflow:loss = 58.2693, step = 48601 (4.028 sec)
INFO:tensorflow:global_step/sec: 24.8669
INFO:tensorflow:loss = 61.2629, step = 48701 (4.021 sec)
INFO:tensorflow:global_step/sec: 24.6979
INFO:tensorflow:loss = 61.7617, step = 48801 (4.049 sec)
INFO:tensorflow:global_step/sec: 24.8021
INFO:tensorflow:loss = 59.8862, step = 48901 (4.032 sec)
INFO:tensorflow:global_step/sec: 24.901
INFO:tensorflow:loss = 54.0523, step = 49001 (4.016 sec)
INFO:tensorflow:global_step/sec: 24.8144
INFO:tensorflow:loss 

INFO:tensorflow:global_step/sec: 5.66697
INFO:tensorflow:loss = 57.2232, step = 52001 (17.690 sec)
INFO:tensorflow:global_step/sec: 24.495
INFO:tensorflow:loss = 63.0899, step = 52101 (4.039 sec)
INFO:tensorflow:global_step/sec: 24.7285
INFO:tensorflow:loss = 52.9848, step = 52201 (4.043 sec)
INFO:tensorflow:global_step/sec: 24.6248
INFO:tensorflow:loss = 58.8409, step = 52301 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.5401
INFO:tensorflow:loss = 56.6965, step = 52401 (4.076 sec)
INFO:tensorflow:global_step/sec: 24.6188
INFO:tensorflow:loss = 58.9792, step = 52501 (4.062 sec)
INFO:tensorflow:global_step/sec: 24.3933
INFO:tensorflow:loss = 53.2569, step = 52601 (4.098 sec)
INFO:tensorflow:global_step/sec: 24.5885
INFO:tensorflow:loss = 58.4467, step = 52701 (4.067 sec)
INFO:tensorflow:global_step/sec: 24.6036
INFO:tensorflow:loss = 59.7987, step = 52801 (4.064 sec)
INFO:tensorflow:global_step/sec: 24.7132
INFO:tensorflow:loss = 62.8489, step = 52901 (4.046 sec)
INFO:tensorflow:glob

INFO:tensorflow:global_step/sec: 24.6887
INFO:tensorflow:loss = 61.05, step = 60401 (4.050 sec)
INFO:tensorflow:global_step/sec: 24.6248
INFO:tensorflow:loss = 58.6605, step = 60501 (4.062 sec)
INFO:tensorflow:global_step/sec: 24.6613
INFO:tensorflow:loss = 55.4084, step = 60601 (4.054 sec)
INFO:tensorflow:global_step/sec: 24.5854
INFO:tensorflow:loss = 60.3302, step = 60701 (4.067 sec)
INFO:tensorflow:global_step/sec: 24.8021
INFO:tensorflow:loss = 51.8679, step = 60801 (4.034 sec)
INFO:tensorflow:global_step/sec: 24.5643
INFO:tensorflow:loss = 63.2387, step = 60901 (4.069 sec)
INFO:tensorflow:global_step/sec: 24.3132
INFO:tensorflow:loss = 59.4592, step = 61001 (4.148 sec)
INFO:tensorflow:global_step/sec: 24.1108
INFO:tensorflow:loss = 56.0696, step = 61101 (4.113 sec)
INFO:tensorflow:global_step/sec: 24.4232
INFO:tensorflow:loss = 59.2223, step = 61201 (4.094 sec)
INFO:tensorflow:global_step/sec: 24.5582
INFO:tensorflow:loss = 64.6697, step = 61301 (4.072 sec)
INFO:tensorflow:global

INFO:tensorflow:Evaluation [68/100]
INFO:tensorflow:Evaluation [69/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [71/100]
INFO:tensorflow:Evaluation [72/100]
INFO:tensorflow:Evaluation [73/100]
INFO:tensorflow:Evaluation [74/100]
INFO:tensorflow:Evaluation [75/100]
INFO:tensorflow:Evaluation [76/100]
INFO:tensorflow:Evaluation [77/100]
INFO:tensorflow:Evaluation [78/100]
INFO:tensorflow:Evaluation [79/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [81/100]
INFO:tensorflow:Evaluation [82/100]
INFO:tensorflow:Evaluation [83/100]
INFO:tensorflow:Evaluation [84/100]
INFO:tensorflow:Evaluation [85/100]
INFO:tensorflow:Evaluation [86/100]
INFO:tensorflow:Evaluation [87/100]
INFO:tensorflow:Evaluation [88/100]
INFO:tensorflow:Evaluation [89/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [91/100]
INFO:tensorflow:Evaluation [92/100]
INFO:tensorflow:Evaluation [93/100]
INFO:tensorflow:Evaluation [94/100]
INFO:tensorflow:Evaluation [

INFO:tensorflow:global_step/sec: 24.64
INFO:tensorflow:loss = 53.4427, step = 72601 (4.057 sec)
INFO:tensorflow:global_step/sec: 24.8021
INFO:tensorflow:loss = 57.1948, step = 72701 (4.032 sec)
INFO:tensorflow:global_step/sec: 24.7683
INFO:tensorflow:loss = 58.4257, step = 72801 (4.037 sec)
INFO:tensorflow:global_step/sec: 24.6583
INFO:tensorflow:loss = 55.359, step = 72901 (4.055 sec)
INFO:tensorflow:global_step/sec: 24.7714
INFO:tensorflow:loss = 64.4708, step = 73001 (4.037 sec)
INFO:tensorflow:global_step/sec: 24.7591
INFO:tensorflow:loss = 55.1437, step = 73101 (4.039 sec)
INFO:tensorflow:global_step/sec: 24.8114
INFO:tensorflow:loss = 61.1522, step = 73201 (4.030 sec)
INFO:tensorflow:global_step/sec: 24.7162
INFO:tensorflow:loss = 56.163, step = 73301 (4.046 sec)
INFO:tensorflow:global_step/sec: 24.9166
INFO:tensorflow:loss = 64.084, step = 73401 (4.013 sec)
INFO:tensorflow:global_step/sec: 24.8175
INFO:tensorflow:loss = 54.3814, step = 73501 (4.030 sec)
INFO:tensorflow:global_st

INFO:tensorflow:global_step/sec: 24.6218
INFO:tensorflow:loss = 50.7924, step = 84801 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.6887
INFO:tensorflow:loss = 55.2129, step = 84901 (4.050 sec)
INFO:tensorflow:global_step/sec: 24.5975
INFO:tensorflow:loss = 58.1452, step = 85001 (4.118 sec)
INFO:tensorflow:global_step/sec: 24.507
INFO:tensorflow:loss = 62.801, step = 85101 (4.027 sec)
INFO:tensorflow:global_step/sec: 24.8546
INFO:tensorflow:loss = 51.3382, step = 85201 (4.023 sec)
INFO:tensorflow:global_step/sec: 24.6006
INFO:tensorflow:loss = 59.9421, step = 85301 (4.065 sec)
INFO:tensorflow:global_step/sec: 24.6583
INFO:tensorflow:loss = 55.5681, step = 85401 (4.055 sec)
INFO:tensorflow:global_step/sec: 24.7009
INFO:tensorflow:loss = 60.033, step = 85501 (4.048 sec)
INFO:tensorflow:global_step/sec: 24.7101
INFO:tensorflow:loss = 57.2714, step = 85601 (4.046 sec)
INFO:tensorflow:global_step/sec: 24.8453
INFO:tensorflow:loss = 56.7634, step = 85701 (4.025 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 24.7898
INFO:tensorflow:loss = 54.4022, step = 97001 (4.034 sec)
INFO:tensorflow:global_step/sec: 24.799
INFO:tensorflow:loss = 54.7069, step = 97101 (4.032 sec)
INFO:tensorflow:global_step/sec: 24.8669
INFO:tensorflow:loss = 60.5656, step = 97201 (4.021 sec)
INFO:tensorflow:global_step/sec: 24.6826
INFO:tensorflow:loss = 56.9094, step = 97301 (4.051 sec)
INFO:tensorflow:global_step/sec: 24.6979
INFO:tensorflow:loss = 63.1643, step = 97401 (4.049 sec)
INFO:tensorflow:global_step/sec: 24.7683
INFO:tensorflow:loss = 59.8029, step = 97501 (4.038 sec)
INFO:tensorflow:global_step/sec: 24.6826
INFO:tensorflow:loss = 48.2576, step = 97601 (4.051 sec)
INFO:tensorflow:global_step/sec: 24.7867
INFO:tensorflow:loss = 62.3083, step = 97701 (4.035 sec)
INFO:tensorflow:global_step/sec: 24.7775
INFO:tensorflow:loss = 57.1011, step = 97801 (4.035 sec)
INFO:tensorflow:global_step/sec: 24.8144
INFO:tensorflow:loss = 58.4012, step = 97901 (4.030 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 24.9882
INFO:tensorflow:loss = 56.0788, step = 105301 (4.002 sec)
INFO:tensorflow:global_step/sec: 24.9508
INFO:tensorflow:loss = 65.0675, step = 105401 (4.009 sec)
INFO:tensorflow:global_step/sec: 25.1328
INFO:tensorflow:loss = 56.7698, step = 105501 (3.979 sec)
INFO:tensorflow:global_step/sec: 25.0133
INFO:tensorflow:loss = 53.0346, step = 105601 (3.998 sec)
INFO:tensorflow:global_step/sec: 25.0572
INFO:tensorflow:loss = 49.5731, step = 105701 (3.991 sec)
INFO:tensorflow:global_step/sec: 25.007
INFO:tensorflow:loss = 56.3573, step = 105801 (3.998 sec)
INFO:tensorflow:global_step/sec: 25.1518
INFO:tensorflow:loss = 58.4203, step = 105901 (3.976 sec)
INFO:tensorflow:Starting evaluation at 2017-12-15-03:36:14
INFO:tensorflow:Restoring parameters from model\model.ckpt-105141
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:ten

INFO:tensorflow:loss = 54.1189, step = 109101 (3.967 sec)
INFO:tensorflow:global_step/sec: 25.2216
INFO:tensorflow:loss = 60.4954, step = 109201 (3.965 sec)
INFO:tensorflow:global_step/sec: 25.2471
INFO:tensorflow:loss = 56.7711, step = 109301 (3.961 sec)
INFO:tensorflow:global_step/sec: 25.0886
INFO:tensorflow:loss = 56.1473, step = 109401 (3.985 sec)
INFO:tensorflow:global_step/sec: 25.1518
INFO:tensorflow:loss = 67.0633, step = 109501 (4.021 sec)
INFO:tensorflow:global_step/sec: 24.8391
INFO:tensorflow:loss = 62.923, step = 109601 (3.981 sec)
INFO:tensorflow:global_step/sec: 24.8953
INFO:tensorflow:loss = 55.2515, step = 109701 (4.017 sec)
INFO:tensorflow:global_step/sec: 24.7898
INFO:tensorflow:loss = 54.5145, step = 109801 (4.034 sec)
INFO:tensorflow:global_step/sec: 24.8886
INFO:tensorflow:loss = 56.9071, step = 109901 (4.018 sec)
INFO:tensorflow:global_step/sec: 24.8948
INFO:tensorflow:loss = 45.1412, step = 110001 (4.017 sec)
INFO:tensorflow:global_step/sec: 24.8607
INFO:tensor

INFO:tensorflow:loss = 61.8028, step = 117401 (4.023 sec)
INFO:tensorflow:global_step/sec: 24.8515
INFO:tensorflow:loss = 54.3808, step = 117501 (4.024 sec)
INFO:tensorflow:global_step/sec: 24.9695
INFO:tensorflow:loss = 67.8313, step = 117601 (4.005 sec)
INFO:tensorflow:global_step/sec: 24.8669
INFO:tensorflow:loss = 61.3431, step = 117701 (4.021 sec)
INFO:tensorflow:global_step/sec: 24.8948
INFO:tensorflow:loss = 47.6335, step = 117801 (4.017 sec)
INFO:tensorflow:global_step/sec: 24.8917
INFO:tensorflow:loss = 58.0315, step = 117901 (4.017 sec)
INFO:tensorflow:global_step/sec: 24.7837
INFO:tensorflow:loss = 62.8806, step = 118001 (4.035 sec)
INFO:tensorflow:global_step/sec: 24.8824
INFO:tensorflow:loss = 60.193, step = 118101 (4.019 sec)
INFO:tensorflow:global_step/sec: 24.6036
INFO:tensorflow:loss = 55.4416, step = 118201 (4.064 sec)
INFO:tensorflow:global_step/sec: 24.7346
INFO:tensorflow:loss = 56.3466, step = 118301 (4.043 sec)
INFO:tensorflow:global_step/sec: 24.6552
INFO:tensor

INFO:tensorflow:loss = 52.8101, step = 121201 (4.044 sec)
INFO:tensorflow:global_step/sec: 24.6036
INFO:tensorflow:loss = 53.7759, step = 121301 (4.065 sec)
INFO:tensorflow:global_step/sec: 24.5673
INFO:tensorflow:loss = 57.5751, step = 121401 (4.069 sec)
INFO:tensorflow:global_step/sec: 24.8515
INFO:tensorflow:loss = 59.4562, step = 121501 (4.025 sec)
INFO:tensorflow:global_step/sec: 24.7193
INFO:tensorflow:loss = 54.2487, step = 121601 (4.046 sec)
INFO:tensorflow:global_step/sec: 24.6097
INFO:tensorflow:loss = 60.6345, step = 121701 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.8114
INFO:tensorflow:loss = 51.4506, step = 121801 (4.030 sec)
INFO:tensorflow:global_step/sec: 24.7009
INFO:tensorflow:loss = 59.3894, step = 121901 (4.048 sec)
INFO:tensorflow:global_step/sec: 24.8144
INFO:tensorflow:loss = 56.4715, step = 122001 (4.030 sec)
INFO:tensorflow:global_step/sec: 24.7929
INFO:tensorflow:loss = 54.0364, step = 122101 (4.033 sec)
INFO:tensorflow:global_step/sec: 24.6431
INFO:tenso

INFO:tensorflow:global_step/sec: 24.6218
INFO:tensorflow:loss = 57.6908, step = 129601 (4.061 sec)
INFO:tensorflow:global_step/sec: 24.8824
INFO:tensorflow:loss = 56.214, step = 129701 (4.019 sec)
INFO:tensorflow:global_step/sec: 24.6643
INFO:tensorflow:loss = 57.8846, step = 129801 (4.054 sec)
INFO:tensorflow:global_step/sec: 24.8083
INFO:tensorflow:loss = 52.232, step = 129901 (4.031 sec)
INFO:tensorflow:global_step/sec: 24.8206
INFO:tensorflow:loss = 55.4634, step = 130001 (4.029 sec)
INFO:tensorflow:global_step/sec: 25.0446
INFO:tensorflow:loss = 57.0263, step = 130101 (3.992 sec)
INFO:tensorflow:global_step/sec: 24.9945
INFO:tensorflow:loss = 56.1254, step = 130201 (4.001 sec)
INFO:tensorflow:global_step/sec: 25.0823
INFO:tensorflow:loss = 56.4826, step = 130301 (3.986 sec)
INFO:tensorflow:global_step/sec: 25.0792
INFO:tensorflow:loss = 53.5995, step = 130401 (3.988 sec)
INFO:tensorflow:global_step/sec: 25.174
INFO:tensorflow:loss = 54.0538, step = 130501 (3.972 sec)
INFO:tensorfl

INFO:tensorflow:global_step/sec: 24.8052
INFO:tensorflow:loss = 56.0348, step = 133401 (4.031 sec)
INFO:tensorflow:global_step/sec: 24.5703
INFO:tensorflow:loss = 51.444, step = 133501 (4.069 sec)
INFO:tensorflow:global_step/sec: 24.5945
INFO:tensorflow:loss = 51.1758, step = 133601 (4.066 sec)
INFO:tensorflow:global_step/sec: 24.9789
INFO:tensorflow:loss = 59.7237, step = 133701 (4.003 sec)
INFO:tensorflow:global_step/sec: 24.8731
INFO:tensorflow:loss = 60.3291, step = 133801 (4.022 sec)
INFO:tensorflow:global_step/sec: 24.8422
INFO:tensorflow:loss = 55.9083, step = 133901 (4.025 sec)
INFO:tensorflow:global_step/sec: 25.117
INFO:tensorflow:loss = 59.8121, step = 134001 (3.980 sec)
INFO:tensorflow:global_step/sec: 24.7315
INFO:tensorflow:loss = 54.0558, step = 134101 (4.089 sec)
INFO:tensorflow:global_step/sec: 24.707
INFO:tensorflow:loss = 55.8617, step = 134201 (4.002 sec)
INFO:tensorflow:global_step/sec: 25.1391
INFO:tensorflow:loss = 60.8318, step = 134301 (3.978 sec)
INFO:tensorfl

INFO:tensorflow:global_step/sec: 24.9446
INFO:tensorflow:loss = 60.8251, step = 141701 (4.009 sec)
INFO:tensorflow:global_step/sec: 25.1423
INFO:tensorflow:loss = 49.2067, step = 141801 (3.977 sec)
INFO:tensorflow:global_step/sec: 25.0164
INFO:tensorflow:loss = 49.5454, step = 141901 (3.998 sec)
INFO:tensorflow:global_step/sec: 25.0383
INFO:tensorflow:loss = 55.6454, step = 142001 (3.994 sec)
INFO:tensorflow:global_step/sec: 25.0352
INFO:tensorflow:loss = 53.8166, step = 142101 (3.996 sec)
INFO:tensorflow:global_step/sec: 25.0007
INFO:tensorflow:loss = 58.6509, step = 142201 (3.998 sec)
INFO:tensorflow:global_step/sec: 24.9976
INFO:tensorflow:loss = 59.199, step = 142301 (4.000 sec)
INFO:tensorflow:global_step/sec: 24.9757
INFO:tensorflow:loss = 47.0557, step = 142401 (4.003 sec)
INFO:tensorflow:global_step/sec: 24.8391
INFO:tensorflow:loss = 62.8963, step = 142501 (4.026 sec)
INFO:tensorflow:global_step/sec: 25.0446
INFO:tensorflow:loss = 51.9721, step = 142601 (3.993 sec)
INFO:tensor

INFO:tensorflow:Evaluation [93/100]
INFO:tensorflow:Evaluation [94/100]
INFO:tensorflow:Evaluation [95/100]
INFO:tensorflow:Evaluation [96/100]
INFO:tensorflow:Evaluation [97/100]
INFO:tensorflow:Evaluation [98/100]
INFO:tensorflow:Evaluation [99/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2017-12-15-04:09:16
INFO:tensorflow:Saving dict for global step 150000: accuracy = 0.7106, accuracy_baseline = 0.5076, auc = 0.789463, auc_precision_recall = 0.793312, average_loss = 0.555424, global_step = 150000, label/mean = 0.5076, loss = 55.5424, prediction/mean = 0.48604
INFO:tensorflow:Starting evaluation at 2017-12-15-04:09:23
INFO:tensorflow:Restoring parameters from model\model.ckpt-150000
INFO:tensorflow:Finished evaluation at 2017-12-15-04:12:23
INFO:tensorflow:Saving dict for global step 150000: accuracy = 0.711967, accuracy_baseline = 0.503512, auc = 0.789492, auc_precision_recall = 0.792645, average_loss = 0.553816, global_step = 150000, label/mean 

In [13]:
#Score the validation set against one specific saved checkpoint (global step 105141)
#rather than the most recent one; the returned metrics dict is the cell's output.
#NOTE(review): the evaluation logged for step 150000 reports a higher AUC than this
#checkpoint does - confirm that 105141 is really the intended "best" checkpoint.

deep_model.evaluate(
    checkpoint_path='model/model.ckpt-105141',
    input_fn=input_fn(val_set, y_val, batch_size=100, mode='eval'),
)


INFO:tensorflow:Starting evaluation at 2017-12-15-04:21:26
INFO:tensorflow:Restoring parameters from model/model.ckpt-105141
INFO:tensorflow:Finished evaluation at 2017-12-15-04:24:29
INFO:tensorflow:Saving dict for global step 105141: accuracy = 0.702944, accuracy_baseline = 0.503512, auc = 0.778551, auc_precision_recall = 0.780496, average_loss = 0.56393, global_step = 105141, label/mean = 0.503512, loss = 56.3915, prediction/mean = 0.487355


{'accuracy': 0.70294434,
 'accuracy_baseline': 0.50351244,
 'auc': 0.77855104,
 'auc_precision_recall': 0.78049606,
 'average_loss': 0.56393021,
 'global_step': 105141,
 'label/mean': 0.50351244,
 'loss': 56.391529,
 'prediction/mean': 0.48735479}

In [14]:
#Make predictions on the test set and write them to a submission file.
#No checkpoint_path is passed to predict(), so the estimator restores the most recent
#checkpoint in its model directory rather than a hand-picked one.

predictions = deep_model.predict(input_fn=input_fn(test_set, None, mode='predict',
                                                   batch_size=10000))

#predict() yields one dict per test row; keep the entry at index 1 of 'probabilities'
#(presumably the probability of target == 1 - verify against the label encoding).

submission = [row['probabilities'][1] for row in predictions]

#Pair each score with the submission id separated from the test set earlier and save
#without the DataFrame index.

pd.DataFrame(data={'id': ids,
                   'target': np.array(submission)}).to_csv('submissions/embedded_deep_final.csv',
                                                           header=['id', 'target'],
                                                           index=False)


INFO:tensorflow:Restoring parameters from model\model.ckpt-150000
