# Repsly trial data

In [1]:
from repsly_data import RepslyData

# Load the trial-user dataset once; later cells read batches from `repsly_data`.
repsly_data = RepslyData()
# flush=True pushes the progress message out before the slow read starts;
# with end='' (no newline) it could otherwise stay buffered until 'done.'.
print('Reading data (this might take a minute or so)...', end='', flush=True)
repsly_data.read_data('data/trial_users_analysis.csv', mode='FC')
print('done.')

Reading data (this might take a minute or so)...done.



Let's see what the data looks like:

In [2]:
# Pull a single batch of 20 examples to inspect what the reader yields.
read_batch = repsly_data.read_batch(batch_size=20)

X, y = next(read_batch)
# Show the feature matrix (prefixed with its shape) followed by the labels.
print('X{}:'.format(list(X.shape)), X)
print('y:', y)

X[20, 241]: [[ 303.    1.    4. ...,    0.    0.    0.]
 [ 192.    4.    3. ...,    0.    0.    0.]
 [ 363.    0.    0. ...,    0.    0.    0.]
 ..., 
 [ 180.    0.    0. ...,    0.    0.    0.]
 [ 336.    0.    0. ...,    0.    0.    0.]
 [ 459.    2.    3. ...,    0.    0.    0.]]
y: [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0]


As you can see above, each input vector in `X` has `1+15*16=241` values, most of which are zeros. The first value is the trial start date, expressed as an offset from `2016-01-01`; the remaining values are `15` different usage parameters for each of the following `16` days. The data returned by the batch reader is randomly shuffled. The output values are stored in `y` and indicate whether or not the user purchased the Repsly service after the trial.

# Training

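First, we create the model object for a fully connected network. Its architecture (two fully connected hidden layers of size 250 with roughly 50% dropout) is configured in the training step below: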

In [3]:
from repsly_nn import RepslyFC

# Instantiate the model wrapper only; the network itself is built later by
# calling `repsly_nn.create_net(...)` with a concrete architecture.
repsly_nn = RepslyFC()

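Then we train it for a fixed number of epochs, rebuilding and retraining the network for every combination of the dropout settings `keep_prob` and `input_keep_prob`, and recording the resulting statistics for each combination.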

In [4]:
import numpy as np

# Hidden-layer sizes handed to create_net.
arch = [250, 250]

# Grid of dropout keep probabilities to evaluate (6 x 6 combinations).
keep_probs = np.linspace(0.4, 0.6, 6)
input_keep_probs = np.linspace(0.7, 1.0, 6)

# Learning-rate settings forwarded unchanged to create_net.
learning_rate = 0.001
decay_steps = 10
decay_rate = 0.99

# Training-run settings forwarded to train.
batch_size = 1024
epochs_loops = 1
epochs_at_once = 40
skip_steps = 20

# Most recent training statistics, keyed by (keep_prob, input_keep_prob).
stats = {}

banner_rule = '*' * 116
banner_pad = ' ' * 40

for i in range(epochs_loops):
    print('{}\n*\n* {}[{}/{}] Training for {} epochs\n*\n{}\n'.format(
        banner_rule, banner_pad, i + 1, epochs_loops, epochs_at_once, banner_rule))

    for keep_prob in keep_probs:
        for input_keep_prob in input_keep_probs:
            grid_key = (keep_prob, input_keep_prob)
            arch_dict = dict(keep_prob=keep_prob, input_keep_prob=input_keep_prob)

            # Rebuild the network for this setting, then train and record the result.
            repsly_nn.create_net(arch, arch_dict, learning_rate, decay_steps, decay_rate)
            run_stats = repsly_nn.train(
                data=repsly_data,
                batch_size=batch_size,
                epochs=epochs_at_once,
                skip_steps=skip_steps,
            )
            stats[grid_key] = run_stats
            print(run_stats)

********************************************************************************************************************
*
*                                         [1/1] Training for 40 epochs
*
********************************************************************************************************************

Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10
Creating tf.train.Saver()...done
self.checkpoint_path: checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10
ckpt: model_checkpoint_path: "checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10/checkpoint-2200"
all_model_checkpoint_paths: "checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10/checkpoint-2120"
all_model_checkpoint_paths: "checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10

[02200/0.1 sec]   train/validation loss = 0.06576/0.78842
[02220/0.4 sec]   train/validation loss = 0.07796/0.87877
[02240/0.6 sec]   train/validation loss = 0.08273/0.62435
[02260/0.9 sec]   train/validation loss = 0.09385/0.75872
[02280/1.1 sec]   train/validation loss = 0.06666/0.59067
[02300/1.4 sec]   train/validation loss = 0.07744/0.90722
[02320/1.7 sec]   train/validation loss = 0.05858/0.77175
[02340/1.9 sec]   train/validation loss = 0.06707/0.41186
[02360/2.2 sec]   train/validation loss = 0.06224/0.70364
[02380/2.4 sec]   train/validation loss = 0.06186/0.76621
[02400/2.7 sec]   train/validation loss = 0.08641/1.00193
[02420/3.0 sec]   train/validation loss = 0.08148/0.94677
{'loss': 1.0214353799819946, 'accuracy': 0.93333333333333335, 'precision': 0.7142857142857143, 'recall': 0.66666666666666663, 'f1_score': 0.68965517241379304}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.94/lr-0.

[02200/0.1 sec]   train/validation loss = 0.09145/1.67206
[02220/0.4 sec]   train/validation loss = 0.06829/1.47986
[02240/0.6 sec]   train/validation loss = 0.06431/2.30453
[02260/0.9 sec]   train/validation loss = 0.07610/1.82160
[02280/1.2 sec]   train/validation loss = 0.07130/1.75135
[02300/1.5 sec]   train/validation loss = 0.08216/0.75220
[02320/1.7 sec]   train/validation loss = 0.06863/2.20107
[02340/2.0 sec]   train/validation loss = 0.06907/1.20016
[02360/2.3 sec]   train/validation loss = 0.08519/0.65620
[02380/2.6 sec]   train/validation loss = 0.07608/1.01263
[02400/2.9 sec]   train/validation loss = 0.06966/0.78783
[02420/3.1 sec]   train/validation loss = 0.06811/2.26467
{'loss': 0.85033619403839111, 'accuracy': 0.92592592592592593, 'precision': 0.80000000000000004, 'recall': 0.5714285714285714, 'f1_score': 0.66666666666666663}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.44/input_keep_prob-0.82/lr-

[02200/0.1 sec]   train/validation loss = 0.06036/0.42588
[02220/0.4 sec]   train/validation loss = 0.12212/0.44246
[02240/0.6 sec]   train/validation loss = 0.09278/0.85336
[02260/0.9 sec]   train/validation loss = 0.08774/0.72505
[02280/1.2 sec]   train/validation loss = 0.08430/0.55502
[02300/1.5 sec]   train/validation loss = 0.08145/1.03739
[02320/1.7 sec]   train/validation loss = 0.07959/0.88901
[02340/2.0 sec]   train/validation loss = 0.10396/1.05200
[02360/2.3 sec]   train/validation loss = 0.09448/1.15631
[02380/2.6 sec]   train/validation loss = 0.09839/0.85626
[02400/2.8 sec]   train/validation loss = 0.08577/1.05653
[02420/3.1 sec]   train/validation loss = 0.07559/0.96906
{'loss': 0.65802198648452759, 'accuracy': 0.937037037037037, 'precision': 0.82758620689655171, 'recall': 0.66666666666666663, 'f1_score': 0.7384615384615385}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.48/input_keep_prob-0.7/lr-0.0

[02200/0.1 sec]   train/validation loss = 0.07165/1.16823
[02220/0.4 sec]   train/validation loss = 0.05463/1.07877
[02240/0.6 sec]   train/validation loss = 0.07815/1.97999
[02260/0.9 sec]   train/validation loss = 0.07310/1.16365
[02280/1.2 sec]   train/validation loss = 0.09109/0.51884
[02300/1.5 sec]   train/validation loss = 0.09255/1.02600
[02320/1.7 sec]   train/validation loss = 0.07407/1.16712
[02340/2.0 sec]   train/validation loss = 0.07125/0.57678
[02360/2.3 sec]   train/validation loss = 0.08150/0.55907
[02380/2.6 sec]   train/validation loss = 0.05698/0.76022
[02400/2.8 sec]   train/validation loss = 0.07609/0.56847
[02420/3.1 sec]   train/validation loss = 0.09421/1.17964
{'loss': 1.9235163927078247, 'accuracy': 0.92222222222222228, 'precision': 0.68000000000000005, 'recall': 0.56666666666666665, 'f1_score': 0.61818181818181828}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.48/input_keep_prob-0.94/lr-

[02200/0.1 sec]   train/validation loss = 0.08449/2.07077
[02220/0.4 sec]   train/validation loss = 0.07753/1.80989
[02240/0.6 sec]   train/validation loss = 0.06455/1.67577
[02260/0.9 sec]   train/validation loss = 0.05815/2.46454
[02280/1.2 sec]   train/validation loss = 0.07292/1.76866
[02300/1.4 sec]   train/validation loss = 0.06996/2.60755
[02320/1.7 sec]   train/validation loss = 0.09836/1.15811
[02340/2.0 sec]   train/validation loss = 0.07540/1.28311
[02360/2.2 sec]   train/validation loss = 0.07627/1.47875
[02380/2.5 sec]   train/validation loss = 0.06603/1.96507
[02400/2.8 sec]   train/validation loss = 0.05657/0.58727
[02420/3.1 sec]   train/validation loss = 0.07677/0.32088
{'loss': 0.78894269466400146, 'accuracy': 0.94074074074074077, 'precision': 0.86956521739130432, 'recall': 0.60606060606060608, 'f1_score': 0.7142857142857143}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.52/input_keep_prob-0.82/lr-

[02200/0.1 sec]   train/validation loss = 0.07891/0.80849
[02220/0.4 sec]   train/validation loss = 0.08616/1.16778
[02240/0.6 sec]   train/validation loss = 0.06421/1.89600
[02260/0.9 sec]   train/validation loss = 0.07633/1.49558
[02280/1.2 sec]   train/validation loss = 0.05784/0.91289
[02300/1.4 sec]   train/validation loss = 0.06498/1.59471
[02320/1.7 sec]   train/validation loss = 0.05160/0.95153
[02340/2.0 sec]   train/validation loss = 0.06624/0.75721
[02360/2.2 sec]   train/validation loss = 0.06429/0.86884
[02380/2.5 sec]   train/validation loss = 0.08765/0.59023
[02400/2.8 sec]   train/validation loss = 0.08327/0.88686
[02420/3.1 sec]   train/validation loss = 0.07356/0.71049
{'loss': 1.3069121837615967, 'accuracy': 0.937037037037037, 'precision': 0.73913043478260865, 'recall': 0.6071428571428571, 'f1_score': 0.66666666666666663}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.56/input_keep_prob-0.7/lr-0.00

[02200/0.1 sec]   train/validation loss = 0.05779/1.20585
[02220/0.4 sec]   train/validation loss = 0.09683/0.82708
[02240/0.6 sec]   train/validation loss = 0.06967/1.26119
[02260/0.9 sec]   train/validation loss = 0.09305/0.77338
[02280/1.2 sec]   train/validation loss = 0.06436/1.18849
[02300/1.4 sec]   train/validation loss = 0.10944/1.35428
[02320/1.7 sec]   train/validation loss = 0.08339/1.21113
[02340/2.0 sec]   train/validation loss = 0.06851/1.34791
[02360/2.2 sec]   train/validation loss = 0.08326/1.20329
[02380/2.5 sec]   train/validation loss = 0.07110/0.74278
[02400/2.8 sec]   train/validation loss = 0.07414/0.98988
[02420/3.0 sec]   train/validation loss = 0.08759/1.36587
{'loss': 1.3922137022018433, 'accuracy': 0.92962962962962958, 'precision': 0.75, 'recall': 0.51724137931034486, 'f1_score': 0.6122448979591838}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.56/input_keep_prob-0.94/lr-0.001/dr-0.99/ds

[02200/0.1 sec]   train/validation loss = 0.07221/0.93426
[02220/0.4 sec]   train/validation loss = 0.09131/0.75529
[02240/0.6 sec]   train/validation loss = 0.08418/1.17067
[02260/0.9 sec]   train/validation loss = 0.09470/1.26887
[02280/1.2 sec]   train/validation loss = 0.06397/1.75107
[02300/1.4 sec]   train/validation loss = 0.05214/0.55283
[02320/1.7 sec]   train/validation loss = 0.07622/1.99184
[02340/2.0 sec]   train/validation loss = 0.04958/1.58739
[02360/2.2 sec]   train/validation loss = 0.07118/0.82047
[02380/2.5 sec]   train/validation loss = 0.06721/0.79272
[02400/2.8 sec]   train/validation loss = 0.10935/1.95923
[02420/3.0 sec]   train/validation loss = 0.07624/0.46356
{'loss': 1.9211173057556152, 'accuracy': 0.90370370370370368, 'precision': 0.59999999999999998, 'recall': 0.40000000000000002, 'f1_score': 0.47999999999999998}
Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.6/input_keep_prob-0.82/lr-0

[01980/0.1 sec]   train/validation loss = 0.08394/0.64996
[02000/0.4 sec]   train/validation loss = 0.07399/1.07703
[02020/0.6 sec]   train/validation loss = 0.09249/1.21370
[02040/0.9 sec]   train/validation loss = 0.07612/0.58803
[02060/1.2 sec]   train/validation loss = 0.06494/0.60585
[02080/1.4 sec]   train/validation loss = 0.08748/0.50402
[02100/1.7 sec]   train/validation loss = 0.09648/0.98803
[02120/2.0 sec]   train/validation loss = 0.08359/0.88274
[02140/2.2 sec]   train/validation loss = 0.07505/0.42630
[02160/2.5 sec]   train/validation loss = 0.05470/0.33894
[02180/2.8 sec]   train/validation loss = 0.06130/0.60544
[02200/3.0 sec]   train/validation loss = 0.07944/0.77765
{'loss': 0.30908951163291931, 'accuracy': 0.92222222222222228, 'precision': 0.70588235294117652, 'recall': 0.42857142857142855, 'f1_score': 0.53333333333333333}


In [6]:
# F1 score for each (keep_prob, input_keep_prob) combination recorded in `stats`.
{grid_key: run_stats['f1_score'] for grid_key, run_stats in stats.items()}

{(0.40000000000000002, 0.69999999999999996): 0.70833333333333337,
 (0.40000000000000002, 0.76000000000000001): 0.69230769230769229,
 (0.40000000000000002, 0.81999999999999995): 0.76923076923076916,
 (0.40000000000000002, 0.88): 0.68965517241379304,
 (0.40000000000000002, 0.93999999999999995): 0.62962962962962954,
 (0.40000000000000002, 1.0): 0.60465116279069764,
 (0.44, 0.69999999999999996): 0.7450980392156864,
 (0.44, 0.76000000000000001): 0.66666666666666663,
 (0.44, 0.81999999999999995): 0.53333333333333333,
 (0.44, 0.88): 0.7450980392156864,
 (0.44, 0.93999999999999995): 0.65384615384615374,
 (0.44, 1.0): 0.7384615384615385,
 (0.47999999999999998, 0.69999999999999996): 0.70588235294117652,
 (0.47999999999999998, 0.76000000000000001): 0.61538461538461531,
 (0.47999999999999998, 0.81999999999999995): 0.6071428571428571,
 (0.47999999999999998, 0.88): 0.61818181818181828,
 (0.47999999999999998, 0.93999999999999995): 0.61538461538461542,
 (0.47999999999999998, 1.0): 0.72727272727272718,

In [9]:
# List the (keep_prob, input_keep_prob) keys recorded in `stats`.
# Iterating a dict already yields its keys, so no identity map is needed.
list(stats)

[(0.40000000000000002, 0.69999999999999996),
 (0.40000000000000002, 0.76000000000000001),
 (0.40000000000000002, 0.81999999999999995),
 (0.40000000000000002, 0.88),
 (0.40000000000000002, 0.93999999999999995),
 (0.40000000000000002, 1.0),
 (0.44, 0.69999999999999996),
 (0.44, 0.76000000000000001),
 (0.44, 0.81999999999999995),
 (0.44, 0.88),
 (0.44, 0.93999999999999995),
 (0.44, 1.0),
 (0.47999999999999998, 0.69999999999999996),
 (0.47999999999999998, 0.76000000000000001),
 (0.47999999999999998, 0.81999999999999995),
 (0.47999999999999998, 0.88),
 (0.47999999999999998, 0.93999999999999995),
 (0.47999999999999998, 1.0),
 (0.52000000000000002, 0.69999999999999996),
 (0.52000000000000002, 0.76000000000000001),
 (0.52000000000000002, 0.81999999999999995),
 (0.52000000000000002, 0.88),
 (0.52000000000000002, 0.93999999999999995),
 (0.52000000000000002, 1.0),
 (0.56000000000000005, 0.69999999999999996),
 (0.56000000000000005, 0.76000000000000001),
 (0.56000000000000005, 0.81999999999999995),