# Repsly trial data

In [1]:
from repsly_data import RepslyData

# Load the trial-user dataset once; later cells read batches from this object
# and pass it to the trainer.
repsly_data = RepslyData()

# flush=True pushes the progress message out before the slow read starts;
# with end='' there is no newline to trigger a line-buffered flush.
print('Reading data (this might take a minute or so)...', end='', flush=True)

# NOTE(review): mode='FC' presumably selects the flat feature layout used by the
# fully connected model -- confirm against repsly_data.RepslyData.read_data.
repsly_data.read_data('data/trial_users_analysis.csv', mode='FC')
print('done.')

Reading data (this might take a minute or so)...done.



Let's see what the data looks like:

In [2]:
# Pull one mini-batch of 20 samples from the batch iterator to inspect it.
read_batch = repsly_data.read_batch(batch_size=20)
X, y = next(read_batch)

# Show the inputs (prefixed with their shape) followed by the labels.
print('X{}:'.format(list(X.shape)), X)
print('y: {}'.format(y))

X[20, 241]: [[ 303.    1.    4. ...,    0.    0.    0.]
 [ 192.    4.    3. ...,    0.    0.    0.]
 [ 363.    0.    0. ...,    0.    0.    0.]
 ..., 
 [ 180.    0.    0. ...,    0.    0.    0.]
 [ 336.    0.    0. ...,    0.    0.    0.]
 [ 459.    2.    3. ...,    0.    0.    0.]]
y: [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0]


As you can see above, each input vector (a row of `X`) has `1+15*16=241` values, most of which are zeros. The first value is the trial start date, given as an offset from `2016-01-01`; the remaining values are `15` usage parameters for each of the following `16` days. The data returned by a batch read is randomly shuffled. The output values are stored in `y` and indicate whether or not the user purchased the Repsly service after the trial.

# Training

First, we instantiate the model. The network itself, with two fully connected hidden layers of size 250 and dropout (keep probability around 50%), is built in the training cell below:

In [3]:
from repsly_nn import RepslyFC

# Instantiate the model wrapper only; the actual network is built later by
# calling repsly_nn.create_net(...) in the training cell.
repsly_nn = RepslyFC()

Then we train it in `5` rounds of `80` epochs each, for every combination of `keep_prob` and `input_keep_prob` on a 3×3 grid.

In [26]:
import numpy as np

# Hidden-layer sizes passed to create_net(): two layers of 250 units each.
arch = [250, 250]

# 3 x 3 grid of values swept below and passed to create_net() as 'keep_prob'
# and 'input_keep_prob'.
# NOTE(review): presumably dropout keep probabilities for the hidden layers and
# the input layer respectively -- confirm in repsly_nn.
keep_probs = np.linspace(0.4, 0.6, 3)
input_keep_probs = np.linspace(0.7, 1.0, 3)

# NOTE(review): decay_steps / decay_rate look like a learning-rate decay
# schedule -- confirm in repsly_nn.RepslyFC.create_net.
learning_rate = 0.001
decay_steps = 10
decay_rate = 0.99

# Training schedule: `epochs_loops` rounds, each training every grid point for
# `epochs_at_once` epochs. In the recorded output, progress lines appear every
# `skip_steps` (20) steps.
batch_size = 1024
epochs_loops = 5
epochs_at_once = 80
skip_steps = 20

# stats:         (keep_prob, input_keep_prob) -> metrics from the LATEST round
#                (same contents as before; later cells rely on this).
# stats_history: (keep_prob, input_keep_prob) -> list of metrics, one entry per
#                round, so earlier rounds are no longer lost when overwritten.
stats = {}
stats_history = {}

banner_rule = '*' * 116

for i in range(epochs_loops):
    # Banner separating the rounds in the (long) training log.
    print('{}\n*\n* {}[{}/{}] Training for {} epochs\n*\n{}\n'.format(
        banner_rule, ' ' * 40, i + 1, epochs_loops, epochs_at_once, banner_rule))

    for keep_prob in keep_probs:
        for input_keep_prob in input_keep_probs:
            grid_key = (keep_prob, input_keep_prob)
            arch_dict = {'keep_prob': keep_prob, 'input_keep_prob': input_keep_prob}

            # Rebuild the net for this grid point, then train it further.
            # The recorded log shows step counters continuing from the previous
            # round, so training appears to resume from a checkpoint.
            repsly_nn.create_net(arch, arch_dict, learning_rate, decay_steps, decay_rate)
            run_stats = repsly_nn.train(data=repsly_data, batch_size=batch_size,
                                        epochs=epochs_at_once, skip_steps=skip_steps)

            stats[grid_key] = run_stats
            stats_history.setdefault(grid_key, []).append(run_stats)
            print(run_stats)

********************************************************************************************************************
*
*                                         [1/5] Training for 80 epochs
*
********************************************************************************************************************

Checkpoint directory is: /home/davor_runje/deep_learning/repsly_challenge/checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10
Creating tf.train.Saver()...done
self.checkpoint_path: checkpoints/RepslyFC-(250,250)/keep_prob-0.4/input_keep_prob-0.7/lr-0.001/dr-0.99/ds-10
ckpt: None
[00000/0.1 sec]   train/validation loss = 19.43472/18.81339
[00020/0.4 sec]   train/validation loss = 1.39263/1.17592
[00040/0.6 sec]   train/validation loss = 0.41470/0.52748
[00060/0.9 sec]   train/validation loss = 0.22811/0.35033
[00080/1.1 sec]   train/validation loss = 0.21353/0.44778
[00100/1.4 sec]   train/validation loss = 0.19738/0.39875
[00120/1.7 sec]   train/

[00040/0.6 sec]   train/validation loss = 0.52080/0.70842
[00060/0.9 sec]   train/validation loss = 0.25471/0.57827
[00080/1.2 sec]   train/validation loss = 0.23217/0.29641
[00100/1.4 sec]   train/validation loss = 0.20784/0.28615
[00120/1.7 sec]   train/validation loss = 0.15995/0.33436
[00140/1.9 sec]   train/validation loss = 0.19333/0.26199
[00160/2.2 sec]   train/validation loss = 0.16453/0.33021
[00180/2.5 sec]   train/validation loss = 0.15556/0.48690
[00200/2.7 sec]   train/validation loss = 0.14534/0.35010
[00220/3.0 sec]   train/validation loss = 0.20644/0.44119
[00240/3.3 sec]   train/validation loss = 0.16906/0.46548
[00260/3.5 sec]   train/validation loss = 0.24883/0.47653
[00280/3.8 sec]   train/validation loss = 0.12668/0.44293
[00300/4.1 sec]   train/validation loss = 0.13596/0.25192
[00320/4.3 sec]   train/validation loss = 0.14385/0.25202
[00340/4.6 sec]   train/validation loss = 0.14468/0.74772
[00360/4.9 sec]   train/validation loss = 0.13657/0.19026
[00380/5.1 sec

[00300/4.0 sec]   train/validation loss = 0.17624/0.61217
[00320/4.3 sec]   train/validation loss = 0.16353/0.37341
[00340/4.5 sec]   train/validation loss = 0.13884/0.61280
[00360/4.8 sec]   train/validation loss = 0.11592/0.33204
[00380/5.1 sec]   train/validation loss = 0.21020/0.77276
[00400/5.3 sec]   train/validation loss = 0.13221/0.60628
[00420/5.6 sec]   train/validation loss = 0.11497/0.40258
[00440/5.8 sec]   train/validation loss = 0.14050/0.56647
[00460/6.1 sec]   train/validation loss = 0.14448/0.44058
{'loss': 0.20328398048877716, 'accuracy': 0.96296296296296291, 'precision': 0.86363636363636365, 'recall': 0.73076923076923073, 'f1_score': 0.79166666666666663}
********************************************************************************************************************
*
*                                         [2/5] Training for 80 epochs
*
********************************************************************************************************************

Checkpo

[00460/0.1 sec]   train/validation loss = 0.10321/0.75671
[00480/0.4 sec]   train/validation loss = 0.09239/0.28895
[00500/0.6 sec]   train/validation loss = 0.12910/0.37033
[00520/0.9 sec]   train/validation loss = 0.09766/0.89088
[00540/1.2 sec]   train/validation loss = 0.10523/0.75109
[00560/1.5 sec]   train/validation loss = 0.10782/0.36412
[00580/1.8 sec]   train/validation loss = 0.10714/0.46056
[00600/2.0 sec]   train/validation loss = 0.12503/0.80101
[00620/2.3 sec]   train/validation loss = 0.09554/0.90136
[00640/2.6 sec]   train/validation loss = 0.10191/0.76333
[00660/2.9 sec]   train/validation loss = 0.10222/1.20457
[00680/3.1 sec]   train/validation loss = 0.10493/1.07373
[00700/3.4 sec]   train/validation loss = 0.10897/0.21713
[00720/3.7 sec]   train/validation loss = 0.11317/1.06505
[00740/3.9 sec]   train/validation loss = 0.14432/1.01740
[00760/4.2 sec]   train/validation loss = 0.12307/0.90680
[00780/4.5 sec]   train/validation loss = 0.12437/0.21948
[00800/4.7 sec

[00460/0.1 sec]   train/validation loss = 0.11679/0.41150
[00480/0.4 sec]   train/validation loss = 0.12468/0.62581
[00500/0.6 sec]   train/validation loss = 0.11380/0.53602
[00520/0.9 sec]   train/validation loss = 0.11739/0.58413
[00540/1.2 sec]   train/validation loss = 0.10211/0.44540
[00560/1.4 sec]   train/validation loss = 0.09816/0.52539
[00580/1.7 sec]   train/validation loss = 0.10482/0.54065
[00600/2.0 sec]   train/validation loss = 0.11087/0.57389
[00620/2.2 sec]   train/validation loss = 0.11544/0.75118
[00640/2.5 sec]   train/validation loss = 0.09412/0.38233
[00660/2.8 sec]   train/validation loss = 0.11349/0.59475
[00680/3.1 sec]   train/validation loss = 0.11024/0.33438
[00700/3.3 sec]   train/validation loss = 0.12334/0.68444
[00720/3.6 sec]   train/validation loss = 0.11350/0.30792
[00740/3.9 sec]   train/validation loss = 0.08974/0.57023
[00760/4.2 sec]   train/validation loss = 0.09531/0.43141
[00780/4.4 sec]   train/validation loss = 0.07916/0.48652
[00800/4.7 sec

[00460/0.1 sec]   train/validation loss = 0.11489/0.70486
[00480/0.4 sec]   train/validation loss = 0.15528/1.01290
[00500/0.7 sec]   train/validation loss = 0.14074/0.84303
[00520/1.0 sec]   train/validation loss = 0.12670/0.28961
[00540/1.2 sec]   train/validation loss = 0.11500/0.29613
[00560/1.5 sec]   train/validation loss = 0.12416/0.45164
[00580/1.8 sec]   train/validation loss = 0.11722/0.48124
[00600/2.0 sec]   train/validation loss = 0.12111/0.70479
[00620/2.3 sec]   train/validation loss = 0.11159/0.91594
[00640/2.6 sec]   train/validation loss = 0.10955/0.95767
[00660/2.9 sec]   train/validation loss = 0.11382/0.86969
[00680/3.2 sec]   train/validation loss = 0.10206/0.64503
[00700/3.5 sec]   train/validation loss = 0.12623/0.44737
[00720/3.7 sec]   train/validation loss = 0.11180/0.41316
[00740/4.0 sec]   train/validation loss = 0.11870/0.71020
[00760/4.3 sec]   train/validation loss = 0.11078/0.71290
[00780/4.6 sec]   train/validation loss = 0.11164/0.63456
[00800/4.8 sec

[00920/0.1 sec]   train/validation loss = 0.10200/0.26738
[00940/0.4 sec]   train/validation loss = 0.07874/1.37149
[00960/0.7 sec]   train/validation loss = 0.11965/0.30068
[00980/0.9 sec]   train/validation loss = 0.11172/1.33685
[01000/1.2 sec]   train/validation loss = 0.10090/1.30323
[01020/1.5 sec]   train/validation loss = 0.10133/0.34792
[01040/1.8 sec]   train/validation loss = 0.09170/0.57139
[01060/2.1 sec]   train/validation loss = 0.09524/1.63916
[01080/2.3 sec]   train/validation loss = 0.09222/1.58899
[01100/2.6 sec]   train/validation loss = 0.12338/0.45686
[01120/2.9 sec]   train/validation loss = 0.09555/1.54132
[01140/3.2 sec]   train/validation loss = 0.09194/0.33602
[01160/3.5 sec]   train/validation loss = 0.10181/1.68307
[01180/3.7 sec]   train/validation loss = 0.09385/0.37673
[01200/4.0 sec]   train/validation loss = 0.08890/1.62250
[01220/4.3 sec]   train/validation loss = 0.09013/1.46022
[01240/4.6 sec]   train/validation loss = 0.07648/0.45607
[01260/4.9 sec

[00920/0.1 sec]   train/validation loss = 0.08955/0.79302
[00940/0.4 sec]   train/validation loss = 0.09684/0.47210
[00960/0.6 sec]   train/validation loss = 0.09463/0.89512
[00980/0.9 sec]   train/validation loss = 0.08501/1.10934
[01000/1.2 sec]   train/validation loss = 0.09012/0.81470
[01020/1.5 sec]   train/validation loss = 0.13622/1.10600
[01040/1.8 sec]   train/validation loss = 0.07364/0.90592
[01060/2.0 sec]   train/validation loss = 0.08528/0.56209
[01080/2.3 sec]   train/validation loss = 0.11337/0.98016
[01100/2.6 sec]   train/validation loss = 0.10546/0.49050
[01120/2.9 sec]   train/validation loss = 0.09617/0.91004
[01140/3.1 sec]   train/validation loss = 0.08631/1.02389
[01160/3.4 sec]   train/validation loss = 0.09666/1.15019
[01180/3.7 sec]   train/validation loss = 0.08673/1.18135
[01200/4.0 sec]   train/validation loss = 0.06943/0.64202
[01220/4.3 sec]   train/validation loss = 0.09296/0.62741
[01240/4.5 sec]   train/validation loss = 0.06885/1.00103
[01260/4.8 sec

[00920/0.1 sec]   train/validation loss = 0.07820/0.82626
[00940/0.4 sec]   train/validation loss = 0.10657/0.74977
[00960/0.6 sec]   train/validation loss = 0.10929/0.80393
[00980/0.9 sec]   train/validation loss = 0.13589/0.66398
[01000/1.2 sec]   train/validation loss = 0.10423/1.15885
[01020/1.5 sec]   train/validation loss = 0.09494/0.90179
[01040/1.8 sec]   train/validation loss = 0.09679/1.11012
[01060/2.0 sec]   train/validation loss = 0.10518/0.90142
[01080/2.3 sec]   train/validation loss = 0.10661/0.32138
[01100/2.6 sec]   train/validation loss = 0.09827/1.04345
[01120/2.9 sec]   train/validation loss = 0.09601/1.19990
[01140/3.2 sec]   train/validation loss = 0.08050/1.24469
[01160/3.5 sec]   train/validation loss = 0.08278/0.62690
[01180/3.7 sec]   train/validation loss = 0.10249/0.84782
[01200/4.0 sec]   train/validation loss = 0.11409/0.40964
[01220/4.3 sec]   train/validation loss = 0.10092/1.05351
[01240/4.6 sec]   train/validation loss = 0.07442/0.90336
[01260/4.8 sec

[01380/0.1 sec]   train/validation loss = 0.13316/0.52828
[01400/0.4 sec]   train/validation loss = 0.08057/1.66852
[01420/0.6 sec]   train/validation loss = 0.09719/1.63718
[01440/0.9 sec]   train/validation loss = 0.08035/1.86538
[01460/1.2 sec]   train/validation loss = 0.10139/0.51600
[01480/1.4 sec]   train/validation loss = 0.09194/1.95252
[01500/1.7 sec]   train/validation loss = 0.06930/0.47474
[01520/1.9 sec]   train/validation loss = 0.06099/0.70142
[01540/2.2 sec]   train/validation loss = 0.09068/0.61682
[01560/2.5 sec]   train/validation loss = 0.08815/1.84791
[01580/2.8 sec]   train/validation loss = 0.08315/1.95946
[01600/3.0 sec]   train/validation loss = 0.08135/2.27647
[01620/3.3 sec]   train/validation loss = 0.09048/1.57621
[01640/3.6 sec]   train/validation loss = 0.09051/0.73990
[01660/3.9 sec]   train/validation loss = 0.08117/2.02673
[01680/4.1 sec]   train/validation loss = 0.08297/0.65153
[01700/4.4 sec]   train/validation loss = 0.08163/1.78167
[01720/4.7 sec

[01380/0.1 sec]   train/validation loss = 0.07794/0.45708
[01400/0.4 sec]   train/validation loss = 0.07245/0.72350
[01420/0.7 sec]   train/validation loss = 0.09039/0.83382
[01440/0.9 sec]   train/validation loss = 0.05637/0.84016
[01460/1.2 sec]   train/validation loss = 0.09317/0.72921
[01480/1.5 sec]   train/validation loss = 0.09907/0.88688
[01500/1.7 sec]   train/validation loss = 0.08629/1.25830
[01520/2.0 sec]   train/validation loss = 0.07922/1.05527
[01540/2.3 sec]   train/validation loss = 0.08055/1.25662
[01560/2.5 sec]   train/validation loss = 0.09226/0.60629
[01580/2.8 sec]   train/validation loss = 0.07494/0.78382
[01600/3.1 sec]   train/validation loss = 0.07398/0.48364
[01620/3.4 sec]   train/validation loss = 0.04997/1.15127
[01640/3.6 sec]   train/validation loss = 0.08111/0.85496
[01660/3.9 sec]   train/validation loss = 0.11402/1.21295
[01680/4.2 sec]   train/validation loss = 0.08569/1.06450
[01700/4.4 sec]   train/validation loss = 0.08756/0.66908
[01720/4.7 sec

[01380/0.1 sec]   train/validation loss = 0.08432/0.87850
[01400/0.4 sec]   train/validation loss = 0.10374/1.00754
[01420/0.6 sec]   train/validation loss = 0.09482/1.70040
[01440/0.9 sec]   train/validation loss = 0.09146/1.48898
[01460/1.2 sec]   train/validation loss = 0.08585/0.67890
[01480/1.5 sec]   train/validation loss = 0.07312/1.07674
[01500/1.7 sec]   train/validation loss = 0.06931/1.18985
[01520/2.0 sec]   train/validation loss = 0.12024/1.05074
[01540/2.3 sec]   train/validation loss = 0.08004/0.91773
[01560/2.6 sec]   train/validation loss = 0.08723/0.76718
[01580/2.8 sec]   train/validation loss = 0.12685/0.57595
[01600/3.1 sec]   train/validation loss = 0.09358/1.53753
[01620/3.4 sec]   train/validation loss = 0.08540/1.37834
[01640/3.7 sec]   train/validation loss = 0.08483/1.89392
[01660/3.9 sec]   train/validation loss = 0.08994/1.06084
[01680/4.2 sec]   train/validation loss = 0.08909/0.61302
[01700/4.5 sec]   train/validation loss = 0.07736/0.50907
[01720/4.8 sec

[01840/0.1 sec]   train/validation loss = 0.08878/0.63926
[01860/0.4 sec]   train/validation loss = 0.07012/2.06489
[01880/0.6 sec]   train/validation loss = 0.08492/1.05763
[01900/0.9 sec]   train/validation loss = 0.06153/2.26603
[01920/1.2 sec]   train/validation loss = 0.10627/1.88164
[01940/1.5 sec]   train/validation loss = 0.09186/0.62562
[01960/1.7 sec]   train/validation loss = 0.10710/0.69014
[01980/2.0 sec]   train/validation loss = 0.07770/2.43819
[02000/2.3 sec]   train/validation loss = 0.08314/0.72655
[02020/2.6 sec]   train/validation loss = 0.07750/0.90484
[02040/2.8 sec]   train/validation loss = 0.07939/1.74409
[02060/3.1 sec]   train/validation loss = 0.09245/0.49992
[02080/3.4 sec]   train/validation loss = 0.07809/0.62278
[02100/3.7 sec]   train/validation loss = 0.07369/0.74416
[02120/3.9 sec]   train/validation loss = 0.08374/2.20850
[02140/4.2 sec]   train/validation loss = 0.08744/0.94804
[02160/4.5 sec]   train/validation loss = 0.06903/2.23233
[02180/4.8 sec

[01840/0.1 sec]   train/validation loss = 0.07579/0.88177
[01860/0.4 sec]   train/validation loss = 0.07136/0.67929
[01880/0.7 sec]   train/validation loss = 0.09276/1.16414
[01900/0.9 sec]   train/validation loss = 0.07176/1.03152
[01920/1.2 sec]   train/validation loss = 0.09631/0.88360
[01940/1.5 sec]   train/validation loss = 0.08239/1.07453
[01960/1.8 sec]   train/validation loss = 0.09569/0.54454
[01980/2.0 sec]   train/validation loss = 0.07312/1.41538
[02000/2.3 sec]   train/validation loss = 0.06674/0.96290
[02020/2.6 sec]   train/validation loss = 0.09091/0.67040
[02040/2.9 sec]   train/validation loss = 0.06840/1.03302
[02060/3.1 sec]   train/validation loss = 0.08199/0.91906
[02080/3.4 sec]   train/validation loss = 0.06941/1.52004
[02100/3.7 sec]   train/validation loss = 0.07781/0.82350
[02120/4.0 sec]   train/validation loss = 0.06926/1.00580
[02140/4.3 sec]   train/validation loss = 0.08513/0.91787
[02160/4.5 sec]   train/validation loss = 0.08407/1.23029
[02180/4.8 sec

[01840/0.1 sec]   train/validation loss = 0.08116/1.29946
[01860/0.4 sec]   train/validation loss = 0.08551/0.91095
[01880/0.7 sec]   train/validation loss = 0.07154/0.73896
[01900/0.9 sec]   train/validation loss = 0.11334/1.02858
[01920/1.2 sec]   train/validation loss = 0.11921/0.93536
[01940/1.5 sec]   train/validation loss = 0.07576/1.78726
[01960/1.8 sec]   train/validation loss = 0.07746/1.24114
[01980/2.1 sec]   train/validation loss = 0.09199/0.64048
[02000/2.3 sec]   train/validation loss = 0.09853/1.65194
[02020/2.6 sec]   train/validation loss = 0.09534/0.95210
[02040/2.9 sec]   train/validation loss = 0.07977/1.00732
[02060/3.2 sec]   train/validation loss = 0.09584/1.91934
[02080/3.4 sec]   train/validation loss = 0.08926/0.88842
[02100/3.7 sec]   train/validation loss = 0.10879/1.75633
[02120/4.0 sec]   train/validation loss = 0.06957/0.68349
[02140/4.3 sec]   train/validation loss = 0.07564/1.06484
[02160/4.6 sec]   train/validation loss = 0.08528/0.69571
[02180/4.8 sec

In [27]:
# F1 score of the latest run for every (keep_prob, input_keep_prob) pair.
{params: metrics['f1_score'] for params, metrics in stats.items()}

{(0.40000000000000002, 0.69999999999999996): 0.62222222222222223,
 (0.40000000000000002, 0.84999999999999998): 0.74193548387096775,
 (0.40000000000000002, 1.0): 0.60465116279069764,
 (0.5, 0.69999999999999996): 0.72340425531914898,
 (0.5, 0.84999999999999998): 0.71186440677966101,
 (0.5, 1.0): 0.45000000000000001,
 (0.59999999999999998, 0.69999999999999996): 0.64516129032258063,
 (0.59999999999999998, 0.84999999999999998): 0.76000000000000001,
 (0.59999999999999998, 1.0): 0.61016949152542366}

In [28]:
# Rank the grid points by F1 score, best first, and list at most the top 20
# together with their loss.
ranked = sorted(stats.items(), key=lambda item: item[1]['f1_score'], reverse=True)
for params, metrics in ranked[:20]:
    print('{}: {} {}'.format(params, metrics['f1_score'], metrics['loss']))

(0.59999999999999998, 0.84999999999999998): 0.76 0.4683229923248291
(0.40000000000000002, 0.84999999999999998): 0.7419354838709677 0.7694442272186279
(0.5, 0.69999999999999996): 0.723404255319149 0.5739132761955261
(0.5, 0.84999999999999998): 0.711864406779661 1.0702329874038696
(0.59999999999999998, 0.69999999999999996): 0.6451612903225806 1.57352876663208
(0.40000000000000002, 0.69999999999999996): 0.6222222222222222 0.7321505546569824
(0.59999999999999998, 1.0): 0.6101694915254237 1.8669357299804688
(0.40000000000000002, 1.0): 0.6046511627906976 1.0380613803863525
(0.5, 1.0): 0.45 1.4436469078063965
