In [6]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn import preprocessing

In [8]:
df = pd.read_csv('E:/dataset/analysis/Audiobooks_data.csv')
df.head(10)

Unnamed: 0,00994,1620,1620.1,19.73,19.73.1,1,10.00,0.99,1603.80,5,92,0
0,1143,2160.0,2160,5.33,5.33,0,8.91,0.0,0.0,0,0,0
1,2059,2160.0,2160,5.33,5.33,0,8.91,0.0,0.0,0,388,0
2,2882,1620.0,1620,5.96,5.96,0,8.91,0.42,680.4,1,129,0
3,3342,2160.0,2160,5.33,5.33,0,8.91,0.22,475.2,0,361,0
4,3416,2160.0,2160,4.61,4.61,0,8.91,0.0,0.0,0,0,0
5,4949,2160.0,2160,5.33,5.33,0,8.91,0.04,86.4,0,366,0
6,9011,648.0,648,5.33,5.33,0,8.91,0.0,0.0,0,0,1
7,9282,2160.0,2160,5.33,5.33,0,8.91,0.26,561.6,0,33,0
8,10500,2160.0,2160,5.33,5.33,1,10.0,0.27,583.2,0,366,0
9,12898,540.0,540,5.33,5.33,0,8.91,0.28,151.2,0,34,0


In [2]:
raw_data = np.loadtxt('E:/dataset/analysis/Audiobooks_data.csv', delimiter=',')

In [3]:
raw_data.shape

(14084, 12)

In [4]:
unscaled_inputs_all = raw_data[:,1:-1]

In [5]:
targets_all = raw_data[:,-1]

In [6]:
# Balancing the Data

In [7]:
# Count how many targets are 1 (meaning that the customer did convert)
num_one_targets = int(np.sum(targets_all))

# Set a counter for targets that are 0 (meaning that the customer did not convert)
zero_targets_counter = 0

# We want to create a "balanced" dataset, so we will have to remove some input/target pairs.
# Declare a variable that will do that:
indices_to_remove = []

# Count the number of targets that are 0. 
# Once there are as many 0s as 1s, mark entries where the target is 0.
for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)

# Create two new variables, one that will contain the inputs, and one that will contain the targets.
# We delete all indices that we marked "to remove" in the loop above.
unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis=0)
targets_equal_priors = np.delete(targets_all, indices_to_remove, axis=0)

In [8]:
# Standardize

In [9]:
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

In [10]:
# Shuffle Datashape

In [11]:
shuffled_indices = np.arange(scaled_inputs.shape[0])

In [12]:
np.random.shuffle(shuffled_indices)

In [13]:
shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_priors[shuffled_indices]

In [14]:
# Split Dataset

In [15]:
samples_count = shuffled_inputs.shape[0]

In [16]:
samples_count

4474

In [17]:
train_samples = int(samples_count*.8)
validation_samples = int(samples_count*.1)
test_samples = samples_count - train_samples - validation_samples

In [18]:
train_inputs = shuffled_inputs[:train_samples]
train_targets = shuffled_targets[:train_samples]

In [19]:
validation_inputs = shuffled_inputs[train_samples:train_samples+validation_samples]
validation_targets = shuffled_targets[train_samples:train_samples+validation_samples]

In [21]:
test_inputs = shuffled_inputs[train_samples+validation_samples:]
test_targets = shuffled_targets[train_samples+validation_samples:]

In [22]:
# Save dataset in .npz

In [23]:
np.savez('Audiobook_train', inputs = train_inputs, targets = train_targets)
np.savez('Audiobook_validation', inputs = validation_inputs, targets = validation_targets)
np.savez('Audiobook_test', inputs = test_inputs, targets = test_targets)

In [58]:
npz = np.load('Audiobook_train.npz')
train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

In [59]:
npz = np.load('Audiobook_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

In [60]:
npz = np.load('Audiobook_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

In [61]:
# Define the Model

In [78]:
input_size = 10
hidden_layer_size = 100
output_size = 2

In [79]:
model = tf.keras.Sequential([
                        tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
                        tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
                        tf.keras.layers.Dense(output_size, activation = 'softmax')
                        ])

In [80]:
model.compile(optimizer = 'adam' ,loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [84]:
batch_size = 100
epochs = 100

In [85]:
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

In [86]:
model.fit(train_inputs, train_targets,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs,validation_targets),
          verbose = 2
          )

Epoch 1/100
36/36 - 0s - loss: 0.3034 - accuracy: 0.8351 - val_loss: 0.3222 - val_accuracy: 0.8277
Epoch 2/100
36/36 - 0s - loss: 0.3080 - accuracy: 0.8368 - val_loss: 0.3264 - val_accuracy: 0.8277
Epoch 3/100
36/36 - 0s - loss: 0.3053 - accuracy: 0.8365 - val_loss: 0.3104 - val_accuracy: 0.8300
Epoch 4/100
36/36 - 0s - loss: 0.3052 - accuracy: 0.8335 - val_loss: 0.3123 - val_accuracy: 0.8389
Epoch 5/100
36/36 - 0s - loss: 0.3041 - accuracy: 0.8346 - val_loss: 0.3154 - val_accuracy: 0.8322


<tensorflow.python.keras.callbacks.History at 0x24edb8bfac0>

In [68]:
# Testing Model

In [69]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

