In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it,
# one path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Guard against directories without files: a bare print() would emit a blank line.
    if filenames:
        print(*(os.path.join(dirname, name) for name in filenames), sep='\n')

# Any results you write to the current directory are saved as output.

# **Importing Relevant Libraries**

In [None]:
from sklearn import preprocessing
import numpy as np
import tensorflow as tf

# **Loading the Data**

In [None]:
# Read the whole CSV into one 2-D array; np.loadtxt parses every field as a number.
raw_csv_data = np.loadtxt('../input/Audiobooks-data.csv', delimiter=',')

# Column 0 is skipped (presumably a row/customer ID -- confirm against the dataset),
# the last column holds the targets, and every column in between is a model input.
input_columns = slice(1, -1)
target_column = -1

unscaled_inputs_all = raw_csv_data[:, input_columns]
targets_all = raw_csv_data[:, target_column]

### **Balancing the Dataset**

In [None]:
# Balance the classes by capping the number of zero-target rows at the number of
# one-target rows. Assumes targets are encoded as 0/1, so their sum is the count of ones.
num_one_targets = int(np.sum(targets_all))

# Row positions of every zero target, in file order. The first `num_one_targets`
# of them are kept; all later ones are surplus and get removed. This is the
# vectorised equivalent of counting zeros row by row in a Python loop.
# If there are fewer zeros than ones, nothing is removed and the data stays unbalanced.
zero_target_indices = np.flatnonzero(targets_all == 0)
indices_to_remove = zero_target_indices[num_one_targets:]

# Drop the same rows from inputs and targets so the two arrays stay aligned.
unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis=0)
targets_equal_priors = np.delete(targets_all, indices_to_remove, axis=0)

### **Standardizing the Input Data**

In [None]:
# Standardise every input column (axis 0) to zero mean and unit variance.
# NOTE(review): the statistics are computed over all balanced rows, i.e. before the
# train/validation/test split further down -- confirm this is intended.
scaled_inputs = preprocessing.scale(
    unscaled_inputs_equal_priors,
    axis=0,
    with_mean=True,
    with_std=True,
)

### **Shuffling the Data**

In [None]:
# Shuffle inputs and targets with one shared permutation so that rows stay aligned.
# A fixed seed on a local generator makes the permutation -- and therefore which rows
# end up in which split -- identical on every Restart & Run All, without touching
# NumPy's global random state.
SHUFFLE_SEED = 42
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)
shuffled_indices = shuffle_rng.permutation(scaled_inputs.shape[0])

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_priors[shuffled_indices]

### **Split Data into Train, Validation and Test**

In [None]:
# Split sizes: 80 % of the rows for training, 10 % for validation (both truncated
# to whole rows); the test split takes whatever is left so that no row is lost.
samples_count = len(shuffled_inputs)

train_samples_count, validation_samples_count = (
    int(fraction * samples_count) for fraction in (0.8, 0.1)
)
test_samples_count = samples_count - (train_samples_count + validation_samples_count)

In [None]:
# Cut the shuffled arrays into three consecutive, non-overlapping row ranges:
# [0, train_end) -> train, [train_end, validation_end) -> validation, rest -> test.
train_end = train_samples_count
validation_end = train_end + validation_samples_count

train_rows = slice(None, train_end)
validation_rows = slice(train_end, validation_end)
test_rows = slice(validation_end, None)

# The same row range is applied to inputs and targets so they stay aligned.
train_inputs, train_targets = shuffled_inputs[train_rows], shuffled_targets[train_rows]
validation_inputs, validation_targets = shuffled_inputs[validation_rows], shuffled_targets[validation_rows]
test_inputs, test_targets = shuffled_inputs[test_rows], shuffled_targets[test_rows]

### **Saving the data as .npz**

In [None]:
# Write each split to its own archive in the current working directory.
# np.savez appends the ".npz" extension to these base names; every archive
# stores two arrays under the keys "inputs" and "targets".
for archive_name, split_inputs, split_targets in (
    ('Audiobooks_train_data', train_inputs, train_targets),
    ('Audiobooks_validation_data', validation_inputs, validation_targets),
    ('Audiobooks_test_data', test_inputs, test_targets),
):
    np.savez(archive_name, inputs=split_inputs, targets=split_targets)

### **Loading the .npz Data**

In [None]:
def _load_split(npz_path):
    """Load one saved split and return it as ``(inputs, targets)``.

    Parameters
    ----------
    npz_path : str
        Path of an ``.npz`` archive holding the arrays ``inputs`` and ``targets``.

    Returns
    -------
    tuple of numpy.ndarray
        ``inputs`` cast to float and ``targets`` cast to int.
    """
    # The context manager closes the underlying archive file once the arrays
    # have been read; astype() returns independent copies that outlive it.
    with np.load(npz_path) as npz:
        # Builtin float/int replace the np.float/np.int aliases, which were
        # removed in NumPy 1.24 and pointed at these builtins anyway.
        return npz['inputs'].astype(float), npz['targets'].astype(int)


# Paths are relative to the current working directory, which is where the
# np.savez calls in the saving cell write the archives.
train_inputs, train_targets = _load_split('Audiobooks_train_data.npz')
validation_inputs, validation_targets = _load_split('Audiobooks_validation_data.npz')
test_inputs, test_targets = _load_split('Audiobooks_test_data.npz')

# **Model**

In [None]:
# Network dimensions.
input_size = 10  # declared for reference; not passed to any layer below
output_size = 2  # one output unit per class, since the target is either 0 or 1
hidden_layer_size = 50

# Two ReLU hidden layers of equal width followed by a softmax output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

# The sparse variant of categorical cross-entropy takes integer class labels
# rather than one-hot vectors, matching the integer targets loaded above.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyper-parameters.
batch_size = 100
max_epochs = 100  # upper bound only; early stopping may end training sooner

# Stop once the monitored quantity (the Keras default) has failed to improve
# for 2 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(
    train_inputs,
    train_targets,
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

In [None]:
# Score the trained model on the held-out test split. As the last expression of the
# cell, the returned loss/metric values are displayed by the notebook.
model.evaluate(x=test_inputs, y=test_targets)