In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn import tree
from sklearn import svm
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier

In [75]:
# Number of topic features per book. The input CSV is named after it, so
# switching datasets (e.g. 8 vs 20 topics) only requires changing this value;
# previously the file name and the dimension had to be edited in lockstep.
dimension = 20
file_name = "book_list_{}_topics.csv".format(dimension)

In [76]:
# Load the CSV selected by `file_name`; later cells read its "0"..str(dimension - 1)
# columns as features and its "category" column as the label.
df = pd.read_csv(file_name)

In [77]:
# Feature matrix: the columns named "0" .. str(dimension - 1), as a NumPy array.
topic_columns = list(map(str, range(dimension)))
dv = df.loc[:, topic_columns].to_numpy()
# Label vector: the raw values of the "category" column.
cat = df.loc[:, "category"].to_numpy()

In [78]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dv,
    cat,
    test_size=0.33,
    random_state=12,
)

def run_NB(X_train, X_test, y_train, y_test):
    """Fit a multinomial naive Bayes classifier and print its test results.

    The classifier is wrapped in a single-step ``Pipeline``, fitted on
    ``X_train``/``y_train`` and used to predict ``X_test``. Prints the
    confusion matrix, then the accuracy (mean of ``predicted == y_test``)
    prefixed with "NB:". Returns ``None``.
    """
    steps = [('clf', MultinomialNB())]  # ML model
    nb_pipeline = Pipeline(steps)
    nb_pipeline.fit(X_train, y_train)

    y_pred = nb_pipeline.predict(X_test)

    print(metrics.confusion_matrix(y_test, y_pred))
    accuracy = np.mean(y_pred == y_test)
    print("NB:", accuracy)
    
def run_DT(X_train, X_test, y_train, y_test, random_state=12):
    """Fit a decision-tree classifier and print its test results.

    Args:
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: held-out features and labels used for evaluation.
        random_state: seed forwarded to ``DecisionTreeClassifier``. Without a
            fixed seed the tree can differ from run to run, so the printed
            accuracy was not reproducible. Defaults to 12, the same seed the
            notebook uses for its train/test split.

    Prints the confusion matrix, then the accuracy (mean of
    ``predicted == y_test``) prefixed with "DT:". Returns ``None``.
    """
    text_clf = Pipeline([
        ('clf', tree.DecisionTreeClassifier(random_state=random_state))
    ])
    text_clf.fit(X_train, y_train)

    predicted = text_clf.predict(X_test)

    print(metrics.confusion_matrix(y_test, predicted))
    print("DT:", np.mean(predicted == y_test))
    
def run_SVM(X_train, X_test, y_train, y_test, random_state=12):
    """Fit a linear SVM classifier and print its test results.

    Args:
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: held-out features and labels used for evaluation.
        random_state: seed forwarded to ``LinearSVC`` so repeated runs give
            the same model. Defaults to 12, the same seed the notebook uses
            for its train/test split.

    Prints the confusion matrix, the accuracy (mean of
    ``predicted == y_test``) prefixed with "SVM:" to match the "NB:" / "DT:"
    lines of the sibling runners, and the per-class classification report.
    Returns ``None``.
    """
    text_clf = Pipeline([
        ('clf', svm.LinearSVC(C=1.0, random_state=random_state))
    ])
    text_clf.fit(X_train, y_train)

    predicted = text_clf.predict(X_test)

    print(metrics.confusion_matrix(y_test, predicted))
    print("SVM:", np.mean(predicted == y_test))
    # Classes that are never predicted have undefined precision; report them
    # as 0 explicitly instead of triggering an undefined-metric warning.
    print(metrics.classification_report(y_test, predicted, zero_division=0))

In [50]:
# Evaluate the three baseline classifiers, in order, on the same split.
for runner in (run_NB, run_DT, run_SVM):
    runner(X_train, X_test, y_train, y_test)


[[  0   5   0   0   0   1   0   0   0]
 [  0 561   0   0   0  13   0   0   0]
 [  0  29   0   0   0   7   0   0   0]
 [  0  86   0   0   0   1   0   0   0]
 [  0 137   0   0   0  19   0   0   0]
 [  0 454   0   0   0  27   0   0   0]
 [  0 156   0   0   0   5   0   0   0]
 [  0  51   0   0   0   4   0   0   0]
 [  0 245   0   0   0  22   0   0   0]]
NB: 0.32254525507405374
[[  0   2   0   0   1   0   3   0   0]
 [ 11 149  13  53  21 194  42  10  81]
 [  0   8   0   3   0  23   0   0   2]
 [  1  60   3   5   2   2   1   4   9]
 [  0  18   3   3   7  79  41   0   5]
 [  2 216  41  12  63  59  12  30  46]
 [ 10  46   6   2  28  12   3  10  44]
 [  0  14   1   4   1  26   4   3   2]
 [  1 100  10   8  14  45  46   0  43]]
DT: 0.14755896873285793
[[  0   0   0   0   0   6   0   0   0]
 [  0 357   0   0   0 190   0   0  27]
 [  0  18   0   0   0  15   1   0   2]
 [  0  69   0   0   0  14   0   0   4]
 [  0  45   0   0   0  90   0   0  21]
 [  0 201   0   0   1 244   1   0  34]
 [  0  58   0 

  _warn_prf(average, modifier, msg_start, len(result))


In [79]:
# Genre label -> integer class id; the id is the genre's position in this list.
genre_names = [
    "adventure",
    "fantasy",
    "historical",
    "horror",
    "mystery",
    "romance",
    "science-fiction",
    "thriller",
    "young-adult",
]
class_mapping = {genre: class_id for class_id, genre in enumerate(genre_names)}

# Split again with the same test_size and random_state as the baseline cell, so
# the label arrays start out as the raw category strings.
X_train, X_test, y_train, y_test = train_test_split(
    dv, cat, test_size=0.33, random_state=12
)

# Replace each category string with its integer class id.
to_class_id = np.vectorize(class_mapping.get)
y_train = to_class_id(y_train)
y_test = to_class_id(y_test)
print(y_train.shape)

(3699,)


In [80]:
import tensorflow as tf
from tensorflow import keras as ks
from tensorflow.keras.utils import to_categorical
# Validating the TensorFlow version
print(tf.__version__)

AUTOTUNE = tf.data.experimental.AUTOTUNE

# One-hot encode the integer class ids. The width is fixed to the number of
# known classes so the train and test encodings always have the same number of
# columns, even if the highest class id is missing from one of the splits.
num_classes = len(class_mapping)
# Only encode 1-D id vectors: re-running this cell must not re-encode arrays
# that are already one-hot.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=num_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=num_classes)
print(X_train.shape)
print(y_train.shape)

2.3.1
(3699, 20)
(3699, 9)


In [89]:
# Feed-forward classifier: topic features -> three ReLU hidden layers -> class
# probabilities. Layer widths follow the data instead of being hard-coded, so
# the model still fits when `dimension` or `class_mapping` changes.
i = tf.keras.Input(shape=(dimension,))  # shape must be a tuple, hence the comma
x = ks.layers.Dense(64, activation='relu')(i)
x = ks.layers.Dense(128, activation='relu')(x)
x = ks.layers.Dense(128, activation='relu')(x)
# Each book has exactly one category (one-hot targets), so the output must be
# a softmax distribution over classes; independent sigmoids do not sum to 1
# and do not match the categorical cross-entropy loss.
x = ks.layers.Dense(len(class_mapping), activation='softmax', name='prediction_layer')(x)
model = ks.Model(i, x)
model.summary()
model.compile(loss="CategoricalCrossentropy",optimizer='adam',metrics=['accuracy'])

Model: "functional_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 20)]              0         
_________________________________________________________________
dense_24 (Dense)             (None, 64)                1344      
_________________________________________________________________
dense_25 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_26 (Dense)             (None, 128)               16512     
_________________________________________________________________
prediction_layer (Dense)     (None, 9)                 1161      
Total params: 27,337
Trainable params: 27,337
Non-trainable params: 0
_________________________________________________________________


In [90]:
# Show the one-hot targets, then train for 200 epochs while reporting metrics
# on the held-out split after every epoch.
print(y_train)
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=200,
)

[[0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200


Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200


Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200


Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<tensorflow.python.keras.callbacks.History at 0x271683f8508>