In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

df = pd.read_csv("ecommerceDataset.csv")
df = df.sample(n=5000, random_state=1)
df = df.dropna(subset=['Text'])

le = LabelEncoder()
df["Label"] = le.fit_transform(df["Type"])

tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(df['Text'])
sequences = tokenizer.texts_to_sequences(df["Text"])
padded = pad_sequences(sequences, padding='post', maxlen=100)

X_train, X_test, y_train, y_test = train_test_split(padded, df['Label'], test_size=0.2, random_state=1)

num_classes = len(le.classes_)

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 16, input_length=100),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print(accuracy)


model_cnn = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 64, input_length=100),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model_cnn.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_cnn.summary()

model_cnn.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print(accuracy)



Epoch 1/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3846 - loss: 1.3416 - val_accuracy: 0.4680 - val_loss: 1.1982
Epoch 2/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5127 - loss: 1.1161 - val_accuracy: 0.6340 - val_loss: 0.9088
Epoch 3/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6969 - loss: 0.8107 - val_accuracy: 0.7980 - val_loss: 0.6723
Epoch 4/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8383 - loss: 0.5882 - val_accuracy: 0.8980 - val_loss: 0.4989
Epoch 5/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9019 - loss: 0.4107 - val_accuracy: 0.9050 - val_loss: 0.4053
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9027 - loss: 0.4049
0.9049999713897705


Epoch 1/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.4618 - loss: 1.2329 - val_accuracy: 0.8310 - val_loss: 0.5681
Epoch 2/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8996 - loss: 0.3644 - val_accuracy: 0.9170 - val_loss: 0.2696
Epoch 3/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9728 - loss: 0.1047 - val_accuracy: 0.9250 - val_loss: 0.2341
Epoch 4/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9946 - loss: 0.0322 - val_accuracy: 0.9310 - val_loss: 0.2346
Epoch 5/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9984 - loss: 0.0111 - val_accuracy: 0.9320 - val_loss: 0.2510
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9027 - loss: 0.4049 
0.9049999713897705
