In [1]:
# Importing libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf

In [2]:
data = pd.read_csv("./data.csv")

# Categorise columns
data['tail'] = data['tail'].apply(lambda x: 1 if x == 'yes' else 0)

vectoriser = TfidfVectorizer(max_features=3000, min_df=2, max_df=0.95, ngram_range=(1, 2))
le = LabelEncoder()

# Feature: Message length
data['message_length'] = data['message'].apply(lambda x: len(x.split()))

X_numeric = data[['fingers', 'tail']].values
X_numeric = np.hstack((X_numeric, data[['message_length']].values))
X_text = vectoriser.fit_transform(data['message']).toarray()
scaler = StandardScaler()
X_numeric_scaled = scaler.fit_transform(X_numeric)

encoded_data = np.concatenate((X_text, X_numeric_scaled), axis=1)
encoded_species = le.fit_transform(data['species'])

In [3]:
X_train, X_test, y_train, y_test = train_test_split(encoded_data, encoded_species, test_size=0.1, random_state=7)

def sequentialAPI(X_train, X_test, y_train, y_test, epoch, batch_size):
  sum = 0
  num = 10

  for i in range(num):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(2048, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(len(le.classes_), activation='softmax')
    ])

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    optimizer = tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=1e-5)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epoch, validation_data=(X_test, y_test), batch_size=batch_size, 
                callbacks=[early_stopping], 
                verbose=0)

    result = model.evaluate(X_test, y_test, verbose=0)

    sum += (result[1] * 100)

  return (sum / num)

print(f'Accuracy (40 128): {sequentialAPI(X_train, X_test, y_train, y_test, 40, 128)}%')
print(f'Accuracy (50, 128): {sequentialAPI(X_train, X_test, y_train, y_test, 50, 128)}%')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy (40 128): 89.59999859333038%
Accuracy (50, 128): 90.0%


In [5]:
X_train, X_test, y_train, y_test = train_test_split(encoded_data, encoded_species, test_size=0.1, random_state=7)

def sequentialAPI(X_train, X_test, y_train, y_test, epoch, batch_size):
  sum = 0
  num = 10

  for i in range(num):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(2048, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(len(le.classes_), activation='softmax')
    ])

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # optimizer = tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=1e-5)
    model.compile(
    #   optimizer=optimizer, 
      loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epoch, validation_data=(X_test, y_test), batch_size=batch_size, 
                callbacks=[early_stopping], 
                verbose=0)

    result = model.evaluate(X_test, y_test, verbose=0)

    sum += (result[1] * 100)

  return (sum / num)

print(f'Accuracy (40 128): {sequentialAPI(X_train, X_test, y_train, y_test, 40, 128)}%')
print(f'Accuracy (50, 128): {sequentialAPI(X_train, X_test, y_train, y_test, 50, 128)}%')