In [43]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder


# load dataset

In [45]:
datapath = '/content/drive/MyDrive/ml_code/nlp/Restaurant_Reviews.tsv'

df = pd.read_csv(datapath, delimiter='\t', quoting=3)


# **understanding data**

In [46]:
df.head(10)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1
5,Now I am getting angry and I want my damn pho.,0
6,Honeslty it didn't taste THAT fresh.),0
7,The potatoes were like rubber and you could te...,0
8,The fries were great too.,1
9,A great touch.,1


In [47]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  1000 non-null   object
 1   Liked   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB
None


# handeling missing data

In [48]:
max_non_null_val = df["Liked"].count()
print("Percentage of missing values in each column:\n")
missing_percent = []
for col in df.columns:
  missing_percent.append(df[col].isnull().sum()/max_non_null_val*100)
  print(f"{col} : {df[col].isnull().sum()/max_non_null_val*100:.2f}%")

Percentage of missing values in each column:

Review : 0.00%
Liked : 0.00%


# Creating a VADER model


In [49]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [50]:
sia.polarity_scores("this is good stuff.")

{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}

In [51]:
scores_list =[]
for i in range(df["Review"].count()):
  score = sia.polarity_scores(df["Review"].values[i])
  scores_list.append(score)
print(scores_list[1])


{'neg': 0.445, 'neu': 0.555, 'pos': 0.0, 'compound': -0.3412}


In [52]:
scores_pd = pd.DataFrame(scores_list)

In [53]:
scores_pd

Unnamed: 0,neg,neu,pos,compound
0,0.000,0.435,0.565,0.5994
1,0.445,0.555,0.000,-0.3412
2,0.340,0.660,0.000,-0.5574
3,0.093,0.585,0.322,0.6908
4,0.000,0.728,0.272,0.6249
...,...,...,...,...
995,0.000,1.000,0.000,0.0000
996,0.000,1.000,0.000,0.0000
997,0.242,0.758,0.000,-0.3724
998,0.000,1.000,0.000,0.0000


In [54]:
review_pred = []
for i in range(scores_pd["compound"].count()):
  if(scores_pd["compound"][i] > 0):
    review_pred.append(1);
  else:
    review_pred.append(0)
review_pred = np.array(review_pred)
print(review_pred[2])

0


In [55]:
y = np.array(df["Liked"])

In [56]:
from sklearn.metrics import accuracy_score, confusion_matrix
cm = confusion_matrix(y, review_pred)
print("Confusion Matrix:")
print(cm)
print("Accuracy Score:")
print(accuracy_score(y, review_pred))

Confusion Matrix:
[[406  94]
 [ 94 406]]
Accuracy Score:
0.812


#Loading the Spacy model


In [57]:
!pip install spacy
!python -m spacy download en_core_web_lg


Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [58]:
import spacy
nlp = spacy.load("en_core_web_lg")

## cleaning the data

In [59]:
corpus = []
for i in range(0, 1000):
    review = df['Review'][i]
    doc = nlp(review)
    tokens = [token.lemma_.lower() for token in doc if not token.is_stop and token.is_alpha]
    review = ' '.join(tokens)
    corpus.append(review)

print(corpus)



# creating bag of words model

In [60]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()
y = df["Liked"].values

In [61]:
X

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [62]:
y

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,

# Splitting data into training and testing datasets

In [63]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

#Trainnig Logistic regression model



In [64]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='liblinear',random_state = 0)
classifier.fit(X_train, y_train)

In [65]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[85 12]
 [38 65]]


0.75

###Testing model for overfitting

In [66]:
y_train_pred = classifier.predict(X_train)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)
accuracy_score(y_train, y_train_pred)


[[388  15]
 [ 26 371]]


0.94875

###Hyperparameter tuning with optuna

In [67]:
!pip install optuna




In [68]:
import optuna
from sklearn.metrics import accuracy_score


def objective(trial):

    params = {
    'C': trial.suggest_float('C', 0.01, 10.0),
    'solver': trial.suggest_categorical('solver', ['liblinear', 'lbfgs', 'saga']),
    'penalty': trial.suggest_categorical('penalty', ['l2']),
    'max_iter': trial.suggest_int('max_iter', 100, 1000)
}


    log_class = LogisticRegression(**params)
    log_class.fit(
        X_train, y_train,

    )
    y_pred = log_class.predict(X_test)
    return 1 - accuracy_score(y_test, y_pred)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
print("Best hyperparameters: ", study.best_params)


[I 2024-08-03 07:45:26,118] A new study created in memory with name: no-name-aeb7c444-5903-4a02-a225-505d2e12a584
[I 2024-08-03 07:45:26,324] Trial 0 finished with value: 0.235 and parameters: {'C': 4.994923203009074, 'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 829}. Best is trial 0 with value: 0.235.
[I 2024-08-03 07:45:29,683] Trial 1 finished with value: 0.245 and parameters: {'C': 2.4617291524510465, 'solver': 'saga', 'penalty': 'l2', 'max_iter': 198}. Best is trial 0 with value: 0.235.
[I 2024-08-03 07:45:29,913] Trial 2 finished with value: 0.235 and parameters: {'C': 3.654128490106907, 'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 760}. Best is trial 0 with value: 0.235.
[I 2024-08-03 07:45:36,250] Trial 3 finished with value: 0.235 and parameters: {'C': 4.467615471756433, 'solver': 'saga', 'penalty': 'l2', 'max_iter': 149}. Best is trial 0 with value: 0.235.
[I 2024-08-03 07:45:36,308] Trial 4 finished with value: 0.235 and parameters: {'C': 4.811798711287491, 'solver': '

Best hyperparameters:  {'C': 3.05945647040965, 'solver': 'saga', 'penalty': 'l2', 'max_iter': 133}


###building model with best parameters

In [69]:
best_classifier = LogisticRegression(**study.best_params)
best_classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = best_classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[83 14]
 [33 70]]




0.765

#Training a knn model




In [70]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier()
classifier.fit(X_train, y_train)

In [71]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[85 12]
 [61 42]]


0.635

###testing for overfiting

In [72]:
y_train_pred = classifier.predict(X_train)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)
accuracy_score(y_train, y_train_pred)


[[366  37]
 [138 259]]


0.78125

###Grid search

In [73]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'n_neighbors': list(range(1, 32)),
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'weights': ['uniform', 'distance']
}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 5,
                           n_jobs = -1,
                           verbose = 2)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
best_base_classifier = grid_search.best_estimator_

print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Fitting 5 folds for each of 186 candidates, totalling 930 fits
Best Accuracy: 69.50 %
Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}


###building model with best parameters

In [74]:
best_base_classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = best_base_classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[80 17]
 [52 51]]


0.655

#Training a Descision Tree model

In [75]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 42)
classifier.fit(X_train, y_train)

In [76]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[76 21]
 [46 57]]


0.665

###Grid search


In [77]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30, 40, 50]
}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
best_base_classifier = grid_search.best_estimator_

print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 76.63 %
Best Parameters: {'criterion': 'entropy', 'max_depth': 20, 'splitter': 'random'}


###building model with best parameters

In [78]:
best_base_classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = best_base_classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[93  4]
 [53 50]]


0.715

#XG boost model

In [79]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

In [80]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[86 11]
 [45 58]]


0.72

###Hyperparam tuning with optuna

In [81]:
!pip install optuna




In [82]:
import optuna
import xgboost as xgb
from sklearn.metrics import accuracy_score


def objective(trial):
    params = {
        'objective': 'binary:logistic',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0)
    }

    xgb_model = xgb.XGBClassifier(**params)
    xgb_model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )
    y_pred = xgb_model.predict(X_test)
    return 1 - accuracy_score(y_test, y_pred)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
print("Best hyperparameters: ", study.best_params)


[I 2024-08-03 07:48:51,890] A new study created in memory with name: no-name-c7aa6629-ed22-44f8-960f-f455285bedd6
[I 2024-08-03 07:48:53,033] Trial 0 finished with value: 0.29000000000000004 and parameters: {'learning_rate': 0.14549849062682663, 'max_depth': 3, 'n_estimators': 183, 'colsample_bytree': 0.747476306310505, 'subsample': 0.9220421844414486}. Best is trial 0 with value: 0.29000000000000004.
[I 2024-08-03 07:48:53,875] Trial 1 finished with value: 0.28 and parameters: {'learning_rate': 0.10618004609085897, 'max_depth': 4, 'n_estimators': 100, 'colsample_bytree': 0.8591425632292939, 'subsample': 0.7861041419306607}. Best is trial 1 with value: 0.28.
[I 2024-08-03 07:48:54,973] Trial 2 finished with value: 0.30500000000000005 and parameters: {'learning_rate': 0.03509911463204879, 'max_depth': 8, 'n_estimators': 107, 'colsample_bytree': 0.7507904599105157, 'subsample': 0.5014997474210974}. Best is trial 1 with value: 0.28.
[I 2024-08-03 07:48:55,830] Trial 3 finished with value:

Best hyperparameters:  {'learning_rate': 0.075293913075744, 'max_depth': 5, 'n_estimators': 123, 'colsample_bytree': 0.8464265966947099, 'subsample': 0.527246698777878}


###building model with best parameters

In [83]:
model = xgb.XGBClassifier(**study.best_params)

model.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[93  4]
 [48 55]]


0.74

###test for overfitting

In [84]:
y_train_pred = model.predict(X_train)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)
accuracy_score(y_train, y_train_pred)

[[372  31]
 [137 260]]


0.79

#Training a random forest model

In [85]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()
classifier.fit(X_train, y_train)

In [86]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[79 18]
 [47 56]]


0.675

###Grid search

In [87]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'n_estimators': [10, 50, 100, 200],
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'max_features':['sqrt', 'log2', None]
}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 3,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
best_base_classifier = grid_search.best_estimator_

print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 78.88 %
Best Parameters: {'criterion': 'entropy', 'max_depth': 20, 'max_features': 'log2', 'n_estimators': 100}


###building model with best parameters

In [88]:
best_base_classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = best_base_classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[88  9]
 [36 67]]


0.775

#Training a svm model

In [89]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 110)
classifier.fit(X_train, y_train)

In [90]:
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[82 15]
 [31 72]]
Accuracy Score:
0.77


###test for overfitting

In [91]:
y_train_pred = classifier.predict(X_train)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)
accuracy_score(y_train, y_train_pred)


[[398   5]
 [ 12 385]]


0.97875

###Grid search

In [92]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'C': [0.25, 0.5, 0.75, 1],
    'kernel': ['linear'],
    'gamma': [0,0.1, 0.5, 1, 2, 5]
}

grid_search = GridSearchCV(estimator = classifier,param_grid = parameters,scoring = 'accuracy',cv = 3,n_jobs = -1,verbose = 2)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
best_base_classifier = grid_search.best_estimator_

print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)


Fitting 3 folds for each of 24 candidates, totalling 72 fits
Best Accuracy: 78.13 %
Best Parameters: {'C': 1, 'gamma': 0, 'kernel': 'linear'}


In [93]:
y_pred = best_base_classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[82 15]
 [31 72]]
Accuracy Score:
0.77


###model bagging


In [94]:
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score

param_grid = {
    'n_estimators': [50, 100, 150],
    'max_samples': [0.5, 1.0],
    'max_features': [0.5, 1.0]
}

clf = BaggingClassifier(random_state=0)

grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy',verbose =2)
grid_search.fit(X_train, y_train)

best_clf = grid_search.best_estimator_

y_pred = best_clf.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Confusion Matrix:
[[73 24]
 [31 72]]
Accuracy Score:
0.725


# Training naive bayes model

In [95]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [96]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[47 50]
 [11 92]]


0.695

###test for overfitting

In [97]:
y_train_pred = classifier.predict(X_train)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)
accuracy_score(y_train, y_train_pred)


[[332  71]
 [  0 397]]


0.91125

#building an ANN

In [98]:
import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

size_input_layer = X_train.shape[1]
size_hidden_layer = (2*size_input_layer // 3) + 2

model = Sequential([
    Dense(units=size_hidden_layer, input_shape=(size_input_layer,), activation='relu'),

    Dense(units=1, activation='sigmoid')
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [99]:
model.fit(
    x=X_train,
    y=y_train,
    batch_size=3,
    epochs=100,
    validation_split = 0.1,
    shuffle=True,
    verbose=2
)

Epoch 1/100
240/240 - 5s - 21ms/step - accuracy: 0.7139 - loss: 0.5874 - val_accuracy: 0.8500 - val_loss: 0.4034
Epoch 2/100
240/240 - 3s - 15ms/step - accuracy: 0.9153 - loss: 0.2348 - val_accuracy: 0.8625 - val_loss: 0.3904
Epoch 3/100
240/240 - 7s - 28ms/step - accuracy: 0.9694 - loss: 0.0944 - val_accuracy: 0.8375 - val_loss: 0.4084
Epoch 4/100
240/240 - 5s - 22ms/step - accuracy: 0.9819 - loss: 0.0561 - val_accuracy: 0.8250 - val_loss: 0.4099
Epoch 5/100
240/240 - 5s - 20ms/step - accuracy: 0.9792 - loss: 0.0457 - val_accuracy: 0.8500 - val_loss: 0.4766
Epoch 6/100
240/240 - 5s - 22ms/step - accuracy: 0.9847 - loss: 0.0323 - val_accuracy: 0.8250 - val_loss: 0.4802
Epoch 7/100
240/240 - 4s - 16ms/step - accuracy: 0.9903 - loss: 0.0313 - val_accuracy: 0.8375 - val_loss: 0.5097
Epoch 8/100
240/240 - 4s - 16ms/step - accuracy: 0.9889 - loss: 0.0270 - val_accuracy: 0.8500 - val_loss: 0.5376
Epoch 9/100
240/240 - 5s - 23ms/step - accuracy: 0.9903 - loss: 0.0256 - val_accuracy: 0.8125 - 

<keras.src.callbacks.history.History at 0x7967c1f25cf0>

In [100]:
from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = model.predict(X_test)
y_pred = (y_pred >= 0.5).astype(int)

cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[[74 23]
 [30 73]]


0.735