## Preprocessing

In [1]:
# Import the library
import pandas as pd
from sklearn.metrics import classification_report


def preprocess(path):
    """Load a CSV file into a DataFrame, using its first column as the index.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The loaded ``pd.DataFrame``, indexed by the file's first column.
    """
    return pd.read_csv(path, index_col=0)


# Raw train / validation frames (the unlabeled test split is not loaded here)
train_raw = preprocess(path="data/TRAIN.csv")
valid_raw = preprocess(path="data/VALIDATION.csv")
# test_raw = preprocess(path="data/TEST_NO_LABELS.csv")

# TF-IDF feature frames for the same two splits
train_X = preprocess(path="data/TFIDF_TRAIN.csv")
valid_X = preprocess(path="data/TFIDF_VALIDATION.csv")
# test_X = preprocess(path="data/TFIDF_TEST.csv")


def partition_vert(df):
    """Split a frame column-wise into a feature block and a label column.

    Args:
        df: Frame whose last column holds the label.

    Returns:
        A ``(X, y)`` tuple: ``X`` is every column except the first and the
        last, ``y`` is the last column as a Series.
    """
    # Skip column 0 and stop before the trailing label column
    feature_columns = slice(1, -1)
    return df.iloc[:, feature_columns], df.iloc[:, -1]


# Obtain the true labels: partition_vert returns (features, labels); only the
# labels (the last column of each raw frame) are kept, the feature part is
# discarded into `_`.
_, train_y_true = partition_vert(train_raw)
_, valid_y_true = partition_vert(valid_raw)


def split_by_gender(df):
    """Split a frame into three row subsets keyed on ``dr_id_gender``.

    Args:
        df: Frame containing a ``dr_id_gender`` column.

    Returns:
        A 3-tuple of frames holding the rows whose ``dr_id_gender`` equals
        0, 1 and 2 respectively (named female, male and ungendered by the
        callers).
    """
    gender = df["dr_id_gender"]
    return tuple(df[gender == code] for code in (0, 1, 2))


# Partition datasets vertically and horizontally by gender.
# reset_index() moves each frame's index (the first CSV column, set through
# index_col=0 in preprocess) back into an ordinary leading column.
train_raw_with_id = train_raw.reset_index()
valid_raw_with_id = valid_raw.reset_index()
train_X_with_id = train_X.reset_index()
valid_X_with_id = valid_X.reset_index()

# Only the female (0) and male (1) subsets are used; the third group returned
# by split_by_gender is discarded.
train_raw_female, train_raw_male, _ = split_by_gender(train_raw_with_id)
valid_raw_female, valid_raw_male, _ = split_by_gender(valid_raw_with_id)

# Female subset: labels come from the raw frame, features from the TF-IDF
# frame filtered with the raw frame's gender mask. The trailing .iloc[:, 1:]
# drops the id column that reset_index() re-inserted.
# NOTE(review): this presumes the raw and TF-IDF files list the same rows in
# the same order -- confirm.
_, train_y_true_female = partition_vert(train_raw_female)
_, valid_y_true_female = partition_vert(valid_raw_female)
train_X_female = train_X_with_id[train_raw_with_id["dr_id_gender"]
                                 == 0].iloc[:, 1:]
valid_X_female = valid_X_with_id[valid_raw_with_id["dr_id_gender"]
                                 == 0].iloc[:, 1:]

# Male subset: same construction with dr_id_gender == 1.
_, train_y_true_male = partition_vert(train_raw_male)
_, valid_y_true_male = partition_vert(valid_raw_male)
train_X_male = train_X_with_id[train_raw_with_id["dr_id_gender"]
                               == 1].iloc[:, 1:]
valid_X_male = valid_X_with_id[valid_raw_with_id["dr_id_gender"]
                               == 1].iloc[:, 1:]


def cal_pos_comment_dist(df):
    """Calculate the share of positive comments among the rated comments.

    Only rows whose ``rating`` is 1 (positive) or -1 (negative) are counted;
    any other rating value is ignored.

    Args:
        df: Frame containing a ``rating`` column.

    Returns:
        ``positives / (positives + negatives)`` as a float, or ``nan`` when
        the frame contains no +1/-1 ratings at all (for example an empty
        subset), instead of raising ``ZeroDivisionError``.
    """
    ratings = df["rating"]

    # int() keeps the arithmetic in plain Python numbers
    pos_comment_count = int((ratings == 1).sum())
    neg_comment_count = int((ratings == -1).sum())

    rated_count = pos_comment_count + neg_comment_count
    if rated_count == 0:
        # Nothing to compute a rate from
        return float("nan")

    return pos_comment_count / rated_count


# Report the positive-comment rate of the validation set and of its gender subsets
print("Positive Comment Rate in the ... Validation Set")
print(f"* Whole: {cal_pos_comment_dist(valid_raw)}")
print(f"* Female: {cal_pos_comment_dist(valid_raw_female)}")
print(f"* Male: {cal_pos_comment_dist(valid_raw_male)}")

Positive Comment Rate in the ... Validation Set
* Whole: 0.7341818181818182
* Female: 0.7381429525718103
* Male: 0.7257254464285714


### Zero-R

In [2]:
# Import the library for baseline
from sklearn.dummy import DummyClassifier


def zero_R_classify(training_feat, training_label, valid_feat, valid_label_true):
    """Fit a ZeroR (most-frequent-class) baseline and print its validation report.

    Args:
        training_feat: Features used to fit the baseline.
        training_label: Labels matching ``training_feat``.
        valid_feat: Features to predict on.
        valid_label_true: True labels for ``valid_feat``.

    Returns:
        None. The classification report is printed, followed by a blank line.
    """
    # Baseline using the "most_frequent" dummy strategy
    baseline = DummyClassifier(random_state=42, strategy="most_frequent")
    baseline.fit(training_feat, training_label)
    predicted = baseline.predict(valid_feat)

    # Build the report with zero_division=0.0, then print it plus a spacer line
    report = classification_report(
        valid_label_true, predicted, zero_division=0.0)
    print(report)
    print()


# ZeroR baseline on the whole validation set, then on each gender subset
print("ZeroR Baseline > Whole Validation set")
zero_R_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("ZeroR Baseline > Female Validation set")
zero_R_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("ZeroR Baseline > Male Validation set")
zero_R_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

ZeroR Baseline > Whole Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00      1462
           1       0.73      1.00      0.85      4038

    accuracy                           0.73      5500
   macro avg       0.37      0.50      0.42      5500
weighted avg       0.54      0.73      0.62      5500


ZeroR Baseline > Female Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       392
           1       0.74      1.00      0.85      1105

    accuracy                           0.74      1497
   macro avg       0.37      0.50      0.42      1497
weighted avg       0.54      0.74      0.63      1497


ZeroR Baseline > Male Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       983
           1       0.73      1.00      0.84      2601

    accuracy                           0.73      3584
   macro avg

### KNN

In [3]:
# Import the library for KNN classifier
from sklearn.neighbors import KNeighborsClassifier


def knn_classify(training_feat, training_label, valid_feat, valid_label_true):
    """Fit a KNN classifier on TF-IDF features and print its validation report.

    Args:
        training_feat: Features used to fit the classifier.
        training_label: Labels matching ``training_feat``.
        valid_feat: Features to predict on.
        valid_label_true: True labels for ``valid_feat``.

    Returns:
        None. The classification report is printed.
    """
    # Cosine metric with distance-weighted votes
    neighbours = KNeighborsClassifier(metric="cosine", weights="distance")
    neighbours.fit(training_feat, training_label)
    predicted = neighbours.predict(valid_feat)

    # Print the evaluation summary
    report = classification_report(valid_label_true, predicted)
    print(report)


# KNN on the whole validation set, then on each gender subset
print("KNN > Whole Validation Set")
knn_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("KNN > Female Validation Set")
knn_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("KNN > Male Validation Set")
knn_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

KNN > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.71      0.61      0.66      1462
           1       0.87      0.91      0.89      4038

    accuracy                           0.83      5500
   macro avg       0.79      0.76      0.77      5500
weighted avg       0.82      0.83      0.83      5500

KNN > Female Validation Set
              precision    recall  f1-score   support

          -1       0.71      0.64      0.68       392
           1       0.88      0.91      0.89      1105

    accuracy                           0.84      1497
   macro avg       0.79      0.78      0.78      1497
weighted avg       0.83      0.84      0.84      1497

KNN > Male Validation Set
              precision    recall  f1-score   support

          -1       0.73      0.65      0.69       983
           1       0.87      0.91      0.89      2601

    accuracy                           0.84      3584
   macro avg       0.80      0.78      0.79    

### NB

In [4]:
# Import the library for the MultinomialNB classifier
from sklearn.naive_bayes import MultinomialNB


def nb_classify(training_feat, training_label, valid_feat, valid_label_true):
    """Fit a multinomial naive Bayes model on TF-IDF features and print its report.

    Args:
        training_feat: Features used to fit the classifier.
        training_label: Labels matching ``training_feat``.
        valid_feat: Features to predict on.
        valid_label_true: True labels for ``valid_feat``.

    Returns:
        None. The classification report is printed.
    """
    # MultinomialNB with alpha fixed at 0.001
    bayes = MultinomialNB(alpha=0.001)
    bayes.fit(training_feat, training_label)
    predicted = bayes.predict(valid_feat)

    # Print the evaluation summary
    report = classification_report(valid_label_true, predicted)
    print(report)


# Naive Bayes on the whole validation set, then on each gender subset
print("NB > Whole Validation Set")
nb_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("NB > Female Validation Set")
nb_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("NB > Male Validation Set")
nb_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

NB > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.87      0.66      0.75      1462
           1       0.89      0.97      0.93      4038

    accuracy                           0.89      5500
   macro avg       0.88      0.81      0.84      5500
weighted avg       0.88      0.89      0.88      5500

NB > Female Validation Set
              precision    recall  f1-score   support

          -1       0.83      0.74      0.78       392
           1       0.91      0.94      0.93      1105

    accuracy                           0.89      1497
   macro avg       0.87      0.84      0.85      1497
weighted avg       0.89      0.89      0.89      1497

NB > Male Validation Set
              precision    recall  f1-score   support

          -1       0.89      0.66      0.76       983
           1       0.88      0.97      0.92      2601

    accuracy                           0.88      3584
   macro avg       0.89      0.81      0.84      3

### LR

In [5]:
# Import the library for logistic regressor
from sklearn.linear_model import LogisticRegression


def lr_classify(training_feat, training_label, valid_feat, valid_label_true):
    """Fit a logistic regression model on TF-IDF features and print its report.

    Args:
        training_feat: Features used to fit the classifier.
        training_label: Labels matching ``training_feat``.
        valid_feat: Features to predict on.
        valid_label_true: True labels for ``valid_feat``.

    Returns:
        None. The classification report is printed.
    """
    # Iteration cap of 1000 and a fixed seed
    regressor = LogisticRegression(random_state=42, max_iter=1_000)
    regressor.fit(training_feat, training_label)
    predicted = regressor.predict(valid_feat)

    # Print the evaluation summary
    report = classification_report(valid_label_true, predicted)
    print(report)


# Logistic regression on the whole validation set, then on each gender subset
print("LR > Whole Validation Set")
lr_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("LR > Female Validation Set")
lr_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("LR > Male Validation Set")
lr_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

LR > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.81      0.84      0.83      1462
           1       0.94      0.93      0.94      4038

    accuracy                           0.91      5500
   macro avg       0.88      0.89      0.88      5500
weighted avg       0.91      0.91      0.91      5500

LR > Female Validation Set
              precision    recall  f1-score   support

          -1       0.84      0.85      0.84       392
           1       0.95      0.94      0.94      1105

    accuracy                           0.92      1497
   macro avg       0.89      0.90      0.89      1497
weighted avg       0.92      0.92      0.92      1497

LR > Male Validation Set
              precision    recall  f1-score   support

          -1       0.85      0.84      0.84       983
           1       0.94      0.94      0.94      2601

    accuracy                           0.91      3584
   macro avg       0.89      0.89      0.89      3

### MLP

In [6]:
# Import the library for the MLP classifier
from sklearn.neural_network import MLPClassifier


def mlp_classify(training_feat, training_label, valid_feat, valid_label_true):
    """Fit an MLP classifier on TF-IDF features and print its validation report.

    Args:
        training_feat: Features used to fit the classifier.
        training_label: Labels matching ``training_feat``.
        valid_feat: Features to predict on.
        valid_label_true: True labels for ``valid_feat``.

    Returns:
        None. The classification report is printed.
    """
    # Logistic activation, iteration cap of 1000, fixed seed
    network = MLPClassifier(
        random_state=42, max_iter=1_000, activation="logistic")
    network.fit(training_feat, training_label)
    predicted = network.predict(valid_feat)

    # Print the evaluation summary
    report = classification_report(valid_label_true, predicted)
    print(report)


# MLP on the whole validation set, then on each gender subset
print("MLP > Whole Validation Set")
mlp_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("MLP > Female Validation Set")
mlp_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("MLP > Male Validation Set")
mlp_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

MLP > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.80      0.85      0.83      1462
           1       0.94      0.92      0.93      4038

    accuracy                           0.90      5500
   macro avg       0.87      0.89      0.88      5500
weighted avg       0.91      0.90      0.91      5500

MLP > Female Validation Set
              precision    recall  f1-score   support

          -1       0.81      0.85      0.83       392
           1       0.95      0.93      0.94      1105

    accuracy                           0.91      1497
   macro avg       0.88      0.89      0.88      1497
weighted avg       0.91      0.91      0.91      1497

MLP > Male Validation Set
              precision    recall  f1-score   support

          -1       0.84      0.84      0.84       983
           1       0.94      0.94      0.94      2601

    accuracy                           0.91      3584
   macro avg       0.89      0.89      0.89    

## Balancing Genders

In [7]:
def balance_gender_on_datasets(raw_df, tfidf_df):
    """Down-sample so that female and male rows are equally represented.

    Rows whose ``dr_id_gender`` is 0 (female) or 1 (male) are each sampled,
    without replacement and with a fixed seed, down to the size of the
    smaller group. Rows with any other gender value are dropped.

    Both returned frames keep the row order of their input frame, so when
    ``raw_df`` and ``tfidf_df`` are row-aligned on input they are still
    row-aligned on output. (Previously the raw frame came back in shuffled
    sample order while the TF-IDF frame kept its original order.)

    Args:
        raw_df: Raw frame containing a ``dr_id_gender`` column.
        tfidf_df: Feature frame sharing ``raw_df``'s index labels
            (assumed unique).

    Returns:
        ``(balanced_raw, balanced_X)``: the selected rows of ``raw_df`` and
        the rows of ``tfidf_df`` carrying the same index labels.
    """
    female_samples = raw_df[raw_df["dr_id_gender"] == 0]
    male_samples = raw_df[raw_df["dr_id_gender"] == 1]

    min_samples = min(len(female_samples), len(male_samples))
    balanced_female_samples = female_samples.sample(
        n=min_samples, random_state=42)
    balanced_male_samples = male_samples.sample(n=min_samples, random_state=42)

    # Index labels of every sampled row (female first, then male)
    selected_index = pd.concat(
        [balanced_female_samples, balanced_male_samples]).index

    # Filter both frames by label in their own original order so that the
    # returned pair stays row-aligned
    balanced_raw = raw_df.loc[raw_df.index.isin(selected_index)]
    balanced_X = tfidf_df.loc[tfidf_df.index.isin(selected_index)]

    return balanced_raw, balanced_X


# Raw data
# NOTE: these reads rebind the train_raw / valid_raw / train_X / valid_X names
# used by the previous section. Unlike preprocess(), no index_col is passed,
# so the first CSV column stays a regular column (referred to below as
# "Unnamed: 0") and the frames get a default positional index.
train_raw = pd.read_csv("data/TRAIN.csv")
valid_raw = pd.read_csv("data/VALIDATION.csv")
# test_raw = pd.read_csv("data/TEST_NO_LABELS.csv")

# TFIDF
train_X = pd.read_csv("data/TFIDF_TRAIN.csv")
valid_X = pd.read_csv("data/TFIDF_VALIDATION.csv")
# test_X = pd.read_csv("data/TFIDF_TEST.csv")

# Balance the datasets by gender (both the training and the validation split
# are down-sampled)
train_raw, train_X = balance_gender_on_datasets(train_raw, train_X)
valid_raw, valid_X = balance_gender_on_datasets(valid_raw, valid_X)

# Sort the raw and TF-IDF frames by the id column so their rows are in the
# same order
train_raw = train_raw.sort_values(by="Unnamed: 0")
train_X = train_X.sort_values(by="Unnamed: 0")
valid_raw = valid_raw.sort_values(by="Unnamed: 0")
valid_X = valid_X.sort_values(by="Unnamed: 0")

# Obtain the true labels (last column of each raw frame)
train_y_true = train_raw.iloc[:, -1]
valid_y_true = valid_raw.iloc[:, -1]

# Split the set by gender (the ungendered group is discarded)
train_raw_female, train_raw_male, _ = split_by_gender(train_raw)
valid_raw_female, valid_raw_male, _ = split_by_gender(valid_raw)

# Female subset: labels from the raw frame, features selected from the TF-IDF
# frame with the raw frame's gender mask
train_y_true_female = train_raw_female.iloc[:, -1]
valid_y_true_female = valid_raw_female.iloc[:, -1]
train_X_female = train_X[train_raw["dr_id_gender"] == 0]
valid_X_female = valid_X[valid_raw["dr_id_gender"] == 0]

# Male subset: same construction with dr_id_gender == 1
train_y_true_male = train_raw_male.iloc[:, -1]
valid_y_true_male = valid_raw_male.iloc[:, -1]
train_X_male = train_X[train_raw["dr_id_gender"] == 1]
valid_X_male = valid_X[valid_raw["dr_id_gender"] == 1]

# Remove the index


def remove_index(df):
    """Return the frame without its ``"Unnamed: 0"`` column, if it has one.

    Args:
        df: Any DataFrame.

    Returns:
        A new frame lacking the ``"Unnamed: 0"`` column when that column is
        present (the input is left unmodified); otherwise ``df`` itself.
    """
    if "Unnamed: 0" not in df.columns:
        return df
    # drop() without inplace returns a new frame and leaves the input untouched
    return df.drop(columns="Unnamed: 0")


# Strip the id column from every training feature frame
train_X = remove_index(df=train_X)
train_X_female = remove_index(df=train_X_female)
train_X_male = remove_index(df=train_X_male)

# ... and from every validation feature frame
valid_X = remove_index(df=valid_X)
valid_X_female = remove_index(df=valid_X_female)
valid_X_male = remove_index(df=valid_X_male)

### Zero-R

In [8]:
# ZeroR baseline on the currently bound whole / female / male frames
print("ZeroR Baseline > Whole Validation set")
zero_R_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("ZeroR Baseline > Female Validation set")
zero_R_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("ZeroR Baseline > Male Validation set")
zero_R_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

ZeroR Baseline > Whole Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       803
           1       0.73      1.00      0.85      2191

    accuracy                           0.73      2994
   macro avg       0.37      0.50      0.42      2994
weighted avg       0.54      0.73      0.62      2994


ZeroR Baseline > Female Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       392
           1       0.74      1.00      0.85      1105

    accuracy                           0.74      1497
   macro avg       0.37      0.50      0.42      1497
weighted avg       0.54      0.74      0.63      1497


ZeroR Baseline > Male Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       411
           1       0.73      1.00      0.84      1086

    accuracy                           0.73      1497
   macro avg

### KNN

In [9]:
# KNN on the currently bound whole / female / male frames
print("KNN > Whole Validation Set")
knn_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("KNN > Female Validation Set")
knn_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("KNN > Male Validation Set")
knn_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

KNN > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.71      0.62      0.66       803
           1       0.87      0.91      0.89      2191

    accuracy                           0.83      2994
   macro avg       0.79      0.77      0.78      2994
weighted avg       0.83      0.83      0.83      2994

KNN > Female Validation Set
              precision    recall  f1-score   support

          -1       0.71      0.64      0.68       392
           1       0.88      0.91      0.89      1105

    accuracy                           0.84      1497
   macro avg       0.79      0.78      0.78      1497
weighted avg       0.83      0.84      0.84      1497

KNN > Male Validation Set
              precision    recall  f1-score   support

          -1       0.75      0.60      0.67       411
           1       0.86      0.92      0.89      1086

    accuracy                           0.84      1497
   macro avg       0.80      0.76      0.78    

### NB

In [10]:
# Naive Bayes on the currently bound whole / female / male frames
print("NB > Whole Validation Set")
nb_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("NB > Female Validation Set")
nb_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("NB > Male Validation Set")
nb_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

NB > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.87      0.70      0.78       803
           1       0.90      0.96      0.93      2191

    accuracy                           0.89      2994
   macro avg       0.88      0.83      0.85      2994
weighted avg       0.89      0.89      0.89      2994

NB > Female Validation Set
              precision    recall  f1-score   support

          -1       0.83      0.74      0.78       392
           1       0.91      0.94      0.93      1105

    accuracy                           0.89      1497
   macro avg       0.87      0.84      0.85      1497
weighted avg       0.89      0.89      0.89      1497

NB > Male Validation Set
              precision    recall  f1-score   support

          -1       0.90      0.67      0.77       411
           1       0.89      0.97      0.93      1086

    accuracy                           0.89      1497
   macro avg       0.89      0.82      0.85      1

### LR

In [11]:
# Logistic regression on the currently bound whole / female / male frames
print("LR > Whole Validation Set")
lr_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("LR > Female Validation Set")
lr_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("LR > Male Validation Set")
lr_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

LR > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.85      0.85      0.85       803
           1       0.95      0.94      0.94      2191

    accuracy                           0.92      2994
   macro avg       0.90      0.90      0.90      2994
weighted avg       0.92      0.92      0.92      2994

LR > Female Validation Set
              precision    recall  f1-score   support

          -1       0.84      0.85      0.84       392
           1       0.95      0.94      0.94      1105

    accuracy                           0.92      1497
   macro avg       0.89      0.90      0.89      1497
weighted avg       0.92      0.92      0.92      1497

LR > Male Validation Set
              precision    recall  f1-score   support

          -1       0.87      0.81      0.84       411
           1       0.93      0.95      0.94      1086

    accuracy                           0.92      1497
   macro avg       0.90      0.88      0.89      1

### MLP

In [12]:
# MLP on the currently bound whole / female / male frames
print("MLP > Whole Validation Set")
mlp_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("MLP > Female Validation Set")
mlp_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("MLP > Male Validation Set")
mlp_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

MLP > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.84      0.85      0.84       803
           1       0.95      0.94      0.94      2191

    accuracy                           0.92      2994
   macro avg       0.89      0.90      0.89      2994
weighted avg       0.92      0.92      0.92      2994

MLP > Female Validation Set
              precision    recall  f1-score   support

          -1       0.81      0.85      0.83       392
           1       0.95      0.93      0.94      1105

    accuracy                           0.91      1497
   macro avg       0.88      0.89      0.88      1497
weighted avg       0.91      0.91      0.91      1497

MLP > Male Validation Set
              precision    recall  f1-score   support

          -1       0.83      0.83      0.83       411
           1       0.93      0.94      0.94      1086

    accuracy                           0.91      1497
   macro avg       0.88      0.88      0.88    

## Balancing Classes

In [13]:
def balance_class_on_datasets(raw_df, tfidf_df):
    """Down-sample so that positive and negative comments are equally represented.

    Rows whose ``rating`` is 1 (positive) or -1 (negative) are each sampled,
    without replacement and with a fixed seed, down to the size of the
    smaller class. Rows with any other rating value are dropped.

    Both returned frames keep the row order of their input frame, so when
    ``raw_df`` and ``tfidf_df`` are row-aligned on input they are still
    row-aligned on output. (Previously the raw frame came back in shuffled
    sample order while the TF-IDF frame kept its original order.)

    Args:
        raw_df: Raw frame containing a ``rating`` column.
        tfidf_df: Feature frame sharing ``raw_df``'s index labels
            (assumed unique).

    Returns:
        ``(balanced_raw, balanced_X)``: the selected rows of ``raw_df`` and
        the rows of ``tfidf_df`` carrying the same index labels.
    """
    pos_comments = raw_df[raw_df["rating"] == 1]
    neg_comments = raw_df[raw_df["rating"] == -1]

    min_samples = min(len(pos_comments), len(neg_comments))
    balanced_pos_samples = pos_comments.sample(n=min_samples, random_state=42)
    balanced_neg_samples = neg_comments.sample(n=min_samples, random_state=42)

    # Index labels of every sampled row (positive first, then negative)
    selected_index = pd.concat(
        [balanced_pos_samples, balanced_neg_samples]).index

    # Filter both frames by label in their own original order so that the
    # returned pair stays row-aligned
    balanced_raw = raw_df.loc[raw_df.index.isin(selected_index)]
    balanced_X = tfidf_df.loc[tfidf_df.index.isin(selected_index)]

    return balanced_raw, balanced_X


# Raw data
# NOTE: these reads rebind the train_raw / valid_raw / train_X / valid_X names
# used by the previous sections. No index_col is passed, so the first CSV
# column stays a regular column (referred to below as "Unnamed: 0").
train_raw = pd.read_csv("data/TRAIN.csv")
valid_raw = pd.read_csv("data/VALIDATION.csv")
# test_raw = pd.read_csv("data/TEST_NO_LABELS.csv")

# TFIDF
train_X = pd.read_csv("data/TFIDF_TRAIN.csv")
valid_X = pd.read_csv("data/TFIDF_VALIDATION.csv")
# test_X = pd.read_csv("data/TFIDF_TEST.csv")

# Balance the datasets by class (rating), not by gender; both the training
# and the validation split are down-sampled
train_raw, train_X = balance_class_on_datasets(train_raw, train_X)
valid_raw, valid_X = balance_class_on_datasets(valid_raw, valid_X)

# Sort the raw and TF-IDF frames by the id column so their rows are in the
# same order
train_raw = train_raw.sort_values(by="Unnamed: 0")
train_X = train_X.sort_values(by="Unnamed: 0")
valid_raw = valid_raw.sort_values(by="Unnamed: 0")
valid_X = valid_X.sort_values(by="Unnamed: 0")

# Obtain the true labels (last column of each raw frame)
train_y_true = train_raw.iloc[:, -1]
valid_y_true = valid_raw.iloc[:, -1]

# Split the set by gender (the ungendered group is discarded)
train_raw_female, train_raw_male, _ = split_by_gender(train_raw)
valid_raw_female, valid_raw_male, _ = split_by_gender(valid_raw)

# Female subset: labels from the raw frame, features selected from the TF-IDF
# frame with the raw frame's gender mask
train_y_true_female = train_raw_female.iloc[:, -1]
valid_y_true_female = valid_raw_female.iloc[:, -1]
train_X_female = train_X[train_raw["dr_id_gender"] == 0]
valid_X_female = valid_X[valid_raw["dr_id_gender"] == 0]

# Male subset: same construction with dr_id_gender == 1
train_y_true_male = train_raw_male.iloc[:, -1]
valid_y_true_male = valid_raw_male.iloc[:, -1]
train_X_male = train_X[train_raw["dr_id_gender"] == 1]
valid_X_male = valid_X[valid_raw["dr_id_gender"] == 1]

# Strip the "Unnamed: 0" id column from every feature frame
train_X = remove_index(train_X)
train_X_female = remove_index(train_X_female)
train_X_male = remove_index(train_X_male)

valid_X = remove_index(valid_X)
valid_X_female = remove_index(valid_X_female)
valid_X_male = remove_index(valid_X_male)

### Zero-R

In [14]:
# ZeroR baseline on the currently bound whole / female / male frames
print("ZeroR Baseline > Whole Validation set")
zero_R_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("ZeroR Baseline > Female Validation set")
zero_R_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("ZeroR Baseline > Male Validation set")
zero_R_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

ZeroR Baseline > Whole Validation set
              precision    recall  f1-score   support

          -1       0.50      1.00      0.67      1462
           1       0.00      0.00      0.00      1462

    accuracy                           0.50      2924
   macro avg       0.25      0.50      0.33      2924
weighted avg       0.25      0.50      0.33      2924


ZeroR Baseline > Female Validation set
              precision    recall  f1-score   support

          -1       0.48      1.00      0.65       392
           1       0.00      0.00      0.00       424

    accuracy                           0.48       816
   macro avg       0.24      0.50      0.32       816
weighted avg       0.23      0.48      0.31       816


ZeroR Baseline > Male Validation set
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       983
           1       0.48      1.00      0.65       914

    accuracy                           0.48      1897
   macro avg

### KNN

In [15]:
# KNN on the currently bound whole / female / male frames
print("KNN > Whole Validation Set")
knn_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("KNN > Female Validation Set")
knn_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("KNN > Male Validation Set")
knn_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

KNN > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.79      0.86      0.82      1462
           1       0.85      0.77      0.81      1462

    accuracy                           0.82      2924
   macro avg       0.82      0.82      0.82      2924
weighted avg       0.82      0.82      0.82      2924

KNN > Female Validation Set
              precision    recall  f1-score   support

          -1       0.79      0.87      0.83       392
           1       0.87      0.78      0.82       424

    accuracy                           0.82       816
   macro avg       0.83      0.83      0.82       816
weighted avg       0.83      0.82      0.82       816

KNN > Male Validation Set
              precision    recall  f1-score   support

          -1       0.81      0.86      0.83       983
           1       0.84      0.79      0.81       914

    accuracy                           0.82      1897
   macro avg       0.83      0.82      0.82    

### NB

In [16]:
# Naive Bayes on the currently bound whole / female / male frames
print("NB > Whole Validation Set")
nb_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("NB > Female Validation Set")
nb_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("NB > Male Validation Set")
nb_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

NB > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.86      0.91      0.88      1462
           1       0.90      0.85      0.88      1462

    accuracy                           0.88      2924
   macro avg       0.88      0.88      0.88      2924
weighted avg       0.88      0.88      0.88      2924

NB > Female Validation Set
              precision    recall  f1-score   support

          -1       0.86      0.93      0.89       392
           1       0.93      0.86      0.89       424

    accuracy                           0.89       816
   macro avg       0.89      0.89      0.89       816
weighted avg       0.89      0.89      0.89       816

NB > Male Validation Set
              precision    recall  f1-score   support

          -1       0.89      0.84      0.87       983
           1       0.84      0.89      0.87       914

    accuracy                           0.87      1897
   macro avg       0.87      0.87      0.87      1

### LR

In [17]:
# Logistic regression on the currently bound whole / female / male frames
print("LR > Whole Validation Set")
lr_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("LR > Female Validation Set")
lr_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("LR > Male Validation Set")
lr_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

LR > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.88      0.92      0.90      1462
           1       0.92      0.88      0.90      1462

    accuracy                           0.90      2924
   macro avg       0.90      0.90      0.90      2924
weighted avg       0.90      0.90      0.90      2924

LR > Female Validation Set
              precision    recall  f1-score   support

          -1       0.89      0.93      0.91       392
           1       0.93      0.89      0.91       424

    accuracy                           0.91       816
   macro avg       0.91      0.91      0.91       816
weighted avg       0.91      0.91      0.91       816

LR > Male Validation Set
              precision    recall  f1-score   support

          -1       0.89      0.92      0.91       983
           1       0.91      0.88      0.90       914

    accuracy                           0.90      1897
   macro avg       0.90      0.90      0.90      1

### MLP

In [18]:
# MLP on the currently bound whole / female / male frames
print("MLP > Whole Validation Set")
mlp_classify(
    training_feat=train_X,
    training_label=train_y_true,
    valid_feat=valid_X,
    valid_label_true=valid_y_true,
)

print("MLP > Female Validation Set")
mlp_classify(
    training_feat=train_X_female,
    training_label=train_y_true_female,
    valid_feat=valid_X_female,
    valid_label_true=valid_y_true_female,
)

print("MLP > Male Validation Set")
mlp_classify(
    training_feat=train_X_male,
    training_label=train_y_true_male,
    valid_feat=valid_X_male,
    valid_label_true=valid_y_true_male,
)

MLP > Whole Validation Set
              precision    recall  f1-score   support

          -1       0.88      0.92      0.90      1462
           1       0.92      0.88      0.90      1462

    accuracy                           0.90      2924
   macro avg       0.90      0.90      0.90      2924
weighted avg       0.90      0.90      0.90      2924

MLP > Female Validation Set
              precision    recall  f1-score   support

          -1       0.89      0.92      0.91       392
           1       0.93      0.89      0.91       424

    accuracy                           0.91       816
   macro avg       0.91      0.91      0.91       816
weighted avg       0.91      0.91      0.91       816

MLP > Male Validation Set
              precision    recall  f1-score   support

          -1       0.90      0.91      0.90       983
           1       0.91      0.89      0.90       914

    accuracy                           0.90      1897
   macro avg       0.90      0.90      0.90    