# ML Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys
import warnings

import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from data_processing import DataProcessing
from feature_extraction import SpacyFeatureExtraction
# from classification_models import SkLearnPerceptronModel, SkLearnSGDClassifier, EvaluationMetric
from classification_models import SkLearnModelFactory, EvaluationMetric

In [2]:
pd.set_option('max_colwidth', 800)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', None)

warnings.filterwarnings('ignore')

## Load Data

In [3]:
print("======= LOAD DATA =======")



In [4]:
base_data_path = os.path.join(notebook_dir, '../data')
combine_data_path = os.path.join(base_data_path, 'financial_phrase_bank/combined_generated_fin_phrase_bank')
data_path = os.path.join(combine_data_path, 'combined_generated_fin_phrase_bank-v1.csv')

In [5]:
df = DataProcessing.load_from_file(data_path, 'csv', sep=',')
print(len(df))
df.drop(columns=['Unnamed: 0'], inplace=True)
print(f"\tShape: {df.shape}, \nSubset of Data:{df.head(7)}")
df.shape, df.tail(3)

2825
	Shape: (2825, 2), 
Subset of Data:                                                                                                                   Base Sentence  \
0                                     JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.   
1                                  On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.   
2                                                   Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.   
3                              According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.   
4  In 21 August 2024, Morgan Stanley envisions that the gross profit at Johnson & Johnson has some probability to remain stable.   
5                                              The stock price at Visa should stay same in Q2 of 2026, according to Wells Fargo.   
6                                   

((2825, 2),
                                                                                                        Base Sentence  \
 2822  These moderate but significant changes resulted in a significant 24-32 % reduction in the estimated CVD risk .   
 2823                Uponor improved its performance in spite of the decrease in residential construction in the US .   
 2824                                                                       The inventor was issued U.S. Patent No. .   
 
       Sentence Label  
 2822               0  
 2823               0  
 2824               0  )

## Shuffle Data

In [6]:
df.head(3)

Unnamed: 0,Base Sentence,Sentence Label
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,1
1,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.",1
2,"Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.",1


In [7]:
print("======= SHUFFLE DATA =======")



In [8]:
shuffled_df = DataProcessing.shuffle_df(df)
print(f"\tShape: {shuffled_df.shape}, \nSubset of Data:{shuffled_df.head(7)}")

	Shape: (2825, 2), 
Subset of Data:                                                                                                                                                                                                              Base Sentence  \
0                                                                                                         In 2023 Q4, the National Oceanic and Atmospheric Administration envisioned that the snowfall in Boston decreased.   
1                                                                                                             According to the top executive at JPMorgan Chase, the revenue at Microsoft had risen from Q1 2026 to Q4 2026.   
2                                                                                                                                            Bank of America predicts on 2024-09-15, the operating income at Visa may rise.   
3                                                                        

## Extract Sentence Embeddings

In [9]:
print("======= EMBED SENTENCES: Spacy =======")



In [10]:
spacy_fe = SpacyFeatureExtraction(shuffled_df, 'Base Sentence')
spacy_fe

<feature_extraction.SpacyFeatureExtraction at 0x360261490>

In [11]:
spacy_sentence_embeddings_df = spacy_fe.sentence_feature_extraction(attach_to_df=True)
# print(f"{spacy_sentence_embeddings_df.head(3)}")

  0%|          | 0/2825 [00:00<?, ?it/s]

100%|██████████| 2825/2825 [00:09<00:00, 295.54it/s]


## Normalize Embeddings

- Why: Getting the below warnings
    1. sklearn/utils/extmath.py:203: RuntimeWarning: divide by zero encountered in matmul ret = a @ b
    2. sklearn/utils/extmath.py:203: RuntimeWarning: overflow encountered in matmul ret = a @ b
    3. sklearn/utils/extmath.py:203: RuntimeWarning: invalid value encountered in matmul ret = a @ b

- Normalize will place data within "boundaries" to be all on one scale

In [12]:
print("======= NORMALIZE EMBEDDINGS =======")



In [13]:
from sklearn.preprocessing import StandardScaler

# Convert embeddings to matrix if not already
embeddings_matrix = pd.DataFrame(spacy_sentence_embeddings_df["Embedding"].tolist())

# Scale the embeddings
scaler = StandardScaler()
scaled_embeddings = scaler.fit_transform(embeddings_matrix)

spacy_sentence_embeddings_df['Normalized Embeddings'] = list(scaled_embeddings)

In [14]:
# spacy_sentence_embeddings_df.columns.

In [15]:
# print(f"{spacy_sentence_embeddings_df.head(3)}")
# spacy_sentence_embeddings_df
# print(f"{spacy_sentence_embeddings_df.to_dict()}")

for idx, row in spacy_sentence_embeddings_df.iterrows():
    text = row['Base Sentence']
    label = row['Sentence Label']
    embedding = row['Embedding']
    norm_embedding = row['Normalized Embeddings']
    if idx < 7:
        print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding.shape}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")

0
 Sentence: In 2023 Q4, the National Oceanic and Atmospheric Administration envisioned that the snowfall in Boston decreased.
 Label: 0
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [ 0.04494967  0.11323709  0.0477355  -0.02516405  0.07923155  0.11287378] 
 Norm Embeddings: (300,), 
	Norm Embeddings Subset [:6]: [ 1.5806407  -1.5839013   0.63837487  0.17236419  0.33720988  2.0125403 ]
1
 Sentence: According to the top executive at JPMorgan Chase, the revenue at Microsoft had risen from Q1 2026 to Q4 2026.
 Label: 0
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [-0.06890249  0.16694139  0.02429623  0.02060641  0.0189305   0.05144866] 
 Norm Embeddings: (300,), 
	Norm Embeddings Subset [:6]: [ 0.20303898 -0.8589743   0.27973002  0.9056544  -0.4063508   1.1576595 ]
2
 Sentence: Bank of America predicts on 2024-09-15, the operating income at Visa may rise.
 Label: 1
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [-0.24173503  0.31656978  0.05173684  0.01124636  0.03359347

In [16]:
embeddings_col_name = 'Normalized Embeddings'

## Split Data

In [17]:
print("======= SPLIT DATA =======")



In [18]:
# spacy_embeds = spacy_sentence_embeddings_df['Embedding'].to_list()
labels_col = spacy_sentence_embeddings_df['Sentence Label']
X_train_df, X_test_df, y_train_df, y_test_df = DataProcessing.split_data(spacy_sentence_embeddings_df, labels_col)
# print(f"{X_train_df.head(3)}")

In [19]:
combine_data_path

'/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank'

In [20]:
save_df = True

if save_df == True:
    print("Save test set so we can pass these into LLMs")
    # save_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
    DataProcessing.save_to_file(X_test_df, combine_data_path, 'x_test_set', 'csv')
    DataProcessing.save_to_file(y_test_df, combine_data_path, 'y_test_set', 'csv')

Save test set so we can pass these into LLMs
Using file number: 5
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/x_test_set-v5.csv
Using file number: 5
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/y_test_set-v5.csv


In [21]:
len(y_train_df)


2260

In [22]:
X_train_df[embeddings_col_name].to_list()

[array([-1.34629750e+00,  1.36705950e-01, -6.57694340e-01,  8.43474209e-01,
        -1.12555516e+00,  4.31900263e-01,  1.43859163e-01,  5.20388424e-01,
         1.66922235e+00, -1.59964705e+00,  1.86344433e+00, -4.57653143e-02,
         2.16881290e-01,  9.74830925e-01,  1.06139231e+00,  1.31786954e+00,
        -6.44562900e-01, -1.23764515e+00,  3.96417499e-01,  2.65132397e-01,
         1.43096817e+00,  3.37861866e-01,  1.46143115e+00,  1.06361079e+00,
        -3.30831021e-01,  9.59911466e-01,  1.88134268e-01,  5.84242344e-01,
         5.39615333e-01,  1.56469905e+00, -9.18031037e-01, -1.20016670e+00,
         6.88765496e-02,  4.59676266e-01, -1.07421041e+00,  4.94335592e-01,
         1.21734357e+00,  1.75553620e+00, -2.03279495e+00, -5.45142472e-01,
         5.94559550e-01,  7.18101323e-01,  1.00166178e+00,  1.42629862e+00,
         1.75901914e+00, -1.12639558e+00,  1.00316334e+00, -1.28546369e+00,
        -4.33197081e-01, -1.20801663e+00,  1.34890556e+00,  3.98505569e-01,
        -1.4

## Models

In [23]:
print("======= TRAIN x TEST MODELS =======")



> Track loss: try BCE (Binary Cross Entropy)

In [24]:
sklmf = SkLearnModelFactory
perception_model = sklmf.select_model('perceptron')
sgd_classifier_model = sklmf.select_model('sgd_classifier')
logistic_regression_model = sklmf.select_model('logistic_regression')
ridge_classifier_model = sklmf.select_model('ridge_classifier')
# linear_regression_model = sklmf.select_model('linear_regression')
# elastic_net_model = sklmf.select_model('elastic_net')
decision_tree_classifier_model = sklmf.select_model('decision_tree_classifier')
random_forest_classifier_model = sklmf.select_model('random_forest_classifier')
gradient_boosting_classifier_model = sklmf.select_model('gradient_boosting_classifier')

ml_models = [perception_model, sgd_classifier_model, logistic_regression_model, ridge_classifier_model, decision_tree_classifier_model, random_forest_classifier_model, gradient_boosting_classifier_model]

In [25]:
models_with_predictions = {}
for ml_model in ml_models:
    print(f"Train -> Predict for {ml_model.get_model_name()}")
    ml_model.train_model(X_train_df[embeddings_col_name].to_list(), y_train_df)
    ml_model_predictions = ml_model.predict(X_test_df[embeddings_col_name].to_list())
    models_with_predictions[ml_model.get_model_name()] = ml_model_predictions

models_with_predictions

Train -> Predict for Perceptron
Train -> Predict for SDG Classifier
Train -> Predict for Logistic Regression
Train -> Predict for Ridge Classifier
Train -> Predict for Decision Tree
Train -> Predict for Random Forest
Train -> Predict for Gradient Boosting Machine


{'Perceptron': 0      0
 1      0
 2      0
 3      0
 4      1
 5      0
 6      1
 7      0
 8      0
 9      1
 10     1
 11     0
 12     0
 13     0
 14     0
 15     1
 16     1
 17     0
 18     1
 19     0
 20     0
 21     0
 22     1
 23     0
 24     0
 25     1
 26     1
 27     0
 28     1
 29     0
 30     0
 31     0
 32     0
 33     0
 34     1
 35     1
 36     1
 37     0
 38     0
 39     0
 40     1
 41     0
 42     0
 43     1
 44     1
 45     0
 46     0
 47     0
 48     0
 49     0
 50     0
 51     0
 52     0
 53     0
 54     1
 55     1
 56     0
 57     0
 58     1
 59     0
 60     1
 61     0
 62     1
 63     0
 64     0
 65     0
 66     0
 67     1
 68     0
 69     0
 70     0
 71     1
 72     1
 73     0
 74     0
 75     0
 76     1
 77     1
 78     1
 79     0
 80     0
 81     1
 82     0
 83     0
 84     0
 85     0
 86     0
 87     0
 88     0
 89     0
 90     0
 91     0
 92     0
 93     0
 94     0
 95     1
 96     0
 97     1
 98   

In [26]:
# models_predictions_df = pd.DataFrame(models_to_predictions)
# models_predictions_df

In [27]:
y_test_df.rename(index='Actual Label', inplace=True)

1091    0
2329    0
1475    0
2591    0
1105    1
239     0
1374    1
2666    0
1859    0
1061    1
1536    1
1068    0
1558    0
407     0
1928    0
178     1
2397    0
1724    0
1336    1
2098    0
1234    0
610     0
194     1
2333    0
1936    0
196     1
1718    1
1222    0
1468    1
1269    0
2791    0
1124    0
2236    0
2470    0
1847    1
2682    1
2733    1
2242    0
2303    0
594     0
1174    1
2284    0
2550    0
907     1
450     1
141     0
321     0
1865    0
1539    0
1230    0
1554    0
471     0
2798    0
2823    0
2493    1
1801    1
1551    0
1760    0
1241    1
2400    0
1532    0
2232    0
1770    1
464     0
1349    0
2383    0
2723    0
2421    1
1826    1
1502    0
1736    0
2506    1
436     1
567     0
511     1
1602    0
2808    1
1446    1
2522    0
1178    0
1932    0
2582    1
2686    0
32      0
2254    0
67      0
2819    0
2521    0
2132    0
2167    0
2273    0
1435    0
568     0
1701    0
2243    0
685     1
2396    0
296     1
572     1
1418    0


In [28]:
test_and_models_df = pd.concat([X_test_df.loc[:, :], y_test_df], axis=1)
# test_and_models_df = pd.concat([test_df, models_predictions_df])

for key, value in models_with_predictions.items():
    test_and_models_df[key] = value.to_numpy().ravel()

test_and_models_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings,Actual Label,Perceptron,SDG Classifier,Logistic Regression,Ridge Classifier,Decision Tree,Random Forest,Gradient Boosting Machine
1091,"The storm chaser observed that the temperature at the National Weather Service had remained stable on August 19, 2024.",0,"[0.01788343, 0.24866207, 0.094443955, -0.03021181, -0.070139244, -0.007567964, -0.017894398, 0.08168157, 0.028781995, 2.0201428, -0.12591818, -0.023087604, 0.07538171, -0.03156038, -0.19919248, -0.036773805, -0.03879786, 1.1738267, -0.12394501, -0.07521798, 0.003590569, -0.10820109, -0.03967277, 0.03305886, 0.04910738, 0.04788186, -0.15314269, -0.09712033, 0.040994942, 0.11488184, 0.0055432254, -0.03674871, -0.024847856, 0.095107935, -0.04302918, 0.025193632, -0.016964603, -0.06956665, 0.061514445, 0.008832348, 0.021080341, -0.019033525, 0.08621238, 0.030166881, 0.14876842, -0.057972807, -0.1393736, -0.14312293, 0.014926011, -0.050347142, 0.09787629, 0.09848925, -0.09339185, -0.03614776, -0.068874896, 0.023000754, 0.048213284, -0.07686259, 0.017088624, -0.06080929, -0.008908651, 0.0876...","[1.2531414, 0.24413149, 1.3530619, 0.09149392, -1.5046525, 0.3362991, -0.13333066, 1.1558955, -0.084135026, 0.7854441, 1.8465815, -0.66463435, 0.032934606, -0.14174084, -1.5404884, 0.013807795, -0.47947663, 0.46723703, -0.09412069, -0.6974345, -0.31674334, -2.1957548, -0.487613, 1.5533975, 0.2608468, -0.20929825, -0.37727737, -1.8314586, 0.4497357, 0.6107614, 0.06841177, -0.5643112, -0.3896921, 0.18595253, -1.2937557, 0.790129, -0.0006915705, -1.7482202, 1.1252714, 0.43824947, 0.22022118, -1.0021782, 0.118851766, 1.0710559, 1.70971, -1.0555644, -1.0757576, -1.1931561, -0.32876036, -0.8100587, 1.3646387, 1.1098807, -0.7020886, 0.2623699, -1.3238336, 1.0658816, 0.6163266, -0.67403257, 0.33669582, 0.91720897, 0.4291731, 2.0079625, 1.1437397, 1.5941138, -2.016392, 1.1990359, -0.38339648, -...",0,0,0,0,0,0,0,0
2329,The stock rose for a third day on Tuesday bringing its three-day rise to GBX10 .50 or 1.8 % .,0,"[-0.122866996, 0.23532273, -0.09065495, 0.06255348, 0.111611985, -0.21670307, -0.085599996, 0.030712312, -0.0044496804, 1.7492541, -0.2926319, 0.060443584, 0.001246726, -0.13316242, 0.05390657, -0.092290394, -0.05079866, 1.2144053, -0.115928225, -0.05597436, -0.06949908, -0.042026952, -0.09294324, 0.00829468, 0.13220097, 0.07152723, -0.15697181, 0.04359168, 0.0018059551, 0.106987946, -0.028565396, 0.009832043, 0.038703185, -0.044848997, 0.049407903, -0.0015720055, -0.051098183, -0.023336614, 0.06144732, -0.0067662257, -0.03323334, 0.07588687, 0.07649146, 0.026397228, 0.1119164, 0.043576375, -0.12757336, -0.004263361, -0.011416227, 0.006952594, -0.035715047, 0.04671627, 0.00077275594, 0.059992958, 0.110920265, -0.044970155, -0.10352495, -0.009647067, -0.0711915, -0.14921726, -0.02177001...","[-0.44992706, 0.06407054, -1.4791405, 1.5776898, 0.73648673, -2.5743272, -1.2913442, 0.5489351, -0.54177654, -0.11669941, -0.13502954, 0.40553966, -1.1143013, -1.6511116, 0.982206, -0.91091937, -0.678029, 0.70789826, 0.027599826, -0.3895433, -1.5207351, -1.2699041, -1.2626793, 1.2119122, 1.5237813, 0.12603293, -0.42895207, 0.29218906, -0.14803556, 0.5002266, -0.51172674, 0.10759708, 0.6268649, -1.7459861, 0.1202845, 0.3441922, -0.47172827, -1.033927, 1.1243105, 0.23223971, -0.6983841, 0.46343842, -0.0032748124, 1.0123496, 1.1940337, 0.6141557, -0.90957665, 0.4381301, -0.683872, -0.035031475, -0.6176828, 0.32919613, 0.88948053, 1.4200419, 0.97033346, -0.022963, -1.6892537, 0.2557468, -1.1210399, -0.50342745, 0.2100613, -0.10963998, -0.20473205, 0.29108208, 1.2376289, -0.6684119, -0.0941...",0,0,0,0,0,0,0,0
1475,"The new agreement , which expands a long-established cooperation between the companies , involves the transfer of certain engineering and documentation functions from Larox to Etteplan .",0,"[0.06650338, 0.04327274, -0.14482032, -0.08664496, 0.075403735, 0.0019220297, -0.048090253, -0.09958013, 0.040488176, 2.0577347, -0.1796832, 0.085284956, 0.14378415, -0.05734751, -0.085376784, -0.045510996, -0.0961719, 1.3323777, -0.11504536, -0.030800238, 0.012436869, -0.03126648, -0.17845981, -0.03242341, 0.046014324, 0.05701886, 0.01010977, 0.009858055, -0.032325912, 0.03412071, 0.024486408, 0.14996095, -0.005516243, 0.09620823, 0.08602662, -0.04526235, -0.0031309344, 0.042869862, 0.017937409, -0.056879763, 0.031342182, 0.06334552, 0.052892737, -0.06482789, -0.04092641, 0.02764573, -0.045015063, 0.06616259, -0.06473491, 0.031538345, 0.10318553, 0.043523118, -0.073244005, 0.035300985, 0.002962932, -0.0012619036, 0.100434616, -0.071352966, 0.02605265, -0.05511761, 0.07167475, 0.033185...","[1.8414388, -2.5283144, -2.3079262, -0.81262356, 0.2900098, 0.46837556, -0.6497902, -1.0026343, 0.077073656, 0.9106369, 1.2075139, 0.72379917, 1.0914596, -0.52482706, -0.4060621, -0.13172561, -1.4287258, 1.4075619, 0.041004527, 0.013233974, -0.17101993, -1.119353, -2.5069149, 0.65043074, 0.21383561, -0.079720184, 1.8258436, -0.21692407, -0.66866773, -0.5201012, 0.390608, 2.128899, -0.08046528, 0.20114078, 0.6804529, -0.3837237, 0.19021034, -0.010980808, 0.50145286, -0.42960805, 0.39377934, 0.26979402, -0.299752, -0.40833664, -0.94472015, 0.35221645, 0.25307867, 1.265476, -1.4026448, 0.29751152, 1.4434209, 0.2810466, -0.36154997, 1.1227152, -0.40719056, 0.67721146, 1.4098012, -0.59781903, 0.48471534, 1.008669, 1.8020276, 1.2084849, 0.6241578, -0.06833091, 0.2462933, -0.5214658, -0.22949...",0,0,0,0,0,0,0,0


## Save Output

In [29]:
DataProcessing.save_to_file(test_and_models_df, combine_data_path, 'ml_classifiers', '.csv')

Using file number: 4
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/ml_classifiers-v4.csv


## Evaluation

In [30]:
print("======= EVALUATION/RESULTS =======")



In [31]:
get_metrics = EvaluationMetric()
get_metrics

<classification_models.EvaluationMetric at 0x3738cbc90>

> - Results may differ (from previous runs and even terminal runs) because we shuffle the data.

In [33]:
eval_reports = []
actual_label = test_and_models_df['Actual Label'].values
for ml_model in ml_models:
    ml_model_name = ml_model.get_model_name()
    print(f"Actual Label:\t\t{actual_label}")
    ml_model_predictions = test_and_models_df[ml_model_name].values
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")
    print()
    eval_report = get_metrics.eval_classification_report(y_test_df, ml_model_predictions)
    eval_reports.append(eval_report)

Actual Label:		[0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 1
 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0
 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 1
 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1
 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 1 0 1 0 0 0 0 1 0 1 0
 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1
 1 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1
 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0
 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 0
 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 1 0 0 1 0 1 1 1 0
 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 0 1 0 0
 1 0 0 0 0

In [34]:
eval_reports

['              precision    recall  f1-score   support\n\n           0       0.98      0.96      0.97       402\n           1       0.91      0.94      0.92       163\n\n    accuracy                           0.96       565\n   macro avg       0.94      0.95      0.95       565\nweighted avg       0.96      0.96      0.96       565\n',
 '              precision    recall  f1-score   support\n\n           0       0.95      0.99      0.97       402\n           1       0.97      0.87      0.92       163\n\n    accuracy                           0.95       565\n   macro avg       0.96      0.93      0.94       565\nweighted avg       0.95      0.95      0.95       565\n',
 '              precision    recall  f1-score   support\n\n           0       0.98      0.97      0.97       402\n           1       0.93      0.94      0.94       163\n\n    accuracy                           0.96       565\n   macro avg       0.95      0.96      0.95       565\nweighted avg       0.96      0.96      0.