# ML Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys
import warnings

import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from data_processing import DataProcessing
from feature_extraction import SpacyFeatureExtraction
# from classification_models import SkLearnPerceptronModel, SkLearnSGDClassifier, EvaluationMetric
from classification_models import SkLearnModelFactory, EvaluationMetric

In [2]:
pd.set_option('max_colwidth', 800)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', None)

warnings.filterwarnings('ignore')

## Load Data

In [3]:
print("======= LOAD DATA =======")



In [4]:
base_data_path = os.path.join(notebook_dir, '../data')
combine_data_path = os.path.join(base_data_path, 'financial_phrase_bank/combined_generated_fin_phrase_bank')
data_path = os.path.join(combine_data_path, 'combined_generated_fin_phrase_bank-v1.csv')

In [5]:
df = DataProcessing.load_from_file(data_path, 'csv', sep=',')
print(len(df))
df.drop(columns=['Unnamed: 0'], inplace=True)
print(f"\tShape: {df.shape}, \nSubset of Data:{df.head(7)}")
df.shape, df.tail(3)

2825
	Shape: (2825, 2), 
Subset of Data:                                                                                                                   Base Sentence  \
0                                     JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.   
1                                  On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.   
2                                                   Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.   
3                              According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.   
4  In 21 August 2024, Morgan Stanley envisions that the gross profit at Johnson & Johnson has some probability to remain stable.   
5                                              The stock price at Visa should stay same in Q2 of 2026, according to Wells Fargo.   
6                                   

((2825, 2),
                                                                                                        Base Sentence  \
 2822  These moderate but significant changes resulted in a significant 24-32 % reduction in the estimated CVD risk .   
 2823                Uponor improved its performance in spite of the decrease in residential construction in the US .   
 2824                                                                       The inventor was issued U.S. Patent No. .   
 
       Sentence Label  
 2822               0  
 2823               0  
 2824               0  )

## Shuffle Data

In [6]:
df.head(3)

Unnamed: 0,Base Sentence,Sentence Label
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,1
1,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.",1
2,"Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.",1


In [7]:
print("======= SHUFFLE DATA =======")



In [8]:
shuffled_df = DataProcessing.shuffle_df(df)
print(f"\tShape: {shuffled_df.shape}, \nSubset of Data:{shuffled_df.head(7)}")

	Shape: (2825, 2), 
Subset of Data:                                                                                                                                                                                                                                                                 Base Sentence  \
0                                                                                                                                                                 On 12/31/2025, Dr. Smith speculates the average life expectancy in developed countries will likely increase.   
1                                                                                                                                          The average temperature in the Arctic should stay the same in 2024/08/21, according to the miscellaneous climate scientist at NOAA.   
2                                                                                                                                              

## Extract Sentence Embeddings

In [9]:
print("======= EMBED SENTENCES: Spacy =======")



In [10]:
spacy_fe = SpacyFeatureExtraction(shuffled_df, 'Base Sentence')
spacy_fe

<feature_extraction.SpacyFeatureExtraction at 0x3402b9650>

In [11]:
spacy_sentence_embeddings_df = spacy_fe.sentence_feature_extraction(attach_to_df=True)
# print(f"{spacy_sentence_embeddings_df.head(3)}")

100%|██████████| 2825/2825 [00:09<00:00, 284.47it/s]


## Normalize Embeddings

- Why: Getting the below warnings
    1. sklearn/utils/extmath.py:203: RuntimeWarning: divide by zero encountered in matmul ret = a @ b
    2. sklearn/utils/extmath.py:203: RuntimeWarning: overflow encountered in matmul ret = a @ b
    3. sklearn/utils/extmath.py:203: RuntimeWarning: invalid value encountered in matmul ret = a @ b

- Normalize will place data within "boundaries" to be all on one scale

In [12]:
print("======= NORMALIZE EMBEDDINGS =======")



In [13]:
from sklearn.preprocessing import StandardScaler

# Convert embeddings to matrix if not already
embeddings_matrix = pd.DataFrame(spacy_sentence_embeddings_df["Embedding"].tolist())

# Scale the embeddings
scaler = StandardScaler()
scaled_embeddings = scaler.fit_transform(embeddings_matrix)

spacy_sentence_embeddings_df['Normalized Embeddings'] = list(scaled_embeddings)

In [14]:
# spacy_sentence_embeddings_df.columns.

In [15]:
# print(f"{spacy_sentence_embeddings_df.head(3)}")
# spacy_sentence_embeddings_df
# print(f"{spacy_sentence_embeddings_df.to_dict()}")

for idx, row in spacy_sentence_embeddings_df.iterrows():
    text = row['Base Sentence']
    label = row['Sentence Label']
    embedding = row['Embedding']
    norm_embedding = row['Normalized Embeddings']
    if idx < 7:
        print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding.shape}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")

0
 Sentence: On 12/31/2025, Dr. Smith speculates the average life expectancy in developed countries will likely increase.
 Label: 1
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [-0.15039584  0.29167947  0.00727376 -0.07923958 -0.06741213 -0.07566581] 
 Norm Embeddings: (300,), 
	Norm Embeddings Subset [:6]: [-0.7830238   0.8248015   0.01926889 -0.69398165 -1.471025   -0.6114489 ]
1
 Sentence: The average temperature in the Arctic should stay the same in 2024/08/21, according to the miscellaneous climate scientist at NOAA.
 Label: 1
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [-0.04126545  0.09564788  0.00595577  0.01884359 -0.00823823  0.08642977] 
 Norm Embeddings: (300,), 
	Norm Embeddings Subset [:6]: [ 5.3744483e-01 -1.8213291e+00 -8.9771638e-04  8.7741214e-01
 -7.4136311e-01  1.6445072e+00]
2
 Sentence: According to Senior Level Executive Emily Chen, the sales figures at Amazon will fall in Q3 of 2029.
 Label: 1
 Embeddings Shape: (300,)
	 Embeddings Subset [:6]: [-

In [16]:
embeddings_col_name = 'Normalized Embeddings'

## Split Data

In [17]:
print("======= SPLIT DATA =======")



In [18]:
# spacy_embeds = spacy_sentence_embeddings_df['Embedding'].to_list()
labels_col = spacy_sentence_embeddings_df['Sentence Label']
X_train_df, X_test_df, y_train_df, y_test_df = DataProcessing.split_data(spacy_sentence_embeddings_df, labels_col)
# print(f"{X_train_df.head(3)}")

In [19]:
save_df = True

if save_df == True:
    print("Save test set so we can pass these into LLMs")
    save_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
    DataProcessing.save_to_file(X_test_df, save_path, 'x_test_set', 'csv')
    DataProcessing.save_to_file(y_test_df, save_path, 'y_test_set', 'csv')

Save test set so we can pass these into LLMs
Saved to: 
	/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_generated_fin_phrase_bank/x_test_set-v1.csv
Saved to: 
	/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_generated_fin_phrase_bank/y_test_set-v1.csv


In [20]:
len(y_train_df)


2260

In [21]:
X_train_df[embeddings_col_name].to_list()

[array([ 3.87633651e-01, -5.78997254e-01, -1.34146714e+00, -1.57290852e+00,
        -7.72671819e-01,  7.50585735e-01,  1.12398505e+00, -8.31468821e-01,
        -5.61952949e-01,  1.11076975e+00,  3.72011602e-01, -1.02509248e+00,
        -1.05857897e+00, -3.14088970e-01, -1.00380170e+00,  1.29361987e+00,
         2.79411167e-01, -7.51305163e-01,  4.23018873e-01, -1.39049336e-01,
         1.66497186e-01, -6.64078414e-01,  4.94642735e-01,  8.54104757e-01,
        -1.53744876e-01,  1.32038742e-01,  3.09985559e-02,  2.87040025e-01,
         1.27504337e+00, -1.14576995e+00,  8.18920791e-01, -4.28337604e-01,
        -1.24328127e-02, -6.55506730e-01, -6.45619750e-01,  3.45556766e-01,
        -8.77893209e-01, -1.07097173e+00, -1.41179466e+00,  7.30838120e-01,
         2.39740655e-01,  1.80528343e-01, -6.02324724e-01, -6.31598413e-01,
         8.44345629e-01, -3.72290342e-05,  1.99610516e-01, -7.94145048e-01,
         5.63102007e-01, -1.69613755e+00, -6.91048265e-01,  1.18127191e+00,
        -9.0

## Models

In [22]:
print("======= TRAIN x TEST MODELS =======")



> Track loss: try BCE (Binary Cross Entropy)

In [23]:
sklmf = SkLearnModelFactory
perception_model = sklmf.select_model('perceptron')
sgd_classifier_model = sklmf.select_model('sgd_classifier')
logistic_regression_model = sklmf.select_model('logistic_regression')
ridge_classifier_model = sklmf.select_model('ridge_classifier')
# linear_regression_model = sklmf.select_model('linear_regression')
# elastic_net_model = sklmf.select_model('elastic_net')

ml_models = [perception_model, sgd_classifier_model, logistic_regression_model, ridge_classifier_model]

In [24]:
models_with_predictions = {}
for ml_model in ml_models:
    print(f"Train -> Predict for {ml_model.get_model_name()}")
    ml_model.train_model(X_train_df[embeddings_col_name].to_list(), y_train_df)
    ml_model_predictions = ml_model.predict(X_test_df[embeddings_col_name].to_list())
    models_with_predictions[ml_model.get_model_name()] = ml_model_predictions

models_with_predictions

Train -> Predict for Perceptron
Train -> Predict for SDG Classifier
Train -> Predict for Logistic Regression
Train -> Predict for Ridge Classifier


{'Perceptron': 0      0
 1      0
 2      0
 3      0
 4      0
 5      0
 6      0
 7      0
 8      1
 9      0
 10     0
 11     1
 12     0
 13     0
 14     0
 15     0
 16     1
 17     0
 18     0
 19     0
 20     1
 21     0
 22     1
 23     0
 24     0
 25     0
 26     0
 27     1
 28     0
 29     0
 30     1
 31     0
 32     0
 33     0
 34     1
 35     0
 36     0
 37     1
 38     1
 39     0
 40     1
 41     0
 42     0
 43     0
 44     1
 45     0
 46     0
 47     0
 48     0
 49     0
 50     0
 51     0
 52     0
 53     1
 54     1
 55     0
 56     0
 57     1
 58     0
 59     0
 60     0
 61     0
 62     0
 63     0
 64     1
 65     0
 66     1
 67     0
 68     0
 69     0
 70     0
 71     0
 72     0
 73     0
 74     1
 75     0
 76     0
 77     0
 78     1
 79     0
 80     0
 81     1
 82     1
 83     0
 84     0
 85     0
 86     0
 87     1
 88     1
 89     0
 90     0
 91     0
 92     1
 93     0
 94     0
 95     1
 96     1
 97     0
 98   

In [25]:
# models_predictions_df = pd.DataFrame(models_to_predictions)
# models_predictions_df

In [26]:
y_test_df.rename(index='Actual Label', inplace=True)

1091    0
2329    0
1475    0
2591    0
1105    0
239     0
1374    0
2666    0
1859    1
1061    0
1536    0
1068    1
1558    0
407     0
1928    0
178     0
2397    1
1724    0
1336    0
2098    0
1234    1
610     0
194     1
2333    0
1936    0
196     0
1718    0
1222    1
1468    0
1269    0
2791    1
1124    0
2236    0
2470    0
1847    1
2682    0
2733    0
2242    0
2303    1
594     0
1174    1
2284    0
2550    0
907     0
450     1
141     0
321     0
1865    0
1539    0
1230    0
1554    0
471     0
2798    0
2823    1
2493    1
1801    0
1551    0
1760    1
1241    0
2400    0
1532    0
2232    0
1770    0
464     0
1349    1
2383    0
2723    1
2421    0
1826    0
1502    0
1736    0
2506    0
436     0
567     0
511     1
1602    0
2808    0
1446    0
2522    1
1178    0
1932    0
2582    1
2686    1
32      0
2254    0
67      0
2819    0
2521    1
2132    1
2167    0
2273    0
1435    0
568     1
1701    0
2243    0
685     1
2396    1
296     0
572     0
1418    1


In [27]:
test_and_models_df = pd.concat([X_test_df.loc[:, :], y_test_df], axis=1)
# test_and_models_df = pd.concat([test_df, models_predictions_df])

for key, value in models_with_predictions.items():
    test_and_models_df[key] = value.to_numpy().ravel()

test_and_models_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings,Actual Label,Perceptron,SDG Classifier,Logistic Regression,Ridge Classifier
1091,"Dr. Maria Rodriguez, a renowned climate scientist, noted on 2025-02-14 that the wind speed in Chicago fell.",0,"[-0.053382035, 0.29605392, 0.0416867, -0.0014075264, 0.068240814, 0.070354186, 0.055441696, -0.044797793, -0.11456451, 1.6136333, -0.24468136, 0.0066391476, -0.00585275, -0.047630608, -0.24670327, -0.045335755, 0.026132127, 1.0484475, -0.059191827, -0.11277295, 0.09063209, 0.070430085, 0.030755753, -0.14085571, 0.0673193, -0.0031903007, -0.12214286, 0.018819036, 0.10367923, 0.051561426, 0.05919209, 0.100362174, -0.07448221, -0.03646524, 0.09928752, -0.047658592, 0.05136287, 0.010339641, -0.08169663, -0.05596817, 0.117678136, 0.023340704, 0.11158737, -0.04071751, 0.022786805, 0.05680235, -0.12911336, -0.08672076, 0.08478724, 0.038602516, -0.0069369148, 0.16260676, -0.09194405, -0.017646583, 0.002044499, -0.0068996274, 0.036731724, 0.022976456, 0.059084296, -0.19271524, -0.045397352, -0....","[0.3908352, 0.88384986, 0.54582196, 0.5529683, 0.20168522, 1.4207762, 1.1209849, -0.35026643, -2.0581949, -0.5683589, 0.43492547, -0.28378505, -1.2241656, -0.38047555, -2.0140388, -0.12880665, 0.59478503, -0.2763546, 0.88904005, -1.2983011, 1.117075, 0.3034963, 0.53709686, -0.84479505, 0.5376486, -0.9335876, 0.04107084, -0.08168354, 1.4058938, -0.2758865, 0.98090166, 1.413457, -1.1836389, -1.6302584, 0.88330925, -0.423647, 0.94221413, -0.5136003, -0.9248387, -0.41756865, 1.8539761, -0.34789968, 0.43764463, -0.032855786, -0.053170927, 0.83162355, -0.9312643, -0.530558, 0.6130175, 0.3930604, -0.19065279, 2.0767083, -0.67761785, 0.48515055, -0.41890967, 0.5868992, 0.44187045, 0.7070218, 1.0301542, -1.2024007, -0.19246452, 0.2535662, 0.4490124, 0.31755912, -0.5983049, -1.3797044, -1.171849...",0,0,0,0,0
2329,"We succeeded in increasing our market share of sold apartment '' , comments Mr Kari Kauniskangas , Head of YIT International Construction Services .",0,"[-0.047133807, 0.25161278, 0.02611006, -0.12686542, 0.17688423, 0.00066329475, 0.05202985, -0.036127325, 0.025282735, 1.7972957, -0.27255872, -0.015411332, 0.11387389, -0.115707785, -0.11125055, -0.043372747, -0.021267518, 0.97609216, -0.13950096, 0.06794446, 0.046314668, 0.15894125, 0.02581278, -0.018399246, 0.07523509, 0.02809562, -0.0059614847, 0.04350786, 0.015681, 0.12558688, 0.09250436, -0.026178792, -0.046082918, 0.08237546, 0.034930628, -0.13255031, 0.004810281, 0.03488911, 0.000102087855, 0.019833555, -0.10256171, -0.06577009, 0.061782334, -0.049995974, -0.013178603, 0.11788439, 0.017624771, 0.011914167, 0.10026301, -0.011411051, 0.021787012, -0.017350623, -0.0617542, 0.08609483, 0.08431935, -0.011886631, 0.008703574, -0.08348992, 0.021295542, -0.059176996, -0.018335046, -0.09...","[0.46643826, 0.28396165, 0.3074835, -1.4569969, 1.5413463, 0.4508572, 1.0626299, -0.24701537, -0.13232419, 0.043293975, 0.103566475, -0.56628853, 0.6285996, -1.3918105, -0.6639517, -0.09610927, -0.18943849, -0.70547533, -0.33030978, 1.5931151, 0.38704258, 1.5418671, 0.4651784, 0.84381676, 0.6579603, -0.4899005, 1.6089592, 0.29092404, 0.06360818, 0.76065934, 1.5474954, -0.41184446, -0.7293663, 0.010195858, -0.1011791, -1.8380105, 0.29979753, -0.1342902, 0.24613483, 0.5835421, -1.8709322, -1.7238133, -0.18806958, -0.17735313, -0.5564406, 1.8359636, 1.1352255, 0.6281795, 0.8216415, -0.28341526, 0.23557287, -0.6368683, -0.16734943, 1.7343459, 0.63090867, 0.507011, 0.01599794, -0.76570714, 0.40616304, 0.9434384, 0.26858085, -0.6297688, -0.63668746, -1.6477914, 0.96522474, 0.48633134, 0.8250...",0,0,0,0,0
1475,"The Centers for Disease Control and Prevention reported that on 4/12/2032, the prevalence of obesity among children increased.",0,"[-0.1512464, 0.28964388, -0.06674159, -0.1259642, -0.14523233, -0.18587688, 0.0991561, -0.012665493, 0.019185383, 2.25272, -0.27994934, -0.016527355, 0.1052234, 0.017642036, -0.030889833, -0.1089197, 0.062093288, 1.319708, -0.28975517, -0.0224788, -0.059800822, -0.007737101, -0.15598598, -0.121320404, 0.05341375, -0.030276502, -0.033418756, 0.025585774, 0.05203686, -0.007901804, -0.029025227, 0.043659445, 0.0042203004, -0.01848876, 0.17833035, 0.016509792, -0.1630252, -0.031816907, 0.035716176, 0.18008734, 0.027868968, 0.080309436, -0.13602385, -0.14892352, 0.094681755, -0.13964505, -0.17541939, 0.19732544, 0.10568105, -0.056175493, -0.16196427, -0.02911737, -0.017208582, -0.08945286, 0.10460229, 0.014809077, 0.097089306, 0.03795483, -0.077857785, -0.06591865, 0.12163401, 0.16330715, -...","[-0.7933155, 0.7973241, -1.1132418, -1.4425584, -2.430611, -2.1453054, 1.868661, 0.032376587, -0.2162923, 1.5599988, 0.015719289, -0.5805866, 0.4947336, 0.5891961, 0.13702141, -1.1879098, 1.1897597, 1.3324213, -2.6116498, 0.1463741, -1.3609776, -0.7901505, -2.1799288, -0.5754131, 0.326299, -1.3177156, 1.2384183, 0.020441137, 0.6181642, -1.108524, -0.5195478, 0.5955435, 0.07527963, -1.3821145, 2.092453, 0.64544934, -2.016299, -1.1649547, 0.75596136, 2.7000082, 0.33503696, 0.53172505, -2.6731625, -1.7179923, 0.9528666, -2.3984585, -1.5833848, 2.806343, 0.8946805, -0.8888921, -2.491057, -0.814299, 0.5855601, -0.3795, 0.8897168, 0.93465686, 1.3589711, 0.91421473, -1.2311177, 0.8351062, 2.6531556, 3.116941, 0.4768215, 0.44716027, 1.0576525, -1.3872534, 0.3788775, -2.2309947, 0.59360904, 0.8...",0,0,0,0,0


## Save Output

In [28]:
DataProcessing.save_to_file(test_and_models_df, combine_data_path, 'ml_classifiers', '.csv')

Saved to: 
	/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/ml_classifiers-v1.csv


## Evaluation

In [29]:
print("======= EVALUATION/RESULTS =======")



In [30]:
get_metrics = EvaluationMetric()
get_metrics

<classification_models.EvaluationMetric at 0x37fc62d90>

> - Results may differ (from previous runs and even terminal runs) because we shuffle the data.

In [31]:
actual_label = test_and_models_df['Actual Label'].values
for ml_model in ml_models:
    ml_model_name = ml_model.get_model_name()
    print(f"Actual Label:\t\t{actual_label}")
    ml_model_predictions = test_and_models_df[ml_model_name].values
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")
    print()
    get_metrics.eval_classification_report(y_test_df, ml_model_predictions)

Actual Label:		[0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0
 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0
 1 0 0 0 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0
 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1
 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0 0 0 0
 1 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0
 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0
 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
 0 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0
 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0
 0 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0
 0 0 0 1 1 0 0 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0
 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0
 1 0 0 0 0