# Extract Embeddings: Combine Generated Synthetic Data with Financial PhraseBank Data

In [1]:
import os
import sys

import pandas as pd

from tqdm import tqdm

# The notebook's current working directory anchors every relative path below.
notebook_dir = os.getcwd()
# Put the directory one level up on the import path so the project-local
# modules imported next can be resolved.
project_root = os.path.join(notebook_dir, '../')
sys.path.append(project_root)

# import log_files
from data_processing import DataProcessing
from feature_extraction import SpacyFeatureExtraction

In [2]:
# Display configuration: wide text cells (sentences are long) and no
# truncation of columns or rows when a frame is rendered.
for option_name, option_value in (
    ('max_colwidth', 800),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Location of the combined (synthetic + Financial PhraseBank) CSV, relative
# to the shared data directory that sits one level above the notebook.
combined_csv_relpath = 'combined_generated_fin_phrase_bank/combined_generated_fin_phrase_bank-v1.csv'
base_data_path = os.path.join(notebook_dir, '../data/')
combine_data_path = os.path.join(base_data_path, combined_csv_relpath)

In [4]:
# Load the combined CSV and discard the 'Unnamed: 0' column in one chained
# expression (presumably an index column written out with the CSV — confirm).
df = DataProcessing.load_from_file(combine_data_path, 'csv').drop(columns=['Unnamed: 0'])
df

Unnamed: 0,Base Sentence,Sentence Label
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,1
1,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.",1
2,"Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.",1
3,"According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.",1
4,"In 21 August 2024, Morgan Stanley envisions that the gross profit at Johnson & Johnson has some probability to remain stable.",1
5,"The stock price at Visa should stay same in Q2 of 2026, according to Wells Fargo.",1
6,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,1
7,"On August 25, 2024, to September 25, 2025, Citigroup speculates the net profit at Johnson & Johnson will likely increase.",1
8,"Bank of America predicts on 2024-08-21, the operating income at Visa may rise.",1
9,"According to Goldman Sachs, the research and development expenses at Alphabet would fall in 2029 Q2.",1


## Shuffle Data

In [5]:
# Shuffle the rows with the project helper; the recorded output below shows
# rows of both label values mixed together after this step.
# NOTE(review): no seed is passed here — confirm whether shuffle_df is
# deterministic, otherwise the row order will differ between runs.
shuffled_df = DataProcessing.shuffle_df(df)
shuffled_df

Unnamed: 0,Base Sentence,Sentence Label
0,The World Health Organization forecasts that the prevalence of chronic illnesses at urban health centers in Africa will potentially decrease in Q2 2028.,1
1,"According to AccuWeather, the humidity at Atlanta would fall in August 20, 2027.",1
2,"According to Senator Emily Patel, the public perception of climate change at the Environmental Protection Agency would fall in Q2 2028.",1
3,"According to Goldman Sachs, the research and development expenses at Alphabet would fall in 2029 Q2.",1
4,"Lifetree was founded in 2000 , and its revenues have risen on an average by 40 % with margins in late 30s .",0
5,Financial analyst Rachel Kim forecasts that the stock prices at Tesla Motors potentially decrease in Q3 of 2027.,1
6,"According to Senator James Davis, the public approval rating of the current administration would fall in late 2029.",1
7,"Precipitation levels in Denver should stay same 2024-08-22, according to Meteorologist Ava Moreno.",1
8,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",0
9,"In 2024-08-20, research advisor Michael Brown envisions that the success rate of startups in Silicon Valley has some probability to remain stable.",1


## Extract Sentence Embeddings

In [6]:
# Build the project's spaCy feature extractor over the shuffled frame, telling
# it that the text to embed lives in the 'Base Sentence' column.
# NOTE(review): which spaCy pipeline/model is loaded is decided inside
# SpacyFeatureExtraction — not visible from this notebook.
spacy_fe = SpacyFeatureExtraction(shuffled_df, 'Base Sentence')
spacy_fe

<feature_extraction.SpacyFeatureExtraction at 0x343985d10>

In [7]:
# Compute sentence-level embeddings. With attach_to_df=True the returned frame
# shown below carries the original columns plus an 'Embedding' column.
spacy_sentence_embeddings_df = spacy_fe.sentence_feature_extraction(attach_to_df=True)
spacy_sentence_embeddings_df

100%|██████████| 105/105 [00:00<00:00, 238.57it/s]


Unnamed: 0,Base Sentence,Sentence Label,Embedding
0,The World Health Organization forecasts that the prevalence of chronic illnesses at urban health centers in Africa will potentially decrease in Q2 2028.,1,"[-0.050391436, 0.21542074, -0.0011249166, -0.054818284, -0.007305803, -0.08386881, -0.033107918, 0.13726442, 0.07594663, 2.0798728, -0.477793, -0.080915086, 0.14637612, 0.005206253, 0.118821286, 0.007487831, 0.028000748, 1.2403663, -0.19665611, -0.09846013, -0.017312765, 0.10566571, 0.0073270095, -0.1540863, 0.093233086, -0.0050276686, -0.016229676, 0.044184323, 0.051974136, 0.0768252, -0.086339295, -0.0010114548, -0.015987009, -0.0030544156, 0.069034144, 0.016882526, 0.030196322, 0.009553486, -0.0029840518, 0.11107146, 0.055105314, 0.18011671, -0.03869079, -0.069838636, -0.0126219615, -0.11006174, -0.102911144, 0.009440036, 0.0077483314, -0.07951612, -0.13898046, -0.044616546, 0.051183794, -0.24516165, 0.07636787, 0.032805786, 0.14591599, -0.010505627, 0.02454693, -0.17052372, -0.0308..."
1,"According to AccuWeather, the humidity at Atlanta would fall in August 20, 2027.",1,"[0.03162525, 0.29787716, 0.11421819, 0.0026611239, 0.013965315, -0.0047671124, 0.013265284, 0.034321122, 0.069639996, 1.627681, -0.27688375, 0.088616185, 0.05571481, 0.013421791, -0.2631225, -0.040006068, -0.014861839, 1.0533181, -0.08972626, -0.044433814, 0.01918231, 0.06919962, 0.018825883, -0.09294643, -0.020183489, 0.026140377, -0.23672879, -0.052829128, 0.07069842, -0.017743036, 0.026953511, 0.015749838, -0.06793124, 0.21660674, 0.07798705, -0.020887684, 0.03762939, 0.036421146, 0.09656139, -0.034954872, -0.08600481, 0.00024756044, 0.055638686, -0.026969079, 0.010629369, 0.013288778, -0.14023246, -0.17799746, 0.01980644, -0.046690486, 0.04884378, 0.058728375, -0.0772405, -0.086232185, -0.01179844, -0.03322912, 0.11228432, -0.004365446, 0.030528072, -0.1181475, -0.020901, -0.015274..."
2,"According to Senator Emily Patel, the public perception of climate change at the Environmental Protection Agency would fall in Q2 2028.",1,"[-0.0023228205, 0.18098423, -0.034874, 0.0024004423, -0.04584048, 0.10756784, 0.013326728, -0.019026825, -0.0825722, 1.9829903, -0.35860792, -0.075514264, 0.10257652, -0.08648719, -0.15397298, 0.03156992, -0.028908743, 0.9673553, -0.12112371, -0.013378174, 0.10735669, 0.08025565, 0.025664348, -0.077843525, 0.110326745, 0.0699168, -0.13805102, 0.0586719, -0.055631664, 0.17454804, -0.023520213, 0.013255272, 0.014434391, 0.10158296, 0.12709518, -0.04670052, -0.014367746, 0.00084156834, -0.05589422, -0.05014218, 0.15510936, 0.048783477, -0.06318195, -0.038425304, -0.04987922, -0.00357371, -0.11914672, -0.12908421, 0.019965172, -0.08629929, 0.03776013, 0.07963033, -0.014022472, -0.10924131, -0.04190474, -0.019500697, -0.08016822, 0.033979245, 0.033408154, -0.15452598, 0.014864454, -0.073259..."
3,"According to Goldman Sachs, the research and development expenses at Alphabet would fall in 2029 Q2.",1,"[-0.11628243, 0.22326306, -0.047700845, -0.024760777, -0.0060433885, 0.040430173, -0.032331407, -0.0009683321, 0.033874664, 1.7219961, -0.31886628, -0.028796997, 0.16275401, 0.098720506, -0.02195267, 0.018108122, -0.047012333, 0.98207146, -0.1300645, 0.0032067248, 0.102472685, 0.16903724, -0.015188108, -0.074178495, 0.033625007, 0.15126984, -0.080254026, 0.11120188, 0.033887886, 0.094139695, -0.024003705, -0.021056285, 0.07501833, 0.17900781, 0.10551355, -0.06575423, 0.07326032, 0.099584356, -0.10427231, -0.004633605, 0.07771451, 0.13058156, -0.030298881, -0.047136262, 0.034383997, 0.06631747, -0.013091054, -0.08305079, 0.07282719, -0.027485996, 0.01007017, -0.06944488, 0.03269739, -0.04110561, 0.030196607, -0.043924816, 0.015587447, -0.03212212, -0.061001938, -0.15006572, 0.022369374,..."
4,"Lifetree was founded in 2000 , and its revenues have risen on an average by 40 % with margins in late 30s .",0,"[-0.108991444, 0.2853636, 0.07630584, -0.03331838, 0.15054572, -0.076436356, -0.07666226, 0.03715405, 0.068958566, 1.9577957, -0.21506692, 0.07676075, -0.0277127, 0.050278213, -0.01718174, -0.051387705, 0.06413222, 1.0552706, -0.12393668, -0.053046044, -0.071945265, -0.057660513, -0.056091134, 0.007406742, 0.10398702, -0.018665433, -0.13595897, 0.10269392, 0.011057222, 0.00906526, 0.042911045, 0.010051137, -0.103941254, -0.06652095, 0.02781605, -0.08690635, -0.14914055, -0.051307824, 0.026637182, -0.008841171, 0.07276959, 0.050831962, 0.11663874, -0.0789234, 0.076834105, 0.024507476, -0.021868784, -0.096129045, 0.06255703, 0.023527058, -0.004301128, 0.05599234, -0.1328328, -0.11299923, 0.09738191, -0.13845475, -0.050847434, -0.13309741, 0.031739928, -0.12970898, -0.05061209, -0.0594370..."
5,Financial analyst Rachel Kim forecasts that the stock prices at Tesla Motors potentially decrease in Q3 of 2027.,1,"[-0.23196825, 0.27390388, 0.018557893, 0.052316047, 0.008552848, -0.044823796, -0.007169994, -0.029850414, 0.038414214, 1.3923486, -0.26796222, -0.05400531, 0.13204126, -0.100816615, -0.03190125, -0.09589203, -0.10528231, 1.0387743, -0.21819331, -0.003916478, 0.12840861, 0.1271431, 0.059314847, -0.1853363, -0.08030811, 0.2792083, -0.1456319, 0.12947504, 0.07037379, 0.14068083, -0.0143037755, -0.009745937, 0.0070031146, 0.103649914, 0.08241899, 0.012185253, 0.03262384, 0.19746572, -0.02868717, 0.024812732, -0.06397921, 0.08667306, 0.19090958, 0.14727926, 0.069378786, -0.019938786, -0.11273138, -0.16662972, 0.035387002, -0.065463364, 0.03296405, -0.033905793, -0.08206885, -0.017304005, 0.00275037, -0.07205812, -0.065694265, -0.09481099, -0.024046823, -0.21282287, -0.06000252, -0.14511494..."
6,"According to Senator James Davis, the public approval rating of the current administration would fall in late 2029.",1,"[-0.025730893, 0.10285038, 0.12314634, -0.003555748, 0.0055651525, 0.0009315841, -0.09635434, -0.0766766, 0.07399541, 2.2563138, -0.29832122, -0.0352404, 0.14146826, 0.013034147, -0.13410088, -0.07841789, 0.029764146, 0.9673759, -0.030618006, -0.076574646, 0.0462191, -0.009786705, -0.06989678, -0.120316885, 0.017192101, 0.11968502, -0.21612278, 0.066093974, -0.047104426, 0.14143716, -0.022382597, 0.08836566, -0.004706496, 0.11893604, 0.0616827, -0.069974855, -0.043679215, 0.06835945, -0.09976026, -0.015532896, 0.037242956, 0.08669291, 0.013774095, -0.019870382, -0.005102186, 1.4518946e-05, -0.07170414, -0.0029755614, 0.06258004, -0.0689053, -0.0302135, -0.0023039237, -0.0296361, -0.10095634, 0.015427749, -0.002752049, -0.12115407, -0.08400632, -0.05100911, -0.19926617, -0.06338594, 0.0..."
7,"Precipitation levels in Denver should stay same 2024-08-22, according to Meteorologist Ava Moreno.",1,"[-0.10761004, 0.3033786, 0.03796122, 0.03157227, 0.035167105, -0.0071123675, 0.044625353, 0.05666279, -0.0804619, 1.3586631, -0.2311181, 0.08044368, 0.0016260013, 0.022307878, -0.22388422, -0.03678817, 0.0049874745, 1.0272851, -0.0485021, -0.06933815, -0.015766049, -0.010004849, 0.035275478, -0.13646662, 0.0132225845, 0.07002726, -0.11064392, -0.011693896, -0.0014215693, 0.030836707, -1.4100812e-05, 0.02678552, 0.08385458, 0.016638314, 0.09858864, 0.009113806, 0.042373408, 0.014452605, 0.035281107, -0.080417536, 0.015137525, -0.026718322, 0.15094258, 0.013348573, 0.0070212125, 0.048103523, -0.025277568, -0.06297563, 0.07443521, 0.0066858963, 0.0068072095, 0.07891625, 0.006752506, -0.16243646, -0.002369131, 0.049456313, -0.029553378, 0.029349681, -0.033258628, -0.13693127, -0.04287531, ..."
8,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",0,"[-0.005943053, 0.15729912, -0.101663664, -0.04747168, 0.17984045, 0.026353298, 0.040704496, -0.031741887, -0.0013212783, 1.2830931, -0.30156302, 0.04194991, 0.047622945, -0.11694745, -0.014253504, 0.0237644, -0.03577601, 0.83854586, -0.054321844, 0.0024243295, 0.0076728556, 0.048846744, 0.04741292, -0.0002675462, 0.06477594, 0.0076617124, -0.007715884, 0.06146602, 0.009252985, 0.12989178, 0.0676145, -0.0073582274, 0.032229114, 0.1501138, 0.13244116, -0.009811595, -0.013941116, 0.0010493121, -0.019024882, -0.00927352, -0.026203226, 0.078611776, 0.08331343, 0.026452126, -0.015870191, 0.009790953, 0.035399493, -0.01720727, 0.07744529, -0.037347812, -0.101326965, 0.07356499, 0.037135232, 0.04475773, 0.0045982115, -0.008885927, -0.0028634022, -0.090517275, 0.007497437, -0.098485835, -0.0290..."
9,"In 2024-08-20, research advisor Michael Brown envisions that the success rate of startups in Silicon Valley has some probability to remain stable.",1,"[-0.09114365, 0.30092236, -0.012888364, -0.018045718, 0.032763705, 0.0135402875, -0.06792159, -0.013553535, 0.040758938, 1.7716188, -0.24500741, 0.03857392, 0.05680104, 0.037514783, -0.059713785, 0.018157791, 0.021112932, 1.2188282, -0.075656936, -0.014860285, -0.0053458917, 0.022940332, -0.07500619, -0.070736125, 0.020625515, 0.0713354, -0.063580476, 0.06305198, 0.01606812, 0.03506783, -0.0030780954, 0.073579684, 0.061061714, -0.037812788, 0.089274906, 0.033480447, 0.05531295, 0.01799476, -0.015975405, -0.0632777, 0.04633307, 0.08803033, 0.15923567, -0.06340877, -0.025047002, 0.036848333, -0.004306891, -0.06578126, 0.051983003, -0.056408424, -0.007867892, 0.08906277, -0.1549845, -0.083119795, 0.066002324, -0.043202963, -0.006774015, -0.048954826, -0.012994779, -0.018365601, -0.0310869..."


## Split Data

In [8]:
# Split the embedded frame into train/test parts, using 'Sentence Label' as
# the target. The whole frame is passed as the feature side, so the X_* frames
# keep every column (see the preview below); downstream cells select only
# 'Embedding' as the model input.
# NOTE(review): the split ratio and any random seed are decided inside
# DataProcessing.split_data — confirm there. The recorded run produced
# 84 training rows and 21 test rows out of 105.
labels_col = spacy_sentence_embeddings_df['Sentence Label']
X_train_df, X_test_df, y_train_df, y_test_df = DataProcessing.split_data(spacy_sentence_embeddings_df, labels_col)
X_train_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Embedding
55,"According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .",1,"[-0.26332003, 0.32854047, 0.04485582, -0.010942283, 0.033938576, -0.1253878, 0.02093402, 0.083297595, 0.11035638, 1.7451077, -0.39815328, 0.1412647, -0.08980998, -0.014772703, 0.057716373, -0.050922345, -0.042725757, 1.4127717, -0.20974772, 0.015678, -0.0036482685, 0.059785444, -0.07894007, -0.013989352, 0.11359537, 0.09830269, -0.051417813, 0.07068994, 0.024710523, 0.10660922, -0.0041964804, -0.01851969, -0.014895713, 0.099013925, 0.110973224, -0.025971042, -0.060882527, 0.044830512, 0.0436691, -0.092702195, 0.0636729, 0.019875364, 0.21089193, -0.08540008, 0.04674657, -0.020476433, -0.0369948, 0.11620767, 0.11443856, 0.107366644, 0.02788813, 0.04243621, -0.09458176, -0.104578346, 0.05265203, -0.05567572, -0.05343007, -0.101762705, -0.05209775, -0.14008449, 0.057324965, -0.107380845, -..."
22,"Operating profit for the nine-month period increased from EUR3 .1 m and net sales increased from EUR61 .5 m , as compared to the corresponding period in 2007 .",0,"[-0.12223279, 0.19578847, -0.043924607, 0.044549696, -0.0025346745, -0.09855458, 0.04405799, 0.04755729, 0.12560616, 1.78751, -0.28111476, 0.15011911, -0.03935436, -0.04218087, 0.09283486, -0.05538293, 0.010327645, 1.4036056, -0.1772326, 0.0065043573, 0.004707342, 0.010466768, -0.108335264, -0.0025163826, 0.06346877, -0.051507223, -0.099609725, 0.037490916, 0.0015974367, 0.1350515, -0.07617932, -0.024661342, -0.09695535, 0.01997616, 0.01878939, -0.04746242, -0.12688075, -0.015738908, 0.00340791, -0.0051635285, 0.0025936994, 0.054457135, 0.092765085, -0.015601827, -0.040241227, 0.00025815974, -0.027688673, 0.07940252, 0.017257804, 0.07352595, 0.029921843, -0.06150454, -0.059887134, -0.04409721, 0.12642619, -0.07766971, -0.047417086, -0.1308405, -0.08981071, -0.067921616, -0.0116810575, ..."
76,"In 2024-08-21, Environmental scientist Lisa Nguyen envisions that the carbon emissions at ExxonMobil has some probability to remain stable.",1,"[-0.10615556, 0.2840127, -0.034138516, 0.048429795, -0.051459327, 0.10221126, -0.03155069, 0.04775752, -0.119019, 1.550164, -0.20750885, 0.037153725, -0.03891483, -0.0021015552, -0.14981, 0.08462622, -0.03353012, 1.0599174, -0.12298432, -0.053530406, 0.033334557, 0.06669029, 0.045342278, -0.019844405, 0.14324325, 0.09565377, -0.07173137, 0.016630044, 0.080680445, 0.08708542, -0.076131225, 0.053336438, 0.06115622, 0.005948001, 0.06305403, -0.021259127, 0.064786345, 0.05284349, 0.0088904025, -0.09685647, 0.09848902, -0.013660461, 0.15108989, 0.020437771, -0.026175521, 0.00019325197, -0.072186835, -0.16497515, -0.009737886, 0.0045437724, 0.160857, 0.14052878, -0.06872517, -0.101791374, 0.050171312, -0.0170684, 0.019054309, -0.07746705, 0.055305306, -0.0142838815, -0.059969477, -0.12987989..."


In [9]:
# Sanity check: number of training labels (84 in the recorded run).
len(y_train_df)

84

In [10]:
# Preview only the first few training embeddings. Converting the whole column
# to a list and displaying it prints every vector for every training row,
# which floods the notebook output without adding information.
X_train_df['Embedding'].head(3).to_list()

[array([-2.63320029e-01,  3.28540474e-01,  4.48558182e-02, -1.09422831e-02,
         3.39385755e-02, -1.25387803e-01,  2.09340192e-02,  8.32975954e-02,
         1.10356383e-01,  1.74510765e+00, -3.98153275e-01,  1.41264707e-01,
        -8.98099765e-02, -1.47727029e-02,  5.77163734e-02, -5.09223454e-02,
        -4.27257568e-02,  1.41277170e+00, -2.09747717e-01,  1.56779997e-02,
        -3.64826852e-03,  5.97854443e-02, -7.89400712e-02, -1.39893517e-02,
         1.13595366e-01,  9.83026922e-02, -5.14178127e-02,  7.06899390e-02,
         2.47105230e-02,  1.06609218e-01, -4.19648038e-03, -1.85196903e-02,
        -1.48957130e-02,  9.90139246e-02,  1.10973224e-01, -2.59710420e-02,
        -6.08825274e-02,  4.48305123e-02,  4.36691009e-02, -9.27021950e-02,
         6.36729002e-02,  1.98753644e-02,  2.10891932e-01, -8.54000822e-02,
         4.67465706e-02, -2.04764325e-02, -3.69948000e-02,  1.16207667e-01,
         1.14438564e-01,  1.07366644e-01,  2.78881304e-02,  4.24362086e-02,
        -9.4

> **TODO:** Track the training loss; try BCE (binary cross-entropy).

In [11]:
from classification_models import SkLearnPerceptronModel, SkLearnSGDClassifier, EvaluationMetric

# Build the feature lists once so both models are trained and evaluated on
# exactly the same inputs.
train_embeddings = X_train_df['Embedding'].to_list()
test_embeddings = X_test_df['Embedding'].to_list()

# Perceptron baseline.
perception_model = SkLearnPerceptronModel()
perception_model.train_model(train_embeddings, y_train_df)
perceptron_predictions = perception_model.predict(test_embeddings)

# SGD classifier. Its predictions must come from `sgd_model` itself; predicting
# with the perceptron here would make the two prediction sets (and every
# metric derived from them) identical by construction.
sgd_model = SkLearnSGDClassifier()
sgd_model.train_model(train_embeddings, y_train_df)
sgd_predictions = sgd_model.predict(test_embeddings)

# Show the SGD predictions as a flat array (last expression = cell output).
sgd_predictions.to_numpy().ravel()

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1])

In [12]:
# Side-by-side comparison table: each test sentence with its true label and
# the label predicted by each model.
model_predictions_df = (
    pd.concat([X_test_df['Base Sentence'], y_test_df], axis=1)
    .set_axis(['Sentence', 'Actual Label'], axis=1)
    .assign(**{
        'Perceptron Predicted Label': perceptron_predictions.to_numpy().ravel(),
        'SGD Predicted Label': sgd_predictions.to_numpy().ravel(),
    })
)
model_predictions_df

Unnamed: 0,Sentence,Actual Label,Perceptron Predicted Label,SGD Predicted Label
30,"The win percentage at the Golden State Warriors should stay the same in 08/2024, according to Analyst Sophia Rodriguez.",1,1,1
65,"Meteorologist Ethan Kim predicts on 08/15/2024, the wind speed at Los Angeles may rise.",1,1,1
64,"On August 15, 2025, the International Energy Agency speculates that investment in renewable energy projects at emerging markets will likely increase.",1,1,1
53,"The stock price at Visa should stay same in Q2 of 2026, according to Wells Fargo.",1,1,1
45,"According to economist Emily Patel, the unemployment rate at the United States would fall in January 2029.",1,1,1
94,"On August 15, 2027, a health researcher speculates that the average blood pressure levels at rural health clinics will likely increase.",1,1,1
104,"In 2024/08/20, Dr. Liam Chen envisions that the temperature at San Francisco has some probability to remain stable.",1,1,1
47,"MegaFon 's subscriber base increased 16.1 % in 2009 to 50.5 million users as of December 31 , while its market share by the number of customers amounted to 24 % as of late 2009 , up from 23 % as of late 2008 , according to TeliaSonera estimates .",0,0,0
10,"According to Coach James Davis, the scoring average at the Denver Broncos would fall in 2024 of Q3.",1,1,1
0,The World Health Organization forecasts that the prevalence of chronic illnesses at urban health centers in Africa will potentially decrease in Q2 2028.,1,1,1


## Evaluation

In [13]:
# Instantiate the project's evaluation helper; it is reused by the report
# cells that follow.
get_metrics = EvaluationMetric()
get_metrics

<classification_models.EvaluationMetric at 0x355cea350>

In [17]:
# Classification report for the perceptron: true test labels vs. its predictions.
# NOTE(review): `metrics` is reassigned by the SGD report cell, so this value
# is only available until that cell runs.
metrics = get_metrics.eval_classification_report(y_test_df, perceptron_predictions)
metrics

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00        16

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21



In [18]:
# Classification report for the SGD classifier: true test labels vs. the
# contents of `sgd_predictions`. This overwrites the `metrics` value produced
# by the perceptron report cell.
metrics = get_metrics.eval_classification_report(y_test_df, sgd_predictions)
metrics

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00        16

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21

