# Error Analysis: Identify characteristics of misclassified sentences

- For the sentences that were incorrectly classified, what are the characteristics in them?
- For the ones that are classified as a prediction, do our prediction properties exist?

In [1]:
import os
import sys

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

notebook_dir = os.getcwd()

sys.path.append(os.path.join(notebook_dir, '../'))

from data_processing import DataProcessing

In [2]:
pd.set_option('max_colwidth', 800)
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', None)

## Load Data

In [3]:
base_data_path = os.path.join(notebook_dir, '../data')
combine_data_path = os.path.join(base_data_path, 'financial_phrase_bank/combined_generated_fin_phrase_bank')

In [4]:
model_results_path = os.path.join(combine_data_path, 'sentence_label-all_classifiers_with_results-v1.csv')
model_results_df = DataProcessing.load_from_file(model_results_path, 'csv', sep=',')
model_results_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Author Type,Embedding,Normalized Embeddings,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier,llama-3.1-70b-instruct,llama-3.1-8b-instruct,llama-3.3-70b-instruct,mistral-7b-instruct,mistral-small-3.1,codestral-22b,gpt-oss-20b,gpt-oss-120b,granite-3.3-8b-instruct
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,0,1,[ 3.10125723e-02 1.48145273e-01 1.18822167e-02 -4.00886834e-02\n 2.18477882e-02 -7.52243698e-02 -6.43576309e-02 -1.18781418e-01\n -9.42035094e-02 1.69367039e+00 -1.97394520e-01 6.25598105e-03\n 4.40751575e-02 -4.00502793e-02 -4.52028252e-02 -1.13160096e-01\n 8.78665820e-02 1.32680905e+00 -4.84675802e-02 -3.56700621e-03\n 5.54845780e-02 9.94077399e-02 -1.34666905e-01 1.01711877e-01\n 6.37347773e-02 5.76260537e-02 -8.55392031e-03 -5.17340861e-02\n -9.67892632e-02 -4.00202125e-02 -4.66998853e-02 2.48341095e-02\n -1.75986104e-02 2.24689487e-02 4.00095731e-02 -1.50968343e-01\n 3.12316287e-02 8.40115175e-02 6.51817173e-02 -1.11669578e-01\n 4.81054671e-02 9.21993256e-02 1.43066615e-01 -2.96952873e-01\n -6.12954162e-02 -9.03191715e-02 3.97677906e-02 -3.97728458e-02\n -7....,[ 1.4120030e+00 -1.1126934e+00 8.9782916e-02 -6.6743813e-02\n -3.7037823e-01 -6.0530519e-01 -9.2802191e-01 -1.2312902e+00\n -1.7777984e+00 -3.0181044e-01 9.9699152e-01 -2.8869405e-01\n -4.5153311e-01 -2.6786438e-01 -5.6393570e-03 -1.2585411e+00\n 1.6161768e+00 1.3745358e+00 1.0518684e+00 4.4895628e-01\n 5.3809673e-01 7.0892632e-01 -1.8697438e+00 2.5000875e+00\n 4.8316771e-01 -7.1109168e-02 1.5739738e+00 -1.1464823e+00\n -1.6519629e+00 -1.5582638e+00 -8.2016826e-01 3.2399574e-01\n -2.7373379e-01 -8.1674218e-01 -2.3484817e-02 -2.1448696e+00\n 6.6440696e-01 6.2469262e-01 1.1777695e+00 -1.1532152e+00\n 6.7729610e-01 7.1531069e-01 8.3312702e-01 -4.0233145e+00\n -1.2297469e+00 -1.5874187e+00 1.4470621e+00 2.0973889e-02\n -1.5879422e+00 3.4062797e-01 -3.7409711e-01 -4.989...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .",0,1,[ 4.70041996e-03 1.10228069e-01 -4.77573602e-03 -9.56104398e-02\n 5.52534722e-02 -7.00817332e-02 -2.05142405e-02 -1.88917577e-01\n -1.04249775e-01 1.33263934e+00 -2.44876042e-01 6.75175413e-02\n -1.49942748e-02 -4.06556427e-02 -5.41962422e-02 -5.62920608e-02\n -5.34963198e-02 8.25075388e-01 -4.66980301e-02 -1.90773290e-02\n 9.21309367e-02 -4.51208055e-02 -1.37235259e-03 2.78238151e-02\n 5.95768094e-02 7.14695230e-02 -6.53408244e-02 -2.12525483e-02\n 1.03413671e-01 4.48262393e-02 4.36710976e-02 -1.38843171e-02\n 3.21916752e-02 8.28981474e-02 3.51038612e-02 6.20842911e-03\n 1.43408813e-02 3.44430916e-02 -3.06746140e-02 -1.25587946e-02\n 5.69351614e-02 2.96466853e-02 3.29996310e-02 3.74354306e-03\n -6.46679699e-02 -1.41989226e-02 -7.01283365e-02 2.36671865e-02\n 2....,[ 1.09362805e+00 -1.62451851e+00 -1.65100798e-01 -9.56259787e-01\n 4.15408053e-02 -5.33732831e-01 -1.78139552e-01 -2.06649685e+00\n -1.91614807e+00 -1.50415599e+00 4.32611406e-01 4.96168882e-01\n -1.36563015e+00 -2.76857466e-01 -9.52787399e-02 -3.11303347e-01\n -7.22661614e-01 -1.60111678e+00 1.07873595e+00 2.00796470e-01\n 1.14176524e+00 -1.31319058e+00 6.96443990e-02 1.48120904e+00\n 4.19971019e-01 1.25214577e-01 8.07624400e-01 -6.86450183e-01\n 1.40184307e+00 -3.70196342e-01 7.16911912e-01 -2.34501705e-01\n 5.22707343e-01 1.74109451e-02 -9.85290855e-02 4.73820418e-01\n 4.31317925e-01 -1.41181558e-01 -1.94443032e-01 1.55737445e-01\n 8.26632380e-01 -2.50532269e-01 -5.49674511e-01 6.59554422e-01\n -1.27693951e+00 -3.35813284e-01 -1.00587562e-01 7.66250908e-01\n -1....,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Diluted EPS rose to EUR3 .68 from EUR0 .50 .,0,1,[-2.69290894e-01 4.16594967e-02 -1.22851051e-01 -2.70933006e-02\n 3.50714996e-02 -1.15661696e-01 -6.43265843e-02 1.75575197e-01\n -1.25396490e-01 7.68171012e-01 -2.24086478e-01 7.85705000e-02\n -2.09025033e-02 -5.41161001e-02 1.49662405e-01 -1.13877594e-01\n 4.11872976e-02 8.62753034e-01 -1.40215501e-01 -1.27536893e-01\n 4.39678207e-02 -1.11634195e-01 -1.07523814e-01 -1.85717091e-01\n 9.68070105e-02 1.18459016e-02 -2.06287235e-01 -5.30498102e-02\n 5.26940357e-03 6.65820986e-02 2.13034455e-05 -3.96589972e-02\n -1.49358094e-01 -1.68723017e-02 -2.74695046e-02 -7.49292001e-02\n -2.03455035e-02 -7.13056028e-02 9.15624946e-02 7.88197964e-02\n 7.60333985e-02 -6.34257048e-02 1.22492693e-01 1.20451011e-01\n 1.57845110e-01 4.86440361e-02 -2.06761993e-02 -1.56963691e-01\n -1....,[-2.22164416e+00 -2.55009079e+00 -1.97177386e+00 1.41455680e-01\n -2.07319200e-01 -1.16808939e+00 -9.27490950e-01 2.27401471e+00\n -2.20736480e+00 -3.38401103e+00 6.79722607e-01 6.37775719e-01\n -1.45705986e+00 -4.76822168e-01 1.93662560e+00 -1.27049232e+00\n 8.43871653e-01 -1.37766039e+00 -3.41158807e-01 -1.53451908e+00\n 3.48383397e-01 -2.24378777e+00 -1.47482145e+00 -1.46341038e+00\n 9.85830665e-01 -7.20348954e-01 -1.09447253e+00 -1.16633940e+00\n -9.52056646e-02 -6.55586347e-02 -2.55081672e-02 -6.06290877e-01\n -2.38134718e+00 -1.35980129e+00 -1.05573428e+00 -8.77996981e-01\n -4.73473296e-02 -1.77508843e+00 1.55541837e+00 1.36257136e+00\n 1.14964020e+00 -1.68761480e+00 5.74651241e-01 2.47708774e+00\n 1.83672142e+00 6.97480619e-01 5.95839024e-01 -1.35575378e+00\n -2....,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
cols_to_drop = ["Author Type", "Embedding", "Normalized Embeddings"]
compare_y_vs_yhat_df = DataProcessing.drop_df_columns(model_results_df, cols_to_drop)
compare_y_vs_yhat_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier,llama-3.1-70b-instruct,llama-3.1-8b-instruct,llama-3.3-70b-instruct,mistral-7b-instruct,mistral-small-3.1,codestral-22b,gpt-oss-20b,gpt-oss-120b,granite-3.3-8b-instruct
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Diluted EPS rose to EUR3 .68 from EUR0 .50 .,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
model_col_names = compare_y_vs_yhat_df.columns.to_list()[2:]
model_col_names

['perceptron',
 'sgd_classifier',
 'logistic_regression',
 'ridge_classifier',
 'decision_tree_classifier',
 'random_forest_classifier',
 'gradient_boosting_classifier',
 'llama-3.1-70b-instruct',
 'llama-3.1-8b-instruct',
 'llama-3.3-70b-instruct',
 'mistral-7b-instruct',
 'mistral-small-3.1',
 'codestral-22b',
 'gpt-oss-20b',
 'gpt-oss-120b',
 'granite-3.3-8b-instruct']

In [7]:
col_names = compare_y_vs_yhat_df.columns.to_list()
col_names

['Base Sentence',
 'Sentence Label',
 'perceptron',
 'sgd_classifier',
 'logistic_regression',
 'ridge_classifier',
 'decision_tree_classifier',
 'random_forest_classifier',
 'gradient_boosting_classifier',
 'llama-3.1-70b-instruct',
 'llama-3.1-8b-instruct',
 'llama-3.3-70b-instruct',
 'mistral-7b-instruct',
 'mistral-small-3.1',
 'codestral-22b',
 'gpt-oss-20b',
 'gpt-oss-120b',
 'granite-3.3-8b-instruct']

## Get misalignment 

- Per y and $y_{hat}$
- Per model
- Possibly realign where all models disagree on misalignment. Or, maybe not as worse case could be they misalign?

In [8]:
compare_specific_dfs = []
for model_col_names_idx in range(len(model_col_names)):
    model_col_name = model_col_names[model_col_names_idx]
    col_ints = [0, 1, model_col_names_idx + 2]
    compare_specific_df = compare_y_vs_yhat_df.iloc[:, col_ints]
    compare_specific_dfs.append(compare_specific_df)

In [16]:
compare_specific_dfs[15].head(3)

Unnamed: 0,Base Sentence,Sentence Label,granite-3.3-8b-instruct
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,0,0
1,"Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .",0,0
2,Diluted EPS rose to EUR3 .68 from EUR0 .50 .,0,0


In [10]:
mislabelled_dfs = []
for model_col_names_idx in range(len(model_col_names)):
    model_col_name = model_col_names[model_col_names_idx]
    print(f"{model_col_names_idx}-{model_col_name}")
    
    filt_mislabelled = compare_specific_dfs[model_col_names_idx]['Sentence Label'].eq(compare_specific_dfs[model_col_names_idx][model_col_names[model_col_names_idx]]) == False
    mislabelled_df = compare_specific_dfs[model_col_names_idx][filt_mislabelled]
    # mislabelled_dfs[model_col_names_idx].index.to_list()
    mislabelled_dfs.append(mislabelled_df)

0-perceptron
1-sgd_classifier
2-logistic_regression
3-ridge_classifier
4-decision_tree_classifier
5-random_forest_classifier
6-gradient_boosting_classifier
7-llama-3.1-70b-instruct
8-llama-3.1-8b-instruct
9-llama-3.3-70b-instruct
10-mistral-7b-instruct
11-mistral-small-3.1
12-codestral-22b
13-gpt-oss-20b
14-gpt-oss-120b
15-granite-3.3-8b-instruct


In [11]:
# all_mislabelled_dfs = DataProcessing.concat_dfs(mislabelled_dfs, axis=1)
# all_mislabelled_dfs

In [12]:
mislabelled_dfs[1]

Unnamed: 0,Base Sentence,Sentence Label,sgd_classifier
7,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",1,0
31,We are now in a position to pursue novel clinical candidates going forward . '',0,1
57,"Senator Emily Patel predicts that on November 10, 2026, the defense spending at her state's military bases may rise.",1,0
118,"The Centers for Disease Control and Prevention's health screening participation should stay the same in late 2025, according to a report by the CDC.",1,0
146,"According to Analyst Kevin White, the goal count at the Barcelona football team would fall in Q3 of 2026.",1,0
156,TeliaSonera TLSN said the offer is in line with its strategy to increase its ownership in core business holdings and would strengthen Eesti Telekom 's offering to its customers .,1,0
161,"According to Sport Expert Kevin White, the touchdown rate at the New England Patriots would fall in 2024-08-21.",1,0
174,"However , Simdax will remain available in the markets where it already has been approved as it has marketing authorisations or submitted applications in more than 40 countries , Orion added .",1,0
176,"Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .",0,1
185,Stora Enso will record a capital gain of EUR 33 million as a non-recurring financial item in its fourth quarter 2006 results .,1,0


In [13]:
mismatch_per_sentence = []
for idx, row in compare_y_vs_yhat_df.iterrows():
    sentence = row['Base Sentence']
    y = row['Sentence Label']
    print(f"{idx}-{sentence}")

    for model_col_name in model_col_names:
        y_hat = row[model_col_name]

        # if idx < 3:
        #     print(f" Actual Label-{model_col_name} Label")
        #     print(f"\t{y}-{y_hat}")
        
        if y != y_hat:
            mismatch = (idx, sentence, y, y_hat, model_col_name)
            print(f"\tMismatch: {mismatch}")
            mismatch_per_sentence.append(mismatch)

        # print('d', )
        

0-Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .
1-Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .
2-Diluted EPS rose to EUR3 .68 from EUR0 .50 .
3-Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.
	Mismatch: (3, 'Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.', 1, 0, 'decision_tree_classifier')
	Mismatch: (3, 'Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.', 1, 0, 'random_forest_classifier')
4-Exports accounted for 65.4 % of net sales , representing an all time record for the company .
5-In 2030, the Department of Education observed that student loan defaults decreased.
6-Economist Dr. Sarah 

In [14]:
mismatch_col_names = ["Row", "Sentence", "True Label", "Model Label", "Model Name"]
mismatch_per_sentence_df = pd.DataFrame(mismatch_per_sentence, columns=mismatch_col_names)
mismatch_per_sentence_df.head()

Unnamed: 0,Row,Sentence,True Label,Model Label,Model Name
0,3,Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.,1,0,decision_tree_classifier
1,3,Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.,1,0,random_forest_classifier
2,7,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",1,0,perceptron
3,7,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",1,0,sgd_classifier
4,7,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",1,0,logistic_regression


In [15]:
mislabelled_pivot_table = pd.pivot_table(mismatch_per_sentence_df, values=["True Label", "Model Label"], index=["Row", "Sentence", "Model Name"])
mislabelled_pivot_table.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model Label,True Label
Row,Sentence,Model Name,Unnamed: 3_level_1,Unnamed: 4_level_1
3,Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.,decision_tree_classifier,0.0,1.0
3,Professor Thompson forecasts that the graduation rate at Harvard University will drop in 2027.,random_forest_classifier,0.0,1.0
7,"STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .",codestral-22b,0.0,1.0
