In [None]:
%cd PATH

Prompt 3:

"Você está realizando análise de texto. O texto a seguir foi utilizado em um contexto de uma rede social. "
                "Responda apenas 'sim' ou 'não' caso o texto abaixo contenha discurso de ódio, ofensa, agressividade ou outras formas de toxicidade. Caso o texto contenha palavrões, palavras de baixo calão ou palavras obsenas, classifique como texto ofensivo, portanto tóxico: "

In [None]:
!pip install unidecode

In [None]:
import pandas as pd
import os
import re
from unidecode import unidecode

# Function to map classifications
def mapear_classificacao(texto):
    """Convert a raw model answer into a numeric toxicity label.

    Args:
        texto: either a number (returned untouched, which includes NaN for
            missing answers) or the free-text answer produced by the model.

    Returns:
        The input itself when it is numeric; otherwise 1.0 when the answer
        contains the whole word "sim", 0.0 when it contains the whole word
        "nao" (accents are stripped first, so "não" matches), and 1.0 for any
        answer that contains neither.
    """
    # Already numeric: nothing to parse
    if isinstance(texto, (int, float)):
        return texto

    # Strip accents and lowercase so that "Não", "NAO" and "nao" all look alike
    resposta = unidecode(texto).lower()

    # "sim" is tested first, so it wins when both words appear in the answer
    for padrao, rotulo in ((r'\bsim\b', 1.0), (r'\bnao\b', 0.0)):
        if re.search(padrao, resposta):
            return rotulo

    # Unrecognised answers are counted as toxic
    return 1.0

# Result files to compare; each one contributes a prediction column to final_df
selected_files = [
    'Maritaca_datasetName_zeroshot_prompt_2.csv',
    'Maritaca_datasetName_few-shot_10_prompt2_v2.csv',
    'Maritaca_datasetName_zeroshot_prompt_3.csv'
]

# Maps each file stem to the display name used as its column name in final_df
model_name_mapping = {
    'Maritaca_datasetName_zeroshot_prompt_2': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 2',
    'Maritaca_datasetName_few-shot_10_prompt2_v2': 'MariTalk (Sabiá-65B) Few-shot - Prompt 2',
    'Maritaca_datasetName_zeroshot_prompt_3': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 3'
}

path = 'results'

# Load the first CSV; it supplies the shared 'text' and gold 'Toxic' columns
first_file = os.path.join(path, selected_files[0])
df = pd.read_csv(first_file)

# Take an explicit copy: adding columns to a bare column selection of df
# raises SettingWithCopyWarning and is not guaranteed to write where intended.
final_df = df[['text', 'Toxic']].copy()

# Add one normalised prediction column per selected file
for file in selected_files:
    # The first file is already in memory; the remaining ones are read here
    temp_df = df if file == selected_files[0] else pd.read_csv(os.path.join(path, file))

    # Turn the raw model answers into numeric labels
    temp_df['predictions'] = temp_df['predictions'].apply(mapear_classificacao)

    # The column is named after the model's display name
    column_name = model_name_mapping[os.path.basename(os.path.splitext(file)[0])]

    # Column assignment aligns on the row index, so row i of every CSV is
    # assumed to refer to the same text - TODO confirm the files share row order
    final_df[column_name] = temp_df['predictions']

#print(final_df.head())

#print(final_df.head())


In [None]:
!pip install mlxtend


In [None]:
import pandas as pd
import os
import re
from unidecode import unidecode
from sklearn.metrics import classification_report, confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
import matplotlib.pyplot as plt

# Maps file stems to display names. The prediction columns of final_df are
# already named with the display names, so the .get() lookup below normally
# falls back to the column name itself.
model_name_mapping = {
    'Maritaca_datasetName_zeroshot_prompt_2': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 2',
    'Maritaca_datasetName_few-shot_10_prompt2_v2': 'MariTalk (Sabiá-65B) Few-shot - Prompt 2',
    'Maritaca_datasetName_zeroshot_prompt_3': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 3'
}


def _safe_rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Fill missing values with 1.0 (toxic) once, before the loop. The 'text'
# column is left alone: writing 1.0 into it would break later text processing.
colunas_para_preencher = [c for c in final_df.columns if c != 'text']
final_df = final_df.fillna({c: 1.0 for c in colunas_para_preencher})

# For each model, print the classification report and plot the confusion matrix
for col in final_df.columns:
    if col in ['text', 'Toxic']:
        continue

    # Display name of the model (falls back to the column name)
    modelo_nome = model_name_mapping.get(col, col)
    print(f"Modelo: {modelo_nome}")

    # Classification report
    print(classification_report(final_df['Toxic'], final_df[col]))

    # Confusion matrix. Fixing the label set keeps it 2x2 even when one class
    # is absent, so the four-way unpacking below cannot fail.
    cm = confusion_matrix(final_df['Toxic'], final_df[col], labels=[0, 1])

    # Plot the confusion matrix with mlxtend
    fig, ax = plot_confusion_matrix(conf_mat=cm,
                                    show_absolute=True,
                                    show_normed=True,
                                    colorbar=False,
                                    figsize=(10,7),
                                    cmap="Greys")
    ax.set_title(f'{modelo_nome}')
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    plt.show()

    # False positive and false negative rates (NaN when a class has no samples)
    tn, fp, fn, tp = cm.ravel()
    fpr = _safe_rate(fp, fp + tn)  # False positive rate
    fnr = _safe_rate(fn, fn + tp)  # False negative rate

    print(f"Taxa de Falso Positivo (FPR): {fpr:.2f}")
    print(f"Taxa de Falso Negativo (FNR): {fnr:.2f}\n")


In [None]:
import pandas as pd
import os
import re
from unidecode import unidecode
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Maps file stems to display names. The prediction columns of final_df are
# already named with the display names, so the .get() lookup below normally
# falls back to the column name itself.
model_name_mapping = {
    'Maritaca_datasetName_zeroshot_prompt_2': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 2',
    'Maritaca_datasetName_few-shot_10_prompt2_v2': 'MariTalk (Sabiá-65B) Few-shot - Prompt 2',
    'Maritaca_datasetName_zeroshot_prompt_3': 'MariTalk (Sabiá-65B) Zero-shot - Prompt 3'
}

# One row of per-class metrics for each model
data = []

# Fixing the label set (and casting both sides to float) guarantees that the
# report dictionary always has the '0.0' and '1.0' keys read below, even when
# a model never predicts one of the classes.
rotulos = [0.0, 1.0]
y_true = final_df['Toxic'].astype(float)

# For each model, compute the classification report
for col in final_df.columns:
    if col in ['text', 'Toxic']:
        continue

    # Display name of the model (falls back to the column name)
    modelo_nome = model_name_mapping.get(col, col)
    print(f"Modelo: {modelo_nome}")

    # Missing predictions count as toxic (1.0), so this cell does not depend
    # on an earlier cell having filled them in
    y_pred = final_df[col].fillna(1.0).astype(float)

    # zero_division=0 keeps undefined precision/recall at 0 without a warning
    report = classification_report(y_true, y_pred, labels=rotulos,
                                   zero_division=0, output_dict=True)
    nao_toxico, toxico = report['0.0'], report['1.0']

    # Add metrics to the list
    data.append([
        modelo_nome,
        nao_toxico['precision'], nao_toxico['recall'], nao_toxico['f1-score'],
        toxico['precision'], toxico['recall'], toxico['f1-score'],
    ])

# Convert List to Dataframe
df_metrics = pd.DataFrame(data, columns=['Modelo', 'Precision_NonToxic', 'Recall_NonToxic', 'F1_NonToxic', 'Precision_Toxic', 'Recall_Toxic', 'F1_Toxic'])


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Function to create radar/spider chart
def plot_spider_chart(df, title):
    """Draw a radar (spider) chart with one polygon per row of ``df``.

    The chart is drawn on the current matplotlib figure; the caller creates
    the figure beforehand and calls ``plt.show()`` afterwards.

    Args:
        df: DataFrame whose first column is 'Modelo' (used as the legend
            label) followed by one column per metric. The radial axis is
            fixed to the 0-1 range.
        title: Text placed above the chart.
    """
    # Every column after the first one becomes an axis of the chart
    categories = list(df)[1:]
    N = len(categories)

    # One angle per axis; the first angle is repeated to close the polygon
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]

    # Initialize the chart
    ax = plt.subplot(111, polar=True)

    # First axis at the top, remaining axes laid out clockwise
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    # Labels for each axis
    plt.xticks(angles[:-1], categories)

    # Radial tick labels and range
    ax.set_rlabel_position(0)
    plt.yticks([0.2, 0.4, 0.6, 0.8], ["0.2", "0.4", "0.6", "0.8"], color="grey", size=7)
    plt.ylim(0,1)

    # Colors for each model
    colors = ['b', 'r', 'y', 'g', 'c', 'm', 'k', 'orange']

    # Plot metrics for each model. The colour is picked from the row's
    # position, not its index label, so frames with a filtered, shuffled or
    # non-integer index still get a valid, distinct colour per row.
    for posicao, (_, row) in enumerate(df.iterrows()):
        cor = colors[posicao % len(colors)]
        values = row.drop('Modelo').values.flatten().tolist()
        values += values[:1]  # repeat the first value to close the polygon
        ax.plot(angles, values, linewidth=2, linestyle='solid', label=row['Modelo'], color=cor)
        ax.fill(angles, values, color=cor, alpha=0.1)

    # Legend
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

    # Title
    plt.title(title, size=11, color='blue', y=1.1)

# Draw the metrics of every model as overlaid polygons on one radar chart
figura_todos_modelos = plt.figure(figsize=(10, 8))
plot_spider_chart(df=df_metrics, title="")
plt.show()


## Filter and display the first 20 instances where the zero-shot model made a mistake and the few-shot model was correct. If fewer than 20 instances meet this criterion, all of them are shown.

In [None]:
import pandas as pd
import os
import re
from unidecode import unidecode

# Function to map classifications
def mapear_classificacao(texto):
    """Convert a raw model answer into a numeric toxicity label.

    This definition replaces the one given earlier in the notebook once this
    cell runs, so it follows the same rules: numeric cells pass through and
    unrecognised answers count as toxic. Otherwise re-running an earlier cell
    would produce labels that differ from the ones used for the metrics.

    Args:
        texto: either a number (returned untouched, which includes NaN for
            missing answers) or the free-text answer produced by the model.

    Returns:
        The input itself when it is numeric; otherwise 1.0 when the answer
        contains the whole word "sim", 0.0 when it contains the whole word
        "nao" (accents are stripped first, so "não" matches), and 1.0 for any
        answer that contains neither.
    """
    # Numeric cells (already-mapped labels, NaN for empty answers) are returned
    # as they are; passing them on to unidecode() would raise.
    if isinstance(texto, (int, float)):
        return texto

    texto = unidecode(texto).lower()  # Remove accents and convert to lowercase

    if re.search(r'\bsim\b', texto):
        return 1.0
    elif re.search(r'\bnao\b', texto):
        return 0.0
    else:  # Unrecognised answers count as toxic, as in the metrics pipeline
        return 1.0

# Prediction files of the two prompting strategies being compared
path_zeroshot = 'Maritaca_datasetName_zeroshot_prompt_2.csv'
path_fewshot = 'Maritaca_datasetName_few-shot_10_prompt2_v2.csv'

df_zeroshot, df_fewshot = (pd.read_csv(caminho) for caminho in (path_zeroshot, path_fewshot))

# Turn the raw answers of both models into numeric labels
for tabela in (df_zeroshot, df_fewshot):
    tabela['predictions'] = tabela['predictions'].apply(mapear_classificacao)

# Rows the zero-shot model got wrong while the few-shot model got them right;
# both are judged against the gold labels stored in the zero-shot file
rotulos_reais = df_zeroshot['Toxic']
zeroshot_errou = df_zeroshot['predictions'] != rotulos_reais
fewshot_acertou = df_fewshot['predictions'] == rotulos_reais
errors_zeroshot_correct_fewshot = df_zeroshot[zeroshot_errou & fewshot_acertou]

# Show full texts instead of truncating them
pd.set_option('display.max_colwidth', None)

# Show at most the first 20 of those rows ('predictions' is the zero-shot answer)
colunas_exibidas = ['text', 'Toxic', 'predictions']
print(errors_zeroshot_correct_fewshot[colunas_exibidas].head(20))


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Function to create radar/spider chart
def plot_spider_chart(df, title):
    """Draw a radar (spider) chart for the first row of ``df``.

    The chart is drawn on the current matplotlib figure; the caller creates
    the figure beforehand and calls ``plt.show()`` afterwards. Rows after the
    first one are ignored.

    Args:
        df: DataFrame whose first column is 'Modelo' followed by one column
            per metric. The radial axis is fixed to the 0-1 range.
        title: Text placed above the chart.
    """
    # Every column after the first one becomes an axis of the chart
    eixos = list(df)[1:]
    N = len(eixos)

    # One angle per axis, plus the first angle again to close the polygon
    angulos = [i / float(N) * 2 * np.pi for i in range(N)]
    angulos = angulos + angulos[:1]

    # Polar axes on the current figure
    ax = plt.subplot(111, polar=True)

    # First axis at the top, remaining axes laid out clockwise
    ax.set_theta_direction(-1)
    ax.set_theta_offset(np.pi / 2)

    # One tick label per metric
    ax.set_xticks(angulos[:-1])
    ax.set_xticklabels(eixos)

    # Radial tick labels and range
    ax.set_rlabel_position(0)
    ax.set_yticks([0.2, 0.4, 0.6, 0.8])
    ax.set_yticklabels(["0.2", "0.4", "0.6", "0.8"], color="grey", size=7)
    ax.set_ylim(0, 1)

    # Metric values of the first row, closed into a polygon
    metricas = df.iloc[0].drop('Modelo').values.flatten().tolist()
    metricas = metricas + metricas[:1]
    ax.plot(angulos, metricas, linewidth=2, linestyle='solid')
    ax.fill(angulos, metricas, 'b', alpha=0.1)

    # Title
    ax.set_title(title, size=11, color='blue', y=1.1)

# One separate radar chart per model, titled with the model's name
for _, metricas_modelo in df_metrics.iterrows():
    plt.figure(figsize=(8, 6))
    # The plotting function expects a DataFrame, so turn the row back into a
    # one-row frame
    tabela_uma_linha = metricas_modelo.to_frame().T
    plot_spider_chart(tabela_uma_linha, metricas_modelo['Modelo'])
    plt.show()



## Instances that only BERTimbau classified correctly

In [None]:

# Configure so that the dataframe columns are not truncated when displayed
pd.set_option('display.max_colwidth', None)

# Gold-label column: final_df is built in this notebook with a 'Toxic' column;
# 'labels' is still honoured for frames that use that name instead.
coluna_rotulo = 'labels' if 'labels' in final_df.columns else 'Toxic'

# Column holding the Bertimbau predictions
coluna_bertimbau = 'Bertimbau_test_datasetName'
if coluna_bertimbau not in final_df.columns:
    raise KeyError(
        f"final_df has no '{coluna_bertimbau}' column; load the Bertimbau "
        "predictions into final_df before running this cell"
    )

# Columns that are not model predictions and must not be compared to the labels
colunas_auxiliares = {'text', 'labels', 'Toxic', 'text_length', 'text_complexity'}

# Mask for instances in which Bertimbau got it right
bertimbau_acertos = final_df[coluna_bertimbau] == final_df[coluna_rotulo]

# Masks for instances in which each of the other models made a mistake
outros_modelos_erros = [
    final_df[col] != final_df[coluna_rotulo]
    for col in final_df.columns
    if col not in colunas_auxiliares and col != coluna_bertimbau
]

# Combine all masks: Bertimbau right AND every other model wrong
mascara_final = bertimbau_acertos
for mascara in outros_modelos_erros:
    mascara_final = mascara_final & mascara

# Filter the dataframe using the final mask
resultados_exclusivos_bertimbau = final_df[mascara_final]

print(resultados_exclusivos_bertimbau)


## Instances that only Maritaca zero-shot classified correctly

In [None]:
# Configure so that the dataframe columns are not truncated when displayed
pd.set_option('display.max_colwidth', None)

# Gold-label column: final_df is built in this notebook with a 'Toxic' column;
# 'labels' is still honoured for frames that use that name instead.
coluna_rotulo = 'labels' if 'labels' in final_df.columns else 'Toxic'

# The prediction columns of final_df are named with the display names from
# model_name_mapping, so the file stem is translated unless a column with the
# stem itself exists.
arquivo_maritaca = 'Maritaca_datasetName_zeroshot_prompt_2'
if arquivo_maritaca in final_df.columns:
    coluna_maritaca = arquivo_maritaca
else:
    coluna_maritaca = model_name_mapping.get(arquivo_maritaca, arquivo_maritaca)

# Columns that are not model predictions and must not be compared to the labels
colunas_auxiliares = {'text', 'labels', 'Toxic', 'text_length', 'text_complexity'}

# Mask for instances where the Maritaca zero-shot (prompt 2) model was right
maritaca_acertos = final_df[coluna_maritaca] == final_df[coluna_rotulo]

# Masks for instances in which each of the other models made a mistake
outros_modelos_erros = [
    final_df[col] != final_df[coluna_rotulo]
    for col in final_df.columns
    if col not in colunas_auxiliares and col != coluna_maritaca
]

# Combine all masks: Maritaca right AND every other model wrong
mascara_final = maritaca_acertos
for mascara in outros_modelos_erros:
    mascara_final = mascara_final & mascara

# Filter the dataframe using the final mask
resultados_exclusivos_maritaca = final_df[mascara_final]

print(resultados_exclusivos_maritaca)


## Text length:

We can create a new dataframe column to store the length of each text and then analyze the error rate in relation to the length of the text for each model.

## Text complexity:

We can use the Flesch-Kincaid readability index to calculate the complexity of the text. The Flesch-Kincaid index measures the complexity of the text based on the total number of words, sentences and syllables.
We can then analyze the error rate in relation to the complexity of the text for each model.

In [None]:
!pip install textstat

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from textstat import flesch_kincaid_grade

# Gold-label column: final_df is built in this notebook with a 'Toxic' column;
# 'labels' is still honoured for frames that use that name instead.
coluna_rotulo = 'labels' if 'labels' in final_df.columns else 'Toxic'

# Cast to str so that a non-string cell (e.g. NaN from an empty CSV field)
# does not make len() or the readability function raise.
textos = final_df['text'].astype(str)

# Add the text length and Flesch-Kincaid complexity columns. assign() returns
# a new frame, so nothing is written into a possibly sliced frame.
# NOTE(review): textstat's defaults target English; the texts appear to be
# Portuguese - confirm whether a language setting is needed.
final_df = final_df.assign(
    text_length=textos.apply(len),
    text_complexity=textos.apply(flesch_kincaid_grade),
)

# Columns that are not model predictions
colunas_auxiliares = {'text', 'labels', 'Toxic', 'text_length', 'text_complexity'}

# For each model, plot its errors by text length and text complexity
for col in final_df.columns:
    if col in colunas_auxiliares:
        continue

    # Rows where the model's prediction differs from the gold label
    errors = final_df[final_df[coluna_rotulo] != final_df[col]]

    # One point per misclassified text: length on x, complexity on y
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(errors['text_length'], errors['text_complexity'], alpha=0.5)
    ax.set_title(f'Model errors {col} vs. Text length and complexity')
    ax.set_xlabel('Text length')
    ax.set_ylabel('Text complexity (Flesch-Kincaid)')
    plt.show()


Text length: If most errors occur in shorter texts (0-100 characters), it may be that these texts are more ambiguous, containing short sentences, exclamations or colloquial language that models have difficulty interpreting.

Text complexity: If errors are concentrated in low complexity texts (0-10 in the Flesch-Kincaid index), this may indicate that models have difficulty dealing with simple but ambiguous language, or with slang and colloquialism.