In [1]:
import pandas as pd
import numpy as np
import moralstrength
from moralstrength.moralstrength import estimate_morals
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import spacy
import warnings
# import plotly.express as px
from tqdm import tqdm
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import hstack
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import cross_val_predict
from sklearn.linear_model import SGDClassifier
from tqdm.notebook import tqdm

# Suppress all warnings
# NOTE: no category or message filter is given, so every warning raised from
# here on is hidden, including deprecation warnings from the libraries
# imported above.
warnings.filterwarnings("ignore")


#### The Chosen Dataset for testing:
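Choose the test data on which you want to run the MoralStrength lexicon. The file must contain a `cleaned_text` column and one ground-truth column per moral label (`care`, `harm`, `fairness`, `cheating`, `loyalty`, `betrayal`, `authority`, `subversion`, `purity`, `degradation`).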

In [None]:
# Placeholder location: point this at the CSV file that should be evaluated.
df = pd.read_csv(filepath_or_buffer="path/to/testData/")

In [None]:
# Count the missing values in each column of the test data.
df.isnull().sum()

### Calculation of moral scores for multi-label predictions:

In [23]:
def calculate_moral_scores(text):
    """Score the given texts with the MoralStrength lexicon.

    Thin wrapper around ``estimate_morals`` that always passes
    ``process=True``.

    Parameters
    ----------
    text
        The texts to score; forwarded unchanged as the first positional
        argument of ``estimate_morals``.

    Returns
    -------
    Whatever ``estimate_morals`` returns for these texts.
    """
    return estimate_morals(text, process=True)

In [24]:
# Score the cleaned text column of the test data with the lexicon.
moral_anotations = calculate_moral_scores(text=df['cleaned_text'])

In [25]:
# Preview the scores returned by calculate_moral_scores.
moral_anotations

Unnamed: 0,care,fairness,loyalty,authority,purity
0,,,7.2,,
1,,,,,8.0
2,,,,,
3,,,,,
4,,,,,
...,...,...,...,...,...
7681,7.8,7.6,7.5,5.0,8.0
7682,,,,,
7683,,,,,
7684,,,,,


In [26]:
# Fill missing scores with 5, the value the thresholding step below treats as
# neither virtue (> 5) nor vice (< 5).
moral_anotations = moral_anotations.fillna(value=5)

In [27]:
# Preview the scores after missing values were filled with 5.
moral_anotations

Unnamed: 0,care,fairness,loyalty,authority,purity
0,5.0,5.0,7.2,5.0,5.0
1,5.0,5.0,5.0,5.0,8.0
2,5.0,5.0,5.0,5.0,5.0
3,5.0,5.0,5.0,5.0,5.0
4,5.0,5.0,5.0,5.0,5.0
...,...,...,...,...,...
7681,7.8,7.6,7.5,5.0,8.0
7682,5.0,5.0,5.0,5.0,5.0
7683,5.0,5.0,5.0,5.0,5.0
7684,5.0,5.0,5.0,5.0,5.0


#### Convert the MoralStrength values into multi-label binary values:

In [28]:
# Start every vice column at 0; the thresholding step sets it to 1 where the
# paired virtue score falls below 5.
for vice_column in ('harm', 'cheating', 'betrayal', 'subversion', 'degradation'):
    moral_anotations[vice_column] = 0

In [29]:
# Binarize each virtue column around the neutral score and flag its paired vice.
#   score < neutral  -> virtue = 0, vice = 1
#   score > neutral  -> virtue = 1
#   score == neutral -> left untouched by this cell
# NOTE: this cell is not idempotent. After one run the virtue columns hold
# 0 / 1 (or the neutral score), and 0 and 1 are both below the neutral score,
# so running the cell a second time would turn every labelled row into a vice.
NEUTRAL_SCORE = 5

# Each virtue column and the vice column that receives its below-neutral rows.
VIRTUE_TO_VICE = {
    'care': 'harm',
    'fairness': 'cheating',
    'loyalty': 'betrayal',
    'authority': 'subversion',
    'purity': 'degradation',
}

for virtue, vice in VIRTUE_TO_VICE.items():
    # Build both masks from the original scores before anything is overwritten.
    scores = moral_anotations[virtue]
    below_neutral = scores < NEUTRAL_SCORE
    above_neutral = scores > NEUTRAL_SCORE

    moral_anotations.loc[below_neutral, virtue] = 0
    moral_anotations.loc[below_neutral, vice] = 1
    moral_anotations.loc[above_neutral, virtue] = 1



In [30]:
# Preview the frame after thresholding; entries that were exactly 5 still hold 5 here.
moral_anotations

Unnamed: 0,care,fairness,loyalty,authority,purity,harm,cheating,betrayal,subversion,degradation
0,5.0,5.0,1.0,5.0,5.0,0,0,0,0,0
1,5.0,5.0,5.0,5.0,1.0,0,0,0,0,0
2,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0
3,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0
4,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
7681,1.0,1.0,1.0,5.0,1.0,0,0,0,0,0
7682,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0
7683,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0
7684,5.0,5.0,5.0,5.0,5.0,0,0,0,0,0


In [31]:
# Every remaining 5 becomes 0 (label absent), leaving only 0/1 values.
moral_anotations = moral_anotations.replace(to_replace=5, value=0)

In [32]:
# Preview the final 0/1 label matrix.
moral_anotations

Unnamed: 0,care,fairness,loyalty,authority,purity,harm,cheating,betrayal,subversion,degradation
0,0.0,0.0,1.0,0.0,0.0,0,0,0,0,0
1,0.0,0.0,0.0,0.0,1.0,0,0,0,0,0
2,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
3,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
4,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
7681,1.0,1.0,1.0,0.0,1.0,0,0,0,0,0
7682,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
7683,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
7684,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0


In [33]:
# Prefix every label column with "pred_" so the lexicon predictions cannot be
# confused with the same-named columns read from df during evaluation.
label_columns = ['care', 'harm', 'fairness', 'cheating', 'loyalty',
                 'betrayal', 'authority', 'subversion', 'purity', 'degradation']
prediction_names = {label: 'pred_' + label for label in label_columns}
moral_anotations.rename(columns=prediction_names, inplace=True)

#### Classification Report:

In [34]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.utils import resample
import numpy as np
import pandas as pd

possible_labels = ["care", "harm", "fairness", "cheating", "loyalty", "betrayal",
                   "authority", "subversion", "purity", "degradation"]

n_bootstrap_iters = 1000  # Number of bootstrap iterations
bootstrap_seed = 42       # Fixed seed so the reported ± values are reproducible

# Metric families and averaging modes; their product (plus accuracy) gives the
# ten metrics reported per label, in this order.
_METRIC_FUNCTIONS = {"F1": f1_score, "Precision": precision_score, "Recall": recall_score}
_AVERAGING_MODES = ["binary", "macro", "weighted"]
metric_names = [f"{family} ({mode.capitalize()})"
                for family in _METRIC_FUNCTIONS for mode in _AVERAGING_MODES] + ["Accuracy"]


def _compute_label_metrics(true, candidate):
    """Return ``{metric name: value}`` for one label.

    Parameters
    ----------
    true, candidate
        Equal-length arrays of ground-truth and predicted values for a single
        label.

    Returns
    -------
    dict
        Keys follow ``metric_names`` ("F1 (Binary)", ..., "Accuracy").
        F1 / precision / recall are computed with ``zero_division=0``.
    """
    scores = {}
    for family, metric_fn in _METRIC_FUNCTIONS.items():
        for mode in _AVERAGING_MODES:
            scores[f"{family} ({mode.capitalize()})"] = metric_fn(
                true, candidate, average=mode, zero_division=0)
    scores["Accuracy"] = accuracy_score(true, candidate)
    return scores


# Ground truth and predictions are paired by row position, so both frames must
# have the same number of rows.
if len(df) != len(moral_anotations):
    raise ValueError(
        f"df has {len(df)} rows but moral_anotations has {len(moral_anotations)}; "
        "they must be aligned row by row."
    )

# Pull plain arrays once. Indexing these by position avoids label-based .loc
# look-ups, which raise or misalign when df does not have a default 0..n-1 index.
true_by_label = {lab: df[lab].to_numpy() for lab in possible_labels}
pred_by_label = {lab: moral_anotations[f"pred_{lab}"].to_numpy() for lab in possible_labels}

rng = np.random.RandomState(bootstrap_seed)
row_positions = np.arange(len(df))
bootstrap_results = {label: {metric: [] for metric in metric_names} for label in possible_labels}

for _ in range(n_bootstrap_iters):
    for lab in possible_labels:
        # Resampling with replacement (row positions, not index labels)
        sample_positions = resample(row_positions, replace=True, random_state=rng)
        sample_scores = _compute_label_metrics(true_by_label[lab][sample_positions],
                                               pred_by_label[lab][sample_positions])

        # computing metrics for bootstrap sample
        for metric, value in sample_scores.items():
            bootstrap_results[lab][metric].append(value)

# standard deviations calculations from bootstrap results
std_devs = {label: {metric: np.std(values) for metric, values in metrics.items()}
            for label, metrics in bootstrap_results.items()}

# original metrics calculations with standard deviations
final_results = []
for lab in possible_labels:
    result = {"Moral Value": lab}
    point_estimates = _compute_label_metrics(true_by_label[lab], pred_by_label[lab])

    for metric, value in point_estimates.items():
        # "F1 (Binary)" -> "F1 Score (Binary)"; "Accuracy" has no "(" and is kept as is.
        column = metric.replace(" (", " Score (")
        result[column] = f"{value:.2f} ± {std_devs[lab][metric]:.2f}"

    final_results.append(result)

results_df = pd.DataFrame(final_results)

In [37]:
# Show the per-label metrics table (point estimate ± bootstrap standard deviation).
results_df

Unnamed: 0,Moral Value,F1 Score (Binary),F1 Score (Macro),F1 Score (Weighted),Precision Score (Binary),Precision Score (Macro),Precision Score (Weighted),Recall Score (Binary),Recall Score (Macro),Recall Score (Weighted),Accuracy
0,care,0.31 ± 0.02,0.63 ± 0.01,0.89 ± 0.00,0.33 ± 0.02,0.63 ± 0.01,0.89 ± 0.00,0.29 ± 0.02,0.62 ± 0.01,0.90 ± 0.00,0.90 ± 0.00
1,harm,0.38 ± 0.02,0.65 ± 0.01,0.88 ± 0.00,0.35 ± 0.02,0.64 ± 0.01,0.88 ± 0.00,0.42 ± 0.02,0.67 ± 0.01,0.87 ± 0.00,0.87 ± 0.00
2,fairness,0.32 ± 0.02,0.62 ± 0.01,0.88 ± 0.00,0.26 ± 0.01,0.61 ± 0.01,0.90 ± 0.00,0.42 ± 0.02,0.66 ± 0.01,0.87 ± 0.00,0.87 ± 0.00
3,cheating,0.19 ± 0.02,0.57 ± 0.01,0.88 ± 0.00,0.41 ± 0.03,0.67 ± 0.02,0.88 ± 0.01,0.12 ± 0.01,0.55 ± 0.01,0.91 ± 0.00,0.91 ± 0.00
4,loyalty,0.36 ± 0.02,0.66 ± 0.01,0.93 ± 0.00,0.28 ± 0.02,0.63 ± 0.01,0.95 ± 0.00,0.52 ± 0.03,0.73 ± 0.01,0.92 ± 0.00,0.92 ± 0.00
5,betrayal,0.14 ± 0.02,0.55 ± 0.01,0.92 ± 0.00,0.13 ± 0.02,0.55 ± 0.01,0.92 ± 0.00,0.15 ± 0.02,0.55 ± 0.01,0.92 ± 0.00,0.92 ± 0.00
6,authority,0.24 ± 0.02,0.59 ± 0.01,0.91 ± 0.00,0.17 ± 0.01,0.57 ± 0.01,0.94 ± 0.00,0.44 ± 0.03,0.67 ± 0.01,0.89 ± 0.00,0.89 ± 0.00
7,subversion,0.25 ± 0.02,0.60 ± 0.01,0.93 ± 0.00,0.18 ± 0.02,0.58 ± 0.01,0.95 ± 0.00,0.39 ± 0.03,0.66 ± 0.02,0.92 ± 0.00,0.92 ± 0.00
8,purity,0.17 ± 0.02,0.56 ± 0.01,0.94 ± 0.00,0.11 ± 0.01,0.55 ± 0.01,0.97 ± 0.00,0.35 ± 0.04,0.64 ± 0.02,0.93 ± 0.00,0.93 ± 0.00
9,degradation,0.28 ± 0.02,0.62 ± 0.01,0.95 ± 0.00,0.24 ± 0.02,0.61 ± 0.01,0.95 ± 0.00,0.32 ± 0.03,0.64 ± 0.01,0.94 ± 0.00,0.94 ± 0.00
