# Predicting Moral Values From Social Media Discourse

In [1]:
import os
import random
import re

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from itertools import product
from scipy.special import softmax
from sklearn.metrics import (accuracy_score, classification_report, f1_score,
                             multilabel_confusion_matrix as mcm, precision_score,
                             recall_score)
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.utils import resample
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
from tabulate import tabulate
from torch.autograd import Function
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from tqdm import trange
from tqdm.auto import trange
from transformers import (AutoModel, AutoModelForSequenceClassification,
                          AutoTokenizer, BertModel, get_linear_schedule_with_warmup)


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sat Jun 15 15:43:39 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-PCIE-40GB          On  | 00000000:86:00.0 Off |                    0 |
| N/A   31C    P0              33W / 250W |      7MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Define Base Model

In [3]:
# base_model = "allenai/longformer-base-4096"
base_model = "bert-base-uncased"

### Datasets:
- These are the datasets retrieved from different sources. Keep in mind that, depending on the cleaning and pre-processing you apply, your data sizes may differ from the ones shown here.

#### Twitter:

In [4]:
# Download the corpus here: https://osf.io/k5n7y/ 
# Concatenate all the subsets and use one-hot-encoding for multi-label mft annotations
mftc_df = pd.read_csv('path/to/downloaded/MFTC')

In [5]:
# General domain:
mftc_df['domain'] = 0

In [6]:
mftc_df.head()

Unnamed: 0,text,cleaned_text,annotations,new_label,subdomain,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation,domain
0,The courage to be impatient with evil and pati...,The courage to be impatient with evil and pati...,"[{'annotation': 'fairness', 'annotator': 'anno...",fairness,BLM,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,#NotAllCops but OMFG already. 😡 Protect and se...,but OMFG already. enraged_face Protect and ser...,"[{'annotation': 'care', 'annotator': 'annotato...",harm,BLM,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,"stop shaving, it's your manly dignity #blackje...","stop shaving, it's your manly dignity","[{'annotation': 'nm', 'annotator': 'annotator0...",nm,BLM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,ARABS MORTAL HATRED AND ENSLAVEMENT OF THE BLA...,ARABS MORTAL HATRED AND ENSLAVEMENT OF THE BLA...,"[{'annotation': 'harm,cheating', 'annotator': ...",harm,BLM,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,“@Babbsgirl2: #SheriffDavidClarke is my hero! ...,@user: is my hero! @user True patriot,"[{'annotation': 'nm', 'annotator': 'annotator0...",nm,BLM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [7]:
mftc_df.subdomain.value_counts()

subdomain
BLM          4304
Elections    4139
Baltimore    3602
Sandy        3186
Davidson     2725
ALM          2665
Name: count, dtype: int64

In [8]:
# 1 where `new_label` is exactly 'nm', 0 for every other value.
mftc_df['non-moral'] = mftc_df['new_label'].eq('nm').astype('int64')

In [11]:
mftc_df.shape

(20628, 17)

#### Reddit:

In [86]:
# Download the corpus here:https://huggingface.co/datasets/USC-MOLA-Lab/MFRC
# Use one-hot-encoding for multi-label mft annotations
mfrc_df = pd.read_csv('path/to/downloaded/MFRC')

In [13]:
mfrc_df.shape

(17741, 24)

#### Let's also add the general domain:

In [14]:
mfrc_df['domain'] = 1

In [15]:
mfrc_df.head(5)

Unnamed: 0,text,cleaned_text,subdomain,bucket,annotation,final_annotation,care,fairness,loyalty,authority,...,degradation,equality,proportionality,thin morality,non-moral,inconclusive,vader_neg,vader_neu,vader_pos,domain
0,That particular part of the debate is especial...,That particular part of the debate is especial...,europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",inconclusive,0,0,0,0,...,0,0,0,0,0,1,0.169,0.725,0.106,1
1,"/r/france is pretty lively, with it's own ling...","/r/france is pretty lively, with it's own ling...",europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.142,0.679,0.18,1
2,TBH Marion Le Pen would be better. Closet fasc...,TBH Marion Le Pen would be better. Closet fasc...,neoliberal,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",inconclusive,0,0,0,0,...,0,0,0,0,0,1,0.358,0.498,0.144,1
3,it really is a very unusual situation isn't it...,it really is a very unusual situation isn't it...,europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.118,0.772,0.11,1
4,The Le Pen brand of conservatism and classical...,The Le Pen brand of conservatism and classical...,europe,French politics,"{'annotator03': {'annotation': 'Authority', 'c...",inconclusive,0,0,0,0,...,0,0,0,0,0,1,0.0,0.795,0.205,1


In [16]:
# Drop rows whose final annotation is 'inconclusive' or 'Thin Morality'.
# `.copy()` detaches the result from the original frame so that columns assigned
# to `mfrc_df` afterwards do not raise SettingWithCopyWarning.
mfrc_df = mfrc_df[~mfrc_df['final_annotation'].isin(['inconclusive', 'Thin Morality'])].copy()

In [17]:
mfrc_df.subdomain.value_counts()

subdomain
europe                 2050
worldnews              1960
Conservative           1482
antiwork               1424
politics               1368
neoliberal             1346
nostalgia              1206
relationship_advice    1043
AmItheAsshole          1011
confession             1006
geopolitics              99
Name: count, dtype: int64

In [18]:
mfrc_df.groupby(['bucket','subdomain']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,text,cleaned_text,annotation,final_annotation,care,fairness,loyalty,authority,purity,harm,...,degradation,equality,proportionality,thin morality,non-moral,inconclusive,vader_neg,vader_neu,vader_pos,domain
bucket,subdomain,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Everyday Morality,AmItheAsshole,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,...,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011
Everyday Morality,confession,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,...,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006
Everyday Morality,nostalgia,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,...,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206
Everyday Morality,relationship_advice,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,...,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043
French politics,Conservative,103,103,103,103,103,103,103,103,103,103,...,103,103,103,103,103,103,103,103,103,103
French politics,europe,2050,2050,2050,2050,2050,2050,2050,2050,2050,2050,...,2050,2050,2050,2050,2050,2050,2050,2050,2050,2050
French politics,geopolitics,99,99,99,99,99,99,99,99,99,99,...,99,99,99,99,99,99,99,99,99,99
French politics,neoliberal,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346,...,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346
French politics,worldnews,1960,1960,1960,1960,1960,1960,1960,1960,1960,1960,...,1960,1960,1960,1960,1960,1960,1960,1960,1960,1960
US Politics,Conservative,1379,1379,1379,1379,1379,1379,1379,1379,1379,1379,...,1379,1379,1379,1379,1379,1379,1379,1379,1379,1379


In [19]:
# 1 where `final_annotation` is exactly 'Non-Moral', 0 for every other value.
mfrc_df['non-moral'] = mfrc_df['final_annotation'].eq('Non-Moral').astype('int64')

In [20]:
mfrc_df.shape

(13995, 25)

In [21]:
category_counts = mfrc_df[['care','harm', 'fairness', 'cheating', 'loyalty', 
       'betrayal', 'authority','subversion', 'purity', 'degradation', 'domain', 'non-moral']].sum()
category_counts

care             737
harm            1014
fairness         623
cheating         841
loyalty          241
betrayal         188
authority        330
subversion       357
purity           100
degradation      187
domain         13995
non-moral       9843
dtype: int64

In [22]:
mfrc_df[['text', 'cleaned_text', 'subdomain', 'bucket', 'annotation',
       'final_annotation', 'care','harm', 'fairness','cheating', 'loyalty', 'betrayal', 
       'authority', 'subversion', 'purity', 'degradation',
       'equality', 'proportionality', 'thin morality', 'non-moral',
       'inconclusive', 'vader_neg', 'vader_neu', 'vader_pos', 'domain']]

Unnamed: 0,text,cleaned_text,subdomain,bucket,annotation,final_annotation,care,harm,fairness,cheating,...,degradation,equality,proportionality,thin morality,non-moral,inconclusive,vader_neg,vader_neu,vader_pos,domain
1,"/r/france is pretty lively, with it's own ling...","/r/france is pretty lively, with it's own ling...",europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.142,0.679,0.180,1
3,it really is a very unusual situation isn't it...,it really is a very unusual situation isn't it...,europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.118,0.772,0.110,1
5,"Macrons face just screams\n""I do not know her,...","Macrons face just screams ""I do not know her, ...",europe,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.135,0.865,0.000,1
6,"Clinton lead polls by 4%, well within a reason...","Clinton lead polls by 4%, well within a reason...",worldnews,French politics,"{'annotator03': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.094,0.833,0.073,1
7,"Hey, fuck you. Us leftists will never support ...","Hey, fuck you. Us leftists will never support ...",worldnews,French politics,"{'annotator03': {'annotation': 'Loyalty,Equali...",Equality,0,0,0,1,...,0,1,0,0,0,0,0.391,0.609,0.000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17734,"“I don’t want words, I want action!” \n\nCoo...","I don t want words, I want action! Cool, elect...",politics,US Politics,"{'annotator01': {'annotation': 'Authority', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.089,0.605,0.306,1
17735,"Music, history, nature channels have all turne...","Music, history, nature channels have all turne...",Conservative,US Politics,"{'annotator01': {'annotation': 'Non-Moral', 'c...",Non-Moral,0,0,0,0,...,0,0,0,0,1,0,0.000,0.921,0.079,1
17737,Not being discriminated against\n\n\nBeing pro...,Not being discriminated against Being protecte...,Conservative,US Politics,"{'annotator01': {'annotation': 'Equality', 'co...",Equality,0,0,0,1,...,0,1,0,0,0,0,0.134,0.738,0.128,1
17738,*Jaime [Lannister] reached for the flagon to r...,*Jaime [Lannister] reached for the flagon to r...,politics,US Politics,"{'annotator01': {'annotation': 'Authority', 'c...",Authority,0,0,0,0,...,0,0,0,0,0,0,0.098,0.767,0.135,1


#### Facebook:
- For the Facebook data, please contact the authors of this paper: https://dl.acm.org/doi/10.1145/3543507.3583865



In [23]:
mffp_df = pd.read_csv('path/to/downloaded/FbVaccinationPosts/')

In [24]:
mffp_df['domain'] = 2

In [25]:
mffp_df.head()

Unnamed: 0,text,cleaned_text,comment_id,page_id,page,class,care,harm,fairness,cheating,...,betrayal,authority,subversion,purity,degradation,liberty,oppression,non-moral,subdomain,domain
0,I just contacted my rep. Julia Brown and reque...,I just contacted my rep. Julia Brown and reque...,1525407617778182_1525810737737870,1374879262831019,1,0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,vaccination,2
1,"The thing is, myself, and many other people ar...","The thing is, myself, and many other people ar...",2563364117066915_2563717157031611,414643305272351,1,1,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,vaccination,2
2,Soooooo....how do they explain all the dead ki...,Soooooo....how do they explain all the dead ki...,2563364117066915_2563383687064958,414643305272351,1,1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,vaccination,2
3,"Well writer, I’m going to use a bit of your so...","Well writer, I m going to use a bit of your so...",2563364117066915_2563432760393384,414643305272351,1,1,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,vaccination,2
4,"Well imo, F***k the AVN! I am proud to say my ...","Well imo, F***k the AVN! I am proud to say my ...",10152270557898588_31662936,143367983587,1,1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,vaccination,2


In [26]:
category_counts = mffp_df[['care','harm', 'fairness', 'cheating', 'loyalty', 
       'betrayal', 'authority','subversion', 'purity', 'degradation', 'domain', 'non-moral']].sum()
category_counts

care            357.0
harm            132.0
fairness        174.0
cheating        123.0
loyalty          40.0
betrayal         38.0
authority       110.0
subversion      204.0
purity           80.0
degradation     112.0
domain         3020.0
non-moral       248.0
dtype: float64

#### Concatenate the datasets together based on the 10 moral values: Care/Harm, Fairness/Cheating, Loyalty/Betrayal, Authority/Subversion and Purity/Degradation:

In [34]:
mft_columns = ['care', 'harm', 'fairness', 'cheating', 'loyalty', 'betrayal',
       'authority', 'subversion', 'purity', 'degradation']

In [35]:
cat_columns = ["cleaned_text"]+mft_columns+["subdomain", "domain"]

In [36]:
df = pd.concat([mftc_df[cat_columns], mfrc_df[cat_columns], mffp_df[cat_columns]], axis=0)

In [37]:
# Keep one row per non-missing cleaned text.
df = df.dropna(subset=['cleaned_text']).drop_duplicates(subset=['cleaned_text'])
# Discard rows where any moral-foundation column holds the value 2
# (NOTE(review): presumably an annotation artefact - confirm against the source data).
has_label_two = df[mft_columns].eq(2).any(axis=1)
df = df[~has_label_two].reset_index(drop=True)

In [38]:
df

Unnamed: 0,cleaned_text,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation,subdomain,domain
0,The courage to be impatient with evil and pati...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
1,but OMFG already. enraged_face Protect and ser...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
2,"stop shaving, it's your manly dignity",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
3,ARABS MORTAL HATRED AND ENSLAVEMENT OF THE BLA...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
4,@user: is my hero! @user True patriot,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34296,Sadly most Canadians wont ever research vaccin...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,vaccination,2
34297,Stop poisoning the children,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,vaccination,2
34298,Was pregnant and ended up with the flu back in...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,vaccination,2
34299,My mother developed shingles last year and she...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,vaccination,2


In [39]:
df.shape

(34301, 13)

In [40]:
df.subdomain.value_counts()

subdomain
Elections              3912
BLM                    3383
Baltimore              3345
Sandy                  3102
Davidson               2716
ALM                    2340
europe                 2050
worldnews              1960
vaccination            1509
Conservative           1482
antiwork               1424
politics               1368
neoliberal             1345
nostalgia              1206
relationship_advice    1043
AmItheAsshole          1011
confession             1006
geopolitics              99
Name: count, dtype: int64

In [42]:
df[['care', 'harm', 'fairness', 'cheating', 'loyalty',
       'betrayal', 'authority', 'subversion', 'purity', 'degradation']].sum()

care           2752.0
harm           3173.0
fairness       2369.0
cheating       3001.0
loyalty        1308.0
betrayal       1564.0
authority      1264.0
subversion     1126.0
purity          715.0
degradation    1045.0
dtype: float64

In [43]:
# A text counts as neutral when every one of the ten moral-foundation columns is 0.
is_neutral = df[mft_columns].eq(0).all(axis=1)
neutral_text = df[is_neutral]

print("Neutral (Non-Moral) text: ", len(neutral_text))

Neutral (Non-Moral) text:  17948


In [44]:
len(neutral_text)/len(df)

0.5232500510189207

#### Shuffle the data

In [45]:
# Shuffle all rows. The fixed seed keeps the row order - and every index-based
# train/validation split derived from it - reproducible across kernel restarts.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [46]:
df.shape

(34301, 13)

In [47]:
df.isna().sum()

cleaned_text    0
care            0
harm            0
fairness        0
cheating        0
loyalty         0
betrayal        0
authority       0
subversion      0
purity          0
degradation     0
subdomain       0
domain          0
dtype: int64

In [48]:
df.domain.value_counts()

domain
0    18798
1    13994
2     1509
Name: count, dtype: int64

In [49]:
df

Unnamed: 0,cleaned_text,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation,subdomain,domain
0,France has serious problems with unemployment....,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Conservative,1
1,You don't understand Marine Le Pen very well. ...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,worldnews,1
2,"Just be careful saying that, please. Saying th...",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,politics,1
3,Well i'm French so maybe I can help you even t...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,neoliberal,1
4,"Worse yet, she called him a misogynist, implyi...",0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,relationship_advice,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34296,RT @user: Here's the AP's take on :,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Baltimore,0
34297,makes the heart hurt -,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BLM,0
34298,"Name one, just one Source stating that Macron ...",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,worldnews,1
34299,Right parties Fillon or Lepen think the coloni...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,europe,1


#### Text and label values:

In [51]:
text = df.cleaned_text.values
# Select the 10 moral dimensions by name rather than by position, so the label
# matrix stays correct if columns are added to or reordered in `df`.
labels = df[mft_columns].values
labels2 = df.domain.values  # integer domain id per row

In [52]:
df.iloc[:, 1:-2]

Unnamed: 0,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
34296,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34297,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34298,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
34299,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
df.domain.unique()

array([1, 2, 0])

In [55]:
# Load the tokenizer and the pretrained encoder weights for `base_model`
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModel.from_pretrained(base_model)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [56]:
# Original Input and attention masks without the augmented dictionary terms:
original_input_id = []
original_attention_masks = []
original_token_type_id = []

# Input id and attention masks with the dictionary terms:
input_id = []
attention_masks = []
token_type_id = []

def preprocessing(input_text, tokenizer, max_length=150):
    '''
    Tokenize a single text into fixed-length model inputs.

    Args:
        input_text: the text to encode.
        tokenizer: object exposing `encode_plus` (e.g. the tokenizer loaded for `base_model`).
        max_length: length every sequence is padded / truncated to (default 150).

    Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
    - input_ids: list of token ids
    - token_type_ids: list of token type ids
    - attention_mask: list of indices (0,1) specifying which tokens should considered by the model (return_attention_mask = True).
    '''
    return tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors='pt',
        truncation=True,
    )

# Encode every text; with return_tensors='pt' each field is a tensor whose
# leading dimension is the (size-1) batch, so the per-sample tensors can be
# concatenated along dim 0 afterwards.
for sample in text:
    original_encoding_dict = preprocessing(sample, tokenizer)
    original_input_id.append(original_encoding_dict['input_ids'])
    original_attention_masks.append(original_encoding_dict['attention_mask'])

    # Token type ids are all zeros, with the same shape as the input ids.
    original_token_type = torch.zeros_like(original_encoding_dict['input_ids'])
    original_token_type_id.append(original_token_type)

# Merge the per-sample tensors into one tensor per field (one row per sample).
original_input_id = torch.cat(original_input_id)
original_attention_masks = torch.cat(original_attention_masks)
original_token_type_id = torch.cat(original_token_type_id)
# Convert the label arrays to tensors (shapes are unchanged).
labels = torch.tensor(labels)
labels2 = torch.tensor(labels2)

### Check the shapes of the encoded inputs:

In [57]:
print('original_attention_masks.shape ', original_attention_masks.shape)
print('original_input_id.shape ', original_input_id.shape)
print('original_token_type_id.shape ', original_token_type_id.shape)

original_attention_masks.shape  torch.Size([34301, 150])
original_input_id.shape  torch.Size([34301, 150])
original_token_type_id.shape  torch.Size([34301, 150])


#### Encode corpus for original embeddings

In [58]:
def encode_corpus(inputs, attentions, model, batch_size=16):
    """Embed a corpus with `model` and return the first-token hidden states.

    Args:
        inputs: tensor of token ids, one row per text.
        attentions: attention masks aligned with `inputs`.
        model: encoder exposing `.device` and returning an object with
            `last_hidden_state` when called.
        batch_size: number of rows sent through the model at once.

    Returns:
        A CPU tensor with one row per input row, holding the hidden state at
        sequence position 0 of the last layer.

    The model is switched to eval mode for the duration of the call (so dropout
    cannot perturb the embeddings) and its previous train/eval mode is restored
    afterwards. An empty `inputs` still raises, as there is nothing to concatenate.
    """
    was_training = model.training
    model.eval()
    batch_embeddings = []
    try:
        with torch.no_grad():
            for start_index in range(0, len(inputs), batch_size):
                stop_index = start_index + batch_size
                b_input_ids = inputs[start_index:stop_index].to(model.device)
                b_input_mask = attentions[start_index:stop_index].to(model.device)
                output = model(b_input_ids,
                               token_type_ids=None,
                               attention_mask=b_input_mask)
                batch_embeddings.append(output.last_hidden_state[:, 0, :].detach().cpu())
    finally:
        model.train(was_training)
    # Concatenating whole batches gives the same (rows, hidden) tensor as stacking
    # individual rows, without creating one tensor object per text.
    return torch.cat(batch_embeddings, dim=0)

In [59]:
print(labels.shape)
print(labels2.shape)

torch.Size([34301, 10])
torch.Size([34301])


In [60]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Domain Adversarial Function:

In [61]:
class _GradientReversalFunction(Function):
    """Autograd function: identity in the forward pass, gradient scaled by ``-alpha`` in the backward pass."""

    @staticmethod
    def forward(ctx, x, alpha):
        # alpha is a non-differentiable scale. It is kept on ctx instead of in
        # save_for_backward because save_for_backward only accepts tensors,
        # while alpha may be a plain Python float.
        ctx.alpha = alpha
        return x

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = None
        if ctx.needs_input_grad[0]:
            grad_input = -ctx.alpha * grad_output
        # No gradient flows to alpha.
        return grad_input, None


# Functional form: revgrad(x, alpha)
revgrad = _GradientReversalFunction.apply


class GradientReversal(nn.Module):
    """Layer wrapper around `revgrad` with a stored reversal strength.

    `alpha` may be reassigned after construction with either a tensor or a
    plain number; both are accepted by the forward/backward pass.
    """

    def __init__(self, alpha):
        super().__init__()
        self.alpha = torch.tensor(alpha, requires_grad=False)

    def forward(self, x):
        return revgrad(x, self.alpha)

class AdversarialBERT(nn.Module):
    """Encoder with a moral-value head and a gradient-reversed domain head.

    The first-token hidden state of `bert_model` is passed through a linear
    "invariant" transformation; the result feeds (a) the moral classifier,
    (b) a domain classifier placed behind a gradient-reversal layer and
    (c) a linear reconstruction layer compared against precomputed embeddings.

    Args:
        bert_model: encoder whose output exposes `last_hidden_state` (hidden size 768 is assumed).
        moral_label: number of outputs of the moral head.
        domain_label: number of outputs of the domain head.
        class_weight: per-class weights for the cross-entropy moral loss;
            class_weight[0] <= 0 disables the weighting (uniform weights are used).
            The default list is never mutated.
        domain_weight, identity_weight, reconstruction_weight, moral_weight:
            multipliers of the respective loss terms.
        alpha: initial strength of the gradient reversal.
        freeze_bert: when True the encoder runs under `torch.no_grad()`.
    """

    def __init__(self, bert_model, moral_label=5, domain_label=2, class_weight=[1,1],
                 domain_weight=1, identity_weight=1, reconstruction_weight=1, moral_weight=1,
                 alpha=1.0,
                 freeze_bert=False):

        super(AdversarialBERT, self).__init__()
        self.bert = bert_model
        bert_dim = 768
        self.invariant_trans = nn.Linear(bert_dim, bert_dim)
        print(' self.invariant_trans ',  self.invariant_trans)
        # With every auxiliary loss switched off a single linear head is used.
        if identity_weight+reconstruction_weight+domain_weight==0:
            self.moral_classification = nn.Linear(bert_dim, moral_label)
        else:
            self.moral_classification = nn.Sequential(nn.Linear(bert_dim, bert_dim),
                                                      nn.ReLU(),
                                                      nn.Linear(bert_dim, moral_label))

        self.domain_classification = nn.Sequential(GradientReversal(alpha),
                                                   nn.Linear(bert_dim, bert_dim),
                                                   nn.ReLU(),
                                                   nn.Dropout(0.3),
                                                   nn.Linear(bert_dim, domain_label))

        # Dynamically adjustable alpha for gradient reversal
        self.alpha = alpha
        self.domain_weight = domain_weight

        if moral_label>2:
            # FocalLoss is not defined in this class; it must be available in
            # the notebook namespace when this branch is taken.
            self.loss_fn_moral = FocalLoss(logits=True)
        else:
            if class_weight[0]>0:
                weights = torch.tensor(class_weight).float()
            else:
                weights = torch.ones(moral_label)
            self.loss_fn_moral = nn.CrossEntropyLoss(weight=weights)

        self.loss_fn_domain = nn.CrossEntropyLoss()
        self.reconstruction_feed = nn.Linear(bert_dim, bert_dim)
        self.loss_reconstruction = nn.MSELoss()
        # `weight_identity` is the name read in forward(); `identity_weight` is
        # kept as an alias because update_model_params historically wrote it.
        self.weight_identity = identity_weight
        self.identity_weight = identity_weight
        self.reconstruction_weight = reconstruction_weight
        self.moral_weight = moral_weight
        # Non-persistent buffer: follows the module across .to()/.cuda() calls
        # without adding a key to the state dict.
        self.register_buffer('identity', torch.eye(bert_dim), persistent=False)
        self.freeze=freeze_bert

    def update_alpha(self, new_alpha):
        """Set the reversal strength of the gradient-reversal layer."""
        self.alpha = new_alpha
        # Stored as a tensor, matching what GradientReversal.__init__ stores.
        self.domain_classification[0].alpha = torch.as_tensor(new_alpha, dtype=torch.float32)

    def update_model_params(self, domain_weight, moral_weight, reconstruction_weight, identity_weight):
        """Replace the multipliers of the four loss terms."""
        self.domain_weight = domain_weight
        self.moral_weight = moral_weight
        self.reconstruction_weight = reconstruction_weight
        # forward() reads `weight_identity`; keep both names in sync so the
        # new value actually takes effect.
        self.weight_identity = identity_weight
        self.identity_weight = identity_weight

    def forward(self, b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels, original_bert_embeddings=None, test=False):
        """Compute the training loss, or the moral loss and logits when `test` is True.

        Returns:
            `(loss_moral, logits)` if `test` is True, otherwise the weighted sum
            of the moral, reconstruction, identity and domain losses.
        """
        if self.freeze:
            with torch.no_grad():
                pooled_output = self.bert(b_input_ids,
                                    token_type_ids = b_token_type_ids,
                                    attention_mask = b_input_mask).last_hidden_state[:,0,:]

        else:
            pooled_output = self.bert(b_input_ids,
                                token_type_ids = b_token_type_ids,
                                attention_mask = b_input_mask).last_hidden_state[:,0,:]

        pooled_output = self.invariant_trans(pooled_output)

        logits = self.moral_classification(pooled_output)

        loss_moral = self.loss_fn_moral(logits, b_labels)

        if test:
            return loss_moral, logits
        if self.domain_weight>0:
            loss_domain = self.loss_fn_domain(self.domain_classification(pooled_output), b_domain_labels)
        else:
            loss_domain=0
        if original_bert_embeddings is not None:
            loss_reconstruction = self.loss_reconstruction(self.reconstruction_feed(pooled_output), original_bert_embeddings)*self.reconstruction_weight
        else:
            loss_reconstruction=0
        if self.weight_identity>0:
            # Penalise the distance of the invariant transformation from the identity map.
            loss_identity = torch.norm(self.invariant_trans.weight-self.identity)*self.weight_identity
        else:
            loss_identity=0
        total_loss = loss_moral*self.moral_weight+loss_reconstruction+loss_identity+self.domain_weight*loss_domain
        return  total_loss

# Single Label Experiments

In [75]:
batch_size = 16


def adjust_domain_label(labels, test_domain):
    """Re-index domain ids so they stay contiguous once ``test_domain`` is held out.

    Ids smaller than ``test_domain`` are kept; every other id (including
    ``test_domain`` itself) is decreased by one.

    Args:
        labels: tensor of domain ids (anything exposing ``.tolist()``).
        test_domain: id of the held-out domain.

    Returns:
        A new tensor built from the re-indexed ids.
    """
    remapped = []
    for domain_id in labels.tolist():
        remapped.append(domain_id if domain_id < test_domain else domain_id - 1)
    return torch.tensor(remapped)

# Leave-one-domain-out split: rows of the held-out domain become the validation
# set, all remaining rows form the training set.
# NOTE(review): `test_domain` doubles as on/off flag and domain id. If the flag is
# set to False, the indices below are not assigned, yet `False == 0` still selects
# the domain-0 suffix.
test_domain = True

if test_domain:
    test_domain = 1  # Change this for testing in different domains
    train_idx = df.loc[df.domain != test_domain].index
    val_idx = df.loc[df.domain == test_domain].index

# Checkpoint-name suffix keyed by the held-out domain id.
suffix_by_test_domain = {
    0: "_tuned_with_reddit_FB_posts_MFT_10",
    1: "_tuned_with_twitter_FB_posts_MFT_10",
    2: "_tuned_with_twitter_reddit_posts_MFT_10",
}

if test_domain in suffix_by_test_domain:
    suffix = suffix_by_test_domain[test_domain]
    print(suffix)

_tuned_with_twitter_FB_posts_MFT_10


### Training:

In [65]:
# Gradient scaler for mixed precision training.
# NOTE(review): the training loop in this cell never calls `scaler`; the
# forward/backward pass below runs without autocast.
scaler = GradScaler()

# Debugging aid: ask CUDA to run kernel launches synchronously.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

possible_labels = ["care", "harm", "fairness", "cheating", "loyalty", "betrayal",
                   "authority", "subversion", "purity", "degradation"]

# Embeddings of the training texts from the pretrained encoder; computed once
# (on the first label) and reused as the reconstruction target for every label.
bert_original_embeddings = None
adversarial = True

# ---- Hyper-parameters (identical for every label, so defined once) ----
num_labels = 2
num_domains = 2
epochs = 5
batch_size = 16
rw = 0.1   # reconstruction_weight
iw = 0.01  # identity_weight
dw = 0.1   # domain_weight
# To grid-search the loss weights instead, iterate over
# product([0.1, 0.5], [0.01, 0.5], [0.1, 1]) and unpack each tuple into rw, iw, dw.

# Dynamic alpha for gradient reversal: starts at `initial_alpha` and grows by
# `alpha_growth_rate` after each epoch.
initial_alpha = 0.1
alpha_growth_rate = 0.1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def pick_the_model(adversarial, model, class_weight, epoch):
    """Configure ``model``'s loss weights for adversarial or plain training.

    With ``adversarial`` true the module-level ``dw``/``rw``/``iw`` weights are
    applied; otherwise all auxiliary weights are set to 0. ``class_weight`` and
    ``epoch`` are accepted for call compatibility but not used.

    Returns:
        The same model, moved to ``device``.
    """
    if adversarial:
        print('Adversarial', adversarial)
        model.update_model_params(domain_weight=dw, moral_weight=1,
                                  reconstruction_weight=rw, identity_weight=iw)
    else:
        model.update_model_params(domain_weight=0, moral_weight=1,
                                  reconstruction_weight=0, identity_weight=0)
        print('Adversarial', adversarial)
    return model.to(device)


# One binary classifier is trained per moral label.
for lab_idx, lab in enumerate(possible_labels):
    # To train a single label only, `continue` here for every other `lab`.
    best_f1 = 0
    epoch_count = 0

    print(f"Number of Domains: {num_domains}")
    print('Models for predicting 10 Moral Dimensions with BERT Base')
    print(f'Parameters: reconstruction_weight = {rw}; identity_weight = {iw}; domain_weight = {dw}.')

    # Fresh pretrained encoder for every label.
    bert_model = AutoModel.from_pretrained(base_model).to(device)

    if bert_original_embeddings is None:
        bert_original_embeddings = encode_corpus(original_input_id[train_idx],
                                                 original_attention_masks[train_idx],
                                                 bert_model)

    # Binary targets for the current label.
    train_targets = [1 if ex[lab_idx] else 0 for ex in labels[train_idx]]
    val_targets = [1 if ex[lab_idx] else 0 for ex in labels[val_idx]]

    # Class 0 is weighted by the positive fraction and class 1 by the negative
    # fraction, i.e. the rarer class receives the larger weight.
    pos_weight = sum(train_targets) / len(train_targets)
    class_weight = [pos_weight, 1 - pos_weight]

    train_set = TensorDataset(original_input_id[train_idx],
                              original_token_type_id[train_idx],
                              original_attention_masks[train_idx],
                              torch.tensor(train_targets),
                              adjust_domain_label(labels2[train_idx], test_domain),
                              bert_original_embeddings)

    val_set = TensorDataset(original_input_id[val_idx],
                            original_token_type_id[val_idx],
                            original_attention_masks[val_idx],
                            torch.tensor(val_targets),
                            labels2[val_idx])

    # Prepare DataLoader
    train_dataloader = DataLoader(
        train_set,
        sampler=RandomSampler(train_set),
        batch_size=batch_size
    )

    validation_dataloader = DataLoader(
        val_set,
        batch_size=batch_size
    )

    total_steps = len(train_dataloader) * epochs

    model = AdversarialBERT(bert_model, moral_label=num_labels,
                            domain_label=num_domains, domain_weight=dw, moral_weight=0.5,
                            reconstruction_weight=rw, identity_weight=iw,
                            alpha=initial_alpha, class_weight=class_weight,
                            freeze_bert=False).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch in trange(epochs, desc='Epoch'):
        print('Epoch: ', epoch)
        model = pick_the_model(adversarial, model, class_weight, epoch)
        model.train()
        current_alpha = torch.tensor(initial_alpha + alpha_growth_rate * epoch, requires_grad=False).to(device)
        model.update_alpha(current_alpha)

        tr_loss, nb_tr_examples, nb_tr_steps = 0, 0, 0

        for step, batch in enumerate(train_dataloader):
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels, bert_embeddings = batch
            optimizer.zero_grad()
            loss = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels, bert_embeddings)

            loss.backward()
            optimizer.step()
            # The scheduler must advance *after* the optimizer update; stepping it
            # first skips the first learning-rate value and makes the final update
            # of the linear schedule run with lr == 0.
            scheduler.step()

            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1

        # Validation step
        model.eval()
        val_loss, nb_val_examples, nb_val_steps = 0, 0, 0
        y_true, y_pred, logits_list = [], [], []

        for batch in validation_dataloader:
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels = batch

            with torch.no_grad():
                with autocast():
                    loss, logits = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels, test=True)

            val_loss += loss.item()
            logits = logits.detach().cpu().numpy()
            nb_val_examples += b_input_ids.size(0)
            nb_val_steps += 1
            y_true.extend(b_labels.to('cpu').numpy())
            y_pred.extend(np.argmax(logits, axis=1))
            logits_list.extend(logits)

        epoch_count = epoch_count + 1

        print('Evaluation')

        # Single-LABEL CLASSIFICATION REPORT
        #################################
        target_names = [f"Non-{lab}", lab]
        report = classification_report(y_true, y_pred, target_names=target_names)
        f1 = f1_score(y_true, y_pred, average="binary")
        print('F1 score', f1)
        print("\nClassification Report:")
        print(report)
        #################################

        # Keep only the checkpoint with the best binary F1 on the held-out domain.
        if f1 > best_f1:
            best_f1 = f1
            print('best_F1', best_f1)
            print("We are saving the state of this model")
            torch.save(model.state_dict(), f"path/to/saved_models/{lab}{suffix}.bin")

        try:
            print('\n\t - Train loss: {:.4f}'.format(tr_loss / nb_tr_steps))
            print('\t - Validation loss: {:.4f}'.format(val_loss / nb_val_steps))

        except ZeroDivisionError:
            # Only reachable when a dataloader yielded no batches.
            print("No predicted positives...")

# Predictions

In [67]:
# Evaluation frame: the rows of `df` whose `domain` equals 1.
test_df = df.loc[df["domain"] == 1]

In [69]:
# Display the (rows, columns) shape of the evaluation frame.
test_df.shape

(13994, 13)

In [70]:
# pd.merge(test_df, lib_opp, how = 'inner', on = 'text')

In [71]:
base_model = "bert-base-uncased"

# Texts of the evaluation frame that get tokenized further down in this cell.
input_files = test_df.loc[:, "cleaned_text"].values

tokenizer = AutoTokenizer.from_pretrained(base_model)
bert_model = AutoModel.from_pretrained(base_model)

# Per-sample encodings of the original texts (without the augmented dictionary terms).
original_input_id, original_attention_masks, original_token_type_id = [], [], []

# Containers for encodings with the dictionary terms (not filled in this cell).
input_id, attention_masks, token_type_id = [], [], []

def preprocessing(input_text, tokenizer):
    '''
    Encode one text with ``tokenizer.encode_plus`` using fixed options.

    The text is wrapped with special tokens and padded / truncated to exactly
    150 tokens; the result is requested as PyTorch tensors and includes:
    - input_ids: token ids
    - token_type_ids: token type ids
    - attention_mask: 0/1 flags marking which tokens the model should attend to
    '''
    encode_options = dict(
        add_special_tokens=True,
        max_length=150,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors='pt',
    )
    return tokenizer.encode_plus(input_text, **encode_options)

# Encode every text individually and collect the per-sample tensors.
for sample in input_files:
    encoding = preprocessing(sample, tokenizer)
    sample_ids = encoding['input_ids']

    original_input_id.append(sample_ids)
    original_attention_masks.append(encoding['attention_mask'])
    # Token type ids are all zeros, with the same shape as the input ids.
    original_token_type_id.append(torch.zeros_like(sample_ids))

# Stack the per-sample tensors along the first dimension.
original_input_id = torch.cat(original_input_id, dim=0)
original_attention_masks = torch.cat(original_attention_masks, dim=0)
original_token_type_id = torch.cat(original_token_type_id, dim=0)

# Label matrix: the columns from "care" through "degradation".
labels = test_df.loc[:, "care":"degradation"].values
# Placeholder domain label (0) for every evaluation sample.
labels2 = torch.zeros(len(input_files), dtype=torch.long)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [74]:
# Display the checkpoint-name suffix currently in scope (used to build the checkpoint path below).
suffix

'_tuned_with_reddit_FB_posts_MFT_10'

In [76]:

# Per-label inference on the evaluation set with a decision-threshold sweep.
# Relies on names from earlier cells: labels, labels2, original_* tensors,
# bert_model, batch_size, suffix, num_labels, num_domains, dw, rw, iw.
possible_labels = ["care", "harm", "fairness", "cheating", "loyalty", "betrayal",
                   "authority", "subversion", "purity", "degradation"]
predictions = []

# Candidate decision thresholds; use a single 0.5 for a standard threshold.
# NOTE(review): the best threshold is chosen against the labels of the very set
# being scored, so the reported numbers are optimistic.
thresholds = np.arange(0.05, 1, 0.05)

checkpoint_folder = "Path/to/MoralBERT/Checkpoints/"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for lab_idx, lab in enumerate(possible_labels):
    # Binary targets for the current label.
    new_labels = [1 if ex[lab_idx] else 0 for ex in labels]

    val_set = TensorDataset(original_input_id,
                            original_token_type_id,
                            original_attention_masks,
                            torch.tensor(new_labels),
                            labels2)

    validation_dataloader = DataLoader(
        val_set,
        batch_size=batch_size
    )

    # Load this label's checkpoint once; the logits do not depend on the
    # threshold, so inference is run a single time per label.
    model_checkpoint = f"{checkpoint_folder}model_bert_{lab}{suffix}.bin"
    model = AdversarialBERT(bert_model, moral_label=num_labels,
                            domain_label=num_domains, domain_weight=dw, moral_weight=1,
                            reconstruction_weight=rw, identity_weight=iw,
                            alpha=0, class_weight=[0, 0],
                            freeze_bert=False).to(device)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_checkpoint, map_location=device))
    model.eval()

    y_true, positive_probs = [], []
    for batch in validation_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels = batch

        with torch.no_grad():
            _, logits = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels, b_domain_labels, test=True)

        logits = logits.detach().cpu().numpy()
        y_true.extend(b_labels.to('cpu').numpy())
        # Probability of the positive class (column 1).
        positive_probs.extend(softmax(logits, axis=1)[:, 1])

    # float64 keeps the comparison against the float64 thresholds exact.
    positive_probs = np.asarray(positive_probs, dtype=np.float64)

    # Start below any attainable F1 so the first threshold is always recorded;
    # otherwise a label whose F1 is 0 everywhere would leave best_y undefined
    # (or silently reuse the previous label's predictions).
    best_f1, best_y, best_th = -1.0, None, None
    for th in thresholds:
        y_pred = list(positive_probs > th)
        f1 = f1_score(y_true, y_pred, average="binary")
        if f1 > best_f1:
            best_f1, best_y, best_th = f1, y_pred, th
    f1 = best_f1

    # The first label creates one record per example; later labels extend the
    # record at the same position.
    for ex_id, (l, g) in enumerate(zip(best_y, y_true)):
        if not lab_idx:
            predictions.append({"pred_" + lab: l, "true_" + lab: g, "id": ex_id})
        else:
            predictions[ex_id]["pred_" + lab] = l
            predictions[ex_id]["true_" + lab] = g

    print('Evaluation')

    # Single-LABEL CLASSIFICATION REPORT
    #################################
    print(f"best threshold: {best_th}")
    target_names = [f"Non-{lab}", lab]
    report = classification_report(y_true, best_y, target_names=target_names)
    print("\nClassification Report:")
    print(report)
    #################################

pred_df = pd.DataFrame(predictions)

 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invari

 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invari

 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
 self.invariant_trans  Linear(in_features=768, out_features=768, bias=True)
Evaluation
best threshold: 0.6000000000000001

Classification Report:
                 precision    recall  f1-score   support

Non-degradation       0.99      0.99      0.99     13807
    degradation       0.34      0.39      0.36       187

       accuracy                           0.98     13994
      macro avg       0.66      0.69      0.67     13994
   weighted avg       0.98      0.98      0.98     13994



In [77]:
# Remove the cap on the number of columns shown when a DataFrame is displayed.
pd.set_option('display.max_columns', None)


In [79]:
# (column name, scorer, averaging mode) for every averaged metric, in column order.
metric_columns = (
    ("F1 Score (Binary)", f1_score, "binary"),
    ("F1 Score (Weighted)", f1_score, "weighted"),
    ("Precision Score (Binary)", precision_score, "binary"),
    ("Precision Score (Weighted)", precision_score, "weighted"),
    ("Recall Score (Binary)", recall_score, "binary"),
    ("Recall Score (Weighted)", recall_score, "weighted"),
)

results = []

# One row per moral label: ground truth from test_df against the stored predictions.
for lab in possible_labels:
    true = test_df[lab].values
    candidate = pred_df["pred_" + lab].values

    result = {"Moral Value": lab}
    for column, scorer, averaging in metric_columns:
        result[column] = scorer(true, candidate, average=averaging)
    result["Accuracy"] = accuracy_score(true, candidate)

    results.append(result)

results = pd.DataFrame(results)

In [80]:
# Display the per-label metric table.
results

Unnamed: 0,Moral Value,F1 Score (Binary),F1 Score (Weighted),Precision Score (Binary),Precision Score (Weighted),Recall Score (Binary),Recall Score (Weighted),Accuracy
0,care,0.373762,0.930666,0.343572,0.934008,0.409769,0.927683,0.927683
1,harm,0.399594,0.890806,0.304281,0.916965,0.581854,0.873303,0.873303
2,fairness,0.180109,0.902528,0.130186,0.927824,0.292135,0.881592,0.881592
3,cheating,0.236181,0.899144,0.204526,0.907852,0.279429,0.891382,0.891382
4,loyalty,0.215962,0.974588,0.248649,0.973182,0.190871,0.976133,0.976133
5,betrayal,0.226027,0.981552,0.317308,0.979819,0.175532,0.98385,0.98385
6,authority,0.123162,0.944694,0.088391,0.959101,0.20303,0.931828,0.931828
7,subversion,0.220762,0.958897,0.207921,0.960217,0.235294,0.957625,0.957625
8,purity,0.134752,0.985073,0.104396,0.987778,0.19,0.982564,0.982564
9,degradation,0.359102,0.982244,0.336449,0.982899,0.385027,0.981635,0.981635


### Bootstrapping:

In [82]:
# Bootstrap estimate of the variability of every metric, per moral label.
# Relies on `test_df` (ground truth) and `pred_df` (predictions) from earlier cells.
possible_labels = ["care", "harm", "fairness", "cheating", "loyalty", "betrayal",
                   "authority", "subversion", "purity", "degradation"]

# Fixed seed so the reported standard deviations are reproducible across runs.
BOOTSTRAP_SEED = 42
bootstrap_rng = np.random.RandomState(BOOTSTRAP_SEED)

# Positional index so ground truth and predictions line up row by row.
test_df = test_df.reset_index(drop=True)
assert len(pred_df) == len(test_df), "predictions and ground truth must have the same number of rows"

n_bootstrap_iters = 1000  # Number of bootstrap iterations


def _averaged(score_fn, average):
    """Bind ``average`` and ``zero_division=0`` to a scikit-learn score function."""
    def scorer(y_true, y_pred):
        return score_fn(y_true, y_pred, average=average, zero_division=0)
    return scorer


# Metric name -> callable(y_true, y_pred); insertion order fixes the column order.
metric_fns = {}
for family, score_fn in (("F1", f1_score), ("Precision", precision_score), ("Recall", recall_score)):
    for average in ("binary", "macro", "weighted"):
        metric_fns[f"{family} ({average.capitalize()})"] = _averaged(score_fn, average)
metric_fns["Accuracy"] = accuracy_score

bootstrap_results = {label: {metric: [] for metric in metric_fns} for label in possible_labels}

# Pull the columns out once; every bootstrap draw then indexes plain arrays.
true_by_label = {lab: test_df[lab].values for lab in possible_labels}
pred_by_label = {lab: pred_df[f"pred_{lab}"].values for lab in possible_labels}
row_positions = np.arange(len(test_df))

for _ in range(n_bootstrap_iters):
    for lab in possible_labels:
        # resampling with replacement (a fresh draw for every label)
        sample_indices = resample(row_positions, replace=True, random_state=bootstrap_rng)
        true = true_by_label[lab][sample_indices]
        candidate = pred_by_label[lab][sample_indices]

        # computing metrics for bootstrap sample
        for metric, metric_fn in metric_fns.items():
            bootstrap_results[lab][metric].append(metric_fn(true, candidate))

# standard deviations calculations from bootstrap results
std_devs = {label: {metric: np.std(values) for metric, values in metrics.items()} for label, metrics in bootstrap_results.items()}

# original metrics calculations with standard deviations
final_results = []
for lab in possible_labels:
    result = {"Moral Value": lab}
    true = true_by_label[lab]
    candidate = pred_by_label[lab]

    for metric, metric_fn in metric_fns.items():
        # "F1 (Binary)" is reported as "F1 Score (Binary)"; "Accuracy" keeps its name.
        column = metric if metric == "Accuracy" else metric.replace(" (", " Score (")
        result[column] = f"{metric_fn(true, candidate):.2f} ± {std_devs[lab][metric]:.2f}"

    final_results.append(result)

results_df = pd.DataFrame(final_results)

In [83]:
# Display the metric table with bootstrap standard deviations.
results_df

Unnamed: 0,Moral Value,F1 Score (Binary),F1 Score (Macro),F1 Score (Weighted),Precision Score (Binary),Precision Score (Macro),Precision Score (Weighted),Recall Score (Binary),Recall Score (Macro),Recall Score (Weighted),Accuracy
0,care,0.37 ± 0.02,0.67 ± 0.01,0.93 ± 0.00,0.34 ± 0.02,0.66 ± 0.01,0.93 ± 0.00,0.41 ± 0.02,0.68 ± 0.01,0.93 ± 0.00,0.93 ± 0.00
1,harm,0.40 ± 0.01,0.66 ± 0.01,0.89 ± 0.00,0.30 ± 0.01,0.63 ± 0.01,0.92 ± 0.00,0.58 ± 0.02,0.74 ± 0.01,0.87 ± 0.00,0.87 ± 0.00
2,fairness,0.18 ± 0.01,0.56 ± 0.01,0.90 ± 0.00,0.13 ± 0.01,0.55 ± 0.00,0.93 ± 0.00,0.29 ± 0.02,0.60 ± 0.01,0.88 ± 0.00,0.88 ± 0.00
3,cheating,0.24 ± 0.01,0.59 ± 0.01,0.90 ± 0.00,0.20 ± 0.01,0.58 ± 0.01,0.91 ± 0.00,0.28 ± 0.01,0.60 ± 0.01,0.89 ± 0.00,0.89 ± 0.00
4,loyalty,0.22 ± 0.03,0.60 ± 0.01,0.97 ± 0.00,0.25 ± 0.03,0.62 ± 0.02,0.97 ± 0.00,0.19 ± 0.03,0.59 ± 0.01,0.98 ± 0.00,0.98 ± 0.00
5,betrayal,0.23 ± 0.03,0.61 ± 0.02,0.98 ± 0.00,0.32 ± 0.04,0.65 ± 0.02,0.98 ± 0.00,0.18 ± 0.03,0.59 ± 0.01,0.98 ± 0.00,0.98 ± 0.00
6,authority,0.12 ± 0.01,0.54 ± 0.01,0.94 ± 0.00,0.09 ± 0.01,0.53 ± 0.01,0.96 ± 0.00,0.20 ± 0.02,0.58 ± 0.01,0.93 ± 0.00,0.93 ± 0.00
7,subversion,0.22 ± 0.02,0.60 ± 0.01,0.96 ± 0.00,0.21 ± 0.02,0.59 ± 0.01,0.96 ± 0.00,0.24 ± 0.02,0.61 ± 0.01,0.96 ± 0.00,0.96 ± 0.00
8,purity,0.13 ± 0.03,0.56 ± 0.01,0.99 ± 0.00,0.10 ± 0.02,0.55 ± 0.01,0.99 ± 0.00,0.19 ± 0.04,0.59 ± 0.02,0.98 ± 0.00,0.98 ± 0.00
9,degradation,0.36 ± 0.03,0.67 ± 0.02,0.98 ± 0.00,0.34 ± 0.03,0.66 ± 0.02,0.98 ± 0.00,0.39 ± 0.04,0.69 ± 0.02,0.98 ± 0.00,0.98 ± 0.00
