In [1]:
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [39]:
import numpy as np
import pandas as pd

In [3]:
# Single source of truth for the checkpoint so the tokenizer and the model cannot drift apart.
MODEL_NAME = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)
# Put the model in evaluation mode (turns off the Dropout layers shown in the repr below).
# Being the cell's last expression, the returned module is also displayed.
model.eval()

DistilBertForMaskedLM(
  (activation): GELUActivation()
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inp

In [8]:
# Load data/masked_s_gold_BUG.csv into `df`; its "text" column is previewed in the next cell.
# NOTE: `df` is rebound to a different CSV further down, so cell order matters.
df = pd.read_csv("data/masked_s_gold_BUG.csv")

In [20]:
# Peek at the first two entries of the "text" column as a raw array.
df["text"].iloc[:2].values

array(['Among them was the president [MASK] .',
       'Results In the pre - COVID era , an average aesthetic surgeon was finely balancing [MASK] profession , personal lifestyle , learning , and recreation .'],
      dtype=object)

In [51]:
# NOTE: rebinds `df` (previously data/masked_s_gold_BUG.csv) to data/s_gold_BUG.csv.
df = pd.read_csv("data/s_gold_BUG.csv")
# Display the row(s) whose uid equals 542.
df.loc[df["uid"].eq(542)]

Unnamed: 0.1,Unnamed: 0,index,uid,sentence_text,tokens,profession,g,profession_first_index,g_first_index,predicted gender,stereotype,distance,num_of_pronouns,corpus,data_index
164,164,541,542,His outstanding achievement of the use of flux...,"['His', 'outstanding', 'achievement', 'of', 't...",physician,His,31,0,Male,1,29,1,pubmed,17


In [13]:
# Read data/vanilla_bert_prediction.csv, using the CSV's first column as the row index.
df_result = pd.read_csv("data/vanilla_bert_prediction.csv", index_col=0)
# Preview the leading rows (head() defaults to five).
df_result.head()

Unnamed: 0,sentences,stereo_prob,anti-stereo_prob
0,among them was the president [MASK] .,0.981314,0.018686
1,"results in the pre-covid era , an average aest...",0.72957,0.27043
2,peter ( b23 ) is a musician and music editor w...,0.952918,0.047082
3,we use genetic data from this child and [MASK]...,0.487575,0.512425
4,these two might not have any social contact in...,0.444676,0.555324


In [14]:
# Summary statistics of df_result's numeric columns (stereo_prob and anti-stereo_prob in the output below).
df_result.describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.840317,0.159683
std,0.207059,0.207059
min,0.067273,9.9e-05
25%,0.782421,0.018454
50%,0.925925,0.074075
75%,0.981546,0.217579
max,0.999901,0.932727


In [69]:
# Number of non-null sentences whose stereo_prob is strictly greater than anti-stereo_prob.
stereotyped = df_result.loc[
    df_result["stereo_prob"].gt(df_result["anti-stereo_prob"]), "sentences"
].count()

In [70]:
# Report the stereotyped count next to the total number of prediction rows.
print("steretyped predictions", stereotyped)
print("total predictions: ", len(df_result))

steretyped predictions 501
total predictions:  547


### Finetuned on Gold (prediction Gold)

In [7]:
# Read data/finetuned_bert_prediction.csv, using the CSV's first column as the row index.
df_finetuned = pd.read_csv("data/finetuned_bert_prediction.csv", index_col=0)
# Preview the leading rows (head() defaults to five).
df_finetuned.head()

Unnamed: 0,sentences,stereo_prob,anti-stereo_prob
0,among them was the president [MASK] .,0.984665,0.015335
1,"results in the pre-covid era , an average aest...",0.89177,0.10823
2,peter ( b23 ) is a musician and music editor w...,0.987124,0.012876
3,we use genetic data from this child and [MASK]...,0.559851,0.440149
4,these two might not have any social contact in...,0.259128,0.740872


In [9]:
# Summary statistics of df_finetuned's numeric columns (stereo_prob and anti-stereo_prob in the output below).
df_finetuned.describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.834679,0.165321
std,0.223001,0.223001
min,0.016653,6e-05
25%,0.777271,0.016416
50%,0.936743,0.063257
75%,0.983584,0.222729
max,0.99994,0.983347


In [12]:
# (rows, columns) of the subset where stereo_prob is strictly greater than anti-stereo_prob.
df_finetuned.loc[df_finetuned["stereo_prob"].gt(df_finetuned["anti-stereo_prob"])].shape

(490, 3)

### Finetuned on Full (prediction Gold)

In [42]:
# Read data/finetuned_bert_prediction_gold.csv, using the CSV's first column as the row index.
df_finetuned_fg = pd.read_csv("data/finetuned_bert_prediction_gold.csv", index_col=0)
# Summary statistics rounded to four decimals; tiny probabilities therefore show as 0.0 in the output.
df_finetuned_fg.describe().round(4)

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.1532,0.8468
std,0.3182,0.3182
min,0.0,0.0
25%,0.0,0.9625
50%,0.0006,0.9994
75%,0.0375,1.0
max,1.0,1.0


In [51]:
# (rows, columns) of the subset where stereo_prob is strictly greater than anti-stereo_prob.
df_finetuned_fg.loc[df_finetuned_fg["stereo_prob"].gt(df_finetuned_fg["anti-stereo_prob"])].shape

(81, 3)

In [52]:
# NOTE(review): this rebinds `df_finetuned_fg`; after this cell the name holds
# data/attention/intervene_bert_prediction_gold.csv, not data/finetuned_bert_prediction_gold.csv.
df_finetuned_fg = pd.read_csv("data/attention/intervene_bert_prediction_gold.csv", index_col=0)
# The former bare `df_finetuned_fg.describe()` statement was removed: it was not the
# cell's last expression, so its result was computed and silently discarded.
# (rows, columns) of the subset where stereo_prob is strictly greater than anti-stereo_prob.
df_finetuned_fg[df_finetuned_fg["stereo_prob"] > df_finetuned_fg["anti-stereo_prob"]].shape

(86, 3)

### Finetuned on Full (prediction Full)

In [46]:
# Read data/finetuned_bert_prediction_full.csv, using the CSV's first column as the row index.
df_finetuned_ff = pd.read_csv("data/finetuned_bert_prediction_full.csv", index_col=0)
# Summary statistics rounded to four decimals; tiny probabilities therefore show as 0.0 in the output.
df_finetuned_ff.describe().round(4)

Unnamed: 0,stereo_prob,anti-stereo_prob
count,31227.0,31227.0
mean,0.1586,0.8414
std,0.3288,0.3288
min,0.0,0.0
25%,0.0,0.9629
50%,0.0004,0.9996
75%,0.0371,1.0
max,1.0,1.0


### layer 1 (prediction Gold)

In [53]:
# Summary statistics for attention_bert_prediction_gold_0.csv (first CSV column used as the index).
# The `_0` suffix presumably maps to this section's "layer 1" — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_0.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.836821,0.163179
std,0.221968,0.221968
min,0.040832,8.5e-05
25%,0.783666,0.017186
50%,0.932611,0.067389
75%,0.982814,0.216334
max,0.999915,0.959168


In [54]:
# Summary statistics for attention_bert_prediction_gold_1.csv (first CSV column used as the index).
# The `_1` suffix is presumably a zero-based layer index — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_1.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.834364,0.165636
std,0.214807,0.214807
min,0.039864,0.000105
25%,0.778598,0.019024
50%,0.923669,0.076331
75%,0.980976,0.221402
max,0.999895,0.960136


In [55]:
# Summary statistics for attention_bert_prediction_gold_2.csv (first CSV column used as the index).
# The `_2` suffix is presumably a zero-based layer index — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_2.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.832015,0.167985
std,0.226769,0.226769
min,0.029152,0.000126
25%,0.785159,0.018415
50%,0.92833,0.07167
75%,0.981585,0.214841
max,0.999874,0.970848


In [56]:
# Summary statistics for attention_bert_prediction_gold_3.csv (first CSV column used as the index).
# The `_3` suffix is presumably a zero-based layer index — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_3.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.816065,0.183935
std,0.229786,0.229786
min,0.028045,6e-05
25%,0.732196,0.019285
50%,0.917013,0.082987
75%,0.980715,0.267804
max,0.99994,0.971955


In [57]:
# Summary statistics for attention_bert_prediction_gold_4.csv (first CSV column used as the index).
# The `_4` suffix is presumably a zero-based layer index — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_4.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.62739,0.37261
std,0.335629,0.335629
min,0.001294,7.3e-05
25%,0.287467,0.053221
50%,0.734399,0.265601
75%,0.946779,0.712533
max,0.999927,0.998706


In [58]:
# Summary statistics for attention_bert_prediction_gold_5.csv (first CSV column used as the index).
# The `_5` suffix is presumably a zero-based layer index — TODO confirm.
pd.read_csv("data/attention/attention_bert_prediction_gold_5.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.839899,0.160101
std,0.23784,0.23784
min,0.011371,3.1e-05
25%,0.79661,0.010195
50%,0.951535,0.048465
75%,0.989805,0.20339
max,0.999969,0.988629


### accumulative (prediction Gold)

In [59]:
# Summary statistics for accum_bert_prediction_gold_0.csv (first CSV column used as the index).
# The displayed statistics are identical to those of attention_bert_prediction_gold_0.csv.
pd.read_csv("data/attention/accum_bert_prediction_gold_0.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.836821,0.163179
std,0.221968,0.221968
min,0.040832,8.5e-05
25%,0.783666,0.017186
50%,0.932611,0.067389
75%,0.982814,0.216334
max,0.999915,0.959168


In [63]:
# Summary statistics for accum_bert_prediction_gold_1.csv (first CSV column used as the index).
# Presumably accumulated up to index 1 — TODO confirm what the suffix denotes.
pd.read_csv("data/attention/accum_bert_prediction_gold_1.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.823144,0.176856
std,0.238034,0.238034
min,0.010115,8.1e-05
25%,0.764957,0.0184
50%,0.929685,0.070315
75%,0.9816,0.235043
max,0.999919,0.989885


In [64]:
# Summary statistics for accum_bert_prediction_gold_2.csv (first CSV column used as the index).
# Presumably accumulated up to index 2 — TODO confirm what the suffix denotes.
pd.read_csv("data/attention/accum_bert_prediction_gold_2.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.782861,0.217139
std,0.282629,0.282629
min,0.001497,6.3e-05
25%,0.685543,0.019394
50%,0.922181,0.077819
75%,0.980606,0.314457
max,0.999937,0.998503


In [65]:
# Summary statistics for accum_bert_prediction_gold_3.csv (first CSV column used as the index).
# Presumably accumulated up to index 3 — TODO confirm what the suffix denotes.
pd.read_csv("data/attention/accum_bert_prediction_gold_3.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.645345,0.354655
std,0.350022,0.350022
min,0.000257,2.6e-05
25%,0.330992,0.032813
50%,0.786659,0.213341
75%,0.967187,0.669008
max,0.999974,0.999743


In [66]:
# Summary statistics for accum_bert_prediction_gold_4.csv (first CSV column used as the index).
# Presumably accumulated up to index 4 — TODO confirm what the suffix denotes.
pd.read_csv("data/attention/accum_bert_prediction_gold_4.csv", index_col=0).describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.208768,0.791232
std,0.32589,0.32589
min,6e-06,1.6e-05
25%,0.001928,0.72622
50%,0.024544,0.975456
75%,0.27378,0.998072
max,0.999984,0.999994


In [68]:
# Summary statistics for accum_bert_prediction_gold_5.csv (first CSV column used as the index),
# rounded to four decimals; tiny probabilities therefore show as 0.0 in the output.
# Presumably accumulated up to index 5 — TODO confirm what the suffix denotes.
pd.read_csv("data/attention/accum_bert_prediction_gold_5.csv", index_col=0).describe().round(4)

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.1606,0.8394
std,0.3248,0.3248
min,0.0,0.0
25%,0.0,0.9526
50%,0.0007,0.9993
75%,0.0474,1.0
max,1.0,1.0
