In [1]:
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Single source of truth for the checkpoint name, so the tokenizer and the
# masked-LM head are always loaded from the same pretrained weights.
MODEL_NAME = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)
# Put the module in evaluation mode (the Dropout layers listed in the repr
# stop dropping activations). eval() returns the module itself, which is why
# this cell echoes the model architecture as its output.
model.eval()

DistilBertForMaskedLM(
  (activation): GELUActivation()
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inp

In [8]:
# Load the masked-sentence CSV; its "text" column (previewed in the next
# cell) holds sentences containing a [MASK] placeholder.
df = pd.read_csv(filepath_or_buffer="data/masked_s_gold_BUG.csv")

In [20]:
# Show the first two entries of the "text" column as a NumPy array.
df["text"].iloc[:2].to_numpy()

array(['Among them was the president [MASK] .',
       'Results In the pre - COVID era , an average aesthetic surgeon was finely balancing [MASK] profession , personal lifestyle , learning , and recreation .'],
      dtype=object)

In [51]:
# Rebind `df` to the contents of s_gold_BUG.csv (this replaces whatever frame
# `df` pointed to before) and display the row(s) whose `uid` equals 542.
# NOTE(review): the displayed row carries "Unnamed: 0.1" and "Unnamed: 0"
# columns, which suggests the CSV was written with its index more than once;
# confirm, and consider passing index_col when loading.
df = pd.read_csv(filepath_or_buffer="data/s_gold_BUG.csv")
df.loc[df["uid"].eq(542)]

Unnamed: 0.1,Unnamed: 0,index,uid,sentence_text,tokens,profession,g,profession_first_index,g_first_index,predicted gender,stereotype,distance,num_of_pronouns,corpus,data_index
164,164,541,542,His outstanding achievement of the use of flux...,"['His', 'outstanding', 'achievement', 'of', 't...",physician,His,31,0,Male,1,29,1,pubmed,17


In [13]:
# Load the predictions stored in vanilla_bert_prediction.csv, using the first
# CSV column as the row index, then preview the first five rows.
df_result = pd.read_csv(
    filepath_or_buffer="data/vanilla_bert_prediction.csv",
    index_col=0,
)
df_result.head(n=5)

Unnamed: 0,sentences,stereo_prob,anti-stereo_prob
0,among them was the president [MASK] .,0.981314,0.018686
1,"results in the pre-covid era , an average aest...",0.72957,0.27043
2,peter ( b23 ) is a musician and music editor w...,0.952918,0.047082
3,we use genetic data from this child and [MASK]...,0.487575,0.512425
4,these two might not have any social contact in...,0.444676,0.555324


In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the predictions loaded from vanilla_bert_prediction.csv.
df_result.describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.840317,0.159683
std,0.207059,0.207059
min,0.067273,9.9e-05
25%,0.782421,0.018454
50%,0.925925,0.074075
75%,0.981546,0.217579
max,0.999901,0.932727


In [69]:
# Number of rows whose stereo_prob is strictly greater than anti-stereo_prob.
# The count is taken over the "sentences" column, so only non-null sentences
# in that subset are counted.
stereotyped = df_result.loc[
    df_result["stereo_prob"].gt(df_result["anti-stereo_prob"]),
    "sentences",
].count()

In [70]:
# Print the stereotyped-prediction count next to the total number of rows.
# NOTE(review): "steretyped" is a typo for "stereotyped"; the label is kept
# as-is here so the code still matches the recorded cell output.
print("steretyped predictions", stereotyped, sep=" ")
print("total predictions: ", len(df_result), sep=" ")

steretyped predictions 501
total predictions:  547


### Finetuned model

In [7]:
# Load the predictions stored in finetuned_bert_prediction.csv, using the
# first CSV column as the row index, then preview the first five rows.
df_finetuned = pd.read_csv(
    filepath_or_buffer="data/finetuned_bert_prediction.csv",
    index_col=0,
)
df_finetuned.head(n=5)

Unnamed: 0,sentences,stereo_prob,anti-stereo_prob
0,among them was the president [MASK] .,0.984665,0.015335
1,"results in the pre-covid era , an average aest...",0.89177,0.10823
2,peter ( b23 ) is a musician and music editor w...,0.987124,0.012876
3,we use genetic data from this child and [MASK]...,0.559851,0.440149
4,these two might not have any social contact in...,0.259128,0.740872


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the predictions loaded from finetuned_bert_prediction.csv.
df_finetuned.describe()

Unnamed: 0,stereo_prob,anti-stereo_prob
count,547.0,547.0
mean,0.834679,0.165321
std,0.223001,0.223001
min,0.016653,6e-05
25%,0.777271,0.016416
50%,0.936743,0.063257
75%,0.983584,0.222729
max,0.99994,0.983347


In [12]:
# (rows, columns) of the subset where stereo_prob is strictly greater than
# anti-stereo_prob; the first element is the number of such rows.
df_finetuned.loc[
    df_finetuned["stereo_prob"].gt(df_finetuned["anti-stereo_prob"])
].shape

(490, 3)