In [2]:
import numpy as np
import requests
import torch
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Boilerplate: NLI model setup.
# Run on CUDA when torch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# The classifier and its tokenizer are loaded from the same pretrained checkpoint.
NLI_MODEL = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
TOKENIZER = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')


In [63]:
def get_nli_scores(premise,hypothesis,occupation,gender):
    """Score one premise/hypothesis pair with the module-level NLI model.

    Args:
        premise: Premise text, encoded as the first segment of the pair.
        hypothesis: Hypothesis text, encoded as the second segment.
        occupation: Not used in the computation; accepted so existing
            callers keep working.
        gender: Not used in the computation; accepted so existing callers
            keep working.

    Returns:
        A ``(contradiction_prob, neutral_prob, entailment_prob)`` tuple of
        floats, read from softmax columns 0, 1 and 2 respectively.
        NOTE(review): this assumes the checkpoint's label order is
        contradiction / neutral / entailment -- confirm against
        ``NLI_MODEL.config.id2label``.
    """
    # Encode the pair as a single sequence; over-long inputs are truncated.
    inputs = TOKENIZER.encode(premise, hypothesis, return_tensors='pt', truncation=True)
    inputs = inputs.to(DEVICE)
    # Kept inside the function so it works even if the model was never moved
    # to DEVICE elsewhere.
    NLI_MODEL.to(DEVICE)

    # Inference only: without no_grad every call would record an autograd
    # graph for the forward pass, costing memory and time for nothing.
    with torch.no_grad():
        logits = NLI_MODEL(inputs)[0]
        probs = logits.softmax(dim = 1)

    # .item() requires exactly one row, i.e. a single encoded pair per call.
    contradiction_prob = probs[:, 0].item()
    neutral_prob = probs[:, 1].item()
    entailment_prob = probs[:, 2].item()
    return contradiction_prob, neutral_prob, entailment_prob
    
    

In [49]:
# Load the type1 competence sentence file (tab-separated).
sentences_df = pd.read_csv("../data_new/type1_sentences_competence.tsv",sep='\t')
# Keep only rows whose gender is 'male' or 'female'. The .copy() makes the
# filtered frame independent, so adding columns to it later does not write
# into a slice of the loaded frame (avoids pandas' SettingWithCopyWarning).
sentences_df = sentences_df[sentences_df['gender'].isin(['male','female'])].copy()

In [50]:
# Score every row of sentences_df, collecting one list per NLI class.
contradiction_probs, neutral_probs, entailment_probs = [], [], []
for _, sentence_row in sentences_df.iterrows():
    # get_nli_scores returns (contradiction, neutral, entailment).
    pair_scores = get_nli_scores(
        sentence_row['premise'],
        sentence_row['hypothesis'],
        sentence_row['occupation'],
        sentence_row['gender'],
    )
    contradiction_probs.append(pair_scores[0])
    neutral_probs.append(pair_scores[1])
    entailment_probs.append(pair_scores[2])


manager
baker
secretary


In [51]:
# Attach one probability column per NLI class (same row order as the scoring
# loop), then write the augmented table as a tab-separated file.
for prob_column, prob_values in (
    ('contradiction_prob', contradiction_probs),
    ('neutral_prob', neutral_probs),
    ('entailment_prob', entailment_probs),
):
    sentences_df[prob_column] = prob_values
sentences_df.to_csv(
    '../data_new_results/type1_sentences_competence_results.tsv',
    sep='\t',
    index=False,
)


In [52]:
# Type1 incompetence sentences: load, score each premise/hypothesis pair,
# and save the table with the three NLI probabilities appended.
sentences_df = pd.read_csv("../data_new/type1_sentences_incompetence.tsv",sep='\t')
# Keep only male/female rows. .copy() makes the filtered frame independent so
# the column assignments below do not target a slice (SettingWithCopyWarning).
sentences_df = sentences_df[sentences_df['gender'].isin(['male','female'])].copy()

contradiction_probs = []
neutral_probs = []
entailment_probs = []
# Walk the four input columns in lockstep instead of rebuilding a row Series
# with .iloc[i] four times per iteration.
for premise, hypothesis, occupation, gender in zip(
    sentences_df['premise'],
    sentences_df['hypothesis'],
    sentences_df['occupation'],
    sentences_df['gender'],
):
    contradiction_prob, neutral_prob, entailment_prob = get_nli_scores(premise, hypothesis, occupation, gender)
    contradiction_probs.append(contradiction_prob)
    neutral_probs.append(neutral_prob)
    entailment_probs.append(entailment_prob)

# One probability column per NLI class, in the same row order as the inputs.
sentences_df['contradiction_prob'] = contradiction_probs
sentences_df['neutral_prob'] = neutral_probs
sentences_df['entailment_prob'] = entailment_probs
sentences_df.to_csv('../data_new_results/type1_sentences_incompetence_results.tsv', sep='\t', index=False)


accountant
engineer
inspector
manager
physician
carpenter
carpenter
nurse
plumber
chemist
appraiser
auditor
broker
secretary


In [48]:
# Gender neutral
# Load the second competence file (no gender filtering is applied here),
# score every row, and save the table with the NLI probabilities appended.
sentences_df = pd.read_csv("../data_new/type1_sentences_competence_2.tsv",sep='\t')

contradiction_probs, neutral_probs, entailment_probs = [], [], []
for _, sentence_row in sentences_df.iterrows():
    # get_nli_scores returns (contradiction, neutral, entailment).
    pair_scores = get_nli_scores(
        sentence_row['premise'],
        sentence_row['hypothesis'],
        sentence_row['occupation'],
        sentence_row['gender'],
    )
    contradiction_probs.append(pair_scores[0])
    neutral_probs.append(pair_scores[1])
    entailment_probs.append(pair_scores[2])

for prob_column, prob_values in (
    ('contradiction_prob', contradiction_probs),
    ('neutral_prob', neutral_probs),
    ('entailment_prob', entailment_probs),
):
    sentences_df[prob_column] = prob_values
sentences_df.to_csv(
    '../data_new_results/type1_sentences_competence_2_results.tsv',
    sep='\t',
    index=False,
)


manager
baker
secretary


In [None]:
# Type 3
# Load the type3 sentence file, score each premise/hypothesis pair, and save
# the table with the three NLI probabilities appended.
sentences_df = pd.read_csv("../data_new/type3_sentences.tsv",sep='\t')
# Keep only male/female rows. .copy() makes the filtered frame independent so
# the column assignments below do not target a slice (SettingWithCopyWarning).
sentences_df = sentences_df[sentences_df['gender'].isin(['male','female'])].copy()

contradiction_probs = []
neutral_probs = []
entailment_probs = []
# Walk the four input columns in lockstep instead of rebuilding a row Series
# with .iloc[i] four times per iteration.
for premise, hypothesis, occupation, gender in zip(
    sentences_df['premise'],
    sentences_df['hypothesis'],
    sentences_df['occupation'],
    sentences_df['gender'],
):
    contradiction_prob, neutral_prob, entailment_prob = get_nli_scores(premise, hypothesis, occupation, gender)
    contradiction_probs.append(contradiction_prob)
    neutral_probs.append(neutral_prob)
    entailment_probs.append(entailment_prob)

# One probability column per NLI class, in the same row order as the inputs.
sentences_df['contradiction_prob'] = contradiction_probs
sentences_df['neutral_prob'] = neutral_probs
sentences_df['entailment_prob'] = entailment_probs
sentences_df.to_csv('../data_new_results/type3_sentences_results.tsv', sep='\t', index=False)


In [64]:
# Type 2
# Load the type2 sentence file, score each premise/hypothesis pair, and save
# the table with the three NLI probabilities appended.
sentences_df = pd.read_csv("../data_new/type2_sentences.tsv",sep='\t')
# Keep only male/female rows. .copy() makes the filtered frame independent so
# the column assignments below do not target a slice (SettingWithCopyWarning).
sentences_df = sentences_df[sentences_df['gender'].isin(['male','female'])].copy()

contradiction_probs = []
neutral_probs = []
entailment_probs = []
# Walk the four input columns in lockstep instead of rebuilding a row Series
# with .iloc[i] four times per iteration.
for premise, hypothesis, occupation, gender in zip(
    sentences_df['premise'],
    sentences_df['hypothesis'],
    sentences_df['occupation'],
    sentences_df['gender'],
):
    contradiction_prob, neutral_prob, entailment_prob = get_nli_scores(premise, hypothesis, occupation, gender)
    contradiction_probs.append(contradiction_prob)
    neutral_probs.append(neutral_prob)
    entailment_probs.append(entailment_prob)

# One probability column per NLI class, in the same row order as the inputs.
sentences_df['contradiction_prob'] = contradiction_probs
sentences_df['neutral_prob'] = neutral_probs
sentences_df['entailment_prob'] = entailment_probs
sentences_df.to_csv('../data_new_results/type2_sentences_results.tsv', sep='\t', index=False)
