In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
glove_50_path = ""
glove_300_path = ""

## Loading Embeddings

In [None]:
###### Extra Utility for resolving errors in loading Hindi Embeddings ####
import progressbar
import numpy as np

def return_lines(path_to_txt):
  f = open(path_to_txt, "r", encoding="utf-8")
  lines = []
  i = 0
  while i<500000:
    try:
      line = f.readline()
      lines.append(line)
      i+=1
      if not line:
        break
    except:
      pass 
  f.close()
  return lines

def emb_matrix_maker(path_to_txt):
  lines = return_lines(path_to_txt)
  hindi_glove_dim = {}
  for i in progressbar.progressbar(range(len(lines))):
    values = lines[i].split(" ")
    hindi_glove_dim[values[0]] = np.asarray(values[1:], "float32")
  return hindi_glove_dim

def loadvocab(dim_size):
  path_to_vocab = ""
  if dim_size == 50:
    path_to_vocab = glove_50_path
  if dim_size == 300:
    path_to_vocab = glove_300_path

  f = open(path_to_vocab, "r")
  freq_dict = {}
  lines = []
  i = 0
  while i<500000:
    try:
      line = f.readline()
      i = i + 1
      lines.append(line)
      
      if not line:
        break
    except:
      pass 

  for line in lines:
      values = line.split(" ")
      freq_dict[values[0]] = int(values[1])

  f.close()
  return freq_dict



hindi_glove_50 = emb_matrix_maker(glove_50_path)
hindi_glove_300 = emb_matrix_maker(glove_300_path)
vocab_50 = loadvocab(50)
vocab_300 = loadvocab(300)

100% (500000 of 500000) |################| Elapsed Time: 0:00:10 Time:  0:00:10
100% (500000 of 500000) |################| Elapsed Time: 0:00:40 Time:  0:00:40


In [None]:
# from sklearn.decomposition import PCA

# def do_pairwise_PCA(l1, l2, embedding, num_components = 10):
#     matrix = []
#     pairs = []
#     for i, element in enumerate(l1):
#       pairs.append([l1[i], l2[i]])

#     for a, b in pairs:
#         center = (embedding[a] + embedding[b])/2
#         matrix.append(embedding[a] - center)
#         matrix.append(embedding[b] - center)
#     matrix = np.array(matrix)
#     pca = PCA(n_components = num_components)
#     pca.fit(matrix)
#     # bar(range(num_components), pca.explained_variance_ratio_)
#     return pca.components_[0]

# pairwise_gendered_PCA_l1 = ["पिता", "बाप", "देव", "बंदा", "नर"]
# pairwise_gendered_PCA_l2 = ["माता", "मां", "देवी", "बंदी", "नारी"]

# d_gender_pairwise_300 = do_pairwise_PCA(pairwise_gendered_PCA_l1, pairwise_gendered_PCA_l2, hindi_glove_300)

# v = d_gender_pairwise_300

# for word_key in hindi_glove_300.keys():
#   original_emb = hindi_glove_300[key]
#   new_emb = debiaser_new(original_emb)
#   hindi_glove_300[key] = new_emb

## WEAT Functions

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
from itertools import combinations, filterfalse

# returns s(w, A, B) for all w in W (passed as argument). Shape: n_words (in W) x 1
def swAB(W, A, B):
  #Calculate cosine-similarity between W and A, W and B
  #print("W: ", W.shape, " A: ", A.shape, " B: ", B.shape)
  WA = cosine_similarity(W,A)
  WB = cosine_similarity(W,B)
  #print('WA shape: ', WA.shape)
  #Take mean along columns
  WAmean = np.mean(WA, axis = 1)
  WBmean = np.mean(WB, axis = 1)
  
  #print('sWAB shape: ', WAmean.shape)
  
  return (WAmean - WBmean)
  
def test_statistic(X, Y, A, B):
  return (sum(swAB(X, A, B)) - sum(swAB(Y, A, B)))

def weat_effect_size(X, Y, A, B, embd, debiased_weat=False):
  #Convert the set of words to matrix
  if (debiased_weat==False):
    Xmat = np.array([embd[w.lower()] for w in X if w.lower() in embd])
    Ymat = np.array([embd[w.lower()] for w in Y if w.lower() in embd])
  else:
    Xmat = np.array([debiaser(w,embd) for w in X if w.lower() in embd])
    Ymat = np.array([debiaser(w,embd) for w in Y if w.lower() in embd])
    #print("comes d")
  Amat = np.array([embd[w.lower()] for w in A if w.lower() in embd])
  Bmat = np.array([embd[w.lower()] for w in B if w.lower() in embd])
  
  # Find X U Y
  XuY = list(set(X).union(Y))
  XuYmat = []
  for w in XuY:
    if w.lower() in embd:
      if debiased_weat == False:
        XuYmat.append(embd[w.lower()])
      else:
        XuYmat.append(debiaser(w,embd))
  XuYmat = np.array(XuYmat)

  d = (np.mean(swAB(Xmat,Amat,Bmat)) - np.mean(swAB(Ymat,Amat,Bmat)))/np.std(swAB(XuYmat, Amat, Bmat))
  
  return d




def random_permutation(iterable, r=None):
  pool = tuple(iterable)
  r = len(pool) if r is None else r
  return tuple(random.sample(pool, r))


def weat_p_value(X, Y, A, B, embd, sample, debiased_weat=False):
  size_of_permutation = min(len(X), len(Y))
  X_Y = X + Y
  test_stats_over_permutation = []
  
  if (debiased_weat==False):
    Xmat = np.array([embd[w.lower()] for w in X if w.lower() in embd])
    Ymat = np.array([embd[w.lower()] for w in Y if w.lower() in embd])
  else:
    Xmat = np.array([debiaser(w,embd) for w in X if w.lower() in embd])
    Ymat = np.array([debiaser(w,embd) for w in Y if w.lower() in embd])
    #print("comes p")
  Amat = np.array([embd[w.lower()] for w in A if w.lower() in embd])
  Bmat = np.array([embd[w.lower()] for w in B if w.lower() in embd])
  
  if not sample:
      permutations = combinations(X_Y, size_of_permutation)
  else:
      permutations = [random_permutation(X_Y, size_of_permutation) for s in range(sample)]
      
  #print(permutations)
  for Xi in permutations:
    Yi = filterfalse(lambda w:w in Xi, X_Y)
    #print(Yi)
    if debiased_weat == False :
      Ximat = np.array([embd[w.lower()] for w in Xi if w.lower() in embd])
      Yimat = np.array([embd[w.lower()] for w in Yi if w.lower() in embd])
    else:
      Ximat = np.array([debiaser(w, embd) for w in Xi if w.lower() in embd])
      Yimat = np.array([debiaser(w, embd) for w in Yi if w.lower() in embd])
    test_stats_over_permutation.append(test_statistic(Ximat, Yimat, Amat, Bmat))
    
  unperturbed = test_statistic(Xmat, Ymat, Amat, Bmat)
  
  is_over = np.array([o > unperturbed for o in test_stats_over_permutation])
  #print("All: ", test_stats_over_permutation)
  #print("Unpertrubed: ", unperturbed)
  return is_over.sum() / is_over.size

## Hindi WEAT and SEAT test WORD LISTS

In [None]:
X = {}
Y = {}
A = {}
B = {}

for i in range(30):
  X[i] = {}
  Y[i] = {}
  A[i] = {}
  B[i] = {}

X[1]["list"] = ["आदित्य", "अर्जुन", "ध्रुव", "ईशान", "कृष्णा", "सिद्धार्थ", "वीर", "शुभम"]
Y[1]["list"] = ["अनिका", "अदिति", "दिया", "श्रेया", "मीरा", "जानवी", "नैना", "अंजलि"]
A[1]["list"] = ["कार्यपालक", "प्रबंधन", "पेशेवर", "निगम", "वेतन", "कार्यालय", "व्यापार", "व्यवसाय"]
B[1]["list"] = ["निवास", "पति", "बच्चे", "परिवार", "पत्नी", "शादी", "विवाह", "रिश्तेदार"]
X[1]["type"] = Y[1]["type"] = "names"
A[1]["type"] = Y[1]["type"] = "common_nouns"

#X[2]["list"] = ["गणित", "बीजगणित", "ज्यामिति", "कलन", "समीकरण", "गणना", "संख्या", "योग"]
X[2]["list"] = ["गणित", "बीजगणित", "ज्यामिति", "कलन", "समीकरण", "गणना", "संख्या", "जोड़"]
#Y[2]["list"] = ["कविता", "कला", "नृत्य", "साहित्य", "उपन्यास", "राग", "नाटक", "मूर्ति"]
Y[2]["list"] = ["कविता", "कला", "नृत्य", "साहित्य", "उपन्यास", "सिम्फनी", "नाटक", "मूर्तिकला"]
#A[2]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
A[2]["list"] = ["नर", "आदमी", "लड़का", "भाई", "वह", "वह", "उसका", "बेटा"]
#B[2]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
B[2]["list"] = ["महिला", "महिला", "लड़की", "बहन", "वह", "उसकी", "उसकी", "बेटी"]
X[2]["type"] = Y[2]["type"] = "common_nouns"
A[2]["type"] = Y[2]["type"] = "common_nouns"

#X[3]["list"] = ["विज्ञान", "प्रौद्योगिकी", "भौतिक", "रसायन", "प्रयोगशाला", "नियम", "प्रयोग", "खगोल"]
X[3]["list"] = ["विज्ञान", "प्रौद्योगिकी", "भौतिकी", "रसायन", "आइंस्टीन", "नासा", "प्रयोग", "खगोल"]
#Y[3]["list"] = ["कविता", "कला", "नाच", "नृत्य", "साहित्य", "उपन्यास", "राग", "नाटक"]
Y[3]["list"] = ["कविता", "कला", "शेक्सपियर", "नृत्य", "साहित्य", "उपन्यास", "सिम्फनी", "नाटक"]
#A[3]["list"] = ["भाई", "पिता", "चाचा", "दादा", "बेटा", "पुरुष", "पति", "आदमी"]
A[3]["list"] = ["भाई", "पिता", "चाचा", "दादा", "पुत्र", "वह", "उसका", "वह"]
#B[3]["list"] = ["बहन", "मां", "चाची", "दादी", "बेटी", "महिला", "पत्नी", "औरत"]
B[3]["list"] = ["बहन", "माँ", "चाची", "दादी", "बेटी", "वह", "उसकी", "उसकी"]
X[3]["type"] = Y[3]["type"] = "common_nouns"
A[3]["type"] = Y[3]["type"] = "common_nouns"

X[4]["list"] = ["आदित्य", "अर्जुन", "ध्रुव", "ईशान", "कृष्णा", "सिद्धार्थ", "वीर", "शुभम"]
Y[4]["list"] = ["अनिका", "अदिति", "दिया", "श्रेया", "मीरा", "जानवी", "नैना", "अंजलि"]
A[4]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[4]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[4]["type"] = Y[4]["type"] = "names"
A[4]["type"] = Y[4]["type"] = "common_nouns"

X[5]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
Y[5]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
A[5]["list"] = ["आदित्य", "अर्जुन", "ध्रुव", "ईशान", "कृष्णा", "सिद्धार्थ", "वीर", "शुभम"]
B[5]["list"] = ["अनिका", "अदिति", "दिया", "श्रेया", "मीरा", "जानवी", "नैना", "अंजलि"]
X[5]["type"] = Y[5]["type"] = "common_nouns"
A[5]["type"] = Y[5]["type"] = "names"

X[6]["list"] = ["क्रोधित", "श्रमिक", "ताकतवर", "निपुण", "वीर", "साहसी", "दिलेर"]
Y[6]["list"] = ["सुन्दर", "शर्म", "आकर्षक", "मनमोहक", "मधुर", "घरेलू", "कमज़ोर" ]
A[6]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[6]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[6]["type"] = Y[6]["type"] = "adjectives"
A[6]["type"] = Y[6]["type"] = "common_nouns"

X[7]["list"] = ["गया", "आया", "खेलता", "बैठा", "लेता", "रहता", "देता", "पढ़ता"]
Y[7]["list"] = ["गई", "आई", "खेलती", "बैठी", "लेती", "रहती", "देती", "पढ़ती"]
A[7]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[7]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[7]["type"] = Y[7]["type"] = "verbs"
A[7]["type"] = Y[7]["type"] = "common_nouns"

X[8]["list"] = ["अच्छा", "बुरा", "गंदा", "लंबा", "छोटा", "मीठा", "नीला", "बड़ा", "पहला"]
Y[8]["list"] = ["अच्छी", "बुरी", "गंदी", "लंबी", "छोटी", "मोती", "नीली", "बड़ी", "पहली"]
A[8]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[8]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[8]["type"] = Y[8]["type"] = "adjectives"
A[8]["type"] = Y[8]["type"] = "common_nouns"

X[9]["list"] = ["पजामा", "घडा", "कुर्ता", "फूल", "कपड़ा", "पहिया", "यंत्र", "पुतला", "ताला"]
Y[9]["list"] = ["अलमारी", "चादर", "पोशाक", "बिजली", "बुद्धि", "तस्वीर", "घडी", "राखी", "कमीज"]
A[9]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[9]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[9]["type"] = Y[9]["type"] = "common_nouns"
A[9]["type"] = Y[9]["type"] = "common_nouns"

X[10]["list"] = ["अध्यापक", "शिष्य", "विद्वान", "साधु", "कवि", "छात्र", "प्रधानाचार्य", "महोदय"]
Y[10]["list"] = ["अध्यापिका", "शिष्या", "विदुषी", "साध्वी", "कवित्री", "छात्रा", "प्रधानाचार्या", "महोदया"]
A[10]["list"] = ["पुरुष", "आदमी", "लड़का", "भाई", "पति", "चाचा", "मामा", "बेटा"]
B[10]["list"] = ["महिला", "औरत", "लड़की", "बहन", "पत्नी", "चाची", "मामी", "बेटी"]
X[10]["type"] = Y[10]["type"] = "common_nouns"
A[10]["type"] = Y[10]["type"] = "common_nouns"

##### 11-12 Caste #####

X[11]["list"] = ["व्यापर", "जमींदार", "सुनार", "गुरु", "मुनीम", "चिकित्सक", "पंडित"]
Y[11]["list"] = ["सफाई", "धोबी", "मल्लाह", "माली", "नाई", "मोची", "मछुआरा"]
A[11]["list"] = ["ठाकुर", "ब्राह्मण", "राजपूत", "क्षत्रिय", "आर्य", "जाट", "बनिया", "कायस्थ"]
B[11]["list"] = ["दलित", "शूद्र", "भंगी", "चमार", "वाल्मीकि", "हरिजन", "चूहड़ा", "जाटव"]
X[11]["type"] = Y[11]["type"] = "common_nouns"
A[11]["type"] = Y[11]["type"] = "common_nouns"

#X[12]["list"] = ["अमीर", "ज्ञानी", "वीर", "ताकतवर", "सुन्दर", "उच्च", "साहसी"] 
X[12]["list"] = ["दुलार", "स्वतंत्रता", "स्वास्थ्य", "प्रेम", "शांति", "जयकार", "मित्र", "स्वर्ग", "वफादार", "आनंद", "हीरा", "कोमल", "ईमानदार", "भाग्यशाली", "इंद्रधनुष", "डिप्लोमा", "उपहार", "सम्मान", "चमत्कार", "सूर्योदय", "परिवार", "खुश", "हँसी", "स्वर्ग", "छुट्टी"]
#Y[12]["list"] = ["नीच", "घृणा", "गन्दा", "कमजोर", "गरीब", "अज्ञानी", "निर्बल"]
Y[12]["list"] = ["दुर्व्यवहार", "दुर्घटना", "गंदगी", "हत्या", "बीमारी", "दुर्घटना", "मृत्यु", "दुख", "जहर", "बदबू", "हमला", "आपदा", "घृणा", "प्रदूषित", "त्रासदी", "बम", "तलाक", "जेल", "गरीबी", "बदसूरत", "कैंसर", "बुराई", "मार", "उल्टी"]
A[12]["list"] = ["ठाकुर", "ब्राह्मण", "राजपूत", "क्षत्रिय", "आर्य", "जाट", "बनिया", "कायस्थ"]
B[12]["list"] = ["दलित", "शूद्र", "भंगी", "चमार", "वाल्मीकि", "हरिजन", "चूहड़ा", "जाटव"]
X[12]["type"] = Y[12]["type"] = "adjectives"
A[12]["type"] = Y[12]["type"] = "common_nouns"

#### 13-15 Religion ####

#X[13]["list"] = ["शिक्षित", "वीर", "उच्च", "साहसी", "शांत", "दयालु", "सफल"] 
#Y[13]["list"] = ["नीच", "घृणा", "अशिक्षित", "हिंसक", "क्रोधी", "निर्दयी", "अत्याचारी"]
X[13]["list"] = ["दुलार", "स्वतंत्रता", "स्वास्थ्य", "प्रेम", "शांति", "जयकार", "मित्र", "स्वर्ग", "वफादार", "आनंद", "हीरा", "कोमल", "ईमानदार", "भाग्यशाली", "इंद्रधनुष", "डिप्लोमा", "उपहार", "सम्मान", "चमत्कार", "सूर्योदय", "परिवार", "खुश", "हँसी", "स्वर्ग", "छुट्टी"]
Y[13]["list"] = ["दुर्व्यवहार", "दुर्घटना", "गंदगी", "हत्या", "बीमारी", "दुर्घटना", "मृत्यु", "दुख", "जहर", "बदबू", "हमला", "आपदा", "घृणा", "प्रदूषित", "त्रासदी", "बम", "तलाक", "जेल", "गरीबी", "बदसूरत", "कैंसर", "बुराई", "मार", "उल्टी"]
A[13]["list"] = ["हिंदू", "भगवान", "गीता", "ब्राह्मण", "पंडित", "मंदिर", "राम", "व्रत"]
B[13]["list"] = ["मुसलमान", "अल्लाह", "कुरान", "शिया", "सुन्नी", "मस्जिद", "मुहम्मद", "रोजा"]
X[13]["type"] = Y[13]["type"] = "adjectives"
A[13]["type"] = Y[13]["type"] = "common_nouns"

#X[14]["list"] = ["शिक्षित", "वीर", "उच्च", "साहसी", "शांत", "दयालु", "सफल"] 
#Y[14]["list"] = ["नीच", "घृणा", "अशिक्षित", "हिंसक", "क्रोधी", "निर्दयी", "अत्याचारी"]
X[14]["list"] = ["दुलार", "स्वतंत्रता", "स्वास्थ्य", "प्रेम", "शांति", "जयकार", "मित्र", "स्वर्ग", "वफादार", "आनंद", "हीरा", "कोमल", "ईमानदार", "भाग्यशाली", "इंद्रधनुष", "डिप्लोमा", "उपहार", "सम्मान", "चमत्कार", "सूर्योदय", "परिवार", "खुश", "हँसी", "स्वर्ग", "छुट्टी"]
Y[14]["list"] = ["दुर्व्यवहार", "दुर्घटना", "गंदगी", "हत्या", "बीमारी", "दुर्घटना", "मृत्यु", "दुख", "जहर", "बदबू", "हमला", "आपदा", "घृणा", "प्रदूषित", "त्रासदी", "बम", "तलाक", "जेल", "गरीबी", "बदसूरत", "कैंसर", "बुराई", "मार", "उल्टी"]
A[14]["list"] = ["शर्मा", "वर्मा", "अग्रवाल", "गुप्ता", "चौहान", "बंसल", "मित्तल", "सिंह", "चौधरी"]
B[14]["list"] = ["यूसुफ", "मलिक", "खान", "अंसारी", "शेख़", "अब्दुल्ला", "अहमद", "पठान", "मिर्जा"]
X[14]["type"] = Y[14]["type"] = "adjectives"
A[14]["type"] = Y[14]["type"] = "names"

X[15]["list"] = ["भगवान", "गीता", "ब्राह्मण", "पंडित", "मंदिर", "राम", "व्रत"]
Y[15]["list"] = [ "अल्लाह", "कुरान", "शिया", "सुन्नी", "मस्जिद", "मुहम्मद", "रोजा"]
A[15]["list"] = ["हिंदू", "हिंदुत्व"]
B[15]["list"] = ["मुसलमान", "इसलाम"]
X[15]["type"] = Y[15]["type"] = "common_nouns"
A[15]["type"] = Y[15]["type"] = "common_nouns"

### 16-17 Extras ####

X[16]["list"] = ["अमीर", "ज्ञानी", "वीर", "ताकतवर", "सुन्दर", "उच्च", "साहसी"]  
Y[16]["list"] = ["नीच", "घृणा", "गन्दा", "कमजोर", "गरीब", "अज्ञानी", "निर्बल"]
A[16]["list"] = ["कार्यपालक", "प्रबंधन", "बैंकर", "व्यवसायी", "इंजीनियर", "डॉक्टर", "वकील", "वैज्ञानिक"]
B[16]["list"] = ["सफाईवाला", "माली", "चालक", "किसान", "हलवाई", "कुम्हार", "ग्वाला", "चरवाहा"]
X[16]["type"] = Y[16]["type"] = "adjectives"
A[16]["type"] = Y[16]["type"] = "common_nouns"

# X[17]["list"] = ["अमीर", "ज्ञानी", "वीर", "ताकतवर", "सुन्दर", "उच्च", "साहसी"]  
# Y[17]["list"] = ["नीच", "घृणा", "गन्दा", "कमजोर", "गरीब", "अज्ञानी", "निर्बल"]
X[17]["list"] = ["दुलार", "स्वतंत्रता", "स्वास्थ्य", "प्रेम", "शांति", "जयकार", "मित्र", "स्वर्ग", "वफादार", "आनंद", "हीरा", "कोमल", "ईमानदार", "भाग्यशाली", "इंद्रधनुष", "डिप्लोमा", "उपहार", "सम्मान", "चमत्कार", "सूर्योदय", "परिवार", "खुश", "हँसी", "स्वर्ग", "छुट्टी"]
Y[17]["list"] = ["दुर्व्यवहार", "दुर्घटना", "गंदगी", "हत्या", "बीमारी", "दुर्घटना", "मृत्यु", "दुख", "जहर", "बदबू", "हमला", "आपदा", "घृणा", "प्रदूषित", "त्रासदी", "बम", "तलाक", "जेल", "गरीबी", "बदसूरत", "कैंसर", "बुराई", "मार", "उल्टी"]
A[17]["list"] = ["बैंकर", "व्यवसायी", "इंजीनियर", "वकील", "वैज्ञानिक", "चालक", "अभिनेता", "मैनेजर"]
B[17]["list"] = ["लोहार", "जलवाहक", "किसान", "ग्वाला", "चरवाहा", "कुम्हार", "जमींदार", "जुलाहा"]
X[17]["type"] = Y[17]["type"] = "adjectives"
A[17]["type"] = Y[17]["type"] = "common_nouns"


## WEAT tests

In [None]:
#For 50 dimensional embeddings
embd = hindi_glove_50

print("GloVe 50 dim:")

for i in range(1,18,1):
  if i==1:
    print("\nGender WEAT:")
  if i==11:
    print("\nCaste WEAT:")
  if i==13:
    print("\nReligion WEAT:")
  if i==16:
    print("\nExtra WEAT:")
  print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=False), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=False)))


GloVe 50 dim:

Gender WEAT:
0.249 (0.349)
1.281 (0.002)
1.415 (0.003)
1.552 (0.001)
1.318 (0.003)
1.669 (0.000)
1.863 (0.000)
1.609 (0.000)
1.171 (0.012)
1.845 (0.000)

Caste WEAT:
1.391 (0.004)
1.612 (0.001)

Religion WEAT:
1.305 (0.007)
1.428 (0.003)
1.744 (0.000)

Extra WEAT:
0.955 (0.041)
1.297 (0.006)


In [None]:
#For 300 dimensional embeddings
embd = hindi_glove_300

print("GloVe 300 dim:")

for i in range(1,18,1):
  if i==1:
    print("\nGender WEAT:")
  if i==11:
    print("\nCaste WEAT:")
  if i==13:
    print("\nReligion WEAT:")
  if i==16:
    print("\nExtra WEAT:")
  print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=False), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=False)))


GloVe 300 dim:

Gender WEAT:
0.473 (0.177)
0.943 (0.024)
0.274 (0.305)
1.625 (0.000)
1.719 (0.000)
1.215 (0.016)
1.874 (0.000)
1.698 (0.000)
1.138 (0.006)
1.922 (0.000)

Caste WEAT:
1.439 (0.003)
0.756 (0.003)

Religion WEAT:
1.048 (0.000)
0.930 (0.000)
1.751 (0.000)

Extra WEAT:
1.136 (0.009)
-0.080 (0.594)


## SEAT Functions

In [None]:
SEAT_sent_dict = {}

SEAT_sent_dict["names"] = ["यह _ है", "वह _ है", "वहाँ _ है", "यहां _ है", "_ यहां है", "_ वहाँ है", "इस्का नाम _ है", "उसका नाम _ है"]
SEAT_sent_dict["common_nouns"] = ["यह _ है", "वह _ है", "वहाँ _ है", "यहां _ है", "_ यहां है", "_ वहाँ है", "वो _ है", "ये _ है"]
SEAT_sent_dict["verbs"] = ["यह _ है", "वह _ है", "वो _ है", "ये _ है", "वहाँ _ है", "यहां _ है"]
SEAT_sent_dict["adjectives"] = ["वह _ है", 'यह _ है', "वो _ है", "ये _ है"]

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
from itertools import combinations, filterfalse


def roberta_sentence_encoder(sentence, model, tokenizer, device='cuda'):
  tokens = tokenizer.tokenize(sentence)
  combined_toks = [tokenizer.cls_token] + tokens + [tokenizer.sep_token]
  encoded_sent = tokenizer.convert_tokens_to_ids(combined_toks)
  input_ids = []
  input_ids.append(encoded_sent)
  #print(input_ids)
  input_ids = pad_sequences(input_ids, maxlen=8, value=0, dtype="long", truncating="pre", padding="post")
  #print(input_ids)
  input_ids = input_ids[0]
  #print(list(input_ids))
  input_ids_tensor = torch.tensor(input_ids).unsqueeze(0).to(device)
  att_mask =[int(i > 0) for i in input_ids]
  #print(att_mask)
  att_mask_tensor = torch.tensor(att_mask).unsqueeze(0).to(device)
  with torch.no_grad():
    embedded = model(input_ids = input_ids_tensor, attention_mask=att_mask_tensor)

  #print(embedded)
  return embedded['last_hidden_state'][0][0].detach().cpu().numpy()

def elmo_encoder(sentence, model, debiased_weat=False):
  tokens = sentence.split(" ")
  vecs = model.get_elmo_vectors([tokens], layers="all")
  tok_embs = vecs[0][0]
  sent_emb = np.mean(tok_embs, axis=0)

  if (debiased_weat == True):
    each_debiased_emb = []
    for emb in tok_embs:
      each_debiased_emb.append(debiaser_new(emb))
    each_debiased_emb = np.array(each_debiased_emb)
    return list(np.mean(each_debiased_emb, axis = 0))

  return list(sent_emb)

def sentence_encoder(sentence, placed_word=None, word_embedding_type="word", E=None, encoder=None, return_connected_sentence=False, debiased_weat=False):

  tok_sentence = sentence.split(" ")
  actual_tok_sentence = []
  for tok in tok_sentence:
    if tok != "_":
      actual_tok_sentence.append(tok)
    else:
      actual_tok_sentence.append(placed_word)

  connected = (" ").join(actual_tok_sentence)
  if return_connected_sentence==True:
    return connected
  #print(connected)

  if word_embedding_type == "word":
    encoded = []
    for word in actual_tok_sentence:
      if word == placed_word and debiased_weat==False:
        #print("RUNS")
        encoded.append(E[word])
      elif word == placed_word and debiased_weat==True:
        encoded.append(debiaser(word, E))
      else:
        encoded.append(E[word])
    #encoded = [E[word] for word in actual_tok_sentence]

    return np.mean(encoded, axis=0)
  
  if word_embedding_type == "elmo":
    return elmo_encoder(connected, model, debiased_weat=True)

  if word_embedding_type == "roberta":
    return roberta_sentence_encoder(connected, model, tokenizer)



def sentence_iterator(word_type, word, SEAT_sent_dict, embd, return_connected_sentence=False, debiased_weat=False):
  candidate_sentences = SEAT_sent_dict[word_type]
  sentence_embeddings = []
  for sentence in candidate_sentences:
    #print(sentence)
    if return_connected_sentence==True:
      sentence_embeddings.append(sentence_encoder(sentence, placed_word=word, word_embedding_type="word", E=embd, return_connected_sentence=True))
    else:
      sentence_embeddings.append(sentence_encoder(sentence, placed_word=word, word_embedding_type="word", E=embd, debiased_weat=debiased_weat))
  
  #print(len(sentence_embeddings))
  #print("\n")
  return sentence_embeddings


def seat_effect_size(X, Y, x_type, A, B, a_type, embd, debiased_weat=False):
  X_full = []
  Y_full = []
  A_full = []
  B_full = []
  for w in X:
    X_full = X_full + sentence_iterator(x_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in Y:
    Y_full = Y_full + sentence_iterator(x_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in A:
    A_full = A_full + sentence_iterator(a_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in B:
    B_full = B_full + sentence_iterator(a_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)

  #print(len(X_full), len(Y_full), len(A_full), len(B_full))
  Xmat = np.array(X_full)
  Ymat = np.array(Y_full)
  Amat = np.array(A_full)
  Bmat = np.array(B_full)
  

  XuYmat = np.array(X_full + Y_full)

  d = (np.mean(swAB(Xmat,Amat,Bmat)) - np.mean(swAB(Ymat,Amat,Bmat)))/np.std(swAB(XuYmat, Amat, Bmat))
  
  return d




def random_permutation(iterable, r=None):
  pool = tuple(iterable)
  r = len(pool) if r is None else r
  return tuple(random.sample(pool, r))


def seat_p_value(X, Y, x_type, A, B, a_type, embd, sample, debiased_weat=False):

  X_full = []
  Y_full = []
  A_full = []
  B_full = []
  for w in X:
    X_full = X_full + sentence_iterator(x_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in Y:
    Y_full = Y_full + sentence_iterator(x_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in A:
    A_full = A_full + sentence_iterator(a_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)
  for w in B:
    B_full = B_full + sentence_iterator(a_type, w, SEAT_sent_dict, embd, debiased_weat=debiased_weat)

  Xmat = np.array(X_full)
  Ymat = np.array(Y_full)
  Amat = np.array(A_full)
  Bmat = np.array(B_full)

  size_of_permutation = min(len(X_full), len(Y_full))
  X_Y_full = []
  XpY = X + Y
  for x in XpY:
    X_Y_full = X_Y_full + sentence_iterator(x_type, x, SEAT_sent_dict, embd, return_connected_sentence=True, debiased_weat=debiased_weat)

  #print(len(X_Y_full))
  #print(X_Y_full)
  test_stats_over_permutation = []
  
  if not sample:
      permutations = combinations(X_Y, size_of_permutation)
  else:
      permutations = [random_permutation(X_Y_full, size_of_permutation) for s in range(sample)]
      
  for Xi in permutations:
    #print(len(Xi))
    Yi = filterfalse(lambda e:e in Xi, X_Y_full)
    #print(Xi)
    #print("\n")
    #print(Yi)
    Ximat = np.array([sentence_encoder(sent, word_embedding_type="word", E=embd, debiased_weat=debiased_weat) for sent in Xi])
    Yimat = np.array([sentence_encoder(sent, word_embedding_type="word", E=embd, debiased_weat=debiased_weat) for sent in Yi])
    #print(Ximat)
    #print(Yimat.shape)
    test_stats_over_permutation.append(test_statistic(Ximat, Yimat, Amat, Bmat))
    
  unperturbed = test_statistic(Xmat, Ymat, Amat, Bmat)
  
  is_over = np.array([o > unperturbed for o in test_stats_over_permutation])
  #print("All: ", test_stats_over_permutation)
  #print("Unpertrubed: ", unperturbed)
  return is_over.sum() / is_over.size

## SEAT Tests

In [None]:
print("GloVe 50 dim:")

for i in range(1,18,1):
  if i==1:
    print("\nGender SEAT:")
  if i==11:
    print("\nCaste SEAT:")
  if i==13:
    print("\nReligion SEAT:")
  if i==16:
    print("\nExtra SEAT:")

  print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], X[i]["type"], hindi_glove_50), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], X[i]["type"], hindi_glove_50, sample=1000)))

GloVe 50 dim:

Gender SEAT:
0.039 (0.404)
1.380 (0.000)
1.435 (0.000)
1.429 (0.000)
1.166 (0.000)
1.673 (0.000)
1.880 (0.000)
1.626 (0.000)
1.224 (0.000)
1.788 (0.000)

Caste SEAT:
1.267 (0.000)
1.560 (0.000)

Religion SEAT:
1.169 (0.000)
1.323 (0.000)
1.556 (0.000)

Extra SEAT:
0.938 (0.000)
1.355 (0.000)


In [None]:
print("GloVe 300 dim:")

for i in range(1,18,1):
  if i==1:
    print("\nGender SEAT:")
  if i==11:
    print("\nCaste SEAT:")
  if i==13:
    print("\nReligion SEAT:")
  if i==16:
    print("\nExtra SEAT:")

  print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], X[i]["type"], hindi_glove_300), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], X[i]["type"], hindi_glove_300, sample=1000)))

GloVe 300 dim:

Gender SEAT:
0.153 (0.203)
0.872 (0.000)
0.183 (0.176)
1.477 (0.000)
1.523 (0.000)
1.186 (0.000)
1.839 (0.000)
1.634 (0.000)
1.121 (0.000)
1.862 (0.000)

Caste SEAT:
1.257 (0.000)
0.745 (0.000)

Religion SEAT:
1.043 (0.000)
0.953 (0.000)
1.693 (0.000)

Extra SEAT:
1.004 (0.000)
-0.134 (0.883)


## Preparing Hindi ELMo

In [None]:
!wget https://www.cfilt.iitb.ac.in/~diptesh/embeddings/monolingual/contextual/hi.zip

--2021-08-15 15:18:30--  https://www.cfilt.iitb.ac.in/~diptesh/embeddings/monolingual/contextual/hi.zip
Resolving www.cfilt.iitb.ac.in (www.cfilt.iitb.ac.in)... 103.21.127.134
Connecting to www.cfilt.iitb.ac.in (www.cfilt.iitb.ac.in)|103.21.127.134|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 372254783 (355M) [application/zip]
Saving to: ‘hi.zip’


2021-08-15 15:19:07 (10.0 MB/s) - ‘hi.zip’ saved [372254783/372254783]



In [None]:
!unzip hi.zip

Archive:  hi.zip
   creating: hi/
   creating: hi/elmo/
  inflating: hi/elmo/hi-d512-elmo.hdf5  
  inflating: hi/elmo/hi-d512-vocab.txt  
  inflating: hi/elmo/hi-d512-options.json  


In [None]:
!pip install --upgrade simple_elmo

Collecting simple_elmo
  Downloading simple_elmo-0.8.0-py3-none-any.whl (45 kB)
[?25l[K     |███████▏                        | 10 kB 27.9 MB/s eta 0:00:01[K     |██████████████▍                 | 20 kB 32.4 MB/s eta 0:00:01[K     |█████████████████████▋          | 30 kB 23.9 MB/s eta 0:00:01[K     |████████████████████████████▊   | 40 kB 19.1 MB/s eta 0:00:01[K     |████████████████████████████████| 45 kB 3.5 MB/s 
Installing collected packages: simple-elmo
Successfully installed simple-elmo-0.8.0


In [None]:
!mv "hi/elmo/hi-d512-options.json" "hi/elmo/options.json"
!mv "hi/elmo/hi-d512-elmo.hdf5" "hi/elmo/elmo.hdf5"
!mv "hi/elmo/hi-d512-vocab.txt" "hi/elmo/vocab.txt"

In [None]:
# Change n_characters to 262 in options.json
# Comment out print and warmup lines in elmo_helpers.py in /usr/local/lib/python3.7/dist-packages/simple_elmo/elmo_helpers.py

In [None]:
from simple_elmo import ElmoModel
model = ElmoModel()
model.load("hi/elmo")

2021-08-15 15:23:27,236 : INFO : Loading model from hi/elmo...
2021-08-15 15:23:27,237 : INFO : No model.hdf5 file found. Using hi/elmo/elmo.hdf5 as a model file.
2021-08-15 15:23:27,242 : INFO : We will cache the vocabulary of 100 tokens.


'The model is now loaded.'

## SEAT ELMo tests

In [None]:
print("Hindi ELMo")

for i in range(1,16,1):
  if i==1:
    print("\nGender SEAT:")
  if i==11:
    print("\nCaste SEAT:")
  if i==13:
    print("\nReligion SEAT:")

  print('{0:.3f}'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300)))

Hindi ELMo

Gender SEAT:
0.828
-0.299
0.505
1.285
1.333
1.306
1.711
1.763
1.682
1.310

Caste SEAT:
1.139
1.082

Religion SEAT:
1.217
0.130
0.625


## Preparing Hindi RoBERTa

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.9.1-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 7.2 MB/s 
[?25hCollecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 61.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 48.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 68.0 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninsta

In [None]:
import random
import pandas as pd
import numpy as np
import csv
import tensorflow as tf
import torch
from sklearn.model_selection import train_test_split
from google.colab import drive
import textwrap
import progressbar
import keras
from keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from transformers import ElectraModel, ElectraTokenizer, ElectraConfig, AutoModelWithLMHead, AutoTokenizer, AutoModel
import time
import datetime
import json
tokenizer = AutoTokenizer.from_pretrained("surajp/RoBERTa-hindi-guj-san")
model = AutoModel.from_pretrained("surajp/RoBERTa-hindi-guj-san")
model.to('cuda')

In [None]:
print("Hindi RoBERTa")

for i in range(1,16,1):
  if i==1:
    print("\nGender SEAT:")
  if i==11:
    print("\nCaste SEAT:")
  if i==13:
    print("\nReligion SEAT:")

  print('{0:.3f}'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300)))

Hindi RoBERTa

Gender SEAT:
0.493
-0.012
0.329
1.072
0.428
0.492
0.474
0.380
-0.222
1.311

Caste SEAT:
0.888
-0.015

Religion SEAT:
0.417
0.560
0.228


## Word Lists for PCA debiasing

In [None]:
listwise_gendered_PCA = ["पिता", "बाप", "देव", "बंदा", "नर", "माता", "मां", "देवी", "बंदी", "नारी"]

pairwise_gendered_PCA_l1 = ["पिता", "बाप", "देव", "बंदा", "नर"]
pairwise_gendered_PCA_l2 = ["माता", "मां", "देवी", "बंदी", "नारी"]

pairwise_verbsan_PCA_l1 = ["जानता", "बोलता", "देखता", "खाता", "चलता", "उड़ता", "जागता"]
pairwise_verbsan_PCA_l2 = ["जानती", "बोलती", "देखती", "खाती", "चलती", "उड़ती", "जागती"]

pairwise_titlessan_PCA_l1 = ["गायक", "लेखक", "प्रेमी", "बालक", "शिक्षक"]
pairwise_titlessan_PCA_l2 = ["गायिका", "लेखिका", "प्रेमिका", "बालिका", "शिक्षिका"]

pairwise_adjsan_PCA_l1 = ["दूसरा", "मोटा", "पतला", "गहरा", "महंगा"]
pairwise_adjsan_PCA_l2 = ["दूसरी", "मोटी", "पतली", "गहरी", "महंगी"]

listwise_entsan_PCA = ["रास्ता", "बस्ता", "चश्मा", "यान", "गीत", "केला", "सेब", "संतरा", "कुर्सी", "उंगली", "गाड़ी", "पुस्तक", "दवाई", "लकड़ी", "सब्जी", "रोटी"]



listwise_caste_PCA = ["देसाई", "रॉय", "पाठक", "पंडित", "पुजारी", "कोल", "धनुक", "मज़हबी", "मुशहर", "घसिया"]



listwise_religion_lastnames_PCA = ["रफीक", "मुस्तफा", "नासिर", "नवाज", "कासिम", "रेड्डी", "आचार्य", "पटेल", "आर्य", "कुमार"]
listwise_religion_PCA = ["हज", "फतवा", "इस्लाम", "ईद", "भक्त", "स्वर्ग", "हिंदुत्व", "दिवाली"]



In [None]:
from sklearn.decomposition import PCA

def do_pairwise_PCA(l1, l2, embedding, num_components = 10):
    matrix = []
    pairs = []
    for i, element in enumerate(l1):
      pairs.append([l1[i], l2[i]])

    for a, b in pairs:
        center = (embedding[a] + embedding[b])/2
        matrix.append(embedding[a] - center)
        matrix.append(embedding[b] - center)
    matrix = np.array(matrix)
    pca = PCA(n_components = num_components)
    pca.fit(matrix)
    # bar(range(num_components), pca.explained_variance_ratio_)
    return pca.components_[0]


def do_listwise_PCA(list_words, embedding, num_components=10):
  matrix = [embedding[word] for word in list_words]
  matrix = np.array(matrix)
  pca = PCA(n_components = num_components)
  pca.fit(matrix)
  return pca.components_[0]

## Bias directions

In [None]:
d_gender_listwise_50 = do_listwise_PCA(listwise_gendered_PCA, hindi_glove_50)
d_gender_pairwise_50 = do_pairwise_PCA(pairwise_gendered_PCA_l1, pairwise_gendered_PCA_l2, hindi_glove_50)
d_verbs_50 = do_pairwise_PCA(pairwise_verbsan_PCA_l1, pairwise_verbsan_PCA_l2, hindi_glove_50)
d_adj_50 = do_pairwise_PCA(pairwise_adjsan_PCA_l1, pairwise_adjsan_PCA_l2, hindi_glove_50)
d_titles_50 = do_pairwise_PCA(pairwise_titlessan_PCA_l1, pairwise_titlessan_PCA_l2, hindi_glove_50)
d_ent_50 = do_listwise_PCA(listwise_entsan_PCA, hindi_glove_50)
d_verbs_adj_50 = do_pairwise_PCA(pairwise_verbsan_PCA_l1 + pairwise_adjsan_PCA_l1, pairwise_verbsan_PCA_l2 + pairwise_adjsan_PCA_l2, hindi_glove_50)
d_ent_titles_50 = do_listwise_PCA(pairwise_titlessan_PCA_l1 + pairwise_titlessan_PCA_l2 + listwise_entsan_PCA, hindi_glove_50)

d_caste_50 = do_listwise_PCA(listwise_caste_PCA, hindi_glove_50)

d_religion_50 = do_listwise_PCA(listwise_religion_lastnames_PCA, hindi_glove_50)
d_religion_ent_50 = do_listwise_PCA(listwise_religion_PCA, hindi_glove_50, num_components=6)





d_gender_listwise_300 = do_listwise_PCA(listwise_gendered_PCA, hindi_glove_300)
d_gender_pairwise_300 = do_pairwise_PCA(pairwise_gendered_PCA_l1, pairwise_gendered_PCA_l2, hindi_glove_300)
d_verbs_300 = do_pairwise_PCA(pairwise_verbsan_PCA_l1, pairwise_verbsan_PCA_l2, hindi_glove_300)
d_adj_300 = do_pairwise_PCA(pairwise_adjsan_PCA_l1, pairwise_adjsan_PCA_l2, hindi_glove_300)
d_titles_300 = do_pairwise_PCA(pairwise_titlessan_PCA_l1, pairwise_titlessan_PCA_l2, hindi_glove_300)
d_ent_300 = do_listwise_PCA(listwise_entsan_PCA, hindi_glove_300)
d_verbs_adj_300 = do_pairwise_PCA(pairwise_verbsan_PCA_l1 + pairwise_adjsan_PCA_l1, pairwise_verbsan_PCA_l2 + pairwise_adjsan_PCA_l2, hindi_glove_300)
d_ent_titles_300 = do_listwise_PCA(pairwise_titlessan_PCA_l1 + pairwise_titlessan_PCA_l2 + listwise_entsan_PCA, hindi_glove_300)

d_caste_300 = do_listwise_PCA(listwise_caste_PCA, hindi_glove_300)

d_religion_300 = do_listwise_PCA(listwise_religion_lastnames_PCA, hindi_glove_300)
d_religion_ent_300 = do_listwise_PCA(listwise_religion_PCA, hindi_glove_300, num_components=6)

## Gender Debiasing


In [None]:
def debiaser(word, embd):
  global v
  v = v/np.linalg.norm(v)
  u = np.array(embd[word])
  debiased_word = u - np.dot(u,v)*v 
  return debiased_word


def debiaser_new(emb):
  global v
  v = v/np.linalg.norm(v)
  debiased_word = emb - np.dot(emb,v)*v 
  return debiased_word

### PCA Based debiasing WEAT results

In [None]:
print("### GENDER PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

print("\nGender debiased with listwise gendered words")
v = d_gender_listwise_50
run_deb(1,11)

print("\nGender debiased with pairwise gendered words")
v = d_gender_pairwise_50
run_deb(1,11)

print("\nGender debiased with pairwise verbs")
v = d_verbs_50
run_deb(1,11)

print("\nGender debiased with pairwise adjectives")
v = d_adj_50
run_deb(1,11)

print("\nGender debiased with pairwise titles")
v = d_titles_50
run_deb(1,11)

print("\nGender debiased with listwise entities")
v = d_ent_50
run_deb(1,11)

### GENDER PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Gender debiased with listwise gendered words
0.628 (0.130)
0.704 (0.089)
1.140 (0.005)
1.577 (0.000)
1.372 (0.003)
1.405 (0.004)
1.750 (0.000)
1.413 (0.002)
0.939 (0.027)
1.864 (0.000)

Gender debiased with pairwise gendered words
0.175 (0.379)
0.738 (0.070)
1.172 (0.007)
1.486 (0.001)
1.058 (0.016)
1.020 (0.038)
1.504 (0.000)
1.434 (0.002)
1.207 (0.006)
1.890 (0.000)

Gender debiased with pairwise verbs
0.063 (0.447)
1.297 (0.002)
1.352 (0.001)
1.602 (0.000)
1.108 (0.006)
1.665 (0.000)
0.875 (0.046)
-0.484 (0.819)
0.146 (0.380)
1.781 (0.000)

Gender debiased with pairwise adjectives
0.202 (0.370)
1.475 (0.000)
1.492 (0.000)
1.523 (0.000)
1.187 (0.007)
1.540 (0.001)
1.481 (0.001)
0.238 (0.318)
0.281 (0.281)
1.831 (0.000)

Gender debiased with pairwise titles
-0.288 (0.690)
1.333 (0.000)
1.021 (0.018)
0.672 (0.094)
0.406 (0.214)
1.456 (0.003)
1.229 (0.002)
1.593 (0.000)
1.425 (0.000)
-0.136 (0.595)

Gender debiased with listwise entiti

In [None]:
print("### GENDER PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

print("\nGender debiased with listwise gendered words")
v = d_gender_listwise_300
run_deb(1,11)

print("\nGender debiased with pairwise gendered words")
v = d_gender_pairwise_300
run_deb(1,11)

print("\nGender debiased with pairwise verbs")
v = d_verbs_300
run_deb(1,11)

print("\nGender debiased with pairwise adjectives")
v = d_adj_300
run_deb(1,11)

print("\nGender debiased with pairwise titles")
v = d_titles_300
run_deb(1,11)

print("\nGender debiased with listwise entities")
v = d_ent_300
run_deb(1,11)

### GENDER PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Gender debiased with listwise gendered words
0.540 (0.179)
0.724 (0.081)
0.645 (0.093)
1.626 (0.000)
1.706 (0.000)
0.913 (0.047)
1.844 (0.000)
1.628 (0.000)
0.949 (0.019)
1.926 (0.000)

Gender debiased with pairwise gendered words
0.360 (0.257)
0.771 (0.064)
0.001 (0.491)
1.446 (0.001)
1.492 (0.001)
0.824 (0.082)
1.790 (0.000)
1.658 (0.000)
0.998 (0.022)
1.897 (0.000)

Gender debiased with pairwise verbs
0.368 (0.242)
0.612 (0.120)
1.139 (0.017)
1.610 (0.000)
1.578 (0.000)
1.039 (0.030)
1.293 (0.007)
1.212 (0.001)
0.600 (0.118)
1.899 (0.000)

Gender debiased with pairwise adjectives
0.470 (0.201)
1.355 (0.004)
1.276 (0.007)
1.678 (0.000)
1.688 (0.000)
1.034 (0.031)
1.756 (0.000)
0.833 (0.055)
0.668 (0.095)
1.928 (0.000)

Gender debiased with pairwise titles
0.089 (0.442)
1.046 (0.020)
0.696 (0.094)
1.050 (0.021)
0.970 (0.023)
0.868 (0.067)
1.531 (0.002)
1.689 (0.000)
1.108 (0.011)
1.114 (0.011)

Gender debiased with listwise entities

### Zhou Based debiasing WEAT results

In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_verbs_50)*d_verbs_50
print("\nGender debiased with Zhou (subtracting verbs direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_adj_50)*d_adj_50
print("\nGender debiased with Zhou (subtracting adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_ent_50)*d_ent_50
print("\nGender debiased with Zhou (subtracting entities direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_titles_50)*d_titles_50
print("\nGender debiased with Zhou (subtracting titles direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_ent_50)*d_ent_50 - np.dot(d_gender_pairwise_50, d_verbs_50)*d_verbs_50
print("\nGender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_verbs_adj_50)*d_verbs_adj_50 - np.dot(d_gender_pairwise_50, d_ent_titles_50)*d_ent_titles_50
print("\nGender debiased with Zhou (subtracting verbs+adj and ent_titles direction from semantic gender):")
run_deb(1,11)



Gender debiased with Zhou (subtracting verbs direction from semantic gender):
0.230 (0.356)
0.981 (0.031)
1.268 (0.004)
1.479 (0.000)
1.181 (0.006)
1.270 (0.007)
1.714 (0.000)
1.594 (0.000)
1.368 (0.001)
1.884 (0.000)

Gender debiased with Zhou (subtracting adjectives direction from semantic gender):
0.200 (0.331)
0.804 (0.069)
1.144 (0.007)
1.517 (0.000)
1.150 (0.011)
1.230 (0.007)
1.701 (0.000)
1.645 (0.000)
1.433 (0.000)
1.883 (0.000)

Gender debiased with Zhou (subtracting entities direction from semantic gender):
0.173 (0.365)
0.765 (0.086)
1.174 (0.001)
1.488 (0.001)
1.056 (0.016)
1.019 (0.026)
1.508 (0.002)
1.437 (0.000)
1.214 (0.003)
1.889 (0.000)

Gender debiased with Zhou (subtracting titles direction from semantic gender):
0.253 (0.327)
1.033 (0.011)
1.333 (0.001)
1.501 (0.002)
1.422 (0.002)
1.479 (0.002)
1.744 (0.000)
1.540 (0.000)
1.167 (0.007)
1.889 (0.000)

Gender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):
0.229 (0.326)
0.998 (

In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_verbs_300)*d_verbs_300
print("\nGender debiased with Zhou (subtracting verbs direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_adj_300)*d_adj_300
print("\nGender debiased with Zhou (subtracting adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_ent_300)*d_ent_300
print("\nGender debiased with Zhou (subtracting entities direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_titles_300)*d_titles_300
print("\nGender debiased with Zhou (subtracting titles direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_ent_300)*d_ent_300 - np.dot(d_gender_pairwise_300, d_verbs_300)*d_verbs_300
print("\nGender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_verbs_adj_300)*d_verbs_adj_300 - np.dot(d_gender_pairwise_300, d_ent_titles_300)*d_ent_titles_300
print("\nGender debiased with Zhou (subtracting verbs+adj and ent_titles direction from semantic gender):")
run_deb(1,11)



Gender debiased with Zhou (subtracting verbs direction from semantic gender):
0.410 (0.241)
0.993 (0.021)
0.266 (0.331)
1.509 (0.002)
1.593 (0.000)
0.961 (0.034)
1.849 (0.000)
1.715 (0.000)
1.144 (0.007)
1.918 (0.000)

Gender debiased with Zhou (subtracting adjectives direction from semantic gender):
0.367 (0.260)
0.739 (0.066)
-0.009 (0.501)
1.458 (0.001)
1.524 (0.000)
0.917 (0.047)
1.825 (0.000)
1.728 (0.000)
1.103 (0.010)
1.902 (0.000)

Gender debiased with Zhou (subtracting entities direction from semantic gender):
0.360 (0.237)
0.702 (0.093)
-0.032 (0.515)
1.448 (0.001)
1.496 (0.001)
0.803 (0.089)
1.789 (0.000)
1.647 (0.000)
0.983 (0.020)
1.897 (0.000)

Gender debiased with Zhou (subtracting titles direction from semantic gender):
0.467 (0.220)
0.910 (0.032)
0.499 (0.178)
1.564 (0.000)
1.686 (0.000)
1.048 (0.027)
1.846 (0.000)
1.656 (0.000)
1.062 (0.012)
1.928 (0.000)

Gender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):
0.410 (0.222)
0.949

### Linear Projection debiasing WEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Gender Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

v = np.array(hindi_glove_50["नारी"]) - np.array(hindi_glove_50["नर"])
print("\nGender debiased with Linear Direction (naari-nar):")
run_deb(1,11)

v = np.array(hindi_glove_50["माता"]) - np.array(hindi_glove_50["पिता"])
print("\nGender debiased with Linear Direction (maata-pita):")
run_deb(1,11)


Gender Debiasing Linear Projection: 50 Dim GloVe

Gender debiased with Linear Direction (naari-nar):
0.257 (0.335)
0.596 (0.135)
0.974 (0.021)
1.480 (0.004)
1.267 (0.000)
1.478 (0.000)
1.848 (0.000)
1.536 (0.000)
1.081 (0.005)
1.854 (0.000)

Gender debiased with Linear Direction (maata-pita):
0.185 (0.355)
1.388 (0.004)
1.464 (0.002)
1.614 (0.001)
1.286 (0.003)
1.534 (0.000)
1.841 (0.000)
1.710 (0.000)
1.127 (0.014)
1.777 (0.000)


In [None]:
########## Single direction Linear Debiasing #########

print("Gender Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

v = np.array(hindi_glove_300["नारी"]) - np.array(hindi_glove_300["नर"])
print("\nGender debiased with Linear Direction (naari-nar):")
run_deb(1,11)

v = np.array(hindi_glove_300["माता"]) - np.array(hindi_glove_300["पिता"])
print("\nGender debiased with Linear Direction (maata-pita):")
run_deb(1,11)


Gender Debiasing Linear Projection: 300 Dim GloVe

Gender debiased with Linear Direction (naari-nar):
0.489 (0.187)
0.437 (0.203)
0.497 (0.175)
1.602 (0.002)
1.676 (0.000)
0.956 (0.056)
1.867 (0.000)
1.628 (0.000)
1.009 (0.016)
1.912 (0.000)

Gender debiased with Linear Direction (maata-pita):
0.502 (0.168)
1.342 (0.002)
1.186 (0.010)
1.629 (0.000)
1.689 (0.000)
1.046 (0.029)
1.861 (0.000)
1.719 (0.000)
1.136 (0.006)
1.914 (0.000)


### PCA Based Debiasing SEAT results

In [None]:
print("### GENDER PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

print("\nGender debiased with listwise gendered words")
v = d_gender_listwise_50
run_deb(1,11)

print("\nGender debiased with pairwise gendered words")
v = d_gender_pairwise_50
run_deb(1,11)

print("\nGender debiased with pairwise verbs")
v = d_verbs_50
run_deb(1,11)

print("\nGender debiased with pairwise adjectives")
v = d_adj_50
run_deb(1,11)

print("\nGender debiased with pairwise titles")
v = d_titles_50
run_deb(1,11)

print("\nGender debiased with listwise entities")
v = d_ent_50
run_deb(1,11)

### GENDER PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Gender debiased with listwise gendered words
0.226 (0.107)
0.993 (0.000)
1.168 (0.000)
1.552 (0.000)
1.311 (0.000)
1.391 (0.000)
1.791 (0.000)
1.454 (0.000)
1.024 (0.000)
1.780 (0.000)

Gender debiased with pairwise gendered words
-0.032 (0.557)
0.892 (0.000)
1.064 (0.000)
1.456 (0.000)
0.836 (0.000)
1.036 (0.000)
1.446 (0.000)
1.398 (0.000)
1.206 (0.000)
1.604 (0.000)

Gender debiased with pairwise verbs
-0.092 (0.672)
1.218 (0.000)
1.279 (0.000)
1.340 (0.000)
0.997 (0.000)
1.409 (0.000)
0.656 (0.001)
-0.294 (0.897)
0.192 (0.110)
1.645 (0.000)

Gender debiased with pairwise adjectives
-0.014 (0.528)
1.555 (0.000)
1.507 (0.000)
1.370 (0.000)
1.010 (0.000)
1.495 (0.000)
1.413 (0.000)
0.218 (0.203)
0.405 (0.004)
1.735 (0.000)

Gender debiased with pairwise titles
-0.428 (0.993)
1.159 (0.000)
0.771 (0.000)
0.748 (0.000)
0.371 (0.021)
1.187 (0.000)
1.329 (0.000)
1.589 (0.000)
1.179 (0.000)
0.017 (0.451)

Gender debiased with listwise enti

In [None]:
print("### GENDER PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

print("\nGender debiased with listwise gendered words")
v = d_gender_listwise_300
run_deb(1,11)

print("\nGender debiased with pairwise gendered words")
v = d_gender_pairwise_300
run_deb(1,11)

print("\nGender debiased with pairwise verbs")
v = d_verbs_300
run_deb(1,11)

print("\nGender debiased with pairwise adjectives")
v = d_adj_300
run_deb(1,11)

print("\nGender debiased with pairwise titles")
v = d_titles_300
run_deb(1,11)

print("\nGender debiased with listwise entities")
v = d_ent_300
run_deb(1,11)

### GENDER PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Gender debiased with listwise gendered words
0.158 (0.186)
0.901 (0.000)
0.657 (0.000)
1.502 (0.000)
1.525 (0.000)
0.969 (0.000)
1.806 (0.000)
1.592 (0.000)
0.968 (0.000)
1.878 (0.000)

Gender debiased with pairwise gendered words
0.056 (0.379)
0.781 (0.000)
0.071 (0.331)
1.213 (0.000)
1.174 (0.000)
0.798 (0.001)
1.670 (0.000)
1.538 (0.000)
0.992 (0.000)
1.745 (0.000)

Gender debiased with pairwise verbs
0.031 (0.443)
0.555 (0.002)
0.804 (0.000)
1.416 (0.000)
1.377 (0.000)
0.848 (0.001)
1.023 (0.000)
1.005 (0.000)
0.570 (0.000)
1.777 (0.000)

Gender debiased with pairwise adjectives
0.135 (0.226)
1.320 (0.000)
1.171 (0.000)
1.505 (0.000)
1.490 (0.000)
1.005 (0.000)
1.691 (0.000)
0.740 (0.002)
0.683 (0.000)
1.858 (0.000)

Gender debiased with pairwise titles
-0.214 (0.866)
0.858 (0.000)
0.517 (0.001)
1.079 (0.000)
0.797 (0.000)
0.722 (0.007)
1.463 (0.000)
1.643 (0.000)
0.986 (0.000)
0.898 (0.000)

Gender debiased with listwise entitie

### Zhou Based Debiasing SEAT results


In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))


v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_verbs_50)*d_verbs_50
print("\nGender debiased with Zhou (subtracting verbs direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_adj_50)*d_adj_50
print("\nGender debiased with Zhou (subtracting adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_ent_50)*d_ent_50
print("\nGender debiased with Zhou (subtracting entities direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_titles_50)*d_titles_50
print("\nGender debiased with Zhou (subtracting titles direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_ent_50)*d_ent_50 - np.dot(d_gender_pairwise_50, d_verbs_50)*d_verbs_50
print("\nGender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_50 - np.dot(d_gender_pairwise_50, d_verbs_adj_50)*d_verbs_adj_50 - np.dot(d_gender_pairwise_50, d_ent_titles_50)*d_ent_titles_50
print("\nGender debiased with Zhou (subtracting verbs+adj and ent_titles direction from semantic gender):")
run_deb(1,11)



Gender debiased with Zhou (subtracting verbs direction from semantic gender):
0.012 (0.466)
1.111 (0.000)
1.210 (0.000)
1.397 (0.000)
0.948 (0.000)
1.274 (0.000)
1.718 (0.000)
1.598 (0.000)
1.395 (0.000)
1.730 (0.000)

Gender debiased with Zhou (subtracting adjectives direction from semantic gender):
-0.010 (0.530)
0.944 (0.000)
1.057 (0.000)
1.421 (0.000)
0.943 (0.000)
1.221 (0.000)
1.689 (0.000)
1.629 (0.000)
1.429 (0.000)
1.704 (0.000)

Gender debiased with Zhou (subtracting entities direction from semantic gender):
-0.033 (0.577)
0.906 (0.000)
1.065 (0.000)
1.456 (0.000)
0.834 (0.000)
1.034 (0.000)
1.449 (0.000)
1.400 (0.000)
1.211 (0.000)
1.604 (0.000)

Gender debiased with Zhou (subtracting titles direction from semantic gender):
0.008 (0.481)
1.195 (0.000)
1.342 (0.000)
1.451 (0.000)
1.203 (0.000)
1.475 (0.000)
1.758 (0.000)
1.559 (0.000)
1.238 (0.000)
1.807 (0.000)

Gender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):
0.012 (0.489)
1.120

In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_verbs_300)*d_verbs_300
print("\nGender debiased with Zhou (subtracting verbs direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_adj_300)*d_adj_300
print("\nGender debiased with Zhou (subtracting adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_ent_300)*d_ent_300
print("\nGender debiased with Zhou (subtracting entities direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_titles_300)*d_titles_300
print("\nGender debiased with Zhou (subtracting titles direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_ent_300)*d_ent_300 - np.dot(d_gender_pairwise_300, d_verbs_300)*d_verbs_300
print("\nGender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):")
run_deb(1,11)

v = d_gender_pairwise_300 - np.dot(d_gender_pairwise_300, d_verbs_adj_300)*d_verbs_adj_300 - np.dot(d_gender_pairwise_300, d_ent_titles_300)*d_ent_titles_300
print("\nGender debiased with Zhou (subtracting verbs+adj and ent_titles direction from semantic gender):")
run_deb(1,11)



Gender debiased with Zhou (subtracting verbs direction from semantic gender):
0.102 (0.264)
0.983 (0.000)
0.278 (0.072)
1.308 (0.000)
1.301 (0.000)
0.944 (0.000)
1.796 (0.000)
1.637 (0.000)
1.137 (0.000)
1.816 (0.000)

Gender debiased with Zhou (subtracting adjectives direction from semantic gender):
0.061 (0.348)
0.766 (0.000)
0.061 (0.377)
1.230 (0.000)
1.214 (0.000)
0.868 (0.001)
1.725 (0.000)
1.627 (0.000)
1.088 (0.000)
1.766 (0.000)

Gender debiased with Zhou (subtracting entities direction from semantic gender):
0.057 (0.369)
0.750 (0.000)
0.047 (0.396)
1.220 (0.000)
1.181 (0.000)
0.787 (0.001)
1.673 (0.000)
1.531 (0.000)
0.983 (0.000)
1.749 (0.000)

Gender debiased with Zhou (subtracting titles direction from semantic gender):
0.139 (0.213)
0.964 (0.000)
0.472 (0.001)
1.395 (0.000)
1.449 (0.000)
1.031 (0.000)
1.793 (0.000)
1.583 (0.000)
1.059 (0.000)
1.851 (0.000)

Gender debiased with Zhou (subtracting verbs and adjectives direction from semantic gender):
0.102 (0.295)
0.961 (

### Linear Projection debiasing SEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Gender Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_50["नारी"]) - np.array(hindi_glove_50["नर"])
print("\nGender debiased with Linear Direction (naari-nar):")
run_deb(1,11)

v = np.array(hindi_glove_50["माता"]) - np.array(hindi_glove_50["पिता"])
print("\nGender debiased with Linear Direction (maata-pita):")
run_deb(1,11)


Gender Debiasing Linear Projection: 50 Dim GloVe

Gender debiased with Linear Direction (naari-nar):
0.047 (0.397)
0.860 (0.000)
1.098 (0.000)
1.331 (0.000)
1.109 (0.000)
1.540 (0.000)
1.868 (0.000)
1.592 (0.000)
1.177 (0.000)
1.797 (0.000)

Gender debiased with Linear Direction (maata-pita):
-0.009 (0.527)
1.372 (0.000)
1.352 (0.000)
1.418 (0.000)
1.096 (0.000)
1.396 (0.000)
1.786 (0.000)
1.635 (0.000)
1.126 (0.000)
1.667 (0.000)


In [None]:
########## Single direction Linear Debiasing #########

print("Gender Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_300["नारी"]) - np.array(hindi_glove_300["नर"])
print("\nGender debiased with Linear Direction (naari-nar):")
run_deb(1,11)

v = np.array(hindi_glove_300["माता"]) - np.array(hindi_glove_300["पिता"])
print("\nGender debiased with Linear Direction (maata-pita):")
run_deb(1,11)


Gender Debiasing Linear Projection: 300 Dim GloVe

Gender debiased with Linear Direction (naari-nar):
0.169 (0.147)
0.637 (0.000)
0.551 (0.000)
1.453 (0.000)
1.485 (0.000)
0.981 (0.000)
1.831 (0.000)
1.581 (0.000)
1.023 (0.000)
1.848 (0.000)

Gender debiased with Linear Direction (maata-pita):
0.182 (0.158)
1.195 (0.000)
0.902 (0.000)
1.435 (0.000)
1.478 (0.000)
0.932 (0.000)
1.783 (0.000)
1.588 (0.000)
1.086 (0.000)
1.814 (0.000)


### Debiasing ELMo first layer

In [None]:
v = np.array(elmo_encoder("नर", model)) - np.array(elmo_encoder("नारी", model))

In [None]:
print("Hindi ELMo")

for i in range(1,11,1):
  if i==1:
    print("\nGender SEAT:")

  print('{0:.3f}'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300)))

Hindi ELMo

Gender SEAT:
0.749
-0.391
0.328
1.113
1.170
1.317
1.741
1.751
1.658
1.274


## Caste Debiasing

### Linear Projection debiasing WEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Caste Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

v = np.array(hindi_glove_50["घसिया"]) - np.array(hindi_glove_50["देसाई"])
print("\nCaste debiased with Linear Direction (ghasiya-desai):")
run_deb(11,13)

v = np.array(hindi_glove_50["घसिया"]) - np.array(hindi_glove_50["पंडित"])
print("\nCaste debiased with Linear Direction (ghasiya-pandit):")
run_deb(11,13)


Caste Debiasing Linear Projection: 50 Dim GloVe

Caste debiased with Linear Direction (ghasiya-desai):
1.311 (0.011)
1.593 (0.001)

Caste debiased with Linear Direction (ghasiya-pandit):
0.914 (0.053)
1.521 (0.000)


In [None]:
########## Single direction Linear Debiasing #########

print("Caste Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

v = np.array(hindi_glove_300["घसिया"]) - np.array(hindi_glove_300["देसाई"])
print("\nCaste debiased with Linear Direction (ghasiya-desai):")
run_deb(11,13)

v = np.array(hindi_glove_300["घसिया"]) - np.array(hindi_glove_300["पंडित"])
print("\nCaste debiased with Linear Direction (ghasiya-pandit):")
run_deb(11,13)


Caste Debiasing Linear Projection: 300 Dim GloVe

Caste debiased with Linear Direction (ghasiya-desai):
1.339 (0.002)
1.508 (0.001)

Caste debiased with Linear Direction (ghasiya-pandit):
0.775 (0.086)
1.312 (0.005)


### PCA Based debiasing WEAT results

In [None]:
print("### CASTE PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

print("\nCaste debiased with listwise caste words")
v = d_caste_50
run_deb(11,13)

### CASTE PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Caste debiased with listwise caste words
1.227 (0.012)
1.414 (0.001)


In [None]:
print("### CASTE PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

print("\nCaste debiased with listwise caste words")
v = d_caste_300
run_deb(11,13)

### CASTE PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Caste debiased with listwise caste words
1.212 (0.016)
1.328 (0.003)


### Linear Projection debiasing SEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Caste Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_50["घसिया"]) - np.array(hindi_glove_50["देसाई"])
print("\nCaste debiased with Linear Direction (ghasiya-desai):")
run_deb(11,13)

v = np.array(hindi_glove_50["घसिया"]) - np.array(hindi_glove_50["पंडित"])
print("\nCaste debiased with Linear Direction (ghasiya-pandit):")
run_deb(11,13)


Caste Debiasing Linear Projection: 50 Dim GloVe

Caste debiased with Linear Direction (ghasiya-desai):
1.169 (0.000)
1.494 (0.000)

Caste debiased with Linear Direction (ghasiya-pandit):
0.678 (0.000)
1.343 (0.000)


In [None]:
########## Single direction Linear Debiasing #########

print("Caste Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_300["घसिया"]) - np.array(hindi_glove_300["देसाई"])
print("\nCaste debiased with Linear Direction (ghasiya-desai):")
run_deb(11,13)

v = np.array(hindi_glove_300["घसिया"]) - np.array(hindi_glove_300["पंडित"])
print("\nCaste debiased with Linear Direction (ghasiya-pandit):")
run_deb(11,13)


Caste Debiasing Linear Projection: 300 Dim GloVe

Caste debiased with Linear Direction (ghasiya-desai):
1.171 (0.000)
1.359 (0.000)

Caste debiased with Linear Direction (ghasiya-pandit):
0.665 (0.000)
1.184 (0.000)


### PCA based debiasing SEAT results

In [None]:
print("### CASTE PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

print("\nCaste debiased with listwise caste words")
v = d_caste_50
run_deb(11,13)

### CASTE PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Caste debiased with listwise caste words
0.991 (0.000)
1.231 (0.000)


In [None]:
print("### CASTE PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

print("\nCaste debiased with listwise caste words")
v = d_caste_300
run_deb(11,13)

### CASTE PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Caste debiased with listwise caste words
0.887 (0.000)
1.181 (0.000)


## Religion Debiasing


### PCA Based debiasing WEAT results

In [None]:
print("### RELIGION PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

print("\nReligion debiased with listwise religion entities")
v = d_religion_ent_50
run_deb(13,16)

print("\nReligion debiased with listwise religion lastnames")
v = d_religion_50
run_deb(13,16)

### RELIGION PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Religion debiased with listwise religion entities
0.821 (0.053)
0.995 (0.039)
1.340 (0.001)

Religion debiased with listwise religion lastnames
0.381 (0.253)
0.568 (0.159)
0.385 (0.262)


In [None]:
print("### RELIGION PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

print("\nReligion debiased with listwise religion entities")
v = d_religion_ent_300
run_deb(13,16)

print("\nReligion debiased with listwise religion lastnames")
v = d_religion_300
run_deb(13,16)

### RELIGION PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Religion debiased with listwise religion entities
1.276 (0.006)
1.551 (0.000)
1.723 (0.000)

Religion debiased with listwise religion lastnames
0.909 (0.044)
0.711 (0.098)
1.544 (0.000)


### Linear Projection based debiasing WEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Religion Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))

v = np.array(hindi_glove_50["आचार्य"]) - np.array(hindi_glove_50["नासिर"])
print("\nReligion debiased with Linear Direction (acharya-nasir):")
run_deb(13,16)

Religion Debiasing Linear Projection: 50 Dim GloVe

Religion debiased with Linear Direction (acharya-nasir):
1.163 (0.023)
1.601 (0.000)
1.030 (0.029)


In [None]:
########## Single direction Linear Debiasing #########

print("Religion Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))

v = np.array(hindi_glove_300["आचार्य"]) - np.array(hindi_glove_300["नासिर"])
print("\nReligion debiased with Linear Direction (acharya-nasir):")
run_deb(13,16)

Religion Debiasing Linear Projection: 300 Dim GloVe

Religion debiased with Linear Direction (acharya-nasir):
1.276 (0.007)
1.569 (0.000)
1.612 (0.001)


### Zhou Based Debiasing WEAT results

In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_50, 1000, debiased_weat=True)))


v = d_religion_50 - np.dot(d_religion_50, d_religion_ent_50)*d_religion_ent_50
print("\nReligion debiased with Zhou (subtracting entities direction):")
run_deb(13,16)


Religion debiased with Zhou (subtracting entities direction):
0.947 (0.047)
1.174 (0.006)
1.441 (0.001)


In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(weat_effect_size(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, debiased_weat=True), weat_p_value(X[i]["list"], Y[i]["list"], A[i]["list"], B[i]["list"], hindi_glove_300, 1000, debiased_weat=True)))


v = d_religion_300 - np.dot(d_religion_300, d_religion_ent_300)*d_religion_ent_300
print("\nReligion debiased with Zhou (subtracting entities direction):")
run_deb(13,16)


Religion debiased with Zhou (subtracting entities direction):
0.921 (0.057)
0.707 (0.108)
1.589 (0.000)


### PCA Based debiasing SEAT results

In [None]:
print("### RELIGION PCA DEBIASING RESULTS: 50 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

print("\nReligion debiased with listwise religion entities")
v = d_religion_ent_50
run_deb(13,16)

print("\nReligion debiased with listwise religion lastnames")
v = d_religion_50
run_deb(13,16)

### RELIGION PCA DEBIASING RESULTS: 50 DIM GLOVE ###

Religion debiased with listwise religion entities
0.794 (0.001)
0.983 (0.000)
1.161 (0.000)

Religion debiased with listwise religion lastnames
0.497 (0.030)
0.935 (0.000)
0.096 (0.293)


In [None]:
print("### RELIGION PCA DEBIASING RESULTS: 300 DIM GLOVE ###")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

print("\nReligion debiased with listwise religion entities")
v = d_religion_ent_300
run_deb(13,16)

print("\nReligion debiased with listwise religion lastnames")
v = d_religion_300
run_deb(13,16)

### RELIGION PCA DEBIASING RESULTS: 300 DIM GLOVE ###

Religion debiased with listwise religion entities
1.189 (0.000)
1.407 (0.000)
1.645 (0.000)

Religion debiased with listwise religion lastnames
0.852 (0.001)
0.698 (0.002)
1.433 (0.000)


### Linear Projection based debiasing SEAT results

In [None]:
########## Single direction Linear Debiasing #########

print("Religion Debiasing Linear Projection: 50 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_50["आचार्य"]) - np.array(hindi_glove_50["नासिर"])
print("\nReligion debiased with Linear Direction (acharya-nasir):")
run_deb(13,16)

Religion Debiasing Linear Projection: 50 Dim GloVe

Religion debiased with Linear Direction (acharya-nasir):
1.067 (0.000)
1.614 (0.000)
0.694 (0.000)


In [None]:
########## Single direction Linear Debiasing #########

print("Religion Debiasing Linear Projection: 300 Dim GloVe")

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))

v = np.array(hindi_glove_300["आचार्य"]) - np.array(hindi_glove_300["नासिर"])
print("\nReligion debiased with Linear Direction (acharya-nasir):")
run_deb(13,16)

Religion Debiasing Linear Projection: 300 Dim GloVe

Religion debiased with Linear Direction (acharya-nasir):
1.221 (0.000)
1.429 (0.000)
1.518 (0.000)


### Zhou Based debiasing SEAT results

In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_50, sample=1000, debiased_weat=True)))

v = d_religion_50 - np.dot(d_religion_50, d_religion_ent_50)*d_religion_ent_50
print("\nReligion debiased with Zhou (subtracting entities direction):")
run_deb(13,16)


Religion debiased with Zhou (subtracting entities direction):
0.865 (0.000)
1.126 (0.000)
1.174 (0.000)


In [None]:
########## Zhou type #########

def run_deb(l,r):
  for i in range(l,r,1):
    print('{0:.3f} ({1:.3f})'.format(seat_effect_size(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, debiased_weat=True), seat_p_value(X[i]["list"], Y[i]["list"], X[i]["type"], A[i]["list"], B[i]["list"], A[i]["type"], hindi_glove_300, sample=1000, debiased_weat=True)))


v = d_religion_300 - np.dot(d_religion_300, d_religion_ent_300)*d_religion_ent_300
print("\nReligion debiased with Zhou (subtracting entities direction):")
run_deb(13,16)


Religion debiased with Zhou (subtracting entities direction):
0.852 (0.000)
0.684 (0.002)
1.497 (0.000)
