### Gender mitigated word embedding using adversarial feature learning

#### In this project, we will try to mitiage the gender information in word embedding, based on [GloVe](https://nlp.stanford.edu/projects/glove/) and [Adversarial feature learning](https://arxiv.org/abs/1705.11122).

#### 1 GloVe Model
refer to [this](https://github.com/kefirski/pytorch_GloVe/blob/master/GloVe/glove.py) and [this](https://github.com/2014mchidamb/TorchGlove/blob/master/glove.py)

In [1]:
import torch as t
from nltk.tokenize import word_tokenize
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.init import xavier_normal
import torch.optim as optim
import os, sys, time

In [44]:
class GloVe(nn.Module):
    def __init__(self, co_oc, embedding_size, x_max = 100, alpha = 0.75):
        """
        param: co_oc: co-occurrence ndarray
        """
        super(GloVe, self).__init__()
        
        self.embedding_size = embedding_size
        self.x_max = x_max
        self.alpha = alpha
        
        '''co_oc matrix'''
        self.co_oc = co_oc + 1.0
        self.vocabulary_size,_ = co_oc.shape

        self.in_embed = nn.Embedding(self.vocabulary_size, self.embedding_size)
        self.in_embed.weight = xavier_normal(self.in_embed.weight) #normalize
        
        self.in_bias = nn.Embedding(self.vocabulary_size, 1) #bias.shape =(vocabularySize,1)
        self.in_bias.weight = xavier_normal(self.in_bias.weight)
        
        self.out_embed = nn.Embedding(self.vocabulary_size, self.embedding_size)
        self.out_embed.weight = xavier_normal(self.out_embed.weight)
        
        self.out_bias = nn.Embedding(self.vocabulary_size, 1)
        self.out_bias.weight = xavier_normal(self.out_bias.weight)
        
    
    def forward(self, batch_input, batch_output):
        """
        return the loss
        """
        assert len(batch_input) == len(batch_output)
        
        batch_size = len(batch_input)
    
        # ZL: What's this stmt mean?
        co_occurences = np.array([self.co_oc[batch_input[i], batch_output[i]] for i in range(batch_size)])
        weights = np.array([self._weight(var) for var in co_occurences])
        
        # Variable can do backpropagation
        co_occurences = Variable(t.from_numpy(co_occurences)).float() 
        weights = Variable(t.from_numpy(weights)).float()
        
        batch_input = Variable(t.from_numpy(batch_input))
        batch_output = Variable(t.from_numpy(batch_output))
        
        input_embed = self.in_embed(batch_input)
        output_embed = self.out_embed(batch_output)
        input_bias = self.in_bias(batch_input)
        output_bias = self.out_bias(batch_output)
        
        loss = (t.pow(
            ((input_embed * output_embed).sum(1) + input_bias + output_bias).squeeze(1) - t.log(co_occurences), 2
        ) * weights).sum() / batch_size
        
        return loss 
    
    def _weight(self, x):
        return 1 if x > self.x_max else (x / self.x_max) ** self.alpha
    
    def embeddings(self):
        return self.in_embed.weight.data.cpu().numpy() + self.out_embed.weight.data.cpu().numpy()
    
        

In [45]:
# Get a random batch
def get_batch(vocab_size, batch_size):
    in_index   = np.random.choice(np.arange(vocab_size), size = batch_size, replace = False)
    out_index  = np.random.choice(np.arange(vocab_size), size = batch_size, replace = False)
    return in_index, out_index
        
    

In [54]:
context_size = 3
words_file = 'test.txt'
with open(words_file, 'r') as f:
    text = f.read().lower()

# word_list = word_tokenize(text)
word_list = text.split(" ")[:1700500]
text_size = len(word_list)
print("text size:", text_size)
vocab = np.unique(word_list)
vocabulary_size = len(vocab)
print("vocabulary_size:", vocabulary_size)
word2ind = {word:ind for ind,word in enumerate(vocab)}
embedding_size = 100
word2ind['UNK'] = -1

text size: 4250
vocabulary_size: 1533


In [47]:
# Actuallly not in use
import collections
# vocabulary_size = 50000

def read_data(filename):
    with open(filename) as f:
        data = f.read().split()
    return data

def build_dataset(filename, vocabulary_size):
        """Process raw inputs into a dataset."""
        vocabulary = read_data(filename)
        
        print('--  Data Size:', len(vocabulary))
        print('--  Vocabulary size:', vocabulary_size)
        count = [['UNK', -1]]
        count.extend(collections.Counter(vocabulary).most_common(vocabulary_size - 1))
        word2id = dict()
        for (index,word) in enumerate(count):
            word2id[word[0]] = index
        data = []
        unk_count = 0
        for word in vocabulary:
            index = word2id.get(word,0)
            if index == 0:
                unk_count += 1
            data.append(index)
        count[0][1] = unk_count
        id2word = dict(zip(word2id.values(), word2id.keys()))
        return data, count, word2id, id2word
    
# data, count, word2id, id2word = build_dataset(words_file, vocabulary_size)

In [48]:
def get_co_oc_matrix(vocabulary_size):   
    comat = np.zeros((vocabulary_size, vocabulary_size))
    for i in range(text_size): #main word
        left_context_ids = [word2ind[ind] for ind in word_list[max(0, i - context_size): i]]  #left context
        right_context_ids = [word2ind[ind] for ind in word_list[i+1: min(i+context_size+1, text_size)]] #right context
        ind = word2ind[word_list[i]]

        for left_ind, lind in enumerate(left_context_ids):
            comat[ind, lind] += 1./(len(left_context_ids) - left_ind) #symmetrically
            
        for right_ind, rind in enumerate(right_context_ids):
            comat[ind, rind] += 1./(right_ind + 1)
            
    return comat

In [49]:
def train_GloVe(co_oc_matrix, embeding_size, batch_size = 50, iterations = 1000):
    glove = GloVe(co_oc_matrix, embeding_size)
    optimizer = optim.SGD(glove.parameters(), 0.01)
    
    for i in range(iterations):
        avg_loss = 0.0
        for batch in range(text_size// batch_size):
            in_data, out_data = get_batch(len(co_oc_matrix), batch_size) #?? for i in len(batch)
        
            loss = glove(in_data, out_data)
            avg_loss += loss
        
        print("%s-epoch, mean loss = %s"%(str(i),str(loss.data[0])))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    word_embeddings = glove.embeddings()
    
    return word_embeddings

In [50]:
#test get_batch()
np.random.seed(1)
in_index, out_index = get_batch(len(comat), 50)
print(in_index, out_index)

[59905 28439 50506 33090 46854  9812 29110 26999 31240 23157 10693 36002
 20899 57200 27047  9053 49572 23003 61864 47391 15887 38609 38624 13623
 32896 43843 20514 37153 65182 21056 63186 14492 21318 68954 32524  4286
 45650 19153 36903 22653 66520 19462 29534 25457 60554 39461 10521 62429
 15405 15255] [43104 52147 36843 36379 12212 31077 17747 34198 53168 14580 47340 43621
 21046 54965 19583 17366 23566 18840 11205 51945 34174 66160  8305 10780
 70213 56895 23268 29139 69077 32958  5384 53434 37211  2365 37216 13405
 40957 53745  6235 63227  5382 53739 11301 50963 37007 65976 12833 16927
 31983 65237]


In [51]:
word2ind['the']

1289

In [90]:
# comat = get_co_oc_matrix(vocabulary_size)
print(word2ind['was'])
print(comat.shape)
print(word2ind['woman']) # word2ind doesn't have 'woman'

1432
(70889, 70889)


KeyError: 'woman'

### !!!loss does not work normally

In [55]:
word_embeddings = train_GloVe(comat, 50, 20, 300)

0-epoch, mean loss = 5.030234024161473e-05
1-epoch, mean loss = 2.834257247741334e-05
2-epoch, mean loss = 4.5230855903355405e-05
3-epoch, mean loss = 5.2493724069790915e-05
4-epoch, mean loss = 3.600874697440304e-05
5-epoch, mean loss = 2.431717985018622e-05
6-epoch, mean loss = 5.87908252782654e-05
7-epoch, mean loss = 3.603397999540903e-05
8-epoch, mean loss = 0.0033349920995533466
9-epoch, mean loss = 4.6180277422536165e-05
10-epoch, mean loss = 1.3806435163132846e-05
11-epoch, mean loss = 3.1271250918507576e-05
12-epoch, mean loss = 3.887403727276251e-05
13-epoch, mean loss = 4.2675128497648984e-05
14-epoch, mean loss = 2.3623808374395594e-05
15-epoch, mean loss = 2.925370426964946e-05
16-epoch, mean loss = 6.123475031927228e-05
17-epoch, mean loss = 1.9422164768911898e-05
18-epoch, mean loss = 5.567950938711874e-05
19-epoch, mean loss = 4.8355392209487036e-05
20-epoch, mean loss = 2.4777084036031738e-05
21-epoch, mean loss = 4.488590275286697e-05
22-epoch, mean loss = 3.544255014

184-epoch, mean loss = 3.2974890928016976e-05
185-epoch, mean loss = 4.21784607169684e-05
186-epoch, mean loss = 3.248563007218763e-05
187-epoch, mean loss = 6.554897117894143e-05
188-epoch, mean loss = 2.665251122380141e-05
189-epoch, mean loss = 2.736884016485419e-05
190-epoch, mean loss = 2.2819229343440384e-05
191-epoch, mean loss = 5.164226604392752e-05
192-epoch, mean loss = 2.965553358080797e-05
193-epoch, mean loss = 3.508747249725275e-05
194-epoch, mean loss = 4.009225085610524e-05
195-epoch, mean loss = 3.2266954804072157e-05
196-epoch, mean loss = 3.4527532989159226e-05
197-epoch, mean loss = 3.2657037081662565e-05
198-epoch, mean loss = 4.2160980228800327e-05
199-epoch, mean loss = 4.473894296097569e-05
200-epoch, mean loss = 3.200015635229647e-05
201-epoch, mean loss = 2.662545557541307e-05
202-epoch, mean loss = 3.005926191690378e-05
203-epoch, mean loss = 4.563562833936885e-05
204-epoch, mean loss = 3.506509892758913e-05
205-epoch, mean loss = 4.013781654066406e-05
206-e

#### 2. Adversarial Feature Learning

refer to [this](https://github.com/github-pengge/adversarial_invariance_feature_learning) code and [paper](https://arxiv.org/pdf/1705.11122.pdf)

Replace the last term in Eq.(3) to the GloVe loss

In [57]:
gender_word_path = '/zf2/jz4fu/Github/CS269/debiaswe/data'
gender_word_file = '/zf2/jz4fu/Github/CS269/debiaswe/data/gender_specific_full.json'

In [60]:
import json
gender_words = json.load(open(gender_word_file))

In [63]:
# word2ind is a dict
# gender_words_index are all index of gender words
gender_words_index = [word2ind[item] if item in word2ind else -1 for item in gender_words  ]

In [65]:
len(gender_words_index)

1441

In [67]:
class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Discriminator, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.linear1 = nn.Linear(self.input_size, self.hidden_size, bias = True)
        self.linear2 = nn.Linear(self.input_size, self.hidden_size, bias = True)
        self.lrelu = nn.LeakyReLU(negative_slope = 0.2)
        self.sigmoid = nn.Sigmoid()
        
        #initialize weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.02)
                m.bias.data.fill_(0)
                
    def forward(self, x, s):
        x = self.lrelu(self.linear1(x))
        score = self.sigmoid(t.dot(self.linear2(s), x))
        
        return score
        

In [84]:
from sklearn import decomposition

In [86]:
class Adversarial_FL(object):
    def __init__(self, D, word_embeddings, cuda = True):
        self.D = D
        self.word_embeddings = word_embeddings
        self.cuda = cuda
        self.embedding_size = embedding_size
        self.gender_index = [x for x in gender_words_index if x >= 0]
        
        if self.cuda:
            self.D.cuda()
            
    def train(self, lr = 1e-3, batch_size = 50, epochs = 1000):
        s = self.get_s(word_embeddings)
        pred_criterion = nn.BCELoss()
        
        if self.cuda:
            s = s.cuda()
            pred_criterion = pred_criterion.cuda()
            
        optimizer_D = optim.Adam(self.D.parameters(), lr = lr, betas = (0.5, 0.999))        
        
        for epoch in range(epochs):
            begin_time = time.time()
            avg_loss = 0.
            # Load data (word embedding)
            for i in range(text_size//batch_size):
                batch_id = t.randn(vocabulary_size, batch_size).int() #type(dtype)
                h = Variable(t.FloatTensor(self.word_embeddings[batch_id]).cuda()) #our word embedding
                label_s = t.from_numpy([1 if b_id in gender_words_index else 0 for b_id in batch_id])
                pred_s = D(h.detach(), s)
                D_loss = pred_criterion(pred_s, label_s)  #???How to get pred_s
                avg_loss += D_loss / batch_size
            print("%s-epoch, mean loss = %s"%(str(epoch),str(avg_loss.data[0])))
            D_loss.backward()
            optimizer_D.step()
        
    def get_s(self, word_embeddings):
        """
        Get $s$ from 14 definitional pairs of words. 
        
        Take all differences between the embeddings of the word pairs.
        Use PCA to extract the 1st (i.e. largest) components as $s$.
        """
        def_pairs = json.load(open('../debiaswe/data/definitional_pairs.json'))
        def_pairs_ind = [[word2ind[pair[0]], word2ind[pair[1]]] 
                         for pair in def_pairs ]
        diff = [word_embeddings[ind_pair[0]] - word_embeddings[ind_pair[1]]
                for ind_pair in def_pairs_ind]
        pca = decomposition.PCA(n_components = 1)
        pca.fit(np.array(diff))
        return pca.components_ # This returns the 1st principle component as $s$.
        
        

In [89]:
#train the Adversarial_FL
hidden_size = 50
batch_size = 64
training_epochs = 10
cuda = True
lr = 1e-3
D = Discriminator(embedding_size, hidden_size, 1)
adv = Adversarial_FL(D, word_embeddings, cuda)
adv.train(lr, batch_size, 10)

KeyError: 'woman'