In [2]:
# Importing Libraries
from collections import defaultdict
from operator import itemgetter
from pathlib import Path
import numpy as np
import pandas as pd
from collections import namedtuple
from tabulate import tabulate
import re 

import torch
import os

from adversarial_debiasing import AdversarialDebiasing
from load_data import load_data, transform_data, Datapoint

from load_vectors import load_pretrained_vectors, load_vectors
import config
import utility_functions
import qualitative_evaluation

import gensim
import gzip
import pickle

In [3]:
# For autoreloading changes made in other python scripts:
# the autoreload extension re-imports edited modules, so changes to the project
# files become visible here without restarting the kernel.
%load_ext autoreload
# Mode 2 asks autoreload to reload all modules before each execution.
%autoreload 2

In [None]:
# Loading the word vectors dictionary into `word_vectors`
# For Wikipedia2Vec - use config.wiki_embedding_data_path and config.wiki_embedding_type
# For Glove - use config.glove_embedding_data_path and config.glove_embedding_type
# For GoogleNews (Word2Vec) - use config.google_embedding_data_path and config.google_embedding_type
# NOTE(review): this call selects "Wikipedia2Vec", while the later cells load and
# inspect 'GNews' checkpoints - confirm the vectors match the checkpoints in use.
word_vectors = load_pretrained_vectors("Wikipedia2Vec")


In [5]:
# Read the Google analogies training set and preview its first ten entries
analogy_dataset = load_data()
analogy_dataset[:10]

[Raw_Datapoint(x1='Athens', x2='Greece', x3='Baghdad', y='Iraq', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Bangkok', y='Thailand', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Beijing', y='China', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Berlin', y='Germany', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Bern', y='Switzerland', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Cairo', y='Egypt', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Canberra', y='Australia', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Hanoi', y='Vietnam', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Havana', y='Cuba', task='capital-common-countries'),
 Raw_Datapoint(x1='Athens', x2='Greece', x3='Helsinki', y='Finland', task='capital-common-cou

In [6]:
# Attach the word embeddings to every analogy; the call also returns the gender subspace
transformed_analogy_dataset, gender_subspace = transform_data(
    word_vectors,
    analogy_dataset,
    use_boluk=False,
)

# All checks below look at the first transformed datapoint only
first_datapoint = transformed_analogy_dataset[0]

# Dimensionality of one word embedding, read off the ground-truth vector
word_embedding_dim = first_datapoint.gt_embedding.shape[0]

# The analogy input must be three embeddings long, the ground truth one embedding long,
# and the protected attribute must hold a single entry
assert first_datapoint.analogy_embeddings.shape[0] == word_embedding_dim * 3
assert first_datapoint.gt_embedding.shape[0] == word_embedding_dim
assert first_datapoint.protected.shape[0] == 1

# One shape per line: analogy input, ground truth, protected attribute
print(
    first_datapoint.analogy_embeddings.shape,
    first_datapoint.gt_embedding.shape,
    first_datapoint.protected.shape,
    sep="\n",
)

(900,)
(300,)
(1,)


In [None]:
# Grid search over learning rate x adversary loss weight, used to obtain the
# np.dot(w.T, g) values inspected further down.
learning_rate_list = [2 ** -exponent for exponent in (12, 6, 3)]
adversary_loss_weight_list = [1.0, 0.5, 0.1]

# Embedding label handed to the grid search for the saved model checkpoints
word_embedding_type = 'GNews'

# NOTE(review): the final 'models' argument is presumably the checkpoint output
# directory (a later cell reads from 'models/debiased' and 'models/non_debiased') - confirm.
utility_functions.grid_search(
    learning_rate_list,
    adversary_loss_weight_list,
    word_embedding_dim,
    gender_subspace,
    transformed_analogy_dataset,
    word_embedding_type,
    'models',
)


In [24]:
def load_model(model_path: Path, word_embedding_dim, gender_subspace):
    """Rebuild an AdversarialDebiasing model from a saved checkpoint.

    Args:
        model_path: Location of the checkpoint file. It is read with
            ``torch.load`` and must provide the entries "W1" and "W2".
        word_embedding_dim: Forwarded to the model as ``word_embedding_dim``.
        gender_subspace: Forwarded to the model as ``gender_subspace``.

    Returns:
        An AdversarialDebiasing instance whose ``W1`` and ``W2`` attributes
        are replaced by the values stored in the checkpoint.

    Note:
        All other constructor arguments are hard-coded here, and ``debias``
        is always False regardless of which checkpoint is loaded.
    """
    # map_location places the loaded tensors on the CPU
    checkpoint = torch.load(str(model_path), map_location=torch.device('cpu'))

    hyperparameters = dict(
        seed=42,
        word_embedding_dim=word_embedding_dim,
        num_epochs=500,
        debias=False,
        gender_subspace=gender_subspace,
        batch_size=256,
        adversary_loss_weight=0.1,
        classifier_learning_rate=2 ** -6,
        adversary_learning_rate=2 ** -6,
    )
    model = AdversarialDebiasing(**hyperparameters)

    # Overwrite the freshly constructed weights with the stored ones
    for weight_name in ("W1", "W2"):
        setattr(model, weight_name, checkpoint[weight_name])

    return model

# Loaded checkpoints grouped by embedding type (the first component of the file name)
debiased_models = defaultdict(list)
biased_models = defaultdict(list)


ModelResult = namedtuple('ModelResult', ['best_model', 'last_model', 'embedding_type', 'learning_rate', 'adversary_weight', 'debiased'])

# Best-checkpoint file names look like "<embedding>_<learning rate>_<adversary weight>.pckl".
# A raw string keeps "\d" from being read as a string escape, and "\." makes sure
# only a literal ".pckl" extension matches.
checkpoint_name_pattern = re.compile(r'^([A-Za-z]+)_([\d.]+)_([\d.]+)\.pckl$')

for model_base_path in [Path('models/debiased'), Path('models/non_debiased')]:
    debiased = 'non_debiased' not in str(model_base_path)
    models_by_embedding = debiased_models if debiased else biased_models

    # Sorted so the order of the collected results does not depend on the
    # directory listing order.
    for model_path in sorted(model_base_path.iterdir()):
        # "_last" checkpoints are loaded together with their best counterpart below
        if model_path.stem.endswith('_last'):
            continue

        name_match = checkpoint_name_pattern.search(model_path.name)
        if name_match is None:
            # Stray files would otherwise fail with an AttributeError on `None`
            print(f"Skipping file with unexpected name: {model_path}")
            continue

        # Hyper-parameters stay strings, exactly as written in the file name
        embeddings, learning_rate, adversary_weight = name_match.groups()

        best_model = load_model(model_path, word_embedding_dim, gender_subspace)

        last_model_path = model_path.parent / f"{model_path.stem}_last{model_path.suffix}"
        last_model = load_model(last_model_path, word_embedding_dim, gender_subspace)

        models_by_embedding[embeddings].append(
            ModelResult(best_model, last_model, embeddings, learning_rate, adversary_weight, debiased)
        )
        
        

In [19]:
# Show both checkpoint collections together with their number of embedding types,
# followed by the debiased GNews entries
for model_collection in (debiased_models, biased_models):
    print(model_collection, len(model_collection))
print(debiased_models['GNews'])

defaultdict(<class 'list'>, {'Glove': [ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D6FA8448>, last_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D6FA8788>, embedding_type='Glove', learning_rate='0.000244140625', adversary_weight='0.1', debiased=True), ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1C31E4988>, last_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1C31E4688>, embedding_type='Glove', learning_rate='0.000244140625', adversary_weight='0.5', debiased=True), ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1C31E4C08>, last_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1C31E4AC8>, embedding_type='Glove', learning_rate='0.000244140625', adversary_weight='1.0', debiased=True), ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D6485548>, last_model=<adversarial

In [12]:
# Dot product of W1 with the gender subspace for every debiased GNews model,
# laid out as adversary weight (rows) x learning rate (columns).
# NOTE(review): this cell was labelled "Last model", yet it reads `best_model` -
# confirm which checkpoint is meant.
learning_rates = sorted({model.learning_rate for models in debiased_models.values() for model in models})
adversary_weights = sorted({model.adversary_weight for models in debiased_models.values() for model in models})

box_df_debiased = pd.DataFrame([], columns=learning_rates, index=adversary_weights)

for model_result in debiased_models['GNews']:
    classifier_weights = model_result.best_model.W1.clone().detach().numpy()
    gender_component = np.dot(classifier_weights.T, gender_subspace.T).item()
    box_df_debiased.loc[model_result.adversary_weight, model_result.learning_rate] = gender_component

box_df_debiased

Unnamed: 0,0.000244140625,0.015625,0.125
0.1,0.336968,0.723946,17.0963
0.5,0.339728,-0.25536,-6.16493
1.0,0.326831,-0.379391,-0.536122


In [20]:
# Best model: dot product of W1 with the gender subspace for every non-debiased
# GNews model, laid out as adversary weight (rows) x learning rate (columns).
# NOTE(review): the results come from `biased_models` but are stored in
# `box_df_debiased`, replacing any earlier table of that name - confirm this is intended.
box_df_debiased = pd.DataFrame([], columns=learning_rates, index=adversary_weights)

for model_result in biased_models['GNews']:
    classifier_weights = model_result.best_model.W1.clone().detach().numpy()
    gender_component = np.dot(classifier_weights.T, gender_subspace.T).item()
    box_df_debiased.loc[model_result.adversary_weight, model_result.learning_rate] = gender_component

box_df_debiased


Unnamed: 0,0.000244140625,0.015625,0.125
0.1,0.00297724,-0.0917036,-0.0926525
0.5,0.00297724,-0.0917036,-0.0926525
1.0,0.00297724,-0.0917036,-0.0926525


In [81]:
# 
# Dot product of the last-checkpoint W1 with the gender subspace for every debiased
# GNews model, laid out as adversary weight (rows) x learning rate (columns).
# NOTE(review): the table is named `box_df_biased` but is filled from
# `debiased_models` - confirm which collection is intended.
box_df_biased = pd.DataFrame([], columns=learning_rates, index=adversary_weights)

# Loop over the ModelResult list of one embedding type: iterating the defaultdict
# itself yields its string keys, which have no `adversary_weight` attribute.
for model_result in debiased_models['GNews']:
    # gender_subspace is transposed exactly as in the other projection tables of this notebook
    box_df_biased.loc[model_result.adversary_weight, model_result.learning_rate] = np.dot(model_result.last_model.W1.clone().detach().numpy().T, gender_subspace.T).item()

box_df_biased



# debiased_model_best.W1 = best_state_dict["W1"]
# debiased_model_best.W2 = best_state_dict["W2"]
# debiased_model_last.W1 = last_state_dict["W1"]
# debiased_model_last.W2 = last_state_dict["W2"]
# 
# print("Best : {}".format(np.dot(debiased_model_best.W1.clone().detach().cpu().numpy().T, gender_subspace.T)))
# 
# print("Last : {}".format(np.dot(debiased_model_last.W1.clone().detach().cpu().numpy().T, gender_subspace.T)))


Unnamed: 0,0.000244140625,0.015625,0.125
0.1,0.0,0.0,0.0
0.5,0.0,0.0,0.0
1.0,0.0,0.0,0.0


In [30]:
# Pick one debiased and one non-debiased GNews model trained with the same setting.
# Both values are compared as strings, matching how they are stored in ModelResult.
lr = '0.000244140625'
selected_setting = (lr, '0.1')  # (learning rate, adversary weight)

debiased_matches = [m for m in debiased_models['GNews'] if (m.learning_rate, m.adversary_weight) == selected_setting]
debiased_model_result = debiased_matches[0]
print(debiased_model_result)
debiased_model = debiased_model_result.best_model

biased_matches = [m for m in biased_models['GNews'] if (m.learning_rate, m.adversary_weight) == selected_setting]
biased_model_result = biased_matches[0]
print(biased_model_result)
biased_model = biased_model_result.best_model

ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D6FB4308>, last_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D6FB4FC8>, embedding_type='GNews', learning_rate='0.000244140625', adversary_weight='0.1', debiased=True)
ModelResult(best_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A04309DBC8>, last_model=<adversarial_debiasing.AdversarialDebiasing object at 0x000001A1D8192B48>, embedding_type='GNews', learning_rate='0.000244140625', adversary_weight='0.1', debiased=False)


#### Qualitative Evaluation

In [31]:

# Get sexism traps as word embeddings (`datapoints`) and words (`test_analogies`)
datapoints, test_analogies = qualitative_evaluation.get_datapoints(word_vectors)

# Predictions of the non debiased model, then the entries of `word_vectors`
# most similar to each prediction
# (presumably nearest neighbours - confirm in utility_functions.obtain_most_similar)
non_debiased_predictions = biased_model.predict(datapoints)
non_debiased_most_similar_list = utility_functions.obtain_most_similar(non_debiased_predictions, word_vectors)

# Predictions of the debiased model, processed the same way
debiased_predictions = debiased_model.predict(datapoints)
debiased_most_similar_list = utility_functions.obtain_most_similar(debiased_predictions, word_vectors)

# Print similarity results for both models, one table per test analogy
qualitative_evaluation.print_combined_table(non_debiased_most_similar_list, debiased_most_similar_list, test_analogies)

he : strong :: she : 
+-------------+--------------+-------------+--------------+
| Biased      |       Biased | Debiased    |     Debiased |
| Neighbour   |   Similarity | Neighbour   |   Similarity |
|-------------+--------------+-------------+--------------|
| robust      |        0.492 | robust      |        0.504 |
| stong       |        0.48  | stong       |        0.498 |
| solid       |        0.46  | solid       |        0.488 |
| stronger    |        0.453 | stronger    |        0.476 |
| weak        |        0.445 | strongest   |        0.457 |
| strongest   |        0.438 | weak        |        0.449 |
| Strong      |        0.432 | Strong      |        0.447 |
| perky       |        0.384 | STRONG      |        0.388 |
| buoyant     |        0.381 | resilient   |        0.385 |
+-------------+--------------+-------------+--------------+
he : boss :: she : 
+---------------------------+--------------+---------------------------+--------------+
| Biased                    | 