#### Importing Libraries

In [None]:
from collections import defaultdict
from pathlib import Path
import numpy as np
import pandas as pd
from collections import namedtuple
from tabulate import tabulate

import torch
import os

from adversarial_debiasing import AdversarialDebiasing
from load_data import load_data, transform_data, Datapoint

from load_vectors import load_pretrained_vectors, load_vectors
import config
import utility_functions
import qualitative_evaluation

import gensim
import gzip
import pickle

#### Autoreloading changes made in other Python scripts

In [None]:
# IPython magics: load the autoreload extension and switch it to mode 2.
# Presumably this makes edits to the imported project modules take effect
# without restarting the kernel — see the IPython autoreload documentation.
%load_ext autoreload
%autoreload 2

#### Loading the desired word vectors

In [None]:
# Choose the (data path, embedding type) pair from config for the desired vectors:
#   Wikipedia2Vec         -> config.wiki_embedding_data_path,   config.wiki_embedding_type
#   Glove                 -> config.glove_embedding_data_path,  config.glove_embedding_type
#   GoogleNews (Word2Vec) -> config.google_embedding_data_path, config.google_embedding_type
# The GoogleNews pair is the one selected here.
word_vectors = load_pretrained_vectors(
    config.google_embedding_data_path,
    config.save_dir,
    config.google_embedding_type,
)

#### Loading the Google Analogies Training Dataset

In [None]:
# Load the analogy dataset that the models are trained on.
analogy_dataset = load_data()
# Preview the first six entries as the cell output.
analogy_dataset[0:6]

#### Transforming the above dataset to include the respective word embeddings

In [None]:
# Pair every analogy with its word embeddings; transform_data also returns the
# gender subspace. use_boluk is switched off here (see transform_data for what
# the flag selects).
transformed_analogy_dataset, gender_subspace = transform_data(
    word_vectors,
    analogy_dataset,
    use_boluk=False,
)

# The embedding dimensionality is read off the first ground-truth embedding
word_embedding_dim = transformed_analogy_dataset[0].gt_embedding.shape[0]

#### Test the dimensions of the transformed analogy dataset components

In [None]:
# Sanity checks on the first transformed datapoint:
#   - the analogy input has three times the embedding dimensionality,
#   - the ground-truth target has exactly the embedding dimensionality,
#   - the protected attribute has a leading dimension of one.
assert transformed_analogy_dataset[0].analogy_embeddings.shape[0] == 3 * word_embedding_dim
assert transformed_analogy_dataset[0].gt_embedding.shape[0] == word_embedding_dim
assert transformed_analogy_dataset[0].protected.shape[0] == 1

# Show the three shapes, one per line
print(
    transformed_analogy_dataset[0].analogy_embeddings.shape,
    transformed_analogy_dataset[0].gt_embedding.shape,
    transformed_analogy_dataset[0].protected.shape,
    sep="\n",
)

In [None]:
# # To run the grid-search and obtain the np.dot(w.T, g) values
# learning_rate_list = [2 ** -12, 2 ** -6, 2 ** -3]
# adversary_loss_weight_list = [1.0, 0.5, 0.1]

# # For the saved model checkpoints pertaining to the word embedding type
# word_embedding_type = 'GNews'

# # Performing the grid search
# utility_functions.grid_search(learning_rate_list, adversary_loss_weight_list, word_embedding_dim, gender_subspace, transformed_analogy_dataset, word_embedding_type, 'models')

#### Flag indicating whether to use a pre-trained model or to train a model from scratch

In [None]:
# True: install the stored W1 weights from pretrained_parameters into fresh models.
# False: train both models from scratch using the stored hyper-parameters.
use_pretrained = True

#### Specify the type of word embeddings the model was (or will be) trained on; this key selects the stored parameters in both the pre-trained and the from-scratch cells below

In [None]:
# Key used to index pretrained_parameters[...][word_embedding_type] in the cells below.
# NOTE(review): the empty string looks like a placeholder that must be filled in
# before running those cells — the commented-out grid-search cell uses 'GNews';
# confirm the valid keys against pretrained_parameters.
word_embedding_type = ""

#### In case of using a pre-trained model

In [None]:
if use_pretrained:
    # NOTE(review): pretrained_parameters is not defined or imported anywhere in
    # the visible notebook — confirm where it comes from before running this cell.

    # Non-debiased variant: look up the stored W1, build the model, install the weights
    non_debiased_W1 = pretrained_parameters['non_debiased'][word_embedding_type]["W1"]
    non_debiased_model = AdversarialDebiasing(
        word_embedding_dim=word_embedding_dim,
        debias=False,
    )
    non_debiased_model.W1 = non_debiased_W1

    # Debiased variant: the same three steps with the 'debiased' entry
    debiased_W1 = pretrained_parameters['debiased'][word_embedding_type]["W1"]
    debiased_model = AdversarialDebiasing(
        word_embedding_dim=word_embedding_dim,
        debias=True,
    )
    debiased_model.W1 = debiased_W1

#### In case of using a model trained from scratch

In [None]:
if not use_pretrained:
    # NOTE(review): pretrained_parameters is not defined or imported anywhere in
    # the visible notebook — confirm where it comes from before running this cell.

    # Stored learning rate and adversary loss weight for the non-debiased model
    non_debiased_learning_rate = pretrained_parameters['non_debiased'][word_embedding_type]['learning_rate']
    non_debiased_adversary_loss_weight = pretrained_parameters['non_debiased'][word_embedding_type]['adversary_loss_weight']

    # Non-debiased model; the same learning rate is used for classifier and adversary
    non_debiased_model = AdversarialDebiasing(
        word_embedding_dim=word_embedding_dim,
        num_epochs=500,
        debias=False,
        gender_subspace=gender_subspace,
        batch_size=256,
        adversary_loss_weight=non_debiased_adversary_loss_weight,
        classifier_learning_rate=non_debiased_learning_rate,
        adversary_learning_rate=non_debiased_learning_rate,
    )

    # Fit the non-debiased model to the training dataset
    print("****************** Training the non-debiased model ********************")
    non_debiased_model.fit(dataset=transformed_analogy_dataset)

    # Stored learning rate and adversary loss weight for the debiased model
    debiased_learning_rate = pretrained_parameters['debiased'][word_embedding_type]['learning_rate']
    debiased_adversary_loss_weight = pretrained_parameters['debiased'][word_embedding_type]['adversary_loss_weight']

    # Debiased model; the same learning rate is used for classifier and adversary
    debiased_model = AdversarialDebiasing(
        word_embedding_dim=word_embedding_dim,
        num_epochs=500,
        debias=True,
        gender_subspace=gender_subspace,
        batch_size=256,
        adversary_loss_weight=debiased_adversary_loss_weight,
        classifier_learning_rate=debiased_learning_rate,
        adversary_learning_rate=debiased_learning_rate,
    )

    # Fit the debiased model to the training dataset
    print("****************** Training the debiased model ********************")
    debiased_model.fit(dataset=transformed_analogy_dataset)

In [None]:
# Recompute the embedding dimensionality from the first datapoint's ground-truth embedding.
word_embedding_dim = transformed_analogy_dataset[0].gt_embedding.shape[0]
# Training the variant of the model without debiasing
# NOTE(review): this rebinds non_debiased_model unconditionally, replacing whatever
# the use_pretrained cells produced — confirm that is intended when the whole
# notebook is run top to bottom.
# The learning rate (2 ** -6) and adversary loss weight (0.1) both appear in the
# commented-out grid-search lists.
non_debiased_model = AdversarialDebiasing(
    word_embedding_dim=word_embedding_dim,
    num_epochs=500,
    debias=False,
    gender_subspace=gender_subspace,
    batch_size=256,
    adversary_loss_weight=0.1,
    classifier_learning_rate = 2 ** -6,
    adversary_learning_rate = 2 ** -6
)
non_debiased_model.fit(dataset=transformed_analogy_dataset)

In [None]:
# Fetch the weights learned by the non-debiased model.
W1 = non_debiased_model.get_model_weights()
# Print the dot product of the transposed weights with the transposed gender subspace.
# The tensor is moved to the CPU before conversion because Tensor.numpy() only
# works on CPU tensors; .cpu() is a no-op when the tensor is already there. This
# matches how the debiased model's weights are converted later in the notebook.
print(np.dot(W1.detach().cpu().numpy().T, gender_subspace.T))

In [None]:
# Display the shape of gender_subspace as the cell output.
gender_subspace.shape

In [None]:
# Training the variant of the model with debiasing
# Alternative configuration, kept commented out (both learning rates 2 ** -8):
# debiased_model = AdversarialDebiasing(
#     word_embedding_dim=word_embedding_dim,
#     num_epochs=500,
#     debias=True,
#     gender_subspace=gender_subspace,
#     batch_size=256,
#     adversary_loss_weight=0.1,
#     classifier_learning_rate = 2 ** -8,
#     adversary_learning_rate = 2 ** -8
# )
# Active configuration: identical except that both learning rates are 2 ** -6.
# NOTE(review): this rebinds debiased_model unconditionally, replacing whatever
# the use_pretrained cells produced — confirm that is intended.
debiased_model = AdversarialDebiasing(
    word_embedding_dim=word_embedding_dim,
    num_epochs=500,
    debias=True,
    gender_subspace=gender_subspace,
    batch_size=256,
    adversary_loss_weight=0.1,
    classifier_learning_rate = 2 ** -6,
    adversary_learning_rate = 2 ** -6
)

# Fit the debiased model to the transformed analogy dataset.
debiased_model.fit(dataset=transformed_analogy_dataset)

In [None]:
# Fetch the weights learned by the debiased model.
W1 = debiased_model.get_model_weights()
# Print the dot product of the transposed weights with the transposed gender subspace;
# the tensor is cloned, moved to the CPU and detached before conversion to NumPy.
print(np.dot(W1.clone().cpu().detach().numpy().T,gender_subspace.T))

#### Qualitative Evaluation

In [None]:
# Get sexism traps as word embeddings (datapoints) and as words (test_analogies)
datapoints, test_analogies = qualitative_evaluation.get_datapoints(word_vectors)

# Predictions of the non debiased model
# NOTE(review): these come from qualitative_evaluation.get_non_debiased_predictions
# rather than from non_debiased_model.predict, so the non_debiased_model built above
# is not used here — confirm this asymmetry with the debiased path is intended.
non_debiased_predictions = qualitative_evaluation.get_non_debiased_predictions(datapoints, word_embedding_dim)
non_debiased_most_similar_list = utility_functions.obtain_most_similar(non_debiased_predictions, word_vectors)

# Predictions of the debiased model
debiased_predictions = debiased_model.predict(datapoints)
debiased_most_similar_list = utility_functions.obtain_most_similar(debiased_predictions, word_vectors)

# Print similarity results for both models
qualitative_evaluation.print_combined_table(non_debiased_most_similar_list, debiased_most_similar_list, test_analogies)