In [1]:
import numpy as np
import random
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import transformers
from transformers import get_linear_schedule_with_warmup
# from datasets import Dataset
from peft import LoraConfig, PeftConfig, get_peft_model
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix,
                             f1_score,
                             recall_score)
from sklearn.model_selection import train_test_split
from torch.utils.tensorboard import SummaryWriter
from fairness_loss import fair_loss
from evaluation import gap, disparate_impact, evaluate


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# get working directory
cwd = os.getcwd()
data_dir = os.path.join(cwd, 'data')
model_dir = os.path.join(cwd, 'model')

# load data and pre-process datasets
train_df = pd.read_csv(os.path.join(data_dir, 'gptTestNames.csv'))
# test_df = pd.read_csv(os.path.join(data_dir, 'gptTestNames.csv'))
# val_df = pd.read_csv(os.path.join(data_dir, 'gptValNames.csv'))

In [3]:
# X_train = list()
# X_test = list()
# for race in ["API", "White", "Black", "Hispanic"]:
#     train, test  = train_test_split(train_df[train_df.label==race], 
#                                     train_size=300,
#                                     test_size=300, 
#                                     random_state=42)
#     X_train.append(train)
#     X_test.append(test)

X_train, X_test  = train_test_split(train_df, 
                                train_size=int(len(train_df) * 0.0008),
                                test_size=int(len(train_df) * 0.0002), 
                                random_state=42)

# X_train = pd.concat(X_train).sample(frac=1, random_state=10)
# X_test = pd.concat(X_test)

# eval_idx = [idx for idx in train_df.index if idx not in list(train.index) + list(test.index)]
# X_eval = train_df[train_df.index.isin(eval_idx)]
X_train, X_eval = train_test_split(X_train, 
                                test_size=0.1,
                                random_state=42)
X_train = X_train.reset_index(drop=True)
X_eval = X_eval.reset_index(drop=True)

In [4]:
len(X_train), len(X_test), len(X_eval)

(1867, 518, 208)

In [5]:
def generate_prompt(data_point, shuffle=False):
    if not shuffle:
        return f"""
                Guess the race of the name enclosed in square brackets into 1 of the following 4 categories: Asian, Black, Hispanic, or White. 
                Your answer should only be the category name.
                [{data_point["name"]}].
                ANSWER: {data_point["label"]}
                """.strip()
    
    categories = ["Hispanic", "Black", "White", "Asian"]
    random.shuffle(categories)
    categories_str = ', '.join(categories)
    return f"""
            Guess the race of the name enclosed in square brackets into 1 of the following 4 categories: {categories_str}. 
            Your answer should only be the category name.
            [{data_point["name"]}]
            ANSWER: {data_point["label"]}
            """.strip()

def generate_test_prompt(data_point, shuffle=False):
    if not shuffle:
        return f"""
                Guess the race of the name enclosed in square brackets into 1 of the following 4 categories: Asian, Black, Hispanic, or White. 
                Your answer should only be the category name.
                [{data_point["name"]}]
                ANSWER: """.strip()
    
    categories = ["Hispanic", "Black", "White", "Asian"]
    random.shuffle(categories)
    categories_str = ', '.join(categories)
    return f"""
            Guess the race of the name enclosed in square brackets into 1 of the following 4 categories: {categories_str}. 
            Your answer should only be the category name.
            [{data_point["name"]}]
            ANSWER: """.strip()

race2num = {
    'API': 0,
    'Black': 1,
    'Hispanic': 2,
    'White': 3
}

y_train_true = [race2num[category] for category in X_train.label.values]
X_train_1 = pd.DataFrame(X_train.apply(lambda row: generate_prompt(row, shuffle=True), axis=1), 
                       columns=["name"])

y_eval_true = [race2num[category] for category in X_eval.label.values]
X_eval_1 = pd.DataFrame(X_eval.apply(lambda row: generate_prompt(row, shuffle=True), axis=1), 
                       columns=["name"])

y_test_true = X_test.label.values
X_test_1 = pd.DataFrame(X_test.apply(lambda row: generate_test_prompt(row, shuffle=True), axis=1), 
                      columns=["name"])
X_test_2 = pd.DataFrame(X_test.apply(lambda row: generate_test_prompt(row, shuffle=False), axis=1), 
                      columns=["name"])

# train_data = Dataset.from_pandas(X_train_1)
# eval_data = Dataset.from_pandas(X_eval_1)

In [6]:
class MyDataset(Dataset):
    def __init__(self, data, label):
        self.data = data['name']
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data_item = self.data[idx] 
        label_item = self.label[idx]
        return [data_item, label_item]

train_data = MyDataset(X_train_1, y_train_true)
eval_data = MyDataset(X_eval_1, y_eval_true)
def custom_collate(batch):
    data = [item[0] for item in batch]
    target = [item[1] for item in batch]
    return [data, target]
train_loader = DataLoader(train_data, batch_size=2, shuffle=True, collate_fn=custom_collate)
eval_loader = DataLoader(eval_data, batch_size=2, shuffle=False, collate_fn=custom_collate)

In [7]:
import logging
logger = logging.getLogger(__name__)
log_dir = os.path.join(cwd, 'logs','fair_loss')
if not os.path.exists(log_dir):
    os.mkdir(log_dir)
logging.basicConfig(filename=os.path.join(log_dir, "fairloss.txt"),
                    filemode='a',
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.DEBUG)
def evaluate(y_true, y_pred):
    labels = ['API', 'Black', 'Hispanic', 'White']
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    logging.info(f'Accuracy: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred, target_names=labels)
    print('\nClassification Report:')
    print(class_report)
    logging.info('\nClassification Report:')
    logging.info(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)
    print('\nConfusion Matrix:')
    print(conf_matrix)
    logging.info('\nConfusion Matrix:')
    logging.info(conf_matrix)

In [8]:
model_name = "meta-llama/Llama-2-7b-chat-hf"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
        model_name, 
        torch_dtype=torch.bfloat16,
        device_map=device,
        token='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'
        )

output_dir="trained_weigths"

peft_config = LoraConfig(
        lora_alpha=16, 
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
)

tokenizer = AutoTokenizer.from_pretrained(model_name,)
# if tokenizer.pad_token_id is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

model = get_peft_model(model, peft_config)
trainable_params, all_param = model.get_nb_trainable_parameters()

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
lr_scheduler = get_linear_schedule_with_warmup(
            optimizer=optimizer,
            num_warmup_steps=0,
            num_training_steps=(len(train_data) * 2),
        )


# compute_dtype = getattr(torch, "float16")

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True, 
#     bnb_4bit_quant_type="nf4", 
#     bnb_4bit_compute_dtype=compute_dtype,
#     bnb_4bit_use_double_quant=False,
# )

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     torch_dtype=compute_dtype,
#     # quantization_config=bnb_config, 
# )

# model.config.use_cache = False
# model.config.pretraining_tp = 1

# tokenizer = AutoTokenizer.from_pretrained(model_name, 
#                                           trust_remote_code=True,
#                                          )
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "right"

# model, tokenizer = setup_chat_format(model, tokenizer)

Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.80s/it]


In [9]:
class AverageMeter(object):
    """
    Computes and stores the average and current value
    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [10]:
from datetime import datetime
num_epochs = 2
logwriter = SummaryWriter(os.path.join(cwd, output_dir, "runs",datetime.now().strftime("%b%d_%H-%M-%S")))
current_step = 0
avg_train_loss = AverageMeter()
avg_val_loss = AverageMeter()
for epoch in tqdm(range(num_epochs)):
    print(f"Epoch {epoch + 1}")
    model.train()
    for data, labels in tqdm(train_loader):
        optimizer.zero_grad()
        input_ids = tokenizer(data, return_tensors="pt", padding=True, truncation=True).input_ids.to(device)
        vir_labels = input_ids.to(device)
        outputs = model(input_ids=input_ids, labels=vir_labels)
        labels = torch.tensor(labels).to(device) 
        fairness_loss = fair_loss(outputs.logits, labels, tokenizer, lambda_val=1.0)
        loss = outputs.loss + fairness_loss
        logwriter.add_scalar('Training/train_loss_step', loss.detach().float().item(), current_step)
        logwriter.add_scalar('Training/fairness_loss_step', fairness_loss.detach().float().item(), current_step)
        current_step += 1
        avg_train_loss.update(loss.detach().float().item())
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        # print(f"Loss: {loss.item()}")
        
    model.eval()
    # y_pred = list()
    for data, labels in tqdm(eval_loader):
        input_ids = tokenizer(data, return_tensors="pt", padding=True, truncation=True).input_ids.to(device)
        vir_labels = input_ids.to(device)
        outputs = model(input_ids=input_ids, labels=vir_labels)
        labels = torch.tensor(labels).to(device)
        fairness_loss = fair_loss(outputs.logits, labels, tokenizer, lambda_val=1.0)
        loss = outputs.loss + fairness_loss
        avg_val_loss.update(loss.detach().float().item())

    logwriter.add_scalar('Training/train_loss_epoch', avg_train_loss.avg, epoch)
    logwriter.add_scalar('Training/val_loss_epoch', avg_val_loss.avg, epoch)
        
    # evaluate(y_true, y_pred)


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 934/934 [01:45<00:00,  8.89it/s]
100%|██████████| 104/104 [00:04<00:00, 21.71it/s]
 50%|█████     | 1/2 [01:49<01:49, 109.86s/it]

Epoch 2


100%|██████████| 934/934 [01:40<00:00,  9.26it/s]
100%|██████████| 104/104 [00:04<00:00, 21.00it/s]
100%|██████████| 2/2 [03:35<00:00, 107.84s/it]


In [12]:
def predict(test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(test))):
    # for i in [69, 222, 676, 1270, 2060, 3684, 3827, 4472, 4799, 4972, 5120]:
        prompt = test.iloc[i]["name"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer, 
                        max_new_tokens = 4, 
                        # temperature = 0.01,
                        do_sample = False,
                       )
        result = pipe(prompt)
        answer = result[0]['generated_text'].split(":")[-1].lower()
        # print(prompt, answer)
        if "asian" in answer or "api" in answer:
            y_pred.append("API")
        elif "black" in answer:
            y_pred.append("Black")
        elif "hispanic" in answer:
            y_pred.append("Hispanic")
        elif "white" in answer:
            y_pred.append("White")
        else:
            y_pred.append("none")
            print(prompt,answer)
    return y_pred

In [13]:
model.eval()
y_pred = predict(X_test_2, model, tokenizer)

  0%|          | 0/518 [00:00<?, ?it/s]The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausa

  0%|          | 1/518 [00:00<01:49,  4.72it/s]The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPT

In [14]:
# y_pred = ['API' if 'none' in x else x for x in y_pred]
evaluate(y_test_true, y_pred)

Accuracy: 0.815

Classification Report:
              precision    recall  f1-score   support

         API       1.00      0.55      0.71        11
       Black       0.80      0.15      0.25        80
    Hispanic       0.81      0.91      0.86        92
       White       0.81      0.96      0.88       335

    accuracy                           0.81       518
   macro avg       0.86      0.64      0.67       518
weighted avg       0.81      0.81      0.77       518


Confusion Matrix:
[[  6   0   1   4]
 [  0  12   7  61]
 [  0   0  84   8]
 [  0   3  12 320]]


In [15]:
from metrics import gap, disparate_impact
logging.info(f'1-GAP: {gap(y_test_true, y_pred)}')
logging.info(f'Disparate Impact: {disparate_impact(y_test_true, y_pred)}')


1-GAP:  0.8777
Disparate Impact is: 0.5612987895256917


: 