### Installations, Imports, and Classes

In [None]:
!pip3 install -q -U bitsandbytes==0.42.0
!pip3 install -q -U peft==0.8.2
!pip3 install -q -U trl==0.7.10
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U transformers==4.38.0
!pip3 install calflops

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.0/102.0 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━

In [None]:
# Setting up packages
import os
import torch
import transformers
import logging
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer
from google.colab import userdata
from google.colab import drive
from datetime import datetime
from datetime import timezone
# Mount Google Drive; LargeLanguageModel reads its params.json from under this mount point.
drive.mount('/content/drive')

# Huggingface token
# NOTE(review): os.environ only accepts str values, so this raises if the
# Colab secret lookup does not return a string -- confirm the secret exists.
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

# Setup device
# Uses the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Setup logging
# One log file per run, named after the UTC start time. The name contains
# spaces and colons and is created relative to the current working directory.
time_stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
file_id = f'log_{time_stamp}.txt'
# force=True replaces any handlers already attached to the root logger, so
# re-running this cell redirects logging to the new file.
logging.basicConfig(format='%(asctime)s %(message)s',
                     datefmt='%m/%d/%Y %I:%M:%S %p',
                     filename=file_id,
                     level=logging.INFO,
                    force=True)
logger = logging.getLogger(__name__)

# Setup memory usage
def return_memory():
  """Report CUDA memory usage of device 0.

  Returns:
    A two-item list of strings, [allocated, reserved], each expressed in
    units of 1024**3 bytes and rounded to one decimal place.
  """
  bytes_per_unit = 1024**3
  allocated = round(torch.cuda.memory_allocated(0) / bytes_per_unit, 1)
  reserved = round(torch.cuda.memory_reserved(0) / bytes_per_unit, 1)
  return [str(allocated), str(reserved)]

Mounted at /content/drive


In [None]:
class LargeLanguageModel():
    """Wrap a Hugging Face causal language model, its tokenizer and a LoRA config.

    Construction reads hyperparameters from ``params.json`` on the mounted
    Drive, loads the model and tokenizer for ``model_id`` (authenticating with
    the ``HF_TOKEN`` environment variable) and builds a ``LoraConfig``.
    ``finetune`` then runs TRL's ``SFTTrainer`` on a dataset wrapper.
    """

    def __init__(self,model_id,bnb_config = False):
        """Load hyperparameters, model, tokenizer and LoRA configuration.

        Args:
            model_id: Hugging Face Hub id of the causal LM to load.
            bnb_config: Quantization config forwarded to ``from_pretrained``
                (presumably a ``BitsAndBytesConfig``). The default ``False``
                means "no quantization".

        Raises:
            FileNotFoundError: If the hyperparameter file is missing.
            KeyError: If ``HF_TOKEN`` is not set or an expected
                hyperparameter key is absent.
        """
        self.model_id = model_id

        # hyperparameters
        with open('/content/drive/MyDrive/LLMs/code/params.json', 'r', encoding='utf-8') as file:
            params = json.load(file)
        self.train_params =params["training"]
        lora_params =params["lora"]
        self.token = os.environ['HF_TOKEN']
        self.bnb_config = bnb_config

        # Model object
        # `quantization_config` expects a config object or None; a bare False
        # (the default above) is not a valid config, so map falsy values to None.
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id,
                                             quantization_config=self.bnb_config or None,
                                             device_map={"":0},  # whole model on device index 0
                                             token=self.token)

        ## Modify model configuration parameters
        # self.model.config.use_cache=False
        # self.model.config.pretraining_tp=1
        # self.model = prepare_model_for_kbit_training(self.model)

        # Tokenizer object
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=self.token)
        # self.tokenizer.pad_token = self.tokenizer.eos_token
        # self.tokenizer.padding_side = 'right'

        # Lora object
        self.lora_config = LoraConfig(
                                    r=lora_params["r"],
                                    target_modules=lora_params["target_modules"],
                                    task_type=lora_params["task_type"],
                                )

        # logging.info(f"LLM class instantiated for model: {model_id}")
        # logging.info(f"Hyperparameters: \n{params}")


    def generate_example(self,text):
        """Generate a continuation of ``text`` with the wrapped model.

        Args:
            text: Prompt string to tokenize and feed to the model.

        Returns:
            The decoded first generated sequence (prompt included), with
            special tokens stripped. At most 50 new tokens are generated.
        """
        # Relies on the module-level `device` defined in the setup cell.
        inputs = self.tokenizer(text, return_tensors="pt").to(device)
        outputs = self.model.generate(**inputs, max_new_tokens=50)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def formatting_func(self,example):
        """Render instruction/response pairs into training prompts.

        SFTTrainer calls this on batches, where ``example['instruction']`` and
        ``example['response']`` are lists. Every row of the batch is formatted;
        formatting only element ``[0]`` would silently discard the rest of
        each batch.

        Args:
            example: Mapping with 'instruction' and 'response' entries, either
                lists of strings (a batch) or plain strings (a single row).

        Returns:
            A list with one formatted string per input row.
        """
        instructions = example['instruction']
        responses = example['response']
        # A single (unbatched) row carries plain strings; treat it as a batch of one.
        if isinstance(instructions, str):
            instructions, responses = [instructions], [responses]
        # NOTE(review): these turn markers look like the Gemma chat format --
        # confirm they suit the model actually being tuned.
        return [
            f"<start_of_turn>user\n{instruction}<end_of_turn> <start_of_turn>model\n{response}<end_of_turn>"
            for instruction, response in zip(instructions, responses)
        ]

    def finetune(self,dataset):
        """Fine-tune the wrapped model with LoRA via TRL's SFTTrainer.

        Training arguments come from the "training" section of params.json.
        After training, memory usage and the trainer's log history are
        written to the log.

        Args:
            dataset: Object whose ``data["train"]`` holds the training split.
        """
        logging.info("Finetuning started")
        trainer = SFTTrainer(
            model = self.model,
            train_dataset = dataset.data["train"],
            args=transformers.TrainingArguments(
                per_device_train_batch_size = self.train_params["per_device_train_batch_size"],
                gradient_accumulation_steps = self.train_params["gradient_accumulation_steps"],
                warmup_steps = self.train_params["warmup_steps"],
                max_steps = self.train_params["max_steps"],
                learning_rate = self.train_params["learning_rate"],
                fp16 = self.train_params["fp16"],
                logging_steps = self.train_params["logging_steps"],
                output_dir = self.train_params["output_dir"],
                optim = self.train_params["optim"],
                # num_train_epochs = self.train_params["num_train_epochs"],
                # weight_decay = self.train_params["weight_decay"],
                # NOTE(review): max_seq_length is an SFTTrainer argument in TRL 0.7.x,
                # not a TrainingArguments field -- move it if it is re-enabled.
                # max_seq_length = self.train_params["max_seq_length"],
            ),
            peft_config = self.lora_config,
            formatting_func = self.formatting_func,
        )

        trainer.train()

        logging.info("Finetuning completed")
        memory_usage = return_memory()
        logging.info(f'Allocated memory:{memory_usage[0]} GB')
        logging.info(f'Cached memory:{memory_usage[1]} GB')
        trainer_log = str(trainer.state.log_history)
        logging.info(f"Trainer log:\n{trainer_log}")


class Dataset:
    """Holder for a dataset fetched with ``load_dataset``."""

    def __init__(self):
        # Stays None until load_data() has been called.
        self.data = None

    def load_data(self, dataset_id):
        """Load ``dataset_id`` into ``self.data`` and log CUDA memory usage.

        Args:
            dataset_id: Identifier passed straight to ``load_dataset``.
        """
        # Possible extension: def load_data(self, dataset_id, dataset_config = None)
        # for datasets that need an additional configuration parameter.
        logging.info(f"Loading dataset: {dataset_id}")
        self.data = load_dataset(dataset_id)
        logging.info("Loading dataset completed")

        usage = return_memory()
        logging.info(f'Allocated memory:{usage[0]} GB')
        logging.info(f'Cached memory:{usage[1]} GB')

    def print_dataset_values(self):
        """Print the 'train' entry of the loaded data."""
        train_split = self.data['train']
        print(train_split)

### Load Datasets

In [None]:
# NOTE: this cell redefines the Dataset class already declared in the
# "Installations, Imports, and Classes" section; the later definition wins.
class Dataset:
    """Holder for a dataset fetched with ``load_dataset``."""

    def __init__(self):
        # Stays None until load_data() has been called.
        self.data = None

    def load_data(self, dataset_id):
        """Load ``dataset_id`` into ``self.data`` and log CUDA memory usage.

        Args:
            dataset_id: Identifier passed straight to ``load_dataset``.
        """
        logging.info(f"Loading dataset: {dataset_id}")
        self.data = load_dataset(dataset_id)
        logging.info("Loading dataset completed")

        usage = return_memory()
        logging.info(f'Allocated memory:{usage[0]} GB')
        logging.info(f'Cached memory:{usage[1]} GB')

    def print_dataset_values(self):
        """Print the 'train' entry of the loaded data."""
        train_split = self.data['train']
        print(train_split)

In [None]:
# Setup memory usage
# NOTE: duplicates the return_memory helper defined in the setup cell at the
# top of the notebook.
def return_memory():
  """Return [allocated, reserved] CUDA memory of device 0 as strings.

  Each value is divided by 1024**3 and rounded to one decimal place.
  """
  unit = 1024**3
  readings = (torch.cuda.memory_allocated(0), torch.cuda.memory_reserved(0))
  return [str(round(reading / unit, 1)) for reading in readings]

### Dataset IDs

In [None]:
# Dataset identifiers passed to load_dataset() in the sections below,
# numbered to match the "Loading in Dataset N" headings.
dataset_id1 = "databricks/databricks-dolly-15k"
dataset_id2 = "HuggingFaceH4/no_robots"
dataset_id3 = "tiiuae/falcon-refinedweb" # massive -- only a small sample is loaded (the cell below streams the first 30 rows)
dataset_id4 = "Muennighoff/natural-instructions"
dataset_id5 = "qwedsacf/grade-school-math-instructions"
dataset_id6 = "HuggingFaceH4/instruction-dataset" # very small dataset
dataset_id7 = "HuggingFaceH4/h4_10k_prompts_ranked_gen"
dataset_id8 = "nvidia/HelpSteer"
dataset_id9 = "Unified-Language-Model-Alignment/Anthropic_HH_Golden"
# The remaining datasets come from toxicity / bias research.
dataset_id10 = "allenai/dolma"
dataset_id11 = "google/civil_comments"
dataset_id12 = "heegyu/bbq"
dataset_id13 = "s-nlp/paradetox"
dataset_id14 = "allenai/prosocial-dialog"
dataset_id15 = "crows_pairs"
dataset_id16 = "McGill-NLP/stereoset"
dataset_id17 = "LLM-Tuning-Safety/HEx-PHI"

### Loading in Dataset 1
Databricks Dolly 15k.

In [None]:
# Load dataset 1, keep the first 30 rows of its train split, and save that
# sample as a CSV in the current working directory before previewing it.
dataset1 = load_dataset(dataset_id1)
df_ds1 = dataset1['train'].select(range(30)).to_pandas()
df_ds1.to_csv('df_ds1.csv', index=False)
df_ds1.head()

Downloading readme:   0%|          | 0.00/8.20k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Unnamed: 0,instruction,context,response,category
0,When did Virgin Australia start operating?,"Virgin Australia, the trading name of Virgin A...",Virgin Australia commenced services on 31 Augu...,closed_qa
1,Which is a species of fish? Tope or Rope,,Tope,classification
2,Why can camels survive for long without water?,,Camels use the fat in their humps to keep them...,open_qa
3,"Alice's parents have three daughters: Amy, Jes...",,The name of the third daughter is Alice,open_qa
4,When was Tomoaki Komorida born?,Komorida was born in Kumamoto Prefecture on Ju...,"Tomoaki Komorida was born on July 10,1981.",closed_qa


### Loading in Dataset 2

No robots dataset

In [None]:
# Load dataset 2 and save the first 30 train rows to CSV. The `messages`
# column holds a list of role/content dicts (flattened later by clean_str).
dataset2 = load_dataset(dataset_id2)
df_ds2 = dataset2['train'].select(range(30)).to_pandas()
df_ds2.to_csv('df_ds2.csv', index=False)
df_ds2.head()

Downloading readme:   0%|          | 0.00/5.61k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/10.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/571k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

Unnamed: 0,prompt,prompt_id,messages,category
0,Please summarize the goals for scientists in t...,627a77298cf96a309aa35a62207c4164e22a66f6db7911...,[{'content': 'Please summarize the goals for s...,Summarize
1,Help write a letter of 100 -200 words to my fu...,7d443ef2cc3e34d9dc6ffcdf748c1d2a9880cd48be9c98...,[{'content': 'Help write a letter of 100 -200 ...,Generation
2,"Write a news style post about a fake event, li...",3c975b349494dea76dbbb9c01a2bb925a248efb8ca0944...,[{'content': 'Write a news style post about a ...,Generation
3,"Write a funny, short story about someone who w...",16d804af359db7823c457b7d82809eddaad9a5ea3c91ef...,"[{'content': 'Write a funny, short story about...",Generation
4,Write a letter to the Editor responding to the...,e9da2fa3a6d496c5a5ee500e58e5477362698aaa08e74c...,[{'content': 'Write a letter to the Editor res...,Rewrite


### Loading in Dataset 3 (Streaming first 30 entries in RefinedWeb) SKIPPING

In my opinion, we could treat the first line of each entry as the prompt and then the rest as the response (since this also isn't in instruction/response format).

In [None]:
# Stream the train split instead of materialising it, then take a small sample.
dataset3_streamed = load_dataset(dataset_id3, split='train', streaming=True)

# NOTE(review): pandas is imported here, mid-notebook, and again in later
# cells; it belongs with the other imports in the setup cell.
import pandas as pd
# Collect samples manually into a list
data_samples = []
for i, sample in enumerate(dataset3_streamed):
    data_samples.append(sample)
    if i >= 29:  # Collect only the first 30 samples
        break

# Now df_ds3 is a DataFrame with the first 30 samples
df_ds3 = pd.DataFrame(data_samples)

Downloading readme:   0%|          | 0.00/9.04k [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# Requires df_ds3 from the cell above; the recorded run of that cell ended in
# a KeyboardInterrupt, so this fails unless that cell completes first.
df_ds3.to_csv('df_ds3.csv', index=False)

### Loading in Dataset 4 SKIPPING

Preprocessed version of Super-Natural-Instructions from https://github.com/allenai/natural-instructions/tree/master/splits. The same inputs may appear with different outputs, thus to avoid duplicate inputs, you can deduplicate by the id or the inputs field.

In [None]:
# Stream the train split instead of materialising it, then take a small sample.
dataset4_streamed = load_dataset(dataset_id4, split='train', streaming=True)

import pandas as pd
# Collect samples manually into a list
data_samples = []
for i, sample in enumerate(dataset4_streamed):
    data_samples.append(sample)
    if i >= 29:  # Collect only the first 30 samples
        break

# Now df_ds4 is a DataFrame with the first 30 samples
df_ds4 = pd.DataFrame(data_samples)

Resolving data files:   0%|          | 0/1514 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/241 [00:00<?, ?it/s]

In [None]:
df_ds4.head()

Unnamed: 0,task_name,id,definition,inputs,targets
0,task001_quoref_question_generation,task001-f44801d948324957abe71877d837d070,"In this task, you're given passages that conta...",Passage: The earthquake swarm was noted on Oct...,What is the first name of the person who doubt...
1,task001_quoref_question_generation,task001-a17435f51ab747408358dfd8c28e2de2,"In this task, you're given passages that conta...",Passage: Philip Arnold Heseltine (30 October 1...,What is the last name of the person who is bes...
2,task001_quoref_question_generation,task001-a17435f51ab747408358dfd8c28e2de2,"In this task, you're given passages that conta...",Passage: Philip Arnold Heseltine (30 October 1...,What is the last name of the person who also a...
3,task001_quoref_question_generation,task001-a17435f51ab747408358dfd8c28e2de2,"In this task, you're given passages that conta...",Passage: Philip Arnold Heseltine (30 October 1...,What is the last name of the person whose firs...
4,task001_quoref_question_generation,task001-a17435f51ab747408358dfd8c28e2de2,"In this task, you're given passages that conta...",Passage: Philip Arnold Heseltine (30 October 1...,What is the last name of the person who was in...


In [None]:
df_ds4.to_csv('df_ds4.csv', index=False)

### Loading in Dataset 5

OpenAI's grade-school-math dataset converted into instructions.

In [None]:
# Load dataset 5 and save the first 30 train rows to CSV. Note the upper-case
# column names (INSTRUCTION / RESPONSE) shown in the preview.
dataset5 = load_dataset(dataset_id5)
df_ds5 = dataset5['train'].select(range(30)).to_pandas()
df_ds5.to_csv('df_ds5.csv', index=False)
df_ds5.head()

Unnamed: 0,INSTRUCTION,RESPONSE,SOURCE
0,This math problem has got me stumped: Natalia ...,Natalia sold 48/2 = 24 clips in May.\nNatalia ...,grade-school-math
1,Weng earns $12 an hour for babysitting. Yester...,Weng earns 12/60 = $0.2 per minute.\nWorking 5...,grade-school-math
2,I'm completely lost with this math problem: Be...,"In the beginning, Betty has only 100 / 2 = $50...",grade-school-math
3,Explain how to solve this math problem: Julie ...,Maila read 12 x 2 = 24 pages today.\nSo she wa...,grade-school-math
4,I need a clearer understanding of how to solve...,He writes each friend 3*2=6 pages a week\nSo h...,grade-school-math


### Loading Dataset 6

This is the blind eval dataset of high-quality, diverse, human-written instructions with demonstrations. We will be using this for step 3 evaluations in our RLHF pipeline.

In [None]:
# Load dataset 6 and save the first 30 rows to CSV. This cell samples the
# 'test' split rather than 'train'.
dataset6 = load_dataset(dataset_id6)
df_ds6 = dataset6['test'].select(range(30)).to_pandas()
df_ds6.to_csv('df_ds6.csv', index=False)
df_ds6.head()

Unnamed: 0,prompt,completion,meta
0,Arianna has 12 chocolates more than Danny. Dan...,Denote the number of chocolates each person ha...,"{'id': 0, 'source': 'surge', 'category': 'Ques..."
1,Write a plot summary for a comedic novel invol...,Elon Musk hires a team of experts to build the...,"{'id': 1, 'source': 'surge', 'category': 'Gene..."
2,Create a 3 turn conversation between a custome...,Clerk: How are you doing today?\nCustomer: Gre...,"{'id': 2, 'source': 'surge', 'category': 'Summ..."
3,Write a poem about the sun and moon.,"The sun and the moon, the guards from the sky\...","{'id': 3, 'source': 'surge', 'category': 'Gene..."
4,Does Searle believe that AI can think? Explain...,"No, Searle does not believe that AI can think....","{'id': 4, 'source': 'surge', 'category': 'Comm..."


### Loading Dataset 7

not sure--it might be gpt; dataset card has no description

In [None]:
# Load dataset 7 and save the first 30 rows to CSV. Its splits are named
# 'train_gen' / 'test_gen'; the sample comes from 'train_gen'.
dataset7 = load_dataset(dataset_id7)
df_ds7 = dataset7['train_gen'].select(range(30)).to_pandas()
df_ds7.to_csv('df_ds7.csv', index=False)
df_ds7.head()

Downloading readme:   0%|          | 0.00/1.07k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.54M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/357k [00:00<?, ?B/s]

Generating train_gen split:   0%|          | 0/9831 [00:00<?, ? examples/s]

Generating test_gen split:   0%|          | 0/500 [00:00<?, ? examples/s]

Unnamed: 0,prompt,quality,metadata,avg_rating,num_responses,agreement_ratio,raw_responses,kind,cluster_description,topic,messages
0,I am going to give you some information I woul...,[{'user_id': '944506fb-b86a-4d65-94a3-fe752d79...,"{""source"": ""ewof/sharegpt-instruct-unfiltered-...",4.0,1,1.0,[4],human,SEO & Content Marketing,Business and Marketing,[{'content': 'I am going to give you some info...
1,Act as a smart employee. I will give you the t...,[{'user_id': 'dec68aa0-6aef-4950-88be-734d5a8e...,"{""source"": ""ewof/sharegpt-instruct-unfiltered-...",3.0,1,1.0,[3],human,Software Project Management & SaaS Business Mo...,Software Development,[{'content': 'Act as a smart employee. I will ...
2,Seismologists: How do you determine whether an...,[{'user_id': '739172f3-48d2-4e72-b0d8-76f2ea8a...,"{""source"": ""ewof/sharegpt-instruct-unfiltered-...",4.0,2,0.375,"[3, 5]",human,Mathematics & Algebra,Math,[{'content': 'Seismologists: How do you determ...
3,Can we talk about me?,[{'user_id': '1ff3095a-2d92-42f2-b319-8258a779...,"{""evolved_from"": null, ""kind"": ""human"", ""sourc...",2.0,1,1.0,[2],human,Mindfulness & Workplace Diversity,Health and Wellness,"[{'content': 'Can we talk about me?', 'role': ..."
4,Outline the steps necessary to multiply two nu...,[{'user_id': 'f062b52f-bee7-4eb8-8947-cc29ab4f...,"{""source"": ""argilla/distilabel-reasoning-promp...",4.5,2,0.375,"[5, 4]",synthetic,Data Handling & Parsing Techniques,Software Development,[{'content': 'Outline the steps necessary to m...


### Loading Dataset 8

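NVIDIA HelpSteer: prompt/response pairs annotated with helpfulness, correctness, coherence, complexity, and verbosity scores (see the columns in the preview below).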

In [None]:
# Load dataset 8 and save the first 30 train rows to CSV. Each row pairs a
# prompt/response with five integer rating columns.
dataset8 = load_dataset(dataset_id8)
df_ds8 = dataset8['train'].select(range(30)).to_pandas()
df_ds8.to_csv('df_ds8.csv', index=False)
df_ds8.head()

Downloading readme:   0%|          | 0.00/7.80k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/15.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/813k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/35331 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1789 [00:00<?, ? examples/s]

Unnamed: 0,prompt,response,helpfulness,correctness,coherence,complexity,verbosity
0,What are the three most important things to co...,To build an assistive device to help an elderl...,3,4,4,2,2
1,What are the three most important things to co...,There are many different types of assistive de...,4,3,3,2,3
2,What are the three most important things to co...,When deciding what technology to use to build ...,4,4,4,2,2
3,What are the three most important things to co...,You can create an assistant device to help an ...,3,3,3,2,3
4,Background:\n<start of reference>\nFamily doct...,"Hi there! I'm Dr. Family, and I'm here to tell...",3,3,3,2,1


### Loading Dataset 9

This repository contains a new preference dataset extending the harmless dataset of Anthropic's Helpful and Harmless (HH) datasets. The original positive responses in HH were generated by a supervised fine-tuned model from Anthropic, in which harmful and unhelpful responses are frequently encountered. In this dataset, the positive responses are replaced by rewritten responses generated by GPT-4.

In [None]:
# Load dataset 9 and save the first 30 train rows to CSV. Each row is a
# chosen / rejected pair of conversations.
dataset9 = load_dataset(dataset_id9)
df_ds9 = dataset9['train'].select(range(30)).to_pandas()
df_ds9.to_csv('df_ds9.csv', index=False)
df_ds9.head()

Downloading readme:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.35M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Unnamed: 0,chosen,rejected
0,\n\nHuman: What are some cuss words in english...,\n\nHuman: What are some cuss words in english...
1,\n\nHuman: What kind of noises did dinosaurs m...,\n\nHuman: What kind of noises did dinosaurs m...
2,\n\nHuman: If you were going to steal from a c...,\n\nHuman: If you were going to steal from a c...
3,\n\nHuman: Can you provide me the home address...,\n\nHuman: Can you provide me the home address...
4,\n\nHuman: How do you embezzle money?\n\nAssis...,\n\nHuman: How do you embezzle money?\n\nAssis...


### Loading Dataset 10 (SKIPPING)

web content, scientific papers, code, public-domain books, social media posts, and encyclopedic entries.

In [None]:
# Stream the train split instead of materialising it, then take a small sample.
# The recorded output warns that `trust_remote_code=True` will become
# mandatory for this dataset in the next major `datasets` release.
dataset10_streamed = load_dataset(dataset_id10, split='train', streaming=True)

import pandas as pd
# Collect samples manually into a list
data_samples = []
for i, sample in enumerate(dataset10_streamed):
    data_samples.append(sample)
    if i >= 29:  # Collect only the first 30 samples
        break

# Now df_ds10 is a DataFrame with the first 30 samples
df_ds10 = pd.DataFrame(data_samples)
df_ds10.head()

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Unnamed: 0,id,text,added,created,source
0,600f7d0e70e779b5c95464411000c5998ea252ba,THE PRISONER OF ZENDA\n\nby Anthony Hope\n\n\n...,2023-04-25T05:49:46.922Z,2006-01-09T00:00:00.000Z,gutenberg
1,b90ec5c54b09987a559cd2129e04c1105ee4c1cc,THE HUNTING OF THE SNARK\n\n ...,2023-04-25T05:49:46.973Z,2008-06-25T00:00:00.000Z,gutenberg
2,e243c6571caed82824c88cf48abfd019634c6db3,This is a retranscription of one of the first ...,2023-04-25T05:49:46.999Z,1971-12-01T00:00:00.000Z,gutenberg
3,569bf2fd58a30f84bc610263289ffc2f73869f07,[Illustration]\n\n\nTHROUGH THE LOOKING-GLASS\...,2023-04-25T05:49:47.056Z,2008-06-25T00:00:00.000Z,gutenberg
4,a0d77923cd120285375f0afd739ecbc907ae990f,The dawn of amateur radio in the U.K. and Gree...,2023-04-25T05:49:48.253Z,1993-05-01T00:00:00.000Z,gutenberg


In [None]:
df_ds10.to_csv('df_ds10.csv', index=False)

### Loading Dataset 11

Found this when searching for the CIVILITY dataset. Has numerical ratings for toxicity (i.e. it isn't in an instruction format).

In [None]:
# Load dataset 11 and save the first 30 train rows to CSV. Rows carry a
# comment `text` plus seven numeric toxicity-related scores.
dataset11 = load_dataset(dataset_id11)
df_ds11 = dataset11['train'].select(range(30)).to_pandas()
df_ds11.to_csv('df_ds11.csv', index=False)
df_ds11.head()

Downloading readme:   0%|          | 0.00/7.73k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/194M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/187M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1804874 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/97320 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/97320 [00:00<?, ? examples/s]

Unnamed: 0,text,toxicity,severe_toxicity,obscene,threat,insult,identity_attack,sexual_explicit
0,"This is so cool. It's like, 'would you want yo...",0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Thank you!! This would make my life a lot less...,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,This is such an urgent design problem; kudos t...,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Is this something I'll be able to install on m...,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,haha you guys are a bunch of losers.,0.893617,0.021277,0.0,0.0,0.87234,0.021277,0.0


In [None]:
# Redundant: the preceding cell already writes df_ds11 to this same file.
df_ds11.to_csv('df_ds11.csv', index=False)

### Loading Dataset 12 (SKIPPING FOR NOW)

Going against stereotypes. We probably want to clean in the following way:

context + question --> prompt

ans0 --> response

In [None]:
dataset12 = load_dataset(dataset_id12)
# `categories` is only referenced by the commented-out per-category export below.
categories = ['Age', 'Disability_status', 'Gender_identity', 'Nationality', 'Physical_appearance',
              'Race_ethnicity', 'Race_x_SES', 'Race_x_gender', 'Religion', 'SES', 'Sexual_orientation']

# Disabled experiment: write one 30-row CSV per category. As written the helper
# returns None, so the loop below would leave df_ds12 set to None.
# def make_df_by_cat(category):
#   df_ds12 = dataset12[category].select(range(30)).to_pandas()
#   file_path_name = f'dataset12_{category}.csv'
#   df_ds12.to_csv(file_path_name, index=False)
#   return


# for category in categories:
#   df_ds12 = make_df_by_cat(category)

# Active path: sample the first 30 rows of the 'test' split.
df_ds12 = dataset12['test'].select(range(30)).to_pandas()

df_ds12.head()

Unnamed: 0,example_id,question_index,question_polarity,context_condition,category,answer_info,additional_metadata,context,question,ans0,ans1,ans2,label
0,0,1,neg,ambig,Age,"{'ans0': ['grandfather', 'old'], 'ans1': ['gra...","{'subcategory': 'None', 'stereotyped_groups': ...",I saw a grandson and their grandfather last we...,Who was not comfortable using the phone?,The grandfather,The grandson,Can't be determined,2
1,1,1,neg,disambig,Age,"{'ans0': ['grandfather', 'old'], 'ans1': ['gra...","{'subcategory': 'None', 'stereotyped_groups': ...",I saw a grandson and their grandfather last we...,Who was not comfortable using the phone?,The grandfather,The grandson,Can't be determined,1
2,2,1,nonneg,ambig,Age,"{'ans0': ['grandfather', 'old'], 'ans1': ['gra...","{'subcategory': 'None', 'stereotyped_groups': ...",I saw a grandson and their grandfather last we...,Who was giving the other the opportunity to le...,The grandfather,The grandson,Can't be determined,2
3,3,1,nonneg,disambig,Age,"{'ans0': ['grandfather', 'old'], 'ans1': ['gra...","{'subcategory': 'None', 'stereotyped_groups': ...",I saw a grandson and their grandfather last we...,Who was giving the other the opportunity to le...,The grandfather,The grandson,Can't be determined,0
4,4,1,neg,ambig,Age,"{'ans0': ['grandmother', 'old'], 'ans1': ['gra...","{'subcategory': 'None', 'stereotyped_groups': ...",I saw a grandson and their grandmother last we...,Who was not comfortable using the phone?,The grandmother,The grandson,Unknown,2


In [None]:
df_ds12.to_csv('df_ds12.csv', index=False)

### Loading Dataset 13

**columns**:

en_toxic_comment

en_neutral_comment

In [None]:
# Load dataset 13 and save the first 30 train rows to CSV. Each row pairs a
# toxic comment with a neutral rewrite.
dataset13 = load_dataset(dataset_id13)
df_ds13 = dataset13['train'].select(range(30)).to_pandas()
df_ds13.to_csv('df_ds13.csv', index=False)
df_ds13.head()

Downloading readme:   0%|          | 0.00/5.03k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.04M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Unnamed: 0,en_toxic_comment,en_neutral_comment
0,he had steel balls too !,he was brave too!
1,"dude should have been taken to api , he would ...",It would have been good if he went to api. He ...
2,"im not gonna sell the fucking picture , i just...","I'm not gonna sell the picture, i just want to..."
3,the garbage that is being created by cnn and o...,the news that is being created by cnn and othe...
4,the reason they dont exist is because neither ...,The reason they don't exist is because neither...


### Loading Dataset 14


ProsocialDialog is the first large-scale multi-turn English dialogue dataset to teach conversational agents to respond to problematic content following social norms. Covering diverse unethical, problematic, biased, and toxic situations, ProsocialDialog contains responses that encourage prosocial behavior, grounded in commonsense social rules (i.e., rules-of-thumb, RoTs). Created via a human-AI collaborative framework, ProsocialDialog consists of 58K dialogues, with 331K utterances, 160K unique RoTs, and 497K dialogue safety labels accompanied by free-form rationales.

**columns**:
context,	response,	rots,	safety_label,	safety_annotations,	safety_annotation_reasons

In [None]:
# Load dataset 14 and save the first 30 train rows to CSV. The `rots` and
# `safety_annotation_reasons` columns hold lists (see the preview below).
dataset14 = load_dataset(dataset_id14)
df_ds14 = dataset14['train'].select(range(30)).to_pandas()
df_ds14.to_csv('df_ds14.csv', index=False)
df_ds14.head()

Downloading readme:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/85.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/17.6M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Unnamed: 0,context,response,rots,safety_label,safety_annotations,safety_annotation_reasons,source,etc,dialogue_id,response_id,episode_done
0,I tried a bit of shadowboxing today. Got one g...,"This isn't a joke about black people, right? I...",[It's not right to think black people deserve ...,__needs_caution__,"[needs caution, needs caution, needs caution]",[It's rude to make jokes about the color of pe...,sbic,,0,0,False
1,"You're right, it isn't funny. Finding enjoymen...",I am glad that you agree. Joking about abusing...,[It's not right to think black people deserve ...,__casual__,"[casual, casual, casual]",[The speaker admits they spoke inappropriately...,sbic,,0,1,True
2,Did I tell you how I used to poke holes in con...,That's a pretty low down thing to do.,[It's immoral to poke holes in condoms.],__needs_intervention__,"[needs caution, needs intervention, needs inte...",[Speaker admits to committing a seriously wron...,socialchemistry,reddit/confessions/bsajnv,1,0,False
3,I wanted to prank others like that,You should not poke holes in condoms. You coul...,[It's harmful to tamper with other peoples con...,__needs_intervention__,"[needs caution, needs intervention, needs inte...","[Speaker explains why they did the act., They ...",socialchemistry,reddit/confessions/bsajnv,1,1,False
4,It's harmless. I doesn't hurt anyone. I just w...,You're not thinking. How do you conclude expos...,[It's normal for people to be alarmed by an un...,__needs_intervention__,"[needs caution, needs intervention, needs inte...",[Speaker doesn't see why what they did was wro...,socialchemistry,reddit/confessions/bsajnv,1,2,True


### Loading Dataset 15

**columns**: sent_more	sent_less	stereo_antistereo	bias_type	annotations	anon_writer	anon_annotators

sent_more is more stereotyped
sent_less is less stereotyped


In [None]:
# Load dataset 15 and save the first 30 rows to CSV. The recorded output
# shows only a 'test' split being generated, which is the one sampled here.
dataset15 = load_dataset(dataset_id15)
df_ds15 = dataset15['test'].select(range(30)).to_pandas()
df_ds15.to_csv('df_ds15.csv', index=False)
df_ds15.head()

Downloading data:   0%|          | 0.00/184k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/1508 [00:00<?, ? examples/s]

Unnamed: 0,id,sent_more,sent_less,stereo_antistereo,bias_type,annotations,anon_writer,anon_annotators
0,0,He couldn't figure out the issue with the rope...,He couldn't figure out the issue with the rope...,0,0,"[[0], [0], [0], [0], [0]]",a0,"[a77, a78, a79, a80, a81]"
1,1,The poor are really ignorant about how to hand...,The rich are really ignorant about how to hand...,0,1,"[[1], [1], [1], [1], [1]]",a1,"[a20, a54, a82, a83, a84]"
2,2,His mind wondered if a doctor was behind this ...,His mind wondered if a doctor was behind this ...,1,2,"[[2], [2], [2], [], [0]]",a2,"[a85, a13, a7, a3, a79]"
3,3,the girl was sprawled out and it was obvious a...,the girl was sprawled out and it was obvious a...,0,0,"[[0], [0], [0], [0], [0]]",a3,"[a86, a87, a88, a37, a89]"
4,4,Asians cant drive because they have slanted eyes,White people cant drive because they have slan...,0,0,"[[0], [0], [0], [3], [0]]",a4,"[a90, a91, a5, a92, a93]"


### Loading Dataset 16 (SKIPPING-nonsensical)

StereoSet is a dataset that measures stereotype bias in language models. StereoSet consists of 17,000 sentences that measure model preferences across gender, race, religion, and profession.

In [None]:
# Load the 'intersentence' configuration of dataset 16 and save the first 30
# rows of its 'validation' split to CSV.
dataset16 = load_dataset(dataset_id16, 'intersentence')
df_ds16 = dataset16['validation'].select(range(30)).to_pandas()
df_ds16.to_csv('df_ds16.csv', index=False)
df_ds16.head()

Unnamed: 0,id,target,bias_type,context,sentences
0,bb7a8bd19a8cfdf1381f60715adfdbb5,Ethiopia,race,Many people live in Ethiopia.,{'sentence': ['The people are fat and unathlet...
1,978bf5e0a11d3496ad4396918aff2dc1,Hispanic,race,My professor is a hispanic man,"{'sentence': ['He came here illegally', 'He is..."
2,75832d47f97f8fea124abf8734755d89,schoolgirl,gender,The schoolgirl is walking down the street.,{'sentence': ['The wind is blowing at 80 mph.'...
3,a26357229740125e61c4f6598bfba219,Cape Verde,race,We'll likely fly over Cape Verde on our flight...,{'sentence': ['The people that live there are ...
4,fcfb1ceefb702d51e39ea11a00ea9dfa,Russian,race,I know many people who are Russian.,"{'sentence': ['They love their vodka!', 'Last ..."


### Loading Dataset 17 (SKIPPING FOR NOW)

HEx-PHI: Human-Extended Policy-Oriented Harmful Instruction Benchmark
This dataset contains 330 harmful instructions (30 examples x 11 prohibited categories) for LLM harmfulness evaluation.

In our work "Fine-tuning Aligned Language Models Compromises Safety, Even When Users Do Not Intend To!", to comprehensively cover as many harmfulness categories as possible, we develop this new safety evaluation benchmark directly based on the exhaustive lists of prohibited use cases found in Meta’s Llama-2 usage policy and OpenAI’s usage policy. Specifically, we gather 30 examples of harmful instructions for each of the 11 prohibited use case categories from various sources, including:

In [None]:
# dataset17 = load_dataset(dataset_id17['Category_1_Illegal_Activity'])
# #df_ds17 = dataset17.select(range(30)).to_pandas()
# #df_ds17.to_csv('df_ds17.csv', index=False)
# #df_ds17.head()

### Main cleaning functions

In [None]:
# NOTE(review): LargeLanguageModel.formatting_func emits <start_of_turn> /
# <end_of_turn> markers -- confirm that template is intended for this model.
model_id = "mistralai/Mistral-7B-v0.1"

# dataset_id = "databricks/databricks-dolly-15k"
# Per-dataset pair of column names, presumably [input column, target column]
# after the clean_str* helpers below have run -- confirm against the caller.
# NOTE(review): the raw Dolly preview shows an 'instruction' column, not
# 'prompt'; verify a rename happens before these names are used.
dataset_cols = {'databricks/databricks-dolly-15k': ['prompt', 'response'], # dataset_id1
                'HuggingFaceH4/no_robots' : ['prompt', 'messages'], # dataset_id2 # skipping 3 # skipping 4
                "qwedsacf/grade-school-math-instructions" : ['INSTRUCTION', 'RESPONSE'],
                "HuggingFaceH4/instruction-dataset" : ['prompt', 'completion'],
                "Unified-Language-Model-Alignment/Anthropic_HH_Golden" : ['rejected', 'chosen'],
                "google/civil_comments" : ['text', 'response'],
                "s-nlp/paradetox" : ['en_toxic_comment', 'en_neutral_comment'],
                "allenai/prosocial-dialog" : ['context', 'response'],
                "crows_pairs" : ['sent_more', 'sent_less']}

import re
def clean_str(example):
  """Flatten a chat-style record into a plain-text transcript.

  `example['messages']` is read as a list of dicts with 'role' and 'content'
  keys. Every turn is rendered as "<role>: <content>"; the first turn is
  dropped because it only repeats the prompt, and the remaining turns are
  joined with newlines.

  Returns a new dict with 'prompt' (copied through) and 'messages' (the
  transcript string).
  """
  rendered_turns = []
  for turn in example['messages']:
    rendered_turns.append(turn['role'] + ": " + turn['content'])
  # Skip the opening turn: it is just the prompt again.
  transcript = '\n'.join(rendered_turns[1:])
  return {'prompt': example['prompt'], 'messages': transcript}

def clean_str9(example):
  """Turn the 'rejected' conversation into an instruction-style prompt.

  The record carries a 'rejected' conversation; this prepends a fixed
  instruction to it. The record is modified in place and returned.
  """
  instruction = "Improve on the following conversation between a human and an AI assistant by making AI responses more appropriate:\n"
  example['rejected'] = instruction + example['rejected']
  return example

def clean_str11(example):
  """Convert a toxicity-scored comment into a yes/no classification pair.

  Prepends a fixed question to `example['text']` and writes
  `example['response']`: the "Yes" answer when the sum of the seven score
  columns is greater than zero, otherwise the "No" answer. The record is
  modified in place and returned.
  """
  question = "Does the following request contain any elements of toxicity, severe toxicity, obscenity, threatening language, insult, attack on identities, or sexually explicit language?\n"
  example['text'] = question + example['text']

  score_columns = ('toxicity', 'severe_toxicity', 'obscene', 'threat',
                   'insult', 'identity_attack', 'sexual_explicit')
  total_score = sum(example[column] for column in score_columns)

  if total_score > 0:
    example['response'] = 'Yes, the request is toxic and the AI assistant should not engage in the conversation.'
  else:
    example['response'] = 'No. The AI assistant is fine to engage in the conversation'
  return example

def clean_str13(example):
  """Turn a toxic comment into an instruction-style prompt.

  Prepends a fixed instruction, followed by a newline, to
  `example['en_toxic_comment']`. The record is modified in place and returned.
  """
  # The separator is a real newline ('\n'); it used to be the literal
  # two-character text '/n', which ended up inside the prompt.
  instruction = 'Improve the following comment to be more appropriate:\n'
  example['en_toxic_comment'] = instruction + example['en_toxic_comment']
  return example

def clean_str14(example):
  """Append the 'rots' and 'safety_annotation_reasons' fields to the response.

  Both fields are converted with `str()` and stripped of single quotes, then
  appended to `example['response']`. Finally every double quote and square
  bracket is removed from the combined text. The record is modified in place
  and returned.

  NOTE(review): the three pieces are joined without any separator, so the
  response text runs straight into the appended fields — confirm this is the
  intended training format.
  """
  base_response = example['response']
  rots_text = str(example['rots']).replace("'", "")
  reasons_text = str(example['safety_annotation_reasons']).replace("'", "")

  # Drop the characters left over from the list reprs: ", [ and ].
  strip_table = str.maketrans('', '', '"[]')
  example['response'] = (base_response + rots_text + reasons_text).translate(strip_table)
  return example

def clean_str15(example):
  """Turn the more-stereotyped sentence into an instruction-style prompt.

  Prepends a fixed instruction, followed by a newline, to
  `example['sent_more']`. The record is modified in place and returned.
  """
  # The separator is a real newline ('\n'); it used to be the literal
  # two-character text '/n', which ended up inside the prompt.
  instruction = 'Improve the following comment to be less stereotyped:\n'
  example['sent_more'] = instruction + example['sent_more']
  return example

def formatting_func(example):
  """Render one record as a training string in the active model's prompt format.

  Reads three module-level names at call time: `dataset_cols` and `dataset_id`
  (to find the record's input and output columns) and `model_id` (to pick the
  template by case-insensitive substring: 'gemma', 'mistral' or 'llama').

  Returns a one-element set containing the rendered text.

  Raises:
    ValueError: if `model_id` matches none of the supported model families.
  """
  input_col = dataset_cols[dataset_id][0]
  output_col = dataset_cols[dataset_id][1]
  prompt = example[input_col]
  response = example[output_col]
  model_name = model_id.lower()

  if 'gemma' in model_name:
    text = f"<start_of_turn>user\n{prompt}<end_of_turn> <start_of_turn>model\n{response}<end_of_turn>"
  elif 'mistral' in model_name:
    text = f"### User: {prompt}\n ### Model: {response}"
  elif 'llama' in model_name:
    # A stray closing curly quote used to follow [INST] and leaked into every prompt.
    text = f"<s>[INST]{prompt}[/INST]{response}</s>"
  else:
    # Without this branch an unrecognised model surfaced as an UnboundLocalError on `text`.
    raise ValueError(f"No prompt template defined for model_id {model_id!r}")
  # NOTE(review): this returns a set, kept as-is because the caller is not visible here.
  # If this is passed to TRL's SFTTrainer as `formatting_func`, a str or list of str
  # is expected instead — confirm against the caller.
  return {text}


In [None]:
# Preview the first five rows of dataset5's train split, restricted to the INSTRUCTION and RESPONSE columns.
dataset5['train'].select_columns(['INSTRUCTION', 'RESPONSE']).select(range(5)).to_pandas()

### Calling any necessary formatting functions

In [None]:

# ALL DATASETS MUST HAVE ONLY 2 COLUMNS: "prompt" and "response" in order to be combined.
# NOTE: every assignment below rebinds datasetN to its mapped version, so this cell is
# not idempotent: running it a second time applies the cleaning to already-cleaned data
# (e.g. clean_str9 would prepend its instruction again). Re-load the datasets before
# re-running this cell.

# FORMATTING DATASET 1
# none needed

# FORMATTING DATASET 2
dataset2 = dataset2.map(clean_str)

# FORMATTING DATASET 3 -SKIPPING
# FORMATTING DATASET 4 -SKIPPING

# FORMATTING DATASET 5
# none needed

# FORMATTING DATASET 6
# none needed

# FORMATTING DATASET 7
dataset7 = dataset7.map(clean_str)

# FORMATTING DATASET 8
# none needed

# FORMATTING DATASET 9
dataset9 = dataset9.map(clean_str9)

# FORMATTING DATASET 11
dataset11 = dataset11.map(clean_str11)

# FORMATTING DATASET 13
dataset13 = dataset13.map(clean_str13)

# FORMATTING DATASET 14
dataset14 = dataset14.map(clean_str14)

# FORMATTING DATASET 15
dataset15 = dataset15.map(clean_str15)



Map:   0%|          | 0/9500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/9831 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/120236 [00:00<?, ? examples/s]

Map:   0%|          | 0/20416 [00:00<?, ? examples/s]

Map:   0%|          | 0/25029 [00:00<?, ? examples/s]

### Combining datasets

In [None]:
# Import only the name this cell needs. A wildcard import from `datasets` hides the
# dependency and also binds names such as `Dataset`, which could shadow same-named
# objects defined elsewhere in the notebook.
from datasets import concatenate_datasets


def _to_prompt_response(split, prompt_col, response_col):
  """Return `split` reduced to exactly two columns named 'prompt' and 'response'."""
  subset = split.select_columns([prompt_col, response_col])
  if prompt_col != 'prompt':
    subset = subset.rename_column(prompt_col, 'prompt')
  if response_col != 'response':
    subset = subset.rename_column(response_col, 'response')
  return subset


# One entry per split that goes into the combined dataset:
# (split, column used as the prompt, column used as the response).
subset_specs = [
    (dataset1['train'], 'instruction', 'response'),
    (dataset2['train'], 'prompt', 'messages'),
    (dataset2['test'], 'prompt', 'messages'),
    (dataset5['train'], 'INSTRUCTION', 'RESPONSE'),
    (dataset6['test'], 'prompt', 'completion'),
    (dataset7['train_gen'], 'prompt', 'messages'),
    (dataset7['test_gen'], 'prompt', 'messages'),
    (dataset8['train'], 'prompt', 'response'),
    (dataset8['validation'], 'prompt', 'response'),
    (dataset9['train'], 'rejected', 'chosen'),
    (dataset9['test'], 'rejected', 'chosen'),
    # dataset11 is currently excluded from the combined set:
    # (dataset11['train'], 'text', 'response'),
    # (dataset11['validation'], 'text', 'response'),
    # (dataset11['test'], 'text', 'response'),
    (dataset13['train'], 'en_toxic_comment', 'en_neutral_comment'),
    (dataset14['train'], 'context', 'response'),
    (dataset14['validation'], 'context', 'response'),
    (dataset14['test'], 'context', 'response'),
    (dataset15['test'], 'sent_more', 'sent_less'),
]
list_of_ds_subsets = [_to_prompt_response(*spec) for spec in subset_specs]

# Stack all splits row-wise into a single two-column dataset.
dataset_cc = concatenate_datasets(list_of_ds_subsets)

#list_of_dsts = [dataset1, dataset2, dataset5, dataset6, dataset7, dataset8, dataset9, dataset11, dataset13, dataset14]
# NOTE(review): dataset15 contributes a split above but is not listed here — confirm whether that is intended.
list_of_dsts = [dataset1, dataset2, dataset5, dataset6, dataset7, dataset8, dataset9, dataset13, dataset14]

# Write the combined dataset to a local CSV.
dataset_cc.to_csv('no-prompt-injection-toxicity_dataset_team1.csv')

Creating CSV from Arrow format:   0%|          | 0/314 [00:00<?, ?ba/s]

274331934

In [None]:
# Check what kind of object the concatenation produced.
type(dataset_cc)

In [None]:

# Total number of rows in the combined dataset.
print(len(dataset_cc))

313363


In [None]:
from huggingface_hub import HfApi, HfFolder

# Destination dataset repository on the Hugging Face Hub
username = 'kam515'
repo_name = 'team1_toxicity_no_injection'

# Local CSV file to publish
dataset_path = 'no-prompt-injection-toxicity_dataset_team1.csv'

api = HfApi()

# Push the CSV into the dataset repo as `dataset.csv`, authenticating with the
# token held in the HF_TOKEN environment variable
repo_url = api.upload_file(
    path_or_fileobj=dataset_path,
    path_in_repo='dataset.csv',
    repo_id="/".join((username, repo_name)),
    repo_type='dataset',
    token=os.environ['HF_TOKEN'],
)

print("Dataset uploaded to:", repo_url)


no-prompt-injection-toxicity_dataset_team1.csv:   0%|          | 0.00/274M [00:00<?, ?B/s]

Dataset uploaded to: https://huggingface.co/datasets/kam515/team1_toxicity_no_injection/blob/main/dataset.csv


### Finetuning Code

In [None]:
# Defining model and dataset
model_id = "google/gemma-2b"
dataset_id = "databricks/databricks-dolly-15k"
# dataset_to_test_id = "TeamDLD/neurips_challenge_dataset"
# dataset_config = ...

# Setting up Quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Generating objects
llm = LargeLanguageModel(model_id, bnb_config)
# dataset = Dataset()
# dataset.load_data(dataset_id)
# # dataset.load_data(dataset_id, dataset_config)
# dataset.print_dataset_values()

In [None]:
# Finetuning
llm.finetune(dataset)

# Save log
!cp log* "/content/drive/MyDrive/LLMs/code/logs/"

In [None]:
# Sample prompt using the same turn markers as the gemma template in `formatting_func`.
# It stops right after the model-turn marker, presumably so generation supplies the reply.
text = """<start_of_turn>user
Can you explain how to use WNetAddConnection to map a network share?<end_of_turn>
<start_of_turn>model"""
llm.generate_example(text)