In [None]:
%%capture
!pip install bitsandbytes==0.40.2 transformers==4.31.0 peft==0.4.0 accelerate==0.21.0 datasets trl==0.4.7 sentencepiece scipy scikit-learn

In [None]:
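# It is recommended to start the notebook server like this:
# jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10
# Raising iopub_data_rate_limit lifts the limit on how much output a cell may
# send to the notebook, e.g. when displaying large objects loaded from a pickle file.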

In [1]:
# Config private
#
# The Hugging Face access token is a secret and must never be stored in the
# notebook itself. It is read from the HF_TOKEN environment variable; when the
# variable is unset or empty, the user is prompted for it without echoing.
# NOTE: a real token used to be hard-coded in this cell; that token must be
# considered leaked and should be revoked.
#
# `getpass` and `os` are imported here because this cell runs before the
# notebook's shared imports cell.
import getpass
import os

huggingface_token = os.environ.get('HF_TOKEN') or getpass.getpass('Hugging Face access token: ')


In [2]:
# All imports should be here
import os
import pickle

import torch
import transformers
from datasets import load_dataset
from huggingface_hub.hf_api import HfFolder
from transformers import AutoTokenizer

In [3]:
# Config for model

# Hugging Face Hub id of the checkpoint; used for both the tokenizer and the pipeline
model = 'meta-llama/Llama-2-7b-chat-hf'

# Side on which the tokenizer pads; Llama 2 is used with padding on the right side
padding_side = 'right'

# float16 is used instead of bfloat16.
# NOTE(review): the original comment said "MIPS does not support bfloat16";
# presumably Apple's MPS backend was meant - confirm.
torch_dtype = torch.float16

# Passed as `device_map` to the pipeline; intended to use the GPU if possible, otherwise the CPU
device_map = 'auto'

# File name that is used to store all unique sentences in the dataset
# NOTE(review): not referenced by any other cell visible in this notebook - confirm it is still needed.
file_name_all_sentances = 'all_sentances.pkl'

# File name of the pickle cache that maps each processed sentence to its generated summaries
file_name_summary_all_sentances = 'llama_2_summary_sentances.pkl'

# Number of sentences per batch; partial results are saved after each batch
llm_batch_size = 3

# Number of generated texts that the LLM should return per sentence
num_return_sequences = 2

# Passed as `max_length` to the text-generation pipeline. That limit is measured
# in tokens (not characters) and includes the prompt tokens.
max_sequences_length=200



In [4]:
# Log in to the Hugging Face Hub to get access to the gated model (Llama 2).
HfFolder.save_token(huggingface_token)

tokenizer = AutoTokenizer.from_pretrained(model, padding_side=padding_side)

# Hand the configured tokenizer to the pipeline. Without `tokenizer=...` the
# pipeline builds its own tokenizer from the model name, so the `padding_side`
# setting above would be silently ignored.
pipeline = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch_dtype,
    device_map=device_map,
)

# Smoke test: make sure the model can generate text at all
def check_model_is_working():
    """Send one fixed prompt through the module-level `pipeline` and print every generated text."""
    prompt = 'I liked to read about travel. Do you have any book recommendations?'
    generation_options = dict(
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,
    )
    for seq in pipeline(prompt, **generation_options):
        print(f"Result: {seq['generated_text']}")

check_model_is_working()

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


Result: I liked to read about travel. Do you have any book recommendations?
I'm glad you like to read about travel! Here are some book recommendations that might interest you:
1. "The Travel Book" by Lonely Planet - This is a comprehensive guide to traveling, with information on destinations around the world, as well as tips on how to plan and prepare for your trips.
2. "The Art of Travel" by Alain de Botton - This book explores the psychology of travel and the ways in which it can broaden our horizons and change our perspectives.
3. "On the Road" by Jack Kerouac - This classic novel follows the adventures of a group of friends as they travel across America in the 1950s, and is a great read for anyone who loves to travel and explore new places.
4. "The Beach" by Alex Gar


In [5]:
# Load the "mrpc" configuration of the "glue" dataset via `datasets.load_dataset`
dataset = load_dataset("glue", "mrpc")

# Some sentences occur more than once in the dataset, so it is more efficient
# to collect every distinct sentence a single time.
def get_all_sentances_from_dataset(dataset):
    """Return the set of unique sentences found in `dataset`.

    Reads the 'sentence1' and 'sentence2' fields of every element in the
    'train', 'validation' and 'test' parts of `dataset`.
    """
    return {
        row[field]
        for split in ('train', 'validation', 'test')
        for row in dataset[split]
        for field in ('sentence1', 'sentence2')
    }

# Collect every distinct sentence of the dataset once
all_sentances = get_all_sentances_from_dataset(dataset)

# Report how many unique sentences were found
print(f"We have found {len(all_sentances)} unique sentances.")

We have found 10944 unique sentances.


In [6]:
# Try to load the already processed sentences.
# This is useful if processing was stopped and we want to resume from the
# point where we stopped.
def load_already_processed_sentances(file_name=None):
    """Load the cache of already processed sentences from disk.

    Args:
        file_name: Path of the pickle file to read. Defaults to
            `file_name_summary_all_sentances` from the config cell.

    Returns:
        The unpickled cache, or an empty dict when the file does not exist
        yet or is truncated/corrupted.

    Raises:
        OSError: If the file exists but cannot be opened or read (for example
            because of missing permissions). Unlike a bare `except:`, such
            errors and KeyboardInterrupt are no longer swallowed.
    """
    if file_name is None:
        file_name = file_name_summary_all_sentances
    try:
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that were written by this notebook.
        with open(file_name, 'rb') as file:
            return pickle.load(file)
    except FileNotFoundError:
        # First run: nothing has been processed yet.
        return {}
    except (EOFError, pickle.UnpicklingError) as error:
        # A truncated or corrupted cache cannot be resumed from; say so
        # explicitly instead of silently discarding it.
        print(f"Could not read {file_name!r} ({error!r}); starting with an empty cache.")
        return {}

def save_already_processed_sentances(all_sentances_processed, file_name=None):
    """Write the cache of processed sentences to disk.

    The data is first pickled into a temporary file next to the target and
    then moved over the target with `os.replace`. An interruption while
    writing therefore leaves the previous cache file intact instead of a
    truncated one.

    Args:
        all_sentances_processed: Object to pickle (the dict of processed
            sentences).
        file_name: Path of the pickle file to write. Defaults to
            `file_name_summary_all_sentances` from the config cell.
    """
    if file_name is None:
        file_name = file_name_summary_all_sentances
    tmp_file_name = f"{file_name}.tmp"
    with open(tmp_file_name, 'wb') as fp:
        pickle.dump(all_sentances_processed, fp)
    # Swap the fully written file into place in a single step.
    os.replace(tmp_file_name, file_name)

# Restore the results of earlier runs (an empty dict on the first run)
already_processed_sentances = load_already_processed_sentances()

# Only sentences that are not yet a key of the cache still need to be processed
sentances_to_process = all_sentances.difference(already_processed_sentances.keys())
print(f"We need to process {len(sentances_to_process)} sentances.")


We need to process 10944 sentances.


In [7]:
# The model works better when it is given a few inputs at a time; to improve
# reliability, the sentences are split into chunks and the partial results
# are saved after each chunk.

# The set is turned into a list so that it can be sliced into chunks
sentances_to_process_list = list(sentances_to_process)

def batch_sentances(sentances, chunk_size):
    """Split `sentances` into consecutive slices of at most `chunk_size` items.

    Returns a list of slices in the original order; the last slice may be
    shorter than `chunk_size`. An empty input gives an empty list.
    """
    chunk_starts = range(0, len(sentances), chunk_size)
    return [sentances[start:start + chunk_size] for start in chunk_starts]

sentances_to_llm_batched = batch_sentances(sentances_to_process_list, llm_batch_size)

# When every sentence has already been processed (e.g. on a resumed run that
# had finished) there are no batches left; use an empty sample instead of
# failing with an IndexError.
sentances_to_llm_sample = sentances_to_llm_batched[0] if sentances_to_llm_batched else []

def process_prompt_batch(batch):
    """Generate summaries for one batch of sentences and persist the results.

    A summary prompt is built for every sentence in `batch` and sent to the
    module-level `pipeline`. The generated texts are stored in the
    module-level `already_processed_sentances` dict under the original
    sentence, and the whole dict is then written to disk with
    `save_already_processed_sentances`.

    Args:
        batch: List of sentences to summarize. An empty batch is a no-op.
    """
    # Nothing to do, e.g. when all sentences were processed by an earlier run.
    if not batch:
        return

    prompts_to_llm = [f"Provide a summary of this text: {s}?\n Summary:" for s in batch]

    sequences = pipeline(
        prompts_to_llm,
        # Request sampling explicitly (as check_model_is_working does):
        # `top_k` and `num_return_sequences` > 1 only make sense when
        # sampling, so do not rely on the model's default generation config.
        do_sample=True,
        top_k=10,
        num_return_sequences=num_return_sequences,
        eos_token_id=tokenizer.eos_token_id,
        # NOTE(review): `max_length` also counts the prompt tokens, so long
        # sentences leave less room for the summary.
        max_length=max_sequences_length,
        return_full_text=False,
    )
    # The cache dict is only mutated, never rebound, so no `global` statement
    # is needed.
    for original_s, generated_batch in zip(batch, sequences):
        already_processed_sentances[original_s] = [s['generated_text'] for s in generated_batch]

    save_already_processed_sentances(already_processed_sentances)

In [8]:
# Show the sentences of the first batch, below a heading line
print("Sample batch:", sentances_to_llm_sample, sep="\n")


Sample batch:
['The 14-year-old national spelling finalist who attends school in Belmont , N.C. , got a word that sounded like " zistee " during competition Wednesday .', 'The euro was at 1.5281 versus the Swiss franc EURCHF = , up 0.2 percent on the session , after hitting its highest since mid-2001 around 1.5292 earlier in the session .', 'Prosecutors called Durst a cold-blooded killer who shot Black to steal his identity .']


In [None]:
# Run the summarization for the sample batch only; the results are stored in
# `already_processed_sentances` and saved to disk
process_prompt_batch(sentances_to_llm_sample)

In [None]:
# Display the cache of processed sentences and their generated texts
already_processed_sentances