In [2]:
import pprint
import os
import torch
import transformers
import yaml
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

load_dotenv()

True

# Load Model

In [3]:
config_path =  '../configs/model/Llama_2_7b.yaml'

# Load configuration from the YAML file
with open(config_path, 'r') as file:
    config = yaml.safe_load(file)

model_id = config['model']['id']
device = config['model']['device']

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=config['bits_and_bytes']['load_in_4bit'],
    bnb_4bit_quant_type=config['bits_and_bytes']['bnb_4bit_quant_type'],
    bnb_4bit_use_double_quant=config['bits_and_bytes']['bnb_4bit_use_double_quant'],
    bnb_4bit_compute_dtype=getattr(torch, config['bits_and_bytes']['bnb_4bit_compute_dtype'])
)

hf_auth = os.getenv("HF_TOKEN")

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device if device != 'auto' else 'auto',
    use_auth_token=hf_auth
)

model.eval()

actual_device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'
print(f"Model loaded on {actual_device}")




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Model loaded on cuda:0


In [4]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



In [5]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [6]:
import torch

stop_token_ids = [torch.LongTensor(x).to('cuda') for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [7]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [11]:
generate_text = transformers.pipeline(
    model=model, 
    tokenizer=tokenizer,
    return_full_text=True, 
    task='text-generation',
    stopping_criteria=stopping_criteria, 
    temperature=0.5, 
    max_new_tokens=512,
    repetition_penalty=1.1 
)

In [12]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

out = llm(prompt="Complete the following text: \nThe quick brown fox ...",)


In [13]:
print(out)


A) jumped over a lazy dog.
B) ran quickly to catch its prey.
C) was chased by a pack of wolves.
D) all of the above.

Answer: D) all of the above.


# Zero-shot Summarization

In [14]:
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate

In [17]:
import datasets
dataset = datasets.load_from_disk("/home/ubuntu/oulas/github/text-summarization-pipeline/data/validation_split")


In [18]:
prompt_template = """Write a concise summary of the following:
"{document}"
CONCISE SUMMARY:"""

prompt = PromptTemplate(input_variables = ["document"],template=prompt_template)


In [19]:
validation_sample = dataset[0]

In [20]:
llm.get_num_tokens(prompt.format(**{"document":validation_sample['document']}))

389

In [21]:
pprint.pprint(prompt.format(**{"document":validation_sample['document']}))

('Write a concise summary of the following:\n'
 '"Draw a number line, but place your zero to the far right of the line. Then, '
 'number your line toward your left using negative numbers. Mark the first '
 'negative number you want to add on the number line. Then, count out your '
 'second negative number, moving to the left. This gives you your answer. For '
 'example, let’s say you’re adding -4+-6. You’d circle -4 on your number line. '
 'Then, count 6 spaces to the left. You’ll arrive at -10, which is your '
 'answer. This is because you are moving the same number of places on the '
 'number line, just toward the negative side. This means your final answer '
 'will be negative. For example, when adding -12+-21, you could add 12+21=33. '
 'However, since your numbers were negative, you’d make your answer -33. Draw '
 'a number line with zero in the middle. Number to the left moving from -1 to '
 '-10, then number to the right 1 to 10. Circle the positive number on your '
 'number lin

In [22]:
out = llm(prompt=prompt.format(**{"document":validation_sample['document']}))

In [23]:
pprint.pprint(out)

('\n'
 'To perform addition with negative numbers, draw a number line with zero in '
 'the middle and number it from -1 to 10 on both sides. Move the cursor or pen '
 'to the left for negative numbers and right for positive numbers. When adding '
 'a negative number with a positive number, move towards the left on the '
 'number line and count the number of spaces. The answer will be negative.')


# Load Vectorstore

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

model_name = 'sentence-transformers/sentence-t5-base'
model_kwargs = {'device': 'cuda'}

hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,

)