In [1]:
!python3 -m pip install \
    --upgrade \
    --requirement requirements.txt \
    --constraint constraints.txt \
    --extra-index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [2]:
import gc
import pathlib
import shutil

import peft
import torch
import transformers

In [3]:
# Identifier of the base model loaded via `from_pretrained` further down.
base_model_identifier = "HuggingFaceH4/zephyr-7b-beta"
# Zip archive containing the tuned adapter, and the directory it is unpacked into.
# NOTE(review): "adaptr" in the archive name looks like a typo of "adapter"; the
# value must match the file on disk, so confirm before renaming either one.
tuned_adapter_archive = pathlib.Path("tuned_zephyr_adaptr_archive.zip")
tuned_adapter_directory = pathlib.Path("tuned_zephyr_adapter_directory")

# Single prompt passed unchanged to both the untuned and the tuned pipeline so
# their generations can be compared: instructions, retrieved context, question.
prompt = """You are a chat assistant to help new users for a Python package.

1. You will be provided with a specific question and a context relevant to answer that question.
2. Your response should be based solely on the given context.
3. Keep your answer concise, not exceeding five sentences.
4. If the answer is not found within the context, respond with "I do not know.".
5. Do not fabricate any information.

Context: 'subtract_numbers' function documents itself as follows: 'Perform subtraction of two real numbers.'.

Based on docstring, return of 'subtract_numbers' function is as follows: 'difference of ``first_number`` from ``second_number``'.

Based on docstring, return of 'add_numbers' function is as follows: 'sum of ``first_number`` and ``second_number``'.
Question: Tell me how to add the number 2 and 3, and subtract the result by 5.

Answer:"""

In [4]:
# Extract the tuned adapter archive; the format is inferred from the file extension.
shutil.unpack_archive(
    filename=tuned_adapter_archive,
    extract_dir=tuned_adapter_directory,
)

In [5]:
# bitsandbytes settings used when loading the base model:
# 4-bit weights with the "nf4" quantisation type, float16 as the compute
# dtype, and double quantisation switched off.
quantisation_configuration = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype="float16",
)

In [6]:
# Load the quantised base model; the device map {"": 0} assigns the whole
# model (empty module prefix) to device 0.
untuned_model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_identifier,
    device_map={"": 0},
    quantization_config=quantisation_configuration,
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [7]:
# Tokenizer that belongs to the base model, shared by both pipelines below.
tokeniser = transformers.AutoTokenizer.from_pretrained(base_model_identifier)
tokeniser.padding_side = "right"  # pad at the end of the sequence
tokeniser.pad_token = tokeniser.eos_token  # reuse the end-of-sequence token for padding

In [8]:
# Text-generation pipeline around the already-loaded, quantised base model.
#
# * `device_map`, `torch_dtype` and `model_kwargs` are intentionally not passed:
#   the pipeline documents them as options forwarded to `from_pretrained`,
#   which is only called when `model` is given as an identifier. Here `model`
#   is an object that was loaded (with its own device map and quantisation
#   settings) in an earlier cell, so those arguments had no effect.
# * `do_sample=False` requests greedy decoding explicitly. `do_sample=True`
#   with `top_k=1` also keeps only the most likely token at each step, but
#   routes it through the random sampler without a seed; greedy decoding
#   makes the comparison reproducible across runs.
untuned_pipeline = transformers.pipeline(
    "text-generation",
    model=untuned_model,
    tokenizer=tokeniser,
    max_new_tokens=256,  # upper bound on generated tokens per prompt
    do_sample=False,
)

In [9]:
# Baseline: generate with the base model only. The cell displays the returned
# list of {'generated_text': ...} dicts; the text starts with the prompt itself.
untuned_pipeline(prompt)



[{'generated_text': 'You are a chat assistant to help new users for a Python package.\n\n1. You will be provided with a specific question and a context relevant to answer that question.\n2. Your response should be based solely on the given context.\n3. Keep your answer concise, not exceeding five sentences.\n4. If the answer is not found within the context, respond with "I do not know.".\n5. Do not fabricate any information.\n\nContext: \'subtract_numbers\' function documents itself as follows: \'Perform subtraction of two real numbers.\'.\n\nBased on docstring, return of \'subtract_numbers\' function is as follows: \'difference of ``first_number`` from ``second_number``\'.\n\nBased on docstring, return of \'add_numbers\' function is as follows: \'sum of ``first_number`` and ``second_number``\'.\nQuestion: Tell me how to add the number 2 and 3, and subtract the result by 5.\n\nAnswer: To add 2 and 3, you can use our \'add_numbers\' function. The return value will be the sum of 2 and 3,

In [10]:
# Attach the unpacked tuned adapter to the base model.
# NOTE(review): PEFT may modify the base model in place, so `untuned_model`
# and `untuned_pipeline` may also use the adapter from here on. Confirm before
# re-running the baseline cell after this one.
tuned_model = peft.PeftModel.from_pretrained(
    model=untuned_model,
    model_id=tuned_adapter_directory,
)

In [11]:
# Text-generation pipeline around the adapter-wrapped model, configured the
# same way as the baseline pipeline so the two generations are comparable.
#
# * `device_map`, `torch_dtype` and `model_kwargs` are intentionally not passed:
#   the pipeline documents them as options forwarded to `from_pretrained`,
#   which is only called when `model` is given as an identifier. Here `model`
#   is an already-instantiated object, so those arguments had no effect.
# * `do_sample=False` requests greedy decoding explicitly. `do_sample=True`
#   with `top_k=1` also keeps only the most likely token at each step, but
#   routes it through the random sampler without a seed; greedy decoding
#   makes the comparison reproducible across runs.
#
# transformers logs that 'PeftModelForCausalLM' is not a supported
# text-generation class when this runs; the recorded run still produced a
# generation in the next cell.
tuned_pipeline = transformers.pipeline(
    "text-generation",
    model=tuned_model,
    tokenizer=tokeniser,
    max_new_tokens=256,  # upper bound on generated tokens per prompt
    do_sample=False,
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

In [12]:
# Generate with the adapter-wrapped model on the same prompt, for comparison
# with the baseline output above.
tuned_pipeline(prompt)

[{'generated_text': 'You are a chat assistant to help new users for a Python package.\n\n1. You will be provided with a specific question and a context relevant to answer that question.\n2. Your response should be based solely on the given context.\n3. Keep your answer concise, not exceeding five sentences.\n4. If the answer is not found within the context, respond with "I do not know.".\n5. Do not fabricate any information.\n\nContext: \'subtract_numbers\' function documents itself as follows: \'Perform subtraction of two real numbers.\'.\n\nBased on docstring, return of \'subtract_numbers\' function is as follows: \'difference of ``first_number`` from ``second_number``\'.\n\nBased on docstring, return of \'add_numbers\' function is as follows: \'sum of ``first_number`` and ``second_number``\'.\nQuestion: Tell me how to add the number 2 and 3, and subtract the result by 5.\n\nAnswer:\n    The number of members in the Yielded\nif first number is ``second_number`` from ``float`\'. 6. su

In [13]:
# Drop every notebook-level reference to the pipelines, models and tokenizer
# so that their memory becomes eligible for collection.
del (
    tuned_pipeline,
    tuned_model,
    untuned_pipeline,
    tokeniser,
    untuned_model,
)

In [14]:
# Force a full garbage collection now that the references have been deleted;
# the displayed integer is the number of unreachable objects found.
gc.collect()

126

In [15]:
# Release unoccupied GPU memory still held by PyTorch's caching allocator so
# it becomes available to other processes.
torch.cuda.empty_cache()