In [1]:
import giskard
from typing import Sequence, Optional
from giskard.llm.client import set_default_client
from giskard.llm.client.base import LLMClient, ChatMessage
import torch
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM
from langchain.llms import HuggingFacePipeline
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import PromptTemplate, LLMChain

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint to load. Previously tried alternative:
#   mistralai/Mistral-7B-Instruct-v0.2
model_name_or_path = "NousResearch/Hermes-2-Pro-Llama-3-8B"

# Fetch the checkpoint's config and override its positional-embedding limit.
config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
# NOTE(review): 8096 may be a typo for 8192 — confirm the intended context size.
config.max_position_embeddings = 8096

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Tokenizer first, then the quantized causal LM placed on the CUDA device.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    config=config,
    quantization_config=quantization_config,
    trust_remote_code=True,
    device_map="cuda",
)

# Alternative tokenizer (disabled; would need an `hf_token` to be defined):
# tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2', token=hf_token)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:27<00:00,  7.00s/it]


In [6]:
# Wrap the already-loaded model and tokenizer in a Hugging Face
# text-generation pipeline, then expose that pipeline to LangChain.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=250,  # length cap forwarded to the pipeline
)

local_llm = HuggingFacePipeline(pipeline=pipe)

template = """can you tell me a joke about {topic}?"""
# The template's only placeholder is {topic}; the declared input variable must
# be the placeholder name, not the value that is later substituted for it.
prompt = PromptTemplate(template=template, input_variables=["topic"])
llm_chain = LLMChain(prompt=prompt, llm=local_llm, verbose=True)

# Inference only: switch to eval mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    print(llm_chain.run({"topic": "spaniards"}))


Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mcan you tell me a joke about spaniards?[0m

[1m> Finished chain.[0m
can you tell me a joke about spaniards?'
'the difference between a spanish man and a spanish armadillo is that the armadillo'
'oh, come on. tell me the punchline or i'll get mad.'
'the armadillo is a lot faster!'
- Joke submitted by Samir Ghoshal
[1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19] [20] [21] [22] [23] [24] [25] [26] [27] [28] [29] [30] [31] [32] [33] [34] [35] [36] [37] [38] [39] [40] [41] [42] [43] [44] [45] [46] [47] [48] [49] [50] [51] [52] [53] [54] [55] [56] [57] [58] [59] [60] [


ValidationError: 2 validation errors for LLMChain
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)

In [None]:
from langchain.llms import GPT4All


In [7]:
# Build a LangChain embeddings wrapper around a sentence-transformers model.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
hf = HuggingFaceEmbeddings(
    model_name=model_name,
)

In [8]:
# Bare expression: the cell output is the repr of the embeddings wrapper.
hf

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [None]:
from langchain import PromptTemplate, LLMChain

# Step-by-step reasoning prompt; {question} is its only placeholder.
template = """
Let's think step by step of the question: {question}
Based on all the thought the final answer becomes:
"""
prompt = PromptTemplate(template=template, input_variables=["question"])

# Path to the locally stored model weights (.bin file).
# Models can be downloaded from GPT4All's website; download scripts are
# also available in the later sections of this tutorial.
local_path = "./models/GPT4All/ggml-gpt4all-j-v1.3-groovy.bin"

# Initialize the LLM and chain it with the prompt.
# The weights file is a GPT4All-J (GPT-J family) model, so the backend is
# "gptj" rather than "llama".
llm = GPT4All(
    model=local_path,
    backend="gptj",
)

llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

# Run the chain with the query (question).
llm_chain('Who is the CEO of Google and why he became the ceo of Google?')