## **Website Bot**

In [1]:
import os
import sys
import torch
from langchain.document_loaders import TextLoader,UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline
from huggingface_hub import notebook_login

In [2]:
import nltk

# Download the required NLTK packages
try:
    nltk.download("punkt")
    nltk.download("average_perceptron_tagger")
    print("All required NLTK packages downloaded successfully!")
except Exception as e:
    print(f"An error occurred while downloading NLTK packages: {e}")


All required NLTK packages downloaded successfully!


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vikas\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Error loading average_perceptron_tagger: Package
[nltk_data]     'average_perceptron_tagger' not found in index


In [3]:
URLs = [
    'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb',
    'https://www.mosaicml.com/blog/mpt-7b',
    'https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablem-suite-of-language-models',
    'https://lmsys.org/blog/2023-03-30-vicuna/',
    'https://www.datacamp.com/blog/top-open-source-llms'
]

In [4]:
loader = UnstructuredURLLoader(urls=URLs)
data = loader.load()


In [5]:
print(data)

[Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\n\nSign up\n\nSign in\n\nWrite\n\nSign up\n\nSign in\n\nPaper Review\n\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\n\nLlama 2: one of the best open source models\n\nAndrew Lukyanenko\n\nFollow\n\nPublished in\n\nGoPenAI\n\n15 min read\n\nJul 20, 2023\n\n--\n\nProject link\n\nModel link\n\nPaper link\n\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their ap

In [6]:
text_splitter = CharacterTextSplitter(separator='\n',chunk_size = 1000, chunk_overlap = 200)
text_chunks = text_splitter.split_documents(data)

In [7]:
len(text_chunks)

81

In [8]:
embedding = HuggingFaceEmbeddings()

  embedding = HuggingFaceEmbeddings()
  embedding = HuggingFaceEmbeddings()





In [9]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [10]:
query_result = embedding.embed_query("How are you?")
print(query_result)
print(len(query_result))

[0.027106203138828278, 0.011331875808537006, -0.0019524118397384882, -0.03695134446024895, 0.017764907330274582, 0.0009032891830429435, -0.0338648296892643, 0.01337840873748064, 0.017730554565787315, -0.013246707618236542, -0.04028136655688286, -0.01528528518974781, -0.012560365721583366, 0.015230913646519184, 0.015512331388890743, -0.057512737810611725, -0.017129551619291306, -0.061840787529945374, -0.01876133494079113, -0.007223891094326973, -0.049612585455179214, 0.011142101138830185, 2.97226997645339e-05, -0.009051499888300896, 0.05342879891395569, 0.010582651011645794, 0.03314786031842232, -0.004505352582782507, -0.0061722793616354465, 0.06208071857690811, -0.027285361662507057, 0.029826559126377106, 0.024574674665927887, -0.02151191048324108, 1.6617831306575681e-06, 0.04796356335282326, -0.023516835644841194, -0.05457734316587448, 0.07369428873062134, -0.036343853920698166, 0.023755066096782684, -0.06763102859258652, 0.007463234942406416, 0.07609964907169342, -0.01268930546939373

In [11]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [12]:
model = "meta-llama/Llama-2-7b-chat-hf"

In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True  # Allow FP32 offloading to CPU
)

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model,use_auth_token=True)

model = AutoModelForCausalLM.from_pretrained(model,device_map='auto',
                                             torch_dtype = torch.float16,
                                             use_auth_token = True,
                                             offload_folder="offload",
                                             )



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [15]:
from transformers import pipeline

pipe = pipeline(
    "text-generation",  # Correct task
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,  # Ensure model's data type compatibility
    device_map="auto",  # Automatically map to CPU/GPU
    max_new_tokens=512,
    do_sample=True,
    top_k=30,
    num_return_sequences=1
)


Device set to use cuda:0


In [16]:
llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})

  llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})


In [17]:
llm.predict("What is vicuna?")

  llm.predict("What is vicuna?")


: 

In [1]:
print("The End")

The End
