![](output.png)

In [1]:
import os
import sys
import torch
from langchain.document_loaders import TextLoader,UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings

In [4]:
import nltk

# Download the required NLTK packages
try:
    nltk.download("punkt")
    nltk.download("average_perceptron_tagger")
    print("All required NLTK packages downloaded successfully!")
except Exception as e:
    print(f"An error occurred while downloading NLTK packages: {e}")


All required NLTK packages downloaded successfully!


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vikas\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Error loading average_perceptron_tagger: Package
[nltk_data]     'average_perceptron_tagger' not found in index


In [5]:
from dotenv import load_dotenv
import os
from langchain.llms import OpenAI


load_dotenv()
# Load the OpenAI API key from the environment variable 
api_key = os.getenv("OPENAI_API_KEY")

### Load the data

In [6]:
URLs = [
    'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb',
    'https://www.mosaicml.com/blog/mpt-7b',
    'https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablem-suite-of-language-models',
    'https://lmsys.org/blog/2023-03-30-vicuna/',
    'https://www.datacamp.com/blog/top-open-source-llms'
]

In [7]:
loader = UnstructuredURLLoader(urls=URLs)
data = loader.load()
data

[Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\n\nSign up\n\nSign in\n\nWrite\n\nSign up\n\nSign in\n\nPaper Review\n\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\n\nLlama 2: one of the best open source models\n\nAndrew Lukyanenko\n\nFollow\n\nPublished in\n\nGoPenAI\n\n15 min read\n\nJul 20, 2023\n\n--\n\nProject link\n\nModel link\n\nPaper link\n\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their ap

In [9]:
text_splitter = CharacterTextSplitter(separator='\n',chunk_size = 1000, chunk_overlap = 200)
text_chunks = text_splitter.split_documents(data)

In [10]:
text_chunks[0]

Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\nSign up\nSign in\nWrite\nSign up\nSign in\nPaper Review\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\nLlama 2: one of the best open source models\nAndrew Lukyanenko\nFollow\nPublished in\nGoPenAI\n15 min read\nJul 20, 2023\n--\nProject link\nModel link\nPaper link\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their approach to fine-tuning and safety enhanc

In [11]:
embeddings = OpenAIEmbeddings(model = "text-embedding-3-large",openai_api_key=api_key)

  embeddings = OpenAIEmbeddings(model = "text-embedding-3-large",openai_api_key=api_key)


In [12]:
query_result = embeddings.embed_query("Hello")
len(query_result)

3072

In [13]:
query_result

[-0.03813586967587374,
 -0.021921639333492487,
 -0.0016345970105932214,
 -0.005634444992754745,
 -0.003958298657069427,
 0.008066579357847426,
 0.002837490170218339,
 0.03644958729137692,
 0.010733820043346862,
 0.04588626875832168,
 0.0016427041732618613,
 -0.024418630534273022,
 -0.03794129777182638,
 0.016749299353221347,
 -0.026656194392302147,
 0.023024206005847238,
 0.021029855165232238,
 0.0033624259262145344,
 0.005484463414707442,
 -0.008625969856693439,
 0.015630518355529317,
 0.003593478665286969,
 -0.007661223552721768,
 0.04108685826080797,
 0.008544899161329576,
 0.02088392809984179,
 -0.04507555994203796,
 -0.008723255808717119,
 -0.02829383193814206,
 -0.012322814613174934,
 -0.002164599626842,
 0.020624500757090384,
 0.009331289283574972,
 0.014211772339097614,
 0.0399194342871041,
 0.009971751409107384,
 0.018613935591138105,
 0.002592249896475784,
 -0.018095080905635292,
 0.02278099298843311,
 0.017754581936197486,
 0.015662945143558808,
 -0.014706306467917043,
 0.01

In [14]:
vectorstore = FAISS.from_documents(text_chunks,embeddings)


In [15]:
llm = OpenAI(openai_api_key = api_key, temperature = 0.9)

  llm = OpenAI(openai_api_key = api_key, temperature = 0.9)


In [16]:
#llm.predict("Please provide a concise summary of the book")

In [17]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm,retriever = vectorstore.as_retriever())

In [18]:
result = chain({"question":"How good is Vicuna?"},return_only_outputs=True)

result['answer']

  result = chain({"question":"How good is Vicuna?"},return_only_outputs=True)


' According to preliminary evaluation using GPT-4, Vicuna-13B achieves more than 90% quality of OpenAI ChatGPT and Google Bard while outperforming other models like LLaMA and Stanford Alpaca in more than 90% of cases. \n'

In [20]:
import textwrap
wrapped_text = textwrap.fill(result['answer'],width=100)
print(wrapped_text)

 According to preliminary evaluation using GPT-4, Vicuna-13B achieves more than 90% quality of
OpenAI ChatGPT and Google Bard while outperforming other models like LLaMA and Stanford Alpaca in
more than 90% of cases.


In [22]:
print("The End")

The End
