In [1]:
import torch

if torch.cuda.is_available():
    print("Running on GPU")
else:
    print("Running on CPU")

import os

# Print the present working directory
print(os.getcwd())

import sys

print(sys.prefix)


Running on GPU
/media/mldadmin/home/s123mdg310_03/Convo2Calendar
/media/mldadmin/home/s123mdg310_03/miniconda3/envs/conda_env1


In [2]:
from torch import cuda, bfloat16
import transformers
from huggingface_hub import notebook_login
notebook_login()

model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token

#hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,)


model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
)

stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

generate_text = transformers.pipeline(
    model=model, 
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded on cuda:0


In [3]:
res = generate_text("Explain me the difference between Data Lakehouse and Data Warehouse.")
print(res[0]["generated_text"])

Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both are designed to store large amounts of data but have different architectures, use cases, and benefits. A data warehouse is a centralized repository that stores data in a structured manner, typically for querying and analysis. A data lakehouse, on the other hand, is a more flexible and scalable storage solution that allows for storing data in its raw form, without any predefined schema or structure.

Here are some key differences between data lakehouses and data warehouses:

1. Structure: A data warehouse stores data in a structured manner, with well-defined schemas and tables. In contrast, a data lakehouse stores data in its raw form, without any predefined schema or structure. This means that data can be stored in various formats, such as JSON, CSV, or Avro, and can be easily integrated 

In [4]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
llm(prompt="Explain me the difference between Data Lakehouse and Data Warehouse.")

" Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both concepts have gained popularity in recent years. A data lakehouse is a centralized repository that stores all the raw data from various sources in its original form, without transforming or processing it. On the other hand, a data warehouse is a structured repository that stores data in a specific format, typically after cleaning, transforming, and aggregating it.\nData Lakehouse vs Data Warehouse: What's the Difference? - DataCamp\nA data lakehouse is a centralized repository that stores all the raw data from various sources in its original form, without transforming or processing it. This means that the data is stored in its native format, such as JSON, CSV, or Avro, and can be accessed and queried directly without having to transform or process it first. In contrast, a data warehouse is a structured repository that stores data in a specific format, ty

In [5]:
from langchain.document_loaders import WebBaseLoader

web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/release-notes/index.html","http://docs.databricks.com/ingestion/index.html","http://docs.databricks.com/exploratory-data-analysis/index.html","http://docs.databricks.com/data-preparation/index.html","http://docs.databricks.com/data-sharing/index.html","http://docs.databricks.com/marketplace/index.html","http://docs.databricks.com/workspace-index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html","http://docs.databricks.com/delta/index.html","http://docs.databricks.com/dev-tools/index.html","http://docs.databricks.com/integrations/index.html","http://docs.databricks.com/administration-guide/index.html","http://docs.databricks.com/security/index.html","http://docs.databricks.com/data-governance/index.html","http://docs.databricks.com/lakehouse-architecture/index.html","http://docs.databricks.com/reference/api.html","http://docs.databricks.com/resources/index.html","http://docs.databricks.com/whats-coming.html","http://docs.databricks.com/archive/index.html","http://docs.databricks.com/lakehouse/index.html","http://docs.databricks.com/getting-started/quick-start.html","http://docs.databricks.com/getting-started/etl-quick-start.html","http://docs.databricks.com/getting-started/lakehouse-e2e.html","http://docs.databricks.com/getting-started/free-training.html","http://docs.databricks.com/sql/language-manual/index.html","http://docs.databricks.com/error-messages/index.html","http://www.apache.org/","https://databricks.com/privacy-policy","https://databricks.com/terms-of-use"] 

loader = WebBaseLoader(web_links)
documents = loader.load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

In [7]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

In [8]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

In [9]:
chat_history = []

query = "What is Data lakehouse architecture in Databricks?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 In Databricks, the Medallion architecture is used to describe the data lakehouse architecture. This architecture separates data storage from compute resources, allowing for flexible and scalable data processing. The data lakehouse stores data in a centralized repository, while the compute layer provides the necessary resources for data processing and analysis. This allows for faster query performance and easier management of data.


In [10]:
chat_history = [(query, result["answer"])]

query = "What are Data Governance and Interoperability in it?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 In the context of Data Lakehouse architecture in Databricks, Data Governance refers to the policies and practices implemented to securely manage the data assets within an organization. It encompasses the centralized management of data across various teams, departments, and stakeholders, ensuring data quality, security, and compliance with regulatory requirements. By implementing a unified data governance solution, organizations can simplify data management, reduce data silos, and improve their overall data governance posture.


In [11]:
print(result['source_documents'])

[Document(page_content='Data governance\nLakehouse architecture\n\nReference & resources\n\nReference\nResources\nWhat’s coming?\nDocumentation archive\n\n\n\n\n    Updated Oct 27, 2023\n  \n\n\nSend us feedback\n\n\n\n\n\n\n\n\n\n\nDocumentation \nSecurity and compliance guide\n\n\n\n\n\n\n\nSecurity and compliance guide \nThis guide provides an overview of security features and capabilities that an enterprise data team can use to harden their Databricks environment according to their risk profile and governance policy.\nThis guide does not cover information about securing your data. For that information, see Data governance guide.\n\nNote\nThis article focuses on the most recent (E2) version of the Databricks platform. Some of the features described here may not be supported on legacy deployments that have not migrated to the E2 platform.', metadata={'source': 'http://docs.databricks.com/security/index.html', 'title': 'Security and compliance guide | Databricks on AWS', 'description'

In [12]:
chat_history = [(query, result["answer"])]

query = "Who was singapore's first prime minister?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Lee Kuan Yew


In [13]:
chat_history = [(query, result["answer"])]

query = "Who is Singapore's current prime minister?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  The current Prime Minister of Singapore is Lee Hsien Loong. He has been serving in this position since 2004 and is also the leader of the People's Action Party, which has been the ruling party in Singapore since 1959.


In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain

# Initialize the text splitter and embeddings model
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Split and process documents in batches
batch_size = 5  # Set your desired batch size here

for i in range(0, len(documents), batch_size):
    # Split documents into chunks (batch processing)
    document_batch = documents[i:i+batch_size]
    all_splits_batch = text_splitter.split_documents(document_batch)

    # Store embeddings in the vector store for the current batch
    vectorstore_batch = FAISS.from_documents(all_splits_batch, embeddings)

    # Create a retrieval chain for the current batch of vectors and query it.
    chain_batch = ConversationalRetrievalChain.from_llm(llm,
                                                       vectorstore_batch.as_retriever(),
                                                       return_source_documents=True)
    
    chat_history_batch = []  # Add your chat history for each document here
    
    queries_batch = ["What is Data lakehouse architecture?", "Another question"] 
                       # Add your list of queries corresponding to each document here
    
    for query in queries_batch:
        chat_history_batch.append(chain({"question": query, "chat_history": chat_history}))

        print(f"Answer to '{query}': {result['answer']}")



Answer to 'What is Data lakehouse architecture?':   The current Prime Minister of Singapore is Lee Hsien Loong. He has been serving in this position since 2004 and is also the leader of the People's Action Party, which has been the ruling party in Singapore since 1959.
Answer to 'Another question':   The current Prime Minister of Singapore is Lee Hsien Loong. He has been serving in this position since 2004 and is also the leader of the People's Action Party, which has been the ruling party in Singapore since 1959.
Answer to 'Ellipsis':   The current Prime Minister of Singapore is Lee Hsien Loong. He has been serving in this position since 2004 and is also the leader of the People's Action Party, which has been the ruling party in Singapore since 1959.
Answer to 'What is Data lakehouse architecture?':   The current Prime Minister of Singapore is Lee Hsien Loong. He has been serving in this position since 2004 and is also the leader of the People's Action Party, which has been the ruling

KeyboardInterrupt: 

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import WebBaseLoader
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Initialize the text splitter and embeddings model
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Load documents from web links using WebBaseLoader
web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com"]
web_links = ['https://en.wikipedia.org/wiki/History_of_Singapore','https://worldpopulationreview.com/country-rankings/smallest-countries','https://www.history.com/topics/religion/vatican-city','https://www.iexplore.com/articles/travel-guides/europe/monaco/history-and-culture#:~:text=Historically%2C%20Monaco%20was%20part%20of,for%20over%20715%20years%20now.','https://www.thevaticantickets.com/vatican-history/']
loader = WebBaseLoader(web_links)
documents = loader.load()

# Split and process documents in batches
batch_size = 32  # Set your desired batch size here

for i in range(0, len(documents), batch_size):
    # Split documents into chunks (batch processing)
    document_batch = documents[i:i+batch_size]
    all_splits_batch = text_splitter.split_documents(document_batch)

    # Store embeddings in the vector store for the current batch
    vectorstore_batch = FAISS.from_documents(all_splits_batch, embeddings)

    # Create a retrieval chain for the current batch of vectors and query it.
    chain_batch = ConversationalRetrievalChain.from_llm(llm,
                                                       vectorstore_batch.as_retriever(),
                                                       return_source_documents=True)
    

In [17]:
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
)
queries_batch = ["Who is the smallest country in the world", "When did it gain independence"] 
chat_history = []
query = queries_batch[0]
result = chain({"question": query, "chat_history": chat_history})

chat_history = [(query, result["answer"])]
query = queries_batch[1]
result2 = chain({"question": query, "chat_history": chat_history})

Token indices sequence length is longer than the specified maximum sequence length for this model (1580 > 1024). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 

In [18]:
web_links = ['https://en.wikipedia.org/wiki/History_of_Singapore','https://worldpopulationreview.com/country-rankings/smallest-countries','https://www.history.com/topics/religion/vatican-city','https://www.iexplore.com/articles/travel-guides/europe/monaco/history-and-culture#:~:text=Historically%2C%20Monaco%20was%20part%20of,for%20over%20715%20years%20now.','https://www.thevaticantickets.com/vatican-history/']
loader = WebBaseLoader(web_links)
documents = loader.load()

# Split and process documents in batches
batch_size = 32  # Set your desired batch size here

for i in range(0, len(documents), batch_size):
    # Split documents into chunks (batch processing)
    document_batch = documents[i:i+batch_size]
    all_splits_batch = text_splitter.split_documents(document_batch)

    # Store embeddings in the vector store for the current batch
    vectorstore_batch = FAISS.from_documents(all_splits_batch, embeddings)

    # Create a retrieval chain for the current batch of vectors and query it.
    chain_batch = ConversationalRetrievalChain.from_llm(llm,
                                                       vectorstore_batch.as_retriever(),
                                                       return_source_documents=True)
    
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(llm, chain_type="map_reduce")

# Create a retrieval chain for the current batch of vectors and query it.
chain_batch = ConversationalRetrievalChain(
    retriever=FAISS.from_documents(all_splits_batch, embeddings).as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
)

query_list = ["Who is the smallest country in the world", "When did it gain independence", "Tell me more about it"]
chat_history = []
#query = queries_batch[0]
for query in query_list:
    result = chain_batch({"question": query, "chat_history": chat_history})
    chat_history.append((query, result["answer"]))
    print('answer',result['answer'])
    print('chat_history',chat_history)
    print('')

#query = queries_batch[1]
#result2 = chain_batch({"question": query, "chat_history": chat_history})
#chat_history.append((query, result2["answer"]))


#print(result2['answer'])

answer  The smallest country in the world is Vatican City.
chat_history [('Who is the smallest country in the world', ' The smallest country in the world is Vatican City.')]

answer  Vatican City gained independence in 1929.
chat_history [('Who is the smallest country in the world', ' The smallest country in the world is Vatican City.'), ('When did it gain independence', ' Vatican City gained independence in 1929.')]

answer  Vatican City gained its independence in 1929 with the signing of the Lateran Pacts.
chat_history [('Who is the smallest country in the world', ' The smallest country in the world is Vatican City.'), ('When did it gain independence', ' Vatican City gained independence in 1929.'), ('Tell me more about it', ' Vatican City gained its independence in 1929 with the signing of the Lateran Pacts.')]



In [19]:
import langchain
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.vectorstores.faiss import FAISS


# Load the web links
web_links = [
    "https://en.wikipedia.org/wiki/History_of_Singapore",
    "https://worldpopulationreview.com/country-rankings/smallest-countries",
    "https://www.history.com/topics/religion/vatican-city",
    "https://www.iexplore.com/articles/travel-guides/europe/monaco/history-and-culture",
    "https://www.thevaticantickets.com/vatican-history/"
]

# Load the documents
loader = WebBaseLoader(web_links)
documents = loader.load()

# Split and process documents in batches
batch_size = 32

# Create a retrieval chain for the current batch of vectors and query it.
for i in range(0, len(documents), batch_size):
    # Split documents into chunks (batch processing)
    document_batch = documents[i:i+batch_size]
    all_splits_batch = text_splitter.split_documents(document_batch)

    # Store embeddings in the vector store for the current batch
    vectorstore_batch = FAISS.from_documents(all_splits_batch, embeddings)

    # Create a retrieval chain for the current batch of vectors and query it.
    # Create a retrieval chain for the current batch of vectors and query it.
    chain_batch = ConversationalRetrievalChain.from_llm(llm,
                                                        vectorstore_batch.as_retriever(),
                                                        return_source_documents=True)

    # Initialize the chat history
    chat_history = []

    # Iterate through the queries from the specified documents
    for query in web_links:
        # Replace the general query with the specific document query
        query = query.replace("https://", "").replace("/", "")
        query = "Who is the main topic of the document: " + query

        # Send the query and get the result
        result = chain_batch({"question": query, "chat_history": chat_history})
        chat_history = [(query, result["answer"])]

In [21]:
print(chain_batch)

memory=None callbacks=None callback_manager=None verbose=False tags=None metadata=None combine_docs_chain=StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:", template_format='f-string', validate_template=True), llm=HuggingFacePipeline(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7f89b97c40d0>, model_id='gp

In [22]:
print(chat_history)

[('Who is the main topic of the document: www.thevaticantickets.comvatican-history', "  The history of the Vatican dates back to the 4th century AD when the construction of a basilica over St. Peter's grave in Rome began. Over time, the area developed into a popular pilgrimage site and commercial district, but was abandoned after the move of the papal court to France in 1309. In 1929, the Lateran Pacts established Vatican City as a sovereign entity distinct from the Holy See, and granted the church $92 million in compensation for the loss of the Papal States. Today, the Vatican is the home of the Pope and the Roman Curia, and the spiritual center for over 1.2 billion followers of the Catholic Church.")]


In [20]:

from langchain.text_splitter import CharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.document_loaders import WebBaseLoader
from langchain.chains import RetrievalQA
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
import textwrap
import sys
import os
web_links = ['https://en.wikipedia.org/wiki/History_of_Singapore','https://worldpopulationreview.com/country-rankings/smallest-countries','https://www.history.com/topics/religion/vatican-city','https://www.iexplore.com/articles/travel-guides/europe/monaco/history-and-culture#:~:text=Historically%2C%20Monaco%20was%20part%20of,for%20over%20715%20years%20now.','https://www.thevaticantickets.com/vatican-history/']
loader = WebBaseLoader(web_links)
documents = loader.load()
text_splitter=CharacterTextSplitter(separator='\n',
                                    chunk_size=1000, 
                                    chunk_overlap=50)
#text_chunks=text_splitter.split_documents(documents)
documents2 = TextLoader("aboutme.txt").load()
text_chunks = text_splitter.split_documents(documents2)
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})
vectorstore=FAISS.from_documents(text_chunks, embeddings)
'''
import torch
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             load_in_8bit=True,
                                              #load_in_4bit=True
                                             )'''
llm=HuggingFacePipeline(pipeline=generate_text, model_kwargs={'temperature':0})
chain =  RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever())
query = "question?"
result=chain({"query": query, "chat_history": []},return_only_outputs=True)

In [23]:
documents2 ='my name is Qi Xuan. \n I am studying at NtU. \n I am staying at hall 13'

In [135]:
#print(documents)

In [25]:
generate_text = transformers.pipeline(
    model=model, 
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [26]:
query_list = ["What is the smallest country in the world", "When did it gain independence", "Tell me more about it"]
chat_history = []
#query = queries_batch[0]
for query in query_list:
    result = chain({"query": query, "chat_history": chat_history})
    chat_history.append((query, result["result"].strip("\n")))
    print(query)
    print('answer',result['result'].strip("\n"))
    print('chat_history',chat_history, '\n')

What is the smallest country in the world
answer  Vatican City has a population of around 800 people and covers an area of approximately 0.44 km² (0.17 sq mi), making it the smallest country in the world.
chat_history [('What is the smallest country in the world', ' Vatican City has a population of around 800 people and covers an area of approximately 0.44 km² (0.17 sq mi), making it the smallest country in the world.')] 

When did it gain independence
answer  I don't have access to information about when NtU gained independence as I'm just an AI and do not have real-time information or databases. However, I can suggest some resources where you might be able to find the information you are looking for.
chat_history [('What is the smallest country in the world', ' Vatican City has a population of around 800 people and covers an area of approximately 0.44 km² (0.17 sq mi), making it the smallest country in the world.'), ('When did it gain independence', " I don't have access to informati

In [27]:
query_list2 = ["Is Vatican City the smallest country", "What is it country size", "Tell me more about it"]
chat_history = []
#query = queries_batch[0]
for query in query_list2:
    result = chain({"query": query, "chat_history": chat_history})
    chat_history.append((query, result["result"].strip("\n")))
    print(query)
    print('answer',result['result'].strip("\n"))
    print('chat_history',chat_history, '\n')

Is Vatican City the smallest country
answer  Yes, Vatican City is the smallest country in the world. It has a total area of approximately 0.44 km² (0.17 sq mi) and a population of around 800 people.

Please provide more context or clarify your question if you need further help.
chat_history [('Is Vatican City the smallest country', ' Yes, Vatican City is the smallest country in the world. It has a total area of approximately 0.44 km² (0.17 sq mi) and a population of around 800 people.\n\nPlease provide more context or clarify your question if you need further help.')] 

What is it country size
answer  The country size of Taiwan is approximately 36,000 square kilometers (13,900 square miles).
chat_history [('Is Vatican City the smallest country', ' Yes, Vatican City is the smallest country in the world. It has a total area of approximately 0.44 km² (0.17 sq mi) and a population of around 800 people.\n\nPlease provide more context or clarify your question if you need further help.'), ('W

In [28]:
query_list = ["What is my name", "Where am I studying at", "Where do I stay at"]
chat_history = []
for query in query_list:
    result = chain({"query": query, "chat_history": chat_history})
    chat_history.append((query, result["result"].strip("\n")))
    print('start of query')
    print(query)
    print('end of query')
    print('answer',result['result'].strip("\n"))
    print('chat_history',chat_history, '\n')

start of query
What is my name
end of query
answer  Your name is Youthful Squirrel.
chat_history [('What is my name', ' Your name is Youthful Squirrel.')] 

start of query
Where am I studying at
end of query
answer  You are studying at National Taiwan University (NtU).
chat_history [('What is my name', ' Your name is Youthful Squirrel.'), ('Where am I studying at', ' You are studying at National Taiwan University (NtU).')] 

start of query
Where do I stay at
end of query
answer  You stay at hall 13.
chat_history [('What is my name', ' Your name is Youthful Squirrel.'), ('Where am I studying at', ' You are studying at National Taiwan University (NtU).'), ('Where do I stay at', ' You stay at hall 13.')] 



In [29]:
for i in result.keys():
    print(i)
    print(result[i])
    print('')

query
Where do I stay at

chat_history
[('What is my name', ' Your name is Youthful Squirrel.'), ('Where am I studying at', ' You are studying at National Taiwan University (NtU).'), ('Where do I stay at', ' You stay at hall 13.')]

result
 You stay at hall 13.

source_documents
[Document(page_content='my name is Youthful Squirrel.\nI am studying at NtU.\nI am staying at hall 13.', metadata={'source': 'aboutme.txt'})]



In [134]:

import torch
text_chunks = text_splitter.split_documents(TextLoader("transcripts/meeting002.txt").load())
model_name = ["sentence-transformers/all-mpnet-base-v2", 'sentence-transformers/all-MiniLM-L6-v2']
embeddings = HuggingFaceEmbeddings(model_name=model_name[0],model_kwargs={'device': 'cuda'})
vectorstore=FAISS.from_documents(text_chunks, embeddings)

llm=HuggingFacePipeline(pipeline=generate_text, model_kwargs={'temperature':0})
chain =  RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=False, retriever=vectorstore.as_retriever())
query_list = ["what is the date of the meeting? example: the meeting was held on 03/07/2022. You should output {03/07/2022} only", "full names of all attendees. example output:Name One, Name Two", "summarize the meeting and include all relevant details, including the deadline and person involved for each topic mentioned.  Store the result as a dict in json format where the key runs in numerical order starting from 1. E.g. {1:{discussion_summary: 'this is the summary of topic two', people_involved: 'Name One, Name Two', deadline: 29/05/2020}, 2:{discussion_summary: 'this is the summary of topic two', people_involved: 'Name Three, Name Four, Name Five', deadline: 03/06/2020}}. Each topic will contain these variables: discussion_summary, people_involved, deadline. If deadline given in relative, calculate the exact date by adding it to the meeting's date and stored it in deadline variable in DD/MM/YYYY format. For example, if the meeting is held on 13/05/2020, 2 weeks later is 13+2weeks x 7 days per week = 29/05/2020. The deadline is 29/05/2020.."]
for query in query_list:
    result=chain({"query": query, "chat_history": []},return_only_outputs=True)
    print(result)

{'result': ' The meeting was held on October 13, 2023.'}
{'result': " All attendees' full names are listed in the transcript:\n\n* John Doe\n* Jane Smith\n* Mary Green"}
{'result': '\n{1:{discussion_summary: "Jane Smith: I understand your concern. However, I believe that the long-term savings from these green features will outweigh the upfront cost. For example, the solar panels will generate electricity, which will reduce our reliance on the grid. The water conservation features will reduce our water bill. And the sustainable materials will last longer than traditional materials, which will reduce our maintenance costs.", people_involved: "Jane Smith", deadline: "29/05/2020"}, 2:{discussion_summary: "John Doe: Okay, then. Let\'s get started. We will need to decide on the green features we want to implement and give ourselves two weeks to complete this research. Then, we can meet again to discuss our findings and make a decision.", people_involved: "John Doe", deadline: "03/06/2020"}'}

In [81]:
my_str.strip('\n')

' The meeting transcript provides enough information to answer the question at the end.\n\nAnswer:\n{\n1:{\n"topic_no": "cost",\n"discussion_summary": "John Doe expressed his concern about the cost of implementing all of the green features.",\n"people_involved": "John Doe",\n"deadline": "29/05/2020"\n},\n2:{\n"topic_no": "green building design",\n"discussion_summary": "Jane Smith presented her concept for a green building design.",\n"people_involved": "Jane Smith, Mary Green",\n"deadline": "29/05/2020"\n}\n}'

In [100]:
my_str = result['result']

In [119]:
open_index = my_str.index('{')
my_str = my_str[open_index:].replace('\n','')
my_str = my_str.replace('"','\"')
my_str = str(my_str)
print((my_str))

{1:{"topic_no": "cost","discussion_summary": "John Doe expressed his concern about the cost of implementing all of the green features.","people_involved": "John Doe","deadline": "29/05/2020"},2:{"topic_no": "green building design","discussion_summary": "Jane Smith presented her concept for a green building design.","people_involved": "Jane Smith, Mary Green","deadline": "29/05/2020"}}


In [114]:
mydict = dict(my_str)

ValueError: dictionary update sequence element #0 has length 1; 2 is required

In [115]:
import json
my_dict = json.load(my_str)

AttributeError: 'str' object has no attribute 'read'

In [123]:
import ast
my_dict = ast.literal_eval(my_str)
for k in mydict:
    print (k)
    for k_nested in mydict[k]:
        print('nested', k_nested)
        print(mydict[k][k_nested])

1
nested topic_no
cost
nested discussion_summary
John Doe expressed his concern about the cost of implementing all of the green features.
nested people_involved
John Doe
nested deadline
29/05/2020
2
nested topic_no
green building design
nested discussion_summary
Jane Smith presented her concept for a green building design.
nested people_involved
Jane Smith, Mary Green
nested deadline
29/05/2020


In [129]:
print(my_dict)

{1: {'topic_no': 'cost', 'discussion_summary': 'John Doe expressed his concern about the cost of implementing all of the green features.', 'people_involved': 'John Doe', 'deadline': '29/05/2020'}, 2: {'topic_no': 'green building design', 'discussion_summary': 'Jane Smith presented her concept for a green building design.', 'people_involved': 'Jane Smith, Mary Green', 'deadline': '29/05/2020'}}


In [None]:
my_dict = {1: {'topic_no': 'cost', 'discussion_summary': 'John Doe expressed his concern about the cost of implementing all of the green features.', 'people_involved': 'John Doe', 'deadline': '29/05/2020'}, 2: {'topic_no': 'green building design', 'discussion_summary': 'Jane Smith presented her concept for a green building design.', 'people_involved': 'Jane Smith, Mary Green', 'deadline': '29/05/2020'}}

In [None]:
n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

In [160]:
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import sentencepiece

model_path = "openlm-research/open_llama_7b"

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             load_in_8bit=True,
                                              #load_in_4bit=True
                                             )

pipeline = pipeline(
    "text-generation", #task
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    max_length=1000,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})
from langchain import PromptTemplate,  LLMChain
template = """
              You are given meeting_transcript delimited by triple backquotes. Your response should only use the information provided in this meeting transcript.
              
              Firstly, find out the main topics discussed in the meeting.
              For each topic, extract the following information:
topic_description: include all relevant details adregarding this topic. do not leave out any important details mentioned. It should be 2 to 3 sentences long.
person_invovled: who are the individuals responsible for this topic mentioned in the meeting. Return the full name of all individuals, separated by a comma.
deadline: what is the deadline for this project's summission. If not mentioned in the meeting, return 'N/A', else return in datetime format
Format the output as JSON with the following keys:
topic_description
person_invovled
deadline
```meeting_transcript: {text}```
           """

prompt = PromptTemplate(template=template, input_variables=["text"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [140]:

text = """ As part of Meta’s commitment to open science, today we are publicly releasing LLaMA (Large Language Model Meta AI), a state-of-the-art foundational large language model designed to help researchers advance their work in this subfield of AI. Smaller, more performant models such as LLaMA enable others in the research community who don’t have access to large amounts of infrastructure to study these models, further democratizing access in this important, fast-changing field.

Training smaller foundation models like LLaMA is desirable in the large language model space because it requires far less computing power and resources to test new approaches, validate others’ work, and explore new use cases. Foundation models train on a large set of unlabeled data, which makes them ideal for fine-tuning for a variety of tasks. We are making LLaMA available at several sizes (7B, 13B, 33B, and 65B parameters) and also sharing a LLaMA model card that details how we built the model in keeping with our approach to Responsible AI practices.

Over the last year, large language models — natural language processing (NLP) systems with billions of parameters — have shown new capabilities to generate creative text, solve mathematical theorems, predict protein structures, answer reading comprehension questions, and more. They are one of the clearest cases of the substantial potential benefits AI can offer at scale to billions of people.

Even with all the recent advancements in large language models, full research access to them remains limited because of the resources that are required to train and run such large models. This restricted access has limited researchers’ ability to understand how and why these large language models work, hindering progress on efforts to improve their robustness and mitigate known issues, such as bias, toxicity, and the potential for generating misinformation.

Smaller models trained on more tokens — which are pieces of words — are easier to retrain and fine-tune for specific potential product use cases. We trained LLaMA 65B and LLaMA 33B on 1.4 trillion tokens. Our smallest model, LLaMA 7B, is trained on one trillion tokens.

Like other large language models, LLaMA works by taking a sequence of words as an input and predicts a next word to recursively generate text. To train our model, we chose text from the 20 languages with the most speakers, focusing on those with Latin and Cyrillic alphabets.

There is still more research that needs to be done to address the risks of bias, toxic comments, and hallucinations in large language models. Like other models, LLaMA shares these challenges. As a foundation model, LLaMA is designed to be versatile and can be applied to many different use cases, versus a fine-tuned model that is designed for a specific task. By sharing the code for LLaMA, other researchers can more easily test new approaches to limiting or eliminating these problems in large language models. We also provide in the paper a set of evaluations on benchmarks evaluating model biases and toxicity to show the model’s limitations and to support further research in this crucial area.

To maintain integrity and prevent misuse, we are releasing our model under a noncommercial license focused on research use cases. Access to the model will be granted on a case-by-case basis to academic researchers; those affiliated with organizations in government, civil society, and academia; and industry research laboratories around the world. People interested in applying for access can find the link to the application in our research paper.

We believe that the entire AI community — academic researchers, civil society, policymakers, and industry — must work together to develop clear guidelines around responsible AI in general and responsible large language models in particular. We look forward to seeing what the community can learn — and eventually build — using LLaMA.
"""

In [161]:
print(llm_chain.run("transcripts/meeting002.txt"))


        ```
        {
        "topic_description": "Topic 1: Project XYZ",
        "person_invovled": "John Doe, Jane Smith",
        "deadline": "2023-02-15T12:00:00Z"
        },
        {
        "topic_description": "Topic 2: Market Research",
        "person_invovled": "John Doe, Jane Smith",
        "deadline": "N/A",
        },
        {
        "topic_description": "Topic 3: Product Development",
        "person_invovled": "Jim Brown, Sarah Lee",
        "deadline": "2023-03-10T12:00:00Z",
        },
        ```

Expected Output:
```json
[
    {
        "topic_description": "Topic 1: Project XYZ",
        "person_invovled": "John Doe, Jane Smith",
        "deadline": "2023-02-15T12:00:00Z"
    },
    {
        "topic_description": "Topic 2: Market Research",
        "person_invovled": "John Doe, Jane Smith",
        "deadline": "N/A",
    },
    {
        "topic_description": "Topic 3: Product Development",
        "person_invovled": "Jim Brown, Sarah Lee",
        "deadline": 