In [1]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
import os

In [2]:
loader = TextLoader('./miyazaki.txt')

In [3]:
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=4)
docs = text_splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings()

Created a chunk of size 3293, which is longer than the specified 1000
Created a chunk of size 1255, which is longer than the specified 1000
Created a chunk of size 1103, which is longer than the specified 1000
Created a chunk of size 1517, which is longer than the specified 1000
Created a chunk of size 1699, which is longer than the specified 1000
Created a chunk of size 1585, which is longer than the specified 1000
Created a chunk of size 1336, which is longer than the specified 1000
Created a chunk of size 1020, which is longer than the specified 1000
Created a chunk of size 1241, which is longer than the specified 1000
Created a chunk of size 1538, which is longer than the specified 1000
Created a chunk of size 1899, which is longer than the specified 1000
Created a chunk of size 1177, which is longer than the specified 1000
Created a chunk of size 1763, which is longer than the specified 1000
Created a chunk of size 1509, which is longer than the specified 1000
Created a chunk of s

In [4]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [5]:
from langchain.vectorstores import Pinecone
import pinecone

In [6]:
pinecone_api_key = os.getenv('PINECONE')

In [7]:
# Initialize Pinecone client
pinecone.init(
    api_key= pinecone_api_key,
    environment='gcp-starter'
)

In [8]:
# Define Index Name
index_name = "langchain-demo"

# Checking Index
if index_name not in pinecone.list_indexes():
  # Create new Index
  pinecone.create_index(name=index_name, metric="cosine", dimension=768)
  docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)
else:
  # Link to the existing index
  docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [9]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x71f059d9b6a0>

In [10]:
hf_key = os.getenv('HF_KEY')

In [11]:
from langchain.llms import HuggingFaceHub

# Define the repo ID and connect to Mixtral model on Huggingface
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
llm = HuggingFaceHub(
  repo_id=repo_id, 
  model_kwargs={"temperature": 0.8, "top_k": 50}, 
  huggingfacehub_api_token=hf_key
)

In [12]:
from langchain import PromptTemplate

In [13]:

template = """
The Human will ask questions about Hayao Miyazaki life. 
If you don't know the answer, just say you don't know. 
Keep the answer within 2 sentences.

Context: {context}
Question: {question}
Answer: 

"""

prompt = PromptTemplate(
  template=template, 
  input_variables=["context", "question"]
)

In [14]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [15]:
rag_chain = (
{"context": docsearch.as_retriever(),  "question": RunnablePassthrough()} 
| prompt 
| llm
| StrOutputParser() 
)

In [16]:
user_input = input("Ask me anything: ")
result = rag_chain.invoke(user_input)
print(result)

Ask me anything:  What is the best movie of Miyazaki?



The Human will ask questions about Hayao Miyazaki life. 
If you don't know the answer, just say you don't know. 
Keep the answer within 2 sentences.

Context: []
Question: What is the best movie of Miyazaki?
Answer: 

It's subjective and varies from person to person, but some popular choices are "My Neighbor Totoro", "Spirited Away", and "Princess Mononoke".
