In [6]:
from dotenv import load_dotenv
import loggingService

from genai.extensions.langchain import LangChainInterface
from genai.model import Credentials, Model
from genai.schemas import GenerateParams

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import Weaviate

import os
from sentence_transformers import SentenceTransformer
import weaviate

load_dotenv()
logger = loggingService.get_logger()


In [7]:
api_key = os.getenv("GENAI_KEY", None)
api_endpoint = os.getenv("GENAI_API", 'https://workbench-api.res.ibm.com/v1')
class_name = os.getenv("WEVIATE_CLASS", 'LivrosVectorizer')
model_name = os.getenv('MODEL_NAME', 'bigscience/mt0-xxl')
model_name_embedding = os.getenv("MODEL_NAME_EMBEDDING", "sentence-transformers/gtr-t5-large")
weaviate_url = os.getenv("WEAVIATE_URL", 'http://127.0.0.1:8080')

client = weaviate.Client(url=weaviate_url,)
embeddings = SentenceTransformer(model_name_embedding)

creds = Credentials(api_key, api_endpoint=api_endpoint)
params = GenerateParams(
    decoding_method="sample",
    max_new_tokens=100,
    min_new_tokens=1,
    stream=False,
    temperature=0.5,
    top_k=50,
    top_p=1,
).dict()  # Langchain uses dictionaries to pass kwargs
llm = LangChainInterface(model=model_name, credentials=creds, params=params)
vector_store = Weaviate(client=client, index_name=class_name, text_key='content')


In [3]:
vector_store.similarity_search_by_vector(embeddings.encode('onde mora dom casmurro'))
# vector_store.similarity_search('onde mora dom casmurro')


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ValueError: Error during query: [{'locations': [{'column': 6, 'line': 1}], 'message': "explorer: get class: vector search: object vector search at index livrosvectorizer: shard livrosvectorizer_mChtM8yIWPzY: vector search: knn search: distance between entrypoint and query node: vector lengths don't match: 384 vs 768", 'path': ['Get', 'LivrosVectorizer']}]

In [4]:
pt1 = """Responda a pergunta a seguir de forma sucinta usando o contexto fornecido. Caso não tenha certeza da resposta siceramente diga que não possui informações suficientes sobre esse tema.

{context}

Pergunta: {question}
Resposta:"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=pt1,
)
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever(), chain_type_kwargs={"prompt": prompt})


In [5]:
qa.run('onde morava brás cubas')

' na Rua de Mata-cavalos, onde morava sua mãe, D. Maria da Glória Fernandes Santiago.'