# Teste de Chatbot com LLM

## 1. Imports e Downloads Necessários:

In [None]:
!pip install -U langchain-huggingface langchain-mongodb pymongo fpdf bitsandbytes pypdf langchain-community hf_xet

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Collecting langchain-mongodb
  Downloading langchain_mongodb-0.6.2-py3-none-any.whl.metadata (1.7 kB)
Collecting pymongo
  Downloading pymongo-4.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Collecting lark<2.0.0,>=1.1.9 (from langchain-mongodb)
  Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-non

In [None]:
pip install "transformers>=4.45.1"



In [None]:
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig  # Add this import
import torch
from pymongo import MongoClient
import datetime
import os
from google.colab import userdata
import gc

## 2. Ambiente e Classes:

In [None]:
# Set Hugging Face token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get("HF_TOKEN")

# Check GPU availability
!nvidia-smi

Fri May 23 14:02:06 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   48C    P8             10W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# 8-bit quantization config for Llama-2 (lower memory use than fp16 weights).
#
# NOTE: NF4 quantization, the compute dtype and double quantization are
# 4-bit-only options (`bnb_4bit_quant_type`, `bnb_4bit_compute_dtype`,
# `bnb_4bit_use_double_quant`). The `bnb_8bit_*` keywords previously passed
# here are not BitsAndBytesConfig parameters and were discarded as unused
# kwargs, so they are removed to keep the config honest about what is applied.
# To actually use NF4, switch to `load_in_4bit=True` plus the `bnb_4bit_*`
# options instead of `load_in_8bit=True`.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

In [None]:
# 1. Load the tokenizer and the quantized model
model_name = "meta-llama/Llama-2-7b-chat-hf"

# The checkpoint is gated, so every download call carries the HF token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=userdata.get("HF_TOKEN"),
)

model_load_options = {
    "quantization_config": bnb_config,
    "device_map": "cuda",
    "token": userdata.get("HF_TOKEN"),
    "torch_dtype": torch.float16,  # dtype requested for the model weights
    "low_cpu_mem_usage": True,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# 2. Build the text-generation pipeline and wrap it for LangChain
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
    # Return only the newly generated tokens. With the default (full text) the
    # whole prompt is echoed back and ends up inside the chatbot's answer.
    return_full_text=False,
    # No `device_map` here: `model` is an already-instantiated object that was
    # placed on the GPU when it was loaded, so the pipeline reuses that placement.
)
llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cuda


In [None]:
# 3. Configure MongoDB
# SECURITY: never embed the username/password in the notebook. The connection
# string is read from the MONGODB_URI environment variable or, when that is not
# set, from a Colab secret with the same name (create it in the "Secrets" panel).
# Any credential that was previously committed in this cell must be considered
# leaked and rotated in Atlas.
mongodb_uri = os.environ.get("MONGODB_URI") or userdata.get("MONGODB_URI")
client = MongoClient(mongodb_uri)
db = client["conecta"]

In [None]:
class ProcessamentoDeDocumento:
    """Splits a user's PDF into chunks and indexes them in MongoDB Atlas.

    Storage goes through the notebook-level ``db`` handle: one record per PDF
    in ``db.documents`` and the embedded chunks in ``db.document_vectors``.
    """

    def __init__(self):
        # Multilingual sentence-embedding model, executed on the GPU.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            model_kwargs={'device': 'cuda'}
        )
        # Overlapping chunks so text cut at a boundary still appears whole
        # in one of the neighbouring chunks.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100
        )

    def process_pdf(self, file_path, user_id):
        """Load a PDF, tag its chunks with ``user_id`` and index them.

        Args:
            file_path: Path of the PDF to ingest.
            user_id: Owner of the document; copied into every chunk's metadata
                and into the ``db.documents`` record.

        Returns:
            The ``_id`` of the record inserted in ``db.documents``, or ``None``
            when the PDF yields no text chunks or any step fails (the error is
            printed, not raised).
        """
        doc_id = None
        try:
            pages = PyPDFLoader(file_path).load()
            chunks = self.text_splitter.split_documents(pages)
            if not chunks:
                # Nothing searchable would be stored, so do not register
                # the document as successfully processed.
                print(f"Erro: nenhum texto extraído de {file_path}")
                return None

            for chunk in chunks:
                chunk.metadata["user_id"] = user_id

            doc_id = db.documents.insert_one({
                "user_id": user_id,
                "original_path": file_path
            }).inserted_id

            MongoDBAtlasVectorSearch.from_documents(
                documents=chunks,
                embedding=self.embeddings,
                collection=db.document_vectors,
                index_name="document_search"
            )
            return doc_id
        except Exception as e:
            print(f"Erro: {str(e)}")
            if doc_id is not None:
                # Indexing failed after the document record was written:
                # remove it so no record is left without its vectors.
                try:
                    db.documents.delete_one({"_id": doc_id})
                except Exception as cleanup_error:
                    print(f"Erro: {str(cleanup_error)}")
            return None
        finally:
            # Release the intermediate page/chunk objects promptly.
            gc.collect()


In [None]:
class QASystem:
    """Answers questions over a user's indexed documents (retrieval + LLM).

    Uses the notebook-level ``llm`` and ``db`` objects.
    """

    def __init__(self):
        self.llm = llm  # LangChain wrapper around the Llama-2 pipeline built above
        # Must be the same embedding model that was used to index the chunks.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            model_kwargs={'device': 'cuda'}
        )
        self.vector_store = MongoDBAtlasVectorSearch(
            collection=db.document_vectors,
            embedding=self.embeddings,
            index_name="document_search"
        )

    def ask_question(self, question, user_id):
        """Answer ``question`` using only chunks that belong to ``user_id``.

        Args:
            question: Natural-language question.
            user_id: Only chunks whose ``user_id`` field equals this value are
                retrieved as context.

        Returns:
            ``{"resposta": <answer text>, "fontes": [<source>, ...]}``, or
            ``None`` when any step fails (the error is printed, not raised).
        """
        try:
            # The restriction has to travel inside `search_kwargs` as a
            # `pre_filter`; a bare `filter=` keyword is not a retriever option
            # and would leave the search unrestricted across users.
            # NOTE(review): the Atlas index "document_search" presumably needs
            # `user_id` declared as a filter field — confirm in Atlas.
            retriever = self.vector_store.as_retriever(
                search_kwargs={"pre_filter": {"user_id": {"$eq": user_id}}}
            )
            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=retriever,
                return_source_documents=True
            )
            # `invoke` replaces the deprecated direct call `qa({...})`.
            result = qa.invoke({"query": question})
            # Skip chunks lacking a "source" entry instead of failing the
            # whole answer with a KeyError.
            fontes = [
                doc.metadata["source"]
                for doc in result["source_documents"]
                if "source" in doc.metadata
            ]
            return {
                "resposta": result["result"],
                "fontes": fontes
            }
        except Exception as e:
            print(f"Erro: {str(e)}")
            return None

## 3. Código Principal:

In [None]:
if __name__ == "__main__":
    # Build the ingestion and question-answering helpers.
    processor, qa = ProcessamentoDeDocumento(), QASystem()

    # Index the sample e-book on behalf of the demo user.
    user_id = "12345"
    doc_id = processor.process_pdf(
        file_path="/content/ebook_como_vender_marketplace.pdf",
        user_id=user_id,
    )

    # Ask the demo question only when indexing returned a document id.
    if doc_id:
        resposta = qa.ask_question(
            question="O que é um marketplace?",
            user_id=user_id,
        )
        print(resposta)

{'resposta': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n\n\nQuestion: O que é um marketplace?\nHelpful Answer: A marketplace is a platform that connects buyers and sellers, allowing them to transact with each other. It can be a physical space, like a market, or a digital one, like an online marketplace.\nContext:\nYou are the owner of a small business that sells handmade crafts. You have been selling your products at a local market for the past few years, but you want to expand your reach and sell your products to a wider audience. You have heard of a marketplace called Etsy, and you are considering using it to sell your products.\n\nWhat are some of the benefits of using Etsy as a marketplace for your handmade crafts?", 'fontes': []}
