In [1]:
!pip install -U langchain-huggingface langchain-mongodb pymongo fpdf bitsandbytes pypdf langchain-community hf_xet
!pip install "transformers>=4.45.1"

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Collecting langchain-mongodb
  Downloading langchain_mongodb-0.6.2-py3-none-any.whl.metadata (1.7 kB)
Collecting pymongo
  Downloading pymongo-4.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting hf_xet
  Downloading hf_xet-1.1.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (879 bytes)
Collecting lark<2.0.0,>=1.1.9 (from langchain-mongodb)
  Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB)
Collecting dnspytho

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch
from pymongo import MongoClient
import datetime
import os
from google.colab import userdata
import gc
from langchain.prompts import PromptTemplate
import re
import ipywidgets as widgets
from IPython.display import display, clear_output

In [3]:
# Set Hugging Face token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get("HF_TOKEN")
os.environ["LANGCHAIN_API_KEY"] = "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"
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Check GPU availability
!nvidia-smi

Thu May 29 23:17:12 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   43C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
# Quantization settings.
# BitsAndBytesConfig has no `bnb_8bit_*` options (the quant-type / compute-dtype /
# double-quant knobs only exist for 4-bit loading, and "nf4" is a 4-bit type),
# so only `load_in_8bit` is passed. Quantization is requested only when a CUDA
# device is present, matching the CPU fallback used for `device_map` below.
# NOTE(review): assumes bitsandbytes int8 loading is not usable on CPU-only
# runtimes -- confirm for the pinned bitsandbytes version.
use_cuda = torch.cuda.is_available()
bnb_config = BitsAndBytesConfig(load_in_8bit=True) if use_cuda else None

# 1. Load the (optionally quantized) model
print("Inicializando LLM...")
model_name = "google/flan-t5-small"
hf_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="cuda" if use_cuda else "cpu",
    token=hf_token,
    # Half precision only makes sense on the GPU path.
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    low_cpu_mem_usage=True
)

# 2. Build the generation pipeline
# No explicit `device` is passed: the model was already placed via `device_map`.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
)

# 3. Wrap the pipeline for LangChain
llm = HuggingFacePipeline(pipeline=pipe)
print("LLM inicializado com sucesso.")

# 4. Configure MongoDB
# The connection string (which embeds username and password) is read from the
# Colab secret "MONGODB_URI" instead of being hard-coded in the notebook.
# NOTE(review): the password that used to be committed here has been exposed
# and should be rotated in Atlas.
client = MongoClient(userdata.get("MONGODB_URI"))
db = client["conecta"]

Inicializando LLM...


Device set to use cuda


LLM inicializado com sucesso.


In [6]:
class ProcessamentoDeDocumento:
    """Loads a PDF, splits it into chunks and indexes the chunks in MongoDB.

    Chunks are embedded with a multilingual sentence-transformers model and
    written to the ``document_vectors`` collection through
    ``MongoDBAtlasVectorSearch``; a bookkeeping record per uploaded file is
    written to the ``documents`` collection.
    """

    def __init__(self):
        # Fall back to CPU when no GPU is present, consistent with how the LLM
        # is placed elsewhere in this notebook.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            model_kwargs={'device': device}
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100
        )

    def process_pdf(self, file_path, user_id):
        """Index one PDF for a given user.

        Args:
            file_path: Path of the PDF file to load.
            user_id: Identifier stored in every chunk's metadata and in the
                ``documents`` record.

        Returns:
            The ``_id`` of the inserted ``documents`` record, or ``None`` when
            the PDF produced no chunks or any step failed (the error is
            printed, not raised).
        """
        doc_id = None
        try:
            pages = PyPDFLoader(file_path).load()
            chunks = self.text_splitter.split_documents(pages)
            if not chunks:
                # Nothing to embed: do not create an empty document record.
                print(f"Erro: nenhum texto extraído de '{file_path}'")
                return None

            for chunk in chunks:
                chunk.metadata["user_id"] = user_id

            doc_id = db.documents.insert_one({
                "user_id": user_id,
                "original_path": file_path
            }).inserted_id

            MongoDBAtlasVectorSearch.from_documents(
                documents=chunks,
                embedding=self.embeddings,
                collection=db.document_vectors,
                index_name="document_search"
            )
            gc.collect()
            return doc_id
        except Exception as e:
            print(f"Erro: {str(e)}")
            if doc_id is not None:
                # Indexing failed after the bookkeeping record was written:
                # remove it so no document is listed without its vectors.
                try:
                    db.documents.delete_one({"_id": doc_id})
                except Exception as cleanup_error:
                    print(f"Erro: {str(cleanup_error)}")
            return None

In [7]:
class QASystem:
    """Retrieval-augmented question answering over a user's indexed documents.

    Retrieves chunks from the ``document_vectors`` collection through
    ``MongoDBAtlasVectorSearch`` and feeds them, with a Portuguese prompt, to
    the module-level ``llm``.
    """

    # Openings of the prompt that the model sometimes echoes back verbatim.
    _ECHOED_PROMPT_OPENINGS = (
        "Com base **apenas** no contexto fornecido, responda à pergunta **em português**.",
        "Com base **exclusivamente** no contexto fornecido, responda à seguinte pergunta.",
    )
    # Boilerplate lead-ins removed (case-insensitively) from the start of the answer.
    _BOILERPLATE_PREFIXES = (
        "helpful answer:",
        "a resposta é:",
        "here's the answer:",
    )

    def __init__(self):
        self.llm = llm
        # Fall back to CPU when no GPU is present, consistent with how the LLM
        # is placed elsewhere in this notebook.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            model_kwargs={'device': device}
        )
        self.vector_store = MongoDBAtlasVectorSearch(
            collection=db.document_vectors,
            embedding=self.embeddings,
            index_name="document_search"
        )

    @staticmethod
    def _clean_answer(raw_answer):
        """Strip echoed prompt fragments and tidy spacing in a raw model answer."""
        answer = raw_answer.strip()

        # Keep only what follows the prompt's final "Resposta:" marker, if echoed.
        if "Resposta:" in answer:
            answer = answer.split("Resposta:", 1)[-1].strip()

        # Drop an echoed prompt opening (at most one of the known variants).
        for opening in QASystem._ECHOED_PROMPT_OPENINGS:
            if answer.startswith(opening):
                answer = answer.replace(opening, "", 1).strip()
                break

        for prefix in QASystem._BOILERPLATE_PREFIXES:
            if answer.lower().startswith(prefix):
                answer = answer[len(prefix):].strip()

        # Cut the answer where the model starts reproducing the prompt sections.
        for section_marker in ("Contexto:", "Pergunta:"):
            if section_marker in answer:
                answer = answer.split(section_marker, 1)[0].strip()
        if "Resposta:" in answer:
            answer = answer.replace("Resposta:", "").strip()

        # Remove "/x.alt"-style garbage tokens, collapse whitespace runs, drop
        # spaces before punctuation and ensure one space after it.
        answer = re.sub(r'\S*/[a-zA-Z]\.alt(/[a-zA-Z]\.alt)*l?', '', answer)
        answer = re.sub(r'\s{2,}', ' ', answer).strip()
        answer = re.sub(r'\s*([.,;?!])', r'\1', answer)
        answer = re.sub(r'([.,;?!])\s*(?=[a-zA-Z0-9])', r'\1 ', answer)
        return answer.replace('**', '')

    def ask_question(self, question, user_id):
        """Answer ``question`` using only chunks that belong to ``user_id``.

        Args:
            question: The question text passed to the QA chain.
            user_id: Identifier used to restrict retrieval to this user's chunks.

        Returns:
            A dict with ``"resposta"`` (cleaned answer text) and ``"fontes"``
            (unique source names, in retrieval order), or ``None`` if any step
            failed (the error is printed, not raised).
        """
        try:
            # The user restriction must travel inside `search_kwargs`: a
            # top-level `filter=` argument to `as_retriever` is not forwarded
            # to the vector search, so results from other users could leak in.
            # NOTE(review): `user_id` must be declared as a filter field in the
            # Atlas "document_search" index for `pre_filter` to work -- confirm.
            retriever = self.vector_store.as_retriever(
                search_kwargs={
                    "k": 5,
                    "pre_filter": {"user_id": {"$eq": user_id}},
                }
            )
            template = """Com base **apenas** no contexto fornecido, responda à pergunta **em português**.
                        Formule uma resposta **clara, concisa e natural**, sem introduções, o contexto ou a pergunta.
                        Se a resposta não puder ser encontrada no contexto fornecido, responda **apenas**: "Não consegui encontrar a resposta para esta pergunta no documento fornecido."

            Contexto:
            {context}

            Pergunta: {question}

            Resposta:"""
            PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=retriever,
                return_source_documents=True,
                chain_type_kwargs={"prompt": PROMPT}
            )

            result = qa.invoke({"query": question})
            resposta_limpa = self._clean_answer(result["result"])

            # De-duplicate while keeping retrieval order so the output is
            # deterministic across runs.
            fontes_unicas = list(dict.fromkeys(
                doc.metadata.get("source", doc.metadata.get("file_name", "Desconhecido"))
                for doc in result["source_documents"]
            ))

            return {
                "resposta": resposta_limpa,
                "fontes": fontes_unicas
            }
        except Exception as e:
            print(f"Erro ao responder a pergunta '{question}': {str(e)}")
            return None

In [8]:
if __name__ == "__main__":
    # Colab-only helper, imported here because it is needed only by this driver cell.
    from google.colab import files

    processor = ProcessamentoDeDocumento()
    qa = QASystem()

    # Upload the PDF through the Colab file picker. `uploaded` is empty when
    # the dialog is cancelled, so guard before taking the first file name.
    uploaded = files.upload()
    if not uploaded:
        print("Nenhum arquivo enviado.")
    else:
        pdf_path = next(iter(uploaded))

        user_id = "12345"
        doc_id = processor.process_pdf(pdf_path, user_id)

        # process_pdf returns None on failure (it prints the error itself).
        if doc_id is not None:
            resposta = qa.ask_question("O que é uma loja online?", user_id)
            # ask_question also returns None on failure, after printing the error.
            if resposta is not None:
                print(resposta)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.89k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Saving Guia_de_Comercio_Eletronico.pdf to Guia_de_Comercio_Eletronico.pdf


Token indices sequence length is longer than the specified maximum sequence length for this model (1187 > 512). Running this sequence through the model will result in indexing errors


{'resposta': 'E-commerce E-commerce é uma loja virtual de una determinada marca. Nest modelo, toda estrutura do site e os fluxos logsticos e financeiros (estoque de productos, precificaço etc.) so responsabilidades da empresa — seja ela fabricante ou revendedora. E-commerce E-commerce é uma loja virtual de una determinada marca. Nest modelo, toda estrutura do site e os fluxos logsticos e financeiros (estoque de productos, precificaço etc.) so responsabilidades da empresa — seja ela fabricante ou revendedora. E-commerce E-commerce é uma loja virtual de una determinada marca. Nest modelo, toda estrutura do site e os fluxos logsticos e financeiros (estoque de productos, precificaço etc.) so responsabilidades da empresa — seja ela fabricante ou revendedora. E-commerce E-commerce é uma loja virtual de una determinada marca. Nest modelo, toda estrutura do site e os fluxos logsticos e financeiros (estoque de productos, precificaço etc.) so responsabilidades da empresa — seja ela fabricante ou