In [None]:
# Required libraries installation
!pip install -q einops==0.7.0 langchain==0.1.9 pypdf==4.0.2 pymilvus==2.3.6 sentence-transformers==2.4.0 redis


In [None]:
# Importing necessary libraries
import os
import requests
import redis
import json
from langchain.document_loaders import PyPDFDirectoryLoader, WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Milvus

In [None]:
# Connection settings for Milvus and Redis.
# Every value can be overridden with an environment variable of the same name,
# so hosts and credentials do not have to be edited in (or committed with) the
# notebook. The literals below are only fallbacks that keep the previous
# behaviour when no variable is set.
# NOTE(review): the fallback passwords are still stored in this file; rotate
# them and drop the defaults once the environment variables are provisioned.

# Base parameters for Milvus connection
MILVUS_HOST = os.environ.get("MILVUS_HOST", "vectordb-milvus.milvus.svc.cluster.local")
MILVUS_PORT = int(os.environ.get("MILVUS_PORT", "19530"))
MILVUS_USERNAME = os.environ.get("MILVUS_USERNAME", "root")
MILVUS_PASSWORD = os.environ.get("MILVUS_PASSWORD", "Milvus")
MILVUS_COLLECTION = os.environ.get("MILVUS_COLLECTION", "catalogo_ba_gov")

# Redis connection parameters
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", 'KQbtKHQFCGKr')
REDIS_HOST = os.environ.get("REDIS_HOST", '10.2.201.73')
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))

In [None]:
# Function to connect to Redis
def conectar_redis():
    """Create a Redis client from the REDIS_* settings and check it with a PING.

    Returns:
        The ``redis.Redis`` client when the PING round trip completes, or
        ``None`` when Redis reports an error (the error is printed).
    """
    try:
        cliente = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            db=0,
            password=REDIS_PASSWORD
        )
        # ping() is the call that exercises the connection here, so connection
        # and authentication problems surface inside this try block.
        if cliente.ping():
            print("Conexão com Redis estabelecida com sucesso!")
        return cliente
    except redis.RedisError as e:
        # RedisError is the base class of redis.ConnectionError, so everything
        # that was handled before still is; it additionally covers failures
        # such as redis.TimeoutError that previously escaped as a traceback.
        print(f"Erro ao conectar ao Redis: {e}")
        return None

In [None]:
# Function to fetch key from Redis
def buscar_chave(redis_cliente, chave):
    """Fetch the value stored under ``chave`` and return it as text.

    Args:
        redis_cliente: Object exposing ``get(key)`` (e.g. a ``redis.Redis`` client).
        chave: Name of the key to read.

    Returns:
        str: The stored value decoded as UTF-8. Failures are reported in-band
        rather than raised: a message starting with ``"A chave"`` when the key
        does not exist, or one starting with ``"Erro ao buscar"`` when the
        lookup or decoding raises. Callers must check for both prefixes.
    """
    try:
        valor = redis_cliente.get(chave)
        if valor is None:
            return f"A chave '{chave}' não existe no Redis."
        if isinstance(valor, str):
            # Clients created with decode_responses=True already return str;
            # calling .decode() on it would raise and hide the real value
            # behind an error message.
            return valor
        return valor.decode('utf-8')
    except Exception as e:
        return f"Erro ao buscar a chave '{chave}': {e}"

In [None]:
# Connecting to Redis and fetching data:
# read the service catalogue stored in Redis, keep a reduced view of every
# service, and (re)build the Milvus collection from those reduced records.
cliente_redis = conectar_redis()
if cliente_redis:
    chave = "catalogo-completo#carta-servico"

    itens_filtrados = []

    resultado = buscar_chave(cliente_redis, chave)

    # buscar_chave reports failures in-band as text: "A chave ..." when the key
    # is missing and "Erro ao buscar ..." when the lookup raised. Both must be
    # rejected, otherwise the error message would be handed to json.loads.
    prefixos_de_erro = ("A chave", "Erro ao buscar")
    if resultado and not resultado.startswith(prefixos_de_erro):
        resultado_json = json.loads(resultado)

        # Keep only the fields that are useful for retrieval.
        for item in resultado_json['items']:
            for servico in item['servicos']:
                resumido = {
                    "descricaoResumida": servico.get("descricaoResumida"),
                    "nome_servico": servico.get("nome_servico"),
                    "palavras_chave": servico.get("palavras_chave"),
                    "nome_orgao": servico.get("nome_orgao")
                }

                itens_filtrados.append(resumido)

        print(f"Dados: {itens_filtrados}")

    # Only touch Milvus when there is something to store: the collection is
    # opened with drop_old=True, so running this with no data would drop the
    # existing collection and then report success.
    if itens_filtrados:
        # Creating Milvus index and storing filtered data.
        # HuggingFaceEmbeddings only accepts keyword arguments; the options
        # forwarded to the underlying model go in model_kwargs.
        embeddings = HuggingFaceEmbeddings(
            model_kwargs={'trust_remote_code': True},
            show_progress=True
        )

        # Create Milvus instance and collection
        db = Milvus(
            embedding_function=embeddings,
            connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT, "user": MILVUS_USERNAME, "password": MILVUS_PASSWORD},
            collection_name=MILVUS_COLLECTION,
            metadata_field="metadata",
            text_field="page_content",
            auto_id=True,
            drop_old=True
        )

        # Splitting the data into chunks before ingestion.
        # ensure_ascii=False keeps accented characters as-is; the default would
        # store and embed them as \uXXXX escape sequences.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
        chunks = text_splitter.create_documents([
            json.dumps(item, ensure_ascii=False) for item in itens_filtrados
        ], metadatas=[{"source": chave} for _ in itens_filtrados])

        # Adding documents to the Milvus collection
        db.add_documents(chunks)
        print("Dados adicionados ao Milvus com sucesso!")
    else:
        print("Nenhum dado relevante encontrado no Redis para processar.")