In [24]:
from langchain_core.output_parsers import StrOutputParser
from PyPDF2 import PdfReader
from langchain_text_splitters  import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import SentenceTransformerEmbeddings
from qdrant_client import QdrantClient
from langchain_community.vectorstores import Qdrant
from typing import List
import os
import getpass
from dotenv import load_dotenv
load_dotenv()

True

In [6]:
api_key = os.getenv("GROQ_API_KEY")
api_key

'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'

## Load LLM

In [7]:
def load_llm(id_model, temperature):
    llm = ChatGroq(
        model=id_model,
        temperature=temperature,
        api_key=api_key,
        max_tokens=1024,
        timeout=None,
        max_retries=3
    )
    return llm

llm = load_llm('meta-llama/llama-4-maverick-17b-128e-instruct', 0.3)

In [None]:
# document laoder
def extract_text_from_pdf(file_path :str) -> List[Document]:
    try : 
        
        reader= PyMuPDFLoader(file_path)
        docs = reader.load()
        print(f"✅ Berhasil memproses Dokumen")
        return docs
    except Exception as e:
        print(f"Error saat memproses PDF : {str(e)}")
        return []
#chunking text
def chunk_text(documents : List[Document],chunk_size=1000,overlap=200) -> List[Document]:
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size= chunk_size,
        chunk_overlap=overlap,
        length_function=len,
        separators=["\n\n", "\n", '\n●','\n1','\n2','\n3','\n4','\n5','\n6','\n7','\n8','\n9','\n10'," "]
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Chunking selesai. Total halaman asli : {len(documents)}")
    return chunks

#index to qdrant
def index_to_qdrant(chunks : List[Document], url : str , collection_name : str)-> None:
    if not chunks:
        print("❌ Tidak ada chunks yang valid untuk di-index.")
        return

    try:
        # Inisialisasi model embedding yang akan digunakan
        model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

        print(f"⏳ Memulai indexing ke Qdrant Collection: {collection_name}...")
        
        # 2. Indexing Otomatis
        # Qdrant.from_documents secara otomatis menangani:
        # a) Mengubah chunks menjadi embeddings.
        # b) Menyimpan embeddings, teks, dan metadata ke Qdrant.
        Qdrant.from_documents(
            documents=chunks,
            embedding=model,
            url = url,
            collection_name=collection_name,
        )
        
        print("✅ Indexing ke Qdrant Selesai!")

    except Exception as e:
        print(f"❌ Gagal Indexing ke Qdrant. Cek koneksi atau API Key Anda. Error: {e}")



In [30]:
pdf_path = 'Handbook_BNI_Mbank.pdf'
qdrant_url = "http://localhost:6333"
collection_name = 'user_manual_BNI_Mbank'

#load pdf
text_book = extract_text_from_pdf(pdf_path)
chunk = chunk_text(text_book)

#save qdrant
index_to_qdrant(chunk,qdrant_url,collection_name)


Error saat memproses PDF : 'PyMuPDFLoader' object has no attribute 'pages'
Chunking selesai. Total halaman asli : 0
❌ Tidak ada chunks yang valid untuk di-index.


In [28]:
from qdrant_client import QdrantClient
from qdrant_client.http.exceptions import UnexpectedResponse

# Ganti 'localhost' dan 6333 dengan alamat server Qdrant Anda
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333 # Port REST API

try:
    # Buat instance klien
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
    print(f"Mencoba koneksi ke: {QDRANT_HOST}:{QDRANT_PORT}")
    
    # Tambahkan timeout untuk mencegah pemblokiran yang terlalu lama (opsional, default 5 detik)
    # client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=5)

    # Lakukan operasi ringan yang memerlukan koneksi
    info = client.get_collections() 
    
    ## Alternatif: Cek informasi server (lebih mendalam)
    # info = client.get_locks() 
    # info = client.info()
    
    # Jika berhasil, cetak hasilnya
    print("✅ KONEKSI BERHASIL!")
    print(f"Informasi Koleksi Qdrant: {info.collections}")

except ConnectionRefusedError:
    print(f"❌ KONEKSI GAGAL: Server Qdrant tidak berjalan atau menolak koneksi pada {QDRANT_HOST}:{QDRANT_PORT}")
except UnexpectedResponse as e:
    # Terjadi jika server berjalan tetapi merespons dengan kode status error
    print(f"❌ KONEKSI GAGAL: Server merespons dengan Error: {e}")
except Exception as e:
    print(f"❌ KONEKSI GAGAL: Terjadi kesalahan lain: {e}")

Mencoba koneksi ke: localhost:6333
✅ KONEKSI BERHASIL!
Informasi Koleksi Qdrant: []
