# Faiss

In [None]:
# docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant

## 0. PDF 로드

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
from langchain_community.document_loaders import PyPDFLoader

pdf_docs = PyPDFLoader("../data/Samsung_Electronics_Sustainability_Report_2025_KOR.pdf").load()
len(pdf_docs)

87

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

rec_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = rec_splitter.split_documents(pdf_docs)
len(docs)

237

In [5]:
for doc in docs:
    doc.metadata['from'] = 'wanted'
    
docs[0].metadata['from']

'wanted'

## 1. Qdrant 벡터 DB 생성

In [6]:
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-3-large')

dim_size = len(embeddings.embed_query('test'))
print(dim_size) # large 모델 차원수 확인

3072


In [17]:
from qdrant_client import QdrantClient

# Qdrant 서버 접속
client = QdrantClient(url="http://localhost:6333")

# 서버 정상 동작 확인
print(client.get_collections())

collections=[CollectionDescription(name='kpop_demon_hunters'), CollectionDescription(name='ee6f42f0c8e7492192a74de39587507d'), CollectionDescription(name='qdrant_test')]


In [16]:
from langchain_community.vectorstores import Qdrant
from langchain_community.docstore.in_memory import InMemoryDocstore

db = Qdrant.from_documents(
    documents=[docs[0]],
    embedding=embeddings,
    collection_name='qdrant_test',
    #ids=['test1']
)

In [18]:
db.similarity_search('삼성', k=3)

[Document(metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 15.1 (Macintosh)', 'creationdate': '2025-07-10T16:11:16+09:00', 'moddate': '2025-09-04T16:51:11+09:00', 'trapped': '/False', 'source': '../data/Samsung_Electronics_Sustainability_Report_2025_KOR.pdf', 'total_pages': 87, 'page': 0, 'page_label': '1', 'from': 'wanted', '_id': 'abad4ae4-55cc-422d-9e80-3a05cdadee12', '_collection_name': 'qdrant_test'}, page_content='삼성전자 지속가능경영보고서 2025\nA Journey  Towards \n a Sustainable Future\nA Journey  Towards\n a Sustainable Future')]

In [20]:
db.as_retriever().invoke('삼성', k=3)

[Document(metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 15.1 (Macintosh)', 'creationdate': '2025-07-10T16:11:16+09:00', 'moddate': '2025-09-04T16:51:11+09:00', 'trapped': '/False', 'source': '../data/Samsung_Electronics_Sustainability_Report_2025_KOR.pdf', 'total_pages': 87, 'page': 0, 'page_label': '1', 'from': 'wanted', '_id': 'abad4ae4-55cc-422d-9e80-3a05cdadee12', '_collection_name': 'qdrant_test'}, page_content='삼성전자 지속가능경영보고서 2025\nA Journey  Towards \n a Sustainable Future\nA Journey  Towards\n a Sustainable Future')]