<a href="https://colab.research.google.com/github/vyperid/Qdrant-vs-ChromaDB/blob/main/QdrantSentenceTransformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q PyMuPDF
!pip install -q langchain-text-splitters
!pip install -q qdrant-client
!pip install -q langchain_community
!pip install -q tiktoken
!pip install -q transformers
!pip install -q sentence_transformers

In [2]:
import pymupdf
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
import time

  from tqdm.autonotebook import tqdm, trange


In [3]:
def pdf_to_txt(pdf_path, output_path):
  document = pymupdf.open(pdf_path)

  text = ""

  for page in range(len(document)):
    page = document.load_page(page)
    text += page.get_text()

  with open(output_path, "w", encoding='utf-8') as f:
    f.write(text)

pdf_path = '/content/62f12134d4d3a-dead-poets-society.pdf'
txt_path = 'deadpoets.txt'
pdf_to_txt(pdf_path, txt_path)

In [4]:
with open("/content/deadpoets.txt") as f:
  document = f.read()

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=True,
    separators=["\."]
)

In [6]:
text_chunks = text_splitter.split_text(document)

In [7]:
qdrant = QdrantClient(":memory:")

In [None]:
encoder = SentenceTransformer('all-MiniLM-L6-v2')

In [11]:
embeddings = encoder.encode(text_chunks, show_progress_bar=True)

Batches:   0%|          | 0/29 [00:00<?, ?it/s]

In [9]:
qdrant.recreate_collection(
    collection_name = "deadpoetssociety",
    vectors_config = models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE)
)

  qdrant.recreate_collection(


True

In [14]:
records = [
    models.Record(
        id=i,
        vector=embeddings[i],
        payload={"text": text_chunks[i]}
    )
    for i in range(len(text_chunks))
]

qdrant.upload_records(collection_name="deadpoetssociety", records=records)

  qdrant.upload_records(collection_name="deadpoetssociety", records=records)


In [21]:
questions = [
    "Who is going to be Neil Perry's roommate?",
    "What did Todd do when Neil left the room?",
    "What was the Dead Poets Society according to Mr. Keating?",
    "How did the juniors feel about their first physical education class?",
    "What is Todd’s reaction to the pressure of attending Welton Academy?",
    "How does Mr. Keating’s unconventional approach to teaching Latin influence the students?",
    "Describe the atmosphere and activities during the Welton Academy’s opening ceremony.",
    "How does the character of Neil Perry evolve through his interactions with his father, and how does this affect his decisions and actions at Welton Academy?",
    "How does Mr. Keating’s philosophy of “Carpe Diem” challenge the traditional values taught at Welton Academy, and what are the implications for the students who embrace this philosophy?",
    "Examine how the theme of conformity versus individuality is portrayed through the interactions between Mr. Keating and the students, particularly in the context of Welton Academy’s values."
]

In [24]:
for question in questions:
  start = time.time()
  query_embedding = encoder.encode([question])[0]
  results = qdrant.search(
    collection_name="deadpoetssociety",
    query_vector=query_embedding,
    query_filter=None,
    limit=1)
  end = time.time()
  print(f"Time taken: {end - start}")
  print(results)

Time taken: 0.02834630012512207
[ScoredPoint(id=71, version=0, score=0.7324936389923096, payload={'text': '.\n“I hear we’re going to be roommates,” he said. “I’m Neil\nPerry.”\n“Todd Anderson,” he replied softly. The boys walked in\nawkward silence.\n“Why’d you leave Balincrest?” Neil asked.\n“My brother went here.”\nNeil shook his head. “Oh, so you’re THAT Anderson.”\nTodd shrugged and groaned'}, vector=None, shard_key=None, order_value=None)]
Time taken: 0.02861332893371582
[ScoredPoint(id=370, version=0, score=0.7633233070373535, payload={'text': '. Todd walked\nin late, but once they were all assembled, Neil stood and\nstarted the meeting.\n“‘I went to the woods because I wished to live deliberately. I\nwanted to live deep and suck out all the marrow of life'}, vector=None, shard_key=None, order_value=None)]
Time taken: 0.03498959541320801
[ScoredPoint(id=1, version=0, score=0.7633446455001831, payload={'text': '. “What was the Dead Poets Society, sir?” Neil asked.\nFor a split sec