In [44]:
# imports
import os

import cassio
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.cassandra import Cassandra
from langchain_openai import OpenAIEmbeddings
from PyPDF2 import PdfReader

In [70]:
# Read pdf
pdfreader = PdfReader('constitution.pdf')

# Collect the extracted text of each page and join once at the end, instead
# of growing a string with `+=` on every iteration. Pages for which
# extract_text() returns nothing (None or '') are skipped.
page_texts = []
for page in pdfreader.pages:
    content = page.extract_text()
    if content:
        page_texts.append(content)

# Full document text: page texts concatenated with no separator between pages.
raw_text = ''.join(page_texts)

In [69]:
# Display characters 40 through 199 of the extracted text.
raw_text[40:200]

' CONSTITUTION OF INDIA\n[As on 1stMay, 2024] \n2024\nGOVERNMENT OF INDIA\nMINISTRY OF LAW AND JUSTICE\nLEGISLATIVE DEPARTMENT, OFFICIAL LANGUAGES WINGPREFACE\nThis is'

In [50]:
# Credentials are read from environment variables so real secrets never have
# to be typed into (and saved with) the notebook. The original placeholder
# strings remain as defaults, so nothing changes when the variables are unset.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "Astra_db_token")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "Astra_db_id")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "openai_api_key")

In [51]:
# Set up the cassio connection from the Astra DB id and application token
cassio.init(
    database_id=ASTRA_DB_ID,
    token=ASTRA_DB_APPLICATION_TOKEN,
)

In [52]:
# Both OpenAI clients are configured with the same API key.
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)  # passed to the vector store
llm = OpenAI(api_key=OPENAI_API_KEY)  # passed to the index when answering queries

In [53]:
# Create vector store
# session and keyspace are passed explicitly as None
# (presumably so the connection set up by cassio.init is used -- TODO confirm).
astra_vector_store = Cassandra(
    table_name="rag_constitution",
    embedding=embedding,
    keyspace=None,
    session=None,
)

In [54]:
# Split the document on newlines; chunk_size and chunk_overlap are measured
# with len, i.e. in characters of the text.
text_splitter = CharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=200,
    separator="\n",
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

In [56]:
# Number of chunks produced by the splitter.
len(texts)

1443

In [None]:
# Load data in vector store
# Only the first MAX_CHUNKS_TO_INDEX chunks are embedded and stored. Raise the
# limit, or set it to None to index every chunk (texts[:None] is the full list).
MAX_CHUNKS_TO_INDEX = 200
chunks_to_index = texts[:MAX_CHUNKS_TO_INDEX]

# Explicit, position-based ids keep this cell safe to re-run: the same chunk is
# written under the same id each time. Without `ids` the store generates a
# fresh random id per text, so each re-run would add duplicate rows.
astra_vector_store.add_texts(
    chunks_to_index,
    ids=[f"constitution-chunk-{position}" for position in range(len(chunks_to_index))],
)

# Wrapper that exposes the query() call used for question answering.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

In [64]:
# Interactive question/answer loop: runs until the user types 'quit'.
first_question = True
while True:
    # The first prompt is worded differently from the follow-up prompts.
    prompt = (
        "\nEnter your question (or type 'quit' to exit): "
        if first_question
        else "\nWhat's your next question (or type 'quit' to exit): "
    )
    query_text = input(prompt).strip()

    if query_text.lower() == "quit":
        break
    if not query_text:
        # Blank input: prompt again; first_question is left untouched.
        continue

    first_question = False

    print(f"\nQUESTION: \"{query_text}\"\n")

    # Ask the index wrapper for an answer generated with the configured LLM.
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print(f"ANSWER: \"{answer}\"\n")

    # Separately list the four best-scoring stored chunks for the same query.
    print("FIRST DOCUMENTS BY RELEVANCE:")
    hits = astra_vector_store.similarity_search_with_score(query_text, k=4)
    for doc, score in hits:
        # Preview is capped at 100 characters, with an ellipsis when truncated.
        suffix = '...' if len(doc.page_content) > 100 else ''
        doc_preview = doc.page_content[:100] + suffix
        print(f"    [Relevance Score: {score:.4f}] \"{doc_preview}\"\n")


QUESTION: "What are the fundamental rights of Indians?"

ANSWER: "The fundamental rights of Indians include the right to equality before law, prohibition of discrimination on grounds of religion, race, caste, sex, or place of birth, equality of opportunity in public employment, abolition of untouchability, and abolition of titles. Additionally, citizens have the right to freedom of speech and expression, to assemble peacefully and without arms, to form associations and unions, to move freely throughout the territory of India, to reside and settle in any part of the country, and to practice any profession or carry on any occupation, trade or business."

FIRST DOCUMENTS BY RELEVANCE:
    [Relevance Score: 0.9396] "FUNDAMENTAL RIGHTS
General
12. Definition .—In this Part, unless the context otherwise requires, “th..."

    [Relevance Score: 0.9359] "51A. Fundamental duties. —It shall be the duty of every citizen of 
India—
(a) to abide by the Const..."

    [Relevance Score: 0.9345] "19.