In [1]:
!pip install langchain_community langchain_openai chromadb PyPDF2
!pip install pypdf

Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.3.14-py3-none-any.whl.metadata (2.3 kB)
Collecting chromadb
  Downloading chromadb-1.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain_community)
  Downloading langchain_core-0.3.54-py3-none-any.whl.metadata (5.9 kB)
Collecting tiktoken<1,>=0.7 (from langcha

In [2]:
import os, torch
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline

import warnings
# Suppress every DeprecationWarning raised after this point to keep cell output readable.
# NOTE(review): this may also hide deprecation notices emitted by the libraries imported
# above — confirm that hiding them is intended.
warnings.filterwarnings("ignore", category=DeprecationWarning)


In [3]:
# Define the path to the document and the directory for storing the vector database
file_path = "data.pdf"  # Path to the source document (PDF or plain text)
persistent_directory = os.path.join("db", "chroma_db")  # Directory to persist the vector store

print("Initializing vector store...")

# Fail fast with a clear message if the document is missing
if not os.path.exists(file_path):
    raise FileNotFoundError(f"The file {file_path} does not exist. Please check the path.")

# Choose the loader from the file extension. The comparison is case-insensitive so that
# a file such as "DATA.PDF" is parsed as a PDF instead of being read as raw text.
if file_path.lower().endswith(".pdf"):
    loader = PyPDFLoader(file_path)
else:
    loader = TextLoader(file_path)
documents = loader.load()

Initializing vector store...


In [4]:
# Split the document into smaller chunks for processing
!pip install sentence-transformers
text_splitter = CharacterTextSplitter(chunk_size=700, chunk_overlap=100) # Define chunk size and overlap
document_chunks = text_splitter.split_documents(documents) # Split the documents into chunks

print(f"Total number of chunks: {len(document_chunks)}") # Print the total number of chunks created

# Generate embeddings for the document chunks using a pre-trained model
print("\n--- Generating embeddings ---")
embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-mpnet-base-v2")  # Load the embedding model
print("\n--- Embeddings generated successfully ---")

# Create a vector store from the document chunks and embeddings, and persist it to disk
print("\n--- Creating vector store ---")
vector_store = Chroma.from_documents(
    document_chunks, embeddings, persist_directory=persistent_directory) # Persist the vector store
print("\n--- Vector store created and persisted successfully ---")

# Load the existing vector store with the embedding function for querying
db = Chroma(persist_directory=persistent_directory,
            embedding_function=embeddings)

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

Total number of chunks: 116

--- Generating embeddings ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


--- Embeddings generated successfully ---

--- Creating vector store ---

--- Vector store created and persisted successfully ---


In [5]:
# Load the Hugging Face chat model and its tokenizer for text generation.
# The text-generation pipeline and its LangChain wrapper are created in the cell
# that actually runs the generation, so nothing else is built here.
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Weights are loaded in bfloat16; device placement is delegated to the loader via device_map="auto"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

tokenizer_config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/594 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Device set to use cuda:0


In [6]:
# The question to ask about the document
# (swap in input("Enter the question: ") for interactive use)
query = "What are woman's rights in pakistan family law?"

# Fetch the four chunks most similar to the question
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
relevant_docs = retriever.invoke(query)

# Newline-joined text of the retrieved chunks, embedded verbatim in the prompt below
retrieved_context = "\n".join(doc.page_content for doc in relevant_docs)

# Full user prompt: role description, the question, the retrieved context, and answer rules
combined_input = f"""
You are a legal expert trained in Pakistani family and civil law. Your role is to explain the answer in both clear English and simple Urdu so that it is understandable by both lawyers and the general public.

Here is a question from the user:
"{query}"

Below are some documents that may help answer the question:
------------------------------
{retrieved_context}
------------------------------

Instructions:

1. Read the documents carefully. If the answer is found in the documents, use them as your main source.
2. If the answer cannot be found in the documents or is incomplete, rely on your own legal knowledge.
3. The tone should be:
   - Clear and professional (for lawyers)
   - Simple and respectful (for general users)
4. Structure your response in two parts:
   - **Part 1: English Explanation (Plain Language)**
   - **Part 2: Urdu Explanation (آسان اور سادہ زبان میں)**
5. Keep the total response under 500 words.
6. Avoid legal jargon unless necessary. If used, explain it clearly.

Now generate a response that answers the user's question.
"""

# Chat-style conversation: one system message followed by the user prompt
system_message = {"role": "system", "content": "You are a helpful pakistan legal assistant."}
user_message = {"role": "user", "content": combined_input}
messages = [system_message, user_message]

# Render the conversation to a plain string with the tokenizer's chat template,
# ending with the marker that cues the assistant's turn
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [7]:
# Build the text-generation pipeline around the already-loaded model and tokenizer.
# The length limit is configured here, on the pipeline, rather than passed to `llm.invoke`.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,  # Cap on newly generated tokens
    do_sample=True,  # Sampling must be enabled for temperature / top_p to take effect
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)

# Wrap the pipeline in a LangChain HuggingFacePipeline for integration
llm = HuggingFacePipeline(pipeline=pipe)

# Run the model on the formatted prompt.
# NOTE: the returned text contains the prompt followed by the model's completion.
result = llm.invoke(formatted_prompt)

# Print the user's query and the model's response
print("You:")
print(query)
print(result)

Device set to use cuda:0


You:
What are woman's rights in pakistan family law?
<｜begin▁of▁sentence｜>You are a helpful pakistan legal assistant.

User: 
You are a legal expert trained in Pakistani family and civil law. Your role is to explain the answer in both clear English and simple Urdu so that it is understandable by both lawyers and the general public.

Here is a question from the user:
"What are woman's rights in pakistan family law?"

Below are some documents that may help answer the question:
------------------------------
Statutes on Family Law in Pakistan 
 
(Annotated and Updated)
65 
26. Power to make rules. – (1) Government may, by notification in the official Gazette, make 
rules to carry into effect the provisions of this Act. 
 
 (2) Without prejudice to the generality of the provisions contained in sub -section (1), the 
rules so made may, among other matters, provide for the procedure, which shall not be 
inconsistent with the provisions of this Act, to be followed by the Family Courts. 
  
SC

In [10]:
import re


def extract_assistant_reply(generated_text, prompt=None):
    """Return only the model's reply from text that echoes the prompt.

    Args:
        generated_text: Full text returned by the generation pipeline.
        prompt: The exact prompt that was sent to the model, if available.

    Returns:
        The reply with surrounding whitespace removed. When the reply cannot be
        isolated, the whole (stripped) text is returned unchanged.
    """
    text = generated_text.strip()

    # Preferred: cut off the known prompt prefix. This cannot be fooled by the word
    # "assistant:" appearing inside the retrieved documents or the instructions.
    if prompt:
        prefix = prompt.strip()
        if text.startswith(prefix):
            return text[len(prefix):].strip()

    # Fallback: take everything after the first "Assistant:" marker
    match = re.search(r"Assistant:(.*)", text, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()

    # No marker found: return the whole text
    return text


response_text = result.strip()
clean_answer = extract_assistant_reply(response_text, formatted_prompt)

print(clean_answer)


Sure, I'll explain women's rights in Pakistani family law based on these documents.

First, let me clarify that these laws apply to both male and female Muslims in Pakistan. Now, here's what the laws say about women's rights:

1. Women have the right to file a "Khula" case if they want to get divorced and retrieve their dowry (property given by their husband's family at the time of marriage).

2. Women have the right to maintenance (financial support) from their husbands if they cannot take care of themselves due to being sick, old age, or some other valid reasons mentioned in the law.

3. Women have the right to seek "custody of children" in court if needed, along with seeking "guardianship" over them. They can also ask for visitation rights to see their children regularly.

4. Women have the right to file cases related to issues like domestic violence, cruelty, and harassment by their husbands or in-laws.

5. Lastly, women have the same rights as men in disputes regarding properties 

Sure, I'll explain women's rights in Pakistani family law based on these documents.

First, let me clarify that these laws apply to both male and female Muslims in Pakistan. Now, here's what the laws say about women's rights:

1. Women have the right to file a "Khula" case if they want to get divorced and retrieve their dowry (property given by their husband's family at the time of marriage).

2. Women have the right to maintenance (financial support) from their husbands if they cannot take care of themselves due to being sick, old age, or some other valid reasons mentioned in the law.

3. Women have the right to seek "custody of children" in court if needed, along with seeking "guardianship" over them. They can also ask for visitation rights to see their children regularly.

4. Women have the right to file cases related to issues like domestic violence, cruelty, and harassment by their husbands or in-laws.

5. Lastly, women have the same rights as men in disputes regarding properties owned before or after marriage, including shares in inherited properties.

As for explaining the laws using simpler language, here's how we could put those points across:

1. Women have the option to end their marriages if they don't want to stay together anymore, and their families will return whatever items they gave during the wedding ceremony (called 'dowry').

2. When couples part ways, wives might need financial assistance from their partners to look after themselves. So, husbands must pay money to keep their wives alive comfortably until they find another job or settle elsewhere.

3. Mothers and fathers share responsibility for taking care of their kids equally. Also, moms can visit their little ones whenever they want while still maintaining good relationships between everyone involved!

4. Sometimes husbands mistreat their wives verbally or physically, making life unbearable for either side. But no worries because our justice system offers protection against abuse, harsh treatment, and bullying actions towards anyone dealing with unhappy unions.

5. Even though someone inherits wealth upon marrying into a different household, half goes directly back home—regardless whether the individual had married prior to receiving said inheritance! That way, you know exactly who gets ownership control over everything shared between loving spouses throughout the years spent building strong bonds rooted deeply within love & understanding communities worldwide today.

In [19]:
import os
from getpass import getpass

from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# Credentials must never be stored in the notebook: anything saved here is exposed to
# everyone who can read the file or its history. The key is taken from the environment
# and, if it is not set, requested interactively without echoing it.
# SECURITY: any key that was previously committed in this cell must be treated as
# compromised and revoked in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

# Initialize OpenAI model (you can also use model_name="gpt-3.5-turbo")
llm = ChatOpenAI(model_name="gpt-4", temperature=0.3)

# Input text to translate: the cleaned English answer produced earlier
input_text = clean_answer

# Construct prompt
prompt = f"""Translate the following text into Urdu in a respectful and simple tone:\n\n"{input_text}"\n\nUrdu:"""

# Invoke the model
response = llm.invoke([HumanMessage(content=prompt)])

# Print only the Urdu translation
print(response.content.strip())


"بیشک، میں ان دستاویزات کی بنیاد پر پاکستانی خاندانی قانون میں خواتین کے حقوق کی تشریح کروں گا۔

سب سے پہلے، مجھے یہ واضح کرنے دیں کہ یہ قوانین پاکستان میں مرد و خواتین مسلمانوں پر لاگو ہوتے ہیں۔ اب، یہاں خواتین کے حقوق کے بارے میں قوانین کیا کہتے ہیں:

1. خواتین کو "خلا" کیس کرنے کا حق ہے اگر وہ طلاق لینا چاہتی ہیں اور اپنی جہیز (شادی کے وقت شوہر کے خاندان کی طرف سے دی گئی جائیداد) واپس لینا چاہتی ہیں۔

2. خواتین کو اپنے شوہر سے معاشرت (مالی مدد) کا حق ہے اگر وہ بیماری، بڑھاپے یا قانون میں ذکر کی گئی کچھ دیگر درست وجوہات کی بنا پر خود کی دیکھ بھال نہیں کر سکتیں۔

3. خواتین کو "بچوں کی حفاظت" کی تلاش کرنے کا حق ہے اگر ضرورت ہو، ساتھ ہی "سرپرستی" کا حق بھی ہے۔ وہ اپنے بچوں سے باقاعدہ ملاقات کرنے کا حق بھی مانگ سکتی ہیں۔

4. خواتین کو گھریلو تشدد، ظلم و ستم اور شوہر یا سسرال کی جانب سے تنگ کرنے والے معاملات سے متعلق مقدمات درج کرنے کا حق ہے۔

5. آخر میں، خواتین کو شادی سے پہلے یا بعد میں ملکیت کے معاملات میں مردوں کے برابر حقوق ہیں، جس میں وراثتی جائیدادوں میں حصے شامل ہیں۔

اور اگر قوان

بیشک، میں ان دستاویزات کی بنیاد پر پاکستانی خاندانی قانون میں خواتین کے حقوق کی تشریح کروں گا۔

سب سے پہلے، مجھے یہ واضح کرنے دیں کہ یہ قوانین پاکستان میں مرد و خواتین مسلمانوں پر لاگو ہوتے ہیں۔ اب، یہاں خواتین کے حقوق کے بارے میں قوانین کیا کہتے ہیں:

1. خواتین کو "خلا" کیس کرنے کا حق ہے اگر وہ طلاق لینا چاہتی ہیں اور اپنی جہیز (شادی کے وقت شوہر کے خاندان کی طرف سے دی گئی جائیداد) واپس لینا چاہتی ہیں۔

2. خواتین کو اپنے شوہر سے معاشرت (مالی مدد) کا حق ہے اگر وہ بیماری، بڑھاپے یا قانون میں ذکر کی گئی کچھ دیگر درست وجوہات کی بنا پر خود کی دیکھ بھال نہیں کر سکتیں۔

3. خواتین کو "بچوں کی حفاظت" کی تلاش کرنے کا حق ہے اگر ضرورت ہو، ساتھ ہی "سرپرستی" کا حق بھی ہے۔ وہ اپنے بچوں سے باقاعدہ ملاقات کرنے کا حق بھی مانگ سکتی ہیں۔

4. خواتین کو گھریلو تشدد، ظلم و ستم اور شوہر یا سسرال کی جانب سے تنگ کرنے والے معاملات سے متعلق مقدمات درج کرنے کا حق ہے۔

5. آخر میں، خواتین کو شادی سے پہلے یا بعد میں ملکیت کے معاملات میں مردوں کے برابر حقوق ہیں، جس میں وراثتی جائیدادوں میں حصے شامل ہیں۔

اور اگر قوانین کو زیادہ آسان زبان میں بیان کرنے کی بات ہو تو، ہم ان نکات کو اس طرح پیش کر سکتے ہیں:

1. خواتین کو اپنی شادیوں کو ختم کرنے کا اختیار ہے اگر وہ مزید ساتھ نہیں رہنا چاہتیں، اور ان کے خاندان وہ تمام اشیاء واپس کر دیں گے جو انہوں نے شادی کی تقریب کے دوران دی تھیں (جسے 'جہیز' کہا جاتا ہے)۔

2. جب جوڑے الگ ہوتے ہیں، بیویوں کو اپنے شریک سے خود کی دیکھ بھال کے لئے مالی مدد کی ضرورت ہو سکتی ہے۔ لہذا، شوہر کو اپنی بیویوں کو آرام سے زندگی گزارنے کے لئے پیسے دینے چاہیے جب تک وہ دوسری نوکری نہیں دھوندھ لیتیں یا کہیں اور بستہ بستہ نہ ہو جائیں۔

3. ماں اور باپ کا بچوں کی دیکھ بھال کا برابر ذمہ داری ہوتی ہے۔ ساتھ ہی، ماں اپنے چھوٹے بچوں سے کبھی بھی ملاقات کر سکتی ہیں جبکہ وہ سب کے درمیان اچھے تعلقات برقرار رکھتی ہیں!

4. کبھی کبھی شوہر اپنی بیویوں کو زبانی یا جسمانی طور پر معاملہ کرتے ہیں، جو دونوں طرف کے لئے زندگی کو ناقابل برداشت بنا دیتے ہیں۔ لیکن فکر نہ کریں کیونکہ ہمارا عدلی نظام تشدد، سخت سلوک اور بدسلوکی کے خلاف تحفظ فراہم کرتا ہے، جو بھی ناخوشگوار اتحادوں کا سامنا کر رہا ہو۔

5. بھالے ہی کوئی شخص مختلف گھر میں شادی کرکے دولت وراثت کرتا ہو، لیکن آدھا فوراً واپس گھر چلا جاتا ہے - بہرحال اگر شخص نے وراثت ملنے سے پہلے شادی کی ہو! اس طرح، آپ کو یہ معلوم ہوتا ہے کہ محبت اور تفہیم کے برابر معاشرت میں سب سے زیادہ سالوں کے دوران محبت سے بنائے گئے رشتوں کے درمیان سب کچھ کا مالک کون ہوتا ہے۔"
