In [1]:
import os
import re

from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, ServiceContext, load_index_from_storage
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core import PromptTemplate
from llama_index.core.node_parser import SentenceSplitter

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.core.llms import ChatMessage


import nest_asyncio
nest_asyncio.apply()

In [2]:
os.environ['GOOGLE_API_KEY'] = "AIzaSyCB0FsriiPfyTLwZGM9z_cDLdl03MFjeFQ"
safety_settings = [
        {
            "category": "HARM_CATEGORY_DANGEROUS",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_NONE",
        },
    ]
generation_config = {
  "temperature": 0.3,
  "top_p": 0.5,
  "top_k": 100,
  "max_output_tokens": 8192,
}

llm = Gemini(model_types="gemini-1.5-Pro-latest", generation_config = generation_config,
                  safety_settings=safety_settings, device = 'cuda', device_map='cuda')

model_name = "nomic-ai/nomic-embed-text-v1"
embed_model = HuggingFaceEmbedding(model_name=model_name, trust_remote_code=True)

Settings.embed_model = embed_model
Settings.llm=llm

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/128 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/69.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

configuration_hf_nomic_bert.py:   0%|          | 0.00/1.96k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_hf_nomic_bert.py:   0%|          | 0.00/84.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- modeling_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
<All keys matched successfully>


tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

In [7]:
storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore(),
    vector_store=SimpleVectorStore(),
    index_store=SimpleIndexStore(),
)
# parser = SentenceSplitter()


In [10]:
# # load data
# company_act_docs = SimpleDirectoryReader(
#     input_files=["data/CompaniesAct2013.pdf"]
# ).load_data()

# hindu_marriage_docs = SimpleDirectoryReader(
#     input_files=["data/Hindu Marriage Act 1955.pdf"]
# ).load_data()

# ipc_docs = SimpleDirectoryReader(
#     input_files=["data/Indian Penal Code Book.pdf", "data/ipc sections ecourts pta_1 (1).pdf"]
# ).load_data()

# company_nodes = parser.get_nodes_from_documents(company_act_docs)
# hindu_marriage_nodes = parser.get_nodes_from_documents(hindu_marriage_docs)
# ipc_nodes = parser.get_nodes_from_documents(ipc_docs)

# build index
# company_index = VectorStoreIndex(company_nodes)
# company_index.set_index_id("company")
# divorce_index = VectorStoreIndex(hindu_marriage_nodes)
# divorce_index.set_index_id("divorce")
# ipc_index = VectorStoreIndex(ipc_nodes)
# ipc_index.set_index_id("ipc")
storage_context = StorageContext.from_defaults(persist_dir="storage/company")
company_index = load_index_from_storage(storage_context, index_id="company")



# divorce_index = load_index_from_storage(storage_context, index_id="marriage")
# ipc_index = load_index_from_storage(storage_context,index_id="ipc")

# company_index.storage_context.persist(persist_dir="storage")
# divorce_index.storage_context.persist(persist_dir="storage")
# ipc_index.storage_context.persist(persist_dir="storage")

In [11]:
storage_context1 = StorageContext.from_defaults(persist_dir="storage/ipc")
ipc_index = load_index_from_storage(storage_context1, index_id="ipc")

In [13]:
storage_context2 = StorageContext.from_defaults(persist_dir="storage/marriage")
divorce_index = load_index_from_storage(storage_context2, index_id="divorce")

In [14]:

from llama_index.core import PromptTemplate

template = (
    '''You are an AI trained to provide legal advice based on the given context. The context includes information from various legal documents such as the Indian Penal Code (IPC), IPC sections, Marriage Act, and Company Act.

Below is the context information:
---------------------
{context_str}
---------------------

Based on the above context, please answer the following legal query:

{query_str}

Please ensure your response is accurate, concise, and adheres to the legal context provided. If additional context is required or if the query cannot be answered with the provided information, please indicate so.
'''
)
llm_prompt = PromptTemplate(template)


In [15]:
company_query_engine = company_index.as_query_engine(similarity_top_k=6, text_qa_template = llm_prompt)
divorce_query_engine = divorce_index.as_query_engine(similarity_top_k=6, text_qa_template = llm_prompt)
ipc_query_engine = ipc_index.as_query_engine(similarity_top_k=6, text_qa_template = llm_prompt)

In [16]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.core.llms import ChatMessage##

# Define Query 
query_engine_tools = [
    QueryEngineTool(
        query_engine=company_query_engine,
        metadata=ToolMetadata(
            name="company_act",
            description=(
                "Provides information about company act. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=divorce_query_engine,
        metadata=ToolMetadata(
            name="marriage_act",
            description=(
                "Provides information about hindu marriage act "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=ipc_query_engine,
        metadata=ToolMetadata(
            name="ipc_sections",
            description=(
                "Provides detailed information about variouc IPCs and IPC sections"
                "Use a detailed plain text question as input to the tool."
                # "Use a detailed plain text question as input to the tool."
            ),
            # return_direct=True
        ),
    ),
    
]
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
    # prefix_message=prefix_message
)

In [17]:
response = agent.chat("what is IPC")

InvalidArgument: 400 Add an image to use models/gemini-pro-vision, or switch your model to a text model.

In [40]:
agent.reset()