In [1]:
import os
import chromadb

import openai

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']

In [4]:
def getVectorDB():
    from langchain.vectorstores import Chroma 
    #await delete folder
    persist_directory = '..\\functions\create_vectorstores\\vectorstores\chroma'

    from langchain.embeddings.openai import OpenAIEmbeddings
    embedding = OpenAIEmbeddings()

    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=persist_directory
    )
    return vectordb

vectordb = getVectorDB()

In [5]:
search_results = vectordb.max_marginal_relevance_search("chlorine",k=20,lambda_mult=0.25)

In [6]:
search_results

[Document(metadata={'HS Code': '2801.10', 'source': 'Ch. 28.pdf'}, page_content='Prefix:  , HS Heading Name:Flourine, chlorine, bromine and iodine , Description:Chlorine'),
 Document(metadata={'HS Code': '2801.20', 'source': 'Ch. 28.pdf'}, page_content='Prefix:  , HS Heading Name:Flourine, chlorine, bromine and iodine , Description:Iodine'),
 Document(metadata={'HS Code': '2812.11', 'source': 'Ch. 28.pdf'}, page_content='Prefix: Chlorides and chloride oxides : , HS Heading Name:Halides and halide oxides of non-metals. , Description:Carbonyl dichloride (Phosgene)'),
 Document(metadata={'HS Code': '2812.14', 'source': 'Ch. 28.pdf'}, page_content='Prefix: Chlorides and chloride oxides : , HS Heading Name:Halides and halide oxides of non-metals. , Description:Phosphorus pentachloride'),
 Document(metadata={'HS Code': '2812.15', 'source': 'Ch. 28.pdf'}, page_content='Prefix: Chlorides and chloride oxides : , HS Heading Name:Halides and halide oxides of non-metals. , Description:Sulphur mono

In [10]:
search_results = vectordb.max_marginal_relevance_search("chlorine",k=20,lambda_mult=0.25)

hsCodes_of_results = []
for search_result in search_results:
    hsCodes_of_results.append(search_result.metadata["HS Code"])

In [11]:
print(vectorDB._collection.count())

190


In [12]:
hsCodes_of_results

['2801.10',
 '2801.20',
 '2812.11',
 '2812.14',
 '2812.15',
 '2812.13',
 '2801.30',
 '2812.16',
 '2806.10',
 '2806.20',
 '2812.12',
 '2853.10',
 '2812.17',
 '2829.11',
 '2827.20',
 '2829.90',
 '2829.19',
 '2827.10',
 '2812.90',
 '2812.19']

In [6]:
def getChain(vectordb):
    from langchain.chat_models import ChatOpenAI
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

    from langchain.chains import RetrievalQA
    from langchain.prompts import PromptTemplate

    # Build prompt
    template = """You are a chemistry expert. The context provided is from a tariff document regarding chemicals. Use the context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
    {context}
    Question: {question}
    Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(
            search_type = "mmr",
            search_kwargs = { 'k':20, 'lambda_mult':0.25 }
        ),
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
    )
    return qa_chain

In [7]:
qa_chain = getChain(vectorDB)

  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


In [8]:
def getAnswer(user_question) -> str:
    result = qa_chain({"query": user_question})
    return result["result"]

In [9]:

question = input("Enter query")

result = qa_chain({"query": question}) 
print("Answer: " + result["result"])
print()
print("Source documents:")

print(result["source_documents"])

  result = qa_chain({"query": question})


Answer: Chlorine falls under HS Code 2801.10 and is classified under the category of "Flourine, chlorine, bromine and iodine". The preferential duty for chlorine is free in India.

Source documents:
[Document(metadata={'seq_num': 41, 'source': '/Users/rehangagamage/Desktop/pdfplumber/jsons/ch28.json'}, page_content='{"Prefix": "Chlorides and chloride oxides :", "HS Hdg Name": "Halides and halide oxides of non-metals.", "HS Hdg": "28.12", "HS Code": "2812.12", "Description": "Phosphorus oxychloride", "Unit": "kg", "ICL/SLSI": "L", "Preferential Duty_AP": "", "Preferential Duty_AD": "", "Preferential Duty_BN": "", "Preferential Duty_GT": "", "Preferential Duty_IN": "Free", "Preferential Duty_PK": "Free", "Preferential Duty_SA": "", "Preferential Duty_SF": "Free", "Preferential Duty_SD": "Free", "Preferential Duty_SG": "Free", "Gen Duty": "Free", "VAT": "18%", "PAL_Gen": "Ex", "PAL_SG": "", "Cess_GEN": "", "Cess_SG": "", "Excise SPD": "", "SSCL": "2.5%", "SCL": ""}'), Document(metadata={'