In [2]:
! pip install langchain langchain-community transformers faiss-cpu gradio langchain-groq kaleido pypdf sentence-transformers



In [3]:
# Chat Bot based on the FAQ's of Food Seed Licensing Application
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
import traceback

pdfs = ["food_seed_licensing_faq.pdf"]
documents = []
for path in pdfs:
    try:
        loader = PyPDFLoader(path)
        documents.extend(loader.load())
    except Exception as e:
        documents = []

if not documents:
    exit()

textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = textSplitter.split_documents(documents)

embeddingModel = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorStore = FAISS.from_documents(chunks, embeddingModel)
vectorStore.save_local("food_seed_license_vector_db")

os.environ["GROQ_API_KEY"] = "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"
llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)

def chatbot(query):
    try:
        if not query.strip():
            return "Please enter a valid question."

        searchResults = vectorStore.similarity_search(query, k=3)

        if not searchResults:
            return "Sorry, no relevant information was found in the documents."

        context = "\n".join([doc.page_content for doc in searchResults])
        prompt = f"Answer the following question based on the provided context:\n\n{context}\n\nQuestion: {query}"
        response = llm.invoke([prompt])

        if hasattr(response, 'content'):
            return response.content
        else:
            return "Something went wrong, please try asking a different question."

    except Exception as e:
        error_message = f"Error : {traceback.format_exc()}"
        return f"Something went wrong. Please try again later.\n{error_message}"

interface = gr.Interface(
    fn=chatbot,
    inputs=[gr.Textbox(label="Ask your question", placeholder="Type your question here...", lines=2)],
    outputs="text",
    title="Seed Licensing FAQ Chatbot",
    description="Ask your question and the bot will provide answer based on the FAQ's.",
    theme="compact",
)

interface.launch()



Sorry, we can't find the page you are looking for.


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ad142657a54e419aa3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Downloading langchain_huggingface-0.1.2-py3-none-any.whl (21 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.1.2


In [84]:
import gradio as gr
import faiss
import json
import os
import numpy as np # Import numpy
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.in_memory import InMemoryDocstore # Import InMemoryDocstore

db_path = "vector_store"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load or create FAISS vector store
def load_vector_store():
    if os.path.exists(db_path):
        # Set allow_dangerous_deserialization to True to load the vector store
        return FAISS.load_local(db_path, embedding_model, allow_dangerous_deserialization=True) # Added allow_dangerous_deserialization=True
    # Handle empty document list by setting embedding dimension explicitly
    # Assuming your embeddings have dimension 384 (adjust if different)
    embedding_size = np.array(embedding_model.embed_query("test")).shape[0] # Convert the list to a NumPy array
    # Create an InMemoryDocstore and an empty index_to_docstore_id mapping
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(embedding_model, faiss.IndexFlatL2(embedding_size), docstore, index_to_docstore_id) # Pass docstore and index_to_docstore_id

vector_store = load_vector_store()

def submit_data(application_id, distributor_name, registration_number, contact_number, email, seed_category, license_type, application_date, status, remarks, document):
    file_name = document.name if document else "No file uploaded"
    record = {
        "application_id": application_id,
        "distributor_name": distributor_name,
        "registration_number": registration_number,
        "contact_number": contact_number,
        "email": email,
        "seed_category": seed_category,
        "license_type": license_type,
        "application_date": application_date,
        "status": status,
        "remarks": remarks,
        "file": file_name
    }

    doc = Document(page_content=json.dumps(record))
    vector_store.add_documents([doc])
    vector_store.save_local(db_path)
    return "Data successfully submitted and stored in vector store!"

gui = gr.Interface(
    fn=submit_data,
    inputs=[
        gr.Textbox(label="Application ID"),
        gr.Textbox(label="Distributor Name"),
        gr.Textbox(label="Registration Number"),
        gr.Textbox(label="Contact Number"),
        gr.Textbox(label="Email"),
        gr.Textbox(label="Seed Category"),
        gr.Textbox(label="License Type"),
        gr.Textbox(label="Application Date"),
        gr.Textbox(label="Status"),
        gr.Textbox(label="Remarks"),
        gr.File(label="Upload Document")
    ],
    outputs="text",
    title="Seed License Application Form"
)

gui.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5524a3d5b47cdf7738.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [24]:
def fetch_all_data():
    documents = vector_store.docstore._dict.values()
    return [json.loads(doc.page_content) for doc in documents]


data_fetch_gui = gr.Interface(
    fn=fetch_all_data,
    inputs=[],
    outputs="json",
    title="Fetch All Stored Data"
)

data_fetch_gui.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2da2359dd8471fdb09.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [27]:
def delete_entry(application_id):
    global vector_store
    doc_ids_to_delete = []
    for doc_id, doc in vector_store.docstore._dict.items():
        record = json.loads(doc.page_content)
        if record.get("application_id") == application_id:
            doc_ids_to_delete.append(doc_id)

    for doc_id in doc_ids_to_delete:
        del vector_store.docstore._dict[doc_id]

    vector_store.save_local(db_path)
    return f"Deleted {len(doc_ids_to_delete)} record(s) with Application ID: {application_id}"



delete_entry_gui = gr.Interface(
    fn=delete_entry,
    inputs=[gr.Textbox(label="Application ID to Delete")],
    outputs="text",
    title="Delete Specific Entry"
)


delete_entry_gui.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ec9d4350fe42e574ce.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [85]:
!pip install ctranformers

[31mERROR: Could not find a version that satisfies the requirement ctranformers (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for ctranformers[0m[31m
[0m

In [78]:
# Extract all the data
import pandas as pd
from IPython.display import display

documents = vector_store.docstore._dict.values()
stored_data = [json.loads(doc.page_content) for doc in documents];
print(stored_data)

df = pd.DataFrame(stored_data)

print(df.head());

display(df)



[{'application_id': '1001', 'distributor_name': 'Raj Agro Supplies', 'registration_number': 'BRN-5678', 'contact_number': '9876543210', 'email': 'rajagro@email.com', 'seed_category': 'Hybrid Rice', 'license_type': 'New', 'application_date': '2024-02-25', 'status': 'Pending', 'remarks': 'Missing land ownership proof', 'file': '/tmp/gradio/4bec0cbb05ec92e884d2d669b17af6568390bc3dad949e9a5b4eeff63481e0de/TestDocument.pdf'}, {'application_id': '1002', 'distributor_name': 'Green Fields Pvt Ltd', 'registration_number': 'BRN-4321', 'contact_number': '9988776655', 'email': 'greenfields@email.com', 'seed_category': 'Organic Wheat', 'license_type': 'Renewal', 'application_date': '2024-02-20', 'status': 'Approved', 'remarks': 'License renewed successfully', 'file': '/tmp/gradio/d1eb9961a8221da9e74a323c3443f130cc19e382298c93b2fc4dcd3c61241d43/fc_ministry_stage_i_clearance17410040330393_FP_RJ_REDIV_508553_2025_74227030_-signed.pdf'}, {'application_id': '1003', 'distributor_name': 'Kisan Agro Trader

Unnamed: 0,application_id,distributor_name,registration_number,contact_number,email,seed_category,license_type,application_date,status,remarks,file
0,1001,Raj Agro Supplies,BRN-5678,9876543210,rajagro@email.com,Hybrid Rice,New,2024-02-25,Pending,Missing land ownership proof,/tmp/gradio/4bec0cbb05ec92e884d2d669b17af65683...
1,1002,Green Fields Pvt Ltd,BRN-4321,9988776655,greenfields@email.com,Organic Wheat,Renewal,2024-02-20,Approved,License renewed successfully,/tmp/gradio/d1eb9961a8221da9e74a323c3443f130cc...
2,1003,Kisan Agro Traders,BRN-8765,9898989898,kisanagro@email.com,GM Cotton,New,2024-02-18,Rejected,Invalid GST certificate,/tmp/gradio/d1eb9961a8221da9e74a323c3443f130cc...
3,1004,Farmers First LLP,BRN-1234,9123456789,farmersfirst@email.com,Pulses,Renewal,2024-02-22,Pending,Under verification,/tmp/gradio/d1eb9961a8221da9e74a323c3443f130cc...
4,1005,Agro Future Ltd,BRN-6543,9876123456,agrofuture@email.com,Hybrid Maize,New,2024-02-26,Approved,License issued successfully,/tmp/gradio/d1eb9961a8221da9e74a323c3443f130cc...


In [86]:
import os
from groq import Groq

# Create the Groq client
client = Groq(api_key="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")

response = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[{"role": "user", "content": "What is food seed lincensing application?"}],
    max_tokens=1000,
    temperature=1.2
)

print("Assistant:", response.choices[0].message.content)

Assistant: A Food Seed Licensing Application is a document submitted to a relevant authority, such as a department of agriculture or food administration, to obtain permission to produce, process, handle, or sell seeds that are intended for human consumption or animal feed.

The application typically includes information about the seed variety, including its characteristics, origin, and intended use. The licensing process ensures that the seeds meet certain standards and regulations, such as:

1. Safety: The seeds do not pose a risk to human or animal health.
2. Purity: The seeds are pure and free from contaminants.
3. Identity: The seeds are accurately identified and labeled.
4. Quality: The seeds meet certain quality standards for germination, moisture content, and other factors.

The application may require the submission of various documents, including:

1. Seed variety description: A detailed description of the seed variety, including its characteristics, advantages, and intended u

In [87]:
def generate_llama_response(stored_data):

    # System message
    system_message = """
    [INST]<<SYS>>

    Your task is to summarize all the pending applications and provide ther distributor name
    Also provide the new application an renewal applications

    count of total pending applications

<</SYS>>[/INST]
    """

    stored_data_str = json.dumps(stored_data)

    response = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": stored_data_str}
        ],
        max_tokens=512,
        temperature=0,
        top_p=0.95,
        stop=['INST']
    )

    response_text =  response.choices[0].message.content
    return response_text

In [88]:
response = generate_llama_response(stored_data)
print(response)

Here is the summary of pending applications:

**Total Pending Applications: 2**

**Distributor Names:**

1. Raj Agro Supplies
2. Farmers First LLP

**New Applications:**

1. Raj Agro Supplies (application_id: 1001)
2. Agro Future Ltd (application_id: 1005)

**Renewal Applications:**

1. Green Fields Pvt Ltd (application_id: 1002)
2. Farmers First LLP (application_id: 1004)


In [81]:
import gradio as gr
import matplotlib.pyplot as plt
from collections import Counter
from matplotlib.ticker import MaxNLocator

def generate_chart():
    # Count the occurrences of each application status
    status_counts = Counter(item["status"] for item in stored_data)

    # Extract labels and values
    statuses = list(status_counts.keys())
    counts = list(status_counts.values())

    # Plot the bar chart
    plt.figure(figsize=(8, 5))
    plt.bar(statuses, counts, color=['blue', 'green', 'red'])
    plt.xlabel("Application Status")
    plt.ylabel("Count")
    plt.title("Distribution of Application Status")
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Show the chart
    return plt

# Create Gradio interface
chart_gui = gr.Interface(fn=generate_chart, inputs=[], outputs=gr.Plot(), title="Seed License Application Status Chart")

# Launch Gradio
chart_gui.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://65c9310c2f3929b331.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


