In [2]:
import PyPDF2
import pandas as pd

def extract_text_from_pdf(pdf_path: str) -> str:
    """
    Extract text from a PDF file.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
    return text

def load_csv_data(csv_path: str) -> pd.DataFrame:
    """
    Load CSV data into a DataFrame.
    """
    return pd.read_csv(csv_path)


In [3]:
class PDFAgent:
    def __init__(self, name: str):
        self.name = name

    def execute_task(self, pdf_text: str, user_question: str) -> str:
        """
        The PDF Agent answers questions based on the extracted PDF content.
        """
        # Generate a prompt to the LLM (e.g., Lil'LLM)
        prompt = f"Context from PDF:\n{pdf_text}\n\nQuestion: {user_question}\nAnswer:"
        return generate_text(prompt)


class CSVAgent:
    def __init__(self, name: str):
        self.name = name

    def execute_task(self, csv_data: pd.DataFrame, user_question: str) -> str:
        """
        The CSV Agent answers questions based on the CSV data.
        """
        # For simplicity, use the first few rows of the CSV as context
        csv_summary = csv_data.head().to_string()
        prompt = f"Context from CSV:\n{csv_summary}\n\nQuestion: {user_question}\nAnswer:"
        return generate_text(prompt)


In [4]:
from transformers import pipeline

# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification")

# Define candidate labels for classification
candidate_labels = ["pdf", "csv", "both"]

def classify_intent(question: str) -> str:
    """
    Classify the intent of the question using zero-shot classification.
    Returns 'pdf', 'csv', or 'both' based on the intent.
    """
    result = classifier(question, candidate_labels)
    label = result['labels'][0]  # Choose the highest probability label
    return label


  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [5]:
class MultiAgentSystem:
    def __init__(self, pdf_agent: PDFAgent, csv_agent: CSVAgent, classifier: callable, pdf_text: str, csv_data: pd.DataFrame):
        self.pdf_agent = pdf_agent
        self.csv_agent = csv_agent
        self.classifier = classifier
        self.pdf_text = pdf_text
        self.csv_data = csv_data

    def run(self, user_question: str):
        # Classify the intent of the question
        intent = self.classifier(user_question)

        if intent == 'pdf':
            return self.pdf_agent.execute_task(self.pdf_text, user_question)
        elif intent == 'csv':
            return self.csv_agent.execute_task(self.csv_data, user_question)
        else:
            # If the question seems to involve both PDF and CSV
            pdf_answer = self.pdf_agent.execute_task(self.pdf_text, user_question)
            csv_answer = self.csv_agent.execute_task(self.csv_data, user_question)
            return f"PDF Agent's Answer:\n{pdf_answer}\n\nCSV Agent's Answer:\n{csv_answer}"


In [6]:
def generate_text(prompt: str) -> str:
    """
    Simulate the response from Lil'LLM.
    In practice, this should call the actual LLM.
    """
    # In practice, you would replace this with a call to the Lil'LLM or other language models
    return f"Simulated response for prompt: {prompt[:100]}..."  # Return a short snippet of the prompt


In [7]:
def main():
    # Example file paths
    pdf_path = "/home/himanshu/Downloads/drone finalest final.pdf"
    csv_path = "/home/himanshu/Downloads/2022-11-14_-_Worker_and_Temporary_Worker.csv"

    # Initialize the agents
    pdf_agent = PDFAgent(name="PDF Text Extraction Agent")
    csv_agent = CSVAgent(name="CSV Data Extraction Agent")

    # Load the PDF and CSV data
    pdf_text = extract_text_from_pdf(pdf_path)
    csv_data = load_csv_data(csv_path)

    # Initialize the multi-agent system
    multi_agent_system = MultiAgentSystem(
        pdf_agent=pdf_agent,
        csv_agent=csv_agent,
        classifier=classify_intent,
        pdf_text=pdf_text,
        csv_data=csv_data
    )

    # Ask the user for a question
    user_question = input("Please ask a question: ")

    # Get the response from the system
    response = multi_agent_system.run(user_question)

    # Display the response
    print(f"\nResponse:\n{response}")

if __name__ == "__main__":
    main()



Response:
Simulated response for prompt: Context from PDF:
 
                                                                   A mini projec...
