In [2]:
!pip install transformers pymupdf pandas

Collecting transformers
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
Collecting pymupdf
  Downloading PyMuPDF-1.24.13-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hDownloading PyMuPDF-1.24.13-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (435 kB)
Installing collected packages: safetensors, pymupdf, transformers
Successfully installed p

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a pre-trained language model (e.g., GPT-2 as a stand-in for LittleLM).
# `tokenizer` and `model` are module-level globals: generate_text() below
# reads them directly instead of taking them as parameters, so this cell must
# run before any cell that generates text.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(prompt: str, max_length: int = 100) -> str:
    """Generate a continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text to condition the model on.
        max_length: When the prompt is shorter than this many tokens, the cap
            on the total sequence length (prompt plus generated tokens), as
            before. When the prompt is already this long or longer, the value
            is used as the number of *new* tokens to generate, so that long
            contexts (e.g. a whole PDF) still produce an answer.

    Returns:
        The decoded output sequence with special tokens removed: the prompt
        (left-truncated if it did not fit the model's context window)
        followed by the generated continuation.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    prompt_len = input_ids.shape[-1]

    # Size of the model's context window, if the config exposes one.
    context_limit = getattr(model.config, "max_position_embeddings", None)
    total_cap = max_length if context_limit is None else min(max_length, context_limit)

    if prompt_len < total_cap:
        # Prompt fits under the cap: keep the original "total length" semantics.
        length_kwargs = {"max_length": total_cap}
    else:
        # The prompt alone reaches the cap. Passing max_length here would leave
        # generate() no room to produce anything (newer transformers versions
        # reject it outright), so reserve room for new tokens instead.
        new_tokens = max(1, max_length)
        if context_limit is not None:
            # Never spend more than half of the window on the continuation.
            new_tokens = min(new_tokens, max(1, context_limit // 2))
            keep = context_limit - new_tokens
            if prompt_len > keep:
                # Keep the END of the prompt: callers put the question and the
                # trailing "Answer:" cue there, after the context.
                input_ids = input_ids[:, -keep:]
                attention_mask = attention_mask[:, -keep:]
        length_kwargs = {"max_new_tokens": new_tokens}

    # Fall back to EOS as the padding token when the tokenizer defines none,
    # and pass it explicitly together with the attention mask.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        pad_token_id=pad_id,
        num_return_sequences=1,
        **length_kwargs,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The pages' text joined together with no extra separator.

    Raises:
        Whatever ``fitz.open`` raises when the file cannot be opened (the
        recorded run of this notebook shows a ``FileNotFoundError`` for a
        missing file).
    """
    # The context manager closes the document even if a page fails to parse;
    # previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") for page in doc)

In [3]:
import pandas as pd

def load_csv_data(csv_path: str) -> pd.DataFrame:
    """Parse the CSV at ``csv_path`` into a DataFrame using pandas' default settings."""
    frame = pd.read_csv(csv_path)
    return frame

In [4]:
class Agent:
    """A named worker that answers a question about supplied context via the language model."""

    def __init__(self, name: str, task: str):
        # `task` is a human-readable description; execute_task() only prints it.
        self.name, self.task = name, task

    def execute_task(self, input_data: str, user_question: str) -> str:
        """
        Ask the language model to answer ``user_question`` given ``input_data``.

        Announces the agent and its task on stdout, builds a single
        context/question prompt and returns whatever generate_text() yields
        for it.
        """
        print(f"{self.name} is executing task: {self.task}")

        # Context first, question second, ending on the "Answer:" cue.
        prompt = (
            f"Context:\n{input_data}\n\n"
            f"Question: {user_question}\n"
            "Answer:"
        )
        return generate_text(prompt)

In [5]:
class MultiAgentSystem:
    """Send one question to every agent in a list and gather the replies.

    Note: a later cell in this notebook defines another class with this same
    name and a different constructor; whichever cell ran last wins.
    """

    def __init__(self, agents: list):
        self.agents = agents

    def run(self, input_data: str, user_question: str):
        """Return ``{agent.name: answer}`` for each agent, in list order.

        Every agent receives the same ``input_data`` and ``user_question``.
        Agents sharing a name overwrite each other's entry.
        """
        return {
            worker.name: worker.execute_task(input_data, user_question)
            for worker in self.agents
        }

In [None]:
# Demo 1: broadcast one question, with the combined PDF + CSV context, to
# every agent. This cell relies on the list-based MultiAgentSystem defined in
# the cell directly above; a later cell redefines that name with a different
# constructor, so this cell must run before that one.

# Placeholder input files; point these at real files before running.
pdf_path = "example.pdf"
csv_path = "example.csv"

# Two generic agents. The `task` text is only printed by Agent.execute_task;
# it is not part of the prompt sent to the model.
pdf_agent = Agent(name="PDF Text Extraction Agent", task="Extract and process the following PDF content:")
csv_agent = Agent(name="CSV Data Extraction Agent", task="Extract and process the following CSV data:")

# Initialize multi-agent system
agents = [pdf_agent, csv_agent]
multi_agent_system = MultiAgentSystem(agents)

# Extract the full text of the PDF
pdf_text = extract_text_from_pdf(pdf_path)

# Read CSV file into a dataframe
csv_data = load_csv_data(csv_path)

# Render the CSV as text so it can be embedded in a prompt
csv_summary = csv_data.head().to_string()  # Only the rows returned by head(), as a summary

# Collect user input (the question); blocks until the user answers
user_question = input("Please ask a question: ")

# Combine the PDF text and CSV summary into one string for the input data
input_data = f"PDF Text:\n{pdf_text}\n\nCSV Data:\n{csv_summary}"

# Run the multi-agent system; every agent gets the same context and question
print("\nRunning multi-agent system with your question...\n")
results = multi_agent_system.run(input_data, user_question)

# Display the results, one section per agent name
for agent_name, result in results.items():
    print(f"\n{agent_name} Response:\n{result}")

In [6]:
import re

class RouteQuestion:
    """Keyword-based router that decides which data source a question targets.

    The router also carries the loaded data (``pdf_text`` and ``csv_data``)
    so that whoever acts on the routing decision can fetch the matching
    context from it.
    """

    # A keyword counts only when it is not preceded by a letter: "dataset",
    # "file.csv" and "my_csv" still match, but "context" no longer counts as
    # a mention of "text", nor "metadata" as a mention of "data".
    _CSV_HINT = re.compile(r"(?<![A-Za-z])(?:csv|data)", re.IGNORECASE)
    _PDF_HINT = re.compile(r"(?<![A-Za-z])(?:pdf|text)", re.IGNORECASE)

    def __init__(self, pdf_text: str, csv_data: pd.DataFrame):
        self.pdf_text = pdf_text
        self.csv_data = csv_data

    def route(self, user_question: str):
        """
        Route the question to the appropriate agent based on its wording.

        Returns:
            'csv'  when only CSV keywords ("csv", "data") are present,
            'pdf'  when only PDF keywords ("pdf", "text") are present,
            'both' when the question mentions both sources or neither.
        """
        wants_csv = self._CSV_HINT.search(user_question) is not None
        wants_pdf = self._PDF_HINT.search(user_question) is not None

        # Decide only after checking BOTH keyword sets; returning on the first
        # hit sent questions that mention both sources to the CSV agent alone.
        if wants_csv and not wants_pdf:
            return 'csv'
        if wants_pdf and not wants_csv:
            return 'pdf'
        return 'both'




In [7]:
class PDFAgent:
    """Agent that answers questions from text extracted out of a PDF."""

    def __init__(self, name: str):
        self.name = name

    def execute_task(self, pdf_text: str, user_question: str) -> str:
        """
        Build a prompt from the PDF text and the question, then return the
        language model's output for it. Announces itself on stdout first.
        """
        print(f"{self.name} is answering based on PDF content.")
        prompt = (
            f"Context from PDF:\n{pdf_text}\n\n"
            f"Question: {user_question}\n"
            "Answer:"
        )
        answer = generate_text(prompt)
        return answer


class CSVAgent:
    """Agent that answers questions from a preview of tabular CSV data."""

    def __init__(self, name: str):
        self.name = name

    def execute_task(self, csv_data: pd.DataFrame, user_question: str) -> str:
        """
        Build a prompt from a text preview of the frame and the question,
        then return the language model's output for it. Announces itself on
        stdout first.
        """
        print(f"{self.name} is answering based on CSV data.")

        # Only the rows returned by head() are shown to the model, rendered
        # as plain text; the rest of the frame is not part of the prompt.
        preview = csv_data.head().to_string()
        prompt = (
            f"Context from CSV:\n{preview}\n\n"
            f"Question: {user_question}\n"
            "Answer:"
        )
        return generate_text(prompt)


In [8]:
class MultiAgentSystem:
    """Answer a question with the PDF agent, the CSV agent, or both, as the router decides.

    Note: this reuses the name of the list-based class defined in an earlier
    cell and replaces it once this cell has run.
    """

    def __init__(self, pdf_agent: PDFAgent, csv_agent: CSVAgent, router: RouteQuestion):
        self.pdf_agent = pdf_agent
        self.csv_agent = csv_agent
        self.router = router

    def _ask_pdf(self, user_question: str):
        # The router object doubles as the holder of the extracted PDF text.
        return self.pdf_agent.execute_task(self.router.pdf_text, user_question)

    def _ask_csv(self, user_question: str):
        # The router object doubles as the holder of the loaded CSV frame.
        return self.csv_agent.execute_task(self.router.csv_data, user_question)

    def run(self, user_question: str):
        """Route ``user_question`` and return the selected agent's answer.

        For any route other than 'pdf' or 'csv', both agents are queried
        (PDF first, then CSV) and their answers are returned in one labelled
        string.
        """
        destination = self.router.route(user_question)

        if destination == 'pdf':
            return self._ask_pdf(user_question)
        if destination == 'csv':
            return self._ask_csv(user_question)

        # 'both' or anything unrecognised: ask both agents and combine.
        pdf_reply = self._ask_pdf(user_question)
        csv_reply = self._ask_csv(user_question)
        return f"PDF Agent's Answer:\n{pdf_reply}\n\nCSV Agent's Answer:\n{csv_reply}"


In [9]:
# Demo 2: route one question to the PDF agent, the CSV agent, or both.
# This cell rebinds several globals from the earlier demo cell (pdf_agent,
# csv_agent, multi_agent_system, ...) and needs the router-based
# MultiAgentSystem from the cell directly above.

# Placeholder input files. The recorded run of this cell failed with
# "FileNotFoundError: no such file: 'example.pdf'", so supply real files first.
pdf_path = "example.pdf"
csv_path = "example.csv"

# One specialised agent per data source
pdf_agent = PDFAgent(name="PDF Text Extraction Agent")
csv_agent = CSVAgent(name="CSV Data Extraction Agent")

# Read PDF and CSV data up front; the router holds both for the agents
pdf_text = extract_text_from_pdf(pdf_path)
csv_data = load_csv_data(csv_path)

# Initialize routing and multi-agent system
router = RouteQuestion(pdf_text=pdf_text, csv_data=csv_data)
multi_agent_system = MultiAgentSystem(pdf_agent=pdf_agent, csv_agent=csv_agent, router=router)

# Collect user input (the question); blocks until the user answers
user_question = input("Please ask a question: ")

# Run the multi-agent system with the user question
print("\nRunning multi-agent system with your question...\n")
result = multi_agent_system.run(user_question)

# Display the result: one agent's answer, or both answers in one labelled string
print(f"\nResponse:\n{result}")


FileNotFoundError: no such file: 'example.pdf'