In [None]:
!pip install pandas pymongo

In [2]:
import os
import pandas as pd
from pymongo import MongoClient
from pymongo.errors import BulkWriteError

# --- CONFIGURATION ---
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/")
DB_NAME = "genai_db"
COLLECTION_NAME = "product_data"
CSV_FILE = "sample_data.csv"  # This can be parameterized

def load_csv_to_mongodb(csv_path):
    try:
        df = pd.read_csv(csv_path)
        
        # Convert dataframe to dictionary
        data = df.to_dict(orient="records")
        print(f"📄 {len(data)} records read from CSV.")

        # Connect to MongoDB
        client = MongoClient(MONGO_URI)
        db = client[DB_NAME]
        collection = db[COLLECTION_NAME]

        # Optional: Drop previous collection for clean insert (for testing)
        collection.drop()

        # Insert data
        collection.insert_many(data)
        print(f"✅ Successfully inserted {len(data)} documents into MongoDB.")
    
    except BulkWriteError as bwe:
        print("⚠️ Bulk write error:", bwe.details)
    except Exception as e:
        print("❌ Error:", str(e))

if __name__ == "__main__":
    load_csv_to_mongodb(CSV_FILE)


📄 10 records read from CSV.
✅ Successfully inserted 10 documents into MongoDB.


In [None]:
pip install langchain llama-cpp-python


In [None]:
pip install langchain-community


In [2]:
with open ("sample.txt", "r") as f:
    sample_data = f.read()

In [3]:
sample_data


'Question 1: Which products have a discount greater than "5%"?\n\nQuery:\njson\n[\n  {\n    "$addFields": {\n      "DiscountValue": { "$toInt": { "$substr": ["$Discount", 0, { "$strLenCP": "$Discount" }] } }\n    }\n  },\n  { "$match": { "DiscountValue": { "$gt": 5 } } },\n  { "$project": { "ProductName": 1, "Discount": 1, "_id": 0 } }\n]\n\nQuestion 2: Show products launched after "01-01-2022".\n\nQuery:\njson\n[\n  {\n    "$match": {\n      "LaunchDate": { "$gt": "01-01-2022" }\n    }\n  },\n  {\n    "$project": {\n      "ProductName": 1,\n      "LaunchDate": 1,\n      "_id": 0\n    }\n  }\n]\n\nQuestion 3: What is the available stock for the "Vacuum Cleaner"?\n\nQuery:\njson\n[\n  { "$match": { "ProductName": "Vacuum Cleaner" } },\n  { "$project": { "Stock": 1, "_id": 0 } }\n]\n\n'

In [5]:
import requests

def get_mongodb_aggregation_pipeline(user_question: str) -> list:
    prompt = f"""
You are a highly intelligent AI assistant who is an expert in identifying relevant questions from the user and converting them into NoSQL MongoDB aggregation pipeline queries.

Note: You must return only the aggregation pipeline query as Python code—**do not** return any explanatory text or additional output.

Please use the **schema below** to write MongoDB queries. **Do not** use any fields or structures that are not defined in this schema.

Schema:
The MongoDB collection contains product listings for an e-commerce platform. Each document represents details about a product, including pricing, brand, stock, ratings, and category.

### Fields:
1. **_id**: Unique identifier for the product.
2. **ProductID**: Numerical identifier for the product.
3. **ProductName**: Name of the product.
4. **Category**: Category of the product (e.g., Electronics, Sports, Home & Kitchen).
5. **Price**: Price of the product.
6. **Rating**: Average customer rating (out of 5).
7. **ReviewCount**: Number of customer reviews.
8. **Stock**: Number of units currently in stock.
9. **Discount**: Discount offered as a percentage string (e.g., "10%").
10. **Brand**: Brand name of the product.
11. **LaunchDate**: Launch date of the product in `dd-mm-yyyy` format.

Your job is to return the **Python code** containing the MongoDB aggregation query that satisfies the user’s question. The output must be formatted as a Python list (MongoDB aggregation pipeline syntax).

This schema provides a comprehensive view of the data structure for an Airbnb listing in MongoDB, including nested and embedded data structures that add depth and detail to the document.

Use the below sample_examples to generate your queries perfectly.

sample_question: {sample_data}
input: {user_question}
output:
"""

    response = requests.post(
        'http://localhost:11434/api/generate',
        json={
            'model': 'gemma:2b',
            'prompt': prompt,
            'stream': False
        }
    )
    
    data = response.json()
    if 'response' in data:
        # The model returns a python list as a string, so use eval safely if confident or parse manually
        # To be safe, just return the raw string for now
        return data['response']
    else:
        return f"Unexpected response format: {data}"

# Example usage:
if __name__ == "__main__":
    question = "Get all products with a rating higher than 4.5 and in stock."
    pipeline_code = get_mongodb_aggregation_pipeline(question)
    print(pipeline_code)


```python
import pymongo

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["eCommerceDB"]
collection = db["product_listings"]

# Define the aggregation pipeline query
pipeline = [
    {
        "$addFields": {
            "DiscountValue": { "$toInt": { "$substr": ["$Price", 0, { "$strLenCP": "$Price" }] } }
        }
    },
    {
        "$match": {
            "DiscountValue": { "$gt": 5 }
        }
    },
    {
        "$project": {
            "ProductName": 1,
            "Discount": 1,
            "_id": 0
        }
    }
]

# Execute the aggregation pipeline query
products = collection.aggregate(pipeline)

# Print the results
print(products)
```


In [None]:
import requests
import pymongo
from pymongo import MongoClient

def get_mongodb_aggregation_pipeline(sample_question: str, user_question: str) -> str:
    prompt = f"""
You are a highly intelligent AI assistant who is an expert in identifying relevant questions from the user and converting them into NoSQL MongoDB aggregation pipeline queries.

Note: You must return only the aggregation pipeline query as Python code—**do not** return any explanatory text or additional output.

Please use the **schema below** to write MongoDB queries. **Do not** use any fields or structures that are not defined in this schema.

Schema:
The MongoDB collection contains product listings for an e-commerce platform. Each document represents details about a product, including pricing, brand, stock, ratings, and category.

### Fields:
1. **_id**: Unique identifier for the product.
2. **ProductID**: Numerical identifier for the product.
3. **ProductName**: Name of the product.
4. **Category**: Category of the product (e.g., Electronics, Sports, Home & Kitchen).
5. **Price**: Price of the product.
6. **Rating**: Average customer rating (out of 5).
7. **ReviewCount**: Number of customer reviews.
8. **Stock**: Number of units currently in stock.
9. **Discount**: Discount offered as a percentage string (e.g., "10%").
10. **Brand**: Brand name of the product.
11. **LaunchDate**: Launch date of the product in `dd-mm-yyyy` format.

Your job is to return the **Python code** containing the MongoDB aggregation query that satisfies the user’s question. The output must be formatted as a Python list (MongoDB aggregation pipeline syntax).

sample_question: {sample_question}
input: {user_question}
output:
"""

    response = requests.post(
        'http://localhost:11434/api/generate',
        json={
            'model': 'gemma:2b',
            'prompt': prompt,
            'stream': False
        }
    )
    
    data = response.json()
    if 'response' in data:
        return data['response']
    else:
        return None

def run_query_and_display_results(pipeline_code: str, collection):
    try:
        # Convert the generated string to a Python list
        pipeline = eval(pipeline_code.strip())
        print("🔍 Generated MongoDB Query:\n", pipeline)
        
        print("\n📦 Returned Records:")
        for doc in collection.aggregate(pipeline):
            print(doc)
    except Exception as e:
        print("❌ Error during execution:", e)

if __name__ == "__main__":
    # Sample and question
    sample = "[{'$match': {'Category': 'Electronics'}}, {'$project': {'ProductName': 1, 'Price': 1, '_id': 0}}]"
    question = "List all Sports products with stock greater than 50 and a discount above 10%."

    # Get aggregation pipeline from LLM
    pipeline_code = get_mongodb_aggregation_pipeline(sample, question)
    if pipeline_code:
        # Connect to MongoDB (adjust URI and DB/Collection names as needed)
        client = MongoClient("mongodb://localhost:27017/")
        db = client["genai_db"]
        collection = db["product_data"]

        # Run and display results
        run_query_and_display_results(pipeline_code, collection)
    else:
        print("⚠️ Failed to generate query.")


In [6]:
import requests
import pymongo
from pymongo import MongoClient

# Function to send prompt + question to Ollama and get MongoDB query
def get_query_from_llm(system_prompt: str, user_question: str) -> str:
    final_prompt = f"{system_prompt.strip()}\ninput: {user_question.strip()}\noutput:"

    response = requests.post(
        'http://localhost:11434/api/generate',
        json={
            'model': 'gemma:2b',
            'prompt': final_prompt,
            'stream': False
        }
    )

    data = response.json()
    return data['response'].strip() if 'response' in data else None

# Function to run aggregation query in MongoDB
def execute_mongodb_query(pipeline_code: str, db_name="genai_db", collection_name="product_data"):
    client = MongoClient("mongodb://localhost:27017/")
    collection = client[db_name][collection_name]

    try:
        pipeline = eval(pipeline_code)
        print("\n🧠 LLM-Generated Query:\n", pipeline)
        print("\n📦 MongoDB Results:")
        results = collection.aggregate(pipeline)
        for doc in results:
            print(doc)
    except Exception as e:
        print("❌ Error:", e)

# MAIN FUNCTION
def main():
    # Load the system prompt from a local file or predefined string
    with open("prompt.txt", "r") as f:
        system_prompt = f.read()

    # Take input from user
    user_question = input("Ask your query based on e-commerce product data:\n> ")

    # Step 1: Get query from LLM
    query_code = get_query_from_llm(system_prompt, user_question)

    if query_code:
        # Step 2: Run the generated MongoDB query
        execute_mongodb_query(query_code)
    else:
        print("⚠️ Failed to generate query from LLM.")

if __name__ == "__main__":
    main()


❌ Error: invalid syntax (<string>, line 1)


In [None]:
import requests
import pymongo
from pymongo import MongoClient

# Load the system prompt once
def load_system_prompt(prompt_path="prompt.txt"):
    with open(prompt_path, "r") as file:
        return file.read().strip()

# Generate MongoDB query using local LLM
def get_query_from_llm(system_prompt: str, user_question: str) -> str:
    full_prompt = f"{system_prompt}\ninput: {user_question.strip()}\noutput:"

    response = requests.post(
        'http://localhost:11434/api/generate',
        json={
            'model': 'gemma:2b',
            'prompt': full_prompt,
            'stream': False
        }
    )
    data = response.json()
    return data.get('response', '').strip()

# Execute the generated MongoDB aggregation query
def execute_mongodb_query(pipeline_code: str, db_name="genai_db", collection_name="product_data"):
    client = MongoClient("mongodb://localhost:27017/")
    collection = client[db_name][collection_name]

    try:
        pipeline = eval(pipeline_code)
        print("\n🧠 LLM-Generated Query:\n", pipeline)
        print("\n📦 MongoDB Results:")
        results = collection.aggregate(pipeline)
        for doc in results:
            print(doc)
    except Exception as e:
        print("❌ Error:", e)

# Main interaction loop
def main():
    system_prompt = load_system_prompt()

    print("🔁 MongoDB Query Assistant (Ctrl+C to exit)\n")
    while True:
        try:
            user_question = input("Ask your e-commerce query:\n> ")
            if not user_question.strip():
                print("⚠️ Please enter a valid question.")
                continue

            query_code = get_query_from_llm(system_prompt, user_question)
            if query_code:
                execute_mongodb_query(query_code)
            else:
                print("⚠️ LLM did not return a valid query.\n")

        except KeyboardInterrupt:
            print("\n👋 Exiting. Goodbye!")
            break

if __name__ == "__main__":
    main()


🔁 MongoDB Query Assistant (Ctrl+C to exit)

❌ Error: invalid syntax (<string>, line 1)
❌ Error: invalid syntax (<string>, line 1)
