<a href="https://colab.research.google.com/github/urvi1703/Project/blob/master/CrewAI_Poly_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install crewai beautifulsoup4 requests pandas

Collecting crewai
  Downloading crewai-0.98.0-py3-none-any.whl.metadata (27 kB)
Collecting appdirs>=1.4.4 (from crewai)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting auth0-python>=4.7.1 (from crewai)
  Downloading auth0_python-4.7.2-py3-none-any.whl.metadata (8.9 kB)
Collecting chromadb>=0.5.23 (from crewai)
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting instructor>=1.3.3 (from crewai)
  Downloading instructor-1.7.2-py3-none-any.whl.metadata (18 kB)
Collecting json-repair>=0.25.2 (from crewai)
  Downloading json_repair-0.35.0-py3-none-any.whl.metadata (11 kB)
Collecting jsonref>=1.1.0 (from crewai)
  Downloading jsonref-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting litellm==1.57.4 (from crewai)
  Downloading litellm-1.57.4-py3-none-any.whl.metadata (36 kB)
Collecting opentelemetry-exporter-otlp-proto-http>=1.22.0 (from crewai)
  Downloading opentelemetry_exporter_otlp_proto_http-1.29.0-py3-none-any.whl.metadata (2.2 kB)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import logging
from crewai import Agent

# Setup logging for debugging.
# force=True replaces any handler already attached to the root logger.
# Without it, basicConfig() silently does nothing when a root handler exists
# (as hosted notebook runtimes commonly arrange), so the INFO level and the
# timestamped format below would never take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Agent 1: Data Collector
class DataCollector(Agent):
    """Agent that scrapes product listings from a fixed set of websites.

    The scraped records are written to ``raw_data.json`` and also returned
    to the caller so the next pipeline stage can consume them directly.
    """

    def __init__(self):
        super().__init__(
            role="Data Collector",
            goal="Collect data from multiple gambling websites",
            backstory="This agent is responsible for scraping data from predefined websites."
        )

    def run(self, timeout=10):
        """Fetch each configured site, extract its products and save them.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
                Without a timeout ``requests`` waits indefinitely, so one
                unreachable host could stall the whole run.

        Returns:
            A dict mapping site name to a list of product dicts with the
            keys ``product_name``, ``product_price`` and ``source``. Sites
            whose request failed are logged and left out of the dict.
        """
        logging.info("Starting data collection...")

        # URLs to scrape
        urls = {
            "Polymarket": "https://polymarket.com",
            "Kalshi": "https://kalshi.com",
            "PredictionMarket": "https://prediction-market.com",
        }

        data = {}
        for site_name, url in urls.items():
            logging.info(f"Scraping {site_name}...")

            # Only the network call is guarded: a failure here is expected
            # (site down, blocked, timed out) and must not abort other sites.
            try:
                response = requests.get(url, timeout=timeout)
                # Treat 4xx/5xx as failures instead of parsing an error page.
                response.raise_for_status()
            except requests.RequestException as e:
                logging.error(f"Error scraping {site_name}: {e}")
                continue

            soup = BeautifulSoup(response.content, 'html.parser')

            # Example scraping logic: Adjust based on website structure
            products = []
            for item in soup.select('.product'):  # Update selector
                name_tag = item.select_one('.product-name')
                price_tag = item.select_one('.product-price')
                if name_tag is None or price_tag is None:
                    # select_one() returns None when nothing matches; skip
                    # just this entry rather than losing the whole site.
                    logging.warning(f"Skipping incomplete product entry on {site_name}")
                    continue
                products.append({
                    "product_name": name_tag.text,
                    "product_price": price_tag.text,
                    "source": site_name,
                })
            data[site_name] = products

        # Save raw data to JSON
        with open("raw_data.json", "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        logging.info("Data collection completed.")
        return data

# Agent 2: Product Identifier
class ProductIdentifier(Agent):
    """Agent that merges per-site product records into unified products.

    Records are grouped by product name (case- and surrounding-whitespace-
    insensitive). The result is written to ``unified_data.json`` and
    returned to the caller.
    """

    def __init__(self):
        super().__init__(
            role="Product Identifier",
            goal="Identify and unify products across websites",
            backstory="This agent processes collected data to identify similar products and unify them."
        )

    def run(self, raw_data):
        """Group scraped products by name and collect their per-site prices.

        Args:
            raw_data: Dict mapping site name to a list of product dicts.
                Each product dict must contain ``product_name`` and
                ``product_price``; ``source`` is used when present and
                otherwise falls back to the site name it is listed under.

        Returns:
            A list of dicts with the keys ``product_name`` (first spelling
            seen), ``prices`` (dict of source -> price) and ``confidence``.

        Raises:
            KeyError: If a product dict lacks ``product_name`` or
                ``product_price``.
        """
        logging.info("Starting product identification and unification...")

        product_map = {}
        for site, products in raw_data.items():
            for product in products:
                name = product["product_name"]
                price = product["product_price"]
                source = product.get("source", site)

                # Example matching logic: Replace with CrewAI LLM-powered RAG
                # Scraped text often carries stray whitespace, so strip it
                # before comparing; otherwise identical names fail to merge.
                key = name.strip().lower()
                entry = product_map.get(key)
                if entry is None:
                    product_map[key] = {
                        "name": name,
                        "prices": {source: price},
                        "confidence": 1.0,  # Placeholder
                    }
                else:
                    entry["prices"][source] = price
                    entry["confidence"] += 0.1  # Increment confidence

        # Transform map into a unified list.
        # NOTE: confidence starts at 1.0 and is clamped to 1.0 here, so with
        # the placeholder scoring above every product reports exactly 1.0.
        unified_data = [
            {
                "product_name": entry["name"],
                "prices": entry["prices"],
                "confidence": min(entry["confidence"], 1.0),
            }
            for entry in product_map.values()
        ]

        # Save unified data to JSON
        with open("unified_data.json", "w", encoding="utf-8") as f:
            json.dump(unified_data, f, indent=4)
        logging.info("Product identification completed.")
        return unified_data

# Agent 3: CSV Generator
class CSVGenerator(Agent):
    """Agent that flattens unified product data into a CSV report.

    The report is written to ``unified_products.csv`` with one row per
    (product, source) pair.
    """

    def __init__(self):
        super().__init__(
            role="CSV Generator",
            goal="Generate a CSV file from unified product data",
            backstory="This agent processes unified data into a CSV format for reporting."
        )

    def run(self, unified_data):
        """Write one CSV row per product/source price and return the table.

        Args:
            unified_data: List of dicts with the keys ``product_name``,
                ``prices`` (dict of source -> price) and ``confidence``.

        Returns:
            The ``pandas.DataFrame`` that was written to disk, with the
            columns ``product_name``, ``source``, ``price``, ``confidence``.
        """
        logging.info("Generating CSV from unified data...")

        columns = ["product_name", "source", "price", "confidence"]

        # Flatten data for CSV
        csv_data = []
        for product in unified_data:
            for source, price in product["prices"].items():
                csv_data.append({
                    "product_name": product["product_name"],
                    "source": source,
                    "price": price,
                    "confidence": product["confidence"],
                })

        # Create and save CSV.
        # Passing the columns explicitly keeps the header row in the file
        # even when no products were found; an empty list alone would give a
        # column-less frame and a CSV with no header.
        df = pd.DataFrame(csv_data, columns=columns)
        df.to_csv("unified_products.csv", index=False)
        logging.info("CSV generation completed.")
        return df

# Main function to run all agents
def main():
    """Run the three agents in sequence: collect, unify, then export.

    Each stage's return value is handed straight to the next stage; the
    stages also write their own output files as a side effect.
    """
    logging.info("Starting CrowdWisdomTrading AI Agent workflow...")

    # Stage 1: scrape the configured sites.
    scraped = DataCollector().run()

    # Stage 2: merge matching products across sites.
    merged = ProductIdentifier().run(scraped)

    # Stage 3: flatten the merged products into the CSV report.
    CSVGenerator().run(merged)

    logging.info("Workflow completed. Check outputs: 'raw_data.json', 'unified_data.json', and 'unified_products.csv'.")

# Entry point: run the full pipeline only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


LLM value is None


ERROR:root:Error scraping PredictionMarket: HTTPSConnectionPool(host='prediction-market.com', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x78bfebf7d190>, 'Connection to prediction-market.com timed out. (connect timeout=None)'))


LLM value is None
LLM value is None
