### *Synthetic Data for the agents sandbox*

#### *The Mock CRM (/data/crm_mock):*

 - accounts.csv: 10 companies (e.g., "CyberDyne Systems", "Acme Corp") with distinct industries.

 - contacts.csv: 30 people (3 per account) with roles (CEO, CTO, Procurement_Mgr) and email addresses.

 - deals.csv: Historic deal data (Won/Lost) to test if the agent checks history.

#### *The Knowledge Base (/data/knowledge_base):*

 - product_whitepaper.md: A technical document describing your fake product's features.

 - security_policy.md: A strict list of what you can and cannot promise clients (used to test "Safety").

 - pricing_guide.pdf: A complex table of numbers (used to test "Math").

#### *The Logs (/data/transcripts):*

 - 5 distinct "Meeting Transcripts" filled with "umms", "ahhs", and implied tasks.

In [2]:
import pandas as pd
import os
import random
from fpdf import FPDF

# --- PATH SETUP ---
# Every generated artifact lives under ``base_path``. The path is relative, so
# it resolves against the current working directory of the running kernel.
base_path = "../data"
crm_path = os.path.join(base_path, "crm_mock")
kb_path = os.path.join(base_path, "knowledge_base")
transcript_path = os.path.join(base_path, "transcripts")

# Create the output directories up front; exist_ok=True keeps re-runs harmless.
for p in [crm_path, kb_path, transcript_path]:
    os.makedirs(p, exist_ok=True)

# The banner previously contained mojibake (UTF-8 emoji bytes decoded as
# cp1252); the intended rocket emoji is restored here.
print("🚀 Initializing Enterprise World Generation...")

# --- 1. GENERATE MOCK CRM (10 Accounts, 30 Contacts, 15 Deals) ---
print("... Building CRM Database")

# Core account records as (account_id, company_name, industry) tuples.
# Later sections iterate over this list, so its shape must stay a 3-tuple.
accounts_list = [
    ("ACC-001", "CyberDyne Systems", "Defense"),
    ("ACC-002", "Acme Corp", "Manufacturing"),
    ("ACC-003", "Wayne Enterprises", "Conglomerate"),
    ("ACC-004", "Stark Industries", "Technology"),
    ("ACC-005", "Massive Dynamic", "R&D"),
    ("ACC-006", "Oscorp", "BioTech"),
    ("ACC-007", "Tyrell Corp", "Robotics"),
    ("ACC-008", "Soylent Corp", "Food/Bev"),
    ("ACC-009", "InGen", "BioTech"),
    ("ACC-010", "Globex", "Software")
]

# Per-account extras, positionally aligned with accounts_list
# (entry i belongs to accounts_list[i]).
account_statuses = [
    "Active", "Churned", "Active", "Active", "Prospect",
    "Active", "Churned", "Active", "Prospect", "Active",
]
account_revenues = [
    5000000, 120000, 9500000, 8000000, 0,
    3200000, 50000, 900000, 0, 1500000,
]

# Build the base frame, then append the two extra columns in a single step;
# column order in the CSV is id, name, industry, status, total_revenue.
df_accounts = pd.DataFrame(
    accounts_list,
    columns=["account_id", "company_name", "industry"],
).assign(status=account_statuses, total_revenue=account_revenues)

accounts_csv = os.path.join(crm_path, "accounts.csv")
df_accounts.to_csv(accounts_csv, index=False)

# Generating 3 contacts per account (30 total)
contacts_data = []
roles = ["CEO", "CTO", "Procurement_Mgr"]
first_names = ["Alice", "Bob", "Charlie"]  # paired one-to-one with roles

for idx, (acc_id, acc_name, _) in enumerate(accounts_list):
    # The first word of the company name doubles as the contact's surname
    # and, lower-cased, as the e-mail domain.
    company_token = acc_name.split()[0]
    email_domain = f"{company_token.lower()}.com"

    # Contact ids are "CT-<account index>0<1..3>", e.g. CT-001 .. CT-903.
    for seq, (first_name, role) in enumerate(zip(first_names, roles), start=1):
        contacts_data.append([
            f"CT-{idx}0{seq}",
            acc_id,
            f"{first_name} {company_token}",
            role,
            f"{first_name.lower()}@{email_domain}",
        ])

contact_columns = ["contact_id", "account_id", "name", "role", "email"]
df_contacts = pd.DataFrame(contacts_data, columns=contact_columns)
df_contacts.to_csv(os.path.join(crm_path, "contacts.csv"), index=False)

# Generating 15 Deals
# A dedicated, seeded generator makes deals.csv identical on every run, so
# checks that depend on deal history have a stable ground truth. Using a
# private Random instance leaves the global `random` state untouched.
deal_count = 15
deal_rng = random.Random(42)

# Hoisted: the pool of account ids does not change between draws.
deal_account_ids = [acc[0] for acc in accounts_list]
deal_codenames = ["Alpha", "Beta", "Gamma", "Delta"]
deal_stages = ["Closed Won", "Negotiation", "Discovery", "Closed Lost"]

deals_data = {
    "deal_id": [f"DL-{100+i}" for i in range(deal_count)],
    "account_id": [deal_rng.choice(deal_account_ids) for _ in range(deal_count)],
    "deal_name": [f"Project {deal_rng.choice(deal_codenames)} License" for _ in range(deal_count)],
    "amount": [deal_rng.randint(50000, 1500000) for _ in range(deal_count)],
    "stage": [deal_rng.choice(deal_stages) for _ in range(deal_count)],
    "close_date": ["2024-06-15" for _ in range(deal_count)]  # Simplified date
}
df_deals = pd.DataFrame(deals_data)
df_deals.to_csv(os.path.join(crm_path, "deals.csv"), index=False)


# --- 2. GENERATE KNOWLEDGE BASE (MD + PDF) ---
print("... Authoring Knowledge Base")

# Both markdown files are written with an explicit UTF-8 encoding so the
# result does not depend on the platform's default locale encoding.

# A. Whitepaper (MD)
whitepaper_text = "\n".join([
    "# Product: Omni-AI Sentinel 2.0",
    "## Overview",
    "Omni-AI Sentinel is an enterprise-grade security monitoring agent.",
    "## Pricing",
    "See pricing_guide.pdf for details.",
])
with open(os.path.join(kb_path, "product_whitepaper.md"), "w", encoding="utf-8") as f:
    f.write(whitepaper_text)

# B. Security Policy (MD)
security_policy_text = "\n".join([
    "# Global Sales & Security Policy",
    "1. Discounts > 15% require VP approval.",
    "2. Startups < $1M revenue get 20% off.",
    "3. NEVER promise '100% security'.",
])
with open(os.path.join(kb_path, "security_policy.md"), "w", encoding="utf-8") as f:
    f.write(security_policy_text)

# C. Pricing Guide (Real PDF) - This tests the agent's ability to read PDFs!
pdf = FPDF()
pdf.add_page()

# Centered title line, followed by a vertical gap before the table.
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="Confidential Pricing Guide 2024", ln=1, align="C")
pdf.ln(10)

# Table body uses a smaller font; the first tuple is the header row.
pdf.set_font("Arial", size=10)
pricing_rows = [
    ("SKU", "Product Name", "List Price (Annual)"),
    ("AI-S-001", "Sentinel Standard", "$50,000"),
    ("AI-S-002", "Sentinel Enterprise", "$120,000"),
    ("AI-S-003", "Sentinel Ultimate", "$250,000"),
    ("AI-S-ADD", "Add-on: Legacy Support", "$15,000")
]
# Widths passed to cell() for the SKU, name and price columns respectively.
column_widths = (40, 80, 50)
for row in pricing_rows:
    for width, value in zip(column_widths, row):
        pdf.cell(width, 10, txt=value, border=1)
    pdf.ln()

pricing_pdf_file = os.path.join(kb_path, "pricing_guide.pdf")
pdf.output(pricing_pdf_file)


# --- 3. GENERATE TRANSCRIPTS (5 Distinct Logs) ---
print("... Simulating Meetings")

# Each entry is (output filename, speaker, company label, key request).
transcripts = [
    ("meeting_001.txt", "Pepper Potts", "Stark Ind", "Price is too high ($120k). Can we do $100k?"),
    ("meeting_002.txt", "Bruce Wayne", "Wayne Ent", "We need the Ultimate tier. Send contract by Monday."),
    ("meeting_003.txt", "Miles Dyson", "Cyberdyne", "Does this integrate with Skynet? We need an API check."),
    ("meeting_004.txt", "Norman Osborn", "Oscorp", "We are churning unless we get Legacy Support included for free."),
    ("meeting_005.txt", "Hank Scorpio", "Globex", "I want to buy 50 licenses. Who is the account manager?")
]

for filename, person, company, issue in transcripts:
    # NOTE: the leading newline and the 4-space indentation inside the
    # template are part of the written file and are kept exactly as-is.
    content = f"""
    Meeting Date: 2024-03-10
    Attendees: {person} ({company}), Sales Rep
    Transcript:
    Sales Rep: Good morning.
    {person}: Let's get to business. {issue}
    Sales Rep: I will note that down.
    {person}: Also, ensure the security terms are updated.
    """
    # Explicit encoding so the output does not depend on the platform default.
    with open(os.path.join(transcript_path, filename), "w", encoding="utf-8") as f:
        f.write(content)

# The banner previously contained mojibake (UTF-8 emoji bytes decoded as
# cp1252); the intended check-mark emoji is restored here.
print("\n✅ Enterprise World Generation Complete! (10 Accounts, 30 Contacts, PDF Created, 5 Logs)")

🚀 Initializing Enterprise World Generation...
... Building CRM Database
... Authoring Knowledge Base
... Simulating Meetings

✅ Enterprise World Generation Complete! (10 Accounts, 30 Contacts, PDF Created, 5 Logs)
