### *Synthetic Data for the agents sandbox*

#### *The Mock CRM (/data/crm_mock):*

 - accounts.csv: 10 companies (e.g., "CyberDyne Systems", "Acme Corp"), each with an industry, status, and total revenue.

 - contacts.csv: 30 people (3 per account) with roles (CEO, CTO, Procurement_Mgr) and email addresses.

 - deals.csv: Benchmark-specific deals (Discovery/Negotiation/Closed Won/Closed Lost) for testing.

#### *The Knowledge Base (/data/knowledge_base):*

 - product_whitepaper.md, security_policy.md, etc.

 - pricing_guide.pdf: A complex table of numbers (used to test "Math").

#### *The Logs (/data/transcripts):*

 - 5 distinct "Meeting Transcripts" (plus one benchmark-specific CyberDyne transcript) filled with "umms", "ahhs", and implied tasks.

In [None]:
import pandas as pd
import os
import random
from fpdf import FPDF

# --- PATH SETUP ---
# Every generated artifact is written under ../data, which is resolved
# relative to the current working directory of the process.
base_path = "../data"
crm_path = os.path.join(base_path, "crm_mock")
kb_path = os.path.join(base_path, "knowledge_base")
transcript_path = os.path.join(base_path, "transcripts")

# Create the output directories up front; exist_ok=True keeps re-runs safe.
for p in [crm_path, kb_path, transcript_path]:
    os.makedirs(p, exist_ok=True)

print("🚀 Initializing Enterprise World Generation...")

# --- 1. GENERATE MOCK CRM ---
print("... Building CRM Database")

# A. ACCOUNTS
# Each entry is (account_id, company_name, industry). The contacts section
# unpacks these 3-tuples, so the shape must stay as-is.
accounts_list = [
    ("ACC-001", "CyberDyne Systems", "Defense"),
    ("ACC-002", "Acme Corp", "Manufacturing"),
    ("ACC-003", "Wayne Enterprises", "Conglomerate"),
    ("ACC-004", "Stark Industries", "Technology"),
    ("ACC-005", "Massive Dynamic", "R&D"),
    ("ACC-006", "Oscorp", "BioTech"),
    ("ACC-007", "Tyrell Corp", "Robotics"),
    ("ACC-008", "Soylent Corp", "Food/Bev"),
    ("ACC-009", "InGen", "BioTech"),
    ("ACC-010", "Globex", "Software"),
]

# Per-account values, listed in the same order as accounts_list.
# Statuses are aligned loosely with the deals.
account_statuses = [
    "Active", "Churned", "Active", "Active", "Prospect",
    "Active", "Active", "Active", "Prospect", "Active",
]
account_revenues = [
    5000000, 120000, 9500000, 8000000, 0,
    3200000, 50000, 900000, 0, 1500000,
]

df_accounts = pd.DataFrame(
    accounts_list,
    columns=["account_id", "company_name", "industry"],
).assign(
    status=account_statuses,
    total_revenue=account_revenues,
)

accounts_csv = os.path.join(crm_path, "accounts.csv")
df_accounts.to_csv(accounts_csv, index=False)

# B. CONTACTS
# Every account gets the same three contacts. The first word of the company
# name serves as both the contact's surname and the email domain.
contact_templates = [
    ("Alice", "CEO"),
    ("Bob", "CTO"),
    ("Charlie", "Procurement_Mgr"),
]

contacts_data = []
for account_index, (account_id, company_name, _industry) in enumerate(accounts_list):
    company_key = company_name.split()[0]
    email_domain = f"{company_key.lower()}.com"
    for seq, (first_name, role) in enumerate(contact_templates, start=1):
        contacts_data.append([
            f"CT-{account_index}0{seq}",
            account_id,
            f"{first_name} {company_key}",
            role,
            f"{first_name.lower()}@{email_domain}",
        ])

# Ensure Alice CyberDyne specifically exists for benchmarks
contacts_data.append(["CT-001", "ACC-001", "Alice CyberDyne", "CEO", "alice@cyberdyne.com"])

contact_columns = ["contact_id", "account_id", "name", "role", "email"]
# De-duplicate on email (first occurrence wins), so the explicit benchmark row
# above collapses into the generated one when both are present.
df_contacts = pd.DataFrame(contacts_data, columns=contact_columns).drop_duplicates(
    subset=["email"]
)

contacts_csv = os.path.join(crm_path, "contacts.csv")
df_contacts.to_csv(contacts_csv, index=False)

# C. DEALS (Benchmarks Specifics)
print("... Generating Benchmark Deals")

# Column names, in the order they appear in deals.csv.
deal_fields = ("deal_id", "account_id", "deal_name", "amount", "stage", "close_date")
deal_rows = [
    ("DL-112", "ACC-004", "Stark AI Upgrade", 1180425, "Negotiation", "2025-12-01"),
    ("DL-100", "ACC-003", "Wayne Enterprise License", 5000000, "Negotiation", "2025-11-15"),
    ("DL-111", "ACC-002", "Acme Renewal", 150000, "Discovery", "2025-10-30"),
    ("DL-105", "ACC-006", "Oscorp Global Rollout", 750000, "Negotiation", "2026-01-20"),
    ("DL-108", "ACC-007", "Tyrell Pilot", 330799, "Closed Won", "2024-06-15"),
    ("DL-103", "ACC-010", "Globex PoC", 50000, "Closed Lost", "2024-02-10"),
]

# One dict per deal, keyed by deal_fields.
deals_data = [dict(zip(deal_fields, row)) for row in deal_rows]
df_deals = pd.DataFrame(deals_data)

deals_csv = os.path.join(crm_path, "deals.csv")
df_deals.to_csv(deals_csv, index=False)


# --- 2. GENERATE KNOWLEDGE BASE ---
print("... Authoring Knowledge Base")

# Maps each markdown filename to its full file content.
# NOTE(review): security_policy.md and discount_policy.md state different
# startup revenue thresholds ($1M vs $5M) -- presumably a deliberate conflict
# for the benchmarks; confirm before "fixing" either one.
kb_files = {
    "product_whitepaper.md": "# Product: Omni-AI Sentinel 2.0\n## Overview\nOmni-AI Sentinel is an enterprise-grade security monitoring agent.\n## Features\n- On-premise control\n- Real-time threat detection\n## Pricing\nSee pricing_guide.pdf for details.",
    "security_policy.md": "# Global Sales & Security Policy\n1. Discounts > 15% require VP approval.\n2. Startups < $1M revenue get 20% off.\n3. NEVER promise '100% security'.",
    "discount_policy.md": "# Discount Policy\n1. >15% requires VP Approval.\n2. Startups <$5M revenue get 20% off.\n3. Never promise 100% security.",
    "sales_playbook.md": "# Sales Playbook\n- Value Prop: On-premise control.\n- 30/60/90 Plan: Discovery, PoC, Production.",
    "winback_playbook.md": "# Win-Back Strategy\n- Ask about what changed.\n- Propose a low-friction pilot.",
    "delivery_methodology.md": "# Delivery Methodology\n- Phase 1: Architecture Review.\n- Phase 2: Docker Deployment.",
    "product_roadmap.md": "# Roadmap\n- Q4: Omni-AI Sentinel 2.0 (Live).\n- Q1: Cloud Connector (Beta).",
    "loss_review_playbook.md": "# Loss Review\n- Ask: Why us? Why now? Why them?",
    "case_study_library.md": "# Case Studies\n- CyberDyne: 50% reduction in breaches.\n- Wayne Enterprises: 200% ROI in year 1."
}

# Write Markdown Files
# An explicit encoding keeps the output independent of the platform's locale
# default.
for filename, content in kb_files.items():
    with open(os.path.join(kb_path, filename), "w", encoding="utf-8") as f:
        f.write(content)

# Generate PDF (Pricing Guide)
# Single page: a centered title followed by a bordered three-column table.
pdf = FPDF()
pdf.add_page()

# Title line
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="Confidential Pricing Guide 2024", ln=1, align='C')
pdf.ln(10)

# Table: the first row is the header, the remaining rows are SKUs.
pdf.set_font("Arial", size=10)
pricing_rows = [
    ("SKU", "Product Name", "List Price (Annual)"),
    ("AI-S-001", "Sentinel Standard", "$50,000"),
    ("AI-S-002", "Sentinel Enterprise", "$120,000"),
    ("AI-S-003", "Sentinel Ultimate", "$250,000"),
    ("AI-S-ADD", "Add-on: Legacy Support", "$15,000"),
]
# Cell width for each of the three columns, in the same order as the row tuples.
column_widths = (40, 80, 50)
for row in pricing_rows:
    for cell_width, cell_text in zip(column_widths, row):
        pdf.cell(cell_width, 10, txt=cell_text, border=1)
    pdf.ln()

pricing_pdf = os.path.join(kb_path, "pricing_guide.pdf")
pdf.output(pricing_pdf)


# --- 3. GENERATE TRANSCRIPTS ---
print("... Simulating Meetings")
# Each entry is (filename, person, company, issue) and is rendered through a
# shared meeting template further down.
transcripts = [
    ("meeting_001.txt", "Pepper Potts", "Stark Ind", "Price is too high ($120k). Can we do $100k?"),
    ("meeting_002.txt", "Bruce Wayne", "Wayne Ent", "We need the Ultimate tier. Send contract by Monday."),
    ("meeting_003.txt", "Miles Dyson", "Cyberdyne", "Does this integrate with Skynet? We need an API check."),
    ("meeting_004.txt", "Norman Osborn", "Oscorp", "We are churning unless we get Legacy Support included for free."),
    ("meeting_005.txt", "Hank Scorpio", "Globex", "I want to buy 50 licenses. Who is the account manager?")
]

# Add Benchmark Specific Meeting
# Timestamped, multi-speaker transcript; written with the surrounding blank
# lines stripped.
cyberdyne_transcript = """
[00:00:00] Alice (CEO): Okay, let's get started. Thanks for joining.
[00:00:15] Sales Rep: Thanks Alice. I wanted to review the timeline.
[00:01:00] Bob (CTO): Yeah, the timeline is tight. I still haven't received the API keys.
[00:01:30] Sales Rep: I can get those to you.
[00:02:00] Alice (CEO): Bob, can you please finalize the firewall configuration by next Friday? We can't delay the pilot.
[00:02:15] Bob (CTO): Fine, but I need the documentation.
[00:02:30] Sales Rep: I'll send the whitepaper right after this call. Alice, can you sign the SOW amendment?
[00:02:45] Alice (CEO): Yes, I'll sign it by EOD tomorrow.
[00:03:00] Sales Rep: Great. Also, I heard rumors that Wayne Enterprises is looking at a competitor.
[00:03:15] Bob (CTO): Yeah, I heard they are talking to LexCorp.
"""
# An explicit encoding keeps the output independent of the platform's locale
# default.
with open(os.path.join(transcript_path, "meeting_cyberdyne_001.txt"), "w", encoding="utf-8") as f:
    f.write(cyberdyne_transcript.strip())

# Render each (filename, person, company, issue) entry through the shared
# meeting template and write it to the transcripts directory.
for filename, person, company, issue in transcripts:
    # Build the file line by line so no source-code indentation leaks into the
    # output; stripping an indented triple-quoted string would only clean the
    # first line and leave the rest indented.
    transcript_lines = [
        "Meeting Date: 2024-03-10",
        f"Attendees: {person} ({company}), Sales Rep",
        "Transcript:",
        "Sales Rep: Good morning.",
        f"{person}: Let's get to business. {issue}",
        "Sales Rep: I will note that down.",
        f"{person}: Also, ensure the security terms are updated.",
    ]
    content = "\n".join(transcript_lines)
    # An explicit encoding keeps the output independent of the platform's
    # locale default.
    with open(os.path.join(transcript_path, filename), "w", encoding="utf-8") as f:
        f.write(content)

print("\n✅ Enterprise World Generation Complete!")