In [16]:
import pandas as pd
import random
from faker import Faker
from datetime import timedelta

# --- Generator configuration -------------------------------------------------

# Number of synthetic incident rows to produce.
num_records = 50000

# Pools that the generation loop samples from.
categories = [
    'Network',
    'Software',
    'Hardware',
    'Access/Auth',
    'Security',
]
priorities = [
    'Critical',
    'High',
    'Medium',
    'Low',
]
statuses = [
    'Closed',
    'Open',
    'Pending Vendor',
]
agents = [
    'Agent_A',
    'Agent_B',
    'Agent_C',
    'Agent_D',
    'Agent_E',
]

# Mutable state filled in by the generation loop: the next ticket sequence
# number (used to build the ticket ID) and the accumulated rows.
ticket_counter = 1
data = []

# Faker instance used to draw random ticket-open timestamps.
fake = Faker()

# Lookup tables used by the generation loop. They never change between
# iterations, so they are built once here instead of once per record.

# Relative weights for drawing a priority; positions line up with `priorities`.
priority_weights = [10, 20, 30, 40]

# priority -> (min resolution hours, max resolution hours, SLA limit in hours)
# NOTE(review): for 'Medium' and 'Low' the maximum resolution time equals the
# SLA limit, and a breach requires hours > limit, so those two priorities can
# never be flagged as breached -- confirm this is intended.
priority_rules = {
    'Critical': (1, 12, 4),
    'High': (4, 48, 24),
    'Medium': (12, 72, 72),
    'Low': (24, 120, 120),
}

# category -> issue types that may be reported under it
issue_map = {
    'Network': ['VPN Disconnect', 'Slow Wifi', 'Firewall Block'],
    'Software': ['Excel Crashing', 'SAP Login Fail', 'Teams Lag'],
    'Hardware': ['Printer Jam', 'Laptop Blue Screen', 'Mouse Broken'],
    'Access/Auth': ['Password Reset', 'MFA Failure', 'Account Locked'],
    'Security': ['Phishing Alert']
}

# Build `num_records` ticket rows and append each one to `data`.
for _ in range(num_records):

    # 1. Ticket open time: a random timestamp between one year ago and now.
    open_date = fake.date_time_between(start_date='-1y', end_date='now')

    # 2. Priority (weighted draw; random.choices returns a one-element list)
    priority = random.choices(priorities, weights=priority_weights)[0]

    # 3. Resolution time and SLA limit by priority. Any priority without its
    #    own rule falls back to the 'Low' rule.
    min_hours, max_hours, sla_limit = priority_rules.get(
        priority, priority_rules['Low']
    )
    hours_to_solve = random.randint(min_hours, max_hours)

    # 4. Ticket status
    status = random.choice(statuses)

    # Only closed tickets get a close time and can count as an SLA breach;
    # every other status has no close time and is recorded as not breached.
    # NOTE(review): open_date can be as late as "now", so a closed ticket's
    # close time may fall after the moment this script ran -- confirm whether
    # that matters to consumers of the data.
    if status == 'Closed':
        close_date = open_date + timedelta(hours=hours_to_solve)
        sla_breached = 1 if hours_to_solve > sla_limit else 0
    else:
        close_date = pd.NaT
        sla_breached = 0

    # 5. Category, then an issue type belonging to that category
    category = random.choice(categories)
    issue = random.choice(issue_map[category])

    # 6. Agent assignment
    agent = random.choice(agents)

    # Row layout must match the column order used when the DataFrame is built.
    # NOTE(review): hours_to_solve is stored even when the ticket is not
    # closed -- verify that downstream analysis filters on Status.
    data.append([
        f"INC{ticket_counter:07d}",  # zero-padded sequential ID, e.g. INC0000001
        open_date,
        close_date,
        category,
        issue,
        priority,
        sla_limit,
        hours_to_solve,
        sla_breached,
        agent,
        status
    ])

    ticket_counter += 1

# Turn the accumulated rows into a table. The header names are listed in the
# same order as the values appended to each row of `data`.
columns = [
    'Ticket_ID',
    'Open_Time',
    'Close_Time',
    'Category',
    'Issue_Type',
    'Priority',
    'SLA_Limit_Hours',
    'Resolution_Hours',
    'SLA_Breached',
    'Agent',
    'Status',
]
df = pd.DataFrame(data=data, columns=columns)

# Write the table to disk, leaving out the DataFrame's row index.
df.to_csv('Deloitte_IT_Incidents.csv', index=False)

# Confirmation message for the notebook output.
print("✅ Data Generated: Deloitte_IT_Incidents.csv")


✅ Data Generated: Deloitte_IT_Incidents.csv
