In [None]:
import pandas as pd
import numpy as np
from faker import Faker
import random

# How many synthetic patient records to generate
n = 1_000

# Shared Faker instance (used below to mint patient identifiers)
fake = Faker()

# Function to simulate dataset
def generate_dataset(n):
    """Simulate ``n`` synthetic post-caesarean patient records.

    Every field is drawn independently at random; the only derived fields
    are the sepsis label and the two outcome columns that depend on it.
    Patient identifiers come from the module-level ``fake`` Faker instance.

    Args:
        n: Number of records to generate. It is passed to ``range``, so it
            must be an integer; a value of zero or less yields an empty
            frame that still has all columns.

    Returns:
        A ``pandas.DataFrame`` with one row per simulated patient and 28
        columns (demographics, surgical details, vital signs, lab results,
        postoperative indicators, and the sepsis outcome fields).
    """
    data = []

    for _ in range(n):
        # Patient Demographics
        patient_id = fake.uuid4()
        age = random.randint(18, 45)
        parity = random.randint(0, 5)
        bmi = round(random.uniform(18, 35), 1)
        pre_existing = random.choice(['None', 'Diabetes', 'Hypertension', 'Anemia', 'Obesity'])

        # Clinical & Surgical Details
        c_section_type = random.choice(['Elective', 'Emergency'])
        duration_surgery = random.randint(30, 120)  # in minutes
        blood_loss = random.randint(300, 1200)  # ml
        antibiotics_given = random.choice(['Yes', 'No'])
        indication = random.choice(['Fetal distress', 'Prolonged labor', 'Placenta previa', 'Previous C-section'])

        # Vital Signs (postoperative)
        temp = round(np.random.normal(37, 0.8), 1)  # °C
        hr = random.randint(60, 140)  # bpm
        rr = random.randint(12, 30)  # breaths/min
        bp_sys = random.randint(90, 140)
        bp_dia = random.randint(60, 90)
        spo2 = round(random.uniform(92, 100), 1)

        # Lab Results
        # A normal distribution is unbounded below, so WBC and (far more
        # often, given its wide spread) CRP can be drawn negative. Counts and
        # concentrations cannot be negative, so clamp at zero before rounding.
        # The clamp never changes the sepsis label: a negative draw could not
        # exceed the positive thresholds used below anyway.
        wbc = round(max(0.0, np.random.normal(10, 3)), 1)  # ×10⁹/L
        crp = round(max(0.0, np.random.normal(30, 20)), 1)  # mg/L
        procalcitonin = round(np.random.uniform(0.05, 5.0), 2)  # ng/mL
        lactate = round(np.random.uniform(0.5, 4.0), 1)  # mmol/L
        blood_culture = random.choice(['Negative', 'E. coli', 'Staphylococcus aureus', 'Klebsiella pneumoniae'])

        # Postoperative Indicators
        wound_status = random.choice(['Clean', 'Erythema', 'Pus discharge'])
        uterine_tenderness = random.choice(['Yes', 'No'])
        urine_output = random.randint(20, 80)  # ml/hr
        organ_dysfunction = random.choice(['Yes', 'No'])

        # Sepsis Outcome (target variable): positive when fever, raised WBC
        # and raised CRP all co-occur, or whenever organ dysfunction is present.
        sepsis = 'Yes' if (temp > 38.5 and wbc > 12 and crp > 50) or organ_dysfunction == 'Yes' else 'No'
        # Diagnosis time only exists for septic patients; None marks "not applicable".
        time_to_diagnosis = random.randint(1, 48) if sepsis == 'Yes' else None
        # Non-septic patients are always recorded as recovered.
        outcome = random.choice(['Recovered', 'ICU', 'Mortality']) if sepsis == 'Yes' else 'Recovered'

        # Append record (order must match the ``columns`` list below)
        data.append([
            patient_id, age, parity, bmi, pre_existing, c_section_type, duration_surgery, blood_loss,
            antibiotics_given, indication, temp, hr, rr, bp_sys, bp_dia, spo2, wbc, crp, procalcitonin,
            lactate, blood_culture, wound_status, uterine_tenderness, urine_output, organ_dysfunction,
            sepsis, time_to_diagnosis, outcome
        ])

    columns = [
        'Patient_ID', 'Age', 'Parity', 'BMI', 'Pre_existing_conditions', 'C_section_type', 'Duration_surgery_min',
        'Blood_loss_ml', 'Antibiotics_given', 'Indication_for_C_section', 'Temperature_C', 'Heart_rate_bpm',
        'Respiratory_rate_bpm', 'BP_systolic_mmHg', 'BP_diastolic_mmHg', 'SpO2_percent', 'WBC_count_x10^9/L',
        'CRP_mg/L', 'Procalcitonin_ng/mL', 'Lactate_mmol/L', 'Blood_culture_result', 'Wound_status',
        'Uterine_tenderness', 'Urine_output_ml/hr', 'Organ_dysfunction', 'Sepsis_detected', 'Time_to_diagnosis_hr',
        'Outcome'
    ]

    return pd.DataFrame(data, columns=columns)

# Build the synthetic cohort and persist it as CSV (row index omitted)
df = generate_dataset(n)
df.to_csv('maternal_sepsis_dataset.csv', index=False)

# Confirm completion and preview the first rows
print("Dataset generated successfully!", df.head(), sep="\n")