In [6]:
import pandas as pd
import random
from datetime import date, timedelta

In [7]:
# --- Value pools sampled at random when building synthetic rows ---

# Person names (given names and surnames are combined at random)
first_names = [
    'John', 'Jane', 'Robert', 'Emily', 'Michael', 'Sarah', 'David',
    'Laura', 'James', 'Linda', 'William', 'Mary', 'Richard', 'Patricia',
]
last_names = [
    'Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia',
    'Miller', 'Davis', 'Rodriguez', 'Martinez', 'Hernandez', 'Lopez',
]

# Hospital names are formed as "<name> <suffix>"
hospital_names = [
    'City', 'General', 'Mercy', 'St. Jude',
    'Community', 'Memorial', 'Sunrise', 'Hope',
]
hospital_suffixes = ['Hospital', 'Clinic', 'Medical Center', 'Group']

# Categorical clinical / administrative attributes
blood_types = ['A+', 'A-', 'B+', 'B-', 'AB+', 'AB-', 'O+', 'O-']
medical_conditions = [
    'Diabetes', 'Cancer', 'Obesity', 'Arthritis',
    'Hypertension', 'Asthma', 'Pneumonia',
]
insurance_providers = [
    'Medicare', 'Aetna', 'Blue Cross', 'Cigna', 'UnitedHealthcare',
]
admission_types = ['Urgent', 'Emergency', 'Elective']
medications = [
    'Paracetamol', 'Ibuprofen', 'Aspirin',
    'Penicillin', 'Lipitor', 'Amoxicillin',
]
test_results = ['Normal', 'Abnormal', 'Inconclusive']
genders = ['Male', 'Female']

In [8]:
# Helper: pick a random calendar date inside a closed interval
def get_random_date(start_date, end_date):
    """Return a randomly chosen date in the inclusive range [start_date, end_date].

    A whole-day offset between 0 and the number of days separating the two
    bounds is drawn with ``random.randint`` and added to ``start_date``.
    """
    span_in_days = (end_date - start_date).days
    offset = timedelta(days=random.randint(0, span_in_days))
    return start_date + offset

In [9]:
# Build one synthetic admission record
def create_row():
    """Return a dict holding one randomly generated admission record.

    The admission date is drawn between 2019-01-01 and 2025-10-31
    (inclusive); the discharge date falls 1 to 15 days after it. Both dates
    are rendered as MM/DD/YYYY strings. All remaining fields are sampled
    from the module-level value pools or from fixed numeric ranges.
    """
    date_format = '%m/%d/%Y'
    pick = random.choice

    admitted_on = get_random_date(date(2019, 1, 1), date(2025, 10, 31))
    length_of_stay = timedelta(days=random.randint(1, 15))
    discharged_on = admitted_on + length_of_stay

    # Fields are filled in column order, one random draw at a time, so the
    # sequence of RNG calls (and therefore seeded output) stays fixed.
    row = {}
    row['Name'] = f"{pick(first_names)} {pick(last_names)}"
    row['Age'] = random.randint(18, 90)
    row['Gender'] = pick(genders)
    row['Blood Type'] = pick(blood_types)
    row['Medical Condition'] = pick(medical_conditions)
    row['Date of Admission'] = admitted_on.strftime(date_format)
    row['Doctor'] = f"Dr. {pick(first_names)} {pick(last_names)}"
    row['Hospital'] = f"{pick(hospital_names)} {pick(hospital_suffixes)}"
    row['Insurance Provider'] = pick(insurance_providers)
    row['Billing Amount'] = round(random.uniform(1000.0, 50000.0), 2)
    row['Room Number'] = random.randint(100, 599)
    row['Admission Type'] = pick(admission_types)
    row['Discharge Date'] = discharged_on.strftime(date_format)
    row['Medication'] = pick(medications)
    row['Test Results'] = pick(test_results)
    return row

In [10]:
# --- Main script execution ---
# Writes `num_files` CSV files into the current working directory, each
# containing `rows_per_file` rows produced by create_row().
num_files = 5
rows_per_file = 1000
generated_files = []

print(f"Generating {num_files} files, each with {rows_per_file} rows...")

for i in range(num_files):
    # Collect all row dicts first, then build the DataFrame in one go.
    data = [create_row() for _ in range(rows_per_file)]
    df = pd.DataFrame(data)

    # Batches are numbered from 1 in the file name.
    file_name = f'generated_admissions_batch_{i+1}.csv'

    # Save to CSV in the current directory (no index column).
    df.to_csv(file_name, index=False)
    generated_files.append(file_name)

# Report the number of files actually written rather than a hard-coded
# count, so the message stays correct when num_files is changed.
print(f"\nSuccessfully generated {len(generated_files)} files:")
for generated_file in generated_files:
    print(generated_file)

Generating 5 files, each with 1000 rows...

Successfully generated 5 files:
generated_admissions_batch_1.csv
generated_admissions_batch_2.csv
generated_admissions_batch_3.csv
generated_admissions_batch_4.csv
generated_admissions_batch_5.csv
