## This notebook generates synthetic wait-time data for three domains (banks, hospitals, and tourist spots): 1,000 simulated records per domain, each drawn at random across 5 named locations.

### Bank Data 

In [5]:
import pandas as pd
import random
from datetime import datetime, timedelta

# --- Parameters ---
# Branches a simulated customer can be assigned to (order matters for
# reproducing a seeded run, since selection is positional).
branches = [
    "Uttara Branch", "Dhanmondi Branch", "Motijheel Branch",
    "Mirpur Branch", "Banani Branch",
]

# Service name -> fixed amount added to the simulated wait for that service.
service_types = {
    "Deposit": 5,
    "Withdraw": 4,
    "Loan": 15,
}

# Days a simulated visit can fall on.
days_of_week = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
]

# (start_hour, end_hour) windows: 9–11 AM and 1–3 PM.
rush_hours = [
    (9, 11),
    (13, 15),
]


# --- Helpers ---
def is_rush_hour(hour):
    """Return True when ``hour`` lies inside any window of ``rush_hours``.

    Each window is a ``(start, end)`` pair; the start hour is inclusive and
    the end hour is exclusive.
    """
    for window_start, window_end in rush_hours:
        if window_start <= hour < window_end:
            return True
    return False


def simulate_customer():
    """Simulate one bank customer visit and return it as a flat record.

    Branch, service, arrival time (09:00-16:59), day of week and queue length
    (0-20) are drawn at random. The wait is the queue length times a random
    1.0-1.5 factor, scaled by 1.3 on Thursdays and by 1.2 during rush hours,
    plus the service's fixed time and a random offset in [-1, 1].

    Returns:
        dict: keys ``branch_name``, ``service_type``, ``arrival_time``
        (``"HH:MM"``), ``day_of_week``, ``queue_length_at_arrival``,
        ``is_rush_hour`` (``"Yes"``/``"No"``) and ``wait_time_min``
        (rounded to 2 decimals).
    """
    chosen_branch = random.choice(branches)
    chosen_service = random.choice(list(service_types))
    hour = random.randint(9, 16)
    minute = random.randint(0, 59)
    weekday = random.choice(days_of_week)
    people_ahead = random.randint(0, 20)
    rush = is_rush_hour(hour)

    # Queue contribution first, then the situational multipliers applied in a
    # fixed order (Thursday, then rush hour).
    wait = people_ahead * random.uniform(1.0, 1.5)
    for applies, factor in ((weekday == "Thursday", 1.3), (rush, 1.2)):
        if applies:
            wait *= factor

    jitter = random.uniform(-1, 1)
    return {
        "branch_name": chosen_branch,
        "service_type": chosen_service,
        "arrival_time": f"{hour:02d}:{minute:02d}",
        "day_of_week": weekday,
        "queue_length_at_arrival": people_ahead,
        "is_rush_hour": "Yes" if rush else "No",
        "wait_time_min": round(wait + service_types[chosen_service] + jitter, 2),
    }


# --- Generate Dataset ---
# 1000 independent simulated customers, one dict (row) per customer.
data = [simulate_customer() for _record in range(1000)]
bank_data = pd.DataFrame(data=data)

# --- Save to CSV ---
# index=False keeps the row index out of the written file.
bank_data.to_csv(path_or_buf="../data/bank_customer_data.csv", index=False)

In [8]:
bank_data.head()

Unnamed: 0,branch_name,service_type,arrival_time,day_of_week,queue_length_at_arrival,is_rush_hour,wait_time_min
0,Motijheel Branch,Loan,15:21,Monday,9,No,28.5
1,Dhanmondi Branch,Deposit,15:35,Tuesday,11,No,20.4
2,Banani Branch,Deposit,11:48,Tuesday,6,No,13.74
3,Motijheel Branch,Withdraw,16:35,Sunday,8,No,16.22
4,Motijheel Branch,Deposit,16:28,Sunday,11,No,18.24


In [9]:
bank_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   branch_name              1000 non-null   object 
 1   service_type             1000 non-null   object 
 2   arrival_time             1000 non-null   object 
 3   day_of_week              1000 non-null   object 
 4   queue_length_at_arrival  1000 non-null   int64  
 5   is_rush_hour             1000 non-null   object 
 6   wait_time_min            1000 non-null   float64
dtypes: float64(1), int64(1), object(5)
memory usage: 54.8+ KB


### Hospital Data Generation

In [6]:
import pandas as pd
import random
from datetime import datetime

# --- Setup ---
# Hospitals a simulated patient can be assigned to.
hospitals = [
    "Dhaka Medical", "Square Hospital", "Apollo Hospital",
    "Ibn Sina", "Popular Diagnostic",
]

# Department name -> fixed amount added to the simulated wait.
departments = {
    "General": 10,
    "Emergency": 5,
    "Pediatrics": 8,
    "Cardiology": 12,
}

# Days a simulated visit can fall on.
days_of_week = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
]

# Possible weather labels; the sampling weights live in simulate_patient.
weather_choices = [
    "Clear",
    "Rainy",
]


def simulate_patient():
    """Simulate one hospital patient visit and return it as a flat record.

    Hospital, department, arrival time (08:00-17:59), day of week, queue
    length (0-30) and weather (70% Clear / 30% Rainy) are drawn at random.
    The wait is the queue length times a random 1.0-1.5 factor, scaled by
    1.2 on Thursdays, by 1.15 when rainy, and by a random 0.6-1.2 factor for
    the Emergency department, plus the department's fixed time and a random
    offset in [-2, 2].

    Returns:
        dict: keys ``hospital_name``, ``department``, ``arrival_time``
        (``"HH:MM"``), ``day_of_week``, ``queue_length_at_arrival``,
        ``is_emergency`` (``"Yes"``/``"No"``), ``weather_condition`` and
        ``wait_time_min`` (rounded to 2 decimals).
    """
    hospital_name = random.choice(hospitals)
    dept = random.choice(list(departments))
    hour = random.randint(8, 17)
    minute = random.randint(0, 59)
    weekday = random.choice(days_of_week)
    people_ahead = random.randint(0, 30)
    emergency = dept == "Emergency"
    (sky,) = random.choices(weather_choices, weights=[0.7, 0.3])

    # Multiplying by 1.0 leaves the value untouched, so each line only has an
    # effect when its condition holds.
    wait = people_ahead * random.uniform(1.0, 1.5)
    wait *= 1.2 if weekday == "Thursday" else 1.0
    wait *= 1.15 if sky == "Rainy" else 1.0
    # The extra random factor is drawn only for Emergency patients.
    wait *= random.uniform(0.6, 1.2) if emergency else 1.0

    jitter = random.uniform(-2, 2)
    return {
        "hospital_name": hospital_name,
        "department": dept,
        "arrival_time": f"{hour:02d}:{minute:02d}",
        "day_of_week": weekday,
        "queue_length_at_arrival": people_ahead,
        "is_emergency": "Yes" if emergency else "No",
        "weather_condition": sky,
        "wait_time_min": round(wait + departments[dept] + jitter, 2),
    }


# Generate and export
# 1000 independent simulated patients, tabulated one row per patient.
hospital_data = pd.DataFrame([simulate_patient() for _ in range(1000)])

# --- Save to CSV ---
# index=False keeps the row index out of the written file.
hospital_data.to_csv(
    "../data/hospital_patient_data.csv",
    index=False,
)


In [27]:
hospital_data.head()

Unnamed: 0,Hospital Name,Department,Arrival Time,Day of Week,Queue Length at Arrival,Is Emergency,Weather Condition,Wait Time (min)
0,Square Hospital,Pediatrics,09:38,Wednesday,26,No,Clear,36.33
1,Popular Diagnostic,Emergency,16:29,Thursday,10,Yes,Clear,21.33
2,Square Hospital,Pediatrics,08:16,Wednesday,16,No,Clear,32.64
3,Ibn Sina,Emergency,10:27,Sunday,29,Yes,Rainy,42.62
4,Popular Diagnostic,Emergency,12:39,Sunday,20,Yes,Clear,28.01


In [28]:
hospital_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Hospital Name            1000 non-null   object 
 1   Department               1000 non-null   object 
 2   Arrival Time             1000 non-null   object 
 3   Day of Week              1000 non-null   object 
 4   Queue Length at Arrival  1000 non-null   int64  
 5   Is Emergency             1000 non-null   object 
 6   Weather Condition        1000 non-null   object 
 7   Wait Time (min)          1000 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 62.6+ KB


### Tourist Spot Data Generation

In [7]:
# Tourist spots a simulated visitor can be assigned to.
places = [
    "Lalbagh Fort",
    "Ahsan Manzil",
    "National Zoo",
    "Sajek Valley",
    "Cox's Bazar",
]

# Season and weather labels; sampling weights live in simulate_visitor.
seasons = [
    "Summer",
    "Winter",
    "Monsoon",
]
weather_options = [
    "Clear",
    "Rainy",
]

# All seven days, listed starting from Friday.
days = [
    "Friday",
    "Saturday",
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
]


def simulate_visitor():
    """Simulate one tourist-spot visitor and return it as a flat record.

    Place, day of week, season (40/30/30 Summer/Winter/Monsoon), weather
    (70% Clear / 30% Rainy), arrival time (08:00-18:59), school-holiday flag
    (40% Yes) and queue length (0-50) are drawn at random. The wait is the
    queue length times a random 0.8-1.3 factor, scaled by 1.4 on Friday or
    Saturday, by 0.9 for rainy Monsoon visits and by 1.5 on school holidays,
    plus a random 0-5 offset.

    Returns:
        dict: keys ``place_name``, ``day_of_week``, ``arrival_time``
        (``"HH:MM"``), ``season``, ``weather``, ``school_holiday``
        (``"Yes"``/``"No"``), ``queue_length_at_entry`` and
        ``wait_time_min`` (rounded to 2 decimals).
    """
    spot = random.choice(places)
    weekday = random.choice(days)
    (season,) = random.choices(seasons, weights=[0.4, 0.3, 0.3])
    (sky,) = random.choices(weather_options, weights=[0.7, 0.3])
    hour = random.randint(8, 18)
    minute = random.randint(0, 59)
    (on_holiday,) = random.choices(["Yes", "No"], weights=[0.4, 0.6])
    people_ahead = random.randint(0, 50)

    fri_or_sat = weekday in ("Friday", "Saturday")
    rainy_monsoon = season == "Monsoon" and sky == "Rainy"

    # Queue contribution, then the multipliers in a fixed order.
    scaled = people_ahead * random.uniform(0.8, 1.3)
    if fri_or_sat:
        scaled *= 1.4
    if rainy_monsoon:
        scaled *= 0.9  # Less crowd
    if on_holiday == "Yes":
        scaled *= 1.5

    return {
        "place_name": spot,
        "day_of_week": weekday,
        "arrival_time": f"{hour:02d}:{minute:02d}",
        "season": season,
        "weather": sky,
        "school_holiday": on_holiday,
        "queue_length_at_entry": people_ahead,
        "wait_time_min": round(scaled + random.uniform(0, 5), 2),
    }


# Generate and export
# 1000 independent simulated visitors, tabulated one row per visitor.
park_data = pd.DataFrame([simulate_visitor() for _ in range(1000)])

# --- Save to CSV ---
# index=False keeps the row index out of the written file.
park_data.to_csv(
    "../data/park_visitor_data.csv",
    index=False,
)

In [30]:
park_data.head()

Unnamed: 0,Place Name,Day of Week,Arrival Time,Season,Weather,School Holiday,Queue Length at Entry,Wait Time (min)
0,Sajek Valley,Monday,14:49,Winter,Rainy,No,47,49.76
1,Ahsan Manzil,Saturday,09:15,Summer,Clear,No,29,35.23
2,Cox's Bazar,Monday,13:24,Winter,Clear,Yes,40,56.5
3,Sajek Valley,Friday,13:45,Winter,Clear,No,46,64.51
4,Ahsan Manzil,Monday,14:22,Summer,Clear,Yes,13,29.54


In [31]:
park_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Place Name             1000 non-null   object 
 1   Day of Week            1000 non-null   object 
 2   Arrival Time           1000 non-null   object 
 3   Season                 1000 non-null   object 
 4   Weather                1000 non-null   object 
 5   School Holiday         1000 non-null   object 
 6   Queue Length at Entry  1000 non-null   int64  
 7   Wait Time (min)        1000 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 62.6+ KB
