In [1]:
import pulp
import sys
sys.path.insert(0, '../')
import polars as pl
import pandas as pd
import numpy as np
from src.data_synthesizer import DataSynthesizer

In [2]:
# Raise the character width polars may use when rendering tables as text to 1000
# (presumably so the wide frames printed in the following cells are not cut off).
pl.Config(tbl_width_chars=1000)

<polars.config.Config at 0x7e628df89840>

In [3]:
# Build the synthetic sample data set and pull out the four frames used below.
# NOTE(review): the two positional arguments are presumably the total number of
# trips (10000) and the number of unsuccessful trips (3000) - confirm against
# DataSynthesizer's signature.
ds = DataSynthesizer(10_000, 3_000)

# Trip history and charge status
trip_df, charge_df = ds.trip_hist_data, ds.charge_status_data
# Car-side and parking-side "q_pop" event data
car_q_pop_df, park_q_pop_df = ds.car_q_pop_data, ds.park_q_pop_data

In [4]:
# Plain-text preview of the trip history frame (ds.trip_hist_data).
print(trip_df)

shape: (3_000, 7)
┌────────────────────────────┬────────────────────────────┬──────────────────┬────────────────┬────────────────────┬─────────┬─────────────┐
│ trip_start_datetime        ┆ trip_end_datetime          ┆ start_station_id ┆ end_station_id ┆ distance_travelled ┆ car_id  ┆ customer_id │
│ ---                        ┆ ---                        ┆ ---              ┆ ---            ┆ ---                ┆ ---     ┆ ---         │
│ datetime[μs]               ┆ datetime[μs]               ┆ i64              ┆ i64            ┆ f64                ┆ str     ┆ str         │
╞════════════════════════════╪════════════════════════════╪══════════════════╪════════════════╪════════════════════╪═════════╪═════════════╡
│ 2024-01-03 14:12:17.869382 ┆ 2024-01-03 15:21:54.869382 ┆ 141              ┆ 343            ┆ 39.205389          ┆ CAR2563 ┆ CUS4759     │
│ 2024-04-24 16:34:31.869413 ┆ 2024-04-24 17:55:39.869413 ┆ 54               ┆ 347            ┆ 18.518535          ┆ CAR2127 ┆ CUS3108  

In [5]:
# Plain-text preview of the charge status frame (ds.charge_status_data).
print(charge_df)

shape: (3_000, 7)
┌────────────────────────────┬────────────────────────────┬─────────────────┬──────────────────┬─────────┬───────────────────────────┬────────────────────────────┐
│ left_station_datetime      ┆ enter_station_datetime     ┆ left_station_id ┆ enter_station_id ┆ car_id  ┆ left_station_charge_level ┆ enter_station_charge_level │
│ ---                        ┆ ---                        ┆ ---             ┆ ---              ┆ ---     ┆ ---                       ┆ ---                        │
│ datetime[μs]               ┆ datetime[μs]               ┆ i64             ┆ i64              ┆ str     ┆ i64                       ┆ i64                        │
╞════════════════════════════╪════════════════════════════╪═════════════════╪══════════════════╪═════════╪═══════════════════════════╪════════════════════════════╡
│ 2024-01-03 14:12:17.869382 ┆ 2024-01-03 15:21:54.869382 ┆ 141             ┆ 343              ┆ CAR2563 ┆ 20                        ┆ 7                          

In [7]:
# Plain-text preview of the car-side event frame (ds.car_q_pop_data).
print(car_q_pop_df)

shape: (4_000, 5)
┌────────────────────────────┬────────────────────────────┬──────────────────┬──────────────┬─────────────┐
│ event_creation_datetime    ┆ event_expiry_datetime      ┆ start_station_id ┆ event_status ┆ customer_id │
│ ---                        ┆ ---                        ┆ ---              ┆ ---          ┆ ---         │
│ datetime[μs]               ┆ datetime[μs]               ┆ i64              ┆ str          ┆ str         │
╞════════════════════════════╪════════════════════════════╪══════════════════╪══════════════╪═════════════╡
│ 2024-01-03 13:49:17.869382 ┆ 2024-01-03 14:19:17.869382 ┆ 141              ┆ successful   ┆ CUS4759     │
│ 2024-04-24 16:11:31.869413 ┆ 2024-04-24 16:41:31.869413 ┆ 54               ┆ successful   ┆ CUS3108     │
│ 2023-07-04 21:08:51.869424 ┆ 2023-07-04 21:38:51.869424 ┆ 297              ┆ successful   ┆ CUS1132     │
│ 2023-08-05 03:54:37.869433 ┆ 2023-08-05 04:24:37.869433 ┆ 304              ┆ successful   ┆ CUS3509     │
│ 2024-02-

In [8]:
# Plain-text preview of the parking-side event frame (ds.park_q_pop_data).
print(park_q_pop_df)

shape: (4_000, 5)
┌────────────────────────────┬────────────────────────────┬────────────────┬──────────────┬─────────────┐
│ event_creation_datetime    ┆ event_expiry_datetime      ┆ end_station_id ┆ event_status ┆ customer_id │
│ ---                        ┆ ---                        ┆ ---            ┆ ---          ┆ ---         │
│ datetime[μs]               ┆ datetime[μs]               ┆ i64            ┆ str          ┆ str         │
╞════════════════════════════╪════════════════════════════╪════════════════╪══════════════╪═════════════╡
│ 2024-01-03 14:34:17.869382 ┆ 2024-01-03 14:44:17.869382 ┆ 343            ┆ successful   ┆ CUS4759     │
│ 2024-04-24 16:56:31.869413 ┆ 2024-04-24 17:06:31.869413 ┆ 347            ┆ successful   ┆ CUS3108     │
│ 2023-07-04 21:53:51.869424 ┆ 2023-07-04 22:03:51.869424 ┆ 158            ┆ successful   ┆ CUS1132     │
│ 2023-08-05 04:39:37.869433 ┆ 2023-08-05 04:49:37.869433 ┆ 26             ┆ successful   ┆ CUS3509     │
│ 2024-02-01 07:01:19.869442

In [9]:
# Synthetic demand table: one row per (station, time period).
# `pl` and `np` come from the import cell at the top of the notebook.
num_stations = 380
periods = 5  # assume we divide the day into 5 periods
np.random.seed(42)  # fixed seed so the synthetic numbers are reproducible

station_ids = [f'Station_{i+1}' for i in range(num_stations)]
time_periods = list(range(periods))

# Generate unmet car reservation demand and unmet parking reservation demand
# for all stations and periods (integers drawn from 0..3).
unmet_car_demand = np.random.randint(0, 4, size=(num_stations, periods))
unmet_parking_demand = np.random.randint(0, 4, size=(num_stations, periods))

# Every (station index, period) pair in row-major order. All dictionaries and
# column lists below are built from this single ordering so they stay aligned.
station_period_index = [(i, t) for i in range(num_stations) for t in time_periods]

failed_car_reservations = {
    (station_ids[i], t): unmet_car_demand[i, t] for i, t in station_period_index
}
failed_parking_reservations = {
    (station_ids[i], t): unmet_parking_demand[i, t] for i, t in station_period_index
}

# Predicted demand = total unmet demand plus a random 1..4 uplift. The uplift is
# drawn one scalar at a time, in row-major order, to keep the random stream
# identical to the seeded sequence.
predicted_demand = {
    (station_ids[i], t): unmet_car_demand[i, t] + unmet_parking_demand[i, t] + np.random.randint(1, 5)
    for i, t in station_period_index
}

# Column lists. Dicts preserve insertion order, so `.values()` is already in the
# same row-major order as `station_period_index`.
stations = [station_ids[i] for i, _ in station_period_index]
time_periods_list = [t for _, t in station_period_index]
failed_car_reservations_list = list(failed_car_reservations.values())
failed_parking_reservations_list = list(failed_parking_reservations.values())
predicted_demand_list = list(predicted_demand.values())

# Create a DataFrame from the lists
df = pl.DataFrame({
    'Station': stations,
    'Time Period': time_periods_list,
    'Failed Car Reservations': failed_car_reservations_list,
    'Failed Parking Reservations': failed_parking_reservations_list,
    'Total unmet demand': [
        car + parking
        for car, parking in zip(failed_car_reservations_list, failed_parking_reservations_list)
    ],
    'Predicted or Average Demand': predicted_demand_list
})

In [10]:
df_pd = df.to_pandas()

# One list of "Total unmet demand" values per station, with stations in
# first-appearance order (sort=False) and values in original row order.
# A single groupby pass replaces re-filtering the whole frame once per station.
lst = [
    station_demand.tolist()
    for _, station_demand in df_pd.groupby("Station", sort=False)["Total unmet demand"]
]

In [11]:
# Dump the per-station lists of total unmet demand (one inner list per station).
print(lst)

[[5, 3, 0, 2, 4], [4, 0, 0, 3, 1], [5, 3, 3, 3, 6], [1, 6, 4, 5, 2], [3, 0, 4, 5, 4], [1, 1, 3, 6, 5], [3, 3, 3, 3, 3], [3, 5, 1, 1, 4], [4, 5, 2, 4, 3], [6, 3, 3, 4, 3], [5, 3, 4, 6, 4], [3, 4, 2, 2, 2], [3, 2, 1, 1, 2], [1, 3, 3, 3, 1], [1, 1, 2, 4, 1], [1, 5, 5, 4, 4], [2, 6, 3, 5, 3], [3, 2, 3, 5, 1], [6, 5, 3, 1, 1], [1, 2, 3, 3, 0], [4, 4, 2, 6, 4], [3, 4, 5, 1, 2], [5, 2, 4, 4, 3], [6, 0, 2, 3, 3], [5, 2, 2, 2, 0], [4, 3, 3, 4, 6], [5, 0, 2, 1, 2], [1, 1, 2, 3, 2], [3, 2, 3, 5, 6], [5, 3, 4, 2, 3], [3, 4, 1, 5, 2], [3, 3, 3, 1, 5], [2, 1, 5, 6, 3], [3, 6, 2, 3, 3], [3, 2, 5, 4, 5], [3, 5, 4, 2, 5], [0, 5, 1, 3, 0], [0, 2, 2, 5, 1], [3, 5, 1, 3, 4], [3, 2, 4, 1, 4], [4, 3, 2, 1, 6], [3, 5, 5, 3, 1], [5, 2, 1, 2, 2], [1, 3, 3, 3, 5], [5, 5, 3, 3, 2], [2, 0, 3, 2, 3], [1, 3, 4, 3, 5], [3, 2, 4, 3, 4], [4, 3, 4, 0, 2], [4, 0, 0, 1, 4], [1, 2, 3, 0, 3], [5, 5, 3, 4, 2], [0, 1, 2, 3, 3], [3, 6, 5, 3, 5], [3, 4, 3, 4, 0], [5, 3, 4, 2, 4], [6, 3, 3, 3, 1], [4, 0, 1, 1, 5], [1, 2, 1, 1, 

In [None]:
# Minimisation problem that the objective and constraints below are attached to.
prob = pulp.LpProblem(name="EVChargingPortExpansion", sense=pulp.LpMinimize)

In [None]:
# Decision variables: one non-negative integer per station, keyed by station id.
additional_ports = pulp.LpVariable.dicts(
    "AdditionalPorts",
    station_ids,
    lowBound=0,
    cat=pulp.LpInteger,
)

In [None]:
D_c = 1  # Dissatisfaction cost per failed car reservation
D_p = 1  # Dissatisfaction cost per failed parking reservation

# The dissatisfaction term is computed only from observed data and contains no
# decision variable, so on its own it is a constant. Minimising a constant turns
# the solve into a pure feasibility problem, and the solver may return any
# feasible (not minimal) port counts. It is kept as a constant offset so the
# reported objective value still includes it.
baseline_dissatisfaction = sum(
    D_c * int(failed_car_reservations[(s, t)]) + D_p * int(failed_parking_reservations[(s, t)])
    for s in station_ids
    for t in time_periods
)

# Objective: minimise the total number of additional ports. Each port has the
# same installation cost, so this is equivalent to minimising installation spend.
prob += pulp.lpSum(additional_ports[s] for s in station_ids) + baseline_dissatisfaction

In [None]:
# Existing number of ports per station: every station starts with 4.
current_capacity = dict.fromkeys(station_ids, 4)

In [None]:
# Cost of installing one additional port.
installation_cost_per_port = 1_000
# Total budget. This value is so large that the budget constraint is effectively
# non-binding; adjust it to the real scale of the problem.
budget = 9_999_999_999_999_999

In [None]:
# Constraints
for s in station_ids:
    # New capacity must meet predicted demand in every time period.
    for t in time_periods:
        # int(...) turns the numpy integer into a plain Python int for PuLP.
        prob += current_capacity[s] + additional_ports[s] >= int(predicted_demand[(s, t)])

# Budget constraint: total installation spend must not exceed the budget.
prob += pulp.lpSum(additional_ports[s] * installation_cost_per_port for s in station_ids) <= budget

# Solve the problem
prob.solve()

# Without an optimal solution the variable values may be None or meaningless,
# so stop with a clear message instead of failing later on `None > 0`.
if prob.status != pulp.LpStatusOptimal:
    raise RuntimeError(
        f"Solver finished with status {pulp.LpStatus[prob.status]!r}; no expansion plan to report."
    )

# Keep only stations that need at least one extra port. The solver returns
# integer variables as floats (possibly with tiny round-off), so round them.
results = {}
for s in station_ids:
    ports = pulp.value(additional_ports[s])
    if ports is None:
        continue
    ports = int(round(ports))
    if ports > 0:
        results[s] = ports

# Build the frame column-wise so polars does not have to infer the orientation.
results_df = pl.DataFrame({
    "Station": list(results.keys()),
    "Additional Ports": list(results.values()),
})
print(results_df.sort('Additional Ports', descending=True))