In [26]:
import numpy as np
import pandas as pd

# Data formatting

In [27]:
# Columns kept for the rest of the analysis
relevant_columns = ['ORIGIN_DATE', 'ORIGIN_STOP', 'ORIGIN_STOP_XY',
                    'DESTINATION_DATE', 'DESTINATION_STOP', 'DESTINATION_STOP_XY']

# Read the extract, keep only the rows whose ROUTE is 'LGHTRL' and only the
# relevant columns, then renumber the rows from zero.
# NOTE(review): the path is an absolute, machine-specific location.
data = (
    pd.read_csv(r"Z:\TC_Sydney_Uni_research_extract_202410.csv")
    .loc[lambda trips: trips['ROUTE'] == 'LGHTRL', relevant_columns]
    .reset_index(drop=True)
)

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136346 entries, 0 to 136345
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ORIGIN_DATE          136346 non-null  object
 1   ORIGIN_STOP          136346 non-null  object
 2   ORIGIN_STOP_XY       136346 non-null  object
 3   DESTINATION_DATE     136346 non-null  object
 4   DESTINATION_STOP     136346 non-null  object
 5   DESTINATION_STOP_XY  136346 non-null  object
dtypes: object(6)
memory usage: 6.2+ MB


In [28]:
# Delete the distinction between platforms in the origin and destination

def remove_platform(text):
    """
    Drop the platform suffix from a stop label.

    Everything from the first occurrence of the word 'Platform' onwards is
    discarded and the remaining text is stripped of surrounding whitespace.
    A label that does not contain 'Platform' is returned untouched (it is
    not stripped).
    """
    head, separator, _ = text.partition('Platform')
    # An empty separator means 'Platform' was not found.
    return head.strip() if separator else text

# Stops listed in the order of their numeric ID (0, 1, 2, ...), each with the
# codes that prefix its label in the data. Both codes of a stop share one ID.
stop_platform_codes = [
    ('Gungahlin Place', (8100, 8101)),
    ('Manning Clark Crescent', (8104, 8105)),
    ('Mapleton Avenue', (8106, 8107)),
    ('Nullarbor Avenue', (8108, 8109)),
    ('Well Station Drive', (8110, 8111)),
    ('EPIC and Racecourse', (8114,)),
    ('Phillip Avenue', (8116, 8117)),
    ('Swinden Street', (8118, 8119)),
    ('Dickson', (8120, 8121)),
    ('Macarthur Avenue', (8122, 8123)),
    ('Ipima Street', (8124, 8125)),
    ('Elouera Street', (8126, 8127)),
    ('Alinga Street', (8128, 8129)),
]

# Lookup from the cleaned label, e.g. '8100: Gungahlin Place', to the stop ID
stops_id = {
    f'{code}: {stop_name}': stop_number
    for stop_number, (stop_name, codes) in enumerate(stop_platform_codes)
    for code in codes
}

# For both stop columns: strip the platform suffix, then swap the label for its ID.
# Labels that are not keys of stops_id are left unchanged by replace().
for stop_column in ('ORIGIN_STOP', 'DESTINATION_STOP'):
    data[stop_column] = data[stop_column].map(remove_platform).replace(stops_id)

In [29]:
# Demand on the selected date for trips starting between 08:00 (inclusive)
# and 09:00 (exclusive). This is a count for a single day, not an average.
minimal_time = 8  # first hour of the window (inclusive)
maximal_time = 9  # end hour of the window (exclusive)

# Selected date
selected_date = pd.to_datetime('2024-09-18').date()

# Parse the origin and destination timestamps
timestamp_format = '%Y-%m-%d %H:%M:%S'
origin_timestamp = pd.to_datetime(data['ORIGIN_DATE'], format=timestamp_format)
destination_timestamp = pd.to_datetime(data['DESTINATION_DATE'], format=timestamp_format)

# assign() returns a new frame, so no column is written into a frame that was
# itself obtained by slicing another one (avoids SettingWithCopyWarning).
data = data.assign(
    ORIGIN_DATE=origin_timestamp,
    DESTINATION_DATE=destination_timestamp,
    # Time of day expressed as minutes since midnight
    ORIGIN_TIME=origin_timestamp.dt.hour * 60 + origin_timestamp.dt.minute,
    DESTINATION_TIME=destination_timestamp.dt.hour * 60 + destination_timestamp.dt.minute,
    # Calendar date of the trip start
    ORIGIN_DATE_DATE=origin_timestamp.dt.date,
)

# Keep the trips that start on the selected date inside the time window
on_selected_date = data['ORIGIN_DATE_DATE'] == selected_date
in_time_window = (data['ORIGIN_TIME'] >= minimal_time * 60) & (data['ORIGIN_TIME'] < maximal_time * 60)
data = data[on_selected_date & in_time_window]

# Number of trips for every (origin, destination) pair
demand = data.groupby(['ORIGIN_STOP', 'DESTINATION_STOP']).size().reset_index(name='DEMAND')

demand.head()

Unnamed: 0,ORIGIN_STOP,DESTINATION_STOP,DEMAND
0,0,0,11
1,0,1,1
2,0,2,10
3,0,3,41
4,0,4,4


In [30]:
# Give the origin-destination matrix shape.
# Both axes are reindexed to the complete, sorted list of stop IDs so the
# matrix is always square, even when a stop has no trips as origin or as
# destination in the selected window. Any label that is not an ID from
# stops_id is dropped by the reindex.
stop_ids = sorted(set(stops_id.values()))
demand = (
    demand
    .pivot(index='ORIGIN_STOP', columns='DESTINATION_STOP', values='DEMAND')
    .reindex(index=pd.Index(stop_ids, name='ORIGIN_STOP'),
             columns=pd.Index(stop_ids, name='DESTINATION_STOP'))
    .fillna(0)       # pairs without any trip have zero demand
    .astype(float)
)

# Make the diagonal zero (trips recorded with the same origin and destination stop).
# Every label exists after the reindex, so .loc never adds rows or columns here.
for stop in stop_ids:
    demand.loc[stop, stop] = 0.0

demand

DESTINATION_STOP,0,1,2,3,4,5,6,7,8,9,10,11,12
ORIGIN_STOP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0.0,1.0,10.0,41.0,4.0,0.0,4.0,9.0,84.0,5.0,15.0,21.0,394.0
1,9.0,0.0,1.0,4.0,0.0,0.0,0.0,1.0,14.0,0.0,2.0,8.0,49.0
2,34.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,32.0,3.0,9.0,4.0,115.0
3,19.0,0.0,0.0,0.0,3.0,0.0,2.0,1.0,14.0,5.0,1.0,10.0,92.0
4,8.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,14.0,0.0,4.0,4.0,102.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,15.0
6,4.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,4.0,1.0,2.0,1.0,56.0
7,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,64.0
8,16.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,3.0,5.0,14.0,204.0
9,6.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,5.0,0.0,0.0,2.0,154.0


In [31]:
# Travel times between stops, made up from Google Maps figures.
# Entry k is the time from stop k to stop k+1 (presumably minutes - TODO confirm).
segment_times = [2, 1, 2, 2, 4, 2, 2, 2, 2, 2, 1, 2]

# Accumulated time from stop 0 to every stop along the line
time_from_first_stop = np.concatenate(([0.0], np.cumsum(segment_times, dtype=float)))

# The time between two stops is the difference of their accumulated times,
# which makes the 13 x 13 matrix symmetric with a zero diagonal.
travel_time = np.abs(time_from_first_stop[:, None] - time_from_first_stop[None, :])

travel_time

array([[ 0.,  2.,  3.,  5.,  7., 11., 13., 15., 17., 19., 21., 22., 24.],
       [ 2.,  0.,  1.,  3.,  5.,  9., 11., 13., 15., 17., 19., 20., 22.],
       [ 3.,  1.,  0.,  2.,  4.,  8., 10., 12., 14., 16., 18., 19., 21.],
       [ 5.,  3.,  2.,  0.,  2.,  6.,  8., 10., 12., 14., 16., 17., 19.],
       [ 7.,  5.,  4.,  2.,  0.,  4.,  6.,  8., 10., 12., 14., 15., 17.],
       [11.,  9.,  8.,  6.,  4.,  0.,  2.,  4.,  6.,  8., 10., 11., 13.],
       [13., 11., 10.,  8.,  6.,  2.,  0.,  2.,  4.,  6.,  8.,  9., 11.],
       [15., 13., 12., 10.,  8.,  4.,  2.,  0.,  2.,  4.,  6.,  7.,  9.],
       [17., 15., 14., 12., 10.,  6.,  4.,  2.,  0.,  2.,  4.,  5.,  7.],
       [19., 17., 16., 14., 12.,  8.,  6.,  4.,  2.,  0.,  2.,  3.,  5.],
       [21., 19., 18., 16., 14., 10.,  8.,  6.,  4.,  2.,  0.,  1.,  3.],
       [22., 20., 19., 17., 15., 11.,  9.,  7.,  5.,  3.,  1.,  0.,  2.],
       [24., 22., 21., 19., 17., 13., 11.,  9.,  7.,  5.,  3.,  2.,  0.]])

In [32]:
# # Calculate travel times
# data['TRAVEL_TIME_MINUTES'] = data['DESTINATION_TIME'] - data['ORIGIN_TIME']
# travel_time = data[['ORIGIN_STOP', 'DESTINATION_STOP', 'TRAVEL_TIME_MINUTES']].groupby(['ORIGIN_STOP', 'DESTINATION_STOP']).mean().reset_index()
# travel_time = travel_time.pivot(columns='DESTINATION_STOP', index='ORIGIN_STOP', values='TRAVEL_TIME_MINUTES')
#
# # Make the diagonal zero
# for i in range(travel_time.shape[0]):
#     travel_time.loc[i,i] = 0
# # Fill with zero
# travel_time.fillna(0, inplace=True)
# # Sort columns
# travel_time = travel_time[sorted(travel_time.columns)]
#
# travel_time

# DI implementation

In [41]:
def transfers(demand: pd.DataFrame) -> list[float]:
    """
    For each stop, calculate the potential transfers.

    The potential transfers at the stop in position k are the passengers who
    travel from a stop placed before k to a stop placed after k, i.e. the sum
    of the demand entries in rows 0..k-1 and columns k+1 onwards. Only that
    direction (earlier row to later column) is counted.

    Rows and columns are addressed by position, so the result does not depend
    on how the axes are labelled; a plain 2-D NumPy array is accepted as well.

    Parameters
    ----------
    demand : origin-destination matrix, origins on the rows and destinations
        on the columns, with both axes in the same stop order.

    Returns
    -------
    One value per row of ``demand``, in row order.

    Raises
    ------
    ValueError
        If ``demand`` is not two-dimensional.
    """
    od_matrix = np.asarray(demand, dtype=float)
    if od_matrix.ndim != 2:
        raise ValueError("demand must be a two-dimensional origin-destination matrix")

    # An empty slice (first and last stop) sums to 0.0.
    return [od_matrix[:k, k + 1:].sum() for k in range(od_matrix.shape[0])]

def max_flow(demand: pd.DataFrame) -> list[tuple[int, int]]:
    """
    Placeholder: not implemented yet.

    The body does nothing with ``demand`` and the function currently returns
    None, not the list of integer pairs announced by the annotation.
    """
    # TODO: implement
    return None


In [42]:
# Potential transfers at every stop, listed in the row order of the demand matrix
transfers(demand)

[np.float64(0.0),
 np.float64(587.0),
 np.float64(655.0),
 np.float64(774.0),
 np.float64(894.0),
 np.float64(1021.0),
 np.float64(1032.0),
 np.float64(1083.0),
 np.float64(990.0),
 np.float64(1197.0),
 np.float64(1314.0),
 np.float64(1311.0),
 np.float64(0.0)]

In [23]:
# First entry of the origin-destination matrix, read by position.
# np.asarray makes this work whether demand is a DataFrame or a NumPy array;
# demand[0, 0] raises KeyError on a DataFrame.
np.asarray(demand)[0, 0]

np.float64(0.0)

In [24]:
# Display the whole origin-destination demand matrix
demand

array([[  0.,   1.,  10.,  41.,   4.,   0.,   4.,   9.,  84.,   5.,  15.,
         21., 394.],
       [  9.,   0.,   1.,   4.,   0.,   0.,   0.,   1.,  14.,   0.,   2.,
          8.,  49.],
       [ 34.,   0.,   0.,   1.,   1.,   0.,   0.,   0.,  32.,   3.,   9.,
          4., 115.],
       [ 19.,   0.,   0.,   0.,   3.,   0.,   2.,   1.,  14.,   5.,   1.,
         10.,  92.],
       [  8.,   0.,   0.,   0.,   0.,   0.,   1.,   2.,  14.,   0.,   4.,
          4., 102.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,   0.,   0.,
          2.,  15.],
       [  4.,   1.,   1.,   0.,   2.,   0.,   0.,   0.,   4.,   1.,   2.,
          1.,  56.],
       [  3.,   0.,   0.,   1.,   0.,   0.,   0.,   0.,   0.,   2.,   1.,
          3.,  64.],
       [ 16.,   0.,   0.,   0.,   1.,   0.,   1.,   1.,   0.,   3.,   5.,
         14., 204.],
       [  6.,   0.,   0.,   1.,   1.,   0.,   0.,   1.,   5.,   0.,   0.,
          2., 154.],
       [  4.,   0.,   1.,   0.,   0.,   0.,   1., 