<a href="https://colab.research.google.com/github/unnatikdm/trainAIschedule/blob/main/train_schedule_with_disruption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train Scheduler with Edge-Case Handling

This notebook computes a baseline timetable for a set of trains by enumerating every possible departure order (the cost grows factorially, so this is suitable only for small datasets). It also includes a function that adjusts the timetable after a real-time disruption, such as a train being delayed.

**Key steps:**
- Load the `train_schedules.json` file.
- Parse times and assign priorities to trains.
- Find an optimized departure order subject to headway and priority constraints.
- Define a `handle_disruption` function that can adjust the timetable when a train is delayed.

Upload `train_schedules.json` to your Colab session before running the code below.

In [1]:
import json, itertools, numpy as np, pandas as pd, matplotlib.pyplot as plt

# Load dataset. JSON is UTF-8 by specification, so name the encoding explicitly
# instead of relying on the platform default used by open().
with open('train_schedules.json', encoding='utf-8') as f:
    data = json.load(f)

# Helper to parse times like '05:15 (Day 1)'
def parse_time(t):
    """Convert a timetable string such as '05:15 (Day 1)' to minutes after midnight.

    Only the 'HH:MM' part in front of the first '(' is used. The '(Day N)'
    suffix is discarded, so no day offset is added to the result.

    Parameters
    ----------
    t : str
        Time string taken from a halt entry.

    Returns
    -------
    int or None
        ``hours * 60 + minutes``, or None when `t` is not a usable clock time:
        a placeholder starting with 'Source' or 'Destination', a non-string
        value, an empty string, or text that is not of the form 'HH:MM'.
    """
    if not isinstance(t, str):
        return None
    text = t.strip()
    if not text or text.startswith(('Source', 'Destination')):
        return None
    clock = text.split('(')[0].strip()
    try:
        hh, mm = clock.split(':')
        return int(hh) * 60 + int(mm)
    except ValueError:
        # Wrong number of ':'-separated fields, or fields that are not integers.
        return None

# Priority mapping and helper
# Keyword -> priority rank; a smaller number means a higher priority.
priority_order = {
    'vande bharat': 1, 'rajdhani': 2, 'tejas': 3, 'duronto': 4,
    'shatabdi': 5, 'gatiman': 6, 'garib rath': 7, 'jan shatabdi': 8,
    'superfast': 9, 'express': 10, 'fast': 11, 'local': 12,
    'passenger': 13, 'freight': 14
}

def get_priority(name, typ):
    """Return the priority rank of a train from keywords in its name or type.

    Both strings are lower-cased and searched for every keyword of
    `priority_order`. The smallest (best) matching rank wins.

    A keyword that is merely part of a longer keyword matched in the same
    string is ignored. Without this, 'shatabdi' (5) would always shadow
    'jan shatabdi' (8), and 'fast' would match inside 'superfast'.

    Parameters
    ----------
    name : str or None
        Train name.
    typ : str or None
        Train type.

    Returns
    -------
    int
        Rank from `priority_order`, or 15 when no keyword matches.
    """
    best = 15  # rank used when nothing matches
    for text in ((name or '').lower(), (typ or '').lower()):
        hits = [k for k in priority_order if k in text]
        for k in hits:
            # Skip a keyword contained in a longer keyword that also matched.
            if any(k != longer and k in longer for longer in hits):
                continue
            best = min(best, priority_order[k])
    return best

# Disruption handler
def handle_disruption(schedule_list, disrupted_train_number, delay_minutes, headway=3):
    """
    Adjust the schedule when a train is delayed.

    The delay is added to the first train whose number matches. All trains
    are then re-placed in priority order (smaller number first, earlier
    departure first within a priority). Each train takes the earliest slot
    that is not before its current departure and is at least `headway`
    minutes from every slot already taken. A train is therefore never pushed
    back by a lower-priority train, and every pair of departures ends up at
    least `headway` apart. Trains are only ever moved later, never earlier.

    Parameters
    ----------
    schedule_list : list of dicts
        Each dict should contain 'train_number', 'optimized', and 'priority' keys.
        The list and its dicts are modified in place.
    disrupted_train_number : str
        Identifier of the train that is delayed. It is compared both directly
        and by string form, so 90063 and '90063' refer to the same train.
        If no train matches, no delay is applied but headways are still enforced.
    delay_minutes : int
        Number of minutes to add to the delayed train's departure.
    headway : int
        Minimum separation between successive trains (minutes).

    Returns
    -------
    list
        The same list object, sorted by departure time, with enforced
        headway constraints.
    """
    # Apply delay to the disrupted train (first match only)
    wanted = str(disrupted_train_number)
    for train in schedule_list:
        number = train['train_number']
        if number == disrupted_train_number or str(number) == wanted:
            train['optimized'] += delay_minutes
            break

    # Place trains from highest to lowest priority so that a lower-priority
    # train always yields when two departures would fall within the headway.
    occupied = []  # departure times already assigned, kept in ascending order
    for train in sorted(schedule_list, key=lambda x: (x['priority'], x['optimized'])):
        slot = train['optimized']
        # One ascending pass is enough: each bump moves the slot past the
        # conflicting time, so only later taken times can still conflict.
        for taken in occupied:
            if abs(slot - taken) < headway:
                slot = taken + headway
        train['optimized'] = slot
        occupied.append(slot)
        occupied.sort()

    # Return the timetable in departure order
    schedule_list.sort(key=lambda x: (x['optimized'], x['priority']))
    return schedule_list


In [2]:
# Extract trains and assign priorities
trains = []
for idx, tr in enumerate(data):
    # First halt with a usable departure time; parse_time returns None for
    # placeholder entries, and those halts are passed over.
    halts = tr.get('halt_schedule') or []
    dep = next(
        (minutes
         for minutes in (parse_time(halt['departure_time']) for halt in halts)
         if minutes is not None),
        None,
    )
    if dep is None:
        # A train without any departure time cannot be placed in the timetable.
        # Skip it visibly rather than fail later on arithmetic with None.
        print(f"Skipping train {tr.get('train_number')}: no usable departure time")
        continue
    priority = get_priority(tr['train_name'], tr.get('type', ''))
    trains.append({
        'idx': idx,  # position of the train in the raw dataset
        'train_number': tr['train_number'],
        'train_name': tr['train_name'],
        'type': tr.get('type', 'N/A'),
        'scheduled_departure': dep,  # minutes, as returned by parse_time
        'priority': priority
    })

# Turn priority ranks into objective weights: the best rank gets the largest
# weight and the worst rank present gets weight 1.
max_p = max(train['priority'] for train in trains)
for train in trains:
    train['weight'] = max_p + 1 - train['priority']

# Minimum separation between consecutive departures, in minutes
HEADWAY = 3

# Precedence constraints: (i, j) means train i must depart before train j.
# Every ordered pair of distinct indices is inspected; a pair is kept when
# train i has a strictly better (smaller) priority rank than train j.
pairs = [
    (first, second)
    for first, second in itertools.permutations(range(len(trains)), 2)
    if trains[first]['priority'] < trains[second]['priority']
]

# Scheduled departure of every train, in the same order as `trains`
s_times = list(map(lambda train: train['scheduled_departure'], trains))

# Brute-force search over every departure order (small datasets only).
# best_times[k] is the chosen departure of trains[k]; best_order is the winning permutation.
best_obj, best_order, best_times = None, None, None
n_trains = len(trains)
for perm in itertools.permutations(range(n_trains)):
    rank = {train_idx: slot for slot, train_idx in enumerate(perm)}
    # Keep only orders in which every precedence pair (i before j) is respected
    if not all(rank[i] <= rank[j] for i, j in pairs):
        continue
    times = [None] * n_trains
    obj = 0
    previous = None
    for train_idx in perm:
        planned = s_times[train_idx]
        # The first train leaves on schedule; later ones wait for the headway if needed
        actual = planned if previous is None else max(planned, previous + HEADWAY)
        times[train_idx] = actual
        # Objective: weighted sum of delays against the scheduled departure
        obj += trains[train_idx]['weight'] * (actual - planned)
        previous = actual
    # Strict '<' keeps the first permutation found among equally good ones
    if best_obj is None or obj < best_obj:
        best_obj, best_order, best_times = obj, perm, times

# Assemble baseline schedule DataFrame: one record per train, pairing it with
# the departure chosen by the search
results = [
    {
        'train_number': train['train_number'],
        'train_name': train['train_name'],
        'priority': train['priority'],
        'scheduled': train['scheduled_departure'],
        'optimized': planned,
        'delay': planned - train['scheduled_departure'],
    }
    for train, planned in zip(trains, best_times)
]

# Rows are shown in order of optimized departure
results_by_time = sorted(results, key=lambda row: row['optimized'])
df = pd.DataFrame(results_by_time)

# Convert minute counts to HH:MM strings
for col in ('scheduled', 'optimized'):
    df[f'{col}_hhmm'] = df[col].map(lambda m: '{:02d}:{:02d}'.format(m // 60, m % 60))

# KPI calculations
avg_delay = df['delay'].mean()
span = df['optimized'].max() - df['optimized'].min()
if span > 0:
    # span is in minutes, so span / 60 is the window length in hours
    throughput = len(df) / (span / 60)
else:
    throughput = float('inf')
print(f'Average delay: {avg_delay:.2f} minutes')
print(f'Throughput: {throughput:.2f} trains/hour')

# Display baseline schedule
display_cols = ['train_number', 'scheduled_hhmm', 'optimized_hhmm', 'delay']
df[display_cols]

Average delay: 0.43 minutes
Throughput: 7.50 trains/hour


Unnamed: 0,train_number,scheduled_hhmm,optimized_hhmm,delay
0,90051.0,05:15,05:15,0
1,93007.0,05:24,05:24,0
2,90063.0,05:28,05:28,0
3,90067.0,05:32,05:32,0
4,90081.0,05:52,05:52,0
5,90095.0,06:08,06:08,0
6,,06:08,06:11,3


In [4]:
# Example: handle a disruption where one train is delayed.
# The scenario is named once so the call and the printed caption cannot drift apart.
DISRUPTED_TRAIN = '90063'
DELAY_MINUTES = 5

# Convert the DataFrame to a list of dicts for the handler.
# to_dict builds new dicts, so the baseline `df` is not modified by the handler.
schedule_list = df.to_dict(orient='records')
updated_list = handle_disruption(schedule_list, disrupted_train_number=DISRUPTED_TRAIN,
                                 delay_minutes=DELAY_MINUTES, headway=HEADWAY)

# Convert back to DataFrame for display and refresh the columns derived from
# 'optimized'; otherwise they would still hold the baseline values.
df_updated = pd.DataFrame(updated_list)
df_updated['delay'] = df_updated['optimized'] - df_updated['scheduled']
df_updated['optimized_hhmm'] = df_updated['optimized'].apply(lambda m: f'{m//60:02d}:{m%60:02d}')


print(f"Schedule after applying a {DELAY_MINUTES} minute delay to train {DISRUPTED_TRAIN}:")
df_updated[['train_number', 'scheduled_hhmm', 'optimized_hhmm', 'priority']]

Schedule after applying a 5 minute delay to train 90063:


Unnamed: 0,train_number,scheduled_hhmm,optimized_hhmm,priority
0,90051.0,05:15,05:15,11
1,93007.0,05:24,05:24,11
2,90067.0,05:32,05:32,11
3,90063.0,05:28,05:35,11
4,90081.0,05:52,05:52,11
5,90095.0,06:08,06:08,11
6,,06:08,06:11,15
