<a href="https://colab.research.google.com/github/unnatikdm/trainAIschedule/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# %% High-end MILP train scheduler (Gurobi→CPLEX→CBC fallback) + disruption handling
# Installs (safe to run in Colab; no-op if already present)
import sys, subprocess
def _pip_install(pkgs):
    try:
        import pkg_resources  # noqa
    except ImportError:
        pass
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q"] + pkgs)
    except Exception as e:
        print("Pip install warning:", e)

# Try PuLP (CBC) first, it also gives access to commercial solvers if available
try:
    import pulp
except ImportError:
    _pip_install(["pulp"])
    import pulp

import json
import math
import pandas as pd
from pathlib import Path

# -----------------------------
# Config
# -----------------------------
JSON_PATH = "train_schedules.json"
HEADWAY_MIN = 3  # minutes; change as needed
VERBOSE = False  # set True to see solver logs

# -----------------------------
# Parsing + priority
# -----------------------------
def parse_time_with_day(t: str):
    """
    Supports strings like '05:15 (Day 1)' or '22:08' or markers 'Source', 'Destination'.
    Returns minutes from Day 1 00:00 as integer, or None if non-time marker.
    """
    if not isinstance(t, str):
        return None
    lower = t.lower()
    if lower.startswith('source') or lower.startswith('destination'):
        return None
    # split out "(Day k)" if present
    base = t.split('(')[0].strip()
    hh, mm = base.split(':')
    minutes = int(hh) * 60 + int(mm)
    # day offset
    day = 1
    if '(' in t and 'day' in lower:
        try:
            dpart = lower.split('day')[-1]
            dnum = ''.join(ch for ch in dpart if ch.isdigit())
            if dnum:
                day = int(dnum)
        except Exception:
            day = 1
    return minutes + (day - 1) * 1440

priority_order = {
    'vande bharat': 1, 'rajdhani': 2, 'tejas': 3, 'duronto': 4,
    'shatabdi': 5, 'gatiman': 6, 'garib rath': 7, 'jan shatabdi': 8,
    'superfast': 9, 'express': 10, 'fast': 11, 'local': 12,
    'passenger': 13, 'freight': 14
}

def get_priority(name, typ):
    n, t = (name or '').lower(), (typ or '').lower()
    for k, p in priority_order.items():
        if k in n or k in t:
            return p
    return 15  # default lowest priority

def fmt_minutes(m):
    day = m // 1440 + 1
    mins = m % 1440
    hh = mins // 60
    mm = mins % 60
    return f"{hh:02d}:{mm:02d} (Day {day})"

# -----------------------------
# Data load
# -----------------------------
with open(JSON_PATH, "r") as f:
    data = json.load(f)

# Build train list with first usable departure
trains = []
for idx, tr in enumerate(data):
    dep = None
    for halt in tr.get('halt_schedule', []):
        val = parse_time_with_day(halt.get('departure_time', ''))
        if val is not None:
            dep = val
            break
    if dep is None:
        continue  # skip if no timed departure
    pr = get_priority(tr.get('train_name', ''), tr.get('type', ''))
    trains.append({
        "idx": idx,
        "train_number": str(tr.get('train_number', f"T{idx}")),
        "train_name": tr.get('train_name', f"Train {idx}"),
        "type": tr.get('type', 'N/A'),
        "scheduled": dep,
        "priority": pr
    })

if not trains:
    raise ValueError("No trains with valid departure times found in JSON.")

# Weights: higher priority → larger weight
max_p = max(t['priority'] for t in trains)
for t in trains:
    t['weight'] = max_p - t['priority'] + 1

# -----------------------------
# MILP builder/solver
# -----------------------------
def available_solvers():
    try:
        return set(pulp.listSolvers(onlyAvailable=True))
    except Exception:
        return set()

def choose_solver(verbose=False):
    av = available_solvers()
    # Prefer Gurobi if available
    if "GUROBI" in av:
        return pulp.GUROBI(msg=1 if verbose else 0)
    if "GUROBI_CMD" in av:
        return pulp.GUROBI_CMD(msg=1 if verbose else 0)
    # Try CPLEX if present
    if "CPLEX_PY" in av:
        return pulp.CPLEX_PY(msg=1 if verbose else 0)
    if "CPLEX_CMD" in av:
        return pulp.CPLEX_CMD(msg=1 if verbose else 0)
    # CBC default (shipped with PuLP)
    return pulp.PULP_CBC_CMD(msg=1 if verbose else 0)

def solve_schedule(trains_list, headway=3, enforce_priority=True, verbose=False):
    """
    trains_list: list of dicts with keys ['idx','train_number','train_name','scheduled','priority','weight']
    Returns dict: {'status', 'obj', 't_opt', 'order_pairs_fixed'}
      - t_opt: {i: optimized_time_minutes}
    """
    n = len(trains_list)
    idxs = list(range(n))
    s = {i: trains_list[i]['scheduled'] for i in idxs}
    w = {i: trains_list[i]['weight'] for i in idxs}
    pr = {i: trains_list[i]['priority'] for i in idxs}

    # Bounding window for Big-M (tight-ish)
    s_min, s_max = min(s.values()), max(s.values())
    horizon = (s_max - s_min) + 6 * headway + 1440  # allow some slack
    M = max(1440, horizon)  # minutes; safe big-M

    # Model
    m = pulp.LpProblem("Train_Departure_Scheduling", pulp.LpMinimize)

    # Decision vars
    t = {i: pulp.LpVariable(f"t_{i}", lowBound=s[i], cat="Continuous") for i in idxs}
    # Single binary for each unordered pair (i<j): x_ij = 1 means i BEFORE j
    x = {}
    for i in idxs:
        for j in idxs:
            if i < j:
                x[(i, j)] = pulp.LpVariable(f"x_{i}_{j}", lowBound=0, upBound=1, cat="Binary")

    # Objective: minimize weighted total delay
    m += pulp.lpSum([w[i] * (t[i] - s[i]) for i in idxs])

    # Disjunctive headway constraints for all pairs
    for i in idxs:
        for j in idxs:
            if i < j:
                # If x_ij = 1 → j after i by headway
                m += t[j] >= t[i] + headway - M * (1 - x[(i, j)])
                # If x_ij = 0 → i after j by headway
                m += t[i] >= t[j] + headway - M * (x[(i, j)])

    # Priority-enforced precedence: higher-priority must depart before lower-priority
    fixed_pairs = []
    if enforce_priority:
        for i in idxs:
            for j in idxs:
                if i == j:
                    continue
                if pr[i] < pr[j]:
                    # force i before j
                    if i < j:
                        m += x[(i, j)] == 1
                        fixed_pairs.append((i, j))
                    else:
                        m += x[(j, i)] == 0  # since (j,i) var encodes j before i
                        fixed_pairs.append((i, j))

    # Small tiebreaker (optional): discourage overly late schedules
    # Add tiny penalty on absolute time to prefer earlier feasible ordering
    eps = 1e-3
    m += eps * pulp.lpSum([t[i] for i in idxs])

    # Solve
    solver = choose_solver(verbose=verbose)
    status = m.solve(solver)

    status_str = pulp.LpStatus[status]
    if verbose:
        print("Solver:", solver)
        print("Status:", status_str)

    if status_str != "Optimal" and status_str != "Feasible":
        raise RuntimeError(f"MILP solution not found (status: {status_str})")

    t_opt = {i: int(round(pulp.value(t[i]))) for i in idxs}
    obj = float(pulp.value(m.objective))

    return {
        "status": status_str,
        "obj": obj,
        "t_opt": t_opt,
        "order_pairs_fixed": fixed_pairs
    }

# -----------------------------
# Baseline solve
# -----------------------------
solution = solve_schedule(trains, headway=HEADWAY_MIN, enforce_priority=True, verbose=VERBOSE)
t_opt = solution["t_opt"]

# Assemble schedule DataFrame
rows = []
for i, tr in enumerate(trains):
    rows.append({
        "train_number": tr["train_number"],
        "train_name": tr["train_name"],
        "type": tr["type"],
        "priority": tr["priority"],
        "weight": tr["weight"],
        "scheduled": tr["scheduled"],
        "optimized": t_opt[i],
        "delay": t_opt[i] - tr["scheduled"]
    })
df = pd.DataFrame(rows)
df.sort_values("optimized", inplace=True)
df["scheduled_str"] = df["scheduled"].apply(fmt_minutes)
df["optimized_str"]  = df["optimized"].apply(fmt_minutes)

# KPIs
avg_delay = df["delay"].mean()
span = df["optimized"].max() - df["optimized"].min()
throughput = len(df) / (span / 60) if span > 0 else float("inf")

print(f"== Baseline MILP schedule ==")
print(f"Status: {solution['status']} | Objective (weighted delay + eps): {solution['obj']:.3f}")
print(f"Average delay: {avg_delay:.2f} minutes")
print(f"Throughput: {throughput:.2f} trains/hour\n")
display_cols = ["train_number", "train_name", "priority", "scheduled_str", "optimized_str", "delay"]
print(df[display_cols].to_string(index=False))

# -----------------------------
# Disruption handler via re-optimization
# -----------------------------
def reoptimize_with_delay(trains_list, disrupted_train_number: str, delay_minutes: int, headway=3):
    # Copy & shift scheduled time for the disrupted train, then re-solve
    tl = []
    for tr in trains_list:
        trc = tr.copy()
        if trc["train_number"] == str(disrupted_train_number):
            trc["scheduled"] += int(delay_minutes)
        tl.append(trc)
    sol = solve_schedule(tl, headway=headway, enforce_priority=True, verbose=VERBOSE)
    rows2 = []
    for i, tr in enumerate(tl):
        rows2.append({
            "train_number": tr["train_number"],
            "train_name": tr["train_name"],
            "priority": tr["priority"],
            "scheduled": tr["scheduled"],
            "optimized": sol["t_opt"][i],
            "delay": sol["t_opt"][i] - tr["scheduled"]
        })
    df2 = pd.DataFrame(rows2).sort_values("optimized")
    df2["scheduled_str"] = df2["scheduled"].apply(fmt_minutes)
    df2["optimized_str"] = df2["optimized"].apply(fmt_minutes)
    return sol, df2

# Example disruption: add 5 minutes to train '90063' (if present)
example_train_id = "90063"
if any(tr["train_number"] == example_train_id for tr in trains):
    sol2, df2 = reoptimize_with_delay(trains, disrupted_train_number=example_train_id,
                                      delay_minutes=5, headway=HEADWAY_MIN)
    print("\n== Schedule after applying a 5-minute delay to train 90063 ==")
    print(df2[["train_number","scheduled_str","optimized_str","priority","delay"]].to_string(index=False))

# -----------------------------
# Simple path printout: train id + stations
# -----------------------------
print("\n== Train paths (ID + stations) ==")
for tr in data:
    tid = str(tr.get("train_number", "NA"))
    tname = tr.get("train_name", "NA")
    stations = [halt.get("station_name","") for halt in tr.get("halt_schedule", []) if halt.get("station_name")]
    print(f"Train ID: {tid}, Name: {tname}")
    print("Stations:", " -> ".join(stations))
    print("-" * 20)


  import pkg_resources  # noqa


== Baseline MILP schedule ==
Status: Optimal | Objective (weighted delay + eps): 2.390
Average delay: 0.43 minutes
Throughput: 7.50 trains/hour

train_number                                 train_name  priority scheduled_str optimized_str  delay
       90051                                CCG VR FAST        11 05:15 (Day 1) 05:15 (Day 1)      0
       93007                     CCG PALGHAR FAST LOCAL        11 05:24 (Day 1) 05:24 (Day 1)      0
       90063                                CCG VR FAST        11 05:28 (Day 1) 05:28 (Day 1)      0
       90067                                CCG VR FAST        11 05:32 (Day 1) 05:32 (Day 1)      0
       90081                             CCG VIRAR FAST        11 05:52 (Day 1) 05:52 (Day 1)      0
       90095                                CCG VR FAST        11 06:08 (Day 1) 06:08 (Day 1)      0
         N/A Unspecified Train (based on halt schedule)        15 06:08 (Day 1) 06:11 (Day 1)      3

== Schedule after applying a 5-minute delay to

