In [None]:
# Random Forest Reggression

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# show Dataset

In [3]:
FILE_PATH = "./Turn_ratio.xlsx"

In [4]:
# important constraints/assumptions while calculating related to PCU

In [5]:
PCU_FACTORS = {
"L": 1.5, # Left turn
"T": 1.0, # Through
"R": 1.0 # Right turn
}


SAT_PER_LANE = 1800 # saturation flow in PCU/hr/lane
DEFAULT_LANES = 2 # assumed lanes per approach
DEFAULT_LOST_TIME = 12 # seconds lost per cycle (yellow + all-red + startup)

In [6]:
# dataset modification to excel sheet

In [7]:
pip install openpyxl

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
data = pd.read_excel(FILE_PATH)

In [9]:
rename_map = {
    "Northbound Left": "NBL",
    "Northbound Thru": "NBT",
    "Northbound Right": "NBR",
    "Southbound Left": "SBL",
    "Southbound Thru": "SBT",
    "Southbound Right": "SBR",
    "Eastbound Left": "EBL",
    "Eastbound Thru": "EBT",
    "Eastbound Right": "EBR",
    "Westbound Left": "WBL",
    "Westbound Thru": "WBT",
    "Westbound Right": "WBR",
}

data = data.rename(columns=rename_map)


In [10]:
def compute_pcus(row):
    pcu_N = row["NB_L"] + row["NB_T"] + row["NB_R"]
    pcu_S = row["SB_L"] + row["SB_T"] + row["SB_R"]
    pcu_E = row["EB_L"] + row["EB_T"] + row["EB_R"]
    pcu_W = row["WB_L"] + row["WB_T"] + row["WB_R"]

    return pd.Series({
        "pcu_N": pcu_N,
        "pcu_S": pcu_S,
        "pcu_E": pcu_E,
        "pcu_W": pcu_W,
        "pcu_NS": pcu_N + pcu_S,  # North-South combined
        "pcu_EW": pcu_E + pcu_W   # East-West combined
    })

pcu_data = data.apply(compute_pcus, axis=1)
data = pd.concat([data, pcu_data], axis=1)


In [11]:
print(data.columns.tolist())

['Int#', 'NB_L', 'NB_T', 'NB_R', 'SB_L', 'SB_T', 'SB_R', 'EB_L', 'EB_T', 'EB_R', 'WB_L', 'WB_T', 'WB_R', 'pcu_N', 'pcu_S', 'pcu_E', 'pcu_W', 'pcu_NS', 'pcu_EW']


In [12]:
# websters formula

In [13]:
print(data.columns.tolist())

['Int#', 'NB_L', 'NB_T', 'NB_R', 'SB_L', 'SB_T', 'SB_R', 'EB_L', 'EB_T', 'EB_R', 'WB_L', 'WB_T', 'WB_R', 'pcu_N', 'pcu_S', 'pcu_E', 'pcu_W', 'pcu_NS', 'pcu_EW']


In [14]:
def webster_cycle_and_splits(row):
    # Step 1: Extract lane flows
    flows = {
        "NB_L": row["NB_L"], "NB_T": row["NB_T"], "NB_R": row["NB_R"],
        "SB_L": row["SB_L"], "SB_T": row["SB_T"], "SB_R": row["SB_R"],
        "EB_L": row["EB_L"], "EB_T": row["EB_T"], "EB_R": row["EB_R"],
        "WB_L": row["WB_L"], "WB_T": row["WB_T"], "WB_R": row["WB_R"]
    }
    
    total_flow = sum(flows.values())
    
    if total_flow == 0:
        return pd.Series({"cycle_length": 0, **{lane+"_green": 0 for lane in flows}})
    
    Y = total_flow / (DEFAULT_LANES * SAT_PER_LANE)
    if Y >= 1:  
        Y = 0.95  # cap to avoid infinite cycle
    
    # Webster's optimal cycle length
    C = (1.5 * DEFAULT_LOST_TIME + 5) / (1 - Y)
    
    C = max(60, min(C, 180))
    
    # Lane-wise green time allocation according to demand
    effective_green = C - DEFAULT_LOST_TIME
    green_splits = {}
    for lane, count in flows.items():
        if count > 0:
            green_splits[lane+"_green"] = (count / total_flow) * effective_green
        else:
            green_splits[lane+"_green"] = 0
    
    return pd.Series({"cycle_length": C, **green_splits})


#  Apply to dataset 
webster_results = data.apply(webster_cycle_and_splits, axis=1)
data = pd.concat([data, webster_results], axis=1)

# dropping invalid rows if any
data = data.dropna()

# results
print("\nSample results with Webster timings:")
cols_to_show = ["cycle_length"] + [c for c in data.columns if "_green" in c]
print(data[cols_to_show].head())




Sample results with Webster timings:
   cycle_length  NB_L_green  NB_T_green  NB_R_green  SB_L_green  SB_T_green  \
0     60.000000    0.622754    1.437126    0.191617   15.137725    0.814371   
1     60.000000    0.603521    0.885163    2.212909    1.367980    1.609388   
2     60.000000    3.441509    6.572507    4.373046    1.138544    6.624259   
3     60.000000    0.000000    0.000000    0.000000    3.652174    0.000000   
4    111.590296    0.871154   20.454690    4.216384    8.746384   20.071382   

   SB_R_green  EB_L_green  EB_T_green  EB_R_green  WB_L_green  WB_T_green  \
0    1.005988    0.431138   14.035928    0.622754    0.335329    8.622754   
1    1.166806    0.523051   17.703269    1.528919    2.414082   16.576697   
2    2.199461    1.371429    5.873854    1.811321    4.787062    7.892183   
3    4.782609    2.811594   16.028986    0.000000    0.000000   18.115942   
4    3.205846    3.554307    5.261769    0.104538    8.258538    9.791768   

   WB_R_green  
0    4.7

In [15]:
# training the model

In [16]:
X_cycle = data[["pcu_NS", "pcu_EW"]]     # Features
y_cycle = data["cycle_length"]           # Target

X_train, X_test, y_train, y_test = train_test_split(X_cycle, y_cycle, test_size=0.2, random_state=42)

cycle_model = LinearRegression()
cycle_model.fit(X_train, y_train)

y_pred_cycle = cycle_model.predict(X_test)
print("=== Cycle Length Model Performance ===")
print("MSE:", mean_squared_error(y_test, y_pred_cycle))
print("R2 Score:", r2_score(y_test, y_pred_cycle))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_cycle)))

# Lane-wise green times model

X_lanes = data[["NB_L", "NB_T", "NB_R",
                "SB_L", "SB_T", "SB_R",
                "EB_L", "EB_T", "EB_R",
                "WB_L", "WB_T", "WB_R"]]   # Traffic per lane

y_lanes = data[[c for c in data.columns if "_green" in c]]

lane_model = RandomForestRegressor(n_estimators=300, random_state=42)
lane_model.fit(X_lanes, y_lanes)


=== Cycle Length Model Performance ===
MSE: 554.7723246609257
R2 Score: 0.7239973886354263
RMSE: 23.553605343151304


In [17]:
# predict the optimised timing

In [18]:
example_cycle = pd.DataFrame({"pcu_NS": [900], "pcu_EW": [700]})
pred_cycle = cycle_model.predict(example_cycle)
print("\nPredicted total cycle length (sec):", pred_cycle[0])

example_lanes = pd.DataFrame([{
    "NB_L": 50, "NB_T": 300, "NB_R": 40,
    "SB_L": 60, "SB_T": 250, "SB_R": 35,
    "EB_L": 70, "EB_T": 320, "EB_R": 45,
    "WB_L": 55, "WB_T": 280, "WB_R": 30
}])

pred_lanes = lane_model.predict(example_lanes)

print("\nPredicted lane-wise green times (sec):")
for lane, green in zip(y_lanes.columns, pred_lanes[0]):
    print(f"{lane}: {green:.2f} sec")



Predicted total cycle length (sec): 56.506844724214446

Predicted lane-wise green times (sec):
NB_L_green: 2.17 sec
NB_T_green: 6.62 sec
NB_R_green: 2.86 sec
SB_L_green: 3.20 sec
SB_T_green: 9.71 sec
SB_R_green: 3.17 sec
EB_L_green: 2.34 sec
EB_T_green: 11.51 sec
EB_R_green: 2.42 sec
WB_L_green: 3.64 sec
WB_T_green: 12.33 sec
WB_R_green: 2.54 sec


In [19]:
# calculates total timing one intersection  takes

In [None]:
# If no Turn_ratio.xlsx or data empty, create dummy data for demo
import os

if 'data' not in globals() or data is None or len(data) == 0:
    demo_rows = [
        {
            'Int#': 1,
            'NB_L': 40, 'NB_T': 320, 'NB_R': 35,
            'SB_L': 50, 'SB_T': 280, 'SB_R': 30,
            'EB_L': 60, 'EB_T': 300, 'EB_R': 40,
            'WB_L': 45, 'WB_T': 260, 'WB_R': 25,
        },
        {
            'Int#': 2,
            'NB_L': 20, 'NB_T': 220, 'NB_R': 25,
            'SB_L': 35, 'SB_T': 240, 'SB_R': 22,
            'EB_L': 30, 'EB_T': 210, 'EB_R': 28,
            'WB_L': 25, 'WB_T': 200, 'WB_R': 20,
        },
    ]
    data = pd.DataFrame(demo_rows)
    print('Loaded dummy demo data with', len(data), 'rows')
else:
    print('Using loaded data with', len(data), 'rows')


In [None]:
# Compute per-lane green/yellow/red schedule from Webster results
YELLOW_TIME = 3  # seconds

def build_phase_schedule(row):
    C = float(row['cycle_length']) if 'cycle_length' in row else 90.0
    effective_green = max(0.0, C - DEFAULT_LOST_TIME)
    # Collect lane greens already computed (from earlier webster_results)
    lane_greens = {c: float(row[c]) for c in row.index if c.endswith('_green')}
    # For a simple diagram, assume each lane gets one contiguous green of its allocated length
    # and shares the same yellow time YELLOW_TIME; red is the remainder in cycle.
    schedule = {}
    t0 = 0.0
    for lane, g in lane_greens.items():
        g = max(0.0, g)
        y = YELLOW_TIME if g > 0 else 0.0
        r = max(0.0, C - (g + y))
        schedule[lane] = {'green': g, 'yellow': y, 'red': r}
    return C, schedule

# Build schedules for the current data (uses previously computed webster_results in notebook)
data_schedules = []
for idx, row in data.iterrows():
    if 'cycle_length' in row:
        C, sched = build_phase_schedule(row)
        data_schedules.append({'index': idx, 'cycle': C, 'schedule': sched})

len(data_schedules)


In [None]:
# UI to render a per-lane phase diagram using ipywidgets and plotly
import ipywidgets as widgets
import plotly.graph_objects as go

lane_order = [
    'NB_L_green','NB_T_green','NB_R_green',
    'SB_L_green','SB_T_green','SB_R_green',
    'EB_L_green','EB_T_green','EB_R_green',
    'WB_L_green','WB_T_green','WB_R_green'
]

row_selector = widgets.IntSlider(value=0, min=0, max=max(0, len(data_schedules)-1), step=1, description='Row')

color_map = {
    'green': '#2ecc71',
    'yellow': '#f1c40f',
    'red': '#e74c3c'
}

def render_phase(idx):
    if not data_schedules:
        print('No schedules available. Run previous cells to compute webster timings.')
        return
    idx = max(0, min(idx, len(data_schedules)-1))
    entry = data_schedules[idx]
    C = entry['cycle']
    sched = entry['schedule']

    fig = go.Figure()
    y_positions = []
    for i, lane_green_col in enumerate(lane_order):
        if lane_green_col not in sched:
            continue
        y = lane_green_col.replace('_green','')
        y_positions.append(y)
        s = sched[lane_green_col]
        t = 0.0
        # Green
        if s['green'] > 0:
            fig.add_trace(go.Bar(x=[s['green']], y=[y], orientation='h', marker_color=color_map['green'], name='green', showlegend=(i==0), base=t))
            t += s['green']
        # Yellow
        if s['yellow'] > 0:
            fig.add_trace(go.Bar(x=[s['yellow']], y=[y], orientation='h', marker_color=color_map['yellow'], name='yellow', showlegend=(i==0), base=t))
            t += s['yellow']
        # Red (rest of cycle)
        red = max(0.0, C - t)
        if red > 0:
            fig.add_trace(go.Bar(x=[red], y=[y], orientation='h', marker_color=color_map['red'], name='red', showlegend=(i==0), base=t))

    fig.update_layout(
        barmode='stack',
        title=f'Phase Diagram (Cycle={C:.1f}s) — Row {entry["index"]}',
        xaxis_title='Time (s)',
        yaxis_title='Lane',
        height=500,
        legend=dict(orientation='h', y=-0.2)
    )
    fig.show()

widgets.interact(render_phase, idx=row_selector)


In [None]:
# Simplified path: use only four approach PCUs (N,S,E,W) and compute 2-phase timings (NS vs EW)
import json

# Try to load from outputs/intersection_summary.json if available
four_pcus = None
try:
    with open('outputs/intersection_summary.json', 'r', encoding='utf-8') as f:
        summary = json.load(f)
    # Expect keys like NB, SB, EB, WB with 'total_pcu'
    n = float(summary.get('NB', {}).get('total_pcu', 0))
    s = float(summary.get('SB', {}).get('total_pcu', 0))
    e = float(summary.get('EB', {}).get('total_pcu', 0))
    w = float(summary.get('WB', {}).get('total_pcu', 0))
    if any([n, s, e, w]):
        four_pcus = {'N': n, 'S': s, 'E': e, 'W': w}
except Exception:
    pass

# If not found, provide a manual input fallback (static dict; edit values as needed)
if four_pcus is None:
    four_pcus = {'N': 500, 'S': 450, 'E': 600, 'W': 550}

four_pcus


In [None]:
# Compute 2-phase Webster timings from four approach totals
N = four_pcus['N']; S = four_pcus['S']; E = four_pcus['E']; W = four_pcus['W']
NS = N + S; EW = E + W

total_flow = NS + EW
if total_flow == 0:
    C = 0.0
    g_NS = g_EW = 0.0
else:
    Y = total_flow / (DEFAULT_LANES * SAT_PER_LANE)
    if Y >= 1:
        Y = 0.95
    C = (1.5 * DEFAULT_LOST_TIME + 5) / (1 - Y)
    C = max(60, min(C, 180))
    effective = C - DEFAULT_LOST_TIME
    # Split cycle between NS and EW proportional to demand
    g_NS = effective * (NS / (NS + EW)) if (NS + EW) > 0 else 0.0
    g_EW = effective - g_NS

# Distribute each phase's green to the two approaches proportionally
NB_green = g_NS * (N / NS) if NS > 0 else 0.0
SB_green = g_NS * (S / NS) if NS > 0 else 0.0
EB_green = g_EW * (E / EW) if EW > 0 else 0.0
WB_green = g_EW * (W / EW) if EW > 0 else 0.0

YELLOW = 3.0

schedule4 = {
    'cycle_length': C,
    'NB': {'green': NB_green, 'yellow': YELLOW, 'red': max(0.0, C - (NB_green + YELLOW))},
    'SB': {'green': SB_green, 'yellow': YELLOW, 'red': max(0.0, C - (SB_green + YELLOW))},
    'EB': {'green': EB_green, 'yellow': YELLOW, 'red': max(0.0, C - (EB_green + YELLOW))},
    'WB': {'green': WB_green, 'yellow': YELLOW, 'red': max(0.0, C - (WB_green + YELLOW))},
}

schedule4


In [None]:
# Plot 4-approach phase diagram (NB/SB/EB/WB)
import plotly.graph_objects as go

C = schedule4['cycle_length']
approaches = ['NB','SB','EB','WB']
colors = {'green':'#2ecc71','yellow':'#f1c40f','red':'#e74c3c'}

fig = go.Figure()
for i, appr in enumerate(approaches):
    segs = schedule4[appr]
    t = 0.0
    if segs['green'] > 0:
        fig.add_trace(go.Bar(x=[segs['green']], y=[appr], orientation='h', marker_color=colors['green'], name='green', showlegend=(i==0), base=t))
        t += segs['green']
    if segs['yellow'] > 0:
        fig.add_trace(go.Bar(x=[segs['yellow']], y=[appr], orientation='h', marker_color=colors['yellow'], name='yellow', showlegend=(i==0), base=t))
        t += segs['yellow']
    red = max(0.0, C - t)
    if red > 0:
        fig.add_trace(go.Bar(x=[red], y=[appr], orientation='h', marker_color=colors['red'], name='red', showlegend=(i==0), base=t))

fig.update_layout(
    barmode='stack',
    title=f'Approach Phase Diagram (Cycle={C:.1f}s)',
    xaxis_title='Time (s)',
    yaxis_title='Approach',
    height=450,
    legend=dict(orientation='h', y=-0.2)
)
fig.show()


In [20]:
PCU_FACTORS = {"L": 1.5, "T": 1.0, "R": 1.0}
SAT_PER_LANE = 1800  # PCU/hr/lane
DEFAULT_LOST_TIME = 12  # sec

# predicted cycle length and lane-wise green times
cycle_length = 120  # sec (from your cycle_model.predict)
example_lanes = pd.DataFrame([{
    "NB_L": 50, "NB_T": 300, "NB_R": 40,
    "SB_L": 60, "SB_T": 250, "SB_R": 35,
    "EB_L": 70, "EB_T": 320, "EB_R": 45,
    "WB_L": 55, "WB_T": 280, "WB_R": 30,
    # green splits predicted from lane_model (dummy values here for demo)
    "NB_L_green": 10, "NB_T_green": 25, "NB_R_green": 8,
    "SB_L_green": 12, "SB_T_green": 22, "SB_R_green": 7,
    "EB_L_green": 14, "EB_T_green": 28, "EB_R_green": 9,
    "WB_L_green": 11, "WB_T_green": 24, "WB_R_green": 6
}])

def compute_total_delay(row, cycle_length, sat_per_lane=SAT_PER_LANE):
    """
    Compute total intersection delay using Webster's formula.
    """
    total_delay = 0

    for lane in ["NB_L","NB_T","NB_R","SB_L","SB_T","SB_R","EB_L","EB_T","EB_R","WB_L","WB_T","WB_R"]:
        q = row[lane]   # demand flow (veh/hr)
        q_per_sec = q / 3600.0  

        # PCU factor
        if "_L" in lane: 
            pcu_factor = PCU_FACTORS["L"]
        elif "_T" in lane: 
            pcu_factor = PCU_FACTORS["T"]
        else: 
            pcu_factor = PCU_FACTORS["R"]

        demand = q * pcu_factor  
        s = sat_per_lane  
        x = demand / s    

        g = row[f"{lane}_green"]  

        if g <= 0 or cycle_length <= 0 or x >= 1:
            continue  

        # Webster's average delay per vehicle (sec/veh)
        d = (0.5 * cycle_length * (1 - g/cycle_length)**2) / (1 - min(1, x) * g/cycle_length)

        # total delay (sec) = delay per veh × vehicles
        D = d * q  
        total_delay += D

    return total_delay

# ouput for first example
row = example_lanes.iloc[0]
total_intersection_delay = compute_total_delay(row, cycle_length)

print("\nTotal intersection delay:", total_intersection_delay, "seconds")



Total intersection delay: 64216.10129228247 seconds


In [21]:
# average delay per vehicle

In [22]:
avg_delay_per_vehicle = total_intersection_delay / row[["NB_L","NB_T","NB_R","SB_L","SB_T","SB_R","EB_L","EB_T","EB_R","WB_L","WB_T","WB_R"]].sum()
print("Average delay per vehicle:", avg_delay_per_vehicle, "sec/veh")


Average delay per vehicle: 41.83459367575405 sec/veh
