# GPU Acceleration Demo: VRPTW Optimization CPU vs GPU

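This notebook demonstrates GPU acceleration for the Vehicle Routing Problem with Time Windows (VRPTW), comparing OR-Tools (CPU) against cuOpt (GPU) on Gehring & Homberger benchmark instances: the 200-customer C2 series by default, or the 1000-customer RC2 series when `USE_SAMPLE` is set to `False`.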

**Objectives:**
- Compare CPU vs GPU performance on VRPTW optimization
- Measure solve-time speedups
- Verify solution feasibility and quality
- Demonstrate minimal migration effort (≤5 lines changed)

## Setup and Configuration

In [None]:
import os
import fnmatch
import numpy as np
import pandas as pd
import zipfile
import traceback

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

from cuopt.routing import DataModel, Solve, SolverSettings
import cudf

# Project-local helper modules (Homberger parsing and timing utilities)
from utils.homberger_to_parquet import parse_homberger_file
from utils.timing import set_cpu_threads, run_timed

# Run configuration, named so the values are easy to find and tune.
RANDOM_SEED = 42
CPU_THREADS = 12

# Seed NumPy's global RNG for reproducibility.
# NOTE(review): this seeds NumPy only — confirm whether OR-Tools / cuOpt need
# their own seed settings for repeatable solver runs.
np.random.seed(RANDOM_SEED)

# Configure CPU threads for fair comparison
set_cpu_threads(CPU_THREADS)

## Get and Prepare Dataset

In [2]:
USE_SAMPLE = True  # Set to False for full dataset (1000 customers)
extract_dir = "data/homberger"

# Pick the archive and the instance-name regex for the selected dataset size
if not USE_SAMPLE:
    # RC2 series, 1000 customers
    zip_file, instance_pattern = "data/homberger_1000_customer_instances.zip", r"rc2.*\.txt"
    print("Using FULL dataset (RC2 series - ~1000 customers)")
else:
    # C2 series, 200 customers
    zip_file, instance_pattern = "data/homberger_200_customer_instances.zip", r"c2.*\.txt"
    print("Using SAMPLE dataset (C2 series - ~200 customers)")

# Make sure the working directory for extracted instances exists
os.makedirs(extract_dir, exist_ok=True)

def extract_and_parse_homberger():
    """Extract one Homberger VRPTW instance from the ZIP archive and parse it.

    Reads the module-level settings ``zip_file``, ``extract_dir`` and
    ``USE_SAMPLE``. The first archive member whose base name matches the
    series glob (``C2_*`` for the sample, ``RC2_*`` otherwise, compared
    case-insensitively) is used. If nothing matches, the first regular file
    in the archive is used instead.

    Returns:
        The ``(customers_df, params)`` pair produced by
        ``parse_homberger_file``.

    Raises:
        FileNotFoundError: If the archive does not exist or contains no files.
    """
    if not os.path.exists(zip_file):
        raise FileNotFoundError(f"Data file not found: {zip_file}")

    print(f"📁 Extracting from: {os.path.basename(zip_file)}")

    pattern = "C2_*" if USE_SAMPLE else "RC2_*"
    temp_path = os.path.join(extract_dir, "temp_instance.txt")

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Directory entries (names ending in "/") hold no instance data and
        # must never be selected, not even by the fallback below.
        member_files = [name for name in zip_ref.namelist() if not name.endswith('/')]
        if not member_files:
            raise FileNotFoundError(f"No instance files found in: {zip_file}")

        # Match on the upper-cased base name so the result does not depend on
        # the platform's case rules or on how the archive spells its members.
        matching_files = [
            name for name in member_files
            if fnmatch.fnmatchcase(os.path.basename(name).upper(), pattern)
        ]

        if not matching_files:
            # Fallback: use any file
            matching_files = member_files

        instance_file = matching_files[0]
        print(f"📋 Using instance: {os.path.basename(instance_file)}")

        try:
            # The parser takes a path, so stage the member in a temporary
            # file. Writing inside the try block guarantees cleanup even when
            # the copy itself fails part-way.
            with zip_ref.open(instance_file) as source, open(temp_path, 'wb') as target:
                target.write(source.read())

            customers_df, params = parse_homberger_file(temp_path)
            return customers_df, params
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

# Extract and parse the data
customers_df, vrptw_params = extract_and_parse_homberger()

print(f"\n📊 VRPTW Instance: {vrptw_params['instance']}")
print(f"Customers: {len(customers_df)}")
print(f"Vehicles: {vrptw_params['K']}")
print(f"Capacity: {vrptw_params['Q']}")
print(f"Depot: ({vrptw_params['depot']['x']}, {vrptw_params['depot']['y']})")
print("\n✅ Data loaded successfully")
print("Customer data schema:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that appended a stray "None" line).
customers_df.info()

Using SAMPLE dataset (C2 series - ~200 customers)
📁 Extracting from: homberger_200_customer_instances.zip
📋 Using instance: C2_2_1.TXT

📊 VRPTW Instance: c2_2_1
Customers: 200
Vehicles: 6
Capacity: 700
Depot: (70.0, 70.0)

✅ Data loaded successfully
Customer data schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   customer_id   200 non-null    int32  
 1   x             200 non-null    float32
 2   y             200 non-null    float32
 3   demand        200 non-null    int16  
 4   tw_start      200 non-null    int32  
 5   tw_end        200 non-null    int32  
 6   service_time  200 non-null    int16  
dtypes: float32(2), int16(2), int32(3)
memory usage: 4.8 KB
None


In [3]:
def prepare_vrptw_data(customers_df, params):
    """Convert the parsed instance into the dict consumed by the solvers.

    The depot described by ``params['depot']`` is prepended as location 0
    (``customer_id`` 0, demand 0), all locations are ordered by
    ``customer_id``, and a Euclidean distance matrix truncated to whole
    numbers is built between every pair of locations.

    Args:
        customers_df: DataFrame with the columns ``customer_id``, ``x``,
            ``y``, ``demand``, ``tw_start``, ``tw_end`` and ``service_time``.
        params: Instance parameters. Reads ``params['depot']`` (keys ``x``,
            ``y``, ``tw_start``, ``tw_end``, ``service_time``), ``params['K']``
            (number of vehicles) and ``params['Q']`` (vehicle capacity).

    Returns:
        Tuple ``(data, all_locations)`` where ``data`` is a dict with the keys
        ``distance_matrix``, ``demands``, ``time_windows``, ``service_times``,
        ``num_vehicles``, ``vehicle_capacity`` and ``depot``, and
        ``all_locations`` is the combined depot + customer DataFrame.
    """
    # Add depot as customer 0
    depot = params['depot']
    depot_row = pd.DataFrame({
        'customer_id': [0],
        'x': [depot['x']],
        'y': [depot['y']],
        'demand': [0],
        'tw_start': [depot['tw_start']],
        'tw_end': [depot['tw_end']],
        'service_time': [depot['service_time']]
    })

    # Combine depot and customers
    all_locations = pd.concat([depot_row, customers_df], ignore_index=True)
    all_locations = all_locations.sort_values('customer_id').reset_index(drop=True)

    # Pairwise Euclidean distances via broadcasting. This performs the same
    # float64 arithmetic as a per-pair loop but without n^2 DataFrame row
    # lookups, which dominate the runtime on the 1000-customer instances.
    xs = all_locations['x'].to_numpy(dtype=float)
    ys = all_locations['y'].to_numpy(dtype=float)
    dx = xs[:, None] - xs[None, :]
    dy = ys[:, None] - ys[None, :]
    # Distances are non-negative, so the integer cast truncates exactly like
    # int() on each scalar would.
    distance_matrix = np.sqrt(dx * dx + dy * dy).astype(int)

    # Convert to lists for OR-Tools
    data = {
        'distance_matrix': distance_matrix.tolist(),
        'demands': all_locations['demand'].tolist(),
        'time_windows': list(zip(all_locations['tw_start'], all_locations['tw_end'])),
        'service_times': all_locations['service_time'].tolist(),
        'num_vehicles': params['K'],
        'vehicle_capacity': params['Q'],
        'depot': 0
    }

    return data, all_locations

vrptw_data, locations_df = prepare_vrptw_data(customers_df, vrptw_params)

print("✅ VRPTW data prepared:")
# The matrix has one row per location including the depot, so subtracting one
# gives the number of customers; label it as such rather than as "Locations".
print(f"Customers (excluding depot): {len(vrptw_data['distance_matrix']) - 1}")
print(f"Vehicles: {vrptw_data['num_vehicles']}")
print(f"Max distance: {np.max(vrptw_data['distance_matrix'])}")
print(f"Total demand: {sum(vrptw_data['demands'])}")

✅ VRPTW data prepared:
Locations: 200
Vehicles: 6
Max distance: 184
Total demand: 3770


## CPU Optimization - OR-Tools

In [None]:
def solve_vrptw_ortools(data, use_relaxed_time_windows=False, num_vehicles=7,
                        time_limit_s=1200, lns_time_limit_s=120):
    """Solve a VRPTW instance with the OR-Tools routing solver.

    Args:
        data: Problem dict with the keys ``distance_matrix``, ``demands``,
            ``time_windows``, ``service_times``, ``vehicle_capacity`` and
            ``depot``.
        use_relaxed_time_windows: When True, every non-depot window is widened
            to ``[0.8 * start, 1.2 * end]`` (start clamped at 0) and each
            relaxed window is printed. When False the exact windows are used.
        num_vehicles: Fleet size given to the solver. Defaults to 7;
            ``data['num_vehicles']`` is not consulted.
        time_limit_s: Overall search time limit in seconds.
        lns_time_limit_s: Value, in seconds, for the ``lns_time_limit`` search
            parameter.

    Returns:
        Dict with the keys ``feasible``, ``objective`` (summed arc cost of the
        used routes, or ``inf``), ``routes`` (node sequences that start and
        end at the depot), ``num_routes``, ``customers_served`` and
        ``avg_utilization`` (mean load / capacity over the used vehicles).
    """
    print(f"🚛 Using {num_vehicles} vehicles for OR-Tools solve")

    manager = pywrapcp.RoutingIndexManager(
        len(data['distance_matrix']),
        num_vehicles,
        data['depot']
    )

    routing = pywrapcp.RoutingModel(manager)

    # Arc cost: integer distance between the two nodes
    def distance_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(data['distance_matrix'][from_node][to_node])

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Capacity constraint: cumulative demand per vehicle, no slack
    def demand_callback(from_index):
        from_node = manager.IndexToNode(from_index)
        return int(data['demands'][from_node])

    demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        [data['vehicle_capacity']] * num_vehicles,
        True,  # start cumul to zero
        'Capacity'
    )

    # Time transit: service at the origin node plus travel time, where travel
    # time is taken to be equal to the distance.
    def time_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        service_time = data['service_times'][from_node]
        travel_time = data['distance_matrix'][from_node][to_node]
        return int(service_time + travel_time)

    time_callback_index = routing.RegisterTransitCallback(time_callback)

    # Generous horizon: twice the latest window end, used both as the maximum
    # waiting slack and as the per-vehicle upper bound.
    max_tw_end = max(tw[1] for tw in data['time_windows'])
    horizon = max_tw_end * 2

    routing.AddDimension(
        time_callback_index,
        horizon,  # allow waiting time
        horizon,  # maximum time per vehicle
        False,  # don't force start cumul to zero
        'Time'
    )
    time_dimension = routing.GetDimensionOrDie('Time')

    # Time window constraints per location
    depot_tw = data['time_windows'][data['depot']]
    for location_idx, time_window in enumerate(data['time_windows']):
        index = manager.NodeToIndex(location_idx)
        if index != -1:
            if use_relaxed_time_windows and location_idx != data['depot']:
                # Optional: widen the window for comparison runs
                tw_start = max(0, int(time_window[0] * 0.8))
                tw_end = int(time_window[1] * 1.2)
                time_dimension.CumulVar(index).SetRange(tw_start, tw_end)
                print(f"RELAXED TW for location {location_idx}: [{tw_start}, {tw_end}] (original: [{time_window[0]}, {time_window[1]}])")
            else:
                # Exact time windows
                time_dimension.CumulVar(index).SetRange(int(time_window[0]), int(time_window[1]))

    # Every vehicle must leave and return within the depot window
    for vehicle_id in range(num_vehicles):
        start_index = routing.Start(vehicle_id)
        end_index = routing.End(vehicle_id)
        time_dimension.CumulVar(start_index).SetRange(int(depot_tw[0]), int(depot_tw[1]))
        time_dimension.CumulVar(end_index).SetRange(int(depot_tw[0]), int(depot_tw[1]))

    # Search parameters
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PARALLEL_CHEAPEST_INSERTION
    search_parameters.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
    # NOTE: with a metaheuristic such as GUIDED_LOCAL_SEARCH the search keeps
    # going until a limit is hit, so the measured solve time is essentially
    # time_limit_s rather than a time-to-convergence.
    search_parameters.time_limit.FromSeconds(time_limit_s)

    # lns_time_limit bounds the time spent on each individual local-search
    # neighbour; it is not a convergence-based stopping rule.
    search_parameters.lns_time_limit.FromSeconds(lns_time_limit_s)

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

    if not solution:
        return {
            'feasible': False,
            'objective': float('inf'),
            'routes': [],
            'num_routes': 0,
            'customers_served': 0,
            'avg_utilization': 0.0
        }

    total_distance = 0
    routes = []
    total_served = 0
    total_utilization = 0
    used_vehicles = 0

    for vehicle_id in range(num_vehicles):
        index = routing.Start(vehicle_id)
        route = []
        route_distance = 0
        route_demand = 0

        # Walk the vehicle's chain of NextVar values from start to end
        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            route.append(node_index)
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)
            if node_index != data['depot']:
                route_demand += data['demands'][node_index]

        route.append(manager.IndexToNode(index))  # Add depot at end

        if len(route) > 2:  # Only count routes with customers
            routes.append(route)
            total_distance += route_distance
            total_served += len(route) - 2  # exclude the two depot visits
            total_utilization += route_demand / data['vehicle_capacity'] if data['vehicle_capacity'] > 0 else 0
            used_vehicles += 1

    avg_utilization = total_utilization / used_vehicles if used_vehicles > 0 else 0.0

    return {
        'feasible': True,
        'objective': total_distance,
        'routes': routes,
        'num_routes': len(routes),
        'customers_served': total_served,
        'avg_utilization': avg_utilization
    }

In [16]:
# Solve with OR-Tools (the solver function uses a 7-vehicle fleet by default)
cpu_solution, cpu_solve_time = run_timed(
    "CPU Solve (OR-Tools)", 
    lambda: solve_vrptw_ortools(vrptw_data),
    use_gpu=False
)
if cpu_solution['feasible']:
    print("\n📊 CPU Results (OR-Tools):")
    print(f"Feasible: {cpu_solution['feasible']}")
    print(f"Routes: {cpu_solution['num_routes']}")
    print(f"Customers served: {cpu_solution['customers_served']}/{len(customers_df)}")
    print(f"Total route distance: {cpu_solution['objective']}")
    print(f"Average vehicle utilization: {cpu_solution['avg_utilization']:.2f}")
    print(f"Solve time: {cpu_solve_time:.3f}s")

    print("\n✅ CPU optimization completed")
else:
    # Report the failure explicitly instead of finishing the cell silently
    print(f"\n❌ CPU optimization found no feasible solution (elapsed: {cpu_solve_time:.3f}s)")

🚛 Using 7 vehicles for OR-Tools solve


AttributeError: local_search_time_limit

## GPU Optimization - cuOpt

In [None]:
def solve_vrptw_cuopt(data, n_vehicles=7, time_limit_s=600):
    """Solve a VRPTW instance with NVIDIA cuOpt.

    Args:
        data: Problem dict with the keys ``distance_matrix``, ``demands``,
            ``time_windows``, ``service_times``, ``vehicle_capacity`` and
            ``depot``. Location 0 is treated as the depot when orders are
            built (every other location becomes one order).
        n_vehicles: Fleet size given to the solver. Defaults to 7;
            ``data['num_vehicles']`` is not consulted.
        time_limit_s: Solver time limit in seconds.

    Returns:
        Dict with the keys ``feasible``, ``objective``, ``routes`` (location
        sequences per used vehicle), ``num_routes``, ``customers_served`` and
        ``avg_utilization``. Any exception raised while building or solving
        the model is printed with its traceback and reported as an infeasible
        result rather than propagated.
    """

    def _infeasible_result():
        # Shared shape for every failure path
        return {
            'feasible': False,
            'objective': float('inf'),
            'routes': [],
            'num_routes': 0,
            'customers_served': 0,
            'avg_utilization': 0.0
        }

    n_locations = len(data['distance_matrix'])
    n_orders = n_locations - 1

    print(f"🚛 Using {n_vehicles} vehicles for cuOpt (GPU-native)")

    try:
        # Create cuOpt DataModel
        data_model = DataModel(n_locations, n_vehicles, n_orders)

        # Cost matrix as a float32 cuDF frame
        distance_matrix_cudf = cudf.DataFrame(data['distance_matrix'], dtype='float32')
        data_model.add_cost_matrix(distance_matrix_cudf)

        # Capacity dimension: per-order demand (depot excluded) vs. vehicle capacity
        demands_cudf = cudf.Series(data['demands'][1:], dtype='int32')
        vehicle_capacities_cudf = cudf.Series([data['vehicle_capacity']] * n_vehicles, dtype='int32')
        data_model.add_capacity_dimension("demand", demands_cudf, vehicle_capacities_cudf)

        # Order i is located at location i + 1 (the depot is not an order)
        order_indices = cudf.Series(range(1, n_locations), dtype='int32')
        data_model.set_order_locations(order_indices)

        # Order time windows (depot excluded)
        earliest_times = cudf.Series([int(tw[0]) for tw in data['time_windows'][1:]], dtype='int32')
        latest_times = cudf.Series([int(tw[1]) for tw in data['time_windows'][1:]], dtype='int32')
        data_model.set_order_time_windows(earliest_times, latest_times)

        # Order service times (depot excluded)
        service_times = cudf.Series([int(st) for st in data['service_times'][1:]], dtype='int32')
        data_model.set_order_service_times(service_times)

        # All vehicles start and end at the depot
        depot_idx = data['depot']
        vehicle_start_locs = cudf.Series([depot_idx] * n_vehicles, dtype='int32')
        vehicle_end_locs = cudf.Series([depot_idx] * n_vehicles, dtype='int32')
        data_model.set_vehicle_locations(vehicle_start_locs, vehicle_end_locs)

        # Apply the depot window to every vehicle so this model carries the
        # same leave/return restriction that the OR-Tools model places on
        # vehicle start and end nodes.
        # NOTE(review): confirm set_vehicle_time_windows(earliest, latest)
        # against the installed cuOpt version.
        depot_tw = data['time_windows'][depot_idx]
        vehicle_earliest = cudf.Series([int(depot_tw[0])] * n_vehicles, dtype='int32')
        vehicle_latest = cudf.Series([int(depot_tw[1])] * n_vehicles, dtype='int32')
        data_model.set_vehicle_time_windows(vehicle_earliest, vehicle_latest)

        # Configure solver settings
        solver_settings = SolverSettings()
        solver_settings.set_time_limit(time_limit_s)

        print("🚀 Starting cuOpt solve (GPU-accelerated)...")
        routing_solution = Solve(data_model, solver_settings)

        status = routing_solution.get_status()
        print(f"Solution status: {status}")

        # The code treats status 0 as success and anything else as failure
        if status != 0:
            print(f"❌ cuOpt solver failed with status: {status}")
            return _infeasible_result()

        # Pull the route table back to pandas for extraction
        route_assignment = routing_solution.get_route()
        route_df = route_assignment.to_pandas()

        routes = []
        customers_served = 0
        total_utilization = 0
        used_vehicles = 0

        # Get unique truck IDs that were actually used
        used_truck_ids = sorted(route_df['truck_id'].unique())
        print(f"Used truck IDs: {used_truck_ids}")

        for truck_id in used_truck_ids:
            # Keep this truck's rows in the order cuOpt returned them.
            # NOTE(review): the 'route' column appears to hold the order/node
            # id of each stop rather than a stop sequence number, so sorting
            # on it would scramble the visiting order — confirm against the
            # cuOpt get_route() documentation.
            truck_stops = route_df[route_df['truck_id'] == truck_id]

            if len(truck_stops) > 0:
                # Extract the sequence of locations visited
                route_sequence = truck_stops['location'].tolist()

                # Count customers (excluding depot visits)
                customers_in_route = sum(1 for loc in route_sequence if loc != depot_idx)

                if customers_in_route > 0:
                    routes.append(route_sequence)
                    customers_served += customers_in_route
                    used_vehicles += 1

                    # Vehicle utilization = carried demand / capacity
                    route_demand = sum(data['demands'][loc] for loc in route_sequence if loc != depot_idx)
                    utilization = route_demand / data['vehicle_capacity'] if data['vehicle_capacity'] > 0 else 0
                    total_utilization += utilization

                    print(f"Truck {truck_id}: {len(route_sequence)} stops, {customers_in_route} customers, demand={route_demand}")

        # Prefer the solver-reported objective; fall back to summing arc
        # distances along the extracted routes if neither accessor exists.
        total_cost = 0
        if hasattr(routing_solution, 'get_total_objective'):
            total_cost = float(routing_solution.get_total_objective())
        elif hasattr(routing_solution, 'total_objective_value'):
            total_cost = float(routing_solution.total_objective_value)
        else:
            for route in routes:
                for from_node, to_node in zip(route, route[1:]):
                    total_cost += data['distance_matrix'][from_node][to_node]

        avg_utilization = total_utilization / used_vehicles if used_vehicles > 0 else 0.0

        print(f"Extracted {len(routes)} routes serving {customers_served} customers")
        print(f"Total cost: {total_cost}")

        return {
            'feasible': True,
            'objective': int(total_cost) if total_cost else 0,
            'routes': routes,
            'num_routes': len(routes),
            'customers_served': customers_served,
            'avg_utilization': avg_utilization
        }

    except Exception as solve_error:
        # Deliberate best-effort: report the failure in full and return an
        # infeasible result so the comparison cells can still run.
        print(f"❌ cuOpt solve failed with error: {solve_error}")
        print("\n🔍 Full stack trace:")
        traceback.print_exc()
        return _infeasible_result()

In [7]:
# Solve with cuOpt (the solver function uses a 7-vehicle fleet by default)
gpu_solution, gpu_solve_time = run_timed(
    "GPU Solve (cuOpt)", 
    lambda: solve_vrptw_cuopt(vrptw_data),
    use_gpu=True
)
if gpu_solution['feasible']:
    print("\n📊 GPU Results (cuOpt):")
    print(f"Feasible: {gpu_solution['feasible']}")
    print(f"Total route distance: {gpu_solution['objective']}")
    print(f"Routes: {gpu_solution['num_routes']}")
    print(f"Customers served: {gpu_solution['customers_served']}/{len(customers_df)}")
    print(f"Average vehicle utilization: {gpu_solution['avg_utilization']:.2f}")
    print(f"Solve time: {gpu_solve_time:.3f}s")

    print("\n✅ GPU optimization completed")
else:
    # Report the failure explicitly instead of finishing the cell silently
    print(f"\n❌ GPU optimization found no feasible solution (elapsed: {gpu_solve_time:.3f}s)")

🚛 Using 7 vehicles for cuOpt (GPU-native)
🚀 Starting cuOpt solve (GPU-accelerated)...
Solution status: 0
Used truck IDs: [np.int32(1), np.int32(2), np.int32(3), np.int32(4), np.int32(5), np.int32(6)]
Truck 1: 34 stops, 32 customers, demand=600
Truck 2: 35 stops, 33 customers, demand=640
Truck 3: 36 stops, 34 customers, demand=640
Truck 4: 38 stops, 36 customers, demand=640
Truck 5: 33 stops, 31 customers, demand=610
Truck 6: 36 stops, 34 customers, demand=640
Extracted 6 routes serving 200 customers
Total cost: 1857.0
GPU Solve (cuOpt): 237.739s

📊 GPU Results (cuOpt):
Feasible: True
Total route distance: 1857
Routes: 6
Customers served: 200/200
Average vehicle utilization: 0.90
Solve time: 237.739s

✅ GPU optimization completed


## Performance Comparison and Analysis

In [12]:
# Calculate solve time speedup.
# NOTE(review): the OR-Tools solve uses GUIDED_LOCAL_SEARCH with a fixed time
# limit, so cpu_solve_time largely reflects that configured limit rather than
# time-to-solution. Read the speedup figure with that in mind.
if gpu_solve_time > 0:
    solve_speedup = cpu_solve_time / gpu_solve_time
else:
    solve_speedup = float('inf')

# Relative objective change (positive = GPU route distance is shorter).
# Only meaningful when both solvers produced a feasible solution.
both_feasible = cpu_solution['feasible'] and gpu_solution['feasible']
if both_feasible and cpu_solution['objective'] > 0:
    objective_improvement = (cpu_solution['objective'] - gpu_solution['objective']) / cpu_solution['objective'] * 100
else:
    objective_improvement = 0.0

# Utilization is shown with two decimals, so compare at that precision:
# exact float equality could report "Different" for values that print the same.
cpu_utilization_text = f"{cpu_solution['avg_utilization']:.2f}"
gpu_utilization_text = f"{gpu_solution['avg_utilization']:.2f}"

# Create comparison table
comparison_data = [
    {
        'Metric': 'Solve Time (s)',
        'CPU (OR-Tools)': f"{cpu_solve_time:.3f}",
        'GPU (cuOpt)': f"{gpu_solve_time:.3f}",
        'Speedup/Improvement': f"{solve_speedup:.1f}x"
    },
    {
        'Metric': 'Total Vehicle Distance',
        'CPU (OR-Tools)': f"{cpu_solution['objective']}",
        'GPU (cuOpt)': f"{gpu_solution['objective']}",
        'Speedup/Improvement': f"{objective_improvement:+.1f}%" if abs(objective_improvement) > 0.1 else "Same"
    },
    {
        'Metric': 'Routes Used',
        'CPU (OR-Tools)': f"{cpu_solution['num_routes']}",
        'GPU (cuOpt)': f"{gpu_solution['num_routes']}",
        'Speedup/Improvement': f"{cpu_solution['num_routes'] - gpu_solution['num_routes']:+d}" if cpu_solution['num_routes'] != gpu_solution['num_routes'] else 'Same'
    },
    {
        'Metric': 'Customers Served',
        'CPU (OR-Tools)': f"{cpu_solution['customers_served']}/{len(customers_df)}",
        'GPU (cuOpt)': f"{gpu_solution['customers_served']}/{len(customers_df)}",
        'Speedup/Improvement': 'Same' if cpu_solution['customers_served'] == gpu_solution['customers_served'] else 'Different'
    },
    {
        'Metric': 'Avg. Vehicle Utilization',
        'CPU (OR-Tools)': cpu_utilization_text,
        'GPU (cuOpt)': gpu_utilization_text,
        'Speedup/Improvement': 'Same' if cpu_utilization_text == gpu_utilization_text else 'Different'
    }
]

comparison_df = pd.DataFrame(comparison_data)
print("⚡ VRPTW Optimization Comparison:")
print(comparison_df.to_string(index=False))

# Solution quality check: both solvers agree on feasibility and serve the same
# number of customers
solution_quality_ok = (
    cpu_solution['feasible'] == gpu_solution['feasible'] and
    cpu_solution['customers_served'] == gpu_solution['customers_served']
)

⚡ VRPTW Optimization Comparison:
                  Metric CPU (OR-Tools) GPU (cuOpt) Speedup/Improvement
          Solve Time (s)       1200.003     237.739                5.0x
  Total Vehicle Distance           1864        1857               +0.4%
             Routes Used              6           6                Same
        Customers Served        200/200     200/200                Same
Avg. Vehicle Utilization           0.90        0.90                Same
