# GPU Acceleration Demo: VRPTW Optimization CPU vs GPU

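This notebook demonstrates GPU acceleration for the Vehicle Routing Problem with Time Windows (VRPTW) by comparing OR-Tools (CPU) against cuOpt (GPU) on the Gehring & Homberger benchmark instances: the 200-customer C2 series for the default sample run, and the 1000-customer RC2 series for the full run.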

**Objectives:**
- Compare CPU vs GPU performance on VRPTW optimization
- Measure solve-time speedups
- Verify solution feasibility and quality
- Demonstrate minimal migration effort (≤5 lines changed)

## Setup and Configuration

In [None]:
import os
import fnmatch
import numpy as np
import pandas as pd
import zipfile

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

from cuopt.routing import DataModel, Solve, SolverSettings
import cudf

# Project-local helpers (the `utils` package must be importable from the notebook's working directory)
from utils.homberger_to_parquet import parse_homberger_file, convert_homberger_to_parquet
from utils.timing import set_cpu_threads, run_timed

# Seed NumPy's global random generator so NumPy-based randomness is repeatable
np.random.seed(42)

# Configure CPU threads for fair comparison
# NOTE(review): the thread count is hard-coded to 12; confirm this matches the
# cores available on the machine the benchmark runs on.
set_cpu_threads(12)

## Get and Prepare Dataset

In [None]:
# Dataset switch: True selects the 200-customer archive, False the 1000-customer one
USE_SAMPLE = True  # Set to False for full dataset (1000 customers)
extract_dir = "data/homberger"

# Choose the archive and the instance-name pattern for the selected dataset.
# NOTE(review): instance_pattern is not referenced by the extraction function
# visible in this notebook section - confirm whether anything still uses it.
if not USE_SAMPLE:
    print("Using FULL dataset (RC2 series - ~1000 customers)")
    zip_file = "data/homberger_1000_customer_instances.zip"
    instance_pattern = r"rc2.*\.txt"  # RC2 series, 1000 customers
else:
    print("Using SAMPLE dataset (C2 series - ~200 customers)")
    zip_file = "data/homberger_200_customer_instances.zip"
    instance_pattern = r"c2.*\.txt"  # C2 series, 200 customers

# Make sure the extraction directory exists before anything is written into it
os.makedirs(extract_dir, exist_ok=True)

def extract_and_parse_homberger():
    """Extract one Homberger VRPTW instance from the ZIP archive and parse it.

    Reads the module-level ``zip_file``, ``USE_SAMPLE`` and ``extract_dir``
    settings. The first file in the archive whose base name starts with the
    series prefix (``C2_`` for the sample run, ``RC2_`` otherwise, compared
    case-insensitively) is copied to a temporary file inside ``extract_dir``,
    passed to ``parse_homberger_file`` and then deleted again. If no file
    matches, the first file of the archive is used instead and a warning is
    printed.

    Returns:
        The ``(customers_df, params)`` pair produced by ``parse_homberger_file``.

    Raises:
        FileNotFoundError: If ``zip_file`` does not exist, or the archive
            contains no file entries.
    """
    if not os.path.exists(zip_file):
        raise FileNotFoundError(f"Data file not found: {zip_file}")

    print(f"📁 Extracting from: {os.path.basename(zip_file)}")

    # Match files by series prefix, regardless of extension
    pattern = "C2_*" if USE_SAMPLE else "RC2_*"

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Directory entries cannot be parsed, so only real files are candidates
        members = [info.filename for info in zip_ref.infolist() if not info.is_dir()]

        # Upper-case the name so the match does not depend on how the archive
        # (or the host OS) treats letter case
        matching_files = [
            name for name in members
            if fnmatch.fnmatchcase(os.path.basename(name).upper(), pattern)
        ]

        if not matching_files:
            # Fallback: use any file, but make the substitution visible
            print(f"⚠️ No file matching '{pattern}' found; falling back to the first file in the archive")
            matching_files = members

        if not matching_files:
            raise FileNotFoundError(f"No instance files found in: {zip_file}")

        instance_file = matching_files[0]
        print(f"📋 Using instance: {os.path.basename(instance_file)}")

        # Extract to temporary location; the try block covers the copy as well
        # as the parse so a failed write never leaves the temp file behind
        temp_path = os.path.join(extract_dir, "temp_instance.txt")
        try:
            with zip_ref.open(instance_file) as source, open(temp_path, 'wb') as target:
                target.write(source.read())
            customers_df, params = parse_homberger_file(temp_path)
            return customers_df, params
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

# Extract and parse the data
customers_df, vrptw_params = extract_and_parse_homberger()

# Summarise the parsed instance
print(f"\n📊 VRPTW Instance: {vrptw_params['instance']}")
print(f"Customers: {len(customers_df)}")
print(f"Vehicles: {vrptw_params['K']}")
print(f"Capacity: {vrptw_params['Q']}")
print(f"Depot: ({vrptw_params['depot']['x']}, {vrptw_params['depot']['y']})")
print("\n✅ Data loaded successfully")
print("Customer data schema:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line
customers_df.info()

In [None]:
def prepare_vrptw_data(customers_df, params, distance_scale=10):
    """Convert the parsed instance into solver-ready VRPTW data.

    The depot is inserted as location 0, all locations are ordered by
    ``customer_id``, and a Euclidean distance matrix is built. Distances are
    multiplied by ``distance_scale`` and truncated to integers. Because the
    solvers in this notebook use the distance matrix as travel time, time
    windows and service times are multiplied by the same factor so that every
    time quantity is expressed in the same units as the matrix.

    Args:
        customers_df: DataFrame with the columns 'customer_id', 'x', 'y',
            'demand', 'tw_start', 'tw_end' and 'service_time'.
        params: Dict with 'K' (vehicle count), 'Q' (vehicle capacity) and
            'depot', a dict with 'x', 'y', 'tw_start', 'tw_end' and
            'service_time'.
        distance_scale: Integer factor applied to distances, time windows and
            service times before conversion to integers (default 10).

    Returns:
        A ``(data, all_locations)`` tuple: ``data`` is a dict with the keys
        'distance_matrix', 'demands', 'time_windows', 'service_times',
        'num_vehicles', 'vehicle_capacity' and 'depot'; ``all_locations`` is
        the depot-plus-customers DataFrame in its original (unscaled) units.
    """
    # Add depot as customer 0
    depot = params['depot']
    depot_row = pd.DataFrame({
        'customer_id': [0],
        'x': [depot['x']],
        'y': [depot['y']],
        'demand': [0],
        'tw_start': [depot['tw_start']],
        'tw_end': [depot['tw_end']],
        'service_time': [depot['service_time']]
    })

    # Combine depot and customers
    all_locations = pd.concat([depot_row, customers_df], ignore_index=True)
    all_locations = all_locations.sort_values('customer_id').reset_index(drop=True)

    # Pairwise Euclidean distances via broadcasting (replaces a Python-level
    # double loop over DataFrame rows); the diagonal is zero by construction
    xs = all_locations['x'].to_numpy(dtype=float)
    ys = all_locations['y'].to_numpy(dtype=float)
    dx = xs[:, None] - xs[None, :]
    dy = ys[:, None] - ys[None, :]
    # Scale, then truncate toward zero to obtain integer costs
    distance_matrix = (np.sqrt(dx * dx + dy * dy) * distance_scale).astype(int)

    def _scaled_ints(column):
        # Bring a time column into the same scaled integer units as the matrix
        values = all_locations[column].to_numpy(dtype=float) * distance_scale
        return np.rint(values).astype(int).tolist()

    # Convert to plain Python lists for the solvers
    data = {
        'distance_matrix': distance_matrix.tolist(),
        'demands': all_locations['demand'].tolist(),
        'time_windows': list(zip(_scaled_ints('tw_start'), _scaled_ints('tw_end'))),
        'service_times': _scaled_ints('service_time'),
        'num_vehicles': params['K'],
        'vehicle_capacity': params['Q'],
        'depot': 0
    }

    return data, all_locations

# Build the solver input once; both solvers consume the same dictionary
vrptw_data, locations_df = prepare_vrptw_data(customers_df, vrptw_params)

# One-shot summary of the prepared instance
print("\n".join([
    "✅ VRPTW data prepared:",
    f"Locations: {len(vrptw_data['distance_matrix'])}",
    f"Vehicles: {vrptw_data['num_vehicles']}",
    f"Max distance: {np.max(vrptw_data['distance_matrix'])}",
    f"Total demand: {sum(vrptw_data['demands'])}",
]))

## CPU Optimization - OR-Tools

In [None]:
# Record this cell's index for the diff analysis: one past the length of the
# global `In` list when it exists (presumably IPython's input history),
# otherwise the fixed fallback 6
CPU_CELL_OPT = (len(globals()['In']) + 1) if 'In' in globals() else 6

def solve_vrptw_ortools(data, time_limit_s=60):
    """Solve a VRPTW instance with the OR-Tools routing solver.

    Travel time between two locations is taken from ``data['distance_matrix']``
    plus the service time at the origin, so time windows and service times
    must be expressed in the same units as the matrix entries.

    Args:
        data: Dict with the keys 'distance_matrix', 'demands', 'time_windows',
            'service_times', 'num_vehicles', 'vehicle_capacity' and 'depot'.
        time_limit_s: Search time limit in seconds (default 60).

    Returns:
        Dict with 'feasible' (bool), 'objective' (summed arc cost of the used
        routes, or ``float('inf')`` when no solution was found), 'routes'
        (list of node lists, each starting and ending at the depot),
        'num_routes' and 'customers_served'.
    """
    # Create the routing index manager
    manager = pywrapcp.RoutingIndexManager(
        len(data['distance_matrix']),
        data['num_vehicles'],
        data['depot']
    )

    # Create routing model
    routing = pywrapcp.RoutingModel(manager)

    # Create and register distance callback (arc cost for every vehicle)
    def distance_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['distance_matrix'][from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Add capacity constraint
    def demand_callback(from_index):
        from_node = manager.IndexToNode(from_index)
        return data['demands'][from_node]

    demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        [data['vehicle_capacity']] * data['num_vehicles'],  # vehicle capacities
        True,  # start cumul to zero
        'Capacity'
    )

    # Add time window constraint: service at the origin plus travel to the target
    def time_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        service_time = data['service_times'][from_node]
        travel_time = data['distance_matrix'][from_node][to_node]
        return service_time + travel_time

    time_callback_index = routing.RegisterTransitCallback(time_callback)

    horizon = max(tw[1] for tw in data['time_windows']) + 1000
    routing.AddDimension(
        time_callback_index,
        horizon,  # allow waiting time
        horizon,  # maximum time per vehicle
        False,  # don't force start cumul to zero
        'Time'
    )
    time_dimension = routing.GetDimensionOrDie('Time')

    # Add time window constraints for each location
    for location_idx, time_window in enumerate(data['time_windows']):
        if location_idx == data['depot']:
            continue
        index = manager.NodeToIndex(location_idx)
        time_dimension.CumulVar(index).SetRange(time_window[0], time_window[1])

    # Add time window constraints for vehicles at depot
    # NOTE(review): only the route start is limited to the depot window; the
    # route end is bounded by `horizon` alone - confirm that returning after
    # the depot's closing time is intended.
    depot_idx = data['depot']
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        time_dimension.CumulVar(index).SetRange(
            data['time_windows'][depot_idx][0],
            data['time_windows'][depot_idx][1]
        )

    # Setting first solution heuristic and the improvement metaheuristic
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
    )
    search_parameters.local_search_metaheuristic = (
        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
    )
    search_parameters.time_limit.FromSeconds(time_limit_s)

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

    if not solution:
        return {
            'feasible': False,
            'objective': float('inf'),
            'routes': [],
            'num_routes': 0,
            'customers_served': 0
        }

    total_distance = 0
    routes = []

    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        route = []
        route_distance = 0

        # Follow the NextVar chain from the start node to the end node
        while not routing.IsEnd(index):
            route.append(manager.IndexToNode(index))
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)

        route.append(manager.IndexToNode(index))  # Add depot at end

        if len(route) > 2:  # Only count routes with customers
            routes.append(route)
            total_distance += route_distance

    return {
        'feasible': True,
        'objective': total_distance,
        'routes': routes,
        'num_routes': len(routes),
        # Each kept route carries the depot at both ends
        'customers_served': sum(len(route) - 2 for route in routes)
    }

# Run the OR-Tools solver through the timing helper (CPU path)
cpu_solution, cpu_solve_time = run_timed(
    "CPU Solve (OR-Tools)",
    lambda: solve_vrptw_ortools(vrptw_data),
    use_gpu=False,
)

# Report the headline numbers of the CPU run in one go
print("\n".join([
    "\n📊 CPU Results (OR-Tools):",
    f"Feasible: {cpu_solution['feasible']}",
    f"Objective: {cpu_solution['objective']}",
    f"Routes: {cpu_solution['num_routes']}",
    f"Customers served: {cpu_solution['customers_served']}/{len(customers_df)}",
    f"Solve time: {cpu_solve_time:.3f}s",
]))

# Preview at most the first three routes when a solution with routes exists
if cpu_solution['feasible'] and cpu_solution['routes']:
    print("\n🚛 Sample routes:")
    for i, route in enumerate(cpu_solution['routes'][:3]):
        print(f"  Vehicle {i+1}: {' -> '.join(str(stop) for stop in route)}")

print("\n✅ CPU optimization completed")

## GPU Optimization - cuOpt

In [None]:
def solve_vrptw_cuopt(data, time_limit_s=60):
    """Solve a VRPTW instance with NVIDIA cuOpt.

    Args:
        data: Dict with the keys 'distance_matrix', 'demands', 'time_windows',
            'service_times', 'num_vehicles', 'vehicle_capacity' and 'depot'.
        time_limit_s: Solver time limit in seconds (default 60).

    Returns:
        Dict with 'feasible' (bool), 'objective' (integer cost, or
        ``float('inf')`` when the solver reports a non-zero status), 'routes'
        (one list of location indices per vehicle that visits a non-depot
        location), 'num_routes' and 'customers_served'. If the solver
        succeeds but the routes cannot be read back, the error is printed and
        the result is feasible with no routes and objective 0.
    """

    def _read_total_cost(solution):
        # Prefer the documented accessor; keep the older names as fallbacks
        for accessor in ('get_total_objective', 'get_cost'):
            if hasattr(solution, accessor):
                return getattr(solution, accessor)()
        for attribute in ('final_cost', 'cost'):
            if hasattr(solution, attribute):
                return getattr(solution, attribute)
        return 0  # Fallback

    n_locations = len(data['distance_matrix'])
    n_vehicles = data['num_vehicles']
    depot_idx = data['depot']

    # Create cuOpt DataModel
    data_model = DataModel(n_locations, n_vehicles)

    # Cost matrix
    data_model.add_cost_matrix(cudf.DataFrame(data['distance_matrix']))

    # Per-order demand and per-vehicle capacity
    data_model.add_capacity_dimension(
        "demand",
        cudf.Series(data['demands']),
        cudf.Series([data['vehicle_capacity']] * n_vehicles)
    )

    # Order locations are location indices, not coordinates. Every location,
    # including the depot, is registered as an order here.
    # NOTE(review): confirm whether the depot should be excluded from the orders.
    order_indices = cudf.Series(range(n_locations), dtype='int32')
    data_model.set_order_locations(order_indices)

    # Order time windows
    earliest_times = cudf.Series([tw[0] for tw in data['time_windows']], dtype='float32')
    latest_times = cudf.Series([tw[1] for tw in data['time_windows']], dtype='float32')
    data_model.set_order_time_windows(earliest_times, latest_times)

    # Service times
    service_times = cudf.Series(data['service_times'], dtype='float32')
    data_model.set_order_service_times(service_times)

    # All vehicles start and end at the depot (location indices, not coordinates)
    vehicle_start_locs = cudf.Series([depot_idx] * n_vehicles, dtype='int32')
    vehicle_end_locs = cudf.Series([depot_idx] * n_vehicles, dtype='int32')
    data_model.set_vehicle_locations(vehicle_start_locs, vehicle_end_locs)

    # Configure solver settings
    solver_settings = SolverSettings()
    solver_settings.set_time_limit(time_limit_s)

    # Solve the problem
    routing_solution = Solve(data_model, solver_settings)

    status = routing_solution.get_status()
    print(f"Solution status: {status}")

    if status != 0:  # Anything other than success
        print(f"cuOpt solver failed with status: {status}")
        return {
            'feasible': False,
            'objective': float('inf'),
            'routes': [],
            'num_routes': 0,
            'customers_served': 0
        }

    routes = []
    customers_served = 0
    total_cost = 0

    try:
        # get_route() takes no arguments and returns one table for all
        # vehicles, with a 'truck_id' column identifying the vehicle
        route_table = routing_solution.get_route()
        if hasattr(route_table, 'to_pandas'):
            route_table = route_table.to_pandas()

        # Prefer the 'location' column for the visited location index
        # NOTE(review): assumes rows of one truck are listed in visiting order - confirm
        stop_column = 'location' if 'location' in route_table.columns else 'route'

        for _, legs in route_table.groupby('truck_id', sort=True):
            stops = [int(stop) for stop in legs[stop_column].tolist()]
            visited = sum(1 for stop in stops if stop != depot_idx)
            if visited:  # Only count routes with customers
                routes.append(stops)
                customers_served += visited

        total_cost = _read_total_cost(routing_solution)

    except (AttributeError, KeyError, TypeError, ValueError) as route_error:
        # Best effort: keep the solver verdict but report that routes are missing
        print(f"Route extraction error: {route_error}")
        routes = []
        customers_served = 0
        total_cost = 0

    return {
        'feasible': True,
        'objective': int(total_cost),
        'routes': routes,
        'num_routes': len(routes),
        'customers_served': customers_served
    }

# Run the cuOpt solver through the timing helper (GPU path)
gpu_solution, gpu_solve_time = run_timed(
    "GPU Solve (cuOpt)",
    lambda: solve_vrptw_cuopt(vrptw_data),
    use_gpu=True,
)

# Report the headline numbers of the GPU run in one go
print("\n".join([
    "\n📊 GPU Results (cuOpt):",
    f"Feasible: {gpu_solution['feasible']}",
    f"Objective: {gpu_solution['objective']}",
    f"Routes: {gpu_solution['num_routes']}",
    f"Customers served: {gpu_solution['customers_served']}/{len(customers_df)}",
    f"Solve time: {gpu_solve_time:.3f}s",
]))

# Preview at most the first three routes when a solution with routes exists
if gpu_solution['feasible'] and gpu_solution['routes']:
    print("\n🚛 Sample routes:")
    for i, route in enumerate(gpu_solution['routes'][:3]):
        print(f"  Vehicle {i+1}: {' -> '.join(str(stop) for stop in route)}")

# Flag that the GPU path ran
has_cuopt = True
print("\n✅ GPU optimization completed")


## Performance Comparison and Analysis

In [None]:
# Solve-time speedup of GPU over CPU; infinite when the GPU time is not positive
solve_speedup = cpu_solve_time / gpu_solve_time if gpu_solve_time > 0 else float('inf')

# Relative objective change in percent (positive means the GPU objective is lower).
# Stays 0.0 unless both solvers were feasible and the CPU objective is positive.
objective_improvement = 0.0
if cpu_solution['feasible'] and gpu_solution['feasible'] and cpu_solution['objective'] > 0:
    objective_improvement = (
        (cpu_solution['objective'] - gpu_solution['objective'])
        / cpu_solution['objective'] * 100
    )

# One row per metric; the last column summarises how the two solvers compare
comparison_data = [
    {
        'Metric': 'Solve Time (s)',
        'CPU (OR-Tools)': f"{cpu_solve_time:.3f}",
        'GPU (cuOpt)': f"{gpu_solve_time:.3f}",
        'Speedup/Improvement': f"{solve_speedup:.1f}x",
    },
    {
        'Metric': 'Feasible Solution',
        'CPU (OR-Tools)': '✅' if cpu_solution['feasible'] else '❌',
        'GPU (cuOpt)': '✅' if gpu_solution['feasible'] else '❌',
        'Speedup/Improvement': (
            'Different' if cpu_solution['feasible'] != gpu_solution['feasible'] else 'Same'
        ),
    },
    {
        'Metric': 'Objective Value',
        'CPU (OR-Tools)': f"{cpu_solution['objective']}",
        'GPU (cuOpt)': f"{gpu_solution['objective']}",
        # Changes of at most 0.1 % are reported as "Same"
        'Speedup/Improvement': (
            "Same" if not abs(objective_improvement) > 0.1 else f"{objective_improvement:+.1f}%"
        ),
    },
    {
        'Metric': 'Routes Used',
        'CPU (OR-Tools)': f"{cpu_solution['num_routes']}",
        'GPU (cuOpt)': f"{gpu_solution['num_routes']}",
        'Speedup/Improvement': (
            'Different' if cpu_solution['num_routes'] != gpu_solution['num_routes'] else 'Same'
        ),
    },
    {
        'Metric': 'Customers Served',
        'CPU (OR-Tools)': f"{cpu_solution['customers_served']}/{len(customers_df)}",
        'GPU (cuOpt)': f"{gpu_solution['customers_served']}/{len(customers_df)}",
        'Speedup/Improvement': (
            'Different'
            if cpu_solution['customers_served'] != gpu_solution['customers_served']
            else 'Same'
        ),
    },
]

comparison_df = pd.DataFrame(comparison_data)
print("⚡ VRPTW Optimization Comparison:")
print(comparison_df.to_string(index=False))

# Solution quality check: both solvers must agree on feasibility and on the
# number of customers served
solution_quality_ok = all(
    cpu_solution[key] == gpu_solution[key]
    for key in ('feasible', 'customers_served')
)