In [1]:
# Colab-only: mount Google Drive so the dataset paths under /content/drive
# used below can be opened like ordinary local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pickle

In [3]:
# Load the pickled examples from Drive into `tf_data`.
# The cells below index `tf_data` by integer position and read the keys
# "vehicle", "pedestrians", "cyclists" and "roadgraph" from each element.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
data_path = '/content/drive/MyDrive/CS7643Group/Dataset/training/pickled_waymo_external_actors_srishti.pkl'
with open(data_path, "rb") as file:
  tf_data = pickle.load(file)

In [4]:
# Sanity check: dump the vehicle record of the first example to see which
# "state/..." keys are available and what their values look like.
print(tf_data[0]['vehicle'])

{'state/id': 576.0, 'state/type': 1.0, 'state/is_sdc': 0, 'state/tracks_to_predict': 1, 'state/current/bbox_yaw': array([-0.6792184], dtype=float32), 'state/current/height': array([1.5639589], dtype=float32), 'state/current/length': array([4.6911273], dtype=float32), 'state/current/timestamp_micros': array([1000219]), 'state/current/valid': array([1]), 'state/current/vel_yaw': array([-0.68125886], dtype=float32), 'state/current/velocity_x': array([8.153687], dtype=float32), 'state/current/velocity_y': array([-6.610565], dtype=float32), 'state/current/width': array([2.1082966], dtype=float32), 'state/current/x': array([344.0412], dtype=float32), 'state/current/y': array([155.78773], dtype=float32), 'state/current/z': array([-106.44158], dtype=float32), 'state/future/bbox_yaw': array([-0.678944  , -0.67824805, -0.6785089 , -0.67810285, -0.680195  ,
       -0.6780548 , -0.6771952 , -0.6771955 , -0.67666876, -0.6763851 ,
       -0.6763134 , -0.67506635, -0.67428493, -0.6742915 , -0.6732043

In [5]:
# Vehicle state fields copied into every output row, in column order.
# Each is read from tf_data[i]["vehicle"]["state/<timerange>/<field>"].
fields = ["timestamp_micros", "x", "y", "velocity_x", "velocity_y", "bbox_yaw"]
# The three lists below name the keys used for the other actors / roadgraph.
# NOTE(review): they are not referenced anywhere in the visible part of this
# notebook (the parsing loop hard-codes the same keys) - confirm before removing.
ped_fields = ["x", "y"]
cyclist_fields = ["x", "y"]
roadgraph_fields = ["xyz", "type"]

# Accumulates one feature matrix per vehicle that passes the validity check.
parsed_data = []

# Number of examples in the loaded pickle; each one is treated as one vehicle.
num_vehicles = len(tf_data)

# Build one feature matrix per fully-valid vehicle and append it to `parsed_data`.
#
# Column layout of each matrix (91 rows = 10 past + 1 current + 80 future steps):
#   0-5    vehicle state, in the order given by `fields`
#   6-7    x, y of the closest pedestrian, relative to the vehicle at that step
#   8-9    x, y of the closest cyclist, relative to the vehicle at that step
#   10+    roadgraph "xyz" followed by roadgraph "type"
#          (the original notebook comments give the final shape as (91, 14))
#
# Side effect kept from the original cell: the pedestrian/cyclist x/y arrays
# inside `tf_data` are overwritten with float32 copies in which invalid entries
# hold a huge sentinel value.

# (time range name, number of timesteps) in the order the rows are stacked.
_TIME_RANGES = (("past", 10), ("current", 1), ("future", 80))
_NUM_TIMESTEPS = 10 + 1 + 80  # 91 rows per vehicle

# Coordinate written over invalid actor positions so they never win the
# closest-actor search while at least one valid actor exists.
_INVALID_POSITION = float(999999999999)


def _is_fully_valid(vehicle):
  """Return True when the vehicle's valid flags are all set in every time range."""
  return all(
      np.all(np.array(vehicle[f"state/{timerange}/valid"]))
      for timerange, _num_steps in _TIME_RANGES
  )


def _vehicle_features(vehicle, field_names):
  """Stack the requested vehicle fields into a (timesteps, len(field_names)) array."""
  per_range = []
  for timerange, _num_steps in _TIME_RANGES:
    columns = [vehicle[f"state/{timerange}/{field}"] for field in field_names]
    # One row per field -> transpose to one row per timestep.
    per_range.append(np.array(columns).T)
  return np.concatenate(per_range, axis=0)


def _mask_invalid_positions(actor_states):
  """Overwrite x/y of invalid (actor, timestep) entries with the sentinel, in place.

  `actor_states` is mutated: each x/y array is replaced by a float32 copy.
  Assumes the valid/x/y arrays are 2-D (actor, timestep), as the indexing
  below requires.
  """
  for timerange, _num_steps in _TIME_RANGES:
    valid = actor_states[f"state/{timerange}/valid"]
    rows, cols = np.where(valid == 0)
    for axis in ("x", "y"):
      key = f"state/{timerange}/{axis}"
      coords = actor_states[key].astype(np.float32)  # astype returns a copy
      coords[rows, cols] = _INVALID_POSITION
      actor_states[key] = coords


def _closest_actor_track(vehicle, actor_states):
  """Return a (91, 2) array with the x, y of the actor closest to the vehicle.

  Rows stay at zero for timesteps where there are no actors at all.
  NOTE(review): when actors exist but all of them are invalid at a timestep,
  the sentinel coordinate itself is selected - confirm this is intended.
  """
  track = np.zeros((_NUM_TIMESTEPS, 2))
  row_offset = 0
  for timerange, num_steps in _TIME_RANGES:
    vehicle_xs = vehicle[f"state/{timerange}/x"]
    vehicle_ys = vehicle[f"state/{timerange}/y"]
    actor_xs = actor_states[f"state/{timerange}/x"]
    actor_ys = actor_states[f"state/{timerange}/y"]
    for t in range(num_steps):
      xs = actor_xs[:, t]
      ys = actor_ys[:, t]
      if xs.size == 0 or ys.size == 0:
        continue  # no actors of this kind: leave the zero row
      # Euclidean distance from the vehicle to every actor at this timestep.
      distances = np.sqrt((vehicle_xs[t] - xs) ** 2 + (vehicle_ys[t] - ys) ** 2)
      nearest = np.argmin(distances)
      track[row_offset + t, :] = xs[nearest], ys[nearest]
    row_offset += num_steps
  return track


def _roadgraph_features(roadgraph):
  """Return per-timestep roadgraph "xyz" and "type" side by side, one row per step.

  Assumes `roadgraph[t]` exists for t in 0..90 and that "xyz" is a 1-D array,
  as the original indexing implies.
  """
  steps = range(_NUM_TIMESTEPS)
  # Single concatenate instead of growing the array once per timestep.
  xyz = np.concatenate([roadgraph[t]["xyz"][:, None] for t in steps], axis=1).T
  kinds = np.array([roadgraph[t]["type"] for t in steps])[:, None]
  return np.hstack((xyz, kinds))


for i in range(num_vehicles):
  example = tf_data[i]
  vehicle = example["vehicle"]

  # Only keep examples whose vehicle is valid at every timestep.
  if not _is_fully_valid(vehicle):
    continue

  # Past, current and future are stacked; the distinction is not kept.
  datapoint = _vehicle_features(vehicle, fields)

  pedestrians = example["pedestrians"][0]
  cyclists = example["cyclists"][0]
  _mask_invalid_positions(pedestrians)
  _mask_invalid_positions(cyclists)

  ped_array = _closest_actor_track(vehicle, pedestrians)
  cyc_array = _closest_actor_track(vehicle, cyclists)
  r_final = _roadgraph_features(example["roadgraph"])

  final_datapoint = np.hstack((datapoint, ped_array, cyc_array, r_final))

  # Snapshot the absolute vehicle x, y before any column is shifted. The copy
  # matters: a plain slice is a view into final_datapoint, and subtracting a
  # view of row 0 from the whole column block in place would depend on how
  # NumPy handles overlapping operands.
  vehicle_xy = final_datapoint[:, 1:3].copy()

  # Closest pedestrian / cyclist become offsets from the vehicle at each step.
  final_datapoint[:, 6:8] -= vehicle_xy
  final_datapoint[:, 8:10] -= vehicle_xy

  # Vehicle positions become offsets from its first recorded position.
  final_datapoint[:, 1:3] -= vehicle_xy[0]

  # Roadgraph columns are deliberately left in absolute coordinates.

  # Timestamps start at zero and are converted from microseconds to seconds.
  final_datapoint[:, 0] -= final_datapoint[0, 0]
  final_datapoint[:, 0] /= 1e6

  parsed_data.append(final_datapoint)

# Stack the per-vehicle matrices into a single array and persist it to Drive.
parsed_data = np.array(parsed_data)
# Use a context manager so the file handle is flushed and closed even if
# pickling fails; the original left the handle from open() unclosed.
with open("/content/drive/MyDrive/CS7643Group/Dataset/training/parsed_data_final_srishti.pkl", "wb") as output_file:
  pickle.dump(parsed_data, output_file)