In [None]:
!pip install waymo-open-dataset-tf-2-12-0==1.6.4

In [None]:
from google.colab import drive

# Mount Google Drive; the later cells read and write under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
#Cell 2
import os
# TODO: Enter the relative path in your Google Drive to the unzipped folder for hw1_code_submission.zip
FOLDERNAME = 'CS7643Group/Dataset/training' # e.g. 'cs7643/hw1/Code'

assert FOLDERNAME is not None, "[!] Enter the foldername."
working_directory = os.path.join("/content/drive/MyDrive/", FOLDERNAME)
assert os.path.exists(working_directory), "Make sure your FOLDERNAME is correct"
%cd $working_directory


In [None]:
import glob
import os  # imported here too so the cell does not rely on an earlier cell's import

# Define the path to your training folder
tfrecord_folder = '/content/drive/MyDrive/CS7643Group/Dataset/training/'

# Get a list of all .tfrecord files in the folder.
# glob returns matches in arbitrary order, so sort them to make the shard
# order reproducible from run to run.
tfrecord_files = sorted(glob.glob(os.path.join(tfrecord_folder, '*.tfrecord*')))

# Check how many tfrecord files were found
print(f"Found {len(tfrecord_files)} .tfrecord files.")

In [None]:
import math
import os
import uuid
import time

from matplotlib import cm
import matplotlib.animation as animation
import matplotlib.pyplot as plt

import numpy as np
from IPython.display import HTML
import itertools
import tensorflow as tf

from google.protobuf import text_format
from waymo_open_dataset.metrics.ops import py_metrics_ops
from waymo_open_dataset.metrics.python import config_util_py as config_util
from waymo_open_dataset.protos import motion_metrics_pb2

# If you use a custom conversion from Scenario to tf.Example, set the correct
# number of map samples here.
num_map_samples = 30000


def _fixed_len(shape, dtype):
    """Return a tf.io.FixedLenFeature with the given shape/dtype and no default."""
    return tf.io.FixedLenFeature(shape, dtype, default_value=None)


# Roadgraph fields as (name, trailing dimension, dtype); every feature has
# shape [num_map_samples, trailing dimension].
_ROADGRAPH_FIELDS = [
    ('dir', 3, tf.float32),
    ('id', 1, tf.int64),
    ('type', 1, tf.int64),
    ('valid', 1, tf.int64),
    ('xyz', 3, tf.float32),
]

# # Example field definition
roadgraph_features = {
    f'roadgraph_samples/{field}': _fixed_len([num_map_samples, width], dtype)
    for field, width, dtype in _ROADGRAPH_FIELDS
}

# Features of other agents.
# Fields stored once per agent slot (shape [128]).
_AGENT_FIELDS = [
    ('id', tf.float32),
    ('type', tf.float32),
    ('is_sdc', tf.int64),
    ('tracks_to_predict', tf.int64),
]
# Fields stored per agent slot and per timestep (shape [128, steps]).
_STEP_FIELDS = [
    ('bbox_yaw', tf.float32),
    ('height', tf.float32),
    ('length', tf.float32),
    ('timestamp_micros', tf.int64),
    ('valid', tf.int64),
    ('vel_yaw', tf.float32),
    ('velocity_x', tf.float32),
    ('velocity_y', tf.float32),
    ('width', tf.float32),
    ('x', tf.float32),
    ('y', tf.float32),
    ('z', tf.float32),
]
# Time segments and the number of timesteps stored for each.
_SEGMENT_STEPS = [('current', 1), ('future', 80), ('past', 10)]

state_features = {
    f'state/{field}': _fixed_len([128], dtype)
    for field, dtype in _AGENT_FIELDS
}
state_features.update({
    f'state/{segment}/{field}': _fixed_len([128, steps], dtype)
    for segment, steps in _SEGMENT_STEPS
    for field, dtype in _STEP_FIELDS
})

# Traffic-light features are intentionally not parsed at the moment. To enable
# them, define `traffic_light_features` with FixedLenFeature entries for
#   'traffic_light_state/current/{state,valid,x,y,z}'  shape [1, 16]
#   'traffic_light_state/past/{state,valid,x,y,z}'     shape [10, 16]
# using tf.int64 for state/valid and tf.float32 for x/y/z, and merge it into
# `features_description` below.

# Combined schema passed to tf.io.parse_single_example.
features_description = {**roadgraph_features, **state_features}

In [None]:
# Determine Number of Roadgraph samples

import tensorflow as tf

# Path to your TFRecord file.
# Kept consistent with the training folder used by the other cells; the
# previous value pointed at '/content/drive/MyDrive/training/', which no other
# cell in this notebook references.
tfrecord_file = "/content/drive/MyDrive/CS7643Group/Dataset/training/training_tfexample.tfrecord-00000-of-01000"

# Minimal parsing schema: read the roadgraph points as a variable-length float
# list so that their count can be measured instead of assumed.
feature_description = {
    'roadgraph_samples/xyz': tf.io.VarLenFeature(tf.float32)
}

def parse_tfrecord_fn(example_proto):
    """Parse one serialized tf.Example using the minimal `feature_description`."""
    return tf.io.parse_single_example(example_proto, feature_description)

# Load the TFRecord file
dataset = tf.data.TFRecordDataset(tfrecord_file)

# Parse the first example only. The flat xyz list holds 3 floats per sample.
num_samples_map = None
for raw_record in dataset.take(1):
    parsed_record = parse_tfrecord_fn(raw_record)
    num_samples_map = parsed_record['roadgraph_samples/xyz'].values.numpy().shape[0] // 3

# Without this guard an empty file would surface as a NameError in the print.
if num_samples_map is None:
    raise ValueError(f"No records found in {tfrecord_file}")

# NOTE(review): this value is only printed; presumably it should match the
# `num_map_samples` used by the full parsing schema - confirm by eye.
print(f"Number of map samples: {num_samples_map}")


In [None]:
import torch
import tensorflow as tf
import numpy as np

# Path to your TFRecord file
FILENAME = '/content/drive/MyDrive/CS7643Group/Dataset/training/training_tfexample.tfrecord-00000-of-01000'

# Field names that exist once per agent, and field names that exist for each
# of the 'current', 'future' and 'past' segments.
_agent_level_fields = ['id', 'type', 'is_sdc', 'tracks_to_predict']
_timestep_level_fields = [
    'bbox_yaw', 'height', 'length', 'timestamp_micros', 'valid', 'vel_yaw',
    'velocity_x', 'velocity_y', 'width', 'x', 'y', 'z',
]

# List of state features (vehicle-related data): the agent-level keys first,
# then every per-timestep key for current, future and past, in that order.
state_features_list = [f'state/{name}' for name in _agent_level_fields] + [
    f'state/{segment}/{name}'
    for segment in ('current', 'future', 'past')
    for name in _timestep_level_fields
]

# List of roadgraph features
roadgraph_features_list = [
    f'roadgraph_samples/{name}' for name in ('dir', 'id', 'type', 'valid', 'xyz')
]

# Helper function to find the nearest roadgraph sample for a given vehicle position
def find_closest_roadgraph_sample(vehicle_position, roadgraph_samples, roadgraph_types):
    """
    Look up the roadgraph sample nearest to a vehicle in the x-y plane.

    Only the first two columns of `roadgraph_samples` take part in the distance
    computation; the remaining column is ignored for the search but returned.

    Args:
        vehicle_position: pair (x, y) of vehicle coordinates.
        roadgraph_samples: array of roadgraph sample positions, one row per sample.
        roadgraph_types: array of roadgraph sample types, one row per sample; the
            first element of the selected row is returned.
    Returns:
        A tuple (row of `roadgraph_samples` with the smallest planar distance,
        first element of the matching row of `roadgraph_types`).
    """
    x, y = vehicle_position
    query_xy = np.array([x, y]).T

    # Euclidean distance from the query point to every sample, x/y only.
    planar_offsets = roadgraph_samples[:, :2] - query_xy
    nearest = np.argmin(np.linalg.norm(planar_offsets, axis=1))

    nearest_xyz = roadgraph_samples[nearest]
    nearest_type = roadgraph_types[nearest][0]
    return nearest_xyz, nearest_type

# Load the dataset
dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')

# Upper bound on the number of tf.Examples read from the shard. Using
# `dataset.take` processes exactly this many (or fewer if the shard is shorter);
# the previous counter-based break stopped one example early.
MAX_EXAMPLES = 200

# Values of 'state/type' used to pick out each kind of agent.
VEHICLE_TYPE = 1.0
PEDESTRIAN_TYPE = 2.0
CYCLIST_TYPE = 3.0

# Per-agent tracks stored for every pedestrian / cyclist of a scene.
ACTOR_TRACK_KEYS = [
    'state/past/x',
    'state/past/y',
    'state/current/x',
    'state/current/y',
    'state/future/x',
    'state/future/y',
    'state/past/valid',
    'state/current/valid',
    'state/future/valid',
]


def _extract_actor_tracks(state_arrays, agent_indices):
    """Return {key: rows of state_arrays[key] at agent_indices} for ACTOR_TRACK_KEYS.

    Integer-array indexing makes fresh arrays, so each returned dict owns its data.
    """
    return {key: state_arrays[key][agent_indices, :] for key in ACTOR_TRACK_KEYS}


vehicles_list = []  # Stores combined data for all vehicles and roadgraph info
vehicles = 0  # Counter for total vehicles processed
num_examples_processed = 0  # Number of tf.Examples actually parsed

for raw_record in dataset.take(MAX_EXAMPLES):
    num_examples_processed += 1

    # Parse each raw record using the feature description
    parsed_example = tf.io.parse_single_example(raw_record, features_description)

    # Convert every state tensor to NumPy once per example instead of slicing
    # TensorFlow tensors again for every vehicle and timestep.
    state_arrays = {
        key: parsed_example[key].numpy()
        for key in state_features_list
        if key in parsed_example
    }

    # Agent slots of each type in this scene.
    agent_types = state_arrays['state/type']
    vehicle_indices = np.flatnonzero(agent_types == VEHICLE_TYPE)
    ped_indices = np.flatnonzero(agent_types == PEDESTRIAN_TYPE)
    cyclist_indices = np.flatnonzero(agent_types == CYCLIST_TYPE)
    vehicles += len(vehicle_indices)

    # Roadgraph parsing: keep only the samples whose valid flag equals 1.
    valid_mask = parsed_example['roadgraph_samples/valid'].numpy().flatten() == 1
    valid_roadgraph_samples = parsed_example['roadgraph_samples/xyz'].numpy()[valid_mask]
    valid_roadgraph_types = parsed_example['roadgraph_samples/type'].numpy()[valid_mask]

    # Combine vehicle and roadgraph data
    for i in vehicle_indices:
        data_point = {
            'vehicle': {key: values[i].copy() for key, values in state_arrays.items()},
            'roadgraph': [],  # 91 nearest roadgraph samples (position + type), one per timestep
            'cyclists': [_extract_actor_tracks(state_arrays, cyclist_indices)],
            'pedestrians': [_extract_actor_tracks(state_arrays, ped_indices)],
        }

        # Full trajectory of vehicle i: 10 past + 1 current + 80 future steps.
        # The current step is taken from row i like the others; it was
        # previously indexed with the leftover past-loop variable, which read
        # another agent's position.
        xs = np.concatenate([
            state_arrays['state/past/x'][i],
            state_arrays['state/current/x'][i],
            state_arrays['state/future/x'][i],
        ])
        ys = np.concatenate([
            state_arrays['state/past/y'][i],
            state_arrays['state/current/y'][i],
            state_arrays['state/future/y'][i],
        ])

        # NOTE(review): the 'valid' flags are stored in data_point['vehicle'] but
        # are not consulted here, so a lookup is made for every timestep -
        # confirm that downstream code masks timesteps flagged invalid.
        for position in zip(xs, ys):
            closest_sample, sample_type = find_closest_roadgraph_sample(
                position, valid_roadgraph_samples, valid_roadgraph_types
            )
            data_point['roadgraph'].append({'xyz': closest_sample, 'type': sample_type})

        vehicles_list.append(data_point)

# Output results
print(f"Processed {len(vehicles_list)} vehicles across {num_examples_processed} examples.")

In [None]:
import pickle

# Destination on Google Drive for the serialized per-vehicle records.
pickled_file_path = '/content/drive/MyDrive/CS7643Group/Dataset/training/pickled_waymo_external_actors_srishti.pkl'

# Write `vehicles_list` in binary mode; the file is closed when the block exits.
with open(pickled_file_path, 'wb') as pickle_out:
    pickle.dump(vehicles_list, pickle_out)