# Exploring the CommaAI Device data

This requires python 3.11 or later because of the usage of the tools folder from Openpilot, which uses 3.11 features

In [90]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import pandas as pd
from pathlib import Path
# This is taken from the Openpilot project
from openpilot.tools.lib.logreader import LogReader
from dataclasses import dataclass
from typing import List

In [91]:
device_path = Path("/home/ulrikro/datasets/CommaAI/2024-01-14--13-01-26--10/")

# About the folder

Each folder, marked by their data and a number, contains 5 files: ecamera.hevc, fcamera.hevc, qcamera.ts, qlog, rlog. Each file contains 1 min video. 

* ecamera.hevc - road facing wide angle camera 1928*1208 at 20Hz
* fcamera.hevc - road facing narrow camera 1928*1208 at 20Hz
* qcamera.ts - driver facing wide angle camera 526*330 at 20Hz
* qlog - some
* rlog - some



In [92]:
ecamera_path = device_path / "ecamera.hevc"
fcamera_path = device_path / "fcamera.hevc"
qcamera_path = device_path / "qcamera.ts"
qlog_path = device_path / "qlog"
rlog_path = device_path / "rlog"


# Overview of the video files

In [93]:
def read_video_frames(file_path: str):
    frames = []
    video = cv2.VideoCapture(file_path)
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        frames.append(frame)
    video.release()
    return frames

In [94]:
ecamera_frames = read_video_frames(ecamera_path.as_posix())
len(ecamera_frames)

1200

In [95]:
ecamera_frames[0].shape

(1208, 1928, 3)

In [96]:
# fcamera_frames = read_video_frames(fcamera_path.as_posix())
#len(fcamera_frames)

In [97]:
# fcamera_frames[0].shape

In [98]:
qcamera_frames = read_video_frames(qcamera_path.as_posix())
len(qcamera_frames)

1200

In [99]:
qcamera_frames[0].shape

(330, 526, 3)

# Overview of the log files

In [100]:
qlog_data = LogReader(qlog_path.as_posix())

In [101]:
qlog_list = list(qlog_data)
len(qlog_list)

11515

Each iteration throug the log data retruns a capnp.lib.capnp._DynamicStructReader object. 

capnp is a proto schema parsing library 

The proto schema seems to be defined in openpilot/common/params.cc
 


In [102]:
@dataclass
class CommandEntry:
    key: str
    value: str

@dataclass
class Commands:
    entries: List[CommandEntry]

@dataclass
class ParamEntry:
    key: str
    value: str

@dataclass
class Params:
    entries: List[ParamEntry]

@dataclass
class InitData:
    kernelArgs: List[str]
    dongleId: str
    deviceType: str
    version: str
    dirty: bool
    gitCommit: str
    gitBranch: str
    passive: bool
    gitRemote: str
    kernelVersion: str
    params: Params
    osVersion: str
    commands: Commands

@dataclass
class LogData:
    logMonoTime: int
    initData: InitData
    valid: bool
    # these just include a subset of the data

In [103]:
qlog_list: List[LogData] = list(qlog_data)

In [104]:
print([attr for attr in dir(qlog_list[0]) if not attr.startswith("__")])

['_get', '_get_by_field', '_has', '_has_by_field', '_parent', '_which', '_which_str', 'accelerometer', 'accelerometer2', 'androidGnssDEPRECATED', 'androidLog', 'applanixLocationDEPRECATED', 'applanixRawDEPRECATED', 'as_builder', 'boot', 'cameraOdometry', 'can', 'carControl', 'carParams', 'carState', 'cellInfoDEPRECATED', 'clocks', 'controlsState', 'customReserved0', 'customReserved1', 'customReserved2', 'customReserved3', 'customReserved4', 'customReserved5', 'customReserved6', 'customReserved7', 'customReserved8', 'customReserved9', 'customReservedRawData0', 'customReservedRawData1', 'customReservedRawData2', 'deviceState', 'driverCameraState', 'driverEncodeData', 'driverEncodeIdx', 'driverMonitoringState', 'driverStateDEPRECATED', 'driverStateV2', 'errorLogMessage', 'ethernetDataDEPRECATED', 'featuresDEPRECATED', 'gnssMeasurements', 'gpsLocation', 'gpsLocationExternal', 'gpsNMEA', 'gpsPlannerPlanDEPRECATED', 'gpsPlannerPointsDEPRECATED', 'gyroscope', 'gyroscope2', 'initData', 'is_roo

In [105]:
print(str(qlog_list[0].initData.params.entries[0].value))

b'{"alias": null, "athena_host": "prod-comma-public-athena-0.prod-comma-public-athena.production.svc.cluster.local", "device_type": "threex", "dongle_id": "45ba82b7ecf23784", "ignore_uploads": null, "last_athena_ping": 1705231623, "last_gps_accuracy": null, "last_gps_bearing": null, "last_gps_lat": null, "last_gps_lng": null, "last_gps_speed": null, "last_gps_time": null, "public_key": "-----BEGIN PUBLIC KEY-----\\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA5tgJB/RQmWgUQlIQBG6H\\ncPB65/ZjQJMxeQpZeaPvBrHGmIKybjnirGDgEqlfa1bTzSfXC8LbNu73J9qXbDM6\\nKnJ3Zh6M4NAFtAd/g04vWbEhmg/3ff2Abjq2sKolEDIyGBwli16usZEvGBAsuQpt\\nElxgVyx1TrLxHux9sZ++5sQce12gkhkiaItT26X7QOvjEwqTjP/LN3E4yUQNbAoq\\nhiDpcWgVQDT04HbMqIITvpOjd5sXLXhqr01sqoNVli69SwgjGyaN3eSAx4C2OiFw\\nwsc6gED93kOcVJmYeTahpfmJ0zRTgccDPC6eNHoTTVmnrZIhCqnm532a4bDbjN9z\\nnQIDAQAB\\n-----END PUBLIC KEY-----\\n", "serial": "9e856db5", "sim_id": "", "trial_claimed": true, "is_paired": true, "eligible_features": {"prime": true, "prime_data": true, "na

In [106]:
def isiterable(item):
    try:
        iter(item)
        return True
    except TypeError:
        return False
def get_fields(o, print_progress=True, depth=0):
    if isinstance(o, list) or isinstance(o, tuple) or isinstance(o, set) or isiterable(o):
        fields = []
        for item in o:
            field = get_fields(item, print_progress, depth=depth+1)
            fields.append(field)
        return fields
    elif isinstance(o, dict):
        total_dict = dict()
        for key, value in o.items():
            total_dict[key] = get_fields(value, print_progress, depth=depth+1)
        return total_dict
    elif isinstance(o, str) or isinstance(o, int) or isinstance(o, float) or isinstance(o, bool) or isinstance(o, bytes) or o is None:
        return type(o).__name__
    else:
        item_attrs = [attr for attr in dir(o) if not attr.startswith("_")]
        # print("item_attrs", item_attrs)
        total_dict = dict()
        progress = 0
        for attr in item_attrs:
            if print_progress:
                progress += 1
                print(f"Progress {depth}: {progress}/{len(item_attrs)} attr={attr}", end="\r")
            try: 
                total_dict[attr] = get_fields(getattr(o, attr), print_progress, depth=depth+1)
            except Exception:
                continue
        return total_dict
        
        

In [148]:
import re

def parse_capnp_string(s: str):
    # Wrap words after = that are not numbers or true of false in quotes
    s = re.sub(r'(?<=\s=\s)(?!true|false|\d+)(\w+)', r'"\1"', s)
    # Wrap in quotes and replace = with :
    s = re.sub(r"(\w+) = ", r'"\1": ', s)

    # Replace ( with [ for lists
    pattern = r'\((?:"[^"]*"|\d+)(?:\s*,\s*(?:"[^"]*"|\d+))*\)'

    def convert(match):
        inner_text = match.group(0)[1:-1]  # Remove the outer parentheses
        return "[" + inner_text + "]"  # Surround with square brackets

    s = re.sub(pattern, convert, s)

    # Replace the rest of the parentheses with curly braces and remove newlines
    s = s.replace("(", "{").replace(")", "}").replace("\n", "")
    return s

In [108]:
# import json
# field_json = capnp_to_json(str(qlog_list[0]))
# field_json
# parse_custom_structure(str(qlog_list[0]))

In [152]:
str(type(qlog_list[0]).to_dict())

TypeError: unbound method _DynamicStructReader.to_dict() needs an argument

In [198]:
import capnp
def capnp_to_dict(obj):
    """
    Convert a Cap'n Proto _DynamicStructReader object to a Python dictionary.
    This handles nested structures, lists, and basic data types.
    """
    if isinstance(obj, capnp.lib.capnp._DynamicStructReader):
        result = {}
        for field in list(obj.schema.node.struct.fields):
            field_name = field.name
            field_value = getattr(obj, field_name)
            print("name", field_name)
            result[field_name] = capnp_to_dict(field_value)
        return result
    elif isinstance(obj, capnp.lib.capnp._DynamicListReader):
        return [capnp_to_dict(item) for item in obj]
    else:
        return obj

'roadEncodeIdx'

In [191]:
for field in list(qlog_list[0].schema.node.struct.fields):
    print(field.name)

logMonoTime
initData
roadCameraState
gpsNMEA
sensorEventDEPRECATED
can
deviceState
controlsState
liveEventDEPRECATED
model
featuresDEPRECATED
sensorEventsDEPRECATED
pandaStateDEPRECATED
radarState
liveUIDEPRECATED
roadEncodeIdx
liveTracks
sendcan
logMessage
liveCalibration
androidLog
gpsLocation
carState
carControl
longitudinalPlan
liveLocationDEPRECATED
ethernetDataDEPRECATED
navUpdateDEPRECATED
cellInfoDEPRECATED
wifiScanDEPRECATED
androidGnssDEPRECATED
qcomGnss
lidarPtsDEPRECATED
procLog
ubloxGnss
clocks
liveMpcDEPRECATED
liveLongitudinalMpcDEPRECATED
navStatusDEPRECATED
ubloxRaw
gpsPlannerPointsDEPRECATED
gpsPlannerPlanDEPRECATED
applanixRawDEPRECATED
trafficEventsDEPRECATED
liveLocationTimingDEPRECATED
orbslamCorrectionDEPRECATED
liveLocationCorrectedDEPRECATED
orbObservationDEPRECATED
gpsLocationExternal
locationDEPRECATED
uiNavigationEventDEPRECATED
liveLocationKalmanDEPRECATED
testJoystick
orbOdometryDEPRECATED
orbFeaturesDEPRECATED
applanixLocationDEPRECATED
orbKeyFrameDEPRECA

In [199]:
capnp_to_dict(qlog_list[0])

name logMonoTime
name initData
name kernelArgs
name gctxDEPRECATED
name dongleId
name deviceType
name version
name androidBuildInfo
name board
name bootloader
name brand
name device
name display
name fingerprint
name hardware
name host
name id
name manufacturer
name model
name product
name radioVersion
name serial
name supportedAbis
name tags
name time
name type
name user
name versionCodename
name versionRelease
name versionSdk
name versionSecurityPatch
name androidSensorsDEPRECATED
name chffrAndroidExtraDEPRECATED
name allCameraCharacteristics
name entries
name pandaInfo
name hasPanda
name dongleId
name stVersion
name espVersion
name dirty
name gitCommit
name gitBranch
name passive
name gitRemote
name iosBuildInfoDEPRECATED
name appVersion
name appBuild
name osVersion
name deviceModel
name kernelVersion
name androidProperties
name entries
name params
name entries
name key
name value
name key
name value
name key
name value
name key
name value
name key
name value
name key
name value
nam

KjException: capnp/dynamic.c++:141: failed: expected isSetInUnion(field); Tried to get() a union member which is not currently initialized.; field.getProto().getName() = roadCameraState; schema.getProto().getDisplayName() = log.capnp:Event
stack: 7f039fb36389 7f039fb39afa 7f039fb3a8df 7f039f9fda3d 7f039f9caecd 7f03f110e12a 7f03f105d3dd 7f03f11b23ae 7f03f11ae27f 7f03f105d51b 7f03f10d1374 7f03f10615b7 7f03f10d1374 7f03f10615b7 7f03f10d2454 7f03f105d885 7f03f11b2510 7f03f10bb0b7 7f03f10b7cef 7f03f105aeac 7f03f10d1374 7f03f10615b7 7f03f10d1374 7f03f10615b7 7f03f10d1374 7f03f10615b7 7f03f10d1374 7f03f10615b7 7f03f10d1374 7f03f10615b7 7f03f10d1374

In [150]:
result = parse_capnp_string(str(qlog_list[0]))
result



In [146]:
import json
json.loads(result)

JSONDecodeError: Expecting ',' delimiter: line 1 column 529 (char 528)

In [212]:
model_qlogs = [log for log in qlog_list if log.which() == "modelV2"]
len(model_qlogs)

30

In [216]:
print(model_qlogs[3])

( logMonoTime = 1157092048359,
  valid = true,
  modelV2 = (
    frameId = 12124,
    frameAge = 0,
    frameDropPerc = 0,
    timestampEof = 1157054624400,
    position = (
      x = [-1.5592933, -1.4079607, -0.94034994, -0.15331817, 0.94597781, 2.3502755, 4.0572281, 6.0816054, 8.4199743, 11.076779, 14.033125, 17.309542, 20.904549, 24.821371, 29.01038, 33.533928, 38.38641, 43.53376, 49.012196, 54.781616, 60.868141, 67.290619, 74.051605, 81.10144, 88.533752, 96.25666, 104.33369, 112.71635, 121.48546, 130.49567, 140.00322, 149.6834, 159.83925],
      y = [0.14344487, 0.14307681, 0.14316955, 0.14308006, 0.14098066, 0.13810413, 0.13371153, 0.12756485, 0.12173094, 0.11274366, 0.10223039, 0.090855211, 0.077490106, 0.060879104, 0.045715623, 0.030229293, 0.017647684, 0.014660567, 0.016167995, 0.033507023, 0.074235037, 0.13414395, 0.22833961, 0.34605789, 0.51319385, 0.70221877, 0.94211924, 1.2192439, 1.5349848, 1.9019217, 2.3123288, 2.7670474, 3.3022468],
      z = [-0.0004885589, -0.000652969

# The different types of CAN messages

In [208]:
types = [item.which() for item in qlog_list]
len(types)

11515

In [210]:
unique_types = set(types)
len(unique_types)

43

In [211]:
unique_types

{'accelerometer',
 'cameraOdometry',
 'can',
 'carControl',
 'carParams',
 'carState',
 'controlsState',
 'deviceState',
 'driverCameraState',
 'driverEncodeIdx',
 'driverMonitoringState',
 'driverStateV2',
 'gpsLocation',
 'gyroscope',
 'initData',
 'lateralPlanDEPRECATED',
 'liveCalibration',
 'liveLocationKalman',
 'liveParameters',
 'liveTorqueParameters',
 'longitudinalPlan',
 'magnetometer',
 'managerState',
 'mapRenderState',
 'microphone',
 'modelV2',
 'navInstruction',
 'navModel',
 'onroadEvents',
 'pandaStates',
 'peripheralState',
 'procLog',
 'qRoadEncodeIdx',
 'radarState',
 'roadCameraState',
 'roadEncodeIdx',
 'sentinel',
 'temperatureSensor',
 'thumbnail',
 'uiDebug',
 'uiPlan',
 'wideRoadCameraState',
 'wideRoadEncodeIdx'}

Which of these are the most interesting to look at? 


1. modelV2
