In [2]:
# Make the project root (the parent of the starting directory) the working
# directory and add it to the import path.
# Safe to re-run: the guard below only moves up one directory per kernel
# session, so executing this cell again no longer climbs another level.

import sys
import os

if "_PROJECT_ROOT" not in globals():
    # Resolve the parent to an absolute path *before* changing directory.
    # A relative ".." entry in sys.path is resolved against the working
    # directory in effect when an import happens, i.e. after the chdir.
    _PROJECT_ROOT = os.path.abspath("..")
    sys.path.append(_PROJECT_ROOT)
    os.chdir(_PROJECT_ROOT)
print(os.getcwd())

/home/student/bev-project


In [3]:
from nuscenes import NuScenes
import nuscenes_utilities as nusc_utils

In [4]:
# Open the "v1.0-mini" dataset version from the "nuscenes" directory
# (a path relative to the current working directory), with verbose output on.
nuscenes = NuScenes(version="v1.0-mini", dataroot="nuscenes", verbose=True)

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.369 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [5]:
# Flatten scene -> sample -> camera into one (token, scene name, camera)
# record per camera image, in the same nesting order as the iteration.
records = [
    (sample_record["data"][camera_name], scene_record["name"], camera_name)
    for scene_record in nuscenes.scene
    for sample_record in nusc_utils.iterate_samples(
        nuscenes, scene_record["first_sample_token"]
    )
    for camera_name in nusc_utils.CAMERA_NAMES
]

# Split the records into three parallel lists (same length, same order).
tokens = [token for token, _, _ in records]
scene_labels = [scene_name for _, scene_name, _ in records]
camera_labels = [camera_name for _, _, camera_name in records]

In [6]:
import pandas as pd

In [23]:
# One row per camera image: its sample-data token, scene name and camera name.
data = pd.DataFrame(
    {
        "token": tokens,
        "scene_name": scene_labels,
        "camera": camera_labels,
    }
)

In [24]:
# Preview the combined "<scene_name>=<camera>" string for every row.
# The result is only displayed here, not stored.
data["scene_name"] + "=" + data["camera"]

0             scene-0061=CAM_FRONT
1        scene-0061=CAM_FRONT_LEFT
2       scene-0061=CAM_FRONT_RIGHT
3         scene-0061=CAM_BACK_LEFT
4        scene-0061=CAM_BACK_RIGHT
                   ...            
2419     scene-1100=CAM_FRONT_LEFT
2420    scene-1100=CAM_FRONT_RIGHT
2421      scene-1100=CAM_BACK_LEFT
2422     scene-1100=CAM_BACK_RIGHT
2423           scene-1100=CAM_BACK
Length: 2424, dtype: object

In [25]:
# Attach the combined "<scene_name>=<camera>" string as a "labels" column.
data = data.assign(labels=data["scene_name"] + "=" + data["camera"])

In [26]:
# Display the frame, which now carries the "labels" column.
data

Unnamed: 0,token,scene_name,camera,labels
0,e3d495d4ac534d54b321f50006683844,scene-0061,CAM_FRONT,scene-0061=CAM_FRONT
1,fe5422747a7d4268a4b07fc396707b23,scene-0061,CAM_FRONT_LEFT,scene-0061=CAM_FRONT_LEFT
2,aac7867ebf4f446395d29fbd60b63b3b,scene-0061,CAM_FRONT_RIGHT,scene-0061=CAM_FRONT_RIGHT
3,43893a033f9c46d4a51b5e08a67a1eb7,scene-0061,CAM_BACK_LEFT,scene-0061=CAM_BACK_LEFT
4,79dbb4460a6b40f49f9c150cb118247e,scene-0061,CAM_BACK_RIGHT,scene-0061=CAM_BACK_RIGHT
...,...,...,...,...
2419,8a87b74c6f8c46d49b73e96dd6d5f263,scene-1100,CAM_FRONT_LEFT,scene-1100=CAM_FRONT_LEFT
2420,62f2f587354b4f0fa8954447394a0df4,scene-1100,CAM_FRONT_RIGHT,scene-1100=CAM_FRONT_RIGHT
2421,8fdaff77e2de4d8ba21392e9b7d840ef,scene-1100,CAM_BACK_LEFT,scene-1100=CAM_BACK_LEFT
2422,f253e8760cb0441fbeade8f4e0ba7b11,scene-1100,CAM_BACK_RIGHT,scene-1100=CAM_BACK_RIGHT


In [10]:
# Keep only the rows whose camera is CAM_FRONT, then display the subset.
front_camera_mask = data["camera"] == "CAM_FRONT"
data_cam_front = data[front_camera_mask]
data_cam_front

Unnamed: 0,token,scene_name,camera
0,e3d495d4ac534d54b321f50006683844,scene-0061,CAM_FRONT
6,4b6870ae200c4b969b91c50a9737f712,scene-0061,CAM_FRONT
12,d0d9ef23e3934ea09d55afdc24db9827,scene-0061,CAM_FRONT
18,74e7a9260c5d45b78b831528b62daf41,scene-0061,CAM_FRONT
24,21ca7cbfbde14f088143cf001570d01b,scene-0061,CAM_FRONT
...,...,...,...
2394,ccf3c969a3c74cd1b34ab6b1045ad0ab,scene-1100,CAM_FRONT
2400,d05e9e109cd7431eb173763f0d2b2727,scene-1100,CAM_FRONT
2406,02525bc381f14e45936356c339a23922,scene-1100,CAM_FRONT
2412,c76cfc72a390467b8a12f87df95f394b,scene-1100,CAM_FRONT


In [12]:
from sklearn.model_selection import train_test_split

In [27]:
# Split every (token, scene name, camera) row 70/30 into train and validation.
# Stratifying on the combined "<scene_name>=<camera>" label keeps each
# scene/camera pair proportionally represented on both sides.
# random_state is pinned so the split, and the token CSVs written from it,
# is identical on every run instead of being reshuffled each time.
(
    train_tokens,
    val_tokens,
    train_scene_names,
    val_scene_names,
    train_camera_labels,
    val_camera_labels,
) = train_test_split(
    data["token"],
    data["scene_name"],
    data["camera"],
    test_size=0.3,
    random_state=42,
    stratify=data["labels"],
)

In [31]:
# Count training rows per camera to check how evenly the split spread them.
train_camera_labels.value_counts()

camera
CAM_BACK           283
CAM_BACK_RIGHT     283
CAM_FRONT          283
CAM_FRONT_LEFT     283
CAM_FRONT_RIGHT    283
CAM_BACK_LEFT      281
Name: count, dtype: int64

In [36]:
# Write the all-camera train/validation tokens, one token per line
# (no header row, no index column).
allcam_outputs = {
    "configs/stratified_mini_allcam_train_tokens.csv": train_tokens,
    "configs/stratified_mini_allcam_val_tokens.csv": val_tokens,
}
for csv_path, split_tokens in allcam_outputs.items():
    split_tokens.to_csv(csv_path, header=False, index=False)

In [37]:
# Split the front-camera rows 70/30 into train and validation, stratified by
# scene so every scene is proportionally represented on both sides.
# random_state is pinned so the split, and the token CSVs written from it,
# is identical on every run instead of being reshuffled each time.
(
    camfront_train_tokens,
    camfront_val_tokens,
    camfront_train_scene_names,
    camfront_val_scene_names,
) = train_test_split(
    data_cam_front["token"],
    data_cam_front["scene_name"],
    test_size=0.3,
    random_state=42,
    stratify=data_cam_front["scene_name"],
)

In [38]:
# Count front-camera training rows per scene to check the stratified split.
camfront_train_scene_names.value_counts()

scene_name
scene-0655    29
scene-0553    29
scene-1077    29
scene-0757    28
scene-0103    28
scene-1100    28
scene-0916    28
scene-1094    28
scene-0796    28
scene-0061    27
Name: count, dtype: int64

In [39]:
# Write the front-camera train/validation tokens, one token per line
# (no header row, no index column).
camfront_outputs = {
    "configs/stratified_mini_frontcam_train_tokens.csv": camfront_train_tokens,
    "configs/stratified_mini_frontcam_val_tokens.csv": camfront_val_tokens,
}
for csv_path, split_tokens in camfront_outputs.items():
    split_tokens.to_csv(csv_path, header=False, index=False)