In [None]:
import os
import json

import numpy as np
import pandas as pd
import seaborn as sns
from venn import venn
import matplotlib.pyplot as plt

import utils
from constants import DATA_ROOT

## Dataset analysis

The dataset ships with per-frame metadata: weather conditions, solar conditions, the number of dynamic objects present in each annotated frame, and which annotation types are available for each frame.

In [None]:
# Location of the per-frame metadata table inside the dataset root.
meta_info_file = os.path.join(DATA_ROOT, "dataframes/metadata_info.csv")

# index_col=False stops pandas from using the first CSV column as the index.
# The `time` column is converted to datetime so it can be histogrammed later.
meta_df = (
    pd.read_csv(meta_info_file, index_col=False)
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
)
meta_df.head()

In [None]:
# Quick statistical summary of the metadata columns (pandas' default `describe()` selection).
meta_df.describe()

There are 7 feature columns:
- 3 with the number of dynamic objects present in the annotated frame (`Pedestrian`, `Vehicle`, `VulnerableVehicle`)
- 2 for solar conditions (`solar_angle_elevation` and `solar_angle_relative_azimuth`)
- 2 for weather conditions (`cloud_cover` and `precipitation_type`)

`precipitation_type` is stored as an integer code; the file `dataframes/weather_codes.json` is the lookup table used to decode it.

The dataset consists of 6666 unique multimodal sequences. Each sequence is built around one annotated core frame.  
Vision data consists of the core frame plus the previous and the next frame from the vision log.  
`lidar_data` is the full lidar scan that corresponds to the core frame.  
`range_lidar_data` normally contains 21 lidar scans: the core one and the full scans for 1 s before and 1 s after it.  
OXTS and vehicle data are also grouped around the core timestamp.  
`sequence_id` is a unique identifier for each sequence; `frame_id` is a unique identifier for each frame.

The data were collected by two vehicles, `india` and `golf` (see the `vehicle` column).

time column contains timestamps for each frame.

There are 4 types of annotations:
- dynamic_objects
- ego_road
- lane_markings
- static_objects

Each annotation type has a column of the same name whose value is 1 if the frame has that annotation and 0 otherwise.

In [None]:
# Metadata columns analysed below, grouped by what they describe.
feature_columns = [
    # number of dynamic objects on the annotated frame
    "Pedestrian",
    "Vehicle",
    "VulnerableVehicle",
    # solar conditions
    "solar_angle_elevation",
    "solar_angle_relative_azimuth",
    # weather conditions
    "cloud_cover",
    "precipitation_type",
]

In [None]:
# Global seaborn styling for the figures that follow.
sns.set(style='whitegrid', palette="deep", font_scale=1.1)

# Pairwise relationships between all feature columns; the grid is kept in
# `sns_plot` so the cell does not echo its repr.
features_df = meta_df[feature_columns]
sns_plot = sns.pairplot(data=features_df)

### Plot histograms for features

In [None]:
sns.set(style='whitegrid', palette="deep", font_scale=1.1, rc={"figure.figsize": [14, 8]})

# One bar per distinct pedestrian count. The custom estimator turns the size of
# each group into its share of all frames, expressed in percent.
total_frames = len(meta_df)
ax = sns.barplot(
    data=meta_df,
    x="Pedestrian",
    y="Pedestrian",
    estimator=lambda group: len(group) / total_frames * 100,
)
ax.set_xlabel('Pedestrian')
ax.set_ylabel("Percent")
ax.set(xlim=(-1, 22))

In [None]:
sns.set(style='whitegrid', palette="deep", font_scale=1, rc={"figure.figsize": [20, 8]})

# One bar per distinct vehicle count; bar height is the percentage of all
# frames that contain exactly that many vehicles.
total_frames = len(meta_df)
ax = sns.barplot(
    data=meta_df,
    x="Vehicle",
    y="Vehicle",
    estimator=lambda group: len(group) / total_frames * 100,
)

# The x axis has many categories, so the tick labels are turned vertical.
plt.setp(ax.get_xticklabels(), rotation=90)

ax.set_xlabel('Vehicle')
ax.set_ylabel("Percent")
ax.set(xlim=(-1, 70))

In [None]:
sns.set(style='whitegrid', palette="deep", font_scale=1.1, rc={"figure.figsize": [14, 8]})

# One bar per distinct VulnerableVehicle count; bar height is the percentage
# of all frames that have exactly that count.
total_frames = len(meta_df)
ax = sns.barplot(
    data=meta_df,
    x="VulnerableVehicle",
    y="VulnerableVehicle",
    estimator=lambda group: len(group) / total_frames * 100,
)
ax.set_xlabel('VulnerableVehicle')
ax.set_ylabel("Percent")
ax.set(xlim=(-1, 10))

In [None]:
fig, ax = plt.subplots()
col = meta_df['solar_angle_relative_azimuth'].dropna()

# Equal per-sample weights that sum to 100 turn the bin counts into
# percentages of the frames that have a value.
percent_weights = np.ones(len(col)) / len(col) * 100
col.hist(bins=20, range=(-180, 180), weights=percent_weights, ax=ax)
ax.set(xlabel='Solar Angle Relative Azimuth', ylabel="Percent")

In [None]:
fig, ax = plt.subplots()
col = meta_df['solar_angle_elevation'].dropna()

# Each remaining sample contributes 100 / N, so bar heights read as percent.
percent_weights = np.ones(len(col)) / len(col) * 100
col.hist(bins=20, range=(-25, 55), weights=percent_weights, ax=ax)
ax.set(xlabel='Solar Angle Elevation', ylabel="Percent")

In [None]:
fig, ax = plt.subplots()
col = meta_df['cloud_cover'].dropna()

# No explicit range here: the bins span the observed min/max of the column.
# Per-sample weights of 100 / N make the y axis a percentage.
percent_weights = np.ones(len(col)) / len(col) * 100
col.hist(bins=20, weights=percent_weights, ax=ax)
ax.set(xlabel='Cloud Cover', ylabel="Percent")

In [None]:
# Lookup table used to decode `precipitation_type`.
# The file is opened explicitly as UTF-8 (the encoding JSON is specified in)
# instead of relying on the platform's locale default.
weather_codes_file = os.path.join(DATA_ROOT, "dataframes/weather_codes.json")
with open(weather_codes_file, "r", encoding="utf-8") as f:
    # JSON object keys are always strings; convert them to int so they can
    # match the codes stored in the dataframe column.
    weather_codes = {int(key): value for key, value in json.load(f).items()}

# Add a decoded copy of the column. Codes missing from the lookup table are
# left unchanged by `replace`.
meta_df["prec_decoded"] = meta_df["precipitation_type"].replace(to_replace=weather_codes).values.tolist()

# One bar per decoded precipitation type; bar height is the percentage of all
# frames with that type.
ax = sns.barplot(y="precipitation_type", x="prec_decoded", data=meta_df, 
                 estimator=lambda x: len(x) / len(meta_df) * 100)
ax.set(xlabel='Precipitation Type', ylabel="Percent")

Plot Venn diagram for annotation types.

In [None]:
labels = ["dynamic_objects", "lane_markings", "ego_road", "static_objects"]

# For every annotation type, collect the index labels of the frames whose
# flag column is non-zero.
annotated_rows = {
    name: meta_df.index[meta_df[name].to_numpy().nonzero()[0]].tolist()
    for name in labels
}
dynamic_objects = annotated_rows["dynamic_objects"]
lane_markings = annotated_rows["lane_markings"]
ego_road = annotated_rows["ego_road"]
static_objects = annotated_rows["static_objects"]

# venn() expects a mapping from label to set; insertion order follows `labels`.
sets = {name: set(rows) for name, rows in annotated_rows.items()}

fig, ax = plt.subplots(1, figsize=(16,12))
venn(sets, ax=ax)
plt.legend(labels, ncol=4)
plt.show()

Plot timeline of data collection.

In [None]:
fig, ax = plt.subplots()

# Drop missing timestamps before building the weights, as the other histogram
# cells do. Series.hist discards NaT values itself, so weights sized from the
# full dataframe would no longer match the plotted data and matplotlib would
# raise a shape-mismatch error. With no missing timestamps the result is
# identical to using len(meta_df).
time_col = meta_df['time'].dropna()

# Per-sample weights of 100 / N make the y axis a percentage of the frames
# that have a timestamp.
time_col.hist(weights=np.ones(len(time_col)) / len(time_col)*100, bins=22, ax=ax, xrot=45).set(
    xlabel='Data Collection Time', ylabel="Percent")

Plot place of data collection.

In [None]:
# Path to the JSON file with frame coordinates, located directly in the dataset root.
ALL_FRAMES_COORDINATES = os.path.join(DATA_ROOT, "frame_coordinates.json")
# Project helper from `utils`; presumably renders the GPS position of every
# frame from that file — confirm against utils.py.
utils.show_gps_for_all_frames(ALL_FRAMES_COORDINATES)