In [6]:
# Source folder (scanned for one subfolder per dataset) and the folder that receives the converted CSVs.
input_dir, output_dir = "data/pvs", "data/pvs-converted"

In [7]:
import os
import pandas as pd

# File names looked up inside every dataset subfolder.
label_fname = "dataset_labels.csv"
left_fname = "dataset_gps_mpu_left.csv"
right_fname = "dataset_gps_mpu_right.csv"

# Columns kept from the left/right MPU files.
mpu_cols = [
  "timestamp",
  "acc_x_dashboard", "acc_y_dashboard", "acc_z_dashboard",
  "gyro_x_dashboard", "gyro_y_dashboard", "gyro_z_dashboard",
  "timestamp_gps", "latitude", "longitude",
]

# Label columns (good / regular / bad road), one triple per side.
label_left_cols = ["good_road_left", "regular_road_left", "bad_road_left"]
label_right_cols = ["good_road_right", "regular_road_right", "bad_road_right"]

# Every label column read from the label file: left triple first, then right.
label_cols = label_left_cols + label_right_cols

def list_subfolders(dirname):
  """Return the paths of the immediate subdirectories of ``dirname``.

  Args:
    dirname: Directory to scan (not recursive).

  Returns:
    A list of paths (``dirname`` joined with each subdirectory name), sorted
    lexicographically so that repeated runs visit the folders in the same order.

  Raises:
    OSError: If ``dirname`` cannot be scanned (e.g. it does not exist).
  """
  # The context manager closes the scandir iterator promptly instead of
  # leaving the directory handle open until garbage collection.
  with os.scandir(dirname) as entries:
    return sorted(entry.path for entry in entries if entry.is_dir())

# Paths of the dataset subfolders found directly under input_dir; the cells below loop over them.
subdirs = list_subfolders(input_dir)


In [8]:
# File-name suffixes of the per-side CSVs written by the column-filtering step.
filtered_left_postfix, filtered_right_postfix = "left-filtered", "right-filtered"

def filter_input_cols(base, dirname):
  """Write one filtered CSV per side (left/right) for a single dataset folder.

  Reads the label file and the left/right MPU files from ``base/dirname``,
  keeps only ``label_cols`` / ``mpu_cols``, appends the side's three label
  columns to that side's sensor columns (renamed to ``good_road``,
  ``regular_road``, ``bad_road``) and writes the result to
  ``output_dir/<dirname>-<filtered_*_postfix>.csv``. Prints the shapes of the
  three inputs and two outputs.

  Args:
    base: Parent directory that contains the dataset folder.
    dirname: Name of the dataset folder; also used as the output file prefix.

  Raises:
    ValueError: If the label file and the two sensor files do not have the
      same number of rows.
  """
  src_dir = os.path.join(base, dirname)  # not named `dir`, which would shadow the builtin

  label_df = pd.read_csv(os.path.join(src_dir, label_fname), usecols=label_cols)
  left_df = pd.read_csv(os.path.join(src_dir, left_fname), usecols=mpu_cols)
  right_df = pd.read_csv(os.path.join(src_dir, right_fname), usecols=mpu_cols)

  # concat(axis=1) aligns on the row index and pads with NaN, so a row-count
  # mismatch would silently produce rows with missing labels or sensor values.
  if not (len(label_df) == len(left_df) == len(right_df)):
    raise ValueError(
      f"row count mismatch in {src_dir}: labels={len(label_df)}, "
      f"left={len(left_df)}, right={len(right_df)}")

  left_out_df = pd.concat([left_df, label_df[label_left_cols]], axis=1).rename(
    columns={"good_road_left": "good_road", "regular_road_left": "regular_road", "bad_road_left": "bad_road"})

  right_out_df = pd.concat([right_df, label_df[label_right_cols]], axis=1).rename(
    columns={"good_road_right": "good_road", "regular_road_right": "regular_road", "bad_road_right": "bad_road"})

  # to_csv does not create missing directories; make sure the target exists.
  os.makedirs(output_dir, exist_ok=True)
  left_out_df.to_csv(
    os.path.join(output_dir, f"{dirname}-{filtered_left_postfix}.csv"), index=False)
  right_out_df.to_csv(
    os.path.join(output_dir, f"{dirname}-{filtered_right_postfix}.csv"), index=False)

  print(label_df.shape, left_df.shape, right_df.shape, left_out_df.shape, right_out_df.shape)


# Run the column-filtering step once for every dataset subfolder.
for subdir in subdirs:
  # os.path.split returns the same (dirname, basename) pair, and the folder
  # name is bound to a name that does not replace the builtin `dir` in the kernel.
  base, folder_name = os.path.split(subdir)
  filter_input_cols(base, folder_name)

(144036, 6) (144036, 10) (144036, 10) (144036, 13) (144036, 13)
(124684, 6) (124684, 10) (124684, 10) (124684, 13) (124684, 13)
(105816, 6) (105816, 10) (105816, 10) (105816, 13) (105816, 13)
(132492, 6) (132492, 10) (132492, 10) (132492, 13) (132492, 13)
(133877, 6) (133877, 10) (133877, 10) (133877, 13) (133877, 13)
(96279, 6) (96279, 10) (96279, 10) (96279, 13) (96279, 13)
(128548, 6) (128548, 10) (128548, 10) (128548, 13) (128548, 13)
(123618, 6) (123618, 10) (123618, 10) (123618, 13) (123618, 13)
(91555, 6) (91555, 10) (91555, 10) (91555, 13) (91555, 13)


In [9]:
import numpy as np

# File-name suffixes of the per-side CSVs written by the normalization step.
normalized_left_postfix, normalized_right_postfix = "left-normalized", "right-normalized"

# acc_z_dashboard := acc_z_dashboard - mean(acc_z_dashboard), computed per file;
# timestamp and timestamp_gps are scaled by 1,000,000 and stored as int64.
def _to_microseconds(values):
  """Scale a float Series by 1,000,000 and return it as int64, rounded to nearest.

  NOTE(review): presumably seconds -> microseconds; confirm the input unit.
  """
  # Round before casting: astype(int64) truncates toward zero, so a product
  # that lands just below an integer (e.g. ...099999.875) would lose one unit.
  return (values * 1000000).round().astype(np.int64)


def _normalize_frame(df):
  """Return a copy of ``df`` with acc_z_dashboard mean-centered and integer timestamps."""
  # assign() overwrites the existing columns in place, so column order is kept.
  return df.assign(
    acc_z_dashboard=df["acc_z_dashboard"] - df["acc_z_dashboard"].mean(),
    timestamp=_to_microseconds(df["timestamp"]),
    timestamp_gps=_to_microseconds(df["timestamp_gps"]),
  )


def normalize(base, dirname):
  """Normalize the filtered left/right CSVs of one dataset folder.

  Reads ``output_dir/<dirname>-<filtered_*_postfix>.csv`` for both sides,
  subtracts each file's own mean from ``acc_z_dashboard``, converts
  ``timestamp`` and ``timestamp_gps`` to int64 after scaling by 1,000,000, and
  writes ``output_dir/<dirname>-<normalized_*_postfix>.csv``. Prints the shapes
  of the two written frames.

  Args:
    base: Unused; accepted so the call signature matches filter_input_cols.
    dirname: Dataset folder name used as the input/output file prefix.
  """
  left_in = os.path.join(output_dir, f"{dirname}-{filtered_left_postfix}.csv")
  right_in = os.path.join(output_dir, f"{dirname}-{filtered_right_postfix}.csv")
  left_out = os.path.join(output_dir, f"{dirname}-{normalized_left_postfix}.csv")
  right_out = os.path.join(output_dir, f"{dirname}-{normalized_right_postfix}.csv")

  left_df = _normalize_frame(pd.read_csv(left_in))
  right_df = _normalize_frame(pd.read_csv(right_in))

  left_df.to_csv(left_out, index=False)
  right_df.to_csv(right_out, index=False)

  print(left_df.shape, right_df.shape)


# Run the normalization step once for every dataset subfolder.
for subdir in subdirs:
  # os.path.split returns the same (dirname, basename) pair, and the folder
  # name is bound to a name that does not replace the builtin `dir` in the kernel.
  base, folder_name = os.path.split(subdir)
  normalize(base, folder_name)

(144036, 13) (144036, 13)
(124684, 13) (124684, 13)
(105816, 13) (105816, 13)
(132492, 13) (132492, 13)
(133877, 13) (133877, 13)
(96279, 13) (96279, 13)
(128548, 13) (128548, 13)
(123618, 13) (123618, 13)
(91555, 13) (91555, 13)
