## Get relevent CSV files

In [1]:
# we are going to make a big bebop dataset!
# first we will look for all csv files in the flight_data folder and subfolders
# we will only look a the log files that use an extended kalman filter

import os
import numpy as np
import csv
import pandas as pd

flight_data_path = 'flight_data'

relevant_names = [
        'time', # TIME
        'ekf_X1', 'ekf_X2', 'ekf_X3',    # POS
        'ekf_X4', 'ekf_X5', 'ekf_X6',    # VEL
        'ekf_X7', 'ekf_X8', 'ekf_X9',    # ATT
        'ekf_X10', 'ekf_X11', 'ekf_X12', # BIAS ACC
        'ekf_X13', 'ekf_X14', 'ekf_X15', # BIAS GYRO
        'ekf_U1', 'ekf_U2', 'ekf_U3',    # ACC RAW
        'ekf_U4', 'ekf_U5', 'ekf_U6',    # GYRO RAW
        'ekf_Z1', 'ekf_Z2', 'ekf_Z3',    # POS OPTITRACK
        'rpm_obs_1', 'rpm_obs_2', 'rpm_obs_3', 'rpm_obs_4', # RPM
        'rpm_ref_1', 'rpm_ref_2', 'rpm_ref_3', 'rpm_ref_4', # RPM REF
        'autopilot_mode' # AUTOPILOT MODE
]

relevant_csv_files = []

for root, dirs, files in os.walk(flight_data_path):
    for file in files:
        if file.endswith('.csv'):
            path = os.path.join(root, file)
            # check if the csv file contains the word ekf_X1 in the first row
            with open(os.path.join(root, file), 'r') as f:
                reader = csv.reader(f)
                row1 = next(reader)
                # all of the relevant names must be in the first row
                relevant = all(name in row1 for name in relevant_names)
                # exception:
                if 'bum' in path and 'no_bum' not in path:
                    relevant = False
                if 'aug' not in path:
                    relevant = False
                if relevant:
                    relevant_csv_files.append(path)
                    print('[Relevant]   - ' + path)
                else:
                    print('[Irrelevant] - ' + path)


[Irrelevant] - flight_data/min_snap/20230208-121903_alpha=0.75.csv
[Irrelevant] - flight_data/min_snap/20230208-121752_alpha=0.5.csv
[Irrelevant] - flight_data/corner_free_final_velocity/test_5.csv
[Irrelevant] - flight_data/new13/20210921-124504.csv
[Irrelevant] - flight_data/new13/20210921-130747.csv
[Irrelevant] - flight_data/new13/20210921-125359.csv
[Irrelevant] - flight_data/new13/20210921-124053.csv
[Irrelevant] - flight_data/new4/simulation.csv
[Irrelevant] - flight_data/new4/simulation5.csv
[Irrelevant] - flight_data/new4/simulation9.csv
[Irrelevant] - flight_data/new4/simulation7.csv
[Irrelevant] - flight_data/new4/simulation6.csv
[Irrelevant] - flight_data/new4/simulation3.csv
[Irrelevant] - flight_data/new4/gcnet_power_optimal2.csv
[Irrelevant] - flight_data/new4/simulation8.csv
[Irrelevant] - flight_data/new4/4wpfollow.csv
[Irrelevant] - flight_data/new4/4wpfollow2.csv
[Irrelevant] - flight_data/new4/simulation2.csv
[Irrelevant] - flight_data/new4/simulation4.csv
[Irreleva

## Load all relevant files and concatenate them into one dictionary

In [2]:
big_bebop_data = dict()

for file in relevant_csv_files:
    data = pd.read_csv(file)
    # load relevant columns
    data = data[relevant_names].to_dict('list')
    # convert to numpy arrays
    for key in data.keys():
        data[key] = np.array(data[key])
    big_bebop_data[file] = data

In [3]:
for key in relevant_csv_files:
    if 'bum' in key:
        print(key)

## Preprocess the data

In [4]:
# renaming variables
# computing extra useful variables
# removing invalid data (crashes, etc.)

# rotatation matrix to convert from body to world frame
def Rmat(phi, theta, psi):
    Rx = np.array([[1, 0, 0], [0, np.cos(phi), -np.sin(phi)], [0, np.sin(phi), np.cos(phi)]])
    Ry = np.array([[np.cos(theta), 0, np.sin(theta)],[0, 1, 0],[-np.sin(theta), 0, np.cos(theta)]])
    Rz = np.array([[np.cos(psi), -np.sin(psi), 0],[np.sin(psi), np.cos(psi), 0], [0, 0, 1]])
    R = Rz@Ry@Rx
    return R

# preprocess data
for file in relevant_csv_files:
    data = big_bebop_data[file]

    # RENAME VARIABLES
    data['t'] = data.pop('time')

    data['x'] = data.pop('ekf_X1')
    data['y'] = data.pop('ekf_X2')
    data['z'] = data.pop('ekf_X3')

    data['vx'] = data.pop('ekf_X4')
    data['vy'] = data.pop('ekf_X5')
    data['vz'] = data.pop('ekf_X6')

    data['phi'] = data.pop('ekf_X7')
    data['theta'] = data.pop('ekf_X8')
    data['psi'] = data.pop('ekf_X9')

    data['ax_raw'] = data.pop('ekf_U1')
    data['ay_raw'] = data.pop('ekf_U2')
    data['az_raw'] = data.pop('ekf_U3')
    data['p_raw'] = data.pop('ekf_U4')
    data['q_raw'] = data.pop('ekf_U5')
    data['r_raw'] = data.pop('ekf_U6')

    data['bias_ax'] = data.pop('ekf_X10')
    data['bias_ay'] = data.pop('ekf_X11')
    data['bias_az'] = data.pop('ekf_X12')
    data['bias_p'] = data.pop('ekf_X13')
    data['bias_q'] = data.pop('ekf_X14')
    data['bias_r'] = data.pop('ekf_X15')

    data['x_opti'] = data.pop('ekf_Z1')
    data['y_opti'] = data.pop('ekf_Z2')
    data['z_opti'] = data.pop('ekf_Z3')

    data['u1'] = data.pop('rpm_ref_1')
    data['u2'] = data.pop('rpm_ref_2')
    data['u3'] = data.pop('rpm_ref_3')
    data['u4'] = data.pop('rpm_ref_4')
    data['w1'] = data.pop('rpm_obs_1')
    data['w2'] = data.pop('rpm_obs_2')
    data['w3'] = data.pop('rpm_obs_3')
    data['w4'] = data.pop('rpm_obs_4')

    # COMPUTE EXTRA VARIABLES
    # unbiased measurements
    data['ax'] = data['ax_raw'] - data['bias_ax']
    data['ay'] = data['ay_raw'] - data['bias_ay']
    data['az'] = data['az_raw'] - data['bias_az']
    data['p'] = data['p_raw'] - data['bias_p']
    data['q'] = data['q_raw'] - data['bias_q']
    data['r'] = data['r_raw'] - data['bias_r']

    # speed
    data['v'] = np.sqrt(data['vx']**2 + data['vy']**2 + data['vz']**2)

    # body velocities
    v_body = np.stack([
        Rmat(phi, theta, psi).T@[vx, vy, vz]
        for vx, vy, vz, phi, theta, psi
        in zip(data['vx'],data['vy'],data['vz'],data['phi'],data['theta'],data['psi'])
    ])
    data['vbx'] = v_body[:,0]
    data['vby'] = v_body[:,1]
    data['vbz'] = v_body[:,2]
    
    # REMOVE INVALID DATA AND CUT THE DATA INTO VALID PIECES
    out_of_bounds = (np.abs(data['x'])>5) | (np.abs(data['y'])>5) | (data['z']>-0.3)
    out_of_bounds |= (np.abs(data['ax'])>10) | (np.abs(data['ay'])>10)
    out_of_bounds |= (data['az']<-40) | (data['az']>20)
    out_of_bounds |= (np.abs(data['p'])>15) | (np.abs(data['q'])>15) | (np.abs(data['r'])>10)
    v_too_high = data['v']>10

    # only use data where autopilot_mode = 1
    autopilot_mode = data['autopilot_mode']

    # invalid indices
    invalid = out_of_bounds | v_too_high  | (autopilot_mode < 1)

    # valid indices
    valid = ~invalid

    # calculate the valid intervals
    jumps = [0]+[i for i in range(len(valid)-1) if valid[i] != valid[i+1]]+[len(valid)-1]
    if valid[0]:
        valid_intervals = [(jumps[i], jumps[i+1]) for i in range(0, len(jumps), 2) if i+1 < len(jumps)]
    else:
        valid_intervals = [(jumps[i], jumps[i+1]) for i in range(1, len(jumps), 2) if i+1 < len(jumps)]
    valid_intervals = [(i,j) for i,j in valid_intervals if j-i > 0]    

    if len(valid_intervals) == 0:
        print('[NOT USED]')
        big_bebop_data.pop(file)
    else:
        for idx, i in enumerate(valid_intervals):
            name = file+'_'+str(idx)
            print(name)
            big_bebop_data[name] = {key: data[key][i[0]:i[1]] for key in data.keys()}
            big_bebop_data[name]['t'] -= big_bebop_data[name]['t'][0]
            min_t = 4
            if big_bebop_data[name]['t'][-1] < min_t:
                print('[NOT USED]')
                big_bebop_data.pop(name)
        big_bebop_data.pop(file)

    # final_index = -1
    # for index, value in enumerate(invalid):
    #     if value:
    #         final_index = index
    #         break
    # for key in data.keys():
    #     data[key] = data[key][:final_index]
    # if final_index == 0:
    #     big_bebop_data.pop(file)
    #     print('[NOT USED]')

print('new dictionary keys:')
data.keys()

flight_data/4aug/illegal3(10000).csv_0
[NOT USED]
flight_data/4aug/illegal.csv_0
flight_data/4aug/20230804-145542.csv_0
[NOT USED]
flight_data/4aug/illegal2.csv_0
flight_data/4aug/20230804-145056.csv_0
flight_data/4aug/20230804-145056.csv_1
[NOT USED]
flight_data/4aug/20230804-145056.csv_2
[NOT USED]
flight_data/4aug/20230804-145056.csv_3
[NOT USED]
flight_data/4aug/20230804-145056.csv_4
[NOT USED]
flight_data/10aug/low_freq_dist_input2.csv_0
flight_data/10aug/low_freq_dist_input2.csv_1
[NOT USED]
flight_data/10aug/20230810-113228.csv_0
flight_data/10aug/low_freq_dist_input.csv_0
flight_data/10aug/low_freq_dist_input.csv_1
flight_data/10aug/20230810-113700.csv_0
flight_data/10aug/low_freq_dist.csv_0
flight_data/7aug/20230807-195503.csv_0
flight_data/7aug/20230807-195503.csv_1
[NOT USED]
flight_data/14aug/20230814-091451.csv_0
flight_data/14aug/20230814-091451.csv_1
[NOT USED]
flight_data/14aug/20230814-091451.csv_2
[NOT USED]
flight_data/14aug/20230814-091451.csv_3
[NOT USED]
flight_da

dict_keys(['autopilot_mode', 't', 'x', 'y', 'z', 'vx', 'vy', 'vz', 'phi', 'theta', 'psi', 'ax_raw', 'ay_raw', 'az_raw', 'p_raw', 'q_raw', 'r_raw', 'bias_ax', 'bias_ay', 'bias_az', 'bias_p', 'bias_q', 'bias_r', 'x_opti', 'y_opti', 'z_opti', 'u1', 'u2', 'u3', 'u4', 'w1', 'w2', 'w3', 'w4', 'ax', 'ay', 'az', 'p', 'q', 'r', 'v', 'vbx', 'vby', 'vbz'])

In [5]:
import matplotlib.pyplot as plt

def plot(var_name):
    for key in big_bebop_data.keys():
        plt.plot(big_bebop_data[key][var_name])
    plt.title(var_name)
    plt.show()

# for key in big_bebop_data.keys():
#     x = big_bebop_data[key]['x']
#     t = big_bebop_data[key]['t']
#     dx = np.gradient(x, t)
#     plt.plot(t, dx)
#     plt.show()

## Animate the dataset trajectories

In [6]:
import importlib
from quadcopter_animation import animation

data = big_bebop_data
files = big_bebop_data.keys()

traj = {
    't' : [data[f]['t'] for f in files],
    'x' : [data[f]['x'] for f in files],
    'y' : [data[f]['y'] for f in files],
    'z' : [data[f]['z'] for f in files],
    'phi' : [data[f]['phi'] for f in files],
    'theta' : [data[f]['theta'] for f in files],
    'psi' : [data[f]['psi'] for f in files],
    'u' : [np.stack([
        (data[f]['u1']-3000)/(12000-3000),
        (data[f]['u2']-3000)/(12000-3000),
        (data[f]['u3']-3000)/(12000-3000),
        (data[f]['u4']-3000)/(12000-3000)
    ]).T for f in files]
}
print(traj)

animation.animate(**traj, multiple_trajectories=True)

{'t': [array([0.000000e+00, 2.933000e-03, 4.882000e-03, ..., 8.266594e+00,
       8.268547e+00, 8.270500e+00]), array([0.0000000e+00, 2.9370000e-03, 4.8810000e-03, ..., 1.2102529e+01,
       1.2104481e+01, 1.2106436e+01]), array([0.0000000e+00, 2.9300000e-03, 4.8830000e-03, ..., 1.2146485e+01,
       1.2148438e+01, 1.2150391e+01]), array([0.0000000e+00, 2.9310000e-03, 4.8810000e-03, ..., 4.0376933e+01,
       4.0378886e+01, 4.0380840e+01]), array([0.000000e+00, 2.941000e-03, 4.887000e-03, ..., 9.897457e+00,
       9.898437e+00, 9.900391e+00]), array([0.000000e+00, 2.941000e-03, 4.892000e-03, ..., 5.502926e+00,
       5.504882e+00, 5.506843e+00]), array([0.0000000e+00, 2.9240000e-03, 3.9050000e-03, ..., 2.1630858e+01,
       2.1632807e+01, 2.1640623e+01]), array([0.0000000e+00, 2.9330000e-03, 4.8840000e-03, ..., 1.7224594e+01,
       1.7226547e+01, 1.7228511e+01]), array([0.000000e+00, 2.941000e-03, 4.943000e-03, ..., 5.085942e+00,
       5.087893e+00, 5.089846e+00]), array([0.0000000e+

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread (0x1f9ff10).
Cannot move to target thread (0xa080a60)

QObject::moveToThread: Current thread (0xa080a60) is not the object's thread

## Save the dataset

In [7]:
# we want to save the dictionary 'big_bebop_data' to a file
# so that we can load it later without having to run the code again
import pickle

# save the dictionary to a file
with open('datasets/aug_bebop_data.pickle', 'wb') as handle:
    pickle.dump(big_bebop_data, handle, protocol=pickle.HIGHEST_PROTOCOL)