In [59]:
import pandas as pd
import datetime

In [97]:
# Sensor CSV exports to load, one file per sensor stream.
# Metadata.csv and Battery.csv are intentionally left out of the list.
file_paths = [
    'GyroscopeUncalibrated.csv',
    'Accelerometer.csv',
    #'Metadata.csv',
    #'Battery.csv',
    'Gyroscope.csv',
    'Gravity.csv',
    'Barometer.csv',
    'AccelerometerUncalibrated.csv',
    'Location.csv',
    'Magnetometer.csv',
    'MagnetometerUncalibrated.csv',
    'TotalAcceleration.csv',
    'Orientation.csv',
]

# Load every CSV into `dataframes`, keyed by the file name up to its first '.'.
# `filenames` records the same keys in load order.
dataframes = {}
filenames = []

for csv_path in file_paths:
    sensor_name = csv_path.partition('.')[0]
    filenames.append(sensor_name)
    dataframes[sensor_name] = pd.read_csv(csv_path)
    # Echo each name so the loaded sensors are visible in the cell output.
    print(sensor_name)

GyroscopeUncalibrated
Accelerometer
Gyroscope
Gravity
Barometer
AccelerometerUncalibrated
Location
Magnetometer
MagnetometerUncalibrated
TotalAcceleration
Orientation


In [62]:
# Convert the raw `time` column (treated as Unix epoch nanoseconds) into a
# datetime64[ns] `Timestamp` column on every loaded frame.
#
# pd.to_datetime is vectorised and keeps the nanosecond value exact. The earlier
# per-row datetime.utcfromtimestamp(x / 1e9) went through float seconds, which
# loses sub-microsecond precision, and utcfromtimestamp is deprecated as of
# Python 3.12. The result is still timezone-naive and UTC-based.
for filename in filenames:
    dataframes[filename]['Timestamp'] = pd.to_datetime(dataframes[filename]['time'], unit='ns')

In [87]:
# Downsample every sensor frame to one row per second, keeping the first value
# of each column within that second, and drop the raw time columns that the
# `Timestamp` column makes redundant.
#
# The result is stored in a new dictionary (`dataframes_reindex`) so the loaded
# frames in `dataframes` are left untouched and this cell can be re-run safely.
dataframes_reindex = {}
for key, df in dataframes.items():
    dataframes_reindex[key] = (
        df.set_index('Timestamp')
        # Lowercase 's' is the supported alias for seconds; uppercase 'S' emits a
        # deprecation warning on pandas >= 2.2.
        .resample('s')
        .first()
        .drop(columns=['time', 'seconds_elapsed'])
        # Bring `Timestamp` back as a regular column so it can be used as the merge key.
        .reset_index()
    )

In [90]:
# Tag each resampled frame with a 'Source' column holding the name of the file
# it came from, so rows can be traced back after merging.
for source_name in dataframes_reindex:
    dataframes_reindex[source_name]['Source'] = source_name

In [94]:
# Outer-join all resampled frames on 'Timestamp', in dictionary order.
# The first frame seeds the result and keeps its bare column names; any column
# of a later frame that clashes with an existing name gets a `_<key>` suffix.
merged_df = None
for key, df in dataframes_reindex.items():
    merged_df = df if merged_df is None else merged_df.merge(
        df, on='Timestamp', how='outer', suffixes=('', f'_{key}')
    )

In [96]:
# Sanity check: number of columns after the merge, followed by their names.
merged_df.shape[1], merged_df.columns

(54,
 Index(['Timestamp', 'z', 'y', 'x', 'Source', 'z_Accelerometer',
        'y_Accelerometer', 'x_Accelerometer', 'Source_Accelerometer',
        'z_Gyroscope', 'y_Gyroscope', 'x_Gyroscope', 'Source_Gyroscope',
        'z_Gravity', 'y_Gravity', 'x_Gravity', 'Source_Gravity',
        'relativeAltitude', 'pressure', 'Source_Barometer',
        'z_AccelerometerUncalibrated', 'y_AccelerometerUncalibrated',
        'x_AccelerometerUncalibrated', 'Source_AccelerometerUncalibrated',
        'bearingAccuracy', 'speedAccuracy', 'verticalAccuracy',
        'horizontalAccuracy', 'speed', 'bearing', 'altitude', 'longitude',
        'latitude', 'Source_Location', 'z_Magnetometer', 'y_Magnetometer',
        'x_Magnetometer', 'Source_Magnetometer', 'z_MagnetometerUncalibrated',
        'y_MagnetometerUncalibrated', 'x_MagnetometerUncalibrated',
        'Source_MagnetometerUncalibrated', 'z_TotalAcceleration',
        'y_TotalAcceleration', 'x_TotalAcceleration',
        'Source_TotalAcceleration', 

In [120]:
# Collect the suffixed 'Source_<key>' columns created by the merge.
source_columns = [name for name in merged_df.columns if name.startswith('Source_')]

# Wherever 'Source' is still null, take the value from the next 'Source_<key>'
# column; columns are consulted in their merge order.
for suffixed_source in source_columns:
    merged_df['Source'] = merged_df['Source'].fillna(merged_df[suffixed_source])

# The suffixed columns are no longer needed once 'Source' has been filled.
merged_df.drop(columns=source_columns, inplace=True)
# Keep only rows where every remaining column has a value.
merged_df.dropna(inplace=True)

In [127]:
# Order the rows chronologically and renumber the index from 0.
merged_df_sorted = merged_df.sort_values(by='Timestamp').reset_index(drop=True)


In [128]:
# Translation table from merged column names to the export column names.
# Columns that are absent here and not listed in `drop_columns` (e.g. 'Timestamp')
# keep their current name.
mapper_dict = {
    # Bare x/y/z: columns of the first merged frame, which carry no suffix.
    'z': 'gyroZ(rad/s)',
    'y': 'gyroY(rad/s)',
    'x': 'gyroX(rad/s)',

    # Accelerometer, calibrated and uncalibrated.
    'z_Accelerometer': 'accelUserZ(g)',
    'y_Accelerometer': 'accelUserY(g)',
    'x_Accelerometer': 'accelUserX(g)',
    'z_AccelerometerUncalibrated': 'accelZ(g)',
    'y_AccelerometerUncalibrated': 'accelY(g)',
    'x_AccelerometerUncalibrated': 'accelX(g)',

    # Barometer.
    'relativeAltitude': 'RelativeAltitude(meters)',
    'pressure': 'Pressure(kilopascals)',

    # Location.
    'verticalAccuracy': 'VerticalAccuracy(m)',
    'horizontalAccuracy': 'HorizontalAccuracy(m)',
    'speed': 'Speed(m/s)',
    'bearing': 'Course',
    'altitude': 'Alt(m)',
    'longitude': 'Long',
    'latitude': 'Lat',

    # Magnetometer, calibrated and uncalibrated.
    'z_Magnetometer': 'calMagZ(µT)',
    'y_Magnetometer': 'calMagY(µT)',
    'x_Magnetometer': 'calMagX(µT)',
    'z_MagnetometerUncalibrated': 'magZ(µT)',
    'y_MagnetometerUncalibrated': 'magY(µT)',
    'x_MagnetometerUncalibrated': 'magX(µT)',

    # Orientation: quaternion components and Euler angles.
    'qz': 'qZ',
    'qy': 'qY',
    'qx': 'qX',
    'qw': 'qW',
    'roll': 'Roll(rads)',
    'pitch': 'Pitch(rads)',
    'yaw': 'Yaw(rads)',
}

# Merged columns that are not part of the export and get removed.
drop_columns = [
    'z_Gyroscope', 'y_Gyroscope', 'x_Gyroscope',
    'z_Gravity', 'y_Gravity', 'x_Gravity',
    'bearingAccuracy', 'speedAccuracy',
    'z_TotalAcceleration', 'y_TotalAcceleration', 'x_TotalAcceleration',
    'Source',
]

In [129]:
# Rename the kept columns to their export names, then drop the extraneous ones.
df_renamed = merged_df_sorted.rename(columns=mapper_dict)
df_clean = df_renamed.drop(columns=drop_columns)
# Print a summary of the columns, non-null counts and dtypes.
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11121 entries, 0 to 11120
Data columns (total 32 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Timestamp                 11121 non-null  datetime64[ns]
 1   gyroZ(rad/s)              11121 non-null  float64       
 2   gyroY(rad/s)              11121 non-null  float64       
 3   gyroX(rad/s)              11121 non-null  float64       
 4   accelUserZ(g)             11121 non-null  float64       
 5   accelUserY(g)             11121 non-null  float64       
 6   accelUserX(g)             11121 non-null  float64       
 7   RelativeAltitude(meters)  11121 non-null  float64       
 8   Pressure(kilopascals)     11121 non-null  float64       
 9   accelZ(g)                 11121 non-null  float64       
 10  accelY(g)                 11121 non-null  float64       
 11  accelX(g)                 11121 non-null  float64       
 12  VerticalAccuracy(m

In [124]:
# Quick visual check: altitude over time as a line chart.
# NOTE(review): `go` is not imported in any visible cell; presumably it is
# `plotly.graph_objects` imported elsewhere. Confirm, or add the import to the
# imports cell so the notebook survives Restart & Run All.
trace = go.Scatter(
    x=df_clean['Timestamp'],
    y=df_clean['Alt(m)'],
    mode='lines',
    name='Altitude',
)

layout = go.Layout(
    title='Altitude vs Timestamp',
    xaxis={'title': 'Timestamp'},
    yaxis={'title': 'Altitude (m)'},
)

fig = go.Figure(data=[trace], layout=layout)
fig.show()


In [126]:
# Export the cleaned frame to CSV; change the file name accordingly.
# The name has to be a string. The unquoted `df_clean.csv` was an attribute
# lookup on the DataFrame and raised AttributeError before anything was written.
file_name = 'df_clean.csv'
# index=False keeps the 0..n-1 row index out of the exported file.
df_clean.to_csv(file_name, index=False)