# Virb Video

The goal is to be able to automatically process video from the Virb camera, using the sensor data recorded in the accompanying FIT file:

- Read the gyro/accel/mag data from the FitFile
- Figure out timing: is the data synchronized, or really asynchronous?
- Estimate rotation



In [1]:
import itertools as it
import pandas as pd
import collections

# These are libraries written for RaceAnalysis
from global_variables import G
from nbutils import display_markdown, display

G.init_seattle(logging_level="INFO")

import matplotlib.pyplot as plt
import numpy as np

import race_logs
import process as p
import analysis as a
import chart as c

# This is the python-fitparse library
from fitparse import FitFile

In [1]:
# Read a smallish fit file.  Note, reading a FIT file with these tools is *SLOW* (10s of seconds or more).
# This may not be a problem, since we are reading the file in order to process video, which is slower still.
# But consider pre-processing once and then serializing the result with pandas.

# ff = FitFile('Data/Virb360Fit/2020-04-09-21-41-03.fit')
# ff = FitFile('/Volumes/Big/Virb/GMetrix/2020-07-11-09-27-15.fit')
# ff = FitFile('Data/Virb360Fit/2020-07-11-09-27-15.fit')

# Short video from my backyard
ff = FitFile('Data/Virb360Fit/2020-08-30-17-24-00.fit')

# Description of a test recording: the FIT file path, a video file name, and the
# sequence of camera motions that were performed.
recording = dict(fitfile = '/Users/viola/Python/sailing/Data/Virb360Fit/2020-09-06-16-48-20.fit',
                 video_file_name = 'V0920368.MP4',
                 description = 'rotate 180 to right yaw, rotate 180 to right yaw, pause, wiggle in yaw, rotate left 360 in yaw, fast right 360 yaw, roll left 90 degrees, pitch up and then pitch down (several times), roll back, returning to original position')

# NOTE: this rebinds `ff`; from here on the test recording is used rather than the
# backyard file opened above.
ff = FitFile(recording['fitfile'])

# Second name bound to the same FitFile object.
fitfile = ff

In [1]:
# Print a single message: take a lazy slice of the first five messages and show the
# repr of the first one.  `msg` stays bound to that message afterwards.

messages = it.islice(ff.get_messages(), 0, 5, None)
msg = next(messages)
print(repr(msg))

In [1]:
# Display the various operations available on a message.

def display_msg_details(msg):
    """
    Walk through the attributes and methods a FIT message exposes, printing each value
    beneath a short markdown heading.
    """
    # Each entry pairs a heading with a callable that fetches the value, so the value is
    # only evaluated after its heading has been displayed.
    sections = (
        ("**`mesg_num` refers to the FIT file global schema**",
         lambda: msg.mesg_num),
        ("**`as_dict()` The reference to the raw data is likely helpful, and very verbose**",
         lambda: msg.as_dict()),
        ("**`def_mesg` Every data message has an associated definition message.**",
         lambda: msg.def_mesg),
        ("**`fields` The fields are defined in the definition message, along with info on conversions.**",
         lambda: msg.fields),
        ("**`get_values()`: the values in the record...  converted when possible.**",
         lambda: msg.get_values()),
        ("**`header`**",
         lambda: msg.header),
        ("**`name`: more like the type of the data message.**",
         lambda: msg.name),
        ("**`type`: either data or definition.**",
         lambda: msg.type),
    )
    for heading, fetch in sections:
        display_markdown(heading)
        print(fetch())
    
# Show the details of the message currently bound to `msg`.
display_msg_details(msg)

In [1]:
# Find all the types of messages and keep one of each.
# Only the first 50000 messages are scanned; for each message name the most recently
# seen message is the one that is kept.

msg_dict = {}
for i, m in enumerate(it.islice(ff.get_messages(), 0, 50000, None)):
    # Progress indicator.
    if i % 10000 == 0:
        print(i)
    msg_dict[m.name] = m

display(list(msg_dict.keys()))
# Build a markdown bullet list: the message name, then the repr of the sample message.
# NOTE: this loop rebinds the notebook-level name `msg`.
message_types = ""
for k, msg in msg_dict.items():
    message_types += f"- **{k}**\n   - {repr(msg)}\n"
    
display_markdown(message_types)

In [1]:
# Display some messages with time stamps

# Lazy slice of the first 10 messages; it is consumed by display_messages() below.
messages = it.islice(ff.get_messages(), 0, 10, None)

def message_time(msg_values):
    """
    Pull the timing information out of a message's values.

    Returns (timestamp, fulltime).  `timestamp` is the raw 'timestamp' entry.  `fulltime`
    is the timestamp as a float, with the 'timestamp_ms' offset (converted from
    milliseconds) added when that entry is present.  Both are None when the message has
    no timestamp.
    """
    seconds = msg_values.get('timestamp')
    millis = msg_values.get('timestamp_ms')
    if seconds is None:
        return None, None

    precise = float(seconds)
    if millis is not None:
        precise = seconds + millis/1000.0
    return seconds, precise


def display_messages(messages):
    """Print each message's index, timestamp and full time, followed by its repr."""
    for index, message in enumerate(messages):
        stamp, full = message_time(message.get_values())
        if stamp is None:
            # Messages without a timestamp are reported at time zero.
            stamp, full = 0, 0.0
        print(f"# {index} TS: {stamp:5d} FT: {full:5.2f} -------------")
        print(repr(message))

# Consumes the 10-message slice created at the top of this cell.
display_messages(messages)

In [1]:
# Timestamps are in seconds (most likely to save file space); finer time resolution is encoded
# in the message (the `timestamp_ms` value).

# Note the messages are **NOT** in timestamp order!!!

# Lazy slice of the first 50 messages of any type.
messages = it.islice(ff.get_messages(), 0, 50, None)

# Note, the messages are *NOT* in monotonic order!  Why?
def print_message_times(messages):
    """
    Print one line per message: its index, its timestamp in seconds, its full time
    (seconds plus the millisecond offset), and the message name.

    Messages without a timestamp are printed with a time of zero.

    Returns the last message seen, or None when `messages` is empty.
    """
    msg = None  # Keeps the return value well-defined when there are no messages.
    for i, msg in enumerate(messages):
        ts, ft = message_time(msg.get_values())
        if ts is None:
            ts, ft = 0, 0.0
        print(f"{i:3d}, {ts:5d}, {ft:7.2f} {msg.name}")
    return msg
        
# Print the times of the 50-message slice created above.
print_message_times(messages)

In [1]:
# Barometer data is very regular
# Every 10th message out of the first 10000 barometer messages.
messages = it.islice(ff.get_messages('barometer_data'), 0, 10000, 10)

# print_message_times returns the last message it printed.
msg = print_message_times(messages)

display_markdown("The data in the baro message contains many samples at different offsets.")
display(msg.get_values())


In [1]:
# Frequent messages have the same timestamp.  Time is encoded in ms
# Any given message type appears to be in order.

display_markdown("Every message.")
# The first 100 gps_metadata messages.
messages = it.islice(ff.get_messages('gps_metadata'), 0, 100, 1)
print_message_times(messages)

display_markdown("Every 100th message.")
# Every 100th message out of the first 1000 gps_metadata messages.
messages = it.islice(ff.get_messages('gps_metadata'), 0, 1000, 100)
print_message_times(messages)

display_markdown("Notice the slight drift in time.")

In [1]:
# Look at the sample gps_metadata message collected in msg_dict.
msg = msg_dict['gps_metadata']
print(msg)

def extract_flat(messages, fields):
    """
    Build a DataFrame with one row per message.

    Each row holds `ts` (the message 'timestamp' plus 'timestamp_ms' converted from
    milliseconds, each defaulting to 0 when absent) followed by the requested `fields`,
    with None for any field the message does not carry.
    """
    def as_row(values):
        seconds = values.get('timestamp', 0)
        millis = values.get('timestamp_ms', 0)
        return {'ts': seconds + millis / 1000.0,
                **{name: values.get(name, None) for name in fields}}

    return pd.DataFrame([as_row(message.get_values()) for message in messages])

# The first 3000 gps_metadata messages.
messages = it.islice(ff.get_messages('gps_metadata'), 0, 3000, 1)

# Values to copy out of each gps_metadata message into the DataFrame.
gps_fields = ['timestamp', 'position_lat', 'position_long', 'enhanced_altitude',
              'enhanced_speed', 'utc_timestamp', 'timestamp_ms', 'heading', 'velocity']

# One row per message.
gps_df = extract_flat(messages, gps_fields)

def extract_multitime_measurements(messages, fields):
    """
    Flatten "multitime" messages into a DataFrame with one row per sample.

    A multitime message has many measurements embedded in a single message (for
    efficiency).  The message has a timestamp in two parts, seconds and milliseconds.
    Each measurement additionally has an offset from that in ms.

    A multitime message may also carry several measured quantities (gyro and accel have
    3: X, Y, and Z).  Unused sample slots are filled with None, for example (abbreviated):

       - <DataMessage: gyroscope_data (#164) -- local mesg: #14, fields: [timestamp: 194,
         sample_time_offset: (0, 4, 8, ..., 92, 96, None, None, None, None, None),
         gyro_x: (32808, 32813, 32817, ..., 32825, 32776, None, None, None, None, None),
         gyro_y: (...), gyro_z: (...), timestamp_ms: 239]>
       - <DataMessage: accelerometer_data (#165) -- local mesg: #15, fields: [timestamp: 194,
         sample_time_offset: (0, 4, 8, ..., 92, 96, None, None, None, None, None),
         accel_x: (...), accel_y: (...), accel_z: (...), timestamp_ms: 239]>

    Parameters:
        messages: iterable of messages providing `get_values()`.
        fields:   names of the per-sample values to extract (e.g. gyro_x, gyro_y, gyro_z).

    Returns a DataFrame with the columns `ts` (message time plus sample offset, in
    seconds), `sample_time_offset`, `timestamp`, `timestamp_ms`, and one column per field.
    """
    columns = collections.defaultdict(list)
    for msg in messages:
        vals = msg.get_values()
        ts_sec = vals.get('timestamp', 0)
        ts_ms = vals.get('timestamp_ms', 0)
        ts = ts_sec + ts_ms / 1000.0

        raw_offsets = vals.get('sample_time_offset')
        # Remember which sample slots are real, so that every value stays paired with its
        # own offset even if a None slot is not at the end.
        in_use = [dt is not None for dt in raw_offsets]
        offsets = [dt for dt in raw_offsets if dt is not None]

        columns['ts'] += [ts + dt/1000.0 for dt in offsets]
        columns['sample_time_offset'] += offsets
        columns['timestamp'] += [ts_sec] * len(offsets)
        columns['timestamp_ms'] += [ts_ms] * len(offsets)
        for key in fields:
            columns[key] += [v for v, keep in zip(vals.get(key), in_use) if keep]
    return pd.DataFrame(data=columns)

# The first 300 barometer messages, flattened to one row per pressure sample.
messages = it.islice(ff.get_messages('barometer_data'), 0, 300, 1)
baro_fields = ['baro_pres']
baro_df = extract_multitime_measurements(messages, baro_fields)


def virb_video_segments(messages):
    """
    Collect the video segments recorded in a stream of messages.

    A single FIT file often refers to multiple video files (each about 30 mins).  The
    start and end of each segment are signalled by 'camera_event' messages whose
    'camera_event_type' is 'video_start' or 'video_end'.

    Returns a list of [start, end] pairs (full message times); a pair is emitted each
    time a 'video_end' event is seen.
    """
    completed = []
    start_time = None
    for message in messages:
        if message.name != 'camera_event':
            continue
        values = message.get_values()
        event = values.get('camera_event_type', None)
        if event == 'video_start':
            _, start_time = message_time(values)
        elif event == 'video_end':
            _, end_time = message_time(values)
            completed.append([start_time, end_time])
            # Forget the start so that the next segment begins from a clean slate.
            start_time = None
    return completed


def three_d_sensor_calibrations(messages, sensor_type):
    """
    Gather, in file order, the values of every 'three_d_sensor_calibration' message whose
    'sensor_type' equals `sensor_type`.  Note, currently designed for gyro and accel.

    Each returned element is the message's values dict, for example:

      {'timestamp': 200,
       'gyro_cal_factor': 5,
       'calibration_divisor': 82,
       'level_shift': 32768,
       'offset_cal': (35, 13, -70),
       'orientation_matrix': (-1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0),
       'sensor_type': 'gyroscope'}
    """
    calibration_values = (
        message.get_values()
        for message in messages
        if message.name == 'three_d_sensor_calibration'
    )
    return [values for values in calibration_values
            if sensor_type == values.get('sensor_type', None)]


def calibrate_sensors(df, calibrations, fields):
    """
    Convert the sensor measurements from raw form into more meaningful units.  Typically
    converts from an unsigned int16(?) to signed float.  Includes bias offsets (this is
    really the calibration bit, the rest is unit conversion).

    Works for gyro and accel, which are measured in triples (x, y, z).

    Parameters:
        df:           DataFrame with a `ts` column and one column per entry in `fields`.
        calibrations: sequence of calibration records (dicts) in file order.  Each record
                      applies to rows with `ts` after its 'timestamp', up to and including
                      the 'timestamp' of the next record.
        fields:       column names, in the same order as the entries of 'offset_cal'.

    Modifies `df` in place and returns None.  Rows at or before the first calibration's
    timestamp keep their raw values (as floats).
    """
    # The calibrated values are fractional.  Convert the raw columns to float up front so
    # the results are never written into an integer-typed column.
    for field in fields:
        df[field] = df[field].astype(float)

    for cal1, cal2 in pairwise_longest(calibrations):
        row_selector = df.ts > cal1['timestamp']
        if cal2 is not None:
            row_selector = row_selector & (df.ts <= cal2['timestamp'])

        factor = cal_factor(cal1)
        divisor = cal1['calibration_divisor']
        for i, field in enumerate(fields):
            # Iterate over each of the fields (typically x, y, z).
            raw = df.loc[row_selector, field]
            # Shift (typically to make the value signed), remove the bias offset, then
            # scale.  The operation order matches the original step-by-step arithmetic.
            df.loc[row_selector, field] = (
                (raw - cal1['level_shift'] - cal1['offset_cal'][i]) * factor / divisor)


def fitfile_messages(fitfile, msg_name=None, msg_slice=slice(None, None, None)):
    """
    Lazily yield messages from `fitfile`.

    `msg_slice` is applied first, to the full message stream; of the messages it selects,
    only those named `msg_name` are yielded (all of them when `msg_name` is None).
    """
    window = it.islice(fitfile.get_messages(), msg_slice.start, msg_slice.stop, msg_slice.step)
    if msg_name is None:
        yield from window
    else:
        yield from (message for message in window if message.name == msg_name)


def pairwise_longest(stuff):
    """
    Return an iterator over sequential pairs (a, b), (b, c), ...; the final pair has None
    as its second element.  An empty input produces no pairs.
    """
    one, two = it.tee(stuff)
    # Advance the second copy by one element.  The default keeps an empty input from
    # raising StopIteration here.
    next(two, None)
    return it.zip_longest(one, two)


def cal_factor(calibration_record):
    """
    Abstract the extraction of the cal factor for two types of messages: gyro and accel.

    Returns the record's 'gyro_cal_factor' if present, otherwise its 'accel_cal_factor'.
    Raises ValueError (with the offending record in the message) when neither is present.
    """
    for key in ('gyro_cal_factor', 'accel_cal_factor'):
        if key in calibration_record:
            return calibration_record[key]
    raise ValueError(f"cal_factor missing from {calibration_record}")


def gyro_process(fitfile, msg_slice=slice(None, None, None)):
    """
    Run imu_process for the gyroscope (gyro_x, gyro_y, gyro_z) and return its DataFrame.

    `msg_slice` is forwarded to imu_process to restrict which messages are read.
    """
    return imu_process(fitfile, "gyro_x gyro_y gyro_z".split(), 'gyroscope',
                       msg_slice=msg_slice)


def accel_process(fitfile, msg_slice=slice(None, None, None)):
    """
    Run imu_process for the accelerometer (accel_x, accel_y, accel_z) and return its
    DataFrame.

    `msg_slice` is forwarded to imu_process to restrict which messages are read.
    """
    return imu_process(fitfile, "accel_x accel_y accel_z".split(), 'accelerometer',
                       msg_slice=msg_slice)


def imu_process(fitfile, fields, sensor_name, msg_slice=slice(None, None, None)):
    """
    Extract IMU data and convert to meaningful units.  There are two types of IMU
    messages, GYROSCOPE and ACCELEROMETER, stored in two different messages (though there
    appears to be a single sensor, so all messages are timestamped with the same times).

    Parameters:
        fitfile:     the FIT file to read messages from.
        fields:      per-sample value names, e.g. ['gyro_x', 'gyro_y', 'gyro_z'].
        sensor_name: 'gyroscope' or 'accelerometer'.  Selects both the calibration
                     records and the '<sensor_name>_data' messages.
        msg_slice:   slice applied to the file's message stream before any filtering.

    Returns a DataFrame with one row per sample: calibrated `fields`, the sample time
    `ts`, the difference between consecutive sample times `diff`, and the video segment
    columns added by video_segments.  As a side effect the sensor values are plotted.
    """
    calibrations = three_d_sensor_calibrations(fitfile_messages(fitfile, msg_slice=msg_slice), sensor_name)

    # A large FIT file can be broken into multiple video segments.  Extract and label these.
    segment_times = virb_video_segments(fitfile_messages(fitfile, msg_slice=msg_slice))

    df = extract_multitime_measurements(fitfile_messages(fitfile, sensor_name+'_data', msg_slice=msg_slice), fields)
    df['diff'] = df.ts.diff()
    calibrate_sensors(df, calibrations, fields)
    video_segments(df, segment_times, fields)
    plot_sensors(df, fields)

    return df


def video_segments(df, segment_times, fields):
    """
    Label every row of `df` with the video segment it falls in.

    Adds two columns in place: `segment` (0 outside any segment, otherwise the 1-based
    position of the segment in `segment_times`) and `ts_segment` (`ts`, made relative to
    the segment start for rows inside a segment).  A row belongs to a segment when
    start < ts <= end.  `fields` is accepted but not used.
    """
    df['segment'] = 0
    df['ts_segment'] = df['ts']
    for number, (begin, finish) in enumerate(segment_times, start=1):
        inside = df['ts'].gt(begin) & df['ts'].le(finish)
        df.loc[inside, 'ts_segment'] -= begin
        df.loc[inside, 'segment'] = number


def plot_sensors(df, fields, fignum=None):
    """
    Plot the sensor columns named in `fields` against time on a cleared figure.

    Up to three fields are drawn (red, green, blue).  Time is measured from the smallest
    `ts` among rows with segment >= 0.  An orange trace shows the `segment` column,
    scaled so that the highest segment number reaches the largest plotted sensor value.
    `fignum` is passed to plt.figure to select the figure.
    """
    figure = plt.figure(num=fignum)
    figure.clf()
    axes = figure.add_subplot(111)
    origin = df[df.segment >= 0].ts.min()

    peak = 0
    for name, colour in zip(fields, ("r", "g", "b")):
        axes.plot(df.ts - origin, df[name], color=colour)
        peak = max(peak, df[name].max())

    n_segments = df.segment.max()
    axes.plot(df.ts - origin, peak*df.segment/n_segments, color='orange')
    axes.legend(fields + ['video'])


################################################################

# Build the gyroscope and accelerometer DataFrames for the loaded FIT file.
gdf = gyro_process(ff)
adf = accel_process(ff)


In [1]:
# Compare three views of the sample gps_metadata message: its repr, its values, and
# the full as_dict() form.
msg = msg_dict['gps_metadata']

display(repr(msg))
display(msg.get_values())

display(msg.as_dict())


In [1]:
# `m` is the loop variable left over from the message-type scan, i.e. the last message
# that scan visited.
print(dir(m))
# NOTE(review): as bare expressions these produce no printed output in a script; in a
# notebook presumably only the last one is echoed.
m.mesg_type
m.fields

In [1]:
# Show the built-in help for the get_messages method of the loaded FIT file.
help(ff.get_messages)

In [1]:

# The first two camera_event messages.
messages = list(it.islice(ff.get_messages('camera_event'), 0, 2, None))

# One row per message, one column per value in the message.
dfs = []
rows = [m.get_values() for m in messages]
dfs.append(pd.DataFrame(rows))

df = dfs[0]

df


In [1]:

# The first two gyroscope_data messages.
messages = list(it.islice(ff.get_messages('gyroscope_data'), 0, 2, None))

# One row per message, one column per value in the message.
dfs = []
rows = [m.get_values() for m in messages]
dfs.append(pd.DataFrame(rows))

df = dfs[0]

df
