In [458]:
import numpy as np
import pandas as pd
import glob
from zoneinfo import ZoneInfo
from datetime import timezone
import math

In [189]:
TZ_US_LA = ZoneInfo("America/Los_Angeles")
TZ_UTC = timezone.utc

In [376]:
# These are in PST timezone
LAUNCH_DATA_PATHS = [x for x in glob.glob("./data/LaunchMonitor/ags-shots-*")]
APPLE_WATCH_DATA_PATHS = [x for x in glob.glob("./data/SensorLogFiles/*Watch*")]

print(LAUNCH_DATA_PATHS, APPLE_WATCH_DATA_PATHS, sep='\n')

['./data/LaunchMonitor/ags-shots-2023-05-11.csv', './data/LaunchMonitor/ags-shots-2023-04-26.csv', './data/LaunchMonitor/ags-shots-2023-04-17.csv']
['./data/SensorLogFiles/2023-05-11_1_19_58 PM_Apple Watch.csv', './data/SensorLogFiles/2023-04-26_4_18_56 PM_Apple Watch.csv', './data/SensorLogFiles/stream Apple Watch 230423 10_06_54 PM.csv', './data/SensorLogFiles/stream Apple Watch 230426 4_19_13 PM.csv', './data/SensorLogFiles/2023-05-11_1_42_45 PM_Apple Watch.csv']


In [314]:
def load_launch_data(format='AwesomeGolf', tz=TZ_UTC):
    if format != 'AwesomeGolf':
        raise Exception('Not a support format!')

    tdf_list = []
    for file_path in LAUNCH_DATA_PATHS:
        tdf = pd.read_csv(file_path, parse_dates=[0], header=0)
        # Drop the unit row
        tdf = tdf.drop(labels=0, axis=0)
        tdf_list.append(tdf)
    launchdata = pd.concat(tdf_list, axis=0, ignore_index=True)
    launchdata['Date'] = launchdata['Date'].dt.tz_localize(tz)
    launchdata = launchdata.sort_values(by='Date')
    launchdata = launchdata.reset_index()

    return launchdata

In [315]:
launchdata = load_launch_data(tz=TZ_US_LA)
display(launchdata)
print(launchdata.dtypes)

Unnamed: 0,index,Date,Club Type,Club Description,Altitude,Club Speed,Ball Speed,Carry Distance,Total Distance,Roll Distance,...,Spin Rate,Spin Axis,Spin Reading,Low Point,Club Path,Face Path,Face Target,Swing Plane Tilt,Swing Plane Rotation,Shot Classification
0,146,2023-04-17 14:34:40-07:00,8 Iron,,0.00,72.79,98.63,121.47,149.39,27.93,...,2558,2.85,Actual,,0.39,-0.48,-0.09,,,Straight
1,147,2023-04-17 14:36:42-07:00,8 Iron,,0.00,71.27,65.23,63.77,82.79,19.02,...,1394,3.13,Actual,,0.52,16.54,17.06,,,Push
2,148,2023-04-17 14:37:19-07:00,8 Iron,,0.00,72.92,104.46,160.35,175.56,15.21,...,3788,-22.05,Actual,,2.57,-4.05,-1.49,,,Hook
3,149,2023-04-17 14:38:22-07:00,8 Iron,,0.00,72.61,97.51,138.21,153.74,15.53,...,4569,-3.73,Actual,,1.65,-1.29,0.36,,,Straight
4,150,2023-04-17 14:38:53-07:00,8 Iron,,0.00,74.91,106.86,158.65,173.74,15.09,...,4254,-0.35,Actual,,1.24,-0.23,1.02,,,Straight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,83,2023-05-11 14:12:31-07:00,5 Iron,,0.00,75.21,101.33,142.26,162.61,20.35,...,4345,-7.31,Actual,,-0.69,-0.76,-1.45,,,Draw
160,84,2023-05-11 14:12:43-07:00,5 Iron,,0.00,68.29,96.95,133.49,156.05,22.56,...,3218,17.60,Actual,,-1.56,3.18,1.62,,,Slice
161,85,2023-05-11 14:12:59-07:00,5 Iron,,0.00,74.09,106.57,131.87,161.15,29.29,...,2437,-13.43,Actual,,-0.75,0.24,-0.50,,,Draw
162,86,2023-05-11 14:13:16-07:00,5 Iron,,0.00,72.63,101.60,142.06,165.68,23.61,...,2161,-8.60,Actual,,-0.23,-0.01,-0.24,,,Draw


index                                                   int64
Date                      datetime64[ns, America/Los_Angeles]
Club Type                                              object
Club Description                                      float64
Altitude                                               object
Club Speed                                             object
Ball Speed                                             object
Carry Distance                                         object
Total Distance                                         object
Roll Distance                                          object
Smash                                                 float64
Vertical Launch                                        object
Peak Height                                            object
Descent Angle                                          object
Horizontal Launch                                      object
Carry Lateral Distance                                 object
Total La

In [254]:
print('Launch Data Range: {} to {}'.format(launchdata['Date'].min(), launchdata['Date'].max()))

Launch Data Range: 2023-04-17 14:34:40-07:00 to 2023-05-11 14:13:31-07:00


In [428]:
def load_into_combined_df(paths):
    tdf_list = []
    for file_path in paths:
        tdf_list.append(pd.read_csv(file_path, header=0))
    df = pd.concat(tdf_list, axis=0, ignore_index=True)
    df = df.drop_duplicates(keep='first')
    df = df.reset_index(drop=True)
    return df

In [435]:
# FIXME: For some reason, Pandas complain that there are duplicated indexes if we include the last
# file even when we already drop all duplicates... so will just ignore that file for now.
applewatchdata = load_into_combined_df(APPLE_WATCH_DATA_PATHS[0:4])
applewatchdata = applewatchdata.rename(columns={
    'loggingTime(txt)': 'Date'
})
# The original Date is encoded in UTC-7 (or UTC-8 during PST) format.
applewatchdata['Date'] = pd.to_datetime(applewatchdata['Date']).dt.tz_convert(TZ_US_LA)
# Convert to TimeSeries
applewatchdata = applewatchdata.set_index('Date', drop=True)
applewatchdata = applewatchdata.sort_index()

display(applewatchdata)
print(applewatchdata.dtypes)

Unnamed: 0_level_0,locationTimestamp_since1970(s),locationLatitude(WGS84),locationLongitude(WGS84),locationAltitude(m),locationSpeed(m/s),locationSpeedAccuracy(m/s),locationCourse(°),locationCourseAccuracy(°),locationVerticalAccuracy(m),locationHorizontalAccuracy(m),...,pedometerDistance(m),pedometerFloorAscended(N),pedometerFloorDescended(N),pedometerEndDate(txt),altimeterTimestamp_sinceReboot(s),altimeterReset(bool),altimeterRelativeAltitude(m),altimeterPressure(kPa),batteryState(N),batteryLevel(R)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-04-23 22:06:54.330000-07:00,1.682313e+09,37.426567,-122.161385,28.16475,-1.000000,-1.000000,-1.000000,-1.0,68.583588,79.333333,...,0.00000,0.0,0.0,,7.040056e+08,0.0,-0.02,101.2778,1.0,0.67
2023-04-23 22:06:54.364000-07:00,1.682313e+09,37.426567,-122.161385,28.16475,-1.000000,-1.000000,-1.000000,-1.0,68.583588,79.333333,...,0.00000,0.0,0.0,,7.040056e+08,0.0,-0.02,101.2778,1.0,0.67
2023-04-23 22:06:54.400000-07:00,1.682313e+09,37.426567,-122.161385,28.16475,-1.000000,-1.000000,-1.000000,-1.0,68.583588,79.333333,...,0.00000,0.0,0.0,,7.040056e+08,0.0,-0.02,101.2778,1.0,0.67
2023-04-23 22:06:54.431000-07:00,1.682313e+09,37.426567,-122.161385,28.16475,-1.000000,-1.000000,-1.000000,-1.0,68.583588,79.333333,...,0.00000,0.0,0.0,,7.040056e+08,0.0,-0.02,101.2778,1.0,0.67
2023-04-23 22:06:54.465000-07:00,1.682313e+09,37.426567,-122.161385,28.16475,-1.000000,-1.000000,-1.000000,-1.0,68.583588,79.333333,...,0.00000,0.0,0.0,,7.040056e+08,0.0,-0.02,101.2778,1.0,0.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-11 13:37:51.697000-07:00,1.683837e+09,37.423214,-122.181596,45.40375,0.031873,0.023398,73.345652,180.0,2.400000,2.099110,...,28.38083,0.0,0.0,2023-05-11T13:36:53.295-07:00,7.055303e+08,0.0,1.86,101.3255,1.0,0.74
2023-05-11 13:37:51.728000-07:00,1.683837e+09,37.423214,-122.181596,45.40375,0.031873,0.023398,73.345652,180.0,2.400000,2.099110,...,28.38083,0.0,0.0,2023-05-11T13:36:53.295-07:00,7.055303e+08,0.0,1.86,101.3255,1.0,0.74
2023-05-11 13:37:51.763000-07:00,1.683837e+09,37.423214,-122.181596,45.40375,0.031873,0.023398,73.345652,180.0,2.400000,2.099110,...,28.38083,0.0,0.0,2023-05-11T13:36:53.295-07:00,7.055303e+08,0.0,1.86,101.3255,1.0,0.74
2023-05-11 13:37:51.798000-07:00,1.683837e+09,37.423214,-122.181596,45.40375,0.031873,0.023398,73.345652,180.0,2.400000,2.099110,...,28.38083,0.0,0.0,2023-05-11T13:36:53.295-07:00,7.055303e+08,0.0,1.86,101.3255,1.0,0.74


locationTimestamp_since1970(s)               float64
locationLatitude(WGS84)                      float64
locationLongitude(WGS84)                     float64
locationAltitude(m)                          float64
locationSpeed(m/s)                           float64
locationSpeedAccuracy(m/s)                   float64
locationCourse(°)                            float64
locationCourseAccuracy(°)                    float64
locationVerticalAccuracy(m)                  float64
locationHorizontalAccuracy(m)                float64
locationFloor(Z)                             float64
accelerometerTimestamp_sinceReboot(s)        float64
accelerometerAccelerationX(G)                float64
accelerometerAccelerationY(G)                float64
accelerometerAccelerationZ(G)                float64
motionTimestamp_sinceReboot(s)               float64
motionYaw(rad)                               float64
motionRoll(rad)                              float64
motionPitch(rad)                             f

In [431]:
duplicated_data = applewatchdata.duplicated(keep=False)
[i for i, v in duplicated_data.items() if v == True]

[]

In [417]:
print('Apple Data Range: {} to {}'.format(applewatchdata.index.min(), applewatchdata.index.max()))

Apple Data Range: 2023-05-11 13:42:45.523000-07:00 to 2023-05-11 14:13:34.919000-07:00


In [418]:
display(type(launchdata['Date'][0]))
display(type(applewatchdata.index.min()))

pandas._libs.tslibs.timestamps.Timestamp

pandas._libs.tslibs.timestamps.Timestamp

In [499]:
def get_index_of_nearest_datetime(df, dt):
    """
    Parameters
    ----------
    df - a Pandas TimeSeries
    dt - a datetime object
    """
    return df.index.get_indexer([dt], method='nearest')[0]

In [501]:
dt = launchdata['Date'][90]
print(dt)
idt = get_index_of_nearest_datetime(applewatchdata, dt)
print(idt)
item = applewatchdata.iloc[idt]
print(item)
print('='*50)
print(type(item.name))

2023-05-11 13:30:31-07:00
44321
locationTimestamp_since1970(s)                           1683837029.999994
locationLatitude(WGS84)                                          37.423211
locationLongitude(WGS84)                                       -122.181558
locationAltitude(m)                                                44.9145
locationSpeed(m/s)                                                0.130642
locationSpeedAccuracy(m/s)                                        0.027877
locationCourse(°)                                               173.365528
locationCourseAccuracy(°)                                        21.733831
locationVerticalAccuracy(m)                                            2.4
locationHorizontalAccuracy(m)                                     1.799688
locationFloor(Z)                                                   -9999.0
accelerometerTimestamp_sinceReboot(s)                       1005875.532594
accelerometerAccelerationX(G)                                     0.

In [502]:
def is_missing_from_sensor_data(dt, threshold_s=5):
    """
    Pandas will return the index of nearest time.
    In some case, we have missing gap in our sensor dataset.
    We want to make sure that the return data is at least within `threshold_s`
    time delta, otherwise, we consider the data missing.
    """
    idt = get_index_of_nearest_datetime(applewatchdata, dt)
    item = applewatchdata.iloc[idt]
    delta = abs(item.name - dt)
    return delta.total_seconds() > threshold_s

In [506]:
mapped = []
for index, row in launchdata.iterrows():
    dt = row['Date']
    awd_i = get_index_of_nearest_datetime(applewatchdata, dt)
    awd_i_start = awd_i - 100
    awd_i_end = awd_i + 100
    if is_missing_from_sensor_data(dt):
        continue
    mapped.append((row, applewatchdata.iloc[awd_i_start: awd_i_end]))

In [508]:
for launch, awdata in mapped:
    print(launch)
    print("="*50)
    print(awdata)

index                                            88
Date                      2023-04-26 16:20:36-07:00
Club Type                                 Lob Wedge
Club Description                                NaN
Altitude                                       0.00
Club Speed                                    49.93
Ball Speed                                    50.94
Carry Distance                                46.13
Total Distance                                58.11
Roll Distance                                 11.98
Smash                                          1.02
Vertical Launch                               24.55
Peak Height                                   19.26
Descent Angle                                 31.16
Horizontal Launch                              8.45
Carry Lateral Distance                         7.35
Total Lateral Distance                         9.49
Carry Curve Distance                           0.58
Total Curve Distance                           0.97
Attack Angle