# Get Time Series Data From Timestamps

In [16]:
import boto3
import datetime
import pytz
import pandas as pd
import numpy as np
from io import BytesIO

# name of the S3 bucket that fetch_imu_data lists and downloads parquet files from
IMU_BUCKET = 'matt3r-imu-us-west-2'
# module-level S3 client shared by every S3 call in this notebook
# NOTE(review): no region/credentials are passed, so boto3 presumably resolves them from the environment - confirm
s3_client = boto3.client('s3')

In [2]:
# Synthetic test set: 30 events, one every 120 seconds, each lasting 5 seconds.
# Every event is a (start, end) tuple of epoch seconds; the first one starts
# at 2023-07-27 17:00:00 UTC.
start_time = datetime.datetime(
    year=2023, month=7, day=27, hour=17, tzinfo=datetime.timezone.utc
).timestamp()
events = [
    (start_time + i*120, start_time + i*120 + 5)
    for i in range(30)
]

In [3]:
# Map each event's start timestamp to the name of an hourly parquet file.
# Names are formatted as "%Y-%m-%d_%H-00-00.parquet" in UTC, using the time one
# hour after the event start (an event at 17:xx UTC -> "..._18-00-00.parquet").
#
# The epoch value is converted straight to a timezone-aware UTC datetime.
# Building a naive local datetime first, adding the hour in local wall-clock
# time and only then converting to UTC makes the result depend on the
# machine's timezone rules and can be off by an hour around DST transitions.
#
# NOTE(review): only the event start is used; an event that crosses an hour
# boundary would presumably need the following file as well - confirm.
file_names = set()
for event in events:
    start_timestamp = event[0]
    dt_object = datetime.datetime.fromtimestamp(start_timestamp, tz=datetime.timezone.utc) + datetime.timedelta(hours=1)
    file_names.add(dt_object.strftime("%Y-%m-%d_%H-00-00.parquet"))

file_names

{'2023-07-27_18-00-00.parquet'}

In [4]:
def _read_s3_parquet(key):
    """Download the object at ``key`` from ``IMU_BUCKET`` and parse it as parquet."""
    response = s3_client.get_object(Bucket=IMU_BUCKET, Key=key)
    return pd.read_parquet(BytesIO(response['Body'].read()), engine='pyarrow')


def fetch_imu_data(org_id, k3y_id, file_names):
    """Download and combine accelerometer and gyroscope parquet files from S3.

    Lists every object under ``<org_id>/k3y-<k3y_id>/accel/`` in
    ``IMU_BUCKET`` and keeps those whose base name is in ``file_names``. For
    each kept object the file at the same relative path under
    ``<org_id>/k3y-<k3y_id>/gyro/`` is loaded too, and the two frames are
    inner-joined on the ``'timestamp(epoch in sec)'`` column. The joined
    frames are concatenated in order of file name (without extension).

    Args:
        org_id: First component of the S3 key prefix.
        k3y_id: Device id; the prefix uses ``'k3y-' + k3y_id``.
        file_names: Collection of parquet base names to load, e.g.
            ``{'2023-07-27_18-00-00.parquet'}``.

    Returns:
        pandas.DataFrame holding the merged rows of all matching files, with
        a fresh integer index.

    Raises:
        ValueError: If no object under the accel prefix matches ``file_names``.
    """
    device_prefix = org_id + '/' + 'k3y-' + k3y_id + '/'
    accel_prefix = device_prefix + 'accel/'
    gyro_prefix = device_prefix + 'gyro/'

    # the paginator follows continuation tokens across truncated listings
    paginator = s3_client.get_paginator('list_objects_v2')
    all_keys = [
        item['Key']
        for page in paginator.paginate(Bucket=IMU_BUCKET, Prefix=accel_prefix)
        for item in page.get('Contents', [])
    ]

    # keep only the requested files, ordered by file name without extension
    keys = [key for key in all_keys if key.split('/')[-1] in file_names]
    keys.sort(key=lambda x: x.split('/')[-1].split('.')[0])

    # retrieve and combine filtered parquet files
    df_list = []
    for key in keys:
        ac_df = _read_s3_parquet(key)
        # Swap only the known prefix: a blanket str.replace('accel', 'gyro')
        # would also rewrite 'accel' appearing inside org_id or k3y_id.
        gy_df = _read_s3_parquet(gyro_prefix + key[len(accel_prefix):])
        df_list.append(pd.merge(ac_df, gy_df, on='timestamp(epoch in sec)', how='inner'))

    # pd.concat raises an unspecific ValueError on an empty list; check
    # explicitly so the "no data" message is only reported when it is true.
    if not df_list:
        raise ValueError("No IMU acceleration data available")

    return pd.concat(df_list, axis=0, ignore_index=True)

In [5]:
# load the merged IMU data for org 'hamid', device '17700cf8', limited to the files in file_names
imu_df = fetch_imu_data(org_id='hamid', k3y_id='17700cf8', file_names=file_names)

In [19]:
# For each (start, end) event, select the IMU rows whose timestamp falls
# between start and end, and store every column of that slice as a NumPy
# array in a dict keyed by column name. One dict per event, in event order.
data_list = []
for window_start, window_end in events:
    in_window = imu_df['timestamp(epoch in sec)'].between(window_start, window_end)
    window_df = imu_df[in_window]
    data_list.append({name: window_df[name].to_numpy() for name in window_df.columns})

In [21]:
data_list

[{'timestamp(epoch in sec)': array([1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
         1.6904772e+09, 1.6904772e+09, 1.6904772e+09, 1.6904772e+09,
       