From 5d8297646d96fc626d0b61fa1e184f07bc182f3f Mon Sep 17 00:00:00 2001 From: Justin Wong Date: Mon, 19 Oct 2020 16:28:42 -0500 Subject: [PATCH] adding a pandas wrapper --- .gitignore | 3 +- oura/__init__.py | 5 +- oura/client_pandas.py | 327 ++++++++++++++++++++++++++++++++++++ tests/__init__.py | 2 +- tests/test_client_pandas.py | 134 +++++++++++++++ 5 files changed, 467 insertions(+), 4 deletions(-) create mode 100644 oura/client_pandas.py create mode 100644 tests/test_client_pandas.py diff --git a/.gitignore b/.gitignore index fd447c5..0cfc60b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ build *.egg-info .tox docs/_build/ -data \ No newline at end of file +data +test_token.json diff --git a/oura/__init__.py b/oura/__init__.py index 0d67fa0..8912d14 100644 --- a/oura/__init__.py +++ b/oura/__init__.py @@ -6,7 +6,8 @@ ------------------ -It's a description for __init__.py, innit. +It's a description for __init__.py, innit. """ -from .client import OuraClient, OuraOAuth2Client \ No newline at end of file +from .client import OuraClient, OuraOAuth2Client +from .client_pandas import OuraClientDataFrame diff --git a/oura/client_pandas.py b/oura/client_pandas.py new file mode 100644 index 0000000..6a00f2c --- /dev/null +++ b/oura/client_pandas.py @@ -0,0 +1,327 @@ +from datetime import datetime, timedelta +from collections import defaultdict +import pandas as pd + +from .client import OuraClient + +class OuraClientDataFrame(OuraClient): + """ + Similiar to OuraClient, but data is returned instead + as a pandas.DataFrame (df) object + """ + + def __init__(self, client_id, client_secret=None, access_token=None, refresh_token=None, refresh_callback=None): + super().__init__(client_id, client_secret, access_token, refresh_token, refresh_callback) + + + def __summary_df(self, summary, metrics=None): + """ + Creates a dataframe from a summary object + + :param summary: A summary object returned from API + :type summary: dictionary of dictionaries. See https://cloud.ouraring.com/docs/readiness for an example + + :param metrics: The metrics to include in the DF. None includes all metrics + :type metrics: A list of metric names, or alternatively a string for one metric name + """ + df = pd.DataFrame(summary) + if metrics: + if type(metrics) == str: + metrics = [metrics] + else: + metrics = metrics.copy() + #drop any invalid cols the user may have entered + metrics = [metric for metric in metrics if metric in df.columns] + #summary_date is a required col + if 'summary_date' not in metrics: + metrics.insert(0, 'summary_date') + df = df[metrics] + df['summary_date'] = pd.to_datetime(df['summary_date']).dt.date + df = df.set_index('summary_date') + return df + + + def sleep_df_raw(self, start=None, end=None, metrics=None): + """ + Create a dataframe from sleep summary dict object. + The dataframe is minimally edited, i.e 'raw' + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + sleep_summary = self.sleep_summary(start, end)['sleep'] + return self.__summary_df(sleep_summary, metrics) + + + def sleep_df_edited(self, start=None, end=None, metrics=None): + """ + Create a dataframe from sleep summary dict object. + Some cols are unit converted for easier use or readability. + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + sleep_df = self.sleep_df_raw(start, end, metrics) + sleep_df = SleepConverter().convert_metrics(sleep_df) + return sleep_df + + + def activity_df_raw(self, start=None, end=None, metrics=None): + """ + Create a dataframe from activity summary dict object. + The dataframe is minimally edited, i.e 'raw' + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + activity_summary = self.activity_summary(start, end)['activity'] + return self.__summary_df(activity_summary, metrics) + + + def activity_df_edited(self, start=None, end=None, metrics=None): + """ + Create a dataframe from activity summary dict object. + Some cols are unit converted for easier use or readability. + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + activity_df = self.activity_df_raw(start, end, metrics) + return ActivityConverter().convert_metrics(activity_df) + + + def readiness_df_raw(self, start=None, end=None, metrics=None): + """ + Create a dataframe from ready summary dict object. + The dataframe is minimally edited, i.e 'raw' + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + readiness_summary = self.readiness_summary(start, end)['readiness'] + return self.__summary_df(readiness_summary, metrics) + + + def readiness_df_edited(self, start=None, end=None, metrics=None): + """ + Create a dataframe from ready summary dict object. + Readiness has no cols to unit convert. + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + return self.readiness_df_raw(start, end, metrics) + + + def combined_df_edited(self, start=None, end=None, metrics=None): + """ + Combines sleep, activity, and summary into one DF + Some cols are unit converted for easier use or readability. + + If user specifies a metric that appears in all 3 summaries, + i.e. 'score', then all 3 metrics will be returned. + + Each summary's column is prepended with the summary name. + i.e. sleep summary 'total' metric will be re-named 'SLEEP.total' + + :param start: Beginning of date range + :type start: string representation of a date i.e. '2020-10-31' + + :param end: End of date range, or None if you want the current day. + :type end: string representation of a date i.e. '2020-10-31' + + :param metrics: Metrics to include in the df. + :type metrics: A list of strings, or a string + """ + + def prefix_cols(df, prefix): + d_to_rename = {} + for col in df.columns: + if col != 'summary_date': + d_to_rename[col] = prefix + ':' + col + return df.rename(columns=d_to_rename) + + sleep_df = self.sleep_df_edited(start, end, metrics) + sleep_df = prefix_cols(sleep_df, 'SLEEP') + readiness_df = self.readiness_df_edited(start, end, metrics) + readiness_df = prefix_cols(readiness_df, 'READY') + activity_df = self.activity_df_edited(start, end, metrics) + activity_df = prefix_cols(activity_df, 'ACTIVITY') + + combined_df = sleep_df.merge(readiness_df, on='summary_date').merge(activity_df, on='summary_date') + return combined_df + + + def save_as_xlsx(self, df, file, index=True, **to_excel_kwargs): + """ + Save dataframe as .xlsx file with dates properly formatted + + :param df: dataframe to save + :type df: df object + + :param file: File path + :type file: string + + :param index: save df index, in this case summary_date + :type index: Boolean + """ + + def localize(df): + """ + Remove tz from datetime cols since Excel doesn't allow + """ + tz_cols = df.select_dtypes(include=['datetimetz']).columns + for tz_col in tz_cols: + df[tz_col] = df[tz_col].dt.tz_localize(None) + return df + + import xlsxwriter + df = df.copy() + df = localize(df) + writer = pd.ExcelWriter(file, engine='xlsxwriter', date_format = "m/d/yyy", datetime_format = "m/d/yyy h:mmAM/PM",) + df.to_excel(writer, index=index, **to_excel_kwargs) + writer.save() + + + def tableize(self, df, tablefmt='pretty', is_print=True, filename=None): + """ + Converts dataframe to a formatted table + For more details, see https://pypi.org/project/tabulate/ + + :param df: dataframe to save + :type df: df object + + :param tablefmt: format of table + :type tablefmt: string + + :param is_print: print to standard output? + :type is_print: boolean + + :param filename: optionally, filename to print to + :type filename: string + """ + from tabulate import tabulate + table = tabulate(df, headers='keys', tablefmt=tablefmt, showindex=True, stralign='center', numalign='center') + if is_print: + print(table) + if filename: + with open(filename, 'w') as f: + print(table, file=f) + return table + + +class UnitConverter(): + """ + Use this class to convert units for certain dataframe cols + """ + + all_dt_metrics = [] + all_sec_metrics = [] + + def rename_converted_cols(self, df, metrics, suffix_str): + """ + Rename converted cols by adding a suffix to the col name + For example, 'bedtime_start' becomes 'bedtime_start_dt_adjusted' + + :param df: a dataframe + :type df: pandas dataframe obj + + :param metrics: metrics to rename + :type metrics: list of strings + + :param suffix_str: the str to append to each metric name + :type suffix_str: str + """ + updated_headers = [header + suffix_str for header in metrics] + d_to_rename = dict(zip(metrics, updated_headers)) + df = df.rename(columns=d_to_rename) + return df + + def convert_to_dt(self, df, dt_metrics): + """ + Convert dataframe fields to datetime dtypes + + :param df: dataframe + :type df: pandas dataframe obj + + :param dt_metrics: List of metrics to be converted to datetime + :type dt_metrics: List + """ + for i, dt_metric in enumerate(dt_metrics): + df[dt_metric] = pd.to_datetime(df[dt_metric], format='%Y-%m-%d %H:%M:%S') + df = self.rename_converted_cols(df, dt_metrics, '_dt_adjusted') + return df + + def convert_to_hrs(self, df, sec_metrics): + """ + Convert fields from seconds to minutes + + :param df: dataframe + :type df: pandas dataframe obj + + :param sec_metrics: List of metrics to be converted from sec -> hrs + :type sec_metrics: List + """ + df[sec_metrics] = df[sec_metrics] / 60 / 60 + df = self.rename_converted_cols(df, sec_metrics, '_in_hrs') + return df + + def convert_metrics(self, df): + """ + Convert metrics to new unit type + + :param df: dataframe + :type df: pandas dataframe obj + """ + dt_metrics = [col for col in df.columns if col in self.all_dt_metrics] + sec_metrics = [col for col in df.columns if col in self.all_sec_metrics] + if dt_metrics: + df = self.convert_to_dt(df, dt_metrics) + if sec_metrics: + df = self.convert_to_hrs(df, sec_metrics) + return df + +class SleepConverter(UnitConverter): + all_dt_metrics = ['bedtime_end', 'bedtime_start'] + all_sec_metrics = ['awake', 'deep', 'duration', 'light', 'onset_latency', 'rem', 'total'] + +class ActivityConverter(UnitConverter): + all_dt_metrics = ['day_end', 'day_start'] + all_sec_metrics = [] + diff --git a/tests/__init__.py b/tests/__init__.py index 3ce070d..633bcca 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -from . import test_auth, test_client \ No newline at end of file +from . import test_auth, test_client, test_client_pandas diff --git a/tests/test_client_pandas.py b/tests/test_client_pandas.py new file mode 100644 index 0000000..1cc7b78 --- /dev/null +++ b/tests/test_client_pandas.py @@ -0,0 +1,134 @@ +import pytest +import os +from datetime import date +import pandas as pd + +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +os.sys.path.insert(0, parent_dir) + +from oura import OuraClientDataFrame +import json + +#test_token.json is .gitignored +with open(os.path.join(parent_dir, 'tests/', 'test_token.json'), 'r') as f: + env = json.load(f) +client = OuraClientDataFrame(env['client_id'], env['client_secret'], env['access_token']) + + +def test_sleep_summary_df(): + """ + Objectives: + 1. Test that dataframe summary_date match the args passed into + start and end date + + 2. Test that the correct number of metrics are being returned + + 3. Test raw and edited dataframes are returning correctly named + fields and correct data types + """ + sleep_df_raw1 = client.sleep_df_raw(start='2020-09-30') + #check all cols are included + assert sleep_df_raw1.shape[1] >= 36 + #check that start date parameter is correct + assert sleep_df_raw1.index[0] > date(2020, 9, 29) + + sleep_df_raw2 = client.sleep_df_raw(start='2020-09-30', end='2020-10-01', metrics=['bedtime_start', 'score']) + #check that correct metrics are being included + assert sleep_df_raw2.shape[1] == 2 + #check that end date parameter is correct + assert sleep_df_raw2.index[-1] < date(2020, 10, 2) + #check that data type has not been altered + assert type(sleep_df_raw2['bedtime_start'][0]) == str + + #test that invalid metric 'zzz' is dropped + sleep_df_raw3 = client.sleep_df_raw(start='2020-09-30', end='2020-10-01', metrics=['bedtime_start', 'zzz']) + assert sleep_df_raw3.shape[1] == 1 + + #check that bedtime start has been renamed and is now a timestamp + sleep_df_edited = client.sleep_df_edited(start='2020-09-30', end='2020-10-01', metrics=['bedtime_start', 'zzz']) + assert type(sleep_df_edited['bedtime_start_dt_adjusted'][0]) != str + + +def test_activity_summary_df(): + activity_df_raw1 = client.activity_df_raw(start='2020-09-30') + #check all cols are included + assert activity_df_raw1.shape[1] >= 34 + assert activity_df_raw1.index[0] > date(2020, 9, 29) + + activity_df_raw2 = client.activity_df_raw(start='2020-09-30', end='2020-10-01', metrics=['day_start', 'medium']) + assert activity_df_raw2.shape[1] == 2 + assert activity_df_raw2.index[-1] < date(2020, 10, 2) + assert type(activity_df_raw2['day_start'][0]) == str + + #test that invalid metric is dropped + activity_df_raw3 = client.activity_df_raw(start='2020-09-30', end='2020-10-01', metrics=['day_start', 'zzz']) + assert activity_df_raw3.shape[1] == 1 + + #check that day_start has been renamed and is now a timestamp + activity_df_edited = client.activity_df_edited(start='2020-09-30', end='2020-10-01', metrics=['day_start', 'zzz']) + assert type(activity_df_edited['day_start_dt_adjusted'][0]) != str + + +def test_ready_summary_df(): + readiness_df_raw1 = client.readiness_df_raw(start='2020-09-30') + #check all cols are included + assert readiness_df_raw1.shape[1] >= 10 + assert readiness_df_raw1.index[0] > date(2020, 9, 29) + + readiness_df_raw2 = client.readiness_df_raw(start='2020-09-30', end='2020-10-01', metrics=['score_hrv_balance', 'score_recovery_index']) + assert readiness_df_raw2.shape[1] == 2 + assert readiness_df_raw2.index[-1] < date(2020, 10, 2) + + #test that invalid metric is dropped + readiness_df_raw3 = client.readiness_df_raw(start='2020-09-30', end='2020-10-01', metrics=['score_hrv_balance', 'zzz']) + assert readiness_df_raw3.shape[1] == 1 + + #check that readiness edited and readiness raw is the same + readiness_df_edited = client.readiness_df_edited(start='2020-09-30', end='2020-10-01', metrics='score_hrv_balance') + assert pd.DataFrame.equals(readiness_df_raw3, readiness_df_edited) + #assert type(readiness_df_edited['day_start_dt_adjusted'][0]) != str + + +def test_combined_summary_df(): + combined_df_edited1 = client.combined_df_edited(start='2020-09-30') + #check all cols are included + assert combined_df_edited1.shape[1] >= 80 + assert combined_df_edited1.index[0] > date(2020, 9, 29) + + #check start and end dates work accordingly + combined_df_edited2 = client.combined_df_edited(start='2020-09-30', end='2020-10-01', metrics=['score_hrv_balance', 'steps', 'efficiency']) + assert combined_df_edited2.shape[1] == 3 + assert combined_df_edited2.index[-1] < date(2020, 10, 2) + + #test that invalid metric is dropped + combined_df_edited2 = client.combined_df_edited(start='2020-09-30', end='2020-10-01', metrics=['score_hrv_balance', 'steps', 'bedtime_start', 'zzz']) + assert combined_df_edited2.shape[1] == 3 + + #check that columns are pre-fixed with their summary name + assert 'ACTIVITY:steps' in combined_df_edited2 + #check that columns are suffixed with unit conversions + assert 'SLEEP:bedtime_start_dt_adjusted' in combined_df_edited2 + + +def test_save_xlsx(): + """ + Check that both raw and edited df's save without issue + """ + df_raw = client.sleep_df_raw(start='2020-09-30') + df_edited = client.sleep_df_edited(start='2020-09-30', end='2020-10-01', metrics=['bedtime_start', 'bedtime_end', 'score']) + raw_file = 'df_raw.xlsx' + edited_file = 'df_edited.xlsx' + client.save_as_xlsx(df_raw, raw_file, sheet_name='hello world') + client.save_as_xlsx(df_edited, 'df_edited.xlsx') + assert os.path.exists(raw_file) + assert os.path.exists(edited_file) + + +def test_tableize(): + """ + Check that df was printed to file + """ + f = 'df_tableized.txt' + df_raw = client.sleep_df_raw(start='2020-09-30', metrics='score') + client.tableize(df_raw, filename=f) + assert os.path.exists(f)