In [4]:
import pandas as pd

def extract_lifesnaps_features(csv_file_path):
    """Load a CSV and return only the complete rows of the selected feature columns.

    Args:
        csv_file_path: Path of the CSV file to read with ``pd.read_csv``.

    Returns:
        DataFrame restricted to the identifier, activity, sleep and stress
        columns listed below (in that order), keeping only rows in which none
        of those columns is missing. The original row index is preserved.

    Raises:
        KeyError: If any of the selected columns is absent from the file.
    """
    identifier_cols = ['id', 'date']
    activity_cols = [
        'calories', 'steps', 'distance',
        'lightly_active_minutes', 'moderately_active_minutes',
        'very_active_minutes', 'sedentary_minutes',
    ]
    sleep_cols = ['sleep_duration', 'sleep_efficiency']
    stress_cols = [
        'stress_score', 'rmssd', 'nremhr',
        'resting_hr', 'bpm', 'mindfulness_session',
    ]
    selected = identifier_cols + activity_cols + sleep_cols + stress_cols

    raw = pd.read_csv(csv_file_path)

    # Projecting first means dropna() only has to look at the selected columns;
    # columns outside the selection never influence which rows are kept.
    return raw[selected].dropna()


# Build the filtered feature table from the raw export, then show its first
# rows followed by its (rows, columns) shape.
df_filtered = extract_lifesnaps_features('daily_fitbit_sema_df_unprocessed.csv')
print(df_filtered.head(), df_filtered.shape, sep='\n')



                         id        date  calories    steps  distance  \
0  621e2e8e67b776a24055b564  2021-05-24   2351.59   8833.0    6517.5   
1  621e2e8e67b776a24055b564  2021-05-25   2332.08   9727.0    7178.6   
2  621e2e8e67b776a24055b564  2021-05-26   2262.30   8253.0    6090.9   
3  621e2e8e67b776a24055b564  2021-05-27   2325.10   9015.0    6653.1   
4  621e2e8e67b776a24055b564  2021-05-28   2586.76  12949.0    9557.9   

   lightly_active_minutes  moderately_active_minutes  very_active_minutes  \
0                   149.0                       24.0                 33.0   
1                   132.0                       25.0                 31.0   
2                   112.0                       27.0                 31.0   
3                   133.0                       21.0                 37.0   
4                   136.0                       42.0                 54.0   

   sedentary_minutes  sleep_duration  sleep_efficiency  stress_score    rmssd  \
0              713.0   

In [6]:
# Persist the filtered table; the header row is written by default and the
# DataFrame index is left out of the file.
df_filtered.to_csv('lifesnaps_filtered.csv', index=False)

In [8]:
import pandas as pd

def extract_latest_n_rows_per_user(df, n=7):
    """Return the ``n`` most recent rows of every user, ordered by id and date.

    The input frame is not modified: the datetime conversion of ``'date'`` is
    done on a new frame, so the caller's DataFrame keeps its original column.

    Args:
        df: DataFrame with at least an ``'id'`` column and a ``'date'`` column
            that ``pd.to_datetime`` can parse.
        n: Number of trailing rows to keep per ``'id'`` (default 7). Users
            with fewer than ``n`` rows keep all of their rows.

    Returns:
        DataFrame sorted by ``['id', 'date']`` containing the last ``n`` rows
        of each ``'id'`` group, with ``'date'`` as datetime and the original
        index labels preserved.
    """
    # assign() builds a new frame, so the conversion never writes into `df`
    # (the original in-place assignment silently changed the caller's data).
    df_sorted = (
        df.assign(date=pd.to_datetime(df['date']))
          .sort_values(['id', 'date'])
    )
    # After sorting, the latest dates are the trailing rows of each group.
    return df_sorted.groupby('id', group_keys=False).tail(n)


# Reload the saved feature table, keep the 7 most recent rows of every user,
# then show the first rows followed by the (rows, columns) shape.
df_filtered = extract_lifesnaps_features('lifesnaps_filtered.csv')
df_latest = extract_latest_n_rows_per_user(df_filtered, n=7)
print(df_latest.head(), df_latest.shape, sep='\n')


                          id       date  calories    steps  distance  \
55  621e2e8e67b776a24055b564 2021-07-25   2023.77   2472.0    1819.0   
56  621e2e8e67b776a24055b564 2021-07-26   2493.46  12090.0    8924.7   
57  621e2e8e67b776a24055b564 2021-07-27   1914.66   1911.0    1407.1   
58  621e2e8e67b776a24055b564 2021-07-28   2300.74   8522.0    6289.5   
59  621e2e8e67b776a24055b564 2021-07-29   2860.51  18004.0   13291.5   

    lightly_active_minutes  moderately_active_minutes  very_active_minutes  \
55                   142.0                        0.0                  0.0   
56                   121.0                       25.0                 61.0   
57                   104.0                        0.0                  0.0   
58                   124.0                       18.0                 38.0   
59                   167.0                       64.0                 65.0   

    sedentary_minutes  sleep_duration  sleep_efficiency  stress_score  \
55              746.0    

In [9]:
# Persist the per-user latest rows; the header row is written by default and
# the DataFrame index is left out of the file.
df_latest.to_csv('lifesnaps_latest_7.csv', index=False)

In [12]:
import pandas as pd
import numpy as np
import json

# Assumes df_latest (built in the previous cells) is a DataFrame with columns: id, date, and all relevant features

# Numeric columns summarised per user by the trend calculation below.
# 'id' and 'date' are identifiers and are therefore not listed here.
features = [
    # activity
    'calories',
    'steps',
    'distance',
    'lightly_active_minutes',
    'moderately_active_minutes',
    'very_active_minutes',
    'sedentary_minutes',
    # sleep
    'sleep_duration',
    'sleep_efficiency',
    # stress / heart
    'stress_score',
    'rmssd',
    'nremhr',
    'resting_hr',
    'bpm',
]

# # Ensure date is datetime and data is sorted
# df_filtered['date'] = pd.to_datetime(df_filtered['date'])
# df_filtered = df_filtered.sort_values(['id', 'date'])


# Function to compute moving average, std, and trend for each feature
def calculate_trends(group, feature_columns=None):
    """Summarise each feature of one user's rows and label its latest value.

    The last row of ``group`` is treated as the current observation; this
    function does not sort, so rows are expected to arrive in chronological
    order. The rolling window counts rows (up to 7), so it only corresponds
    to 7 calendar days when there is exactly one row per day.

    Args:
        group: DataFrame holding one user's rows, one numeric column per
            feature.
        feature_columns: Names of the columns to summarise. When omitted, the
            module-level ``features`` list is used.

    Returns:
        dict mapping each feature name to a dict with the keys
        ``'current_value'``, ``'7_day_moving_average'``, ``'7_day_std_dev'``
        and ``'trend'``. The three numeric entries are plain Python floats so
        the result can be passed to ``json.dump`` whatever the column dtype.
        ``'trend'`` is ``'high'`` / ``'low'`` when the current value lies more
        than one standard deviation above / below the mean, otherwise
        ``'moderate'``.
    """
    if feature_columns is None:
        feature_columns = features

    result = {}
    for feature in feature_columns:
        series = group[feature]
        window = series.rolling(window=7, min_periods=1)

        # float() turns numpy scalars (e.g. np.int64 from integer columns)
        # into native floats, which the json module can always serialise.
        current_value = float(series.iloc[-1])
        current_mean = float(window.mean().iloc[-1])
        # The sample std of a single observation is NaN; treat it as zero spread.
        current_std = float(window.std().fillna(0).iloc[-1])

        # Classify the latest value against a one-standard-deviation band.
        if current_value > current_mean + current_std:
            trend = 'high'
        elif current_value < current_mean - current_std:
            trend = 'low'
        else:
            trend = 'moderate'

        result[feature] = {
            'current_value': current_value,
            '7_day_moving_average': current_mean,
            '7_day_std_dev': current_std,
            'trend': trend
        }
    return result

# Build {user id -> per-feature trend summary} by visiting each user's rows in turn
trend_results = {
    user_id: calculate_trends(user_rows)
    for user_id, user_rows in df_latest.groupby('id')
}

# # Save each user's result to a JSON file (one file per user)
# for user_id, user_data in trend_results.items():
#     with open(f'lifesnaps_trends_{user_id}.json', 'w') as f:
#         json.dump({user_id: user_data}, f, indent=4)

# Write the summaries of all users into a single JSON file
with open('lifesnaps_trends_all_users.json', 'w') as out_file:
    json.dump(trend_results, out_file, indent=4)
