# Garmin Health Data Analysis

This notebook provides a comprehensive overview of your Garmin health data, including:
- Data loading and quality checks
- Summary statistics across all metrics
- Key insights and trends
- Links to specialized analysis notebooks

This serves as a starting point for exploring your health data. For detailed analyses, see:
- `day_of_week_analysis.ipynb` - How metrics vary by day of week
- `stress_time_of_day_analysis.ipynb` - Stress patterns throughout the day
- `hr_daily.ipynb` - Heart rate analysis over time


In [None]:
# Import required libraries
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import logging
from datetime import datetime, timedelta
from pathlib import Path

# Import analysis modules
from garmin_analysis.utils.data_loading import load_master_dataframe

# Configure the root logger at INFO level and create this notebook's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Plot defaults used by the figures below: matplotlib's stock style,
# seaborn's "husl" palette, a wide default figure and a 10pt base font.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'font.size': 10,
})

print("✓ Imports complete")


## 1. Load Data

Load the master daily summary dataset containing all health metrics.


In [None]:
# Fetch the master daily summary table through the project's loader.
logger.info("Loading master daily summary data...")
df = load_master_dataframe()

print(f"✓ Loaded {len(df):,} days of data")

# Compute the date bounds and the frame dimensions once, then reuse them
# in the report lines below.
first_day = df['day'].min()
last_day = df['day'].max()
n_rows, n_cols = df.shape

print(f"📅 Date range: {first_day} to {last_day}")
print(f"📊 Total days covered: {(last_day - first_day).days} days")
print(f"\n🔢 DataFrame shape: {df.shape}")
print(f"   {n_rows:,} rows × {n_cols} columns")

# Keep the preview as the final expression so the notebook renders it.
df.head()


## 2. Data Overview

Examine the available metrics and data types.


In [None]:
# List the available columns, bucketed into rough metric categories.
print("Available Metrics:")
print("=" * 70)

# Lowercase every column name once; matching below is a case-insensitive
# substring test, so a column may appear in more than one category.
named = [(col, col.lower()) for col in df.columns]

sleep_cols = [col for col, low in named if 'sleep' in low or 'score' in low]
hr_cols = [col for col, low in named if 'hr' in low or 'heart' in low]
activity_cols = [
    col for col, low in named
    if any(key in low for key in ('steps', 'distance', 'calories', 'intensity'))
]
body_cols = [
    col for col, low in named
    if any(key in low for key in ('bb_', 'body_battery', 'weight', 'stress'))
]

# Everything that matched no category, except the 'day' key column.
categorized = set(sleep_cols) | set(hr_cols) | set(activity_cols) | set(body_cols)
other_cols = [col for col in df.columns if col not in categorized and col != 'day']

# The four fixed categories are always printed, even when empty.
sections = (
    ("💤 Sleep Metrics", sleep_cols),
    ("❤️  Heart Rate Metrics", hr_cols),
    ("🏃 Activity Metrics", activity_cols),
    ("💪 Body Metrics", body_cols),
)
for heading, cols in sections:
    print(f"\n{heading} ({len(cols)}):")
    for col in cols:
        print(f"   - {col}")

# The catch-all section is only shown when it has something in it.
if other_cols:
    print(f"\n📋 Other Metrics ({len(other_cols)}):")
    for col in other_cols:
        print(f"   - {col}")

print(f"\n📊 Total: {len(df.columns) - 1} metrics (excluding 'day' column)")


## 3. Data Quality Assessment

Check for missing data and data quality issues.


In [None]:
# Analyze data quality: overall completeness, per-column missing values,
# and the distribution of column dtypes.
print("Data Quality Summary:")
print("=" * 70)

# Basic statistics
total_records = len(df)
date_range_days = (df['day'].max() - df['day'].min()).days

# Count nulls per column once and reuse the result everywhere below.
missing_info = df.isnull().sum()
total_cells = df.shape[0] * df.shape[1]
# Guard the ratio so an empty frame reports NaN instead of dividing by zero.
if total_cells:
    completeness = (1 - missing_info.sum() / total_cells) * 100
else:
    completeness = float('nan')

print("\n📊 Overall Statistics:")
print(f"   Total Records: {total_records:,}")
print(f"   Date Range: {date_range_days} days")
print(f"   Data Completeness: {completeness:.1f}%")

# Check missing data
print("\n🔍 Missing Data Analysis:")
missing_pct = (missing_info / len(df) * 100).round(2)
missing_df = pd.DataFrame({
    'Missing Count': missing_info,
    'Missing %': missing_pct,
}).sort_values('Missing %', ascending=False)

# Show the ten columns with the highest share of missing values.
missing_cols = missing_df[missing_df['Missing Count'] > 0]
if not missing_cols.empty:
    print(f"\n   Columns with missing data ({len(missing_cols)} total):")
    top_missing = missing_cols.head(10)
    # Walk the two columns directly instead of using iterrows(): iterrows()
    # builds one Series per row, which upcasts the integer count to float
    # and made the report read "5.0 missing" instead of "5 missing".
    for col, count, pct in zip(top_missing.index,
                               top_missing['Missing Count'],
                               top_missing['Missing %']):
        print(f"   - {col}: {int(count):,} missing ({pct:.1f}%)")
    if len(missing_cols) > 10:
        print(f"   ... and {len(missing_cols) - 10} more columns")
else:
    print("   ✓ No missing data!")

# Show how many columns there are of each dtype.
print("\n📋 Data Types:")
print(df.dtypes.value_counts())


## 4. Summary Statistics

View basic statistics for key metrics.


In [None]:
# Candidate headline metrics; only the ones present in the frame are kept,
# in the order listed here.
possible_metrics = ['steps', 'score', 'bb_max', 'bb_min', 'rhr',
                    'hydration_intake', 'stress_avg', 'total_distance']
key_metrics = [name for name in possible_metrics if name in df.columns]

if not key_metrics:
    # None of the expected names exist: fall back to whatever numeric
    # columns come first in the frame.
    print("⚠️  No standard key metrics found in dataset")
    print("\nShowing summary of first 10 numeric columns:")
    numeric_cols = df.select_dtypes(include=[np.number]).columns[:10]
    display(df[numeric_cols].describe().round(2))
else:
    print("Key Metrics Summary Statistics:")
    print("=" * 70)
    summary = df[key_metrics].describe().round(2)
    display(summary)


## 5. Recent Trends (Last 30 Days)

Visualize recent trends in key health metrics.


In [None]:
# Get the last 30 calendar days of data, ending on the most recent day on record.
last_date = df['day'].max()
# Both ends of the window are inclusive, so step back 29 days to cover exactly
# 30 calendar days (stepping back 30 would select 31).
start_date = last_date - timedelta(days=29)
# Sort chronologically so each line is drawn left to right whatever the row
# order of df happens to be.
recent_df = df[df['day'] >= start_date].sort_values('day').copy()

print(f"Recent Data (Last 30 Days): {len(recent_df)} days")
print(f"Date range: {recent_df['day'].min()} to {recent_df['day'].max()}")


def _finish_panel(ax, title, ylabel):
    """Apply the title, y-label, grid, legend and rotated x tick labels shared by every panel."""
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    ax.legend()
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)


# Plot key metrics on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Recent Trends (Last 30 Days)', fontsize=16, fontweight='bold')

# Panels that show a single metric plus a dashed line at its mean:
# (grid position, column, title, y-label, extra line kwargs, mean-line color)
single_metric_panels = [
    ((0, 0), 'steps', 'Daily Steps', 'Steps', {}, 'r'),
    ((0, 1), 'score', 'Sleep Score', 'Score', {'color': 'purple'}, 'r'),
    ((1, 1), 'rhr', 'Resting Heart Rate', 'BPM', {'color': 'red'}, 'blue'),
]

for position, column, title, ylabel, line_kwargs, mean_color in single_metric_panels:
    ax = axes[position]
    if column not in recent_df.columns:
        # Hide the panel instead of leaving an empty, unlabeled set of axes.
        ax.set_visible(False)
        continue
    ax.plot(recent_df['day'], recent_df[column], marker='o', linewidth=2, markersize=4, **line_kwargs)
    ax.axhline(recent_df[column].mean(), color=mean_color, linestyle='--', label='Average', alpha=0.7)
    _finish_panel(ax, title, ylabel)

# Body Battery panel: daily max and min with the band between them shaded.
ax = axes[1, 0]
if 'bb_max' in recent_df.columns and 'bb_min' in recent_df.columns:
    ax.plot(recent_df['day'], recent_df['bb_max'], marker='o', linewidth=2, markersize=4, label='Max', color='green')
    ax.plot(recent_df['day'], recent_df['bb_min'], marker='o', linewidth=2, markersize=4, label='Min', color='orange')
    ax.fill_between(recent_df['day'], recent_df['bb_min'], recent_df['bb_max'], alpha=0.2)
    _finish_panel(ax, 'Body Battery Range', 'Body Battery')
else:
    ax.set_visible(False)

plt.tight_layout()
plt.show()


## 6. Correlation Analysis

Examine relationships between different health metrics.


In [None]:
# Select numeric columns for correlation
numeric_df = df.select_dtypes(include=[np.number])

# With many numeric columns the heatmap becomes unreadable, so restrict it to
# a short list of preferred metrics, padded up to ten from the first fifteen
# numeric columns when too few of the preferred names are present.
if len(numeric_df.columns) > 15:
    preferred = ['steps', 'score', 'bb_max', 'bb_min', 'rhr', 'stress_avg',
                 'hydration_intake', 'total_distance', 'hr_avg']
    correlation_metrics = [col for col in preferred if col in numeric_df.columns]

    if len(correlation_metrics) < 10:
        remaining = [col for col in numeric_df.columns[:15] if col not in correlation_metrics]
        correlation_metrics.extend(remaining[:10 - len(correlation_metrics)])

    correlation_df = numeric_df[correlation_metrics]
else:
    correlation_df = numeric_df

# Calculate correlation matrix
corr_matrix = correlation_df.corr()

if corr_matrix.shape[0] < 2:
    # A pairwise correlation needs at least two metrics.
    print("⚠️  Need at least two numeric metrics to compute correlations")
else:
    # Plot heatmap
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0,
                square=True, linewidths=1, cbar_kws={'label': 'Correlation'},
                ax=ax, vmin=-1, vmax=1)
    ax.set_title('Correlation Matrix of Health Metrics', fontsize=14, fontweight='bold', pad=15)
    plt.tight_layout()
    plt.show()

    # Blank out the diagonal and the upper triangle so each pair of metrics is
    # listed once (from the lower triangle) and self-correlations are excluded.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    # Drop NaNs explicitly: the masked cells (and any undefined correlations)
    # must not end up in the rankings, regardless of how stack() treats NaN.
    corr_values = corr_matrix.mask(mask).stack().dropna().sort_values(ascending=False)

    # Split by sign so neither list can contain pairs of the opposite sign
    # or repeat pairs from the other list when there are only a few metrics.
    positive_pairs = corr_values[corr_values > 0]
    # Re-sort ascending so the most negative correlation is listed first.
    negative_pairs = corr_values[corr_values < 0].sort_values()

    # Show strongest correlations
    print("\nStrongest Positive Correlations:")
    print("=" * 50)
    print(positive_pairs.head(10))

    print("\n\nStrongest Negative Correlations:")
    print("=" * 50)
    print(negative_pairs.head(10))


## 7. Long-term Trends

Visualize trends over the entire data range.


In [None]:
# Work on a chronologically ordered copy so df itself is left untouched.
df_sorted = df.sort_values('day').copy()

# Keep only the headline metrics that exist in the data.
metrics_to_plot = [
    name for name in ('steps', 'score', 'rhr', 'bb_max')
    if name in df_sorted.columns
]

# Smooth each metric with a trailing mean over the last 7 rows. The window
# counts rows, so it spans seven calendar days only when there is exactly
# one row per day.
for name in metrics_to_plot:
    df_sorted[f'{name}_rolling'] = df_sorted[name].rolling(window=7, min_periods=1).mean()

if not metrics_to_plot:
    print("⚠️  No standard metrics available for long-term trend visualization")
else:
    n_panels = len(metrics_to_plot)
    # squeeze=False always returns a 2-D array of axes, so the single-metric
    # case needs no special handling; take the only column to get a 1-D view.
    fig, axes = plt.subplots(n_panels, 1, figsize=(16, 4 * n_panels), squeeze=False)
    axes = axes[:, 0]

    fig.suptitle('Long-term Trends (7-Day Rolling Average)', fontsize=16, fontweight='bold')

    titles = {
        'steps': 'Daily Steps',
        'score': 'Sleep Score',
        'rhr': 'Resting Heart Rate',
        'bb_max': 'Maximum Body Battery',
    }

    for ax, metric in zip(axes, metrics_to_plot):
        # Faint gray line: the raw daily values.
        ax.plot(df_sorted['day'], df_sorted[metric], alpha=0.3, linewidth=1, color='gray', label='Daily')
        # Bold line: the smoothed series.
        ax.plot(df_sorted['day'], df_sorted[f'{metric}_rolling'], linewidth=2, label='7-Day Average')
        ax.set_title(titles.get(metric, metric), fontsize=12, fontweight='bold')
        ax.set_ylabel(metric)
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.tight_layout()
    plt.show()


## 8. Next Steps

### Specialized Analysis Notebooks

For more detailed analysis, explore these specialized notebooks:

1. **Day of Week Analysis** (`day_of_week_analysis.ipynb`)
   - Discover patterns in sleep, body battery, and hydration by day of week
   - Compare weekend vs weekday performance
   - Identify your best and worst days

2. **Stress Time of Day Analysis** (`stress_time_of_day_analysis.ipynb`)
   - Understand hourly stress patterns
   - View stress heatmaps by day and hour
   - Identify peak stress times

3. **Heart Rate Analysis** (`hr_daily.ipynb`)
   - Analyze heart rate trends over time
   - Track resting heart rate changes

### Analysis Modules

You can also use the analysis modules directly:
```python
from garmin_analysis.features import day_of_week_analysis
from garmin_analysis.features import time_of_day_stress_analysis
from garmin_analysis.reporting import generate_trend_summary
```
