<a href="https://colab.research.google.com/github/ufbfung/apple-health-integration/blob/main/Analysis_of_Apple_Health_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# kept in Drive can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Location of the Apple Health records (a JSON file) on the mounted Drive.
# main() reads this module-level name and passes it to load_health_data().
apple_health_data_path = '/content/drive/MyDrive/coding/myhealth/data/HealthData2.json'

In [14]:
from pydantic import BaseModel, Field
from typing import List, Dict, Any
import json
from collections import Counter, defaultdict
import statistics
from datetime import datetime
from dateutil import parser

class DeviceInfo(BaseModel):
    """Description of the device that recorded a sample.

    Every attribute is optional on input; one that is absent falls back to
    the string "Unknown".
    """

    # Identity of the device.
    name: str = "Unknown"
    manufacturer: str = "Unknown"
    model: str = "Unknown"

    # Version strings reported for the device.
    hardwareVersion: str = "Unknown"
    firmwareVersion: str = "Unknown"
    softwareVersion: str = "Unknown"

    # Identifier strings reported for the device.
    localIdentifier: str = "Unknown"
    udiDeviceIdentifier: str = "Unknown"

class HealthDataSample(BaseModel):
    """One record of the health export; every field is required."""
    uuid: str  # presumably a unique id for the record -- TODO confirm
    type: str  # sample type, e.g. "HKQuantityTypeIdentifierStepCount"
    startDate: str  # kept as text; parsed with dateutil in compute_period()
    endDate: str  # kept as text; not read by the analysis functions in this cell
    value: float  # numeric reading summarized by compute_statistics()
    unit: str
    source: str
    device: DeviceInfo  # recording device; its absent attributes default to "Unknown"
    metadata: Dict[str, Any]  # free-form mapping; not read by the analysis functions in this cell

class HealthData(BaseModel):
    """Container for all samples loaded from one export file."""
    samples: List[HealthDataSample]  # in the order load_health_data() read them

def load_health_data(json_file: str) -> HealthData:
    """Read the export at *json_file* and validate it into a HealthData.

    The file must hold a JSON array of objects, each of which is unpacked
    as keyword arguments into HealthDataSample.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A HealthData whose ``samples`` follow the order of the JSON array.

    Raises:
        OSError: The file cannot be opened.
        json.JSONDecodeError: The file content is not valid JSON.
        pydantic.ValidationError: A record does not satisfy HealthDataSample.
    """
    # JSON text is UTF-8; name the encoding explicitly instead of inheriting
    # whatever the platform locale happens to be.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Validation needs no I/O, so it runs after the file has been closed.
    samples = [HealthDataSample(**sample) for sample in data]
    return HealthData(samples=samples)

def _print_counts(heading, counts):
    """Print *heading*, then one indented ``label: count`` line per entry."""
    print(heading)
    for label, count in counts.items():
        print(f"  {label}: {count}")


def profile_health_data(health_data: HealthData):
    """Print a profile of the data set to stdout.

    Four sections are written, in this order: number of samples per type,
    number of samples per device name, the distinct device descriptions,
    and the number of observations per type (the same tally as the first
    section, kept so the report layout is unchanged).

    Within every section, entries appear in the order they were first seen
    in ``health_data.samples``, so repeated runs over the same data print
    the same report.

    Args:
        health_data: The loaded samples to profile.
    """
    type_counts = Counter()
    device_counts = Counter()
    # A dict serves as an insertion-ordered set here: a plain set of string
    # tuples iterates in a hash-dependent order that can differ between
    # interpreter runs, which made the "Devices Details" section unstable.
    device_details = {}

    for sample in health_data.samples:
        device = sample.device
        type_counts[sample.type] += 1
        device_counts[device.name] += 1
        description = (
            device.name,
            device.manufacturer,
            device.model,
            device.hardwareVersion,
            device.firmwareVersion,
            device.softwareVersion,
        )
        device_details[description] = None

    _print_counts("Data Types Count:", type_counts)
    _print_counts("\nDevices Count:", device_counts)

    print("\nDevices Details:")
    for name, manufacturer, model, hardware, firmware, software in device_details:
        print(f"  Name: {name}, Manufacturer: {manufacturer}, Model: {model}, "
              f"Hardware Version: {hardware}, Firmware Version: {firmware}, Software Version: {software}")

    # One observation is counted per sample, so this is the per-type tally again.
    _print_counts("\nObservations Count by Type:", type_counts)

def compute_statistics(data: List[float]) -> Dict[str, float]:
    """Summarize *data* as min, max, average and median.

    Each figure is rounded to two decimal places. The result keys are
    "min", "max", "average" and "median", in that order.
    """
    figures = (
        ("min", min(data)),
        ("max", max(data)),
        ("average", sum(data) / len(data)),
        ("median", statistics.median(data)),
    )
    return {label: round(number, 2) for label, number in figures}

def compute_period(dates: List[str]) -> Dict[str, str]:
    """Return the earliest and latest of *dates*, formatted for display.

    Each string is parsed with dateutil. The result has the keys "earliest"
    and "latest", each rendered with the pattern "%b %d, %Y".
    """
    moments = list(map(parser.parse, dates))
    first = min(moments)
    last = max(moments)
    display_format = "%b %d, %Y"
    return {
        "earliest": first.strftime(display_format),
        "latest": last.strftime(display_format),
    }

def statistical_insights(health_data: HealthData):
    """Print per-type summary statistics and the covered date range.

    Samples are grouped by their ``type``. For each group, in first-seen
    order, the values are summarized with compute_statistics() and the
    start dates with compute_period(), and the results go to stdout.
    """
    # type -> (values, start dates), both filled in sample order.
    grouped = defaultdict(lambda: ([], []))
    for record in health_data.samples:
        readings, started = grouped[record.type]
        readings.append(record.value)
        started.append(record.startDate)

    for kind, (readings, started) in grouped.items():
        summary = compute_statistics(readings)
        span = compute_period(started)
        report = (
            f"\nStatistics for {kind}:",
            f"  Min: {summary['min']}",
            f"  Max: {summary['max']}",
            f"  Average: {summary['average']}",
            f"  Median: {summary['median']}",
            f"  Period: {span['earliest']} - {span['latest']}",
        )
        for line in report:
            print(line)

def main():
    """Load the export and print the profile and statistics reports."""
    # apple_health_data_path is a module-level name assigned in an earlier cell.
    records = load_health_data(apple_health_data_path)

    profile_health_data(records)
    statistical_insights(records)


# Run the analysis only when this code is executed as the main module.
if __name__ == "__main__":
    main()


Data Types Count:
  HKQuantityTypeIdentifierStepCount: 544
  HKQuantityTypeIdentifierActiveEnergyBurned: 5032
  HKQuantityTypeIdentifierHeartRate: 4551

Devices Count:
  Apple Watch: 9859
  iPhone: 200
  Apple Watch via Apple Health: 64
  Body Smart: 4

Devices Details:
  Name: Apple Watch via Apple Health, Manufacturer: Withings, Model: Withings Tracker, Hardware Version: Unknown, Firmware Version: Unknown, Software Version: Unknown
  Name: iPhone, Manufacturer: Apple Inc., Model: iPhone, Hardware Version: iPhone16,1, Firmware Version: Unknown, Software Version: 17.4.1
  Name: Body Smart, Manufacturer: Withings, Model: Withings Scale, Hardware Version: 0, Firmware Version: 1071, Software Version: Unknown
  Name: iPhone, Manufacturer: Apple Inc., Model: iPhone, Hardware Version: iPhone16,1, Firmware Version: Unknown, Software Version: 17.5.1
  Name: Apple Watch, Manufacturer: Apple Inc., Model: Watch, Hardware Version: Watch6,1, Firmware Version: Unknown, Software Version: 10.5

Observ