In [16]:
import os
import json
import pprint
import pandas as pd

In [2]:
def load_bite_data(parent_folder):
    data = {}
    
    # Walk through the parent folder to find all matching JSON files
    for root, dirs, files in os.walk(parent_folder):
        for file_name in files:
            if file_name.startswith("annotated_bite_events_weights_") and file_name.endswith(".json"):
                subject_id = file_name.replace("annotated_bite_events_weights_", "").replace(".json", "")
                file_path = os.path.join(root, file_name)
                
                # Load JSON file content
                with open(file_path, 'r') as f:
                    content = json.load(f)
                    data[subject_id] = content.get("bites", [])
    
    return data

In [4]:
path = 'processed_events/bites'
bites_data = load_bite_data(path)
# print(bites_data)

In [62]:
def extract_summary(data):
    # Convert data to a DataFrame for easier manipulation
    all_bites = []
    for subject_id, bites in data.items():
        for bite in bites:
            bite['subject_id'] = subject_id
            all_bites.append(bite)
    
    df = pd.DataFrame(all_bites)
    
    # Filter out rows where 'weight' or 'duration_seconds' are None
    df = df.dropna(subset=['weight', 'duration_seconds'])
    
    # Basic Statistics
    total_sessions = df['subject_id'].nunique()
    total_bites = len(df)
    average_bites_per_subject = round(df.groupby('subject_id').size().mean(), 2)
    
    # Bite Statistics
    average_duration_of_bite = round(df['duration_seconds'].mean(), 2)
    median_bite_weight = round(df['weight'].median(), 2)
    bite_weight_range = (round(df['weight'].min(), 2), round(df['weight'].max(), 2))
    bite_duration_range = (round(df['duration_seconds'].min(), 2), round(df['duration_seconds'].max(), 2))
    average_bite_weight = round(df['weight'].mean(), 2)
    bite_weight_std = round(df['weight'].std(), 2)
    bite_duration_std = round(df['duration_seconds'].std(), 2)
    
    # Meal-Level Statistics
    meal_durations = df.groupby('subject_id').apply(lambda x: x['end_seconds'].max() - x['start_seconds'].min())
    average_meal_duration = round(meal_durations.mean(), 2)
    median_meal_duration = round(meal_durations.median(), 2)
    meal_duration_range = (round(meal_durations.min(), 2), round(meal_durations.max(), 2))
    
    # Total Consumption per Meal
    total_consumption_per_meal = df.groupby('subject_id')['weight'].sum()
    average_total_consumption_per_meal = round(total_consumption_per_meal.mean(), 2)
    
    # Time Between Bites
    df['time_between_bites'] = df.groupby('subject_id')['start_seconds'].diff()
    average_time_between_bites = round(df['time_between_bites'].mean(), 2)
    
    # Total IMU Samples (if available)
    total_imu_samples = int(df['imu_samples'].sum()) if 'imu_samples' in df.columns else None
    
    # Missing Values Count
    total_na = int(df.isna().sum().sum())

    # Summary dictionary
    summary = {
        "Total Sessions": total_sessions,
        "Total Bites": total_bites,
        "Average Bites per Subject": average_bites_per_subject,
        
        "Bite Duration Statistics": {
            "Average Duration of a Bite (s)": average_duration_of_bite,
            "Bite Duration Range (s)": bite_duration_range,
            "Bite Duration Std Dev (s)": bite_duration_std
        },
        
        "Bite Weight Statistics": {
            "Median Bite Weight (g)": median_bite_weight,
            "Average Bite Weight (g)": average_bite_weight,
            "Bite Weight Range (g)": bite_weight_range,
            "Bite Weight Std Dev (g)": bite_weight_std
        },
        
        "Meal-Level Statistics": {
            "Average Meal Duration (s)": average_meal_duration,
            "Median Meal Duration (s)": median_meal_duration,
            "Meal Duration Range (s)": meal_duration_range,
            "Average Total Consumption per Meal (g)": average_total_consumption_per_meal
        },
        
        "Time Between Bites": {
            "Average Time Between Bites (s)": average_time_between_bites
        },
        
        "Additional": {
            "Total IMU Samples": total_imu_samples,
            "Total Missing Values (NaN)": total_na
        }
    }
    
    return summary

In [63]:
summary_info = extract_summary(bites_data)

In [64]:
pprint.pp(summary_info)

{'Total Sessions': 10,
 'Total Bites': 342,
 'Average Bites per Subject': 34.2,
 'Bite Duration Statistics': {'Average Duration of a Bite (s)': 6.45,
                              'Bite Duration Range (s)': (1.61, 27.19),
                              'Bite Duration Std Dev (s)': 3.41},
 'Bite Weight Statistics': {'Median Bite Weight (g)': 10.0,
                            'Average Bite Weight (g)': 10.89,
                            'Bite Weight Range (g)': (0.0, 34.0),
                            'Bite Weight Std Dev (g)': 6.05},
 'Meal-Level Statistics': {'Average Meal Duration (s)': 437.35,
                           'Median Meal Duration (s)': 358.63,
                           'Meal Duration Range (s)': (217.68, 857.68),
                           'Average Total Consumption per Meal (g)': 372.3},
 'Time Between Bites': {'Average Time Between Bites (s)': 12.99},
 'Additional': {'Total IMU Samples': None, 'Total Missing Values (NaN)': 10}}


In [42]:
all_bites = []
for subject_id, bites in bites_data.items():
    for bite in bites:
        bite['subject_id'] = subject_id
        all_bites.append(bite)

df = pd.DataFrame(all_bites)

In [43]:
df

Unnamed: 0,bite_id,start_seconds,end_seconds,duration_seconds,weight,subject_id
0,1,53.939,60.911,6.972,21.0,17_meal_2
1,2,67.152,85.136,17.984,10.0,17_meal_2
2,3,91.041,101.353,10.312,23.0,17_meal_2
3,4,117.384,123.458,6.074,18.0,17_meal_2
4,5,135.986,141.960,5.974,24.0,17_meal_2
...,...,...,...,...,...,...
351,27,313.080,316.216,3.136,,14
352,28,316.250,327.393,11.143,,14
353,29,327.495,335.370,7.875,7.0,14
354,30,337.238,340.040,2.802,,14
