# Intensity demo

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw accelerometer recording for one subject (file ID id00b70b13).
data_id00b70b13 = pd.read_csv('./walk_id00b70b13.csv')

In [3]:
# Preview the first rows to check the available columns (time, x, y, z).
data_id00b70b13.head()

Unnamed: 0,time,x,y,z
0,2013-10-21 10:04:23.140,0.137,-1.648,0.082
1,2013-10-21 10:04:23.150,0.156,-1.594,0.098
2,2013-10-21 10:04:23.160,0.16,-1.539,0.109
3,2013-10-21 10:04:23.170,0.156,-1.504,0.113
4,2013-10-21 10:04:23.180,0.148,-1.5,0.105


In [4]:
# Parse the timestamp strings so the .dt accessor can be used below.
data_id00b70b13['time'] = pd.to_datetime(data_id00b70b13['time'])

# Step 1: Vector magnitude (VM) = Euclidean norm of the three acceleration axes.
data_id00b70b13['VM'] = np.sqrt(data_id00b70b13['x']**2 + data_id00b70b13['y']**2 + data_id00b70b13['z']**2)

# Step 2: Truncate each timestamp down to the start of its minute (floor, not
# round-to-nearest). 'min' is used instead of the 'T' alias, which is
# deprecated since pandas 2.2.
data_id00b70b13['minute'] = data_id00b70b13['time'].dt.floor('min')

# Step 3: Mean VM within each minute bucket.
mean_vm_per_minute1 = data_id00b70b13.groupby('minute')['VM'].mean().reset_index()

# Replace the minute timestamps with a 1-based ordinal (groupby returns the
# buckets in ascending order). Only minutes that contain samples appear, so a
# gap in the recording would be collapsed by this renumbering.
mean_vm_per_minute1['minute'] = range(1, len(mean_vm_per_minute1) + 1)
# Hard-coded short subject ID; presumably the subj_id that ID_check.csv maps
# to id00b70b13 -- TODO confirm.
mean_vm_per_minute1['subj_id'] = 25

In [5]:
# Preview the per-minute mean VM table ('minute' is a 1-based ordinal here, not a timestamp).
mean_vm_per_minute1.head()

Unnamed: 0,minute,VM,subj_id
0,1,1.474835,25
1,2,1.419384,25
2,3,1.464863,25
3,4,1.500132,25
4,5,1.598421,25


In [6]:
# Load the raw accelerometer recording for a second subject (file ID id079c763c).
data_id079c763c = pd.read_csv('./walk_id079c763c.csv')

In [7]:
# Preview the first rows to check the available columns (time, x, y, z).
data_id079c763c.head()

Unnamed: 0,time,x,y,z
0,2013-10-21 10:06:23.500,-0.004,-1.047,0.27
1,2013-10-21 10:06:23.510,0.059,-1.113,0.336
2,2013-10-21 10:06:23.520,0.113,-1.168,0.395
3,2013-10-21 10:06:23.530,0.16,-1.246,0.449
4,2013-10-21 10:06:23.540,0.223,-1.316,0.531


In [8]:
# Parse the timestamp strings so the .dt accessor can be used below.
data_id079c763c['time'] = pd.to_datetime(data_id079c763c['time'])

# Step 1: Vector magnitude (VM) = Euclidean norm of the three acceleration axes.
data_id079c763c['VM'] = np.sqrt(data_id079c763c['x']**2 + data_id079c763c['y']**2 + data_id079c763c['z']**2)

# Step 2: Truncate each timestamp down to the start of its minute (floor, not
# round-to-nearest). 'min' is used instead of the 'T' alias, which is
# deprecated since pandas 2.2.
data_id079c763c['minute'] = data_id079c763c['time'].dt.floor('min')

# Step 3: Mean VM within each minute bucket.
mean_vm_per_minute2 = data_id079c763c.groupby('minute')['VM'].mean().reset_index()

# Replace the minute timestamps with a 1-based ordinal (groupby returns the
# buckets in ascending order). Only minutes that contain samples appear, so a
# gap in the recording would be collapsed by this renumbering.
mean_vm_per_minute2['minute'] = range(1, len(mean_vm_per_minute2) + 1)
# Hard-coded short subject ID; presumably the subj_id that ID_check.csv maps
# to id079c763c -- TODO confirm.
mean_vm_per_minute2['subj_id'] = 22

In [9]:
# Preview the per-minute mean VM table ('minute' is a 1-based ordinal here, not a timestamp).
mean_vm_per_minute2.head()

Unnamed: 0,minute,VM,subj_id
0,1,1.382706,22
1,2,1.228663,22
2,3,1.315262,22
3,4,1.330361,22
4,5,1.309624,22


In [10]:
# Stack the two per-subject tables row-wise and renumber the rows from 0.
per_subject_frames = [mean_vm_per_minute1, mean_vm_per_minute2]
combined_data = pd.concat(per_subject_frames, ignore_index=True)

In [11]:
# Show the stacked per-minute table (rows of the first subject come first).
combined_data

Unnamed: 0,minute,VM,subj_id
0,1,1.474835,25
1,2,1.419384,25
2,3,1.464863,25
3,4,1.500132,25
4,5,1.598421,25
5,6,1.436595,25
6,7,1.485451,25
7,8,1.493609,25
8,9,1.479175,25
9,10,1.578543,25


# Intensity

In [12]:
import glob

In [13]:

# Load the lookup table that pairs each long file ID (column 'ID') with its
# short numeric 'subj_id'.
id_mapping = pd.read_csv("./ID_check.csv")

# Preview the first rows of the mapping.
id_mapping.head()

Unnamed: 0,subj_id,ID
0,1,idabd0c53c
1,2,id5993bf4a
2,3,idd80ac2b4
3,4,id82b9735c
4,5,id8af5374b


In [14]:
# Collect every per-subject walking trace in the working directory.
# glob returns matches in arbitrary order, so sort for a reproducible run.
file_paths = sorted(glob.glob("./walk_*.csv"))

# Fail early with a clear message: with no input files the merge below would
# otherwise raise an opaque KeyError: 'ID' on an empty frame.
if not file_paths:
    raise FileNotFoundError("No files matching './walk_*.csv' were found")

mean_vm_data = []

for file_path in file_paths:
    # Extract the long ID from the filename: './walk_<ID>.csv' -> '<ID>'.
    long_id = file_path.split('_')[-1].split('.')[0]

    # Load the raw trace for this subject.
    data = pd.read_csv(file_path)

    # Vector magnitude (VM) = Euclidean norm of the three acceleration axes.
    data['VM'] = np.sqrt(data['x']**2 + data['y']**2 + data['z']**2)

    # One summary number per subject: mean VM over the whole recording.
    mean_vm = data['VM'].mean()

    mean_vm_data.append({'ID': long_id, 'mean_VM': mean_vm})

# One row per file: long ID plus its mean VM.
mean_vm_df = pd.DataFrame(mean_vm_data)

# Attach the short subj_id (inner join: files whose ID is absent from
# id_mapping are dropped), order by subj_id, drop the long ID, and renumber
# the rows from 0.
result_df = (
    pd.merge(mean_vm_df, id_mapping, on='ID')
    .sort_values(by='subj_id')
    .drop(columns=['ID'])
    .reset_index(drop=True)
)

# Display the result
print(result_df)

     mean_VM  subj_id
0   1.011795        1
1   1.294254        2
2   1.115007        3
3   1.232750        4
4   1.152213        5
5   0.997173        6
6   1.193929        7
7   1.197825        8
8   1.063966        9
9   1.056688       10
10  1.537770       11
11  1.142460       12
12  1.140391       13
13  1.199758       14
14  1.198713       15
15  1.132088       16
16  1.216709       17
17  1.279515       18
18  1.128538       19
19  1.268131       20
20  1.148438       21
21  1.389853       22
22  1.270116       23
23  1.055329       24
24  1.472572       25
25  1.114945       26
26  1.070060       27
27  1.045208       28
28  1.109867       29
29  1.142638       30
30  1.078568       31
31  1.100167       32


# Clusters

In [15]:
# Load the per-subject demographics table (one row per subj_id).
demo = pd.read_csv("./demo.csv")

In [16]:
# Preview the demographic columns.
demo.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min
0,1,male,23,20-25,72,165,22.375579,white,108.65
1,2,female,45,40-45,69,141,20.819786,white,120.42
2,3,female,29,25-30,73,154,20.315631,white,113.4
3,4,male,43,40-45,71,185,25.799445,white,107.57
4,5,male,47,45-50,75,238,29.744711,white,105.79


In [17]:
# Join demographics with the per-subject mean VM on subj_id
# (default inner join: only subjects present in both tables are kept).
all_data = demo.merge(result_df, on='subj_id')

In [18]:
# Preview the merged table: demographics plus mean_VM.
all_data.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min,mean_VM
0,1,male,23,20-25,72,165,22.375579,white,108.65,1.011795
1,2,female,45,40-45,69,141,20.819786,white,120.42,1.294254
2,3,female,29,25-30,73,154,20.315631,white,113.4,1.115007
3,4,male,43,40-45,71,185,25.799445,white,107.57,1.23275
4,5,male,47,45-50,75,238,29.744711,white,105.79,1.152213


In [19]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [20]:
# Encode gender as 0/1 so it can be used as a clustering feature.
# The guard keeps this cell idempotent: calling .map() again on the already
# encoded column would find no matching keys and turn every value into NaN.
if not pd.api.types.is_numeric_dtype(all_data['gender']):
    all_data['gender'] = all_data['gender'].map({'male': 0, 'female': 1})

# Standardise the continuous features on a copy so that all_data keeps the
# original units; gender stays as 0/1 and is not standardised.
scaled_columns = ['mean_VM', 'step_per_min', 'age', 'bmi']
scaler = StandardScaler()
all_data_scale = all_data.copy()
all_data_scale[scaled_columns] = scaler.fit_transform(all_data_scale[scaled_columns])

In [21]:
# Check the unscaled table: gender is now 0/1, the other columns keep their original units.
all_data.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min,mean_VM
0,1,0,23,20-25,72,165,22.375579,white,108.65,1.011795
1,2,1,45,40-45,69,141,20.819786,white,120.42,1.294254
2,3,1,29,25-30,73,154,20.315631,white,113.4,1.115007
3,4,0,43,40-45,71,185,25.799445,white,107.57,1.23275
4,5,0,47,45-50,75,238,29.744711,white,105.79,1.152213


In [22]:
# Cluster subjects into three groups using the standardised features plus the
# 0/1 gender code; random_state pins the centroid initialisation.
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn versions -- consider pinning it for reproducibility.
cluster_features = ['mean_VM', 'step_per_min', 'gender', 'age', 'bmi']
kmeans = KMeans(n_clusters=3, random_state=42)
all_data_scale['cluster'] = kmeans.fit_predict(all_data_scale[cluster_features])

In [23]:
# Preview the scaled table with the new cluster label column.
all_data_scale.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min,mean_VM,cluster
0,1,0,-1.812653,20-25,72,165,-0.515884,white,-0.399928,-1.324612,0
1,2,1,0.674886,40-45,69,141,-0.796492,white,0.483422,0.986739,1
2,3,1,-1.134233,25-30,73,154,-0.887422,white,-0.043436,-0.480034,0
3,4,0,0.448746,40-45,71,185,0.101654,white,-0.480983,0.483454,1
4,5,0,0.901026,45-50,75,238,0.813233,white,-0.614574,-0.175577,1


In [24]:
# The unscaled table has no cluster column yet; labels were only added to all_data_scale.
all_data.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min,mean_VM
0,1,0,23,20-25,72,165,22.375579,white,108.65,1.011795
1,2,1,45,40-45,69,141,20.819786,white,120.42,1.294254
2,3,1,29,25-30,73,154,20.315631,white,113.4,1.115007
3,4,0,43,40-45,71,185,25.799445,white,107.57,1.23275
4,5,0,47,45-50,75,238,29.744711,white,105.79,1.152213


In [25]:
# Attach the cluster labels to the unscaled table so that cluster profiles can
# be reported in original units. Left join keeps every row of all_data.
cluster_labels = all_data_scale[['subj_id', 'cluster']]
final_data = pd.merge(all_data, cluster_labels, on='subj_id', how='left')

In [26]:
# Preview the unscaled table with cluster labels attached.
final_data.head()

Unnamed: 0,subj_id,gender,age,age_group,height_in,weight_lbs,bmi,race,step_per_min,mean_VM,cluster
0,1,0,23,20-25,72,165,22.375579,white,108.65,1.011795,0
1,2,1,45,40-45,69,141,20.819786,white,120.42,1.294254,1
2,3,1,29,25-30,73,154,20.315631,white,113.4,1.115007,0
3,4,0,43,40-45,71,185,25.799445,white,107.57,1.23275,1
4,5,0,47,45-50,75,238,29.744711,white,105.79,1.152213,1


In [27]:
# Summarise every cluster in original units: mean / min / max of each profiled
# feature. Named aggregation yields the flat, readable column names directly,
# in the order listed here.
profiles = (
    final_data
    .groupby('cluster')
    .agg(
        activity_intensity_avg=('mean_VM', 'mean'),
        activity_intensity_min=('mean_VM', 'min'),
        activity_intensity_max=('mean_VM', 'max'),
        cadence_avg=('step_per_min', 'mean'),
        cadence_min=('step_per_min', 'min'),
        cadence_max=('step_per_min', 'max'),
        age_avg=('age', 'mean'),
        age_min=('age', 'min'),
        age_max=('age', 'max'),
        bmi_avg=('bmi', 'mean'),
        bmi_min=('bmi', 'min'),
        bmi_max=('bmi', 'max'),
    )
    .reset_index()
)

print("Cluster Profiles:")
print(profiles)

Cluster Profiles:
   cluster  activity_intensity_avg  activity_intensity_min  \
0        0                1.126579                0.997173   
1        1                1.237644                1.056688   
2        2                1.078568                1.078568   

   activity_intensity_max  cadence_avg  cadence_min  cadence_max    age_avg  \
0                1.270116   112.870000       104.20       125.67  31.941176   
1                1.537770   110.736429        95.25       121.03  47.500000   
2                1.078568   178.220000       178.22       178.22  41.000000   

   age_min  age_max    bmi_avg    bmi_min    bmi_max  
0       23       44  22.590676  17.712270  30.427632  
1       39       54  28.717961  20.191988  39.797297  
2       41       41  21.453857  21.453857  21.453857  


In [28]:
# Persist the cluster summary to the working directory (index=False omits the row index column).
profiles.to_csv("cluster_profiles.csv", index=False)
print("Cluster profiles saved to 'cluster_profiles.csv'")

Cluster profiles saved to 'cluster_profiles.csv'
