# Pipeline for actigraph analyses: 

### Some setup and import

In [4]:
import pandas as pd
import numpy as np


## Load the raw data!

In [5]:
data = pd.read_csv ('Data/test.csv', skiprows= 162).drop(['Ligne','Marqueur','Lumière blanche','Lumière rouge','Lumière verte','Lumière bleue','Unnamed: 14'],axis=1)
data

Unnamed: 0,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,1,2021-08-11,15:14:00,0,NAN,NAN,ACTIVITÉ,NAN
1,1,2021-08-11,15:15:00,0,NAN,NAN,ACTIVITÉ,NAN
2,1,2021-08-11,15:16:00,0,NAN,NAN,ACTIVITÉ,NAN
3,1,2021-08-11,15:17:00,0,NAN,NAN,ACTIVITÉ,NAN
4,1,2021-08-11,15:18:00,0,NAN,NAN,ACTIVITÉ,NAN
...,...,...,...,...,...,...,...,...
8277,7,2021-08-17,09:11:00,0,139,1,ACTIVITÉ,NAN
8278,7,2021-08-17,09:12:00,0,144,1,ACTIVITÉ,NAN
8279,7,2021-08-17,09:13:00,0,182,1,ACTIVITÉ,NAN
8280,7,2021-08-17,09:14:00,0,17,NAN,ACTIVITÉ,NAN


In [6]:
print(data.iloc[0:406])


     Jour        Date     Heure  Statut hors poignet Activité Sommeil/Éveil  \
0       1  2021-08-11  15:14:00                    0      NAN           NAN   
1       1  2021-08-11  15:15:00                    0      NAN           NAN   
2       1  2021-08-11  15:16:00                    0      NAN           NAN   
3       1  2021-08-11  15:17:00                    0      NAN           NAN   
4       1  2021-08-11  15:18:00                    0      NAN           NAN   
..    ...         ...       ...                  ...      ...           ...   
401     1  2021-08-11  21:55:00                    1      NAN           NAN   
402     1  2021-08-11  21:56:00                    1      NAN           NAN   
403     1  2021-08-11  21:57:00                    1      NAN           NAN   
404     1  2021-08-11  21:58:00                    1      NAN           NAN   
405     1  2021-08-11  21:59:00                    1      NAN           NAN   

    Statut de l’intervalle Statut Sommeil/Éveil  
0

### Remove data of day 1

In [7]:
df=data.drop(data.index[0:406],axis=0).reset_index(drop=True)
df

Unnamed: 0,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,1,2021-08-11,22:00:00,1,NAN,NAN,EXCLU,EXCLU
1,1,2021-08-11,22:01:00,0,176,NAN,ACTIVITÉ,NAN
2,1,2021-08-11,22:02:00,0,482,1,ACTIVITÉ,NAN
3,1,2021-08-11,22:03:00,0,373,1,ACTIVITÉ,NAN
4,1,2021-08-11,22:04:00,0,452,1,ACTIVITÉ,NAN
...,...,...,...,...,...,...,...,...
7871,7,2021-08-17,09:11:00,0,139,1,ACTIVITÉ,NAN
7872,7,2021-08-17,09:12:00,0,144,1,ACTIVITÉ,NAN
7873,7,2021-08-17,09:13:00,0,182,1,ACTIVITÉ,NAN
7874,7,2021-08-17,09:14:00,0,17,NAN,ACTIVITÉ,NAN


### Define day/ night intervals

In [8]:
day_night_intervals= df[(df["Heure"]== '22:00:00')|(df["Heure"]== '07:00:00')] 
day_night_intervals


Unnamed: 0,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,1,2021-08-11,22:00:00,1,NAN,NAN,EXCLU,EXCLU
540,2,2021-08-12,07:00:00,0,23,0,S-REPOS,NAN
1440,2,2021-08-12,22:00:00,0,261,1,ACTIVITÉ,NAN
1980,3,2021-08-13,07:00:00,0,0,0,S-REPOS,NAN
2880,3,2021-08-13,22:00:00,0,339,1,ACTIVITÉ,NAN
3420,4,2021-08-14,07:00:00,0,82,1,S-REPOS,NAN
4320,4,2021-08-14,22:00:00,0,10,0,ACTIVITÉ,NAN
4860,5,2021-08-15,07:00:00,0,0,0,S-REPOS,NAN
5760,5,2021-08-15,22:00:00,0,60,1,ACTIVITÉ,NAN
6300,6,2021-08-16,07:00:00,0,0,0,S-REPOS,NAN


### Add a column "index1"

In [9]:
df = df.rename_axis('index1').reset_index()
df

Unnamed: 0,index1,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,0,1,2021-08-11,22:00:00,1,NAN,NAN,EXCLU,EXCLU
1,1,1,2021-08-11,22:01:00,0,176,NAN,ACTIVITÉ,NAN
2,2,1,2021-08-11,22:02:00,0,482,1,ACTIVITÉ,NAN
3,3,1,2021-08-11,22:03:00,0,373,1,ACTIVITÉ,NAN
4,4,1,2021-08-11,22:04:00,0,452,1,ACTIVITÉ,NAN
...,...,...,...,...,...,...,...,...,...
7871,7871,7,2021-08-17,09:11:00,0,139,1,ACTIVITÉ,NAN
7872,7872,7,2021-08-17,09:12:00,0,144,1,ACTIVITÉ,NAN
7873,7873,7,2021-08-17,09:13:00,0,182,1,ACTIVITÉ,NAN
7874,7874,7,2021-08-17,09:14:00,0,17,NAN,ACTIVITÉ,NAN


### Create function that checks multiple conditions 

In [10]:
def rename_periods(x):
    if x in range(0, 540):
        return ('night_1')
    if x in range(540, 1440):
        return ('day_2')
    if x in range(1440, 1980):
        return ('night_2')
    if x in range(1980, 2880):
        return ('day_3')
    if x in range(2880, 3420):
        return ('night_3')
    if x in range(3420, 4320):
        return ('day_4')
    if x in range(4320, 4860):
        return ('night_4')
    if x in range(4860, 5760):
        return ('day_5')
    if x in range(5760, 6300):
        return ('night_5')
    if x in range(6300, 7200):
        return ('day_6')
    if x in range(7200, 7740):
        return ('night_6')
    if x in range(7740, 7876):
        return ('day_7')
        

### Apply function on extra column added earlier

In [11]:
df['index1'] = df['index1'].apply(rename_periods)
df

Unnamed: 0,index1,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,night_1,1,2021-08-11,22:00:00,1,NAN,NAN,EXCLU,EXCLU
1,night_1,1,2021-08-11,22:01:00,0,176,NAN,ACTIVITÉ,NAN
2,night_1,1,2021-08-11,22:02:00,0,482,1,ACTIVITÉ,NAN
3,night_1,1,2021-08-11,22:03:00,0,373,1,ACTIVITÉ,NAN
4,night_1,1,2021-08-11,22:04:00,0,452,1,ACTIVITÉ,NAN
...,...,...,...,...,...,...,...,...,...
7871,day_7,7,2021-08-17,09:11:00,0,139,1,ACTIVITÉ,NAN
7872,day_7,7,2021-08-17,09:12:00,0,144,1,ACTIVITÉ,NAN
7873,day_7,7,2021-08-17,09:13:00,0,182,1,ACTIVITÉ,NAN
7874,day_7,7,2021-08-17,09:14:00,0,17,NAN,ACTIVITÉ,NAN


### Rename the column

In [12]:
df_clean =df.rename(columns={"index1":"periods"})

In [13]:
df_clean

Unnamed: 0,periods,Jour,Date,Heure,Statut hors poignet,Activité,Sommeil/Éveil,Statut de l’intervalle,Statut Sommeil/Éveil
0,night_1,1,2021-08-11,22:00:00,1,NAN,NAN,EXCLU,EXCLU
1,night_1,1,2021-08-11,22:01:00,0,176,NAN,ACTIVITÉ,NAN
2,night_1,1,2021-08-11,22:02:00,0,482,1,ACTIVITÉ,NAN
3,night_1,1,2021-08-11,22:03:00,0,373,1,ACTIVITÉ,NAN
4,night_1,1,2021-08-11,22:04:00,0,452,1,ACTIVITÉ,NAN
...,...,...,...,...,...,...,...,...,...
7871,day_7,7,2021-08-17,09:11:00,0,139,1,ACTIVITÉ,NAN
7872,day_7,7,2021-08-17,09:12:00,0,144,1,ACTIVITÉ,NAN
7873,day_7,7,2021-08-17,09:13:00,0,182,1,ACTIVITÉ,NAN
7874,day_7,7,2021-08-17,09:14:00,0,17,NAN,ACTIVITÉ,NAN


In [14]:
#df_clean = pd.DataFrame(df_clean, columns=['periods', 'Jour','Date','Heure','Status hors poignet',"Activité","Sommeil/Éveil","Statut de l'intervalle",'Statut Sommeil/Éveil'])

### Transform the data of Activité in float data

In [15]:
df_clean["Activité"].dtype

dtype('O')

In [16]:
df_clean['Activité'] = df_clean['Activité'].astype(float, errors = 'ignore')

In [17]:
df_clean["Activité"].dtype

dtype('float64')

### Calculate sum and mean activity for each period

In [18]:
df_sums = df_clean.groupby('periods')['Activité'].agg(['sum','mean'])

In [19]:
df_sums

Unnamed: 0_level_0,sum,mean
periods,Unnamed: 1_level_1,Unnamed: 2_level_1
day_2,101721.0,113.023333
day_3,167278.0,185.864444
day_4,228567.0,309.711382
day_5,206042.0,230.988789
day_6,87384.0,97.964126
day_7,29368.0,215.941176
night_1,26052.0,48.333952
night_2,20120.0,38.470363
night_3,76891.0,142.390741
night_4,46662.0,86.411111


### Find the number of Nan in 'Activité'

In [20]:
df_clean.groupby('periods').agg({'Activité': [ lambda x: x.isnull().sum()]})

Unnamed: 0_level_0,Activité
Unnamed: 0_level_1,<lambda>
periods,Unnamed: 1_level_2
day_2,0
day_3,0
day_4,162
day_5,8
day_6,8
day_7,0
night_1,1
night_2,17
night_3,0
night_4,0


### Replace nan by the mean of the associate day

In [21]:
for p in df_clean["periods"]:
    df_clean.loc[(df_clean.periods==p) & (df_clean['Activité'].isnull()), "Activité"] = df_sums.loc[p]["mean"]

### Verify all nan are remplaced

In [22]:
df_clean.groupby('periods').agg({'Activité': [ lambda x: x.isnull().sum()]})

Unnamed: 0_level_0,Activité
Unnamed: 0_level_1,<lambda>
periods,Unnamed: 1_level_2
day_2,0
day_3,0
day_4,0
day_5,0
day_6,0
day_7,0
night_1,0
night_2,0
night_3,0
night_4,0


### Calculate activity ratio

In [26]:
# Daytime activity ratio = (Daytime activity/24-Hour activity) × 100.

for i in range(2, 7):
    activity_ratio = df_sums.loc[f"day_{i}"]["sum"] / (df_sums.loc[f"day_{i}"]["sum"] + df_sums.loc[f"night_{i}"]["sum"])
    print(f"activity ratio for day {i}: {activity_ratio:.2f}")


activity ratio for day 2: 0.75
activity ratio for day 3: 0.57
activity ratio for day 4: 0.78
activity ratio for day 5: 0.83
activity ratio for day 6: 0.65
