In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyreadstat
from datetime import datetime, timedelta
import json
import csv
import glob

In [2]:
# Load the (participantId, file_path) index. final.csv is read without a header
# row, so the two column names are supplied explicitly.
final_df = pd.read_csv(
    'final.csv',
    header=None,
    names=['participantId', 'file_path'],
)

# Load the table of participant ids with their BP_date and preview its first rows.
non_nan_participant_ids_with_BP_date = pd.read_csv(
    'non_nan_participant_ids_with_BP_date.csv'
)
non_nan_participant_ids_with_BP_date.head()

Unnamed: 0,patientId,BP_date
0,1138tyi7,2021-04-27
1,1218su9b,2022-05-12
2,123319vl,2020-05-19
3,12r8yacr,2022-09-07
4,137yw8d9,2022-07-17


In [3]:
# Keep only the file rows whose participant id appears in the BP_date table.
has_bp_date = final_df['participantId'].isin(
    non_nan_participant_ids_with_BP_date['patientId']
)
filtered_final_df = final_df[has_bp_date]

# Persist the filtered index with neither a header row nor the row index.
filtered_final_df.to_csv(
    'filtered_final.csv',
    index=False,
    header=None,
)

print("Filtered CSV file 'filtered_final.csv' has been saved.")

Filtered CSV file 'filtered_final.csv' has been saved.


In [4]:
# Number of distinct participants remaining in the filtered file index
participant_ids = filtered_final_df['participantId']
num_participants = participant_ids.nunique()

print(f'Total number of participants before filtering based on BD_date: {num_participants}')

Total number of participants before filtering based on BD_date: 1353


In [5]:
# Preview the first five rows of the filtered file index
filtered_final_df.head(5)

Unnamed: 0,participantId,file_path
1,1138tyi7,/proj/sens2021503/mom2b/decrypted-data/1njtfXo...
2,1138tyi7,/proj/sens2021503/mom2b/decrypted-data/1njtfXo...
3,1138tyi7,/proj/sens2021503/mom2b/decrypted-data/1njtfXo...
4,1138tyi7,/proj/sens2021503/mom2b/decrypted-data/1njtfXo...
5,1138tyi7,/proj/sens2021503/mom2b/decrypted-data/1njtfXo...


In [6]:
# Preview the first five rows of the BP_date table
non_nan_participant_ids_with_BP_date.head(5)

Unnamed: 0,patientId,BP_date
0,1138tyi7,2021-04-27
1,1218su9b,2022-05-12
2,123319vl,2020-05-19
3,12r8yacr,2022-09-07
4,137yw8d9,2022-07-17


In [7]:
# Build one DataFrame (and CSV) of accelerometer samples for the filtered participants.
#
# NOTE(review): a participant is skipped as soon as one of their files has been
# read successfully, so only the first readable file per participant ends up in
# data.csv -- confirm this is intended, since it is not *all* of their data.
#
# The per-file frames are collected in a list and concatenated once at the end:
# DataFrame.append re-copied the accumulated frame on every iteration and was
# removed in pandas 2.0.
participant_frames = []

unique_participant_ids = set()

# Upper bound on the number of participants to load.
max_participant_ids = 1500

for participant_id, file_path in zip(filtered_final_df['participantId'],
                                     filtered_final_df['file_path']):
    # One file per participant: skip ids that have already been loaded.
    if participant_id in unique_participant_ids:
        continue

    try:
        temp_df = pd.read_csv(file_path, encoding='latin1')
    except Exception as e:
        # Best effort: report the unreadable file and move on, so that the
        # participant's next file (if any) is tried instead.
        print(f"Error reading file {file_path}: {e}")
        continue

    # Tag every sample with the participant it belongs to.
    temp_df['participant_id'] = participant_id
    participant_frames.append(temp_df)
    unique_participant_ids.add(participant_id)

    print(f"Added: {participant_id}")

    if len(unique_participant_ids) >= max_participant_ids:
        break

# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the original empty accumulator would have produced.
if participant_frames:
    combined_df = pd.concat(participant_frames, ignore_index=True)
else:
    combined_df = pd.DataFrame()

print(combined_df)
combined_df.to_csv('data.csv', index=False)

Added: 1138tyi7
Added: 1218su9b
Added: 123319vl
Added: 12r8yacr
Added: 137yw8d9
Added: 13sbznj2
Added: 15v44gng
Added: 18gn254h
Added: 18q5rnyh
Added: 1a6ns7az
Added: 1aiwoi1r
Added: 1awpx63g
Added: 1bb1sbd4
Added: 1eitcp2v
Added: 1etji59k
Added: 1fdkrv31
Added: 1fuvg9fg
Added: 1g49kecu
Added: 1gvqfjot
Added: 1igxecsv
Added: 1iimzkcp
Added: 1j9bjj7t
Added: 1mbe4pj1
Added: 1njosvui
Added: 1nwnx4fv
Added: 1oyfbgo6
Added: 1qw7f85b
Added: 1qzg7hub
Added: 1s38ylka
Added: 1sjr3cob
Added: 1u4mk2dm
Added: 1vgt37zx
Added: 1w88oxbw
Added: 1x4vdo24
Added: 1yk9wv4n
Added: 1zw6nloy
Added: 21grthwt
Added: 23g4jewz
Added: 23votu9w
Added: 256lmwns
Added: 26azys4c
Added: 27ehy4lg
Added: 27nq1yq2
Added: 28s6n4v5


  interactivity=interactivity, compiler=compiler, result=result)


Added: 291se9py
Added: 29j5z27b
Added: 29lphm4m
Added: 2d5h5je7
Added: 2dvfpjnu
Added: 2e44wce4
Added: 2f1pd71i
Added: 2fn14h1t
Added: 2gjlsr41
Added: 2gn4j3eg
Added: 2kondenw
Added: 2ldm1viv
Added: 2ldvcvoi
Added: 2ljs29wk
Added: 2m8euavf
Added: 2mhvgkuy
Added: 2n1jyicz
Added: 2nm9eo5u
Added: 2nuudy2h
Added: 2o2gozd4
Added: 2ovblnqy
Added: 2py9gr83
Added: 2q6ywmkh
Added: 2q7qhk8p
Added: 2rki4ezp
Added: 2rm1yzpf
Added: 2skpyibk
Added: 2ve4udqs
Added: 2wc2eg41
Added: 2wxl95t4
Added: 2xz8775k
Added: 2ytmqib7
Added: 314yfoq6
Added: 32a4ldjc
Added: 33btpmd3
Added: 34gf6a95
Added: 36mgwnod
Added: 37lpj6q8
Added: 37uvc3mk
Added: 3912wyay
Added: 3973wh7b
Added: 39nznaac
Added: 39trgo8h
Added: 3a31d3lt
Added: 3ajavs5r
Added: 3b363dnt
Added: 3cfprhca
Added: 3cu4d7t7
Added: 3dpwfucu
Added: 3e78b1io
Added: 3eu8ry23
Added: 3fe6guni
Added: 3g47mclc
Added: 3h7v2i3q
Added: 3hovkdk7
Added: 3hschsyu
Added: 3hsqqe6e
Added: 3hzjn4wi
Added: 3ikqah5a
Added: 3j4lo6fi
Added: 3lgoslh8
Added: 3pkuiucw
Added: 3

  interactivity=interactivity, compiler=compiler, result=result)


Added: 9xcyne6m
Added: 9xf5643z
Added: 9yfb4k2b
Added: 9yfxsl22
Added: 9ypunl81
Added: 9yxvlvei
Added: 9zbi7mvv
Added: a2hg8owz
Added: a2w8vnbo
Added: a3bhugqd
Added: a3ktszct
Added: a4vr4rzo
Added: a7r3kvre
Added: a88fgdk2
Added: a89hyea7
Added: a8glrexa
Added: a9lb5lcg
Added: a9nhlehf
Added: aag6ln44
Added: ab2lnq1u
Added: ab8lo1ex
Added: abazja7s
Added: acxxmgb2
Added: adiebo2t
Added: adkujdof
Added: aduqekoo
Added: adv9iftj
Added: afae6iam
Added: aflnz2h1
Added: afw9wrlc
Added: ah6abwmg
Added: aimbcynu
Added: akbg4fo9
Added: al74hpca
Added: alszby2a
Added: amy43cab
Added: anibdjnh
Added: anpcij3q
Added: ap9twy8o
Added: aq263314
Added: aqwfoctq
Added: auxxzbry
Added: avsayuok
Added: avsdvocm
Added: aw5waxhs
Added: ax4n4l99
Added: axzclvuh


  interactivity=interactivity, compiler=compiler, result=result)


Added: ay2yk395
Added: az27ojef
Added: az8jot5l
Added: azk3zimh
Added: azq7nzyd
Added: azsqieav
Added: b1luk2n2
Added: b2v3xh6e
Added: b2ww5ozb
Added: b3l9i4z4
Added: b4byaz27
Added: b4whcoi6
Added: b5kxxy3j
Added: b5nnuaxe
Added: b7ob8db4
Added: b8rsg6fr
Added: b8vpq1es
Added: b999rwbx
Added: baee7dfr
Added: bahb4hjt
Added: bbd1gjk7
Added: bbi88rrj
Added: bc4rkwfc
Error reading file /proj/sens2021503/mom2b/decrypted-data/1njtfXoAe9nkpTD6Q1wvxttT/bcfyjhvy/accel_2020-10.csv: Error tokenizing data. C error: Buffer overflow caught - possible malformed input file.

Added: bcfyjhvy
Added: bckychvr
Added: bcmm16x4
Added: bcrsztqa
Added: bcs95659
Added: bd9462gl
Added: bdprdoxx
Added: bh3cet56
Added: bh4v2z6r
Added: bhvxov6v
Added: binlli47
Added: bjbzj7nd
Added: bjoyy1sr
Added: bjyd1mc9
Added: bk6u7osm
Added: bkemijrb
Added: bmbyzv1v
Added: bnehmqnx
Added: boc7t33t
Added: bofustou
Added: bpgstvdd
Added: bq7vgpj9
Added: bqcxypcs
Added: brsuklwt
Added: bsrz5gje
Added: btbk3cj1
Added: btqv9xpe


Added: nj5cpqpi
Added: nkmbyxm2
Added: nkrbze4d
Added: nl9rzlsb
Added: nnc7xvr7
Added: np4gavsx
Added: nqne5wga
Added: ntl9iy24
Added: nuc3g4zb
Added: ny68d87n
Added: nya5lfm6
Added: nyew2noj
Added: nzfmidqk
Added: nzkcpn76
Added: nzyabr4m
Added: o167aqa6
Added: o3a6erz7
Added: o41ld763
Added: o4aggpv3
Added: o4q5pup6
Added: o5186u68
Added: o5uaz8nc
Added: o76kfo4y
Added: o81a3t5m
Added: o8vjqe88
Added: o96prip6
Added: o9a3sztn
Added: ob6ufrb9
Added: obgn8ch6
Added: ocx67aao
Added: ocyv8awt
Added: od5a2is5
Added: odh36ugt
Added: oe89f9ho
Added: oehsgrti
Added: of73w78y
Added: ogwowvw3
Added: ohe6xm7u
Added: ohezpspw
Added: okjmrtfp
Added: omlj58vd
Added: omuqw4i9
Added: on5d65tn
Added: on8h7fxw
Added: ontryssu
Added: ookknk4n
Added: oosyoq3j
Added: oq2bb1ww
Added: oq87zhnn
Added: oq8d7os6
Added: osxxlwhf
Added: ot28g5nz
Added: otvmy9os
Added: owvxs7ot
Added: owwen9lg
Added: p1c67whq
Added: p1fesa8l
Added: p1mfu9qm
Added: p2cb4u2l
Added: p3dtzahw
Added: p5lmpqzy
Added: p5qk977a
Added: p

  interactivity=interactivity, compiler=compiler, result=result)


Added: yvqb1hop
Added: yw9by7s7
Added: ywg2krkl
Added: yzeps5to
Added: z2e8vhnf
Added: z3odci6b
Added: z546sdi4
Added: z5mnb2tm
Added: z9xhexwh
Added: zawnmv3k
Added: zb7qi353
Added: zbtcpqvm
Added: zc8hufmq
Added: zc8k1oyu
Added: zcaxzj9i
Added: zcjosdsx
Added: zdhw3ile
Added: zdxrklnv
Added: zfs7uq41
Added: zg9rgeyb
Added: znq774yg
Added: zohn9vns
Added: zq4d1k76
Added: zsuinu58
Added: zt1svhal
Added: zuezvr6l
Added: zwjbh8i8
               timestamp accuracy            x           y         z  \
0          1638319134619  unknown  7.62939e-05   0.0280151 -0.997101   
1          1638319167717  unknown -0.000442505   0.0270844 -0.996017   
2          1638319200915  unknown  0.000305176   0.0275116 -0.996979   
3          1638319234073  unknown -6.10352e-05   0.0275269 -0.997025   
4          1638319267252  unknown -0.000411987   0.0270844 -0.997040   
5          1638319300309  unknown  6.10352e-05   0.0278015 -0.996658   
6          1638319333427  unknown  -0.00012207   0.0280151 -0.99

In [2]:
# Reload the combined accelerometer samples from data.csv
data_df = pd.read_csv('data.csv')

# Derive a datetime column from the millisecond timestamps;
# values that cannot be converted become NaT
data_df['datetime'] = pd.to_datetime(data_df['timestamp'], unit='ms', errors='coerce')

# Coerce each axis column to numeric (non-numeric entries become NaN) ...
axis_columns = ['x', 'y', 'z']
for axis in axis_columns:
    data_df[axis] = pd.to_numeric(data_df[axis], errors='coerce')
# ... then discard every row in which any axis value is missing
data_df = data_df.dropna(subset=axis_columns)


  interactivity=interactivity, compiler=compiler, result=result)


In [3]:

# Load the per-participant BP_date table (patientId, BP_date)
bp_dates_df = pd.read_csv(
    filepath_or_buffer='non_nan_participant_ids_with_BP_date.csv',
)


In [4]:
# Parse BP_date into pandas datetimes so it can be compared with data_df['datetime'].
# data_df['datetime'] was already built with pd.to_datetime, so it is not converted again.
bp_date_parsed = pd.to_datetime(bp_dates_df['BP_date'])
bp_dates_df['BP_date'] = bp_date_parsed

In [5]:
# Attach each participant's BP_date to their samples. The join is a left join,
# so samples without a matching patientId get a missing BP_date.
merged_df = data_df.merge(
    bp_dates_df,
    left_on='participant_id',
    right_on='patientId',
    how='left',
)

# Keep the samples recorded ON OR AFTER the participant's BP_date; rows with a
# missing datetime or BP_date compare as False and are dropped.
# NOTE(review): the previous comment here said "before the birthdate", which
# contradicts the >= comparison -- confirm which direction is intended.
on_or_after_bp_date = merged_df['datetime'] >= merged_df['BP_date']
filtered_df = merged_df[on_or_after_bp_date]

# Remove the helper columns that were only needed for the comparison
data_df = filtered_df.drop(columns=['patientId', 'BP_date'])

data_df.head()

Unnamed: 0,timestamp,accuracy,x,y,z,participant_id,datetime
277168,1591024661185,unknown,-0.466537,-0.112259,-0.932922,123319vl,2020-06-01 15:17:41.185
277169,1591024661285,unknown,-0.457977,-0.072937,-1.057404,123319vl,2020-06-01 15:17:41.285
277170,1591024661384,unknown,-0.483963,-0.048065,-0.873962,123319vl,2020-06-01 15:17:41.384
277171,1591024661484,unknown,-0.432449,-0.067795,-0.743698,123319vl,2020-06-01 15:17:41.484
277172,1591024661583,unknown,-0.46077,-0.131516,-0.879547,123319vl,2020-06-01 15:17:41.583


In [6]:
# Euclidean norm of the three axis readings for every sample
sum_of_squares = data_df['x'].pow(2) + data_df['y'].pow(2) + data_df['z'].pow(2)
data_df['magnitude'] = sum_of_squares ** 0.5

In [7]:
# Number of distinct participants that still have samples after the date filter
remaining_ids = data_df['participant_id']
unique_participant_count = remaining_ids.nunique()
print("Count of unique participant_id:", unique_participant_count)

Count of unique participant_id: 709


In [9]:
# Save the date-filtered samples (without the row index).
# NOTE(review): this overwrites the same data.csv that was read to build data_df,
# so re-running from that read will load already-filtered data -- consider
# writing to a separate output file.
data_df.to_csv(path_or_buf='data.csv', index=False)