# ALS Project

### Vital Signs

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn

In [2]:
%load_ext autoreload
%autoreload 2

## functions

## Vitals

In [96]:
vital = pd.read_csv("data/VitalSigns.csv")

In [97]:
print(vital.shape)
vital.head()

(72422, 36)


Unnamed: 0,subject_id,Blood_Pressure_Diastolic,Blood_Pressure_Diastolic_Units,Blood_Pressure_Systolic,Blood_Pressure_Systolic_Units,Height,Height_Units,Pulse,Pulse_Units,Respiratory_Rate,...,Baseline_Standing_Pulse,Endpoint_Weight,Endpoint_Standing_BP_Diastolic,Endpoint_Standing_BP_Systolic,Endpoint_Supine_BP_Diastolic,Endpoint_Supine_BP_Systolic,Supine_BP_Diastolic,Supine_BP_Systolic,Standing_BP_Diastolic,Standing_BP_Systolic
0,329,,,,,157.5,,,,,...,,,,,,,,,,
1,329,,,,,,,,,32.0,...,,,,,,,70.0,110.0,60.0,106.0
2,329,,,,,,,,,29.0,...,,,,,,,65.0,107.0,65.0,107.0
3,329,,,,,,,,,26.0,...,,,,,,,60.0,104.0,70.0,108.0
4,329,,,,,,,,,26.0,...,,,,,,,70.0,120.0,64.0,112.0


In [98]:
# add days since disease onset
onset=pd.read_csv("clean_data/onset_date.csv")
onset=onset[['subject_id', 'Onset_Delta']].copy()
onset.head()

Unnamed: 0,subject_id,Onset_Delta
0,329,-1706.0
1,348,-501.0
2,533,-1023.0
3,586,-715.0
4,649,-341.0


In [99]:
vital = vital.merge(onset, how='outer', on='subject_id')

In [100]:
vital.columns

Index(['subject_id', 'Blood_Pressure_Diastolic',
       'Blood_Pressure_Diastolic_Units', 'Blood_Pressure_Systolic',
       'Blood_Pressure_Systolic_Units', 'Height', 'Height_Units', 'Pulse',
       'Pulse_Units', 'Respiratory_Rate', 'Respiratory_Rate_Units',
       'Temperature', 'Temperature_Units', 'Weight', 'Weight_Units',
       'Vital_Signs_Delta', 'Baseline_Standing_BP_Diastolic',
       'Baseline_Standing_BP_Systolic', 'Baseline_Supine_BP_Diastolic',
       'Baseline_Supine_BP_Systolic', 'Baseline_Weight', 'Supine_Pulse',
       'Standing_Pulse', 'Endpoint_Supine_Pulse', 'Endpoint_Standing_Pulse',
       'Baseline_Supine_Pulse', 'Baseline_Standing_Pulse', 'Endpoint_Weight',
       'Endpoint_Standing_BP_Diastolic', 'Endpoint_Standing_BP_Systolic',
       'Endpoint_Supine_BP_Diastolic', 'Endpoint_Supine_BP_Systolic',
       'Supine_BP_Diastolic', 'Supine_BP_Systolic', 'Standing_BP_Diastolic',
       'Standing_BP_Systolic', 'Onset_Delta'],
      dtype='object')

In [101]:
vital['days_since_onset'] = np.abs(vital['Onset_Delta']) + vital['Vital_Signs_Delta']

In [102]:
print(vital.shape)
vital.head()

(72653, 38)


Unnamed: 0,subject_id,Blood_Pressure_Diastolic,Blood_Pressure_Diastolic_Units,Blood_Pressure_Systolic,Blood_Pressure_Systolic_Units,Height,Height_Units,Pulse,Pulse_Units,Respiratory_Rate,...,Endpoint_Standing_BP_Diastolic,Endpoint_Standing_BP_Systolic,Endpoint_Supine_BP_Diastolic,Endpoint_Supine_BP_Systolic,Supine_BP_Diastolic,Supine_BP_Systolic,Standing_BP_Diastolic,Standing_BP_Systolic,Onset_Delta,days_since_onset
0,329,,,,,157.5,,,,,...,,,,,,,,,-1706.0,
1,329,,,,,,,,,32.0,...,,,,,70.0,110.0,60.0,106.0,-1706.0,1706.0
2,329,,,,,,,,,29.0,...,,,,,65.0,107.0,65.0,107.0,-1706.0,1714.0
3,329,,,,,,,,,26.0,...,,,,,60.0,104.0,70.0,108.0,-1706.0,1722.0
4,329,,,,,,,,,26.0,...,,,,,70.0,120.0,64.0,112.0,-1706.0,1748.0


## Weight

In [115]:
# weight units
vital['Weight_Units'].value_counts()

Kilograms    35709
Pounds         286
Name: Weight_Units, dtype: int64

In [141]:
# convert pounds to kilograms
vital.loc[ vital['Weight_Units']=="Pounds", 'Weight'] = vital['Weight'] * 0.453592
vital['Weight_Units'].replace('Pounds', 'Kilograms', inplace=True)
vital['Weight_Units'].value_counts()

In [145]:
# get weights for each subject for use in models.
weight=vital[['subject_id', 'days_since_onset', 'Weight']].copy()
weight.dropna(inplace=True)
weight.reset_index(drop=True, inplace=True)
weight['weight']=weight['Weight']
weight.drop('Weight', axis=1, inplace=True)
print(weight.shape)
weight.head()

(49242, 3)


Unnamed: 0,subject_id,days_since_onset,weight
0,329,1706.0,52.2
1,329,1714.0,52.1
2,329,1722.0,52.0
3,329,1748.0,49.8
4,329,1778.0,45.8


In [146]:
# how much does weight change for each subject over time?
gr_weight_mean = weight.groupby('subject_id').agg({'weight':['mean', 'std']})
gr_weight_mean.head()

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,mean,std
subject_id,Unnamed: 1_level_2,Unnamed: 2_level_2
329,45.4,4.766932
348,119.7125,1.790002
533,49.833333,2.015564
586,61.8,
649,68.05,7.763258


In [147]:
# the average standard deviation for each subject is # 2.887 kg
# ok to proceed using first value for weight.
gr_weight_mean['weight']['std'].mean()

2.823979374570086

In [148]:
# index of min day
gr_weight = weight.groupby('subject_id')['days_since_onset'].idxmin()
gr_weight.head()

subject_id
329     0
348    12
533    20
586    29
649    30
Name: days_since_onset, dtype: int64

In [149]:
# for each subject, each question's response at max assessment study day 
min_day_weights = weight.iloc[gr_weight]
min_day_weights.head()

Unnamed: 0,subject_id,days_since_onset,weight
0,329,1706.0,52.2
12,348,501.0,118.8
20,533,1023.0,51.0
29,586,715.0,61.8
30,649,341.0,73.7


In [150]:
min_day_weights['days_since_onset'].unique()

array([1706.,  501., 1023., ..., 1589., 1226., 2865.])

In [151]:
weights = min_day_weights[['subject_id', 'weight']]
weights.reset_index(drop=True, inplace=True)
weights.head()

Unnamed: 0,subject_id,weight
0,329,52.2
1,348,118.8
2,533,51.0
3,586,61.8
4,649,73.7


In [152]:
# weight greater than 115 kg.


In [153]:
# save scores to file
filename = 'clean_data/weight.csv'
weights.to_csv(filename, index=False)